index df287d8295a3df930643ec6db4a093465ca3ad08..4e1ee10f0642763492c2fb62f801d2fd948f0f79 100644 (file)
# BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
# SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
#
# BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
# SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
#
-#$Id: back_anydbm.py,v 1.30 2002-02-27 03:40:59 richard Exp $
+#$Id: back_anydbm.py,v 1.43 2002-07-10 06:30:30 richard Exp $
'''
This module defines a backend that saves the hyperdatabase in a database
chosen by anydbm. It is guaranteed to always be available in python
'''
This module defines a backend that saves the hyperdatabase in a database
chosen by anydbm. It is guaranteed to always be available in python
import whichdb, anydbm, os, marshal
from roundup import hyperdb, date
from blobfiles import FileStorage
import whichdb, anydbm, os, marshal
from roundup import hyperdb, date
from blobfiles import FileStorage
+from roundup.indexer import Indexer
+from locking import acquire_lock, release_lock
#
# Now the database
#
# Now the database
self.dirtynodes = {} # keep track of the dirty nodes by class
self.newnodes = {} # keep track of the new nodes by class
self.transactions = []
self.dirtynodes = {} # keep track of the dirty nodes by class
self.newnodes = {} # keep track of the new nodes by class
self.transactions = []
+ self.indexer = Indexer(self.dir)
+ # ensure files are group readable and writable
+ os.umask(0002)
+
+ def post_init(self):
+ """Called once the schema initialisation has finished."""
+ # reindex the db if necessary
+ if self.indexer.should_reindex():
+ self.reindex()
+
+ def reindex(self):
+ for klass in self.classes.values():
+ for nodeid in klass.list():
+ klass.index(nodeid)
+ self.indexer.save_index()
def __repr__(self):
return '<back_anydbm instance at %x>'%id(self)
def __repr__(self):
return '<back_anydbm instance at %x>'%id(self)
def __getattr__(self, classname):
"""A convenient way of calling self.getclass(classname)."""
if self.classes.has_key(classname):
def __getattr__(self, classname):
"""A convenient way of calling self.getclass(classname)."""
if self.classes.has_key(classname):
- if hyperdb.DEBUG:
- print '__getattr__', (self, classname)
+ if __debug__:
+ print >>hyperdb.DEBUG, '__getattr__', (self, classname)
return self.classes[classname]
raise AttributeError, classname
def addclass(self, cl):
return self.classes[classname]
raise AttributeError, classname
def addclass(self, cl):
- if hyperdb.DEBUG:
- print 'addclass', (self, cl)
+ if __debug__:
+ print >>hyperdb.DEBUG, 'addclass', (self, cl)
cn = cl.classname
if self.classes.has_key(cn):
raise ValueError, cn
cn = cl.classname
if self.classes.has_key(cn):
raise ValueError, cn
def getclasses(self):
"""Return a list of the names of all existing classes."""
def getclasses(self):
"""Return a list of the names of all existing classes."""
- if hyperdb.DEBUG:
- print 'getclasses', (self,)
+ if __debug__:
+ print >>hyperdb.DEBUG, 'getclasses', (self,)
l = self.classes.keys()
l.sort()
return l
l = self.classes.keys()
l.sort()
return l
If 'classname' is not a valid class name, a KeyError is raised.
"""
If 'classname' is not a valid class name, a KeyError is raised.
"""
- if hyperdb.DEBUG:
- print 'getclass', (self, classname)
+ if __debug__:
+ print >>hyperdb.DEBUG, 'getclass', (self, classname)
return self.classes[classname]
#
return self.classes[classname]
#
def clear(self):
'''Delete all database contents
'''
def clear(self):
'''Delete all database contents
'''
- if hyperdb.DEBUG:
- print 'clear', (self,)
+ if __debug__:
+ print >>hyperdb.DEBUG, 'clear', (self,)
for cn in self.classes.keys():
for dummy in 'nodes', 'journals':
path = os.path.join(self.dir, 'journals.%s'%cn)
for cn in self.classes.keys():
for dummy in 'nodes', 'journals':
path = os.path.join(self.dir, 'journals.%s'%cn)
''' grab a connection to the class db that will be used for
multiple actions
'''
''' grab a connection to the class db that will be used for
multiple actions
'''
- if hyperdb.DEBUG:
- print 'getclassdb', (self, classname, mode)
+ if __debug__:
+ print >>hyperdb.DEBUG, 'getclassdb', (self, classname, mode)
return self._opendb('nodes.%s'%classname, mode)
def _opendb(self, name, mode):
'''Low-level database opener that gets around anydbm/dbm
eccentricities.
'''
return self._opendb('nodes.%s'%classname, mode)
def _opendb(self, name, mode):
'''Low-level database opener that gets around anydbm/dbm
eccentricities.
'''
- if hyperdb.DEBUG:
- print '_opendb', (self, name, mode)
+ if __debug__:
+ print >>hyperdb.DEBUG, '_opendb', (self, name, mode)
+
# determine which DB wrote the class file
db_type = ''
path = os.path.join(os.getcwd(), self.dir, name)
# determine which DB wrote the class file
db_type = ''
path = os.path.join(os.getcwd(), self.dir, name)
# new database? let anydbm pick the best dbm
if not db_type:
# new database? let anydbm pick the best dbm
if not db_type:
- if hyperdb.DEBUG:
- print "_opendb anydbm.open(%r, 'n')"%path
+ if __debug__:
+ print >>hyperdb.DEBUG, "_opendb anydbm.open(%r, 'n')"%path
return anydbm.open(path, 'n')
# open the database with the correct module
return anydbm.open(path, 'n')
# open the database with the correct module
except ImportError:
raise hyperdb.DatabaseError, \
"Couldn't open database - the required module '%s'"\
except ImportError:
raise hyperdb.DatabaseError, \
"Couldn't open database - the required module '%s'"\
- "is not available"%db_type
- if hyperdb.DEBUG:
- print "_opendb %r.open(%r, %r)"%(db_type, path, mode)
+ " is not available"%db_type
+ if __debug__:
+ print >>hyperdb.DEBUG, "_opendb %r.open(%r, %r)"%(db_type, path,
+ mode)
return dbm.open(path, mode)
return dbm.open(path, mode)
+ def _lockdb(self, name):
+ ''' Lock a database file
+ '''
+ path = os.path.join(os.getcwd(), self.dir, '%s.lock'%name)
+ return acquire_lock(path)
+
+ #
+ # Node IDs
+ #
+ def newid(self, classname):
+ ''' Generate a new id for the given class
+ '''
+ # open the ids DB - create it if it doesn't exist
+ lock = self._lockdb('_ids')
+ db = self._opendb('_ids', 'c')
+ if db.has_key(classname):
+ newid = db[classname] = str(int(db[classname]) + 1)
+ else:
+ # the count() bit is transitional - older dbs won't start at 1
+ newid = str(self.getclass(classname).count()+1)
+ db[classname] = newid
+ db.close()
+ release_lock(lock)
+ return newid
+
#
# Nodes
#
def addnode(self, classname, nodeid, node):
''' add the specified node to its class's db
'''
#
# Nodes
#
def addnode(self, classname, nodeid, node):
''' add the specified node to its class's db
'''
- if hyperdb.DEBUG:
- print 'addnode', (self, classname, nodeid, node)
+ if __debug__:
+ print >>hyperdb.DEBUG, 'addnode', (self, classname, nodeid, node)
self.newnodes.setdefault(classname, {})[nodeid] = 1
self.cache.setdefault(classname, {})[nodeid] = node
self.savenode(classname, nodeid, node)
self.newnodes.setdefault(classname, {})[nodeid] = 1
self.cache.setdefault(classname, {})[nodeid] = node
self.savenode(classname, nodeid, node)
def setnode(self, classname, nodeid, node):
''' change the specified node
'''
def setnode(self, classname, nodeid, node):
''' change the specified node
'''
- if hyperdb.DEBUG:
- print 'setnode', (self, classname, nodeid, node)
+ if __debug__:
+ print >>hyperdb.DEBUG, 'setnode', (self, classname, nodeid, node)
self.dirtynodes.setdefault(classname, {})[nodeid] = 1
self.dirtynodes.setdefault(classname, {})[nodeid] = 1
+
# can't set without having already loaded the node
self.cache[classname][nodeid] = node
self.savenode(classname, nodeid, node)
# can't set without having already loaded the node
self.cache[classname][nodeid] = node
self.savenode(classname, nodeid, node)
def savenode(self, classname, nodeid, node):
''' perform the saving of data specified by the set/addnode
'''
def savenode(self, classname, nodeid, node):
''' perform the saving of data specified by the set/addnode
'''
- if hyperdb.DEBUG:
- print 'savenode', (self, classname, nodeid, node)
+ if __debug__:
+ print >>hyperdb.DEBUG, 'savenode', (self, classname, nodeid, node)
self.transactions.append((self._doSaveNode, (classname, nodeid, node)))
def getnode(self, classname, nodeid, db=None, cache=1):
''' get a node from the database
'''
self.transactions.append((self._doSaveNode, (classname, nodeid, node)))
def getnode(self, classname, nodeid, db=None, cache=1):
''' get a node from the database
'''
- if hyperdb.DEBUG:
- print 'getnode', (self, classname, nodeid, db)
+ if __debug__:
+ print >>hyperdb.DEBUG, 'getnode', (self, classname, nodeid, db)
if cache:
# try the cache
if cache:
# try the cache
- cache = self.cache.setdefault(classname, {})
- if cache.has_key(nodeid):
- return cache[nodeid]
+ cache_dict = self.cache.setdefault(classname, {})
+ if cache_dict.has_key(nodeid):
+ if __debug__:
+ print >>hyperdb.TRACE, 'get %s %s cached'%(classname,
+ nodeid)
+ return cache_dict[nodeid]
+
+ if __debug__:
+ print >>hyperdb.TRACE, 'get %s %s'%(classname, nodeid)
# get from the database and save in the cache
if db is None:
db = self.getclassdb(classname)
if not db.has_key(nodeid):
raise IndexError, "no such %s %s"%(classname, nodeid)
# get from the database and save in the cache
if db is None:
db = self.getclassdb(classname)
if not db.has_key(nodeid):
raise IndexError, "no such %s %s"%(classname, nodeid)
+
+ # decode
res = marshal.loads(db[nodeid])
res = marshal.loads(db[nodeid])
+
+ # reverse the serialisation
+ res = self.unserialise(classname, res)
+
+ # store off in the cache dict
if cache:
if cache:
- cache[nodeid] = res
+ cache_dict[nodeid] = res
+
return res
def hasnode(self, classname, nodeid, db=None):
''' determine if the database has a given node
'''
return res
def hasnode(self, classname, nodeid, db=None):
''' determine if the database has a given node
'''
- if hyperdb.DEBUG:
- print 'hasnode', (self, classname, nodeid, db)
+ if __debug__:
+ print >>hyperdb.DEBUG, 'hasnode', (self, classname, nodeid, db)
+
# try the cache
cache = self.cache.setdefault(classname, {})
if cache.has_key(nodeid):
# try the cache
cache = self.cache.setdefault(classname, {})
if cache.has_key(nodeid):
+ if __debug__:
+ print >>hyperdb.TRACE, 'has %s %s cached'%(classname, nodeid)
return 1
return 1
+ if __debug__:
+ print >>hyperdb.TRACE, 'has %s %s'%(classname, nodeid)
# not in the cache - check the database
if db is None:
# not in the cache - check the database
if db is None:
return res
def countnodes(self, classname, db=None):
return res
def countnodes(self, classname, db=None):
- if hyperdb.DEBUG:
- print 'countnodes', (self, classname, db)
+ if __debug__:
+ print >>hyperdb.DEBUG, 'countnodes', (self, classname, db)
# include the new nodes not saved to the DB yet
count = len(self.newnodes.get(classname, {}))
# include the new nodes not saved to the DB yet
count = len(self.newnodes.get(classname, {}))
return count
def getnodeids(self, classname, db=None):
return count
def getnodeids(self, classname, db=None):
- if hyperdb.DEBUG:
- print 'getnodeids', (self, classname, db)
+ if __debug__:
+ print >>hyperdb.DEBUG, 'getnodeids', (self, classname, db)
# start off with the new nodes
res = self.newnodes.get(classname, {}).keys()
# start off with the new nodes
res = self.newnodes.get(classname, {}).keys()
'link' or 'unlink' -- 'params' is (classname, nodeid, propname)
'retire' -- 'params' is None
'''
'link' or 'unlink' -- 'params' is (classname, nodeid, propname)
'retire' -- 'params' is None
'''
- if hyperdb.DEBUG:
- print 'addjournal', (self, classname, nodeid, action, params)
+ if __debug__:
+ print >>hyperdb.DEBUG, 'addjournal', (self, classname, nodeid,
+ action, params)
self.transactions.append((self._doSaveJournal, (classname, nodeid,
action, params)))
def getjournal(self, classname, nodeid):
''' get the journal for id
'''
self.transactions.append((self._doSaveJournal, (classname, nodeid,
action, params)))
def getjournal(self, classname, nodeid):
''' get the journal for id
'''
- if hyperdb.DEBUG:
- print 'getjournal', (self, classname, nodeid)
+ if __debug__:
+ print >>hyperdb.DEBUG, 'getjournal', (self, classname, nodeid)
# attempt to open the journal - in some rare cases, the journal may
# not exist
try:
# attempt to open the journal - in some rare cases, the journal may
# not exist
try:
if str(error) == "need 'c' or 'n' flag to open new db": return []
elif error.args[0] != 2: raise
return []
if str(error) == "need 'c' or 'n' flag to open new db": return []
elif error.args[0] != 2: raise
return []
- journal = marshal.loads(db[nodeid])
+ try:
+ journal = marshal.loads(db[nodeid])
+ except KeyError:
+ db.close()
+ raise KeyError, 'no such %s %s'%(classname, nodeid)
+ db.close()
res = []
for entry in journal:
(nodeid, date_stamp, user, action, params) = entry
res = []
for entry in journal:
(nodeid, date_stamp, user, action, params) = entry
def pack(self, pack_before):
''' delete all journal entries before 'pack_before' '''
def pack(self, pack_before):
''' delete all journal entries before 'pack_before' '''
- if hyperdb.DEBUG:
- print 'packjournal', (self, pack_before)
+ if __debug__:
+ print >>hyperdb.DEBUG, 'packjournal', (self, pack_before)
pack_before = pack_before.get_tuple()
pack_before = pack_before.get_tuple()
def commit(self):
''' Commit the current transactions.
'''
def commit(self):
''' Commit the current transactions.
'''
- if hyperdb.DEBUG:
- print 'commit', (self,)
+ if __debug__:
+ print >>hyperdb.DEBUG, 'commit', (self,)
# TODO: lock the DB
# keep a handle to all the database files opened
self.databases = {}
# now, do all the transactions
# TODO: lock the DB
# keep a handle to all the database files opened
self.databases = {}
# now, do all the transactions
+ reindex = {}
for method, args in self.transactions:
for method, args in self.transactions:
- method(*args)
+ reindex[method(*args)] = 1
# now close all the database files
for db in self.databases.values():
# now close all the database files
for db in self.databases.values():
del self.databases
# TODO: unlock the DB
del self.databases
# TODO: unlock the DB
+ # reindex the nodes that request it
+ for classname, nodeid in filter(None, reindex.keys()):
+ print >>hyperdb.DEBUG, 'commit.reindex', (classname, nodeid)
+ self.getclass(classname).index(nodeid)
+
+ # save the indexer state
+ self.indexer.save_index()
+
# all transactions committed, back to normal
self.cache = {}
self.dirtynodes = {}
# all transactions committed, back to normal
self.cache = {}
self.dirtynodes = {}
self.transactions = []
def _doSaveNode(self, classname, nodeid, node):
self.transactions = []
def _doSaveNode(self, classname, nodeid, node):
- if hyperdb.DEBUG:
- print '_doSaveNode', (self, classname, nodeid, node)
+ if __debug__:
+ print >>hyperdb.DEBUG, '_doSaveNode', (self, classname, nodeid,
+ node)
# get the database handle
db_name = 'nodes.%s'%classname
# get the database handle
db_name = 'nodes.%s'%classname
db = self.databases[db_name] = self.getclassdb(classname, 'c')
# now save the marshalled data
db = self.databases[db_name] = self.getclassdb(classname, 'c')
# now save the marshalled data
- db[nodeid] = marshal.dumps(node)
+ db[nodeid] = marshal.dumps(self.serialise(classname, node))
+
+ # return the classname, nodeid so we reindex this content
+ return (classname, nodeid)
def _doSaveJournal(self, classname, nodeid, action, params):
def _doSaveJournal(self, classname, nodeid, action, params):
+ # serialise first
+ if action in ('set', 'create'):
+ params = self.serialise(classname, params)
+
+ # create the journal entry
entry = (nodeid, date.Date().get_tuple(), self.journaltag, action,
params)
entry = (nodeid, date.Date().get_tuple(), self.journaltag, action,
params)
- if hyperdb.DEBUG:
- print '_doSaveJournal', entry
+
+ if __debug__:
+ print >>hyperdb.DEBUG, '_doSaveJournal', entry
# get the database handle
db_name = 'journals.%s'%classname
# get the database handle
db_name = 'journals.%s'%classname
# now insert the journal entry
if db.has_key(nodeid):
# now insert the journal entry
if db.has_key(nodeid):
+ # append to existing
s = db[nodeid]
l = marshal.loads(s)
l.append(entry)
else:
l = [entry]
s = db[nodeid]
l = marshal.loads(s)
l.append(entry)
else:
l = [entry]
- db[nodeid] = marshal.dumps(l)
- def _doStoreFile(self, name, **databases):
- # the file is currently ".tmp" - move it to its real name to commit
- os.rename(name+".tmp", name)
+ db[nodeid] = marshal.dumps(l)
def rollback(self):
''' Reverse all actions from the current transaction.
'''
def rollback(self):
''' Reverse all actions from the current transaction.
'''
- if hyperdb.DEBUG:
- print 'rollback', (self, )
+ if __debug__:
+ print >>hyperdb.DEBUG, 'rollback', (self, )
for method, args in self.transactions:
# delete temporary files
if method == self._doStoreFile:
for method, args in self.transactions:
# delete temporary files
if method == self._doStoreFile:
- if os.path.exists(args[0]+".tmp"):
- os.remove(args[0]+".tmp")
+ self._rollbackStoreFile(*args)
self.cache = {}
self.dirtynodes = {}
self.newnodes = {}
self.cache = {}
self.dirtynodes = {}
self.newnodes = {}
#
#$Log: not supported by cvs2svn $
#
#$Log: not supported by cvs2svn $
+#Revision 1.42 2002/07/10 06:21:38 richard
+#Be extra safe
+#
+#Revision 1.41 2002/07/10 00:21:45 richard
+#explicit database closing
+#
+#Revision 1.40 2002/07/09 04:19:09 richard
+#Added reindex command to roundup-admin.
+#Fixed reindex on first access.
+#Also fixed reindexing of entries that change.
+#
+#Revision 1.39 2002/07/09 03:02:52 richard
+#More indexer work:
+#- all String properties may now be indexed too. Currently there's a bit of
+# "issue" specific code in the actual searching which needs to be
+# addressed. In a nutshell:
+# + pass 'indexme="yes"' as a String() property initialisation arg, eg:
+# file = FileClass(db, "file", name=String(), type=String(),
+# comment=String(indexme="yes"))
+# + the comment will then be indexed and be searchable, with the results
+# related back to the issue that the file is linked to
+#- as a result of this work, the FileClass has a default MIME type that may
+# be overridden in a subclass, or by the use of a "type" property as is
+# done in the default templates.
+#- the regeneration of the indexes (if necessary) is done once the schema is
+# set up in the dbinit.
+#
+#Revision 1.38 2002/07/08 06:58:15 richard
+#cleaned up the indexer code:
+# - it splits more words out (much simpler, faster splitter)
+# - removed code we'll never use (roundup.roundup_indexer has the full
+# implementation, and replaces roundup.indexer)
+# - only index text/plain and rfc822/message (ideas for other text formats to
+# index are welcome)
+# - added simple unit test for indexer. Needs more tests for regression.
+#
+#Revision 1.37 2002/06/20 23:52:35 richard
+#More informative error message
+#
+#Revision 1.36 2002/06/19 03:07:19 richard
+#Moved the file storage commit into blobfiles where it belongs.
+#
+#Revision 1.35 2002/05/25 07:16:24 rochecompaan
+#Merged search_indexing-branch with HEAD
+#
+#Revision 1.34 2002/05/15 06:21:21 richard
+# . node caching now works, and gives a small boost in performance
+#
+#As a part of this, I cleaned up the DEBUG output and implemented TRACE
+#output (HYPERDBTRACE='file to trace to') with checkpoints at the start of
+#CGI requests. Run roundup with python -O to skip all the DEBUG/TRACE stuff
+#(using if __debug__ which is compiled out with -O)
+#
+#Revision 1.33 2002/04/24 10:38:26 rochecompaan
+#All database files are now created group readable and writable.
+#
+#Revision 1.32 2002/04/15 23:25:15 richard
+#. node ids are now generated from a lockable store - no more race conditions
+#
+#We're using the portalocker code by Jonathan Feinberg that was contributed
+#to the ASPN Python cookbook. This gives us locking across Unix and Windows.
+#
+#Revision 1.31 2002/04/03 05:54:31 richard
+#Fixed serialisation problem by moving the serialisation step out of the
+#hyperdb.Class (get, set) into the hyperdb.Database.
+#
+#Also fixed htmltemplate after the showid changes I made yesterday.
+#
+#Unit tests for all of the above written.
+#
+#Revision 1.30.2.1 2002/04/03 11:55:57 rochecompaan
+# . Added feature #526730 - search for messages capability
+#
+#Revision 1.30 2002/02/27 03:40:59 richard
+#Ran it through pychecker, made fixes
+#
#Revision 1.29 2002/02/25 14:34:31 grubert
# . use blobfiles in back_anydbm which is used in back_bsddb.
# change test_db as dirlist does not work for subdirectories.
#Revision 1.29 2002/02/25 14:34:31 grubert
# . use blobfiles in back_anydbm which is used in back_bsddb.
# change test_db as dirlist does not work for subdirectories.