roundup/backends/back_anydbm.py

   1 #
   2 # Copyright (c) 2001 Bizar Software Pty Ltd (http://www.bizarsoftware.com.au/)
   3 # This module is free software, and you may redistribute it and/or modify
   4 # under the same terms as Python, so long as this copyright message and
   5 # disclaimer are retained in their original form.
   6 #
   7 # IN NO EVENT SHALL BIZAR SOFTWARE PTY LTD BE LIABLE TO ANY PARTY FOR
   8 # DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING
   9 # OUT OF THE USE OF THIS CODE, EVEN IF THE AUTHOR HAS BEEN ADVISED OF THE
  10 # POSSIBILITY OF SUCH DAMAGE.
  11 #
  12 # BIZAR SOFTWARE PTY LTD SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
  13 # BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  14 # FOR A PARTICULAR PURPOSE.  THE CODE PROVIDED HEREUNDER IS ON AN "AS IS"
  15 # BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
  16 # SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  17 #
  18 #$Id: back_anydbm.py,v 1.31 2002-04-03 05:54:31 richard Exp $
  19 '''
  20 This module defines a backend that saves the hyperdatabase in a database
  21 chosen by anydbm. It is guaranteed to always be available in python
  22 versions >2.1.1 (the dumbdbm fallback in 2.1.1 and earlier has several
  23 serious bugs, and is not available)
  24 '''
  25
  26 import whichdb, anydbm, os, marshal
  27 from roundup import hyperdb, date
  28 from blobfiles import FileStorage
  29
  30 #
  31 # Now the database
  32 #
  33 class Database(FileStorage, hyperdb.Database):
  34     """A database for storing records containing flexible data types.
  35
  36     Transaction stuff TODO:
  37         . check the timestamp of the class file and nuke the cache if it's
  38           modified. Do some sort of conflict checking on the dirty stuff.
  39         . perhaps detect write collisions (related to above)?
  40
  41     """
  42     def __init__(self, config, journaltag=None):
  43         """Open a hyperdatabase given a specifier to some storage.
  44
  45         The 'storagelocator' is obtained from config.DATABASE.
  46         The meaning of 'storagelocator' depends on the particular
  47         implementation of the hyperdatabase.  It could be a file name,
  48         a directory path, a socket descriptor for a connection to a
  49         database over the network, etc.
  50
  51         The 'journaltag' is a token that will be attached to the journal
  52         entries for any edits done on the database.  If 'journaltag' is
  53         None, the database is opened in read-only mode: the Class.create(),
  54         Class.set(), and Class.retire() methods are disabled.
  55         """
  56         self.config, self.journaltag = config, journaltag
  57         self.dir = config.DATABASE
  58         self.classes = {}
  59         self.cache = {}         # cache of nodes loaded or created
  60         self.dirtynodes = {}    # keep track of the dirty nodes by class
  61         self.newnodes = {}      # keep track of the new nodes by class
  62         self.transactions = []
  63
  64     def __repr__(self):
  65         return '<back_anydbm instance at %x>'%id(self)
  66
  67     #
  68     # Classes
  69     #
  70     def __getattr__(self, classname):
  71         """A convenient way of calling self.getclass(classname)."""
  72         if self.classes.has_key(classname):
  73             if hyperdb.DEBUG:
  74                 print '__getattr__', (self, classname)
  75             return self.classes[classname]
  76         raise AttributeError, classname
  77
  78     def addclass(self, cl):
  79         if hyperdb.DEBUG:
  80             print 'addclass', (self, cl)
  81         cn = cl.classname
  82         if self.classes.has_key(cn):
  83             raise ValueError, cn
  84         self.classes[cn] = cl
  85
  86     def getclasses(self):
  87         """Return a list of the names of all existing classes."""
  88         if hyperdb.DEBUG:
  89             print 'getclasses', (self,)
  90         l = self.classes.keys()
  91         l.sort()
  92         return l
  93
  94     def getclass(self, classname):
  95         """Get the Class object representing a particular class.
  96
  97         If 'classname' is not a valid class name, a KeyError is raised.
  98         """
  99         if hyperdb.DEBUG:
 100             print 'getclass', (self, classname)
 101         return self.classes[classname]
 102
 103     #
 104     # Class DBs
 105     #
 106     def clear(self):
 107         '''Delete all database contents
 108         '''
 109         if hyperdb.DEBUG:
 110             print 'clear', (self,)
 111         for cn in self.classes.keys():
 112             for dummy in 'nodes', 'journals':
 113                 path = os.path.join(self.dir, 'journals.%s'%cn)
 114                 if os.path.exists(path):
 115                     os.remove(path)
 116                 elif os.path.exists(path+'.db'):    # dbm appends .db
 117                     os.remove(path+'.db')
 118
 119     def getclassdb(self, classname, mode='r'):
 120         ''' grab a connection to the class db that will be used for
 121             multiple actions
 122         '''
 123         if hyperdb.DEBUG:
 124             print 'getclassdb', (self, classname, mode)
 125         return self._opendb('nodes.%s'%classname, mode)
 126
 127     def _opendb(self, name, mode):
 128         '''Low-level database opener that gets around anydbm/dbm
 129            eccentricities.
 130         '''
 131         if hyperdb.DEBUG:
 132             print '_opendb', (self, name, mode)
 133         # determine which DB wrote the class file
 134         db_type = ''
 135         path = os.path.join(os.getcwd(), self.dir, name)
 136         if os.path.exists(path):
 137             db_type = whichdb.whichdb(path)
 138             if not db_type:
 139                 raise hyperdb.DatabaseError, "Couldn't identify database type"
 140         elif os.path.exists(path+'.db'):
 141             # if the path ends in '.db', it's a dbm database, whether
 142             # anydbm says it's dbhash or not!
 143             db_type = 'dbm'
 144
 145         # new database? let anydbm pick the best dbm
 146         if not db_type:
 147             if hyperdb.DEBUG:
 148                 print "_opendb anydbm.open(%r, 'n')"%path
 149             return anydbm.open(path, 'n')
 150
 151         # open the database with the correct module
 152         try:
 153             dbm = __import__(db_type)
 154         except ImportError:
 155             raise hyperdb.DatabaseError, \
 156                 "Couldn't open database - the required module '%s'"\
 157                 "is not available"%db_type
 158         if hyperdb.DEBUG:
 159             print "_opendb %r.open(%r, %r)"%(db_type, path, mode)
 160         return dbm.open(path, mode)
 161
 162     #
 163     # Nodes
 164     #
 165     def addnode(self, classname, nodeid, node):
 166         ''' add the specified node to its class's db
 167         '''
 168         if hyperdb.DEBUG:
 169             print 'addnode', (self, classname, nodeid, node)
 170         self.newnodes.setdefault(classname, {})[nodeid] = 1
 171         self.cache.setdefault(classname, {})[nodeid] = node
 172         self.savenode(classname, nodeid, node)
 173
 174     def setnode(self, classname, nodeid, node):
 175         ''' change the specified node
 176         '''
 177         if hyperdb.DEBUG:
 178             print 'setnode', (self, classname, nodeid, node)
 179         self.dirtynodes.setdefault(classname, {})[nodeid] = 1
 180
 181         # can't set without having already loaded the node
 182         self.cache[classname][nodeid] = node
 183         self.savenode(classname, nodeid, node)
 184
 185     def savenode(self, classname, nodeid, node):
 186         ''' perform the saving of data specified by the set/addnode
 187         '''
 188         if hyperdb.DEBUG:
 189             print 'savenode', (self, classname, nodeid, node)
 190         self.transactions.append((self._doSaveNode, (classname, nodeid, node)))
 191
 192     def getnode(self, classname, nodeid, db=None, cache=1):
 193         ''' get a node from the database
 194         '''
 195         if hyperdb.DEBUG:
 196             print 'getnode', (self, classname, nodeid, db)
 197         if cache:
 198             # try the cache
 199             cache = self.cache.setdefault(classname, {})
 200             if cache.has_key(nodeid):
 201                 return cache[nodeid]
 202
 203         # get from the database and save in the cache
 204         if db is None:
 205             db = self.getclassdb(classname)
 206         if not db.has_key(nodeid):
 207             raise IndexError, "no such %s %s"%(classname, nodeid)
 208
 209         # decode
 210         res = marshal.loads(db[nodeid])
 211
 212         # reverse the serialisation
 213         res = self.unserialise(classname, res)
 214
 215         # store off in the cache
 216         if cache:
 217             cache[nodeid] = res
 218
 219         return res
 220
 221     def hasnode(self, classname, nodeid, db=None):
 222         ''' determine if the database has a given node
 223         '''
 224         if hyperdb.DEBUG:
 225             print 'hasnode', (self, classname, nodeid, db)
 226         # try the cache
 227         cache = self.cache.setdefault(classname, {})
 228         if cache.has_key(nodeid):
 229             return 1
 230
 231         # not in the cache - check the database
 232         if db is None:
 233             db = self.getclassdb(classname)
 234         res = db.has_key(nodeid)
 235         return res
 236
 237     def countnodes(self, classname, db=None):
 238         if hyperdb.DEBUG:
 239             print 'countnodes', (self, classname, db)
 240         # include the new nodes not saved to the DB yet
 241         count = len(self.newnodes.get(classname, {}))
 242
 243         # and count those in the DB
 244         if db is None:
 245             db = self.getclassdb(classname)
 246         count = count + len(db.keys())
 247         return count
 248
 249     def getnodeids(self, classname, db=None):
 250         if hyperdb.DEBUG:
 251             print 'getnodeids', (self, classname, db)
 252         # start off with the new nodes
 253         res = self.newnodes.get(classname, {}).keys()
 254
 255         if db is None:
 256             db = self.getclassdb(classname)
 257         res = res + db.keys()
 258         return res
 259
 260
 261     #
 262     # Files - special node properties
 263     # inherited from FileStorage
 264
 265     #
 266     # Journal
 267     #
 268     def addjournal(self, classname, nodeid, action, params):
 269         ''' Journal the Action
 270         'action' may be:
 271
 272             'create' or 'set' -- 'params' is a dictionary of property values
 273             'link' or 'unlink' -- 'params' is (classname, nodeid, propname)
 274             'retire' -- 'params' is None
 275         '''
 276         if hyperdb.DEBUG:
 277             print 'addjournal', (self, classname, nodeid, action, params)
 278         self.transactions.append((self._doSaveJournal, (classname, nodeid,
 279             action, params)))
 280
 281     def getjournal(self, classname, nodeid):
 282         ''' get the journal for id
 283         '''
 284         if hyperdb.DEBUG:
 285             print 'getjournal', (self, classname, nodeid)
 286         # attempt to open the journal - in some rare cases, the journal may
 287         # not exist
 288         try:
 289             db = self._opendb('journals.%s'%classname, 'r')
 290         except anydbm.error, error:
 291             if str(error) == "need 'c' or 'n' flag to open new db": return []
 292             elif error.args[0] != 2: raise
 293             return []
 294         journal = marshal.loads(db[nodeid])
 295         res = []
 296         for entry in journal:
 297             (nodeid, date_stamp, user, action, params) = entry
 298             date_obj = date.Date(date_stamp)
 299             res.append((nodeid, date_obj, user, action, params))
 300         return res
 301
 302     def pack(self, pack_before):
 303         ''' delete all journal entries before 'pack_before' '''
 304         if hyperdb.DEBUG:
 305             print 'packjournal', (self, pack_before)
 306
 307         pack_before = pack_before.get_tuple()
 308
 309         classes = self.getclasses()
 310
 311         # TODO: factor this out to method - we're already doing it in
 312         # _opendb.
 313         db_type = ''
 314         path = os.path.join(os.getcwd(), self.dir, classes[0])
 315         if os.path.exists(path):
 316             db_type = whichdb.whichdb(path)
 317             if not db_type:
 318                 raise hyperdb.DatabaseError, "Couldn't identify database type"
 319         elif os.path.exists(path+'.db'):
 320             db_type = 'dbm'
 321
 322         for classname in classes:
 323             db_name = 'journals.%s'%classname
 324             db = self._opendb(db_name, 'w')
 325
 326             for key in db.keys():
 327                 journal = marshal.loads(db[key])
 328                 l = []
 329                 last_set_entry = None
 330                 for entry in journal:
 331                     (nodeid, date_stamp, self.journaltag, action,
 332                         params) = entry
 333                     if date_stamp > pack_before or action == 'create':
 334                         l.append(entry)
 335                     elif action == 'set':
 336                         # grab the last set entry to keep information on
 337                         # activity
 338                         last_set_entry = entry
 339                 if last_set_entry:
 340                     date_stamp = last_set_entry[1]
 341                     # if the last set entry was made after the pack date
 342                     # then it is already in the list
 343                     if date_stamp < pack_before:
 344                         l.append(last_set_entry)
 345                 db[key] = marshal.dumps(l)
 346             if db_type == 'gdbm':
 347                 db.reorganize()
 348             db.close()
 349
 350
 351     #
 352     # Basic transaction support
 353     #
 354     def commit(self):
 355         ''' Commit the current transactions.
 356         '''
 357         if hyperdb.DEBUG:
 358             print 'commit', (self,)
 359         # TODO: lock the DB
 360
 361         # keep a handle to all the database files opened
 362         self.databases = {}
 363
 364         # now, do all the transactions
 365         for method, args in self.transactions:
 366             method(*args)
 367
 368         # now close all the database files
 369         for db in self.databases.values():
 370             db.close()
 371         del self.databases
 372         # TODO: unlock the DB
 373
 374         # all transactions committed, back to normal
 375         self.cache = {}
 376         self.dirtynodes = {}
 377         self.newnodes = {}
 378         self.transactions = []
 379
 380     def _doSaveNode(self, classname, nodeid, node):
 381         if hyperdb.DEBUG:
 382             print '_doSaveNode', (self, classname, nodeid, node)
 383
 384         # get the database handle
 385         db_name = 'nodes.%s'%classname
 386         if self.databases.has_key(db_name):
 387             db = self.databases[db_name]
 388         else:
 389             db = self.databases[db_name] = self.getclassdb(classname, 'c')
 390
 391         # now save the marshalled data
 392         db[nodeid] = marshal.dumps(self.serialise(classname, node))
 393
 394     def _doSaveJournal(self, classname, nodeid, action, params):
 395         # serialise first
 396         if action in ('set', 'create'):
 397             params = self.serialise(classname, params)
 398
 399         # create the journal entry
 400         entry = (nodeid, date.Date().get_tuple(), self.journaltag, action,
 401             params)
 402
 403         if hyperdb.DEBUG:
 404             print '_doSaveJournal', entry
 405
 406         # get the database handle
 407         db_name = 'journals.%s'%classname
 408         if self.databases.has_key(db_name):
 409             db = self.databases[db_name]
 410         else:
 411             db = self.databases[db_name] = self._opendb(db_name, 'c')
 412
 413         # now insert the journal entry
 414         if db.has_key(nodeid):
 415             # append to existing
 416             s = db[nodeid]
 417             l = marshal.loads(s)
 418             l.append(entry)
 419         else:
 420             l = [entry]
 421
 422         db[nodeid] = marshal.dumps(l)
 423
 424     def _doStoreFile(self, name, **databases):
 425         # the file is currently ".tmp" - move it to its real name to commit
 426         os.rename(name+".tmp", name)
 427
 428     def rollback(self):
 429         ''' Reverse all actions from the current transaction.
 430         '''
 431         if hyperdb.DEBUG:
 432             print 'rollback', (self, )
 433         for method, args in self.transactions:
 434             # delete temporary files
 435             if method == self._doStoreFile:
 436                 if os.path.exists(args[0]+".tmp"):
 437                     os.remove(args[0]+".tmp")
 438         self.cache = {}
 439         self.dirtynodes = {}
 440         self.newnodes = {}
 441         self.transactions = []
 442
 443 #
 444 #$Log: not supported by cvs2svn $
 445 #Revision 1.30  2002/02/27 03:40:59  richard
 446 #Ran it through pychecker, made fixes
 447 #
 448 #Revision 1.29  2002/02/25 14:34:31  grubert
 449 # . use blobfiles in back_anydbm which is used in back_bsddb.
 450 #   change test_db as dirlist does not work for subdirectories.
 451 #   ATTENTION: blobfiles now creates subdirectories for files.
 452 #
 453 #Revision 1.28  2002/02/16 09:14:17  richard
 454 # . #514854 ] History: "User" is always ticket creator
 455 #
 456 #Revision 1.27  2002/01/22 07:21:13  richard
 457 #. fixed back_bsddb so it passed the journal tests
 458 #
 459 #... it didn't seem happy using the back_anydbm _open method, which is odd.
 460 #Yet another occurrance of whichdb not being able to recognise older bsddb
 461 #databases. Yadda yadda. Made the HYPERDBDEBUG stuff more sane in the
 462 #process.
 463 #
 464 #Revision 1.26  2002/01/22 05:18:38  rochecompaan
 465 #last_set_entry was referenced before assignment
 466 #
 467 #Revision 1.25  2002/01/22 05:06:08  rochecompaan
 468 #We need to keep the last 'set' entry in the journal to preserve
 469 #information on 'activity' for nodes.
 470 #
 471 #Revision 1.24  2002/01/21 16:33:20  rochecompaan
 472 #You can now use the roundup-admin tool to pack the database
 473 #
 474 #Revision 1.23  2002/01/18 04:32:04  richard
 475 #Rollback was breaking because a message hadn't actually been written to the file. Needs
 476 #more investigation.
 477 #
 478 #Revision 1.22  2002/01/14 02:20:15  richard
 479 # . changed all config accesses so they access either the instance or the
 480 #   config attriubute on the db. This means that all config is obtained from
 481 #   instance_config instead of the mish-mash of classes. This will make
 482 #   switching to a ConfigParser setup easier too, I hope.
 483 #
 484 #At a minimum, this makes migration a _little_ easier (a lot easier in the
 485 #0.5.0 switch, I hope!)
 486 #
 487 #Revision 1.21  2002/01/02 02:31:38  richard
 488 #Sorry for the huge checkin message - I was only intending to implement #496356
 489 #but I found a number of places where things had been broken by transactions:
 490 # . modified ROUNDUPDBSENDMAILDEBUG to be SENDMAILDEBUG and hold a filename
 491 #   for _all_ roundup-generated smtp messages to be sent to.
 492 # . the transaction cache had broken the roundupdb.Class set() reactors
 493 # . newly-created author users in the mailgw weren't being committed to the db
 494 #
 495 #Stuff that made it into CHANGES.txt (ie. the stuff I was actually working
 496 #on when I found that stuff :):
 497 # . #496356 ] Use threading in messages
 498 # . detectors were being registered multiple times
 499 # . added tests for mailgw
 500 # . much better attaching of erroneous messages in the mail gateway
 501 #
 502 #Revision 1.20  2001/12/18 15:30:34  rochecompaan
 503 #Fixed bugs:
 504 # .  Fixed file creation and retrieval in same transaction in anydbm
 505 #    backend
 506 # .  Cgi interface now renders new issue after issue creation
 507 # .  Could not set issue status to resolved through cgi interface
 508 # .  Mail gateway was changing status back to 'chatting' if status was
 509 #    omitted as an argument
 510 #
 511 #Revision 1.19  2001/12/17 03:52:48  richard
 512 #Implemented file store rollback. As a bonus, the hyperdb is now capable of
 513 #storing more than one file per node - if a property name is supplied,
 514 #the file is called designator.property.
 515 #I decided not to migrate the existing files stored over to the new naming
 516 #scheme - the FileClass just doesn't specify the property name.
 517 #
 518 #Revision 1.18  2001/12/16 10:53:38  richard
 519 #take a copy of the node dict so that the subsequent set
 520 #operation doesn't modify the oldvalues structure
 521 #
 522 #Revision 1.17  2001/12/14 23:42:57  richard
 523 #yuck, a gdbm instance tests false :(
 524 #I've left the debugging code in - it should be removed one day if we're ever
 525 #_really_ anal about performace :)
 526 #
 527 #Revision 1.16  2001/12/12 03:23:14  richard
 528 #Cor blimey this anydbm/whichdb stuff is yecchy. Turns out that whichdb
 529 #incorrectly identifies a dbm file as a dbhash file on my system. This has
 530 #been submitted to the python bug tracker as issue #491888:
 531 #https://sourceforge.net/tracker/index.php?func=detail&aid=491888&group_id=5470&atid=105470
 532 #
 533 #Revision 1.15  2001/12/12 02:30:51  richard
 534 #I fixed the problems with people whose anydbm was using the dbm module at the
 535 #backend. It turns out the dbm module modifies the file name to append ".db"
 536 #and my check to determine if we're opening an existing or new db just
 537 #tested os.path.exists() on the filename. Well, no longer! We now perform a
 538 #much better check _and_ cope with the anydbm implementation module changing
 539 #too!
 540 #I also fixed the backends __init__ so only ImportError is squashed.
 541 #
 542 #Revision 1.14  2001/12/10 22:20:01  richard
 543 #Enabled transaction support in the bsddb backend. It uses the anydbm code
 544 #where possible, only replacing methods where the db is opened (it uses the
 545 #btree opener specifically.)
 546 #Also cleaned up some change note generation.
 547 #Made the backends package work with pydoc too.
 548 #
 549 #Revision 1.13  2001/12/02 05:06:16  richard
 550 #. We now use weakrefs in the Classes to keep the database reference, so
 551 #  the close() method on the database is no longer needed.
 552 #  I bumped the minimum python requirement up to 2.1 accordingly.
 553 #. #487480 ] roundup-server
 554 #. #487476 ] INSTALL.txt
 555 #
 556 #I also cleaned up the change message / post-edit stuff in the cgi client.
 557 #There's now a clearly marked "TODO: append the change note" where I believe
 558 #the change note should be added there. The "changes" list will obviously
 559 #have to be modified to be a dict of the changes, or somesuch.
 560 #
 561 #More testing needed.
 562 #
 563 #Revision 1.12  2001/12/01 07:17:50  richard
 564 #. We now have basic transaction support! Information is only written to
 565 #  the database when the commit() method is called. Only the anydbm
 566 #  backend is modified in this way - neither of the bsddb backends have been.
 567 #  The mail, admin and cgi interfaces all use commit (except the admin tool
 568 #  doesn't have a commit command, so interactive users can't commit...)
 569 #. Fixed login/registration forwarding the user to the right page (or not,
 570 #  on a failure)
 571 #
 572 #Revision 1.11  2001/11/21 02:34:18  richard
 573 #Added a target version field to the extended issue schema
 574 #
 575 #Revision 1.10  2001/10/09 23:58:10  richard
 576 #Moved the data stringification up into the hyperdb.Class class' get, set
 577 #and create methods. This means that the data is also stringified for the
 578 #journal call, and removes duplication of code from the backends. The
 579 #backend code now only sees strings.
 580 #
 581 #Revision 1.9  2001/10/09 07:25:59  richard
 582 #Added the Password property type. See "pydoc roundup.password" for
 583 #implementation details. Have updated some of the documentation too.
 584 #
 585 #Revision 1.8  2001/09/29 13:27:00  richard
 586 #CGI interfaces now spit up a top-level index of all the instances they can
 587 #serve.
 588 #
 589 #Revision 1.7  2001/08/12 06:32:36  richard
 590 #using isinstance(blah, Foo) now instead of isFooType
 591 #
 592 #Revision 1.6  2001/08/07 00:24:42  richard
 593 #stupid typo
 594 #
 595 #Revision 1.5  2001/08/07 00:15:51  richard
 596 #Added the copyright/license notice to (nearly) all files at request of
 597 #Bizar Software.
 598 #
 599 #Revision 1.4  2001/07/30 01:41:36  richard
 600 #Makes schema changes mucho easier.
 601 #
 602 #Revision 1.3  2001/07/25 01:23:07  richard
 603 #Added the Roundup spec to the new documentation directory.
 604 #
 605 #Revision 1.2  2001/07/23 08:20:44  richard
 606 #Moved over to using marshal in the bsddb and anydbm backends.
 607 #roundup-admin now has a "freshen" command that'll load/save all nodes (not
 608 # retired - mod hyperdb.Class.list() so it lists retired nodes)
 609 #
 610 #