roundup/backends/back_anydbm.py

   1 #
   2 # Copyright (c) 2001 Bizar Software Pty Ltd (http://www.bizarsoftware.com.au/)
   3 # This module is free software, and you may redistribute it and/or modify
   4 # under the same terms as Python, so long as this copyright message and
   5 # disclaimer are retained in their original form.
   6 #
   7 # IN NO EVENT SHALL BIZAR SOFTWARE PTY LTD BE LIABLE TO ANY PARTY FOR
   8 # DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING
   9 # OUT OF THE USE OF THIS CODE, EVEN IF THE AUTHOR HAS BEEN ADVISED OF THE
  10 # POSSIBILITY OF SUCH DAMAGE.
  11 #
  12 # BIZAR SOFTWARE PTY LTD SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
  13 # BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  14 # FOR A PARTICULAR PURPOSE.  THE CODE PROVIDED HEREUNDER IS ON AN "AS IS"
  15 # BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
  16 # SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  17 #
  18 #$Id: back_anydbm.py,v 1.29 2002-02-25 14:34:31 grubert Exp $
  19 '''
  20 This module defines a backend that saves the hyperdatabase in a database
  21 chosen by anydbm. It is guaranteed to always be available in python
  22 versions >2.1.1 (the dumbdbm fallback in 2.1.1 and earlier has several
  23 serious bugs, and is not available)
  24 '''
  25
  26 import whichdb, anydbm, os, marshal
  27 from roundup import hyperdb, date, password
  28 from blobfiles import FileStorage
  29
  30 #
  31 # Now the database
  32 #
  33 class Database(FileStorage, hyperdb.Database):
  34     """A database for storing records containing flexible data types.
  35
  36     Transaction stuff TODO:
  37         . check the timestamp of the class file and nuke the cache if it's
  38           modified. Do some sort of conflict checking on the dirty stuff.
  39         . perhaps detect write collisions (related to above)?
  40
  41     """
  42     def __init__(self, config, journaltag=None):
  43         """Open a hyperdatabase given a specifier to some storage.
  44
  45         The 'storagelocator' is obtained from config.DATABASE.
  46         The meaning of 'storagelocator' depends on the particular
  47         implementation of the hyperdatabase.  It could be a file name,
  48         a directory path, a socket descriptor for a connection to a
  49         database over the network, etc.
  50
  51         The 'journaltag' is a token that will be attached to the journal
  52         entries for any edits done on the database.  If 'journaltag' is
  53         None, the database is opened in read-only mode: the Class.create(),
  54         Class.set(), and Class.retire() methods are disabled.
  55         """
  56         self.config, self.journaltag = config, journaltag
  57         self.dir = config.DATABASE
  58         self.classes = {}
  59         self.cache = {}         # cache of nodes loaded or created
  60         self.dirtynodes = {}    # keep track of the dirty nodes by class
  61         self.newnodes = {}      # keep track of the new nodes by class
  62         self.transactions = []
  63
  64     def __repr__(self):
  65         return '<back_anydbm instance at %x>'%id(self)
  66
  67     #
  68     # Classes
  69     #
  70     def __getattr__(self, classname):
  71         """A convenient way of calling self.getclass(classname)."""
  72         if self.classes.has_key(classname):
  73             if hyperdb.DEBUG:
  74                 print '__getattr__', (self, classname)
  75             return self.classes[classname]
  76         raise AttributeError, classname
  77
  78     def addclass(self, cl):
  79         if hyperdb.DEBUG:
  80             print 'addclass', (self, cl)
  81         cn = cl.classname
  82         if self.classes.has_key(cn):
  83             raise ValueError, cn
  84         self.classes[cn] = cl
  85
  86     def getclasses(self):
  87         """Return a list of the names of all existing classes."""
  88         if hyperdb.DEBUG:
  89             print 'getclasses', (self,)
  90         l = self.classes.keys()
  91         l.sort()
  92         return l
  93
  94     def getclass(self, classname):
  95         """Get the Class object representing a particular class.
  96
  97         If 'classname' is not a valid class name, a KeyError is raised.
  98         """
  99         if hyperdb.DEBUG:
 100             print 'getclass', (self, classname)
 101         return self.classes[classname]
 102
 103     #
 104     # Class DBs
 105     #
 106     def clear(self):
 107         '''Delete all database contents
 108         '''
 109         if hyperdb.DEBUG:
 110             print 'clear', (self,)
 111         for cn in self.classes.keys():
 112             for type in 'nodes', 'journals':
 113                 path = os.path.join(self.dir, 'journals.%s'%cn)
 114                 if os.path.exists(path):
 115                     os.remove(path)
 116                 elif os.path.exists(path+'.db'):    # dbm appends .db
 117                     os.remove(path+'.db')
 118
 119     def getclassdb(self, classname, mode='r'):
 120         ''' grab a connection to the class db that will be used for
 121             multiple actions
 122         '''
 123         if hyperdb.DEBUG:
 124             print 'getclassdb', (self, classname, mode)
 125         return self._opendb('nodes.%s'%classname, mode)
 126
 127     def _opendb(self, name, mode):
 128         '''Low-level database opener that gets around anydbm/dbm
 129            eccentricities.
 130         '''
 131         if hyperdb.DEBUG:
 132             print '_opendb', (self, name, mode)
 133         # determine which DB wrote the class file
 134         db_type = ''
 135         path = os.path.join(os.getcwd(), self.dir, name)
 136         if os.path.exists(path):
 137             db_type = whichdb.whichdb(path)
 138             if not db_type:
 139                 raise hyperdb.DatabaseError, "Couldn't identify database type"
 140         elif os.path.exists(path+'.db'):
 141             # if the path ends in '.db', it's a dbm database, whether
 142             # anydbm says it's dbhash or not!
 143             db_type = 'dbm'
 144
 145         # new database? let anydbm pick the best dbm
 146         if not db_type:
 147             if hyperdb.DEBUG:
 148                 print "_opendb anydbm.open(%r, 'n')"%path
 149             return anydbm.open(path, 'n')
 150
 151         # open the database with the correct module
 152         try:
 153             dbm = __import__(db_type)
 154         except ImportError:
 155             raise hyperdb.DatabaseError, \
 156                 "Couldn't open database - the required module '%s'"\
 157                 "is not available"%db_type
 158         if hyperdb.DEBUG:
 159             print "_opendb %r.open(%r, %r)"%(db_type, path, mode)
 160         return dbm.open(path, mode)
 161
 162     #
 163     # Nodes
 164     #
 165     def addnode(self, classname, nodeid, node):
 166         ''' add the specified node to its class's db
 167         '''
 168         if hyperdb.DEBUG:
 169             print 'addnode', (self, classname, nodeid, node)
 170         self.newnodes.setdefault(classname, {})[nodeid] = 1
 171         self.cache.setdefault(classname, {})[nodeid] = node
 172         self.savenode(classname, nodeid, node)
 173
 174     def setnode(self, classname, nodeid, node):
 175         ''' change the specified node
 176         '''
 177         if hyperdb.DEBUG:
 178             print 'setnode', (self, classname, nodeid, node)
 179         self.dirtynodes.setdefault(classname, {})[nodeid] = 1
 180         # can't set without having already loaded the node
 181         self.cache[classname][nodeid] = node
 182         self.savenode(classname, nodeid, node)
 183
 184     def savenode(self, classname, nodeid, node):
 185         ''' perform the saving of data specified by the set/addnode
 186         '''
 187         if hyperdb.DEBUG:
 188             print 'savenode', (self, classname, nodeid, node)
 189         self.transactions.append((self._doSaveNode, (classname, nodeid, node)))
 190
 191     def getnode(self, classname, nodeid, db=None, cache=1):
 192         ''' get a node from the database
 193         '''
 194         if hyperdb.DEBUG:
 195             print 'getnode', (self, classname, nodeid, db)
 196         if cache:
 197             # try the cache
 198             cache = self.cache.setdefault(classname, {})
 199             if cache.has_key(nodeid):
 200                 return cache[nodeid]
 201
 202         # get from the database and save in the cache
 203         if db is None:
 204             db = self.getclassdb(classname)
 205         if not db.has_key(nodeid):
 206             raise IndexError, "no such %s %s"%(classname, nodeid)
 207         res = marshal.loads(db[nodeid])
 208         if cache:
 209             cache[nodeid] = res
 210         return res
 211
 212     def hasnode(self, classname, nodeid, db=None):
 213         ''' determine if the database has a given node
 214         '''
 215         if hyperdb.DEBUG:
 216             print 'hasnode', (self, classname, nodeid, db)
 217         # try the cache
 218         cache = self.cache.setdefault(classname, {})
 219         if cache.has_key(nodeid):
 220             return 1
 221
 222         # not in the cache - check the database
 223         if db is None:
 224             db = self.getclassdb(classname)
 225         res = db.has_key(nodeid)
 226         return res
 227
 228     def countnodes(self, classname, db=None):
 229         if hyperdb.DEBUG:
 230             print 'countnodes', (self, classname, db)
 231         # include the new nodes not saved to the DB yet
 232         count = len(self.newnodes.get(classname, {}))
 233
 234         # and count those in the DB
 235         if db is None:
 236             db = self.getclassdb(classname)
 237         count = count + len(db.keys())
 238         return count
 239
 240     def getnodeids(self, classname, db=None):
 241         if hyperdb.DEBUG:
 242             print 'getnodeids', (self, classname, db)
 243         # start off with the new nodes
 244         res = self.newnodes.get(classname, {}).keys()
 245
 246         if db is None:
 247             db = self.getclassdb(classname)
 248         res = res + db.keys()
 249         return res
 250
 251
 252     #
 253     # Files - special node properties
 254     # inherited from FileStorage
 255
 256     #
 257     # Journal
 258     #
 259     def addjournal(self, classname, nodeid, action, params):
 260         ''' Journal the Action
 261         'action' may be:
 262
 263             'create' or 'set' -- 'params' is a dictionary of property values
 264             'link' or 'unlink' -- 'params' is (classname, nodeid, propname)
 265             'retire' -- 'params' is None
 266         '''
 267         if hyperdb.DEBUG:
 268             print 'addjournal', (self, classname, nodeid, action, params)
 269         self.transactions.append((self._doSaveJournal, (classname, nodeid,
 270             action, params)))
 271
 272     def getjournal(self, classname, nodeid):
 273         ''' get the journal for id
 274         '''
 275         if hyperdb.DEBUG:
 276             print 'getjournal', (self, classname, nodeid)
 277         # attempt to open the journal - in some rare cases, the journal may
 278         # not exist
 279         try:
 280             db = self._opendb('journals.%s'%classname, 'r')
 281         except anydbm.error, error:
 282             if str(error) == "need 'c' or 'n' flag to open new db": return []
 283             elif error.args[0] != 2: raise
 284             return []
 285         journal = marshal.loads(db[nodeid])
 286         res = []
 287         for entry in journal:
 288             (nodeid, date_stamp, user, action, params) = entry
 289             date_obj = date.Date(date_stamp)
 290             res.append((nodeid, date_obj, user, action, params))
 291         return res
 292
 293     def pack(self, pack_before):
 294         ''' delete all journal entries before 'pack_before' '''
 295         if hyperdb.DEBUG:
 296             print 'packjournal', (self, pack_before)
 297
 298         pack_before = pack_before.get_tuple()
 299
 300         classes = self.getclasses()
 301
 302         # TODO: factor this out to method - we're already doing it in
 303         # _opendb.
 304         db_type = ''
 305         path = os.path.join(os.getcwd(), self.dir, classes[0])
 306         if os.path.exists(path):
 307             db_type = whichdb.whichdb(path)
 308             if not db_type:
 309                 raise hyperdb.DatabaseError, "Couldn't identify database type"
 310         elif os.path.exists(path+'.db'):
 311             db_type = 'dbm'
 312
 313         for classname in classes:
 314             db_name = 'journals.%s'%classname
 315             db = self._opendb(db_name, 'w')
 316
 317             for key in db.keys():
 318                 journal = marshal.loads(db[key])
 319                 l = []
 320                 last_set_entry = None
 321                 for entry in journal:
 322                     (nodeid, date_stamp, self.journaltag, action,
 323                         params) = entry
 324                     if date_stamp > pack_before or action == 'create':
 325                         l.append(entry)
 326                     elif action == 'set':
 327                         # grab the last set entry to keep information on
 328                         # activity
 329                         last_set_entry = entry
 330                 if last_set_entry:
 331                     date_stamp = last_set_entry[1]
 332                     # if the last set entry was made after the pack date
 333                     # then it is already in the list
 334                     if date_stamp < pack_before:
 335                         l.append(last_set_entry)
 336                 db[key] = marshal.dumps(l)
 337             if db_type == 'gdbm':
 338                 db.reorganize()
 339             db.close()
 340
 341
 342     #
 343     # Basic transaction support
 344     #
 345     def commit(self):
 346         ''' Commit the current transactions.
 347         '''
 348         if hyperdb.DEBUG:
 349             print 'commit', (self,)
 350         # TODO: lock the DB
 351
 352         # keep a handle to all the database files opened
 353         self.databases = {}
 354
 355         # now, do all the transactions
 356         for method, args in self.transactions:
 357             method(*args)
 358
 359         # now close all the database files
 360         for db in self.databases.values():
 361             db.close()
 362         del self.databases
 363         # TODO: unlock the DB
 364
 365         # all transactions committed, back to normal
 366         self.cache = {}
 367         self.dirtynodes = {}
 368         self.newnodes = {}
 369         self.transactions = []
 370
 371     def _doSaveNode(self, classname, nodeid, node):
 372         if hyperdb.DEBUG:
 373             print '_doSaveNode', (self, classname, nodeid, node)
 374
 375         # get the database handle
 376         db_name = 'nodes.%s'%classname
 377         if self.databases.has_key(db_name):
 378             db = self.databases[db_name]
 379         else:
 380             db = self.databases[db_name] = self.getclassdb(classname, 'c')
 381
 382         # now save the marshalled data
 383         db[nodeid] = marshal.dumps(node)
 384
 385     def _doSaveJournal(self, classname, nodeid, action, params):
 386         entry = (nodeid, date.Date().get_tuple(), self.journaltag, action,
 387             params)
 388         if hyperdb.DEBUG:
 389             print '_doSaveJournal', entry
 390
 391         # get the database handle
 392         db_name = 'journals.%s'%classname
 393         if self.databases.has_key(db_name):
 394             db = self.databases[db_name]
 395         else:
 396             db = self.databases[db_name] = self._opendb(db_name, 'c')
 397
 398         # now insert the journal entry
 399         if db.has_key(nodeid):
 400             s = db[nodeid]
 401             l = marshal.loads(db[nodeid])
 402             l.append(entry)
 403         else:
 404             l = [entry]
 405         db[nodeid] = marshal.dumps(l)
 406
 407     def _doStoreFile(self, name, **databases):
 408         # the file is currently ".tmp" - move it to its real name to commit
 409         os.rename(name+".tmp", name)
 410
 411     def rollback(self):
 412         ''' Reverse all actions from the current transaction.
 413         '''
 414         if hyperdb.DEBUG:
 415             print 'rollback', (self, )
 416         for method, args in self.transactions:
 417             # delete temporary files
 418             if method == self._doStoreFile:
 419                 if os.path.exists(args[0]+".tmp"):
 420                     os.remove(args[0]+".tmp")
 421         self.cache = {}
 422         self.dirtynodes = {}
 423         self.newnodes = {}
 424         self.transactions = []
 425
 426 #
 427 #$Log: not supported by cvs2svn $
 428 #Revision 1.28  2002/02/16 09:14:17  richard
 429 # . #514854 ] History: "User" is always ticket creator
 430 #
 431 #Revision 1.27  2002/01/22 07:21:13  richard
 432 #. fixed back_bsddb so it passed the journal tests
 433 #
 434 #... it didn't seem happy using the back_anydbm _open method, which is odd.
 435 #Yet another occurrence of whichdb not being able to recognise older bsddb
 436 #databases. Yadda yadda. Made the HYPERDBDEBUG stuff more sane in the
 437 #process.
 438 #
 439 #Revision 1.26  2002/01/22 05:18:38  rochecompaan
 440 #last_set_entry was referenced before assignment
 441 #
 442 #Revision 1.25  2002/01/22 05:06:08  rochecompaan
 443 #We need to keep the last 'set' entry in the journal to preserve
 444 #information on 'activity' for nodes.
 445 #
 446 #Revision 1.24  2002/01/21 16:33:20  rochecompaan
 447 #You can now use the roundup-admin tool to pack the database
 448 #
 449 #Revision 1.23  2002/01/18 04:32:04  richard
 450 #Rollback was breaking because a message hadn't actually been written to the file. Needs
 451 #more investigation.
 452 #
 453 #Revision 1.22  2002/01/14 02:20:15  richard
 454 # . changed all config accesses so they access either the instance or the
 455 #   config attribute on the db. This means that all config is obtained from
 456 #   instance_config instead of the mish-mash of classes. This will make
 457 #   switching to a ConfigParser setup easier too, I hope.
 458 #
 459 #At a minimum, this makes migration a _little_ easier (a lot easier in the
 460 #0.5.0 switch, I hope!)
 461 #
 462 #Revision 1.21  2002/01/02 02:31:38  richard
 463 #Sorry for the huge checkin message - I was only intending to implement #496356
 464 #but I found a number of places where things had been broken by transactions:
 465 # . modified ROUNDUPDBSENDMAILDEBUG to be SENDMAILDEBUG and hold a filename
 466 #   for _all_ roundup-generated smtp messages to be sent to.
 467 # . the transaction cache had broken the roundupdb.Class set() reactors
 468 # . newly-created author users in the mailgw weren't being committed to the db
 469 #
 470 #Stuff that made it into CHANGES.txt (ie. the stuff I was actually working
 471 #on when I found that stuff :):
 472 # . #496356 ] Use threading in messages
 473 # . detectors were being registered multiple times
 474 # . added tests for mailgw
 475 # . much better attaching of erroneous messages in the mail gateway
 476 #
 477 #Revision 1.20  2001/12/18 15:30:34  rochecompaan
 478 #Fixed bugs:
 479 # .  Fixed file creation and retrieval in same transaction in anydbm
 480 #    backend
 481 # .  Cgi interface now renders new issue after issue creation
 482 # .  Could not set issue status to resolved through cgi interface
 483 # .  Mail gateway was changing status back to 'chatting' if status was
 484 #    omitted as an argument
 485 #
 486 #Revision 1.19  2001/12/17 03:52:48  richard
 487 #Implemented file store rollback. As a bonus, the hyperdb is now capable of
 488 #storing more than one file per node - if a property name is supplied,
 489 #the file is called designator.property.
 490 #I decided not to migrate the existing files stored over to the new naming
 491 #scheme - the FileClass just doesn't specify the property name.
 492 #
 493 #Revision 1.18  2001/12/16 10:53:38  richard
 494 #take a copy of the node dict so that the subsequent set
 495 #operation doesn't modify the oldvalues structure
 496 #
 497 #Revision 1.17  2001/12/14 23:42:57  richard
 498 #yuck, a gdbm instance tests false :(
 499 #I've left the debugging code in - it should be removed one day if we're ever
 500 #_really_ anal about performance :)
 501 #
 502 #Revision 1.16  2001/12/12 03:23:14  richard
 503 #Cor blimey this anydbm/whichdb stuff is yecchy. Turns out that whichdb
 504 #incorrectly identifies a dbm file as a dbhash file on my system. This has
 505 #been submitted to the python bug tracker as issue #491888:
 506 #https://sourceforge.net/tracker/index.php?func=detail&aid=491888&group_id=5470&atid=105470
 507 #
 508 #Revision 1.15  2001/12/12 02:30:51  richard
 509 #I fixed the problems with people whose anydbm was using the dbm module at the
 510 #backend. It turns out the dbm module modifies the file name to append ".db"
 511 #and my check to determine if we're opening an existing or new db just
 512 #tested os.path.exists() on the filename. Well, no longer! We now perform a
 513 #much better check _and_ cope with the anydbm implementation module changing
 514 #too!
 515 #I also fixed the backends __init__ so only ImportError is squashed.
 516 #
 517 #Revision 1.14  2001/12/10 22:20:01  richard
 518 #Enabled transaction support in the bsddb backend. It uses the anydbm code
 519 #where possible, only replacing methods where the db is opened (it uses the
 520 #btree opener specifically.)
 521 #Also cleaned up some change note generation.
 522 #Made the backends package work with pydoc too.
 523 #
 524 #Revision 1.13  2001/12/02 05:06:16  richard
 525 #. We now use weakrefs in the Classes to keep the database reference, so
 526 #  the close() method on the database is no longer needed.
 527 #  I bumped the minimum python requirement up to 2.1 accordingly.
 528 #. #487480 ] roundup-server
 529 #. #487476 ] INSTALL.txt
 530 #
 531 #I also cleaned up the change message / post-edit stuff in the cgi client.
 532 #There's now a clearly marked "TODO: append the change note" where I believe
 533 #the change note should be added there. The "changes" list will obviously
 534 #have to be modified to be a dict of the changes, or somesuch.
 535 #
 536 #More testing needed.
 537 #
 538 #Revision 1.12  2001/12/01 07:17:50  richard
 539 #. We now have basic transaction support! Information is only written to
 540 #  the database when the commit() method is called. Only the anydbm
 541 #  backend is modified in this way - neither of the bsddb backends have been.
 542 #  The mail, admin and cgi interfaces all use commit (except the admin tool
 543 #  doesn't have a commit command, so interactive users can't commit...)
 544 #. Fixed login/registration forwarding the user to the right page (or not,
 545 #  on a failure)
 546 #
 547 #Revision 1.11  2001/11/21 02:34:18  richard
 548 #Added a target version field to the extended issue schema
 549 #
 550 #Revision 1.10  2001/10/09 23:58:10  richard
 551 #Moved the data stringification up into the hyperdb.Class class' get, set
 552 #and create methods. This means that the data is also stringified for the
 553 #journal call, and removes duplication of code from the backends. The
 554 #backend code now only sees strings.
 555 #
 556 #Revision 1.9  2001/10/09 07:25:59  richard
 557 #Added the Password property type. See "pydoc roundup.password" for
 558 #implementation details. Have updated some of the documentation too.
 559 #
 560 #Revision 1.8  2001/09/29 13:27:00  richard
 561 #CGI interfaces now spit up a top-level index of all the instances they can
 562 #serve.
 563 #
 564 #Revision 1.7  2001/08/12 06:32:36  richard
 565 #using isinstance(blah, Foo) now instead of isFooType
 566 #
 567 #Revision 1.6  2001/08/07 00:24:42  richard
 568 #stupid typo
 569 #
 570 #Revision 1.5  2001/08/07 00:15:51  richard
 571 #Added the copyright/license notice to (nearly) all files at request of
 572 #Bizar Software.
 573 #
 574 #Revision 1.4  2001/07/30 01:41:36  richard
 575 #Makes schema changes mucho easier.
 576 #
 577 #Revision 1.3  2001/07/25 01:23:07  richard
 578 #Added the Roundup spec to the new documentation directory.
 579 #
 580 #Revision 1.2  2001/07/23 08:20:44  richard
 581 #Moved over to using marshal in the bsddb and anydbm backends.
 582 #roundup-admin now has a "freshen" command that'll load/save all nodes (not
 583 # retired - mod hyperdb.Class.list() so it lists retired nodes)
 584 #
 585 #