1 # $Id: hyperdb.py,v 1.1 2001-07-22 11:58:35 richard Exp $
3 # standard python modules
4 import cPickle, re, string
6 # roundup modules
7 import date
10 #
11 # Types
12 #
13 class BaseType:
14 isStringType = 0
15 isDateType = 0
16 isIntervalType = 0
17 isLinkType = 0
18 isMultilinkType = 0
20 class String(BaseType):
21 def __init__(self):
22 """An object designating a String property."""
23 pass
24 def __repr__(self):
25 return '<%s>'%self.__class__
26 isStringType = 1
28 class Date(BaseType, String):
29 isDateType = 1
31 class Interval(BaseType, String):
32 isIntervalType = 1
34 class Link(BaseType):
35 def __init__(self, classname):
36 """An object designating a Link property that links to
37 nodes in a specified class."""
38 self.classname = classname
39 def __repr__(self):
40 return '<%s to "%s">'%(self.__class__, self.classname)
41 isLinkType = 1
43 class Multilink(BaseType, Link):
44 """An object designating a Multilink property that links
45 to nodes in a specified class.
46 """
47 isMultilinkType = 1
49 class DatabaseError(ValueError):
50 pass
53 #
54 # the base Database class
55 #
56 class Database:
57 # flag to set on retired entries
58 RETIRED_FLAG = '__hyperdb_retired'
61 #
62 # The base Class class
63 #
64 class Class:
65 """The handle to a particular class of nodes in a hyperdatabase."""
67 def __init__(self, db, classname, **properties):
68 """Create a new class with a given name and property specification.
70 'classname' must not collide with the name of an existing class,
71 or a ValueError is raised. The keyword arguments in 'properties'
72 must map names to property objects, or a TypeError is raised.
73 """
74 self.classname = classname
75 self.properties = properties
76 self.db = db
77 self.key = ''
79 # do the db-related init stuff
80 db.addclass(self)
82 # Editing nodes:
84 def create(self, **propvalues):
85 """Create a new node of this class and return its id.
87 The keyword arguments in 'propvalues' map property names to values.
89 The values of arguments must be acceptable for the types of their
90 corresponding properties or a TypeError is raised.
92 If this class has a key property, it must be present and its value
93 must not collide with other key strings or a ValueError is raised.
95 Any other properties on this class that are missing from the
96 'propvalues' dictionary are set to None.
98 If an id in a link or multilink property does not refer to a valid
99 node, an IndexError is raised.
100 """
101 if self.db.journaltag is None:
102 raise DatabaseError, 'Database open read-only'
103 newid = str(self.count() + 1)
105 # validate propvalues
106 num_re = re.compile('^\d+$')
107 for key, value in propvalues.items():
108 if key == self.key:
109 try:
110 self.lookup(value)
111 except KeyError:
112 pass
113 else:
114 raise ValueError, 'node with key "%s" exists'%value
116 prop = self.properties[key]
118 if prop.isLinkType:
119 value = str(value)
120 link_class = self.properties[key].classname
121 if not num_re.match(value):
122 try:
123 value = self.db.classes[link_class].lookup(value)
124 except:
125 raise ValueError, 'new property "%s": %s not a %s'%(
126 key, value, self.properties[key].classname)
127 propvalues[key] = value
128 if not self.db.hasnode(link_class, value):
129 raise ValueError, '%s has no node %s'%(link_class, value)
131 # register the link with the newly linked node
132 self.db.addjournal(link_class, value, 'link',
133 (self.classname, newid, key))
135 elif prop.isMultilinkType:
136 if type(value) != type([]):
137 raise TypeError, 'new property "%s" not a list of ids'%key
138 link_class = self.properties[key].classname
139 l = []
140 for entry in map(str, value):
141 if not num_re.match(entry):
142 try:
143 entry = self.db.classes[link_class].lookup(entry)
144 except:
145 raise ValueError, 'new property "%s": %s not a %s'%(
146 key, entry, self.properties[key].classname)
147 l.append(entry)
148 value = l
149 propvalues[key] = value
151 # handle additions
152 for id in value:
153 if not self.db.hasnode(link_class, id):
154 raise ValueError, '%s has no node %s'%(link_class, id)
155 # register the link with the newly linked node
156 self.db.addjournal(link_class, id, 'link',
157 (self.classname, newid, key))
159 elif prop.isStringType:
160 if type(value) != type(''):
161 raise TypeError, 'new property "%s" not a string'%key
163 elif prop.isDateType:
164 if not hasattr(value, 'isDate'):
165 raise TypeError, 'new property "%s" not a Date'% key
167 elif prop.isIntervalType:
168 if not hasattr(value, 'isInterval'):
169 raise TypeError, 'new property "%s" not an Interval'% key
171 for key,prop in self.properties.items():
172 if propvalues.has_key(str(key)):
173 continue
174 if prop.isMultilinkType:
175 propvalues[key] = []
176 else:
177 propvalues[key] = None
179 # done
180 self.db.addnode(self.classname, newid, propvalues)
181 self.db.addjournal(self.classname, newid, 'create', propvalues)
182 return newid
184 def get(self, nodeid, propname):
185 """Get the value of a property on an existing node of this class.
187 'nodeid' must be the id of an existing node of this class or an
188 IndexError is raised. 'propname' must be the name of a property
189 of this class or a KeyError is raised.
190 """
191 d = self.db.getnode(self.classname, str(nodeid))
192 return d[propname]
194 # XXX not in spec
195 def getnode(self, nodeid):
196 ''' Return a convenience wrapper for the node
197 '''
198 return Node(self, nodeid)
200 def set(self, nodeid, **propvalues):
201 """Modify a property on an existing node of this class.
203 'nodeid' must be the id of an existing node of this class or an
204 IndexError is raised.
206 Each key in 'propvalues' must be the name of a property of this
207 class or a KeyError is raised.
209 All values in 'propvalues' must be acceptable types for their
210 corresponding properties or a TypeError is raised.
212 If the value of the key property is set, it must not collide with
213 other key strings or a ValueError is raised.
215 If the value of a Link or Multilink property contains an invalid
216 node id, a ValueError is raised.
217 """
218 if not propvalues:
219 return
220 if self.db.journaltag is None:
221 raise DatabaseError, 'Database open read-only'
222 nodeid = str(nodeid)
223 node = self.db.getnode(self.classname, nodeid)
224 if node.has_key(self.db.RETIRED_FLAG):
225 raise IndexError
226 num_re = re.compile('^\d+$')
227 for key, value in propvalues.items():
228 if not node.has_key(key):
229 raise KeyError, key
231 if key == self.key:
232 try:
233 self.lookup(value)
234 except KeyError:
235 pass
236 else:
237 raise ValueError, 'node with key "%s" exists'%value
239 prop = self.properties[key]
241 if prop.isLinkType:
242 value = str(value)
243 link_class = self.properties[key].classname
244 if not num_re.match(value):
245 try:
246 value = self.db.classes[link_class].lookup(value)
247 except:
248 raise ValueError, 'new property "%s": %s not a %s'%(
249 key, value, self.properties[key].classname)
251 if not self.db.hasnode(link_class, value):
252 raise ValueError, '%s has no node %s'%(link_class, value)
254 # register the unlink with the old linked node
255 if node[key] is not None:
256 self.db.addjournal(link_class, node[key], 'unlink',
257 (self.classname, nodeid, key))
259 # register the link with the newly linked node
260 if value is not None:
261 self.db.addjournal(link_class, value, 'link',
262 (self.classname, nodeid, key))
264 elif prop.isMultilinkType:
265 if type(value) != type([]):
266 raise TypeError, 'new property "%s" not a list of ids'%key
267 link_class = self.properties[key].classname
268 l = []
269 for entry in map(str, value):
270 if not num_re.match(entry):
271 try:
272 entry = self.db.classes[link_class].lookup(entry)
273 except:
274 raise ValueError, 'new property "%s": %s not a %s'%(
275 key, entry, self.properties[key].classname)
276 l.append(entry)
277 value = l
278 propvalues[key] = value
280 #handle removals
281 l = node[key]
282 for id in l[:]:
283 if id in value:
284 continue
285 # register the unlink with the old linked node
286 self.db.addjournal(link_class, id, 'unlink',
287 (self.classname, nodeid, key))
288 l.remove(id)
290 # handle additions
291 for id in value:
292 if not self.db.hasnode(link_class, id):
293 raise ValueError, '%s has no node %s'%(link_class, id)
294 if id in l:
295 continue
296 # register the link with the newly linked node
297 self.db.addjournal(link_class, id, 'link',
298 (self.classname, nodeid, key))
299 l.append(id)
301 elif prop.isStringType:
302 if value is not None and type(value) != type(''):
303 raise TypeError, 'new property "%s" not a string'%key
305 elif prop.isDateType:
306 if not hasattr(value, 'isDate'):
307 raise TypeError, 'new property "%s" not a Date'% key
309 elif prop.isIntervalType:
310 if not hasattr(value, 'isInterval'):
311 raise TypeError, 'new property "%s" not an Interval'% key
313 node[key] = value
315 self.db.setnode(self.classname, nodeid, node)
316 self.db.addjournal(self.classname, nodeid, 'set', propvalues)
318 def retire(self, nodeid):
319 """Retire a node.
321 The properties on the node remain available from the get() method,
322 and the node's id is never reused.
324 Retired nodes are not returned by the find(), list(), or lookup()
325 methods, and other nodes may reuse the values of their key properties.
326 """
327 nodeid = str(nodeid)
328 if self.db.journaltag is None:
329 raise DatabaseError, 'Database open read-only'
330 node = self.db.getnode(self.classname, nodeid)
331 node[self.db.RETIRED_FLAG] = 1
332 self.db.setnode(self.classname, nodeid, node)
333 self.db.addjournal(self.classname, nodeid, 'retired', None)
335 def history(self, nodeid):
336 """Retrieve the journal of edits on a particular node.
338 'nodeid' must be the id of an existing node of this class or an
339 IndexError is raised.
341 The returned list contains tuples of the form
343 (date, tag, action, params)
345 'date' is a Timestamp object specifying the time of the change and
346 'tag' is the journaltag specified when the database was opened.
347 """
348 return self.db.getjournal(self.classname, nodeid)
350 # Locating nodes:
352 def setkey(self, propname):
353 """Select a String property of this class to be the key property.
355 'propname' must be the name of a String property of this class or
356 None, or a TypeError is raised. The values of the key property on
357 all existing nodes must be unique or a ValueError is raised.
358 """
359 self.key = propname
361 def getkey(self):
362 """Return the name of the key property for this class or None."""
363 return self.key
365 # TODO: set up a separate index db file for this? profile?
366 def lookup(self, keyvalue):
367 """Locate a particular node by its key property and return its id.
369 If this class has no key property, a TypeError is raised. If the
370 'keyvalue' matches one of the values for the key property among
371 the nodes in this class, the matching node's id is returned;
372 otherwise a KeyError is raised.
373 """
374 cldb = self.db.getclassdb(self.classname)
375 for nodeid in self.db.getnodeids(self.classname, cldb):
376 node = self.db.getnode(self.classname, nodeid, cldb)
377 if node.has_key(self.db.RETIRED_FLAG):
378 continue
379 if node[self.key] == keyvalue:
380 return nodeid
381 cldb.close()
382 raise KeyError, keyvalue
384 # XXX: change from spec - allows multiple props to match
385 def find(self, **propspec):
386 """Get the ids of nodes in this class which link to a given node.
388 'propspec' consists of keyword args propname=nodeid
389 'propname' must be the name of a property in this class, or a
390 KeyError is raised. That property must be a Link or Multilink
391 property, or a TypeError is raised.
393 'nodeid' must be the id of an existing node in the class linked
394 to by the given property, or an IndexError is raised.
395 """
396 propspec = propspec.items()
397 for propname, nodeid in propspec:
398 nodeid = str(nodeid)
399 # check the prop is OK
400 prop = self.properties[propname]
401 if not prop.isLinkType and not prop.isMultilinkType:
402 raise TypeError, "'%s' not a Link/Multilink property"%propname
403 if not self.db.hasnode(prop.classname, nodeid):
404 raise ValueError, '%s has no node %s'%(link_class, nodeid)
406 # ok, now do the find
407 cldb = self.db.getclassdb(self.classname)
408 l = []
409 for id in self.db.getnodeids(self.classname, cldb):
410 node = self.db.getnode(self.classname, id, cldb)
411 if node.has_key(self.db.RETIRED_FLAG):
412 continue
413 for propname, nodeid in propspec:
414 nodeid = str(nodeid)
415 property = node[propname]
416 if prop.isLinkType and nodeid == property:
417 l.append(id)
418 elif prop.isMultilinkType and nodeid in property:
419 l.append(id)
420 cldb.close()
421 return l
423 def stringFind(self, **requirements):
424 """Locate a particular node by matching a set of its String properties.
426 If the property is not a String property, a TypeError is raised.
428 The return is a list of the id of all nodes that match.
429 """
430 for propname in requirements.keys():
431 prop = self.properties[propname]
432 if not prop.isStringType:
433 raise TypeError, "'%s' not a String property"%propname
434 l = []
435 cldb = self.db.getclassdb(self.classname)
436 for nodeid in self.db.getnodeids(self.classname, cldb):
437 node = self.db.getnode(self.classname, nodeid, cldb)
438 if node.has_key(self.db.RETIRED_FLAG):
439 continue
440 for key, value in requirements.items():
441 if node[key] != value:
442 break
443 else:
444 l.append(nodeid)
445 cldb.close()
446 return l
448 def list(self):
449 """Return a list of the ids of the active nodes in this class."""
450 l = []
451 cn = self.classname
452 cldb = self.db.getclassdb(cn)
453 for nodeid in self.db.getnodeids(cn, cldb):
454 node = self.db.getnode(cn, nodeid, cldb)
455 if node.has_key(self.db.RETIRED_FLAG):
456 continue
457 l.append(nodeid)
458 l.sort()
459 cldb.close()
460 return l
462 # XXX not in spec
463 def filter(self, filterspec, sort, group, num_re = re.compile('^\d+$')):
464 ''' Return a list of the ids of the active nodes in this class that
465 match the 'filter' spec, sorted by the group spec and then the
466 sort spec
467 '''
468 cn = self.classname
470 # optimise filterspec
471 l = []
472 props = self.getprops()
473 for k, v in filterspec.items():
474 propclass = props[k]
475 if propclass.isLinkType:
476 if type(v) is not type([]):
477 v = [v]
478 # replace key values with node ids
479 u = []
480 link_class = self.db.classes[propclass.classname]
481 for entry in v:
482 if not num_re.match(entry):
483 try:
484 entry = link_class.lookup(entry)
485 except:
486 raise ValueError, 'new property "%s": %s not a %s'%(
487 k, entry, self.properties[k].classname)
488 u.append(entry)
490 l.append((0, k, u))
491 elif propclass.isMultilinkType:
492 if type(v) is not type([]):
493 v = [v]
494 # replace key values with node ids
495 u = []
496 link_class = self.db.classes[propclass.classname]
497 for entry in v:
498 if not num_re.match(entry):
499 try:
500 entry = link_class.lookup(entry)
501 except:
502 raise ValueError, 'new property "%s": %s not a %s'%(
503 k, entry, self.properties[k].classname)
504 u.append(entry)
505 l.append((1, k, u))
506 elif propclass.isStringType:
507 v = v[0]
508 if '*' in v or '?' in v:
509 # simple glob searching
510 v = v.replace('?', '.')
511 v = v.replace('*', '.*?')
512 v = re.compile(v)
513 l.append((2, k, v))
514 elif v[0] == '^':
515 # start-anchored
516 if v[-1] == '$':
517 # _and_ end-anchored
518 l.append((6, k, v[1:-1]))
519 l.append((3, k, v[1:]))
520 elif v[-1] == '$':
521 # end-anchored
522 l.append((4, k, v[:-1]))
523 else:
524 # substring
525 l.append((5, k, v))
526 else:
527 l.append((6, k, v))
528 filterspec = l
530 # now, find all the nodes that are active and pass filtering
531 l = []
532 cldb = self.db.getclassdb(cn)
533 for nodeid in self.db.getnodeids(cn, cldb):
534 node = self.db.getnode(cn, nodeid, cldb)
535 if node.has_key(self.db.RETIRED_FLAG):
536 continue
537 # apply filter
538 for t, k, v in filterspec:
539 if t == 0 and node[k] not in v:
540 # link - if this node'd property doesn't appear in the
541 # filterspec's nodeid list, skip it
542 break
543 elif t == 1:
544 # multilink - if any of the nodeids required by the
545 # filterspec aren't in this node's property, then skip
546 # it
547 for value in v:
548 if value not in node[k]:
549 break
550 else:
551 continue
552 break
553 elif t == 2 and not v.search(node[k]):
554 # RE search
555 break
556 elif t == 3 and node[k][:len(v)] != v:
557 # start anchored
558 break
559 elif t == 4 and node[k][-len(v):] != v:
560 # end anchored
561 break
562 elif t == 5 and node[k].find(v) == -1:
563 # substring search
564 break
565 elif t == 6 and node[k] != v:
566 # straight value comparison for the other types
567 break
568 else:
569 l.append((nodeid, node))
570 l.sort()
571 cldb.close()
573 # optimise sort
574 m = []
575 for entry in sort:
576 if entry[0] != '-':
577 m.append(('+', entry))
578 else:
579 m.append((entry[0], entry[1:]))
580 sort = m
582 # optimise group
583 m = []
584 for entry in group:
585 if entry[0] != '-':
586 m.append(('+', entry))
587 else:
588 m.append((entry[0], entry[1:]))
589 group = m
591 # now, sort the result
592 def sortfun(a, b, sort=sort, group=group, properties=self.getprops(),
593 db = self.db, cl=self):
594 a_id, an = a
595 b_id, bn = b
596 for list in group, sort:
597 for dir, prop in list:
598 # handle the properties that might be "faked"
599 if not an.has_key(prop):
600 an[prop] = cl.get(a_id, prop)
601 av = an[prop]
602 if not bn.has_key(prop):
603 bn[prop] = cl.get(b_id, prop)
604 bv = bn[prop]
606 # sorting is class-specific
607 propclass = properties[prop]
609 # String and Date values are sorted in the natural way
610 if propclass.isStringType:
611 # clean up the strings
612 if av and av[0] in string.uppercase:
613 av = an[prop] = av.lower()
614 if bv and bv[0] in string.uppercase:
615 bv = bn[prop] = bv.lower()
616 if propclass.isStringType or propclass.isDateType:
617 if dir == '+':
618 r = cmp(av, bv)
619 if r != 0: return r
620 elif dir == '-':
621 r = cmp(bv, av)
622 if r != 0: return r
624 # Link properties are sorted according to the value of
625 # the "order" property on the linked nodes if it is
626 # present; or otherwise on the key string of the linked
627 # nodes; or finally on the node ids.
628 elif propclass.isLinkType:
629 link = db.classes[propclass.classname]
630 if link.getprops().has_key('order'):
631 if dir == '+':
632 r = cmp(link.get(av, 'order'),
633 link.get(bv, 'order'))
634 if r != 0: return r
635 elif dir == '-':
636 r = cmp(link.get(bv, 'order'),
637 link.get(av, 'order'))
638 if r != 0: return r
639 elif link.getkey():
640 key = link.getkey()
641 if dir == '+':
642 r = cmp(link.get(av, key), link.get(bv, key))
643 if r != 0: return r
644 elif dir == '-':
645 r = cmp(link.get(bv, key), link.get(av, key))
646 if r != 0: return r
647 else:
648 if dir == '+':
649 r = cmp(av, bv)
650 if r != 0: return r
651 elif dir == '-':
652 r = cmp(bv, av)
653 if r != 0: return r
655 # Multilink properties are sorted according to how many
656 # links are present.
657 elif propclass.isMultilinkType:
658 if dir == '+':
659 r = cmp(len(av), len(bv))
660 if r != 0: return r
661 elif dir == '-':
662 r = cmp(len(bv), len(av))
663 if r != 0: return r
664 return cmp(a[0], b[0])
665 l.sort(sortfun)
666 return [i[0] for i in l]
668 def count(self):
669 """Get the number of nodes in this class.
671 If the returned integer is 'numnodes', the ids of all the nodes
672 in this class run from 1 to numnodes, and numnodes+1 will be the
673 id of the next node to be created in this class.
674 """
675 return self.db.countnodes(self.classname)
677 # Manipulating properties:
679 def getprops(self):
680 """Return a dictionary mapping property names to property objects."""
681 return self.properties
683 def addprop(self, **properties):
684 """Add properties to this class.
686 The keyword arguments in 'properties' must map names to property
687 objects, or a TypeError is raised. None of the keys in 'properties'
688 may collide with the names of existing properties, or a ValueError
689 is raised before any properties have been added.
690 """
691 for key in properties.keys():
692 if self.properties.has_key(key):
693 raise ValueError, key
694 self.properties.update(properties)
697 # XXX not in spec
698 class Node:
699 ''' A convenience wrapper for the given node
700 '''
701 def __init__(self, cl, nodeid):
702 self.__dict__['cl'] = cl
703 self.__dict__['nodeid'] = nodeid
704 def keys(self):
705 return self.cl.getprops().keys()
706 def has_key(self, name):
707 return self.cl.getprops().has_key(name)
708 def __getattr__(self, name):
709 if self.__dict__.has_key(name):
710 return self.__dict__['name']
711 try:
712 return self.cl.get(self.nodeid, name)
713 except KeyError, value:
714 raise AttributeError, str(value)
715 def __getitem__(self, name):
716 return self.cl.get(self.nodeid, name)
717 def __setattr__(self, name, value):
718 try:
719 return self.cl.set(self.nodeid, **{name: value})
720 except KeyError, value:
721 raise AttributeError, str(value)
722 def __setitem__(self, name, value):
723 self.cl.set(self.nodeid, **{name: value})
724 def history(self):
725 return self.cl.history(self.nodeid)
726 def retire(self):
727 return self.cl.retire(self.nodeid)
730 def Choice(name, *options):
731 cl = Class(db, name, name=hyperdb.String(), order=hyperdb.String())
732 for i in range(len(options)):
733 cl.create(name=option[i], order=i)
734 return hyperdb.Link(name)
736 #
737 # $Log: not supported by cvs2svn $
738 # Revision 1.6 2001/07/20 08:20:24 richard
739 # Fixed a bug in the filter - wrong variable names in the error message.
740 # Recognised that the filter has an outstanding bug. Hrm. we need a bug tracker
741 # for this project :)
742 #
743 # Revision 1.5 2001/07/20 07:35:55 richard
744 # largish changes as a start of splitting off bits and pieces to allow more
745 # flexible installation / database back-ends
746 #