- optimisation for date: if the database provides us with a datetime

[roundup.git] / roundup / backends / rdbms_common.py
diff --git a/roundup/backends/rdbms_common.py b/roundup/backends/rdbms_common.py

index 6942a78b847af1b0a37b5d581b1fc2367bb04e9d..eb1b7c9a33e777053775b656a1c759a54ba25be2 100644 (file)
--- a/roundup/backends/rdbms_common.py
+++ b/roundup/backends/rdbms_common.py
@@ -52,7 +52,7 @@ the same name.
  __docformat__ = 'restructuredtext'
  
  # standard python modules
-import sys, os, time, re, errno, weakref, copy, logging
+import sys, os, time, re, errno, weakref, copy, logging, datetime
  
  # roundup modules
  from roundup import hyperdb, date, password, roundupdb, security, support
@@ -62,6 +62,7 @@ from roundup.backends import locking
  from roundup.support import reversed
  from roundup.i18n import _
  
+
  # support
  from roundup.backends.blobfiles import FileStorage
  try:
@@ -71,6 +72,9 @@ except ImportError:
  from roundup.backends.sessions_rdbms import Sessions, OneTimeKeys
  from roundup.date import Range
  
+from roundup.backends.back_anydbm import compile_expression
+
+
  # dummy value meaning "argument not passed"
  _marker = []
  
@@ -87,6 +91,13 @@ def _bool_cvt(value):
      # assume it's a number returned from the db API
      return int(value)
  
+def date_to_hyperdb_value(d):
+    """ convert date d to a roundup date """
+    if isinstance (d, datetime.datetime):
+        return date.Date(d)
+    return date.Date (str(d).replace(' ', '.'))
+
+
  def connection_dict(config, dbnamestr=None):
      """ Used by Postgresql and MySQL to detemine the keyword args for
      opening the database connection."""
@@ -100,6 +111,54 @@ def connection_dict(config, dbnamestr=None):
              d[name] = config[cvar]
      return d
  
+
+class IdListOptimizer:
+    """ To prevent flooding the SQL parser of the underlaying
+        db engine with "x IN (1, 2, 3, ..., <large number>)" collapses
+        these cases to "x BETWEEN 1 AND <large number>".
+    """
+
+    def __init__(self):
+        self.ranges  = []
+        self.singles = []
+
+    def append(self, nid):
+        """ Invariant: nid are ordered ascending """
+        if self.ranges:
+            last = self.ranges[-1]
+            if last[1] == nid-1:
+                last[1] = nid
+                return
+        if self.singles:
+            last = self.singles[-1]
+            if last == nid-1:
+                self.singles.pop()
+                self.ranges.append([last, nid])
+                return
+        self.singles.append(nid)
+
+    def where(self, field, placeholder):
+        ranges  = self.ranges
+        singles = self.singles
+
+        if not singles and not ranges: return "(1=0)", []
+
+        if ranges:
+            between = '%s BETWEEN %s AND %s' % (
+                field, placeholder, placeholder)
+            stmnt = [between] * len(ranges)
+        else:
+            stmnt = []
+        if singles:
+            stmnt.append('%s in (%s)' % (
+                field, ','.join([placeholder]*len(singles))))
+
+        return '(%s)' % ' OR '.join(stmnt), sum(ranges, []) + singles
+
+    def __str__(self):
+        return "ranges: %r / singles: %r" % (self.ranges, self.singles)
+
+
  class Database(FileStorage, hyperdb.Database, roundupdb.Database):
      """ Wrapper around an SQL database that presents a hyperdb interface.
  
@@ -123,8 +182,7 @@ class Database(FileStorage, hyperdb.Database, roundupdb.Database):
          # keep a cache of the N most recently retrieved rows of any kind
          # (classname, nodeid) = row
          self.cache_size = config.RDBMS_CACHE_SIZE
-        self.cache = {}
-        self.cache_lru = []
+        self.clearCache()
          self.stats = {'cache_hits': 0, 'cache_misses': 0, 'get_items': 0,
              'filtering': 0}
  
@@ -151,14 +209,16 @@ class Database(FileStorage, hyperdb.Database, roundupdb.Database):
          """
          raise NotImplemented
  
-    def sql(self, sql, args=None):
+    def sql(self, sql, args=None, cursor=None):
          """ Execute the sql with the optional args.
          """
          self.log_debug('SQL %r %r'%(sql, args))
+        if not cursor:
+            cursor = self.cursor
          if args:
-            self.cursor.execute(sql, args)
+            cursor.execute(sql, args)
          else:
-            self.cursor.execute(sql)
+            cursor.execute(sql)
  
      def sql_fetchone(self):
          """ Fetch a single row. If there's nothing to fetch, return None.
@@ -170,6 +230,14 @@ class Database(FileStorage, hyperdb.Database, roundupdb.Database):
          """
          return self.cursor.fetchall()
  
+    def sql_fetchiter(self):
+        """ Fetch all row as a generator
+        """
+        while True:
+            row = self.cursor.fetchone()
+            if not row: break
+            yield row
+
      def sql_stringquote(self, value):
          """ Quote the string so it's safe to put in the 'sql quotes'
          """
@@ -445,7 +513,7 @@ class Database(FileStorage, hyperdb.Database, roundupdb.Database):
              # no changes
              return 0
  
-        logger = logging.getLogger('hyperdb')
+        logger = logging.getLogger('roundup.hyperdb')
          logger.info('update_class %s'%spec.classname)
  
          logger.debug('old_spec %r'%(old_spec,))
@@ -755,7 +823,7 @@ class Database(FileStorage, hyperdb.Database, roundupdb.Database):
          Note: I don't commit here, which is different behaviour to the
                "nuke from orbit" behaviour in the dbs.
          """
-        logging.getLogger('hyperdb').info('clear')
+        logging.getLogger('roundup.hyperdb').info('clear')
          for cn in self.classes:
              sql = 'delete from _%s'%cn
              self.sql(sql)
@@ -788,6 +856,21 @@ class Database(FileStorage, hyperdb.Database, roundupdb.Database):
  
          raise ValueError('%r is not a hyperdb property class' % propklass)
  
+    def _cache_del(self, key):
+        del self.cache[key]
+        self.cache_lru.remove(key)
+
+    def _cache_refresh(self, key):
+        self.cache_lru.remove(key)
+        self.cache_lru.insert(0, key)
+
+    def _cache_save(self, key, node):
+        self.cache[key] = node
+        # update the LRU
+        self.cache_lru.insert(0, key)
+        if len(self.cache_lru) > self.cache_size:
+            del self.cache[self.cache_lru.pop()]
+
      def addnode(self, classname, nodeid, node):
          """ Add the specified node to its class's db.
          """
@@ -821,8 +904,7 @@ class Database(FileStorage, hyperdb.Database, roundupdb.Database):
          # clear this node out of the cache if it's in there
          key = (classname, nodeid)
          if key in self.cache:
-            del self.cache[key]
-            self.cache_lru.remove(key)
+            self._cache_del(key)
  
          # figure the values to insert
          vals = []
@@ -871,8 +953,7 @@ class Database(FileStorage, hyperdb.Database, roundupdb.Database):
          # clear this node out of the cache if it's in there
          key = (classname, nodeid)
          if key in self.cache:
-            del self.cache[key]
-            self.cache_lru.remove(key)
+            self._cache_del(key)
  
          cl = self.classes[classname]
          props = cl.getprops()
@@ -966,7 +1047,7 @@ class Database(FileStorage, hyperdb.Database, roundupdb.Database):
  
      sql_to_hyperdb_value = {
          hyperdb.String : str,
-        hyperdb.Date   : lambda x:date.Date(str(x).replace(' ', '.')),
+        hyperdb.Date   : date_to_hyperdb_value,
  #        hyperdb.Link   : int,      # XXX numeric ids
          hyperdb.Link   : str,
          hyperdb.Interval  : date.Interval,
@@ -995,8 +1076,7 @@ class Database(FileStorage, hyperdb.Database, roundupdb.Database):
          key = (classname, nodeid)
          if key in self.cache:
              # push us back to the top of the LRU
-            self.cache_lru.remove(key)
-            self.cache_lru.insert(0, key)
+            self._cache_refresh(key)
              if __debug__:
                  self.stats['cache_hits'] += 1
              # return the cached information
@@ -1033,26 +1113,9 @@ class Database(FileStorage, hyperdb.Database, roundupdb.Database):
                  value = self.to_hyperdb_value(props[name].__class__)(value)
              node[name] = value
  
-
-        # now the multilinks
-        for col in mls:
-            # get the link ids
-            sql = 'select linkid from %s_%s where nodeid=%s'%(classname, col,
-                self.arg)
-            self.sql(sql, (nodeid,))
-            # extract the first column from the result
-            # XXX numeric ids
-            items = [int(x[0]) for x in self.cursor.fetchall()]
-            items.sort ()
-            node[col] = [str(x) for x in items]
-
          # save off in the cache
          key = (classname, nodeid)
-        self.cache[key] = node
-        # update the LRU
-        self.cache_lru.insert(0, key)
-        if len(self.cache_lru) > self.cache_size:
-            del self.cache[self.cache_lru.pop()]
+        self._cache_save(key, node)
  
          if __debug__:
              self.stats['get_items'] += (time.time() - start_t)
@@ -1063,7 +1126,8 @@ class Database(FileStorage, hyperdb.Database, roundupdb.Database):
          """Remove a node from the database. Called exclusively by the
             destroy() method on Class.
          """
-        logging.getLogger('hyperdb').info('destroynode %s%s'%(classname, nodeid))
+        logging.getLogger('roundup.hyperdb').info('destroynode %s%s'%(
+            classname, nodeid))
  
          # make sure the node exists
          if not self.hasnode(classname, nodeid):
@@ -1268,7 +1332,7 @@ class Database(FileStorage, hyperdb.Database, roundupdb.Database):
      def sql_commit(self, fail_ok=False):
          """ Actually commit to the database.
          """
-        logging.getLogger('hyperdb').info('commit')
+        logging.getLogger('roundup.hyperdb').info('commit')
  
          self.conn.commit()
  
@@ -1300,6 +1364,11 @@ class Database(FileStorage, hyperdb.Database, roundupdb.Database):
          # clear out the transactions
          self.transactions = []
  
+        # clear the cache: Don't carry over cached values from one
+        # transaction to the next (there may be other changes from other
+        # transactions)
+        self.clearCache()
+
      def sql_rollback(self):
          self.conn.rollback()
  
@@ -1309,7 +1378,7 @@ class Database(FileStorage, hyperdb.Database, roundupdb.Database):
          Undo all the changes made since the database was opened or the last
          commit() or rollback() was performed.
          """
-        logging.getLogger('hyperdb').info('rollback')
+        logging.getLogger('roundup.hyperdb').info('rollback')
  
          self.sql_rollback()
  
@@ -1324,7 +1393,7 @@ class Database(FileStorage, hyperdb.Database, roundupdb.Database):
          self.clearCache()
  
      def sql_close(self):
-        logging.getLogger('hyperdb').info('close')
+        logging.getLogger('roundup.hyperdb').info('close')
          self.conn.close()
  
      def close(self):
@@ -1559,9 +1628,20 @@ class Class(hyperdb.Class):
              else:
                  return self.db.getuid()
  
-        # get the property (raises KeyErorr if invalid)
+        # get the property (raises KeyError if invalid)
          prop = self.properties[propname]
  
+        # lazy evaluation of Multilink
+        if propname not in d and isinstance(prop, Multilink):
+            sql = 'select linkid from %s_%s where nodeid=%s'%(self.classname,
+                propname, self.db.arg)
+            self.db.sql(sql, (nodeid,))
+            # extract the first column from the result
+            # XXX numeric ids
+            items = [int(x[0]) for x in self.db.cursor.fetchall()]
+            items.sort ()
+            d[propname] = [str(x) for x in items]
+
          # handle there being no value in the table for the property
          if propname not in d or d[propname] is None:
              if default is _marker:
@@ -2128,32 +2208,106 @@ class Class(hyperdb.Class):
      # The format parameter is replaced with the attribute.
      order_by_null_values = None
  
-    def filter(self, search_matches, filterspec, sort=[], group=[]):
-        """Return a list of the ids of the active nodes in this class that
-        match the 'filter' spec, sorted by the group spec and then the
-        sort spec
-
-        "filterspec" is {propname: value(s)}
-
-        "sort" and "group" are [(dir, prop), ...] where dir is '+', '-'
-        or None and prop is a prop name or None. Note that for
-        backward-compatibility reasons a single (dir, prop) tuple is
-        also allowed.
+    def supports_subselects(self): 
+        '''Assuming DBs can do subselects, overwrite if they cannot.
+       '''
+        return True
+
+    def _filter_multilink_expression_fallback(
+        self, classname, multilink_table, expr):
+        '''This is a fallback for database that do not support
+           subselects.'''
+
+        is_valid = expr.evaluate
+
+        last_id, kws = None, []
+
+        ids = IdListOptimizer()
+        append = ids.append
+
+        # This join and the evaluation in program space
+        # can be expensive for larger databases!
+        # TODO: Find a faster way to collect the data needed
+        # to evalute the expression.
+        # Moving the expression evaluation into the database
+        # would be nice but this tricky: Think about the cases
+        # where the multilink table does not have join values
+        # needed in evaluation.
+
+        stmnt = "SELECT c.id, m.linkid FROM _%s c " \
+                "LEFT OUTER JOIN %s m " \
+                "ON c.id = m.nodeid ORDER BY c.id" % (
+                    classname, multilink_table)
+        self.db.sql(stmnt)
+
+        # collect all multilink items for a class item
+        for nid, kw in self.db.sql_fetchiter():
+            if nid != last_id:
+                if last_id is None:
+                    last_id = nid
+                else:
+                    # we have all multilink items -> evaluate!
+                    if is_valid(kws): append(last_id)
+                    last_id, kws = nid, []
+            if kw is not None:
+                kws.append(kw)
  
-        "search_matches" is a container type or None
+        if last_id is not None and is_valid(kws): 
+            append(last_id)
  
-        The filter must match all properties specificed. If the property
-        value to match is a list:
+        # we have ids of the classname table
+        return ids.where("_%s.id" % classname, self.db.arg)
  
-        1. String properties must match all elements in the list, and
-        2. Other properties must match any of the elements in the list.
+    def _filter_multilink_expression(self, classname, multilink_table, v):
+        """ Filters out elements of the classname table that do not
+            match the given expression.
+            Returns tuple of 'WHERE' introns for the overall filter.
+        """
+        try:
+            opcodes = [int(x) for x in v]
+            if min(opcodes) >= -1: raise ValueError()
+
+            expr = compile_expression(opcodes)
+
+            if not self.supports_subselects():
+                # We heavily rely on subselects. If there is
+                # no decent support fall back to slower variant.
+                return self._filter_multilink_expression_fallback(
+                    classname, multilink_table, expr)
+
+            atom = \
+                "%s IN(SELECT linkid FROM %s WHERE nodeid=a.id)" % (
+                self.db.arg,
+                multilink_table)
+
+            intron = \
+                "_%(classname)s.id in (SELECT id " \
+                "FROM _%(classname)s AS a WHERE %(condition)s) " % {
+                    'classname' : classname,
+                    'condition' : expr.generate(lambda n: atom) }
+
+            values = []
+            def collect_values(n): values.append(n.x)
+            expr.visit(collect_values)
+
+            return intron, values
+        except:
+            # original behavior
+            where = "%s.linkid in (%s)" % (
+                multilink_table, ','.join([self.db.arg] * len(v)))
+            return where, v, True # True to indicate original
+
+    def _filter_sql (self, search_matches, filterspec, srt=[], grp=[], retr=0):
+        """ Compute the proptree and the SQL/ARGS for a filter.
+        For argument description see filter below.
+        We return a 3-tuple, the proptree, the sql and the sql-args
+        or None if no SQL is necessary.
+        The flag retr serves to retrieve *all* non-Multilink properties
+        (for filling the cache during a filter_iter)
          """
          # we can't match anything if search_matches is empty
          if not search_matches and search_matches is not None:
-            return []
-
-        if __debug__:
-            start_t = time.time()
+            return None
  
          icn = self.classname
  
@@ -2166,8 +2320,8 @@ class Class(hyperdb.Class):
  
          # figure the WHERE clause from the filterspec
          mlfilt = 0      # are we joining with Multilink tables?
-        sortattr = self._sortattr (group = group, sort = sort)
-        proptree = self._proptree(filterspec, sortattr)
+        sortattr = self._sortattr (group = grp, sort = srt)
+        proptree = self._proptree(filterspec, sortattr, retr)
          mlseen = 0
          for pt in reversed(proptree.sortattr):
              p = pt
@@ -2182,12 +2336,11 @@ class Class(hyperdb.Class):
                  pt.attr_sort_done = pt.tree_sort_done = True
          proptree.compute_sort_done()
  
-        ordercols = []
-        auxcols = {}
+        cols = ['_%s.id'%icn]
          mlsort = []
          rhsnum = 0
          for p in proptree:
-            oc = None
+            rc = ac = oc = None
              cn = p.classname
              ln = p.uniqname
              pln = p.parent.uniqname
@@ -2195,10 +2348,13 @@ class Class(hyperdb.Class):
              k = p.name
              v = p.val
              propclass = p.propclass
-            if p.sort_type > 0:
-                oc = ac = '_%s._%s'%(pln, k)
+            if p.parent == proptree and p.name == 'id' \
+                and 'retrieve' in p.need_for:
+                p.sql_idx = 0
+            if 'sort' in p.need_for or 'retrieve' in p.need_for:
+                rc = oc = ac = '_%s._%s'%(pln, k)
              if isinstance(propclass, Multilink):
-                if p.sort_type < 2:
+                if 'search' in p.need_for:
                      mlfilt = 1
                      tn = '%s_%s'%(pcn, k)
                      if v in ('-1', ['-1'], []):
@@ -2207,33 +2363,47 @@ class Class(hyperdb.Class):
                          where.append(self._subselect(pcn, tn))
                      else:
                          frum.append(tn)
-                        where.append('_%s.id=%s.nodeid'%(pln,tn))
+                        gen_join = True
+
+                        if p.has_values and isinstance(v, type([])):
+                            result = self._filter_multilink_expression(pln, tn, v)
+                            # XXX: We dont need an id join if we used the filter
+                            gen_join = len(result) == 3
+
+                        if gen_join:
+                            where.append('_%s.id=%s.nodeid'%(pln,tn))
+
                          if p.children:
                              frum.append('_%s as _%s' % (cn, ln))
                              where.append('%s.linkid=_%s.id'%(tn, ln))
+
                          if p.has_values:
                              if isinstance(v, type([])):
-                                s = ','.join([a for x in v])
-                                where.append('%s.linkid in (%s)'%(tn, s))
-                                args = args + v
+                                where.append(result[0])
+                                args += result[1]
                              else:
                                  where.append('%s.linkid=%s'%(tn, a))
                                  args.append(v)
-                if p.sort_type > 0:
+                if 'sort' in p.need_for:
                      assert not p.attr_sort_done and not p.sort_ids_needed
              elif k == 'id':
-                if p.sort_type < 2:
+                if 'search' in p.need_for:
                      if isinstance(v, type([])):
+                        # If there are no permitted values, then the
+                        # where clause will always be false, and we
+                        # can optimize the query away.
+                        if not v:
+                            return []
                          s = ','.join([a for x in v])
                          where.append('_%s.%s in (%s)'%(pln, k, s))
                          args = args + v
                      else:
                          where.append('_%s.%s=%s'%(pln, k, a))
                          args.append(v)
-                if p.sort_type > 0:
-                    oc = ac = '_%s.id'%pln
+                if 'sort' in p.need_for or 'retrieve' in p.need_for:
+                    rc = oc = ac = '_%s.id'%pln
              elif isinstance(propclass, String):
-                if p.sort_type < 2:
+                if 'search' in p.need_for:
                      if not isinstance(v, type([])):
                          v = [v]
  
@@ -2247,12 +2417,12 @@ class Class(hyperdb.Class):
                          +' and '.join(["_%s._%s LIKE '%s'"%(pln, k, s) for s in v])
                          +')')
                      # note: args are embedded in the query string now
-                if p.sort_type > 0:
+                if 'sort' in p.need_for:
                      oc = ac = 'lower(_%s._%s)'%(pln, k)
              elif isinstance(propclass, Link):
-                if p.sort_type < 2:
+                if 'search' in p.need_for:
                      if p.children:
-                        if p.sort_type == 0:
+                        if 'sort' not in p.need_for:
                              frum.append('_%s as _%s' % (cn, ln))
                          where.append('_%s._%s=_%s.id'%(pln, k, ln))
                      if p.has_values:
@@ -2280,16 +2450,18 @@ class Class(hyperdb.Class):
                              else:
                                  where.append('_%s._%s=%s'%(pln, k, a))
                                  args.append(v)
-                if p.sort_type > 0:
+                if 'sort' in p.need_for:
                      lp = p.cls.labelprop()
                      oc = ac = '_%s._%s'%(pln, k)
                      if lp != 'id':
-                        if p.tree_sort_done and p.sort_type > 0:
+                        if p.tree_sort_done:
                              loj.append(
                                  'LEFT OUTER JOIN _%s as _%s on _%s._%s=_%s.id'%(
                                  cn, ln, pln, k, ln))
                          oc = '_%s._%s'%(ln, lp)
-            elif isinstance(propclass, Date) and p.sort_type < 2:
+                if 'retrieve' in p.need_for:
+                    rc = '_%s._%s'%(pln, k)
+            elif isinstance(propclass, Date) and 'search' in p.need_for:
                  dc = self.db.to_sql_value(hyperdb.Date)
                  if isinstance(v, type([])):
                      s = ','.join([a for x in v])
@@ -2310,7 +2482,7 @@ class Class(hyperdb.Class):
                          pass
              elif isinstance(propclass, Interval):
                  # filter/sort using the __<prop>_int__ column
-                if p.sort_type < 2:
+                if 'search' in p.need_for:
                      if isinstance(v, type([])):
                          s = ','.join([a for x in v])
                          where.append('_%s.__%s_int__ in (%s)'%(pln, k, s))
@@ -2328,9 +2500,11 @@ class Class(hyperdb.Class):
                          except ValueError:
                              # If range creation fails - ignore search parameter
                              pass
-                if p.sort_type > 0:
+                if 'sort' in p.need_for:
                      oc = ac = '_%s.__%s_int__'%(pln,k)
-            elif isinstance(propclass, Boolean) and p.sort_type < 2:
+                if 'retrieve' in p.need_for:
+                    rc = '_%s._%s'%(pln,k)
+            elif isinstance(propclass, Boolean) and 'search' in p.need_for:
                  if type(v) == type(""):
                      v = v.split(',')
                  if type(v) != type([]):
@@ -2348,7 +2522,7 @@ class Class(hyperdb.Class):
                      s = ','.join([a for x in v])
                      where.append('_%s._%s in (%s)'%(pln, k, s))
                      args = args + bv
-            elif p.sort_type < 2:
+            elif 'search' in p.need_for:
                  if isinstance(v, type([])):
                      s = ','.join([a for x in v])
                      where.append('_%s._%s in (%s)'%(pln, k, s))
@@ -2358,18 +2532,28 @@ class Class(hyperdb.Class):
                      args.append(v)
              if oc:
                  if p.sort_ids_needed:
-                    auxcols[ac] = p
+                    if rc == ac:
+                        p.sql_idx = len(cols)
+                    p.auxcol = len(cols)
+                    cols.append(ac)
                  if p.tree_sort_done and p.sort_direction:
-                    # Don't select top-level id twice
-                    if p.name != 'id' or p.parent != proptree:
-                        ordercols.append(oc)
+                    # Don't select top-level id or multilink twice
+                    if (not p.sort_ids_needed or ac != oc) and (p.name != 'id'
+                        or p.parent != proptree):
+                        if rc == oc:
+                            p.sql_idx = len(cols)
+                        cols.append(oc)
                      desc = ['', ' desc'][p.sort_direction == '-']
                      # Some SQL dbs sort NULL values last -- we want them first.
                      if (self.order_by_null_values and p.name != 'id'):
                          nv = self.order_by_null_values % oc
-                        ordercols.append(nv)
+                        cols.append(nv)
                          p.orderby.append(nv + desc)
                      p.orderby.append(oc + desc)
+            if 'retrieve' in p.need_for and p.sql_idx is None:
+                assert(rc)
+                p.sql_idx = len(cols)
+                cols.append (rc)
  
          props = self.getprops()
  
@@ -2392,11 +2576,8 @@ class Class(hyperdb.Class):
          if mlfilt:
              # we're joining tables on the id, so we will get dupes if we
              # don't distinct()
-            cols = ['distinct(_%s.id)'%icn]
-        else:
-            cols = ['_%s.id'%icn]
-        if ordercols:
-            cols = cols + ordercols
+            cols[0] = 'distinct(_%s.id)'%icn
+
          order = []
          # keep correct sequence of order attributes.
          for sa in proptree.sortattr:
@@ -2407,21 +2588,50 @@ class Class(hyperdb.Class):
              order = ' order by %s'%(','.join(order))
          else:
              order = ''
-        for o, p in auxcols.iteritems ():
-            cols.append (o)
-            p.auxcol = len (cols) - 1
  
          cols = ','.join(cols)
          loj = ' '.join(loj)
          sql = 'select %s from %s %s %s%s'%(cols, frum, loj, where, order)
          args = tuple(args)
          __traceback_info__ = (sql, args)
+        return proptree, sql, args
+
+    def filter(self, search_matches, filterspec, sort=[], group=[]):
+        """Return a list of the ids of the active nodes in this class that
+        match the 'filter' spec, sorted by the group spec and then the
+        sort spec
+
+        "filterspec" is {propname: value(s)}
+
+        "sort" and "group" are [(dir, prop), ...] where dir is '+', '-'
+        or None and prop is a prop name or None. Note that for
+        backward-compatibility reasons a single (dir, prop) tuple is
+        also allowed.
+
+        "search_matches" is a container type or None
+
+        The filter must match all properties specificed. If the property
+        value to match is a list:
+
+        1. String properties must match all elements in the list, and
+        2. Other properties must match any of the elements in the list.
+        """
+        if __debug__:
+            start_t = time.time()
+
+        sq = self._filter_sql (search_matches, filterspec, sort, group)
+        # nothing to match?
+        if sq is None:
+            return []
+        proptree, sql, args = sq
+
          self.db.sql(sql, args)
          l = self.db.sql_fetchall()
  
          # Compute values needed for sorting in proptree.sort
-        for p in auxcols.itervalues():
-            p.sort_ids = p.sort_result = [row[p.auxcol] for row in l]
+        for p in proptree:
+            if hasattr(p, 'auxcol'):
+                p.sort_ids = p.sort_result = [row[p.auxcol] for row in l]
          # return the IDs (the first column)
          # XXX numeric ids
          l = [str(row[0]) for row in l]
@@ -2431,6 +2641,53 @@ class Class(hyperdb.Class):
              self.db.stats['filtering'] += (time.time() - start_t)
          return l
  
+    def filter_iter(self, search_matches, filterspec, sort=[], group=[]):
+        """Iterator similar to filter above with same args.
+        Limitation: We don't sort on multilinks.
+        This uses an optimisation: We put all nodes that are in the
+        current row into the node cache. Then we return the node id.
+        That way a fetch of a node won't create another sql-fetch (with
+        a join) from the database because the nodes are already in the
+        cache. We're using our own temporary cursor.
+        """
+        sq = self._filter_sql(search_matches, filterspec, sort, group, retr=1)
+        # nothing to match?
+        if sq is None:
+            return
+        proptree, sql, args = sq
+        cursor = self.db.conn.cursor()
+        self.db.sql(sql, args, cursor)
+        classes = {}
+        for p in proptree:
+            if 'retrieve' in p.need_for:
+                cn = p.parent.classname
+                ptid = p.parent.id # not the nodeid!
+                key = (cn, ptid)
+                if key not in classes:
+                    classes[key] = {}
+                name = p.name
+                assert (name)
+                classes[key][name] = p
+                p.to_hyperdb = self.db.to_hyperdb_value(p.propclass.__class__)
+        while True:
+            row = cursor.fetchone()
+            if not row: break
+            # populate cache with current items
+            for (classname, ptid), pt in classes.iteritems():
+                nodeid = str(row[pt['id'].sql_idx])
+                key = (classname, nodeid)
+                if key in self.db.cache:
+                    self.db._cache_refresh(key)
+                    continue
+                node = {}
+                for propname, p in pt.iteritems():
+                    value = row[p.sql_idx]
+                    if value is not None:
+                        value = p.to_hyperdb(value)
+                    node[propname] = value
+                self.db._cache_save(key, node)
+            yield str(row[0])
+
      def filter_sql(self, sql):
          """Return a list of the ids of the items in this class that match
          the SQL provided. The SQL is a complete "select" statement.
@@ -2640,37 +2897,6 @@ class Class(hyperdb.Class):
                  r.append(list(map(repr, l)))
          return r
  
-    def import_journals(self, entries):
-        """Import a class's journal.
-
-        Uses setjournal() to set the journal for each item."""
-        properties = self.getprops()
-        d = {}
-        for l in entries:
-            nodeid, jdate, user, action, params = map(eval, l)
-            r = d.setdefault(nodeid, [])
-            if action == 'set':
-                for propname, value in params.iteritems():
-                    prop = properties[propname]
-                    if value is None:
-                        pass
-                    elif isinstance(prop, Date):
-                        value = date.Date(value)
-                    elif isinstance(prop, Interval):
-                        value = date.Interval(value)
-                    elif isinstance(prop, Password):
-                        pwd = password.Password()
-                        pwd.unpack(value)
-                        value = pwd
-                    params[propname] = value
-            elif action == 'create' and params:
-                # old tracker with data stored in the create!
-                params = {}
-            r.append((nodeid, date.Date(jdate), user, action, params))
-
-        for nodeid, l in d.iteritems():
-            self.db.setjournal(self.classname, nodeid, l)
-
  class FileClass(hyperdb.FileClass, Class):
      """This class defines a large chunk of data. To support this, it has a
         mandatory String property "content" which is typically saved off
@@ -2729,7 +2955,7 @@ class FileClass(hyperdb.FileClass, Class):
          if propname == 'content':
              try:
                  return self.db.getfile(self.classname, nodeid, None)
-            except IOError, (strerror):
+            except IOError, strerror:
                  # BUG: by catching this we donot see an error in the log.
                  return 'ERROR reading file: %s%s\n%s\n%s'%(
                          self.classname, nodeid, poss_msg, strerror)