- optimisation for date: if the database provides us with a datetime

[roundup.git] / roundup / backends / rdbms_common.py
diff --git a/roundup/backends/rdbms_common.py b/roundup/backends/rdbms_common.py

index 6942a78b847af1b0a37b5d581b1fc2367bb04e9d..eb1b7c9a33e777053775b656a1c759a54ba25be2 100644 (file)
--- a/roundup/backends/rdbms_common.py
+++ b/roundup/backends/rdbms_common.py
@@ -52,7 +52,7 @@ the same name.
  __docformat__ = 'restructuredtext'
  
  # standard python modules
  __docformat__ = 'restructuredtext'
  
  # standard python modules
-import sys, os, time, re, errno, weakref, copy, logging
+import sys, os, time, re, errno, weakref, copy, logging, datetime
  
  # roundup modules
  from roundup import hyperdb, date, password, roundupdb, security, support
  
  # roundup modules
  from roundup import hyperdb, date, password, roundupdb, security, support
@@ -62,6 +62,7 @@ from roundup.backends import locking
  from roundup.support import reversed
  from roundup.i18n import _
  
  from roundup.support import reversed
  from roundup.i18n import _
  
+
  # support
  from roundup.backends.blobfiles import FileStorage
  try:
  # support
  from roundup.backends.blobfiles import FileStorage
  try:
@@ -71,6 +72,9 @@ except ImportError:
  from roundup.backends.sessions_rdbms import Sessions, OneTimeKeys
  from roundup.date import Range
  
  from roundup.backends.sessions_rdbms import Sessions, OneTimeKeys
  from roundup.date import Range
  
+from roundup.backends.back_anydbm import compile_expression
+
+
  # dummy value meaning "argument not passed"
  _marker = []
  
  # dummy value meaning "argument not passed"
  _marker = []
  
@@ -87,6 +91,13 @@ def _bool_cvt(value):
      # assume it's a number returned from the db API
      return int(value)
  
      # assume it's a number returned from the db API
      return int(value)
  
+def date_to_hyperdb_value(d):
+    """Build a roundup ``date.Date`` from a value read from the database.
+
+    A ``datetime.datetime`` instance is handed to ``date.Date`` as it is;
+    any other value is converted to a string and its blanks are replaced
+    by '.' before it is passed on.
+    """
+    if not isinstance(d, datetime.datetime):
+        d = str(d).replace(' ', '.')
+    return date.Date(d)
+
+
  def connection_dict(config, dbnamestr=None):
      """ Used by Postgresql and MySQL to detemine the keyword args for
      opening the database connection."""
  def connection_dict(config, dbnamestr=None):
      """ Used by Postgresql and MySQL to detemine the keyword args for
      opening the database connection."""
@@ -100,6 +111,54 @@ def connection_dict(config, dbnamestr=None):
              d[name] = config[cvar]
      return d
  
              d[name] = config[cvar]
      return d
  
+
+class IdListOptimizer:
+    """Collapse an ascending sequence of ids into SQL range conditions.
+
+    To prevent flooding the SQL parser of the underlying db engine with
+    "x IN (1, 2, 3, ..., <large number>)", runs of consecutive ids are
+    collapsed to "x BETWEEN 1 AND <large number>"; ids that are not part
+    of a run are kept for a plain "x in (...)" list.
+    """
+
+    def __init__(self):
+        # closed [first, last] pairs of consecutive ids
+        self.ranges = []
+        # ids that are (so far) not adjacent to any other id
+        self.singles = []
+
+    def append(self, nid):
+        """Add ``nid`` to the collection.
+
+        Invariant: nid are ordered ascending. Only the most recent range
+        and the most recent single id are compared against ``nid``.
+        """
+        ranges = self.ranges
+        singles = self.singles
+        # extend the newest range when nid directly follows its end
+        if ranges and ranges[-1][1] == nid - 1:
+            ranges[-1][1] = nid
+            return
+        # promote the newest single to a range when nid directly follows it
+        if singles and singles[-1] == nid - 1:
+            ranges.append([singles.pop(), nid])
+            return
+        singles.append(nid)
+
+    def where(self, field, placeholder):
+        """Return ``(sql, args)`` restricting ``field`` to the stored ids.
+
+        ``sql`` is a parenthesised condition made of one
+        "field BETWEEN p AND p" term per range and one "field in (p,...)"
+        term for the singles, joined with OR, where ``p`` is
+        ``placeholder``. ``args`` holds the range bounds followed by the
+        singles, in the order of the placeholders. With no ids stored the
+        result is ``("(1=0)", [])``.
+        """
+        ranges = self.ranges
+        singles = self.singles
+
+        if not singles and not ranges:
+            # nothing collected: a condition that never matches
+            return "(1=0)", []
+
+        between = '%s BETWEEN %s AND %s' % (field, placeholder, placeholder)
+        stmnt = [between] * len(ranges)
+        if singles:
+            stmnt.append('%s in (%s)' % (
+                field, ','.join([placeholder] * len(singles))))
+
+        # Flatten the [first, last] pairs in a single pass;
+        # sum(ranges, []) copies the growing list once per range.
+        args = [bound for pair in ranges for bound in pair]
+        args.extend(singles)
+        return '(%s)' % ' OR '.join(stmnt), args
+
+    def __str__(self):
+        return "ranges: %r / singles: %r" % (self.ranges, self.singles)
+
+
  class Database(FileStorage, hyperdb.Database, roundupdb.Database):
      """ Wrapper around an SQL database that presents a hyperdb interface.
  
  class Database(FileStorage, hyperdb.Database, roundupdb.Database):
      """ Wrapper around an SQL database that presents a hyperdb interface.
  
@@ -123,8 +182,7 @@ class Database(FileStorage, hyperdb.Database, roundupdb.Database):
          # keep a cache of the N most recently retrieved rows of any kind
          # (classname, nodeid) = row
          self.cache_size = config.RDBMS_CACHE_SIZE
          # keep a cache of the N most recently retrieved rows of any kind
          # (classname, nodeid) = row
          self.cache_size = config.RDBMS_CACHE_SIZE
-        self.cache = {}
-        self.cache_lru = []
+        self.clearCache()
          self.stats = {'cache_hits': 0, 'cache_misses': 0, 'get_items': 0,
              'filtering': 0}
  
          self.stats = {'cache_hits': 0, 'cache_misses': 0, 'get_items': 0,
              'filtering': 0}
  
@@ -151,14 +209,16 @@ class Database(FileStorage, hyperdb.Database, roundupdb.Database):
          """
          raise NotImplemented
  
          """
          raise NotImplemented
  
-    def sql(self, sql, args=None):
+    def sql(self, sql, args=None, cursor=None):
          """ Execute the sql with the optional args.
          """
          self.log_debug('SQL %r %r'%(sql, args))
          """ Execute the sql with the optional args.
          """
          self.log_debug('SQL %r %r'%(sql, args))
+        if not cursor:
+            cursor = self.cursor
          if args:
          if args:
-            self.cursor.execute(sql, args)
+            cursor.execute(sql, args)
          else:
          else:
-            self.cursor.execute(sql)
+            cursor.execute(sql)
  
      def sql_fetchone(self):
          """ Fetch a single row. If there's nothing to fetch, return None.
  
      def sql_fetchone(self):
          """ Fetch a single row. If there's nothing to fetch, return None.
@@ -170,6 +230,14 @@ class Database(FileStorage, hyperdb.Database, roundupdb.Database):
          """
          return self.cursor.fetchall()
  
          """
          return self.cursor.fetchall()
  
+    def sql_fetchiter(self):
+        """ Fetch all rows as a generator.
+
+        Rows are read one at a time with self.cursor.fetchone(); the
+        generator stops at the first false result.
+        """
+        row = self.cursor.fetchone()
+        while row:
+            yield row
+            row = self.cursor.fetchone()
+
      def sql_stringquote(self, value):
          """ Quote the string so it's safe to put in the 'sql quotes'
          """
      def sql_stringquote(self, value):
          """ Quote the string so it's safe to put in the 'sql quotes'
          """
@@ -445,7 +513,7 @@ class Database(FileStorage, hyperdb.Database, roundupdb.Database):
              # no changes
              return 0
  
              # no changes
              return 0
  
-        logger = logging.getLogger('hyperdb')
+        logger = logging.getLogger('roundup.hyperdb')
          logger.info('update_class %s'%spec.classname)
  
          logger.debug('old_spec %r'%(old_spec,))
          logger.info('update_class %s'%spec.classname)
  
          logger.debug('old_spec %r'%(old_spec,))
@@ -755,7 +823,7 @@ class Database(FileStorage, hyperdb.Database, roundupdb.Database):
          Note: I don't commit here, which is different behaviour to the
                "nuke from orbit" behaviour in the dbs.
          """
          Note: I don't commit here, which is different behaviour to the
                "nuke from orbit" behaviour in the dbs.
          """
-        logging.getLogger('hyperdb').info('clear')
+        logging.getLogger('roundup.hyperdb').info('clear')
          for cn in self.classes:
              sql = 'delete from _%s'%cn
              self.sql(sql)
          for cn in self.classes:
              sql = 'delete from _%s'%cn
              self.sql(sql)
@@ -788,6 +856,21 @@ class Database(FileStorage, hyperdb.Database, roundupdb.Database):
  
          raise ValueError('%r is not a hyperdb property class' % propklass)
  
  
          raise ValueError('%r is not a hyperdb property class' % propklass)
  
+    def _cache_del(self, key):
+        """ Drop key from the row cache and from the LRU list.
+        """
+        del self.cache[key]
+        self.cache_lru.remove(key)
+
+    def _cache_refresh(self, key):
+        """ Move key to the front of the LRU list, i.e. mark it as the
+            most recently used entry.
+        """
+        lru = self.cache_lru
+        lru.remove(key)
+        lru.insert(0, key)
+
+    def _cache_save(self, key, node):
+        """ Store node under key in the row cache and put key at the
+            front of the LRU list; once the list is longer than
+            self.cache_size the entry at its tail is evicted.
+        """
+        self.cache[key] = node
+        lru = self.cache_lru
+        # newest entry goes to the front
+        lru.insert(0, key)
+        if len(lru) > self.cache_size:
+            # evict the entry at the tail of the list
+            oldest = lru.pop()
+            del self.cache[oldest]
+
      def addnode(self, classname, nodeid, node):
          """ Add the specified node to its class's db.
          """
      def addnode(self, classname, nodeid, node):
          """ Add the specified node to its class's db.
          """
@@ -821,8 +904,7 @@ class Database(FileStorage, hyperdb.Database, roundupdb.Database):
          # clear this node out of the cache if it's in there
          key = (classname, nodeid)
          if key in self.cache:
          # clear this node out of the cache if it's in there
          key = (classname, nodeid)
          if key in self.cache:
-            del self.cache[key]
-            self.cache_lru.remove(key)
+            self._cache_del(key)
  
          # figure the values to insert
          vals = []
  
          # figure the values to insert
          vals = []
@@ -871,8 +953,7 @@ class Database(FileStorage, hyperdb.Database, roundupdb.Database):
          # clear this node out of the cache if it's in there
          key = (classname, nodeid)
          if key in self.cache:
          # clear this node out of the cache if it's in there
          key = (classname, nodeid)
          if key in self.cache:
-            del self.cache[key]
-            self.cache_lru.remove(key)
+            self._cache_del(key)
  
          cl = self.classes[classname]
          props = cl.getprops()
  
          cl = self.classes[classname]
          props = cl.getprops()
@@ -966,7 +1047,7 @@ class Database(FileStorage, hyperdb.Database, roundupdb.Database):
  
      sql_to_hyperdb_value = {
          hyperdb.String : str,
  
      sql_to_hyperdb_value = {
          hyperdb.String : str,
-        hyperdb.Date   : lambda x:date.Date(str(x).replace(' ', '.')),
+        hyperdb.Date   : date_to_hyperdb_value,
  #        hyperdb.Link   : int,      # XXX numeric ids
          hyperdb.Link   : str,
          hyperdb.Interval  : date.Interval,
  #        hyperdb.Link   : int,      # XXX numeric ids
          hyperdb.Link   : str,
          hyperdb.Interval  : date.Interval,
@@ -995,8 +1076,7 @@ class Database(FileStorage, hyperdb.Database, roundupdb.Database):
          key = (classname, nodeid)
          if key in self.cache:
              # push us back to the top of the LRU
          key = (classname, nodeid)
          if key in self.cache:
              # push us back to the top of the LRU
-            self.cache_lru.remove(key)
-            self.cache_lru.insert(0, key)
+            self._cache_refresh(key)
              if __debug__:
                  self.stats['cache_hits'] += 1
              # return the cached information
              if __debug__:
                  self.stats['cache_hits'] += 1
              # return the cached information
@@ -1033,26 +1113,9 @@ class Database(FileStorage, hyperdb.Database, roundupdb.Database):
                  value = self.to_hyperdb_value(props[name].__class__)(value)
              node[name] = value
  
                  value = self.to_hyperdb_value(props[name].__class__)(value)
              node[name] = value
  
-
-        # now the multilinks
-        for col in mls:
-            # get the link ids
-            sql = 'select linkid from %s_%s where nodeid=%s'%(classname, col,
-                self.arg)
-            self.sql(sql, (nodeid,))
-            # extract the first column from the result
-            # XXX numeric ids
-            items = [int(x[0]) for x in self.cursor.fetchall()]
-            items.sort ()
-            node[col] = [str(x) for x in items]
-
          # save off in the cache
          key = (classname, nodeid)
          # save off in the cache
          key = (classname, nodeid)
-        self.cache[key] = node
-        # update the LRU
-        self.cache_lru.insert(0, key)
-        if len(self.cache_lru) > self.cache_size:
-            del self.cache[self.cache_lru.pop()]
+        self._cache_save(key, node)
  
          if __debug__:
              self.stats['get_items'] += (time.time() - start_t)
  
          if __debug__:
              self.stats['get_items'] += (time.time() - start_t)
@@ -1063,7 +1126,8 @@ class Database(FileStorage, hyperdb.Database, roundupdb.Database):
          """Remove a node from the database. Called exclusively by the
             destroy() method on Class.
          """
          """Remove a node from the database. Called exclusively by the
             destroy() method on Class.
          """
-        logging.getLogger('hyperdb').info('destroynode %s%s'%(classname, nodeid))
+        logging.getLogger('roundup.hyperdb').info('destroynode %s%s'%(
+            classname, nodeid))
  
          # make sure the node exists
          if not self.hasnode(classname, nodeid):
  
          # make sure the node exists
          if not self.hasnode(classname, nodeid):
@@ -1268,7 +1332,7 @@ class Database(FileStorage, hyperdb.Database, roundupdb.Database):
      def sql_commit(self, fail_ok=False):
          """ Actually commit to the database.
          """
      def sql_commit(self, fail_ok=False):
          """ Actually commit to the database.
          """
-        logging.getLogger('hyperdb').info('commit')
+        logging.getLogger('roundup.hyperdb').info('commit')
  
          self.conn.commit()
  
  
          self.conn.commit()
  
@@ -1300,6 +1364,11 @@ class Database(FileStorage, hyperdb.Database, roundupdb.Database):
          # clear out the transactions
          self.transactions = []
  
          # clear out the transactions
          self.transactions = []
  
+        # clear the cache: Don't carry over cached values from one
+        # transaction to the next (there may be other changes from other
+        # transactions)
+        self.clearCache()
+
      def sql_rollback(self):
          self.conn.rollback()
  
      def sql_rollback(self):
          self.conn.rollback()
  
@@ -1309,7 +1378,7 @@ class Database(FileStorage, hyperdb.Database, roundupdb.Database):
          Undo all the changes made since the database was opened or the last
          commit() or rollback() was performed.
          """
          Undo all the changes made since the database was opened or the last
          commit() or rollback() was performed.
          """
-        logging.getLogger('hyperdb').info('rollback')
+        logging.getLogger('roundup.hyperdb').info('rollback')
  
          self.sql_rollback()
  
  
          self.sql_rollback()
  
@@ -1324,7 +1393,7 @@ class Database(FileStorage, hyperdb.Database, roundupdb.Database):
          self.clearCache()
  
      def sql_close(self):
          self.clearCache()
  
      def sql_close(self):
-        logging.getLogger('hyperdb').info('close')
+        logging.getLogger('roundup.hyperdb').info('close')
          self.conn.close()
  
      def close(self):
          self.conn.close()
  
      def close(self):
@@ -1559,9 +1628,20 @@ class Class(hyperdb.Class):
              else:
                  return self.db.getuid()
  
              else:
                  return self.db.getuid()
  
-        # get the property (raises KeyErorr if invalid)
+        # get the property (raises KeyError if invalid)
          prop = self.properties[propname]
  
          prop = self.properties[propname]
  
+        # lazy evaluation of Multilink
+        if propname not in d and isinstance(prop, Multilink):
+            sql = 'select linkid from %s_%s where nodeid=%s'%(self.classname,
+                propname, self.db.arg)
+            self.db.sql(sql, (nodeid,))
+            # extract the first column from the result
+            # XXX numeric ids
+            items = [int(x[0]) for x in self.db.cursor.fetchall()]
+            items.sort ()
+            d[propname] = [str(x) for x in items]
+
          # handle there being no value in the table for the property
          if propname not in d or d[propname] is None:
              if default is _marker:
          # handle there being no value in the table for the property
          if propname not in d or d[propname] is None:
              if default is _marker:
@@ -2128,32 +2208,106 @@ class Class(hyperdb.Class):
      # The format parameter is replaced with the attribute.
      order_by_null_values = None
  
      # The format parameter is replaced with the attribute.
      order_by_null_values = None
  
-    def filter(self, search_matches, filterspec, sort=[], group=[]):
-        """Return a list of the ids of the active nodes in this class that
-        match the 'filter' spec, sorted by the group spec and then the
-        sort spec
-
-        "filterspec" is {propname: value(s)}
-
-        "sort" and "group" are [(dir, prop), ...] where dir is '+', '-'
-        or None and prop is a prop name or None. Note that for
-        backward-compatibility reasons a single (dir, prop) tuple is
-        also allowed.
+    def supports_subselects(self):
+        '''Tell whether the backend can execute SQL subselects.
+
+        Assumes DBs can do subselects; override if they cannot.
+        '''
+        return True
+
+    def _filter_multilink_expression_fallback(
+        self, classname, multilink_table, expr):
+        '''This is a fallback for databases that do not support
+           subselects.
+
+           The class table is joined with the multilink table, expr is
+           evaluated in Python against the link ids of every node, and
+           the ids of the nodes that pass are turned into an SQL
+           condition on "_<classname>.id". Returns the result of
+           IdListOptimizer.where(): the condition and its arguments.
+        '''
+
+        is_valid = expr.evaluate
+
+        # id of the node currently being collected and the link ids
+        # seen for it so far
+        last_id, kws = None, []
+
+        ids = IdListOptimizer()
+        append = ids.append
+
+        # This join and the evaluation in program space
+        # can be expensive for larger databases!
+        # TODO: Find a faster way to collect the data needed
+        # to evaluate the expression.
+        # Moving the expression evaluation into the database
+        # would be nice but this is tricky: Think about the cases
+        # where the multilink table does not have join values
+        # needed in evaluation.
+
+        stmnt = "SELECT c.id, m.linkid FROM _%s c " \
+                "LEFT OUTER JOIN %s m " \
+                "ON c.id = m.nodeid ORDER BY c.id" % (
+                    classname, multilink_table)
+        self.db.sql(stmnt)
+
+        # collect all multilink items for a class item; rows arrive
+        # ordered by c.id, so a change of nid ends the previous item
+        for nid, kw in self.db.sql_fetchiter():
+            if nid != last_id:
+                if last_id is None:
+                    last_id = nid
+                else:
+                    # we have all multilink items -> evaluate!
+                    if is_valid(kws): append(last_id)
+                    last_id, kws = nid, []
+            # the outer join yields a None linkid for nodes without links
+            if kw is not None:
+                kws.append(kw)
  
  
-        "search_matches" is a container type or None
+        # the last item is not followed by a change of nid: evaluate it here
+        if last_id is not None and is_valid(kws): 
+            append(last_id)
  
  
-        The filter must match all properties specificed. If the property
-        value to match is a list:
+        # we have ids of the classname table
+        return ids.where("_%s.id" % classname, self.db.arg)
  
  
-        1. String properties must match all elements in the list, and
-        2. Other properties must match any of the elements in the list.
+    def _filter_multilink_expression(self, classname, multilink_table, v):
+        """ Filters out elements of the classname table that do not
+            match the given expression.
+
+            v is the list of filter values. It is treated as an
+            expression only if every element converts to int and at
+            least one of them is below -1; anything else (including any
+            error while building the expression) falls back to the
+            original "linkid in (...)" matching.
+
+            Returns the 'WHERE' fragment for the overall filter:
+            - expression: a 2-tuple (sql, values);
+            - original behavior: a 3-tuple (sql, v, True). The caller
+              tells the two apart by the length of the tuple.
+        """
+        try:
+            opcodes = [int(x) for x in v]
+            # no value below -1: a plain id list, not an expression
+            if min(opcodes) >= -1: raise ValueError()
+
+            expr = compile_expression(opcodes)
+
+            if not self.supports_subselects():
+                # We heavily rely on subselects. If there is
+                # no decent support fall back to slower variant.
+                return self._filter_multilink_expression_fallback(
+                    classname, multilink_table, expr)
+
+            # one placeholder per value of the expression
+            atom = \
+                "%s IN(SELECT linkid FROM %s WHERE nodeid=a.id)" % (
+                self.db.arg,
+                multilink_table)
+
+            intron = \
+                "_%(classname)s.id in (SELECT id " \
+                "FROM _%(classname)s AS a WHERE %(condition)s) " % {
+                    'classname' : classname,
+                    'condition' : expr.generate(lambda n: atom) }
+
+            # NOTE(review): presumably visit() reports the values in the
+            # same order in which generate() emitted the placeholders --
+            # confirm against compile_expression.
+            values = []
+            def collect_values(n): values.append(n.x)
+            expr.visit(collect_values)
+
+            return intron, values
+        except Exception:
+            # original behavior; Exception instead of a bare except so
+            # that KeyboardInterrupt and SystemExit are not swallowed
+            where = "%s.linkid in (%s)" % (
+                multilink_table, ','.join([self.db.arg] * len(v)))
+            return where, v, True # True to indicate original
+
+    def _filter_sql (self, search_matches, filterspec, srt=[], grp=[], retr=0):
+        """ Compute the proptree and the SQL/ARGS for a filter.
+        For argument description see filter below.
+        We return a 3-tuple, the proptree, the sql and the sql-args
+        or None if no SQL is necessary.
+        The flag retr serves to retrieve *all* non-Multilink properties
+        (for filling the cache during a filter_iter)
          """
          # we can't match anything if search_matches is empty
          if not search_matches and search_matches is not None:
          """
          # we can't match anything if search_matches is empty
          if not search_matches and search_matches is not None:
-            return []
-
-        if __debug__:
-            start_t = time.time()
+            return None
  
          icn = self.classname
  
  
          icn = self.classname
  
@@ -2166,8 +2320,8 @@ class Class(hyperdb.Class):
  
          # figure the WHERE clause from the filterspec
          mlfilt = 0      # are we joining with Multilink tables?
  
          # figure the WHERE clause from the filterspec
          mlfilt = 0      # are we joining with Multilink tables?
-        sortattr = self._sortattr (group = group, sort = sort)
-        proptree = self._proptree(filterspec, sortattr)
+        sortattr = self._sortattr (group = grp, sort = srt)
+        proptree = self._proptree(filterspec, sortattr, retr)
          mlseen = 0
          for pt in reversed(proptree.sortattr):
              p = pt
          mlseen = 0
          for pt in reversed(proptree.sortattr):
              p = pt
@@ -2182,12 +2336,11 @@ class Class(hyperdb.Class):
                  pt.attr_sort_done = pt.tree_sort_done = True
          proptree.compute_sort_done()
  
                  pt.attr_sort_done = pt.tree_sort_done = True
          proptree.compute_sort_done()
  
-        ordercols = []
-        auxcols = {}
+        cols = ['_%s.id'%icn]
          mlsort = []
          rhsnum = 0
          for p in proptree:
          mlsort = []
          rhsnum = 0
          for p in proptree:
-            oc = None
+            rc = ac = oc = None
              cn = p.classname
              ln = p.uniqname
              pln = p.parent.uniqname
              cn = p.classname
              ln = p.uniqname
              pln = p.parent.uniqname
@@ -2195,10 +2348,13 @@ class Class(hyperdb.Class):
              k = p.name
              v = p.val
              propclass = p.propclass
              k = p.name
              v = p.val
              propclass = p.propclass
-            if p.sort_type > 0:
-                oc = ac = '_%s._%s'%(pln, k)
+            if p.parent == proptree and p.name == 'id' \
+                and 'retrieve' in p.need_for:
+                p.sql_idx = 0
+            if 'sort' in p.need_for or 'retrieve' in p.need_for:
+                rc = oc = ac = '_%s._%s'%(pln, k)
              if isinstance(propclass, Multilink):
              if isinstance(propclass, Multilink):
-                if p.sort_type < 2:
+                if 'search' in p.need_for:
                      mlfilt = 1
                      tn = '%s_%s'%(pcn, k)
                      if v in ('-1', ['-1'], []):
                      mlfilt = 1
                      tn = '%s_%s'%(pcn, k)
                      if v in ('-1', ['-1'], []):
@@ -2207,33 +2363,47 @@ class Class(hyperdb.Class):
                          where.append(self._subselect(pcn, tn))
                      else:
                          frum.append(tn)
                          where.append(self._subselect(pcn, tn))
                      else:
                          frum.append(tn)
-                        where.append('_%s.id=%s.nodeid'%(pln,tn))
+                        gen_join = True
+
+                        if p.has_values and isinstance(v, type([])):
+                            result = self._filter_multilink_expression(pln, tn, v)
+                            # XXX: We dont need an id join if we used the filter
+                            gen_join = len(result) == 3
+
+                        if gen_join:
+                            where.append('_%s.id=%s.nodeid'%(pln,tn))
+
                          if p.children:
                              frum.append('_%s as _%s' % (cn, ln))
                              where.append('%s.linkid=_%s.id'%(tn, ln))
                          if p.children:
                              frum.append('_%s as _%s' % (cn, ln))
                              where.append('%s.linkid=_%s.id'%(tn, ln))
+
                          if p.has_values:
                              if isinstance(v, type([])):
                          if p.has_values:
                              if isinstance(v, type([])):
-                                s = ','.join([a for x in v])
-                                where.append('%s.linkid in (%s)'%(tn, s))
-                                args = args + v
+                                where.append(result[0])
+                                args += result[1]
                              else:
                                  where.append('%s.linkid=%s'%(tn, a))
                                  args.append(v)
                              else:
                                  where.append('%s.linkid=%s'%(tn, a))
                                  args.append(v)
-                if p.sort_type > 0:
+                if 'sort' in p.need_for:
                      assert not p.attr_sort_done and not p.sort_ids_needed
              elif k == 'id':
                      assert not p.attr_sort_done and not p.sort_ids_needed
              elif k == 'id':
-                if p.sort_type < 2:
+                if 'search' in p.need_for:
                      if isinstance(v, type([])):
                      if isinstance(v, type([])):
+                        # If there are no permitted values, then the
+                        # where clause will always be false, and we
+                        # can optimize the query away.
+                        if not v:
+                            return []
                          s = ','.join([a for x in v])
                          where.append('_%s.%s in (%s)'%(pln, k, s))
                          args = args + v
                      else:
                          where.append('_%s.%s=%s'%(pln, k, a))
                          args.append(v)
                          s = ','.join([a for x in v])
                          where.append('_%s.%s in (%s)'%(pln, k, s))
                          args = args + v
                      else:
                          where.append('_%s.%s=%s'%(pln, k, a))
                          args.append(v)
-                if p.sort_type > 0:
-                    oc = ac = '_%s.id'%pln
+                if 'sort' in p.need_for or 'retrieve' in p.need_for:
+                    rc = oc = ac = '_%s.id'%pln
              elif isinstance(propclass, String):
              elif isinstance(propclass, String):
-                if p.sort_type < 2:
+                if 'search' in p.need_for:
                      if not isinstance(v, type([])):
                          v = [v]
  
                      if not isinstance(v, type([])):
                          v = [v]
  
@@ -2247,12 +2417,12 @@ class Class(hyperdb.Class):
                          +' and '.join(["_%s._%s LIKE '%s'"%(pln, k, s) for s in v])
                          +')')
                      # note: args are embedded in the query string now
                          +' and '.join(["_%s._%s LIKE '%s'"%(pln, k, s) for s in v])
                          +')')
                      # note: args are embedded in the query string now
-                if p.sort_type > 0:
+                if 'sort' in p.need_for:
                      oc = ac = 'lower(_%s._%s)'%(pln, k)
              elif isinstance(propclass, Link):
                      oc = ac = 'lower(_%s._%s)'%(pln, k)
              elif isinstance(propclass, Link):
-                if p.sort_type < 2:
+                if 'search' in p.need_for:
                      if p.children:
                      if p.children:
-                        if p.sort_type == 0:
+                        if 'sort' not in p.need_for:
                              frum.append('_%s as _%s' % (cn, ln))
                          where.append('_%s._%s=_%s.id'%(pln, k, ln))
                      if p.has_values:
                              frum.append('_%s as _%s' % (cn, ln))
                          where.append('_%s._%s=_%s.id'%(pln, k, ln))
                      if p.has_values:
@@ -2280,16 +2450,18 @@ class Class(hyperdb.Class):
                              else:
                                  where.append('_%s._%s=%s'%(pln, k, a))
                                  args.append(v)
                              else:
                                  where.append('_%s._%s=%s'%(pln, k, a))
                                  args.append(v)
-                if p.sort_type > 0:
+                if 'sort' in p.need_for:
                      lp = p.cls.labelprop()
                      oc = ac = '_%s._%s'%(pln, k)
                      if lp != 'id':
                      lp = p.cls.labelprop()
                      oc = ac = '_%s._%s'%(pln, k)
                      if lp != 'id':
-                        if p.tree_sort_done and p.sort_type > 0:
+                        if p.tree_sort_done:
                              loj.append(
                                  'LEFT OUTER JOIN _%s as _%s on _%s._%s=_%s.id'%(
                                  cn, ln, pln, k, ln))
                          oc = '_%s._%s'%(ln, lp)
                              loj.append(
                                  'LEFT OUTER JOIN _%s as _%s on _%s._%s=_%s.id'%(
                                  cn, ln, pln, k, ln))
                          oc = '_%s._%s'%(ln, lp)
-            elif isinstance(propclass, Date) and p.sort_type < 2:
+                if 'retrieve' in p.need_for:
+                    rc = '_%s._%s'%(pln, k)
+            elif isinstance(propclass, Date) and 'search' in p.need_for:
                  dc = self.db.to_sql_value(hyperdb.Date)
                  if isinstance(v, type([])):
                      s = ','.join([a for x in v])
                  dc = self.db.to_sql_value(hyperdb.Date)
                  if isinstance(v, type([])):
                      s = ','.join([a for x in v])
@@ -2310,7 +2482,7 @@ class Class(hyperdb.Class):
                          pass
              elif isinstance(propclass, Interval):
                  # filter/sort using the __<prop>_int__ column
                          pass
              elif isinstance(propclass, Interval):
                  # filter/sort using the __<prop>_int__ column
-                if p.sort_type < 2:
+                if 'search' in p.need_for:
                      if isinstance(v, type([])):
                          s = ','.join([a for x in v])
                          where.append('_%s.__%s_int__ in (%s)'%(pln, k, s))
                      if isinstance(v, type([])):
                          s = ','.join([a for x in v])
                          where.append('_%s.__%s_int__ in (%s)'%(pln, k, s))
@@ -2328,9 +2500,11 @@ class Class(hyperdb.Class):
                          except ValueError:
                              # If range creation fails - ignore search parameter
                              pass
                          except ValueError:
                              # If range creation fails - ignore search parameter
                              pass
-                if p.sort_type > 0:
+                if 'sort' in p.need_for:
                      oc = ac = '_%s.__%s_int__'%(pln,k)
                      oc = ac = '_%s.__%s_int__'%(pln,k)
-            elif isinstance(propclass, Boolean) and p.sort_type < 2:
+                if 'retrieve' in p.need_for:
+                    rc = '_%s._%s'%(pln,k)
+            elif isinstance(propclass, Boolean) and 'search' in p.need_for:
                  if type(v) == type(""):
                      v = v.split(',')
                  if type(v) != type([]):
                  if type(v) == type(""):
                      v = v.split(',')
                  if type(v) != type([]):
@@ -2348,7 +2522,7 @@ class Class(hyperdb.Class):
                      s = ','.join([a for x in v])
                      where.append('_%s._%s in (%s)'%(pln, k, s))
                      args = args + bv
                      s = ','.join([a for x in v])
                      where.append('_%s._%s in (%s)'%(pln, k, s))
                      args = args + bv
-            elif p.sort_type < 2:
+            elif 'search' in p.need_for:
                  if isinstance(v, type([])):
                      s = ','.join([a for x in v])
                      where.append('_%s._%s in (%s)'%(pln, k, s))
                  if isinstance(v, type([])):
                      s = ','.join([a for x in v])
                      where.append('_%s._%s in (%s)'%(pln, k, s))
@@ -2358,18 +2532,28 @@ class Class(hyperdb.Class):
                      args.append(v)
              if oc:
                  if p.sort_ids_needed:
                      args.append(v)
              if oc:
                  if p.sort_ids_needed:
-                    auxcols[ac] = p
+                    if rc == ac:
+                        p.sql_idx = len(cols)
+                    p.auxcol = len(cols)
+                    cols.append(ac)
                  if p.tree_sort_done and p.sort_direction:
                  if p.tree_sort_done and p.sort_direction:
-                    # Don't select top-level id twice
-                    if p.name != 'id' or p.parent != proptree:
-                        ordercols.append(oc)
+                    # Don't select top-level id or multilink twice
+                    if (not p.sort_ids_needed or ac != oc) and (p.name != 'id'
+                        or p.parent != proptree):
+                        if rc == oc:
+                            p.sql_idx = len(cols)
+                        cols.append(oc)
                      desc = ['', ' desc'][p.sort_direction == '-']
                      # Some SQL dbs sort NULL values last -- we want them first.
                      if (self.order_by_null_values and p.name != 'id'):
                          nv = self.order_by_null_values % oc
                      desc = ['', ' desc'][p.sort_direction == '-']
                      # Some SQL dbs sort NULL values last -- we want them first.
                      if (self.order_by_null_values and p.name != 'id'):
                          nv = self.order_by_null_values % oc
-                        ordercols.append(nv)
+                        cols.append(nv)
                          p.orderby.append(nv + desc)
                      p.orderby.append(oc + desc)
                          p.orderby.append(nv + desc)
                      p.orderby.append(oc + desc)
+            if 'retrieve' in p.need_for and p.sql_idx is None:
+                assert(rc)
+                p.sql_idx = len(cols)
+                cols.append (rc)
  
          props = self.getprops()
  
  
          props = self.getprops()
  
@@ -2392,11 +2576,8 @@ class Class(hyperdb.Class):
          if mlfilt:
              # we're joining tables on the id, so we will get dupes if we
              # don't distinct()
          if mlfilt:
              # we're joining tables on the id, so we will get dupes if we
              # don't distinct()
-            cols = ['distinct(_%s.id)'%icn]
-        else:
-            cols = ['_%s.id'%icn]
-        if ordercols:
-            cols = cols + ordercols
+            cols[0] = 'distinct(_%s.id)'%icn
+
          order = []
          # keep correct sequence of order attributes.
          for sa in proptree.sortattr:
          order = []
          # keep correct sequence of order attributes.
          for sa in proptree.sortattr:
@@ -2407,21 +2588,50 @@ class Class(hyperdb.Class):
              order = ' order by %s'%(','.join(order))
          else:
              order = ''
              order = ' order by %s'%(','.join(order))
          else:
              order = ''
-        for o, p in auxcols.iteritems ():
-            cols.append (o)
-            p.auxcol = len (cols) - 1
  
          cols = ','.join(cols)
          loj = ' '.join(loj)
          sql = 'select %s from %s %s %s%s'%(cols, frum, loj, where, order)
          args = tuple(args)
          __traceback_info__ = (sql, args)
  
          cols = ','.join(cols)
          loj = ' '.join(loj)
          sql = 'select %s from %s %s %s%s'%(cols, frum, loj, where, order)
          args = tuple(args)
          __traceback_info__ = (sql, args)
+        return proptree, sql, args
+
+    def filter(self, search_matches, filterspec, sort=[], group=[]):
+        """Return a list of the ids of the active nodes in this class that
+        match the 'filter' spec, sorted by the group spec and then the
+        sort spec
+
+        "filterspec" is {propname: value(s)}
+
+        "sort" and "group" are [(dir, prop), ...] where dir is '+', '-'
+        or None and prop is a prop name or None. Note that for
+        backward-compatibility reasons a single (dir, prop) tuple is
+        also allowed.
+
+        "search_matches" is a container type or None
+
+        The filter must match all properties specified. If the property
+        value to match is a list:
+
+        1. String properties must match all elements in the list, and
+        2. Other properties must match any of the elements in the list.
+        """
+        if __debug__:
+            start_t = time.time()
+
+        sq = self._filter_sql (search_matches, filterspec, sort, group)
+        # nothing to match?
+        if sq is None:
+            return []
+        proptree, sql, args = sq
+
          self.db.sql(sql, args)
          l = self.db.sql_fetchall()
  
          # Compute values needed for sorting in proptree.sort
          self.db.sql(sql, args)
          l = self.db.sql_fetchall()
  
          # Compute values needed for sorting in proptree.sort
-        for p in auxcols.itervalues():
-            p.sort_ids = p.sort_result = [row[p.auxcol] for row in l]
+        for p in proptree:
+            if hasattr(p, 'auxcol'):
+                p.sort_ids = p.sort_result = [row[p.auxcol] for row in l]
          # return the IDs (the first column)
          # XXX numeric ids
          l = [str(row[0]) for row in l]
          # return the IDs (the first column)
          # XXX numeric ids
          l = [str(row[0]) for row in l]
@@ -2431,6 +2641,53 @@ class Class(hyperdb.Class):
              self.db.stats['filtering'] += (time.time() - start_t)
          return l
  
              self.db.stats['filtering'] += (time.time() - start_t)
          return l
  
+    def filter_iter(self, search_matches, filterspec, sort=[], group=[]):
+        """Iterator similar to filter above with same args.
+        Limitation: We don't sort on multilinks.
+        This uses an optimisation: We put all nodes that are in the
+        current row into the node cache. Then we return the node id.
+        That way a fetch of a node won't create another sql-fetch (with
+        a join) from the database because the nodes are already in the
+        cache. We're using our own temporary cursor.
+        """
+        sq = self._filter_sql(search_matches, filterspec, sort, group, retr=1)
+        # nothing to match?
+        if sq is None:
+            return
+        proptree, sql, args = sq
+        cursor = self.db.conn.cursor()
+        self.db.sql(sql, args, cursor)
+        classes = {}
+        for p in proptree:
+            if 'retrieve' in p.need_for:
+                cn = p.parent.classname
+                ptid = p.parent.id # not the nodeid!
+                key = (cn, ptid)
+                if key not in classes:
+                    classes[key] = {}
+                name = p.name
+                assert (name)
+                classes[key][name] = p
+                p.to_hyperdb = self.db.to_hyperdb_value(p.propclass.__class__)
+        while True:
+            row = cursor.fetchone()
+            if not row: break
+            # populate cache with current items
+            for (classname, ptid), pt in classes.iteritems():
+                nodeid = str(row[pt['id'].sql_idx])
+                key = (classname, nodeid)
+                if key in self.db.cache:
+                    self.db._cache_refresh(key)
+                    continue
+                node = {}
+                for propname, p in pt.iteritems():
+                    value = row[p.sql_idx]
+                    if value is not None:
+                        value = p.to_hyperdb(value)
+                    node[propname] = value
+                self.db._cache_save(key, node)
+            yield str(row[0])
+
      def filter_sql(self, sql):
          """Return a list of the ids of the items in this class that match
          the SQL provided. The SQL is a complete "select" statement.
      def filter_sql(self, sql):
          """Return a list of the ids of the items in this class that match
          the SQL provided. The SQL is a complete "select" statement.
@@ -2640,37 +2897,6 @@ class Class(hyperdb.Class):
                  r.append(list(map(repr, l)))
          return r
  
                  r.append(list(map(repr, l)))
          return r
  
-    def import_journals(self, entries):
-        """Import a class's journal.
-
-        Uses setjournal() to set the journal for each item."""
-        properties = self.getprops()
-        d = {}
-        for l in entries:
-            nodeid, jdate, user, action, params = map(eval, l)
-            r = d.setdefault(nodeid, [])
-            if action == 'set':
-                for propname, value in params.iteritems():
-                    prop = properties[propname]
-                    if value is None:
-                        pass
-                    elif isinstance(prop, Date):
-                        value = date.Date(value)
-                    elif isinstance(prop, Interval):
-                        value = date.Interval(value)
-                    elif isinstance(prop, Password):
-                        pwd = password.Password()
-                        pwd.unpack(value)
-                        value = pwd
-                    params[propname] = value
-            elif action == 'create' and params:
-                # old tracker with data stored in the create!
-                params = {}
-            r.append((nodeid, date.Date(jdate), user, action, params))
-
-        for nodeid, l in d.iteritems():
-            self.db.setjournal(self.classname, nodeid, l)
-
  class FileClass(hyperdb.FileClass, Class):
      """This class defines a large chunk of data. To support this, it has a
         mandatory String property "content" which is typically saved off
  class FileClass(hyperdb.FileClass, Class):
      """This class defines a large chunk of data. To support this, it has a
         mandatory String property "content" which is typically saved off
@@ -2729,7 +2955,7 @@ class FileClass(hyperdb.FileClass, Class):
          if propname == 'content':
              try:
                  return self.db.getfile(self.classname, nodeid, None)
          if propname == 'content':
              try:
                  return self.db.getfile(self.classname, nodeid, None)
-            except IOError, (strerror):
+            except IOError, strerror:
                  # BUG: by catching this we do not see an error in the log.
                  return 'ERROR reading file: %s%s\n%s\n%s'%(
                          self.classname, nodeid, poss_msg, strerror)
                  # BUG: by catching this we do not see an error in the log.
                  return 'ERROR reading file: %s%s\n%s\n%s'%(
                          self.classname, nodeid, poss_msg, strerror)