1 #$Id: indexer_common.py,v 1.11 2008-09-11 19:41:07 schlatterbeck Exp $
2 import re
3 # Python 2.3 ... 2.6 compatibility:
4 from roundup.anypy.sets_ import set
6 from roundup import hyperdb
# Built-in stop words, all upper-case.  Indexer.__init__ copies them into a
# set and then adds the words configured under ('main', 'indexer_stopwords').
STOPWORDS = (
    "A AND ARE AS AT BE BUT BY "
    "FOR IF IN INTO IS IT "
    "NO NOT OF ON OR SUCH "
    "THAT THE THEIR THEN THERE THESE "
    "THEY THIS TO WAS WILL WITH"
).split()
def _isLink(propclass):
    """Return True when propclass is a hyperdb Link or Multilink instance."""
    link_types = (hyperdb.Link, hyperdb.Multilink)
    return isinstance(propclass, link_types)
class Indexer:
    """Shared logic for the full-text indexers.

    Keeps the stop-word set and the word-length limits, and converts raw
    index hits into a mapping of the nodes of one hyperdb class.  The
    actual index lookup is done by ``self.find``, which is not defined
    in this class.
    """

    def __init__(self, db):
        """Set up the stop words and the indexable word-length limits.

        The stop words are STOPWORDS plus every word found in
        ``db.config[('main', 'indexer_stopwords')]``.
        """
        self.stopwords = set(STOPWORDS)
        for word in db.config[('main', 'indexer_stopwords')]:
            self.stopwords.add(word)
        # Do not index anything longer than 25 characters since that'll be
        # gibberish (encoded text or somesuch) or shorter than 2 characters
        self.minlength = 2
        self.maxlength = 25

    def is_stopword(self, word):
        """Return True if ``word`` is in this indexer's stop-word set."""
        return word in self.stopwords

    def getHits(self, search_terms, klass):
        """Return the raw index hits for ``search_terms``.

        ``klass`` is accepted but not used here; the lookup is simply
        delegated to ``self.find(search_terms)``.
        """
        return self.find(search_terms)

    def search(self, search_terms, klass, ignore=None):
        """Look up [search, terms] and relate the hits to the hyperdb
        Class "klass".

        ``ignore`` is a container of ``(classname, property)`` pairs;
        a hit whose class name and property form such a pair is
        skipped.  Omitting it (or passing None) ignores nothing.

        Returns a dictionary keyed by the ids (as str) of "klass" nodes:

        - a node that was hit directly maps to an empty dictionary;
        - a node reached through a Link/Multilink property maps to
          ``{linkprop: [linked node id, ...]}`` listing the linked
          nodes that were hit.

        An empty dictionary is returned when the index has no hits.
        """
        # A fresh dict per call instead of a shared mutable default.
        if ignore is None:
            ignore = {}

        # do the index lookup
        hits = self.getHits(search_terms, klass)
        if not hits:
            return {}

        propdefs = klass.getprops()

        # linked class name -> names of the klass properties linking to it
        # (.items()/.values() rather than the Python-2-only iter* variants)
        designator_propname = {}
        for nm, propclass in propdefs.items():
            if _isLink(propclass):
                designator_propname.setdefault(propclass.classname,
                    []).append(nm)

        # build a dictionary of nodes and their associated messages
        # and files
        nodeids = {}      # this is the answer
        propspec = {}     # used to do the klass.find
        for propnames in designator_propname.values():
            for propname in propnames:
                propspec[propname] = {}  # used as a set (value doesn't matter)

        # don't unpack hits entries as sqlite3's Row can't be unpacked :(
        for entry in hits:
            # skip this result if we don't care about this class/property
            classname = entry[0]
            hit_prop = entry[2]
            if (classname, hit_prop) in ignore:
                continue

            # if it's a property on klass, it's easy
            # (make sure the nodeid is str() not unicode() as returned by
            # some backends as that can cause problems down the track)
            nodeid = str(entry[1])
            if classname == klass.classname:
                if nodeid not in nodeids:
                    nodeids[nodeid] = {}
                continue

            # make sure the class is a linked one, otherwise ignore
            if classname not in designator_propname:
                continue

            # it's a linked class - set up to do the klass.find
            for linkprop in designator_propname[classname]:
                propspec[linkprop][nodeid] = 1

        # retain only the meaningful entries (iterate over a copy since
        # entries are deleted along the way)
        for propname, idset in list(propspec.items()):
            if not idset:
                del propspec[propname]

        # klass.find tells me the klass nodeids the linked nodes relate to
        for resid in klass.find(**propspec):
            resid = str(resid)
            if resid in nodeids:
                continue    # we ignore duplicate resids
            node_dict = nodeids[resid] = {}
            # now figure out where it came from
            for linkprop in propspec:
                v = klass.get(resid, linkprop)
                # the link might be a Link so deal with a single result
                # or None
                if isinstance(propdefs[linkprop], hyperdb.Link):
                    if v is None:
                        continue
                    v = [v]
                for nodeid in v:
                    if nodeid in propspec[linkprop]:
                        # OK, this node[propname] has a winner
                        node_dict.setdefault(linkprop, []).append(nodeid)
        return nodeids