don't have mailgw tests close the db - they're not testing persistence

[roundup.git] / test / test_indexer.py
diff --git a/test/test_indexer.py b/test/test_indexer.py

index 4a3369e7fa4c9f154d1d8167f816370ea3ff618d..5d8a395efa2d774e0ad710c2397c50afda1972b6 100644 (file)
--- a/test/test_indexer.py
+++ b/test/test_indexer.py
@@ -48,14 +48,14 @@ class IndexerTest(unittest.TestCase):
          self.dex.load_index()
  
      def assertSeqEqual(self, s1, s2):
-        # first argument is the db result we're testing, second is the
-        # desired result some db results don't have iterable rows, so we
-        # have to work around that
+        # First argument is the db result we're testing, second is the
+        # desired result. Some db results don't have iterable rows, so we
+        # have to work around that.
          # Also work around some dbs not returning items in the expected
-        # order. This would be *so* much easier with python2.4's sorted.
-        s1 = list(s1)
+        # order.
+        s1 = list([tuple([r[n] for n in range(len(r))]) for r in s1])
          s1.sort()
-        if [i for x,y in zip(s1, s2) for i,j in enumerate(y) if x[i] != j]:
+        if s1 != s2:
              self.fail('contents of %r != %r'%(s1, s2))
  
      def test_basics(self):
@@ -82,6 +82,56 @@ class IndexerTest(unittest.TestCase):
          self.dex.add_text(('test', '1', 'foo'), '')
          self.assertSeqEqual(self.dex.find(['world']), [('test', '2', 'foo')])
  
+    def test_stopwords(self):
+        """Test that we can find a text with a stopword in it."""
+        stopword = "with"
+        self.assert_(self.dex.is_stopword(stopword.upper()))
+        self.dex.add_text(('test', '1', 'bar'), '%s hello world' % stopword)
+        self.dex.add_text(('test', '2', 'bar'), 'blah a %s world' % stopword)
+        self.dex.add_text(('test', '3', 'bar'), 'blah Blub river')
+        self.dex.add_text(('test', '4', 'bar'), 'blah river %s' % stopword)
+        self.assertSeqEqual(self.dex.find(['with','world']),
+                                                    [('test', '1', 'bar'),
+                                                     ('test', '2', 'bar')])
+    def test_extremewords(self):
+        """Testing too short or too long words."""
+        short = "b"
+        long = "abcdefghijklmnopqrstuvwxyz"
+        self.dex.add_text(('test', '1', 'a'), '%s hello world' % short)
+        self.dex.add_text(('test', '2', 'a'), 'blah a %s world' % short)
+        self.dex.add_text(('test', '3', 'a'), 'blah Blub river')
+        self.dex.add_text(('test', '4', 'a'), 'blah river %s %s'
+                                                        % (short, long))
+        self.assertSeqEqual(self.dex.find([short,'world', long, short]),
+                                                    [('test', '1', 'a'),
+                                                     ('test', '2', 'a')])
+        self.assertSeqEqual(self.dex.find([long]),[])
+
+        # Regression test: some faulty code indexed words with len(word) >= 2
+        # but only treated words with len(word) >= 3 as significant.
+        self.dex.add_text(('test', '5', 'a'), 'blah py %s %s'
+                                                        % (short, long))
+        self.assertSeqEqual(self.dex.find(["py"]), [('test', '5', 'a')])
+
+    def test_casesensitity(self):
+        """Test if searches are case-insensitive."""
+        self.dex.add_text(('test', '1', 'a'), 'aaaa bbbb')
+        self.dex.add_text(('test', '2', 'a'), 'aAaa BBBB')
+        self.assertSeqEqual(self.dex.find(['aaaa']),
+                                                    [('test', '1', 'a'),
+                                                     ('test', '2', 'a')])
+        self.assertSeqEqual(self.dex.find(['BBBB']),
+                                                    [('test', '1', 'a'),
+                                                     ('test', '2', 'a')])
+
+    def test_wordsplitting(self):
+        """Test if word splitting works."""
+        self.dex.add_text(('test', '1', 'a'), 'aaaa-aaa bbbb*bbb')
+        self.dex.add_text(('test', '2', 'a'), 'aaaA-aaa BBBB*BBB')
+        for k in 'aaaa', 'aaa', 'bbbb', 'bbb':
+            self.assertSeqEqual(self.dex.find([k]),
+                [('test', '1', 'a'), ('test', '2', 'a')])
+
      def tearDown(self):
          shutil.rmtree('test-index')