From 8dd370cca99658a1aacf7c5baaa0078bbac9d446 Mon Sep 17 00:00:00 2001 From: richard Date: Mon, 31 Dec 2001 05:09:20 +0000 Subject: [PATCH] Added better tokenising to roundup-admin - handles spaces and stuff. Can use quoting or backslashes. See the roundup.token pydoc. git-svn-id: http://svn.roundup-tracker.org/svnroot/roundup/trunk@482 57a73879-2fb5-44c3-a270-3262357dd7e2 --- roundup-admin | 17 +++++-- roundup/token.py | 118 +++++++++++++++++++++++++++++++++++++++++++++ test/__init__.py | 8 ++- test/test_token.py | 60 +++++++++++++++++++++++ 4 files changed, 196 insertions(+), 7 deletions(-) create mode 100644 roundup/token.py create mode 100644 test/test_token.py diff --git a/roundup-admin b/roundup-admin index 6a1d5c0..59e2251 100755 --- a/roundup-admin +++ b/roundup-admin @@ -16,17 +16,17 @@ # BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE, # SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. # -# $Id: roundup-admin,v 1.55 2001-12-17 03:52:47 richard Exp $ +# $Id: roundup-admin,v 1.56 2001-12-31 05:09:20 richard Exp $ # python version check from roundup import version_check -import sys, os, getpass, getopt, re, UserDict +import sys, os, getpass, getopt, re, UserDict, shlex try: import csv except ImportError: csv = None -from roundup import date, hyperdb, roundupdb, init, password +from roundup import date, hyperdb, roundupdb, init, password, token import roundup.instance class CommandDict(UserDict.UserDict): @@ -930,7 +930,7 @@ Command help: ret = 1 return ret - def interactive(self, ws_re=re.compile(r'\s+')): + def interactive(self): '''Run in an interactive mode ''' print 'Roundup {version} ready for input.' @@ -947,7 +947,7 @@ Command help: print 'exit...' break if not command: continue - args = ws_re.split(command) + args = token.token_split(command) if not args: continue if args[0] in ('quit', 'exit'): break self.run_command(args) @@ -1000,6 +1000,13 @@ if __name__ == '__main__': # # $Log: not supported by cvs2svn $ +# Revision 1.55 2001/12/17 03:52:47 richard +# Implemented file store rollback. As a bonus, the hyperdb is now capable of +# storing more than one file per node - if a property name is supplied, +# the file is called designator.property. +# I decided not to migrate the existing files stored over to the new naming +# scheme - the FileClass just doesn't specify the property name. +# # Revision 1.54 2001/12/15 23:09:23 richard # Some cleanups in roundup-admin, also made it work again... # diff --git a/roundup/token.py b/roundup/token.py new file mode 100644 index 0000000..c51a8ab --- /dev/null +++ b/roundup/token.py @@ -0,0 +1,118 @@ +# +# Copyright (c) 2001 Richard Jones. +# This module is free software, and you may redistribute it and/or modify +# under the same terms as Python, so long as this copyright message and +# disclaimer are retained in their original form. +# +# This module is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# $Id: token.py,v 1.1 2001-12-31 05:09:20 richard Exp $ +# + +__doc__ = """ +This module provides the tokeniser used by roundup-admin. +""" + +def token_split(s, whitespace=' \r\n\t', quotes='\'"', + escaped={'r':'\r', 'n':'\n', 't':'\t'}): + '''Split the string up into tokens. An occurence of a ' or " in the + input will cause the splitter to ignore whitespace until a matching + quote char is found. Embedded non-matching quote chars are also + skipped. 
+ Whitespace and quoting characters may be escaped using a backslash. + \r, \n and \t are converted to carriage-return, newline and tab. + All other backslashed characters are left as-is. + Valid: + hello world (2 tokens: hello, world) + "hello world" (1 token: hello world) + "Roch'e" Compaan (2 tokens: Roch'e Compaan) + Roch\'e Compaan (2 tokens: Roch'e Compaan) + address="1 2 3" (1 token: address=1 2 3) + \\ (1 token: \) + \n (1 token: a newline) + \o (1 token: \o) + Invalid: + "hello world (no matching quote) + Roch'e Compaan (no matching quote) + ''' + l = [] + pos = 0 + NEWTOKEN = 'newtoken' + TOKEN = 'token' + QUOTE = 'quote' + ESCAPE = 'escape' + quotechar = '' + state = NEWTOKEN + oldstate = '' # one-level state stack ;) + length = len(s) + finish = 0 + token = '' + while 1: + # end of string, finish off the current token + if pos == length: + if state == QUOTE: raise ValueError, "unmatched quote" + elif state == TOKEN: l.append(token) + break + c = s[pos] + if state == NEWTOKEN: + # looking for a new token + if c in quotes: + # quoted token + state = QUOTE + quotechar = c + pos = pos + 1 + continue + elif c in whitespace: + # skip whitespace + pos = pos + 1 + continue + elif c == '\\': + pos = pos + 1 + oldstate = TOKEN + state = ESCAPE + continue + # otherwise we have a token + state = TOKEN + elif state == TOKEN: + if c in whitespace: + # have a token, and have just found a whitespace terminator + l.append(token) + pos = pos + 1 + state = NEWTOKEN + token = '' + continue + elif c in quotes: + # have a token, just found embedded quotes + state = QUOTE + quotechar = c + pos = pos + 1 + continue + elif c == '\\': + pos = pos + 1 + oldstate = state + state = ESCAPE + continue + elif state == QUOTE and c == quotechar: + # in a quoted token and found a matching quote char + pos = pos + 1 + # now we're looking for whitespace + state = TOKEN + continue + elif state == ESCAPE: + # escaped-char conversions (t, r, n) + # TODO: octal, hexdigit + state = oldstate + if escaped.has_key(c): + c = escaped[c] + # just add this char to the token and move along + token = token + c + pos = pos + 1 + return l + +# +# $Log: not supported by cvs2svn $ +# +# +# vim: set filetype=python ts=4 sw=4 et si diff --git a/test/__init__.py b/test/__init__.py index d74c445..419d6ae 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -15,12 +15,12 @@ # BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE, # SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. # -# $Id: __init__.py,v 1.7 2001-08-07 00:24:43 richard Exp $ +# $Id: __init__.py,v 1.8 2001-12-31 05:09:20 richard Exp $ import unittest import test_dates, test_schema, test_db, test_multipart, test_mailsplit -import test_init +import test_init, test_token def go(): suite = unittest.TestSuite(( @@ -30,12 +30,16 @@ def go(): test_init.suite(), test_multipart.suite(), test_mailsplit.suite(), + test_token.suite(), )) runner = unittest.TextTestRunner() runner.run(suite) # # $Log: not supported by cvs2svn $ +# Revision 1.7 2001/08/07 00:24:43 richard +# stupid typo +# # Revision 1.6 2001/08/07 00:15:51 richard # Added the copyright/license notice to (nearly) all files at request of # Bizar Software. 
diff --git a/test/test_token.py b/test/test_token.py new file mode 100644 index 0000000..60a06be --- /dev/null +++ b/test/test_token.py @@ -0,0 +1,60 @@ +# +# Copyright (c) 2001 Richard Jones +# This module is free software, and you may redistribute it and/or modify +# under the same terms as Python, so long as this copyright message and +# disclaimer are retained in their original form. +# +# This module is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# $Id: test_token.py,v 1.1 2001-12-31 05:09:20 richard Exp $ + +import unittest, time + +from roundup.token import token_split + +class TokenTestCase(unittest.TestCase): + def testValid(self): + l = token_split('hello world') + self.assertEqual(l, ['hello', 'world']) + + def testIgnoreExtraSpace(self): + l = token_split('hello world ') + self.assertEqual(l, ['hello', 'world']) + + def testQuoting(self): + l = token_split('"hello world"') + self.assertEqual(l, ['hello world']) + l = token_split("'hello world'") + self.assertEqual(l, ['hello world']) + + def testEmbedQuote(self): + l = token_split(r'Roch\'e Compaan') + self.assertEqual(l, ["Roch'e", "Compaan"]) + l = token_split('address="1 2 3"') + self.assertEqual(l, ['address=1 2 3']) + + def testEscaping(self): + l = token_split('"Roch\'e" Compaan') + self.assertEqual(l, ["Roch'e", "Compaan"]) + l = token_split(r'hello\ world') + self.assertEqual(l, ['hello world']) + l = token_split(r'\\') + self.assertEqual(l, ['\\']) + l = token_split(r'\n') + self.assertEqual(l, ['\n']) + + def testBadQuote(self): + self.assertRaises(ValueError, token_split, '"hello world') + self.assertRaises(ValueError, token_split, "Roch'e Compaan") + +def suite(): + return unittest.makeSuite(TokenTestCase, 'test') + + +# +# $Log: not supported by cvs2svn $ +# +# +# vim: set filetype=python ts=4 sw=4 et si -- 2.30.2
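For reference, the tokenising behaviour described in the roundup/token.py docstring above can be exercised directly. What follows is a minimal sketch, not part of the patch, assuming the roundup package containing the new token module is importable; it simply restates the documented cases as assertions:

    from roundup.token import token_split

    # Plain whitespace splitting.
    assert token_split('hello world') == ['hello', 'world']

    # Quote characters group whitespace-separated words into one token;
    # either ' or " may be used.
    assert token_split('"hello world"') == ['hello world']
    assert token_split('address="1 2 3"') == ['address=1 2 3']

    # A backslash escapes quote and whitespace characters; \r, \n and \t
    # are converted to their control-character equivalents.
    assert token_split("Roch\\'e Compaan") == ["Roch'e", 'Compaan']
    assert token_split('hello\\ world') == ['hello world']
    assert token_split('\\n') == ['\n']

    # An unmatched quote character raises ValueError.
    try:
        token_split('"hello world')
    except ValueError:
        pass
    else:
        raise AssertionError('expected ValueError for unmatched quote')

Because roundup-admin's interactive mode now splits each input line with token_split() rather than on whitespace alone, a quoted property value such as address="1 2 3" reaches run_command() as a single argument.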