even more decoder optimizations

git-svn-id: http://simplejson.googlecode.com/svn/trunk@103 a4795897-2c25-0410-b006-0d3caba88fa1
author: Bob Ippolito <bob@redivi.com> 2008-09-24 04:40:52 +0000
committer: Bob Ippolito <bob@redivi.com> 2008-09-24 04:40:52 +0000
commit: a8165ef80b2390951619bb199e6defe9b6cafd9a (patch)
tree: 8e981607ca1a03ed188444d403c28b5be2f508c0 /simplejson/scanner.py
parent: 00a0d85515743777d62cac6b5b3570f25511fedd (diff)
download: simplejson-a8165ef80b2390951619bb199e6defe9b6cafd9a.tar.gz
1 files changed, 39 insertions, 45 deletions
diff --git a/simplejson/scanner.py b/simplejson/scanner.py
index bdd841e..338d2a8 100644
--- a/simplejson/scanner.py
+++ b/simplejson/scanner.py
@@ -3,60 +3,54 @@ Iterator based sre token scanner
 """
 import re
 from re import VERBOSE, MULTILINE, DOTALL
-import sre_parse
-import sre_compile
-import sre_constants
-from sre_constants import BRANCH, SUBPATTERN
 
-__all__ = ['make_scanner', 'pattern']
+__all__ = ['make_scanner']
 
 FLAGS = (VERBOSE | MULTILINE | DOTALL)
 
-def make_scanner(lexicon, flags=FLAGS):
-    actions = [None]
-    # Combine phrases into a compound pattern
-    s = sre_parse.Pattern()
-    s.flags = flags
-    charpatterns = {}
-    p = []
-    idx = 0
-    for token in lexicon:
-        if token.pattern in (r'\[', r'{', r'"'):
-            charpatterns[token.pattern[-1]] = token
-        idx += 1
-        phrase = token.pattern
-        try:
-            subpattern = sre_parse.SubPattern(s,
-                [(SUBPATTERN, (idx, sre_parse.parse(phrase, flags)))])
-        except sre_constants.error:
-            raise
-        p.append(subpattern)
-        actions.append(token)
 
-    s.groups = len(p) + 1 # NOTE(guido): Added to make SRE validation work
-    p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
-    scanner = sre_compile.compile(p).scanner
+NUMBER_PATTERN = r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?'
+
+def make_scanner(lexicon):
+    parse_object = lexicon['object']
+    parse_array = lexicon['array']
+    parse_string = lexicon['string']
+    match_number = re.compile(NUMBER_PATTERN, FLAGS).match
 
-    def _scan_once(string, idx=0, context=None):
+    def _scan_once(string, idx, context):
         try:
-            action = charpatterns[string[idx]]
-        except KeyError:
-            pass
+            nextchar = string[idx]
         except IndexError:
             raise StopIteration
-        else:
-            return action((string, idx + 1), context)
         
-        m = scanner(string, idx).match()
-        if m is None or m.end() == idx:
+        if nextchar == '"':
+            return parse_string(string, idx + 1, context.encoding, context.strict)
+        elif nextchar == '{':
+            return parse_object((string, idx + 1), context)
+        elif nextchar == '[':
+            return parse_array((string, idx + 1), context)
+        elif nextchar == 'n' and string[idx:idx + 4] == 'null':
+            return None, idx + 4
+        elif nextchar == 't' and string[idx:idx + 4] == 'true':
+            return True, idx + 4
+        elif nextchar == 'f' and string[idx:idx + 5] == 'false':
+            return False, idx + 5
+        
+        m = match_number(string, idx)
+        if m is not None:
+            integer, frac, exp = m.groups()
+            if frac or exp:
+                res = context.parse_float(integer + (frac or '') + (exp or ''))
+            else:
+                res = context.parse_int(integer)
+            return res, m.end()
+        elif nextchar == 'N' and string[idx:idx + 3] == 'NaN':
+            return context.parse_constant('NaN'), idx + 3
+        elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity':
+            return context.parse_constant('Infinity'), idx + 8
+        elif nextchar == '-' and string[idx:idx + 9] == '-Infinity':
+            return context.parse_constant('-Infinity'), idx + 9
+        else:
             raise StopIteration
-        return actions[m.lastindex](m, context)
     
-    return _scan_once
-
-def pattern(pattern, flags=FLAGS):
-    def decorator(fn):
-        fn.pattern = pattern
-        fn.regex = re.compile(pattern, flags)
-        return fn
-    return decorator
-\ No newline at end of file
+    return _scan_once
+\ No newline at end of file
author	Bob Ippolito <bob@redivi.com>	2008-09-24 04:40:52 +0000
committer	Bob Ippolito <bob@redivi.com>	2008-09-24 04:40:52 +0000
commit	a8165ef80b2390951619bb199e6defe9b6cafd9a (patch)
tree	8e981607ca1a03ed188444d403c28b5be2f508c0 /simplejson/scanner.py
parent	00a0d85515743777d62cac6b5b3570f25511fedd (diff)
download	simplejson-a8165ef80b2390951619bb199e6defe9b6cafd9a.tar.gz