diff options
author | Eevee (Alex Munroe) <eevee.git@veekun.com> | 2014-08-28 18:22:05 -0700 |
---|---|---|
committer | Eevee (Alex Munroe) <eevee.git@veekun.com> | 2014-08-29 17:07:05 -0700 |
commit | 45d3a91713e7a26050121f300d3ea2b93c7908d1 (patch) | |
tree | cfb95af250dc3a2e6889d1ee14e61a5afef90054 | |
parent | 743dd8bca34035e701c2d769b9b5cc010c3840c2 (diff) | |
download | pyscss-45d3a91713e7a26050121f300d3ea2b93c7908d1.tar.gz |
Remove some old Python that I don't think is useful any more...?
-rwxr-xr-x | scss/src/block_locator.py | 546 | ||||
-rwxr-xr-x | scss/src/build.py | 26 | ||||
-rwxr-xr-x | scss/src/scanner.py | 288 |
3 files changed, 0 insertions, 860 deletions
diff --git a/scss/src/block_locator.py b/scss/src/block_locator.py deleted file mode 100755 index d4ffd74..0000000 --- a/scss/src/block_locator.py +++ /dev/null @@ -1,546 +0,0 @@ -#!/usr/bin/env python - -## locate_blocks() needs heavy optimizations... is way too slow right now! -## Any suggestion from python wizards? :-) - -import re -import sys -from datetime import datetime - -from six.moves import xrange - -import pstats -import cProfile -from cStringIO import StringIO -def profile(fn): - def wrapper(*args, **kwargs): - profiler = cProfile.Profile() - stream = StringIO() - profiler.enable() - try: - res = fn(*args, **kwargs) - finally: - profiler.disable() - stats = pstats.Stats(profiler, stream=stream) - stats.sort_stats('time') - print >>stream, "" - print >>stream, "=" * 100 - print >>stream, "Stats:" - stats.print_stats() - - print >>stream, "=" * 100 - print >>stream, "Callers:" - stats.print_callers() - - print >>stream, "=" * 100 - print >>stream, "Callees:" - stats.print_callees() - print >>sys.stderr, stream.getvalue() - stream.close() - return res - return wrapper - - -DEBUG = False -################################################################################ -# Helper functions - - -SEPARATOR = '\x00' -_nl_num_re = re.compile(r'\n.+' + SEPARATOR, re.MULTILINE) -_blocks_re = re.compile(r'[{},;()\'"\n]') - - -def load_string(codestr): - """ - Add line numbers to the string using SEPARATOR as the separation between - the line number and the line. - """ - # Decorate lines with their line numbers and a delimiting NUL and remove empty lines - codestr = '\n'.join(str(i + 1) + SEPARATOR + s for i, l in enumerate(codestr.splitlines()) for s in (l.strip(),) if s) - - return codestr - - -def _strip_selprop(selprop, lineno): - # Get the line number of the selector or property and strip all other - # line numbers that might still be there (from multiline selectors) - _lineno, _sep, selprop = selprop.partition(SEPARATOR) - if _sep == SEPARATOR: - _lineno = _lineno.strip(' \t\n;') - try: - lineno = int(_lineno) - except ValueError: - pass - else: - selprop = _lineno - selprop = _nl_num_re.sub('\n', selprop) - selprop = selprop.strip() - return selprop, lineno - - -def _strip(selprop): - # Strip all line numbers, ignoring them in the way - selprop, _ = _strip_selprop(selprop, None) - return selprop - - -################################################################################ -# Algorithm implemented in C (much slower here): - -PAR = 0 -INSTR = 1 -DEPTH = 2 -SKIP = 3 -THIN = 4 -INIT = 5 -SAFE = 6 -LOSE = 7 -START = 8 -END = 9 -LINENO = 10 -SELPROP = 11 - - -def _start_string(codestr, ctx, i, c): - if DEBUG: print "_start_string" - # A string starts - ctx[INSTR] = c - return - yield - - -def _end_string(codestr, ctx, i, c): - if DEBUG: print "_end_string" - # A string ends (FIXME: needs to accept escaped characters) - ctx[INSTR] = None - return - yield - - -def _start_parenthesis(codestr, ctx, i, c): - if DEBUG: print "_start_parenthesis" - # parenthesis begins: - ctx[PAR] += 1 - ctx[THIN] = None - ctx[SAFE] = i + 1 - return - yield - - -def _end_parenthesis(codestr, ctx, i, c): - if DEBUG: print "_end_parenthesis" - ctx[PAR] -= 1 - return - yield - - -def _flush_properties(codestr, ctx, i, c): - if DEBUG: print "_flush_properties" - # Flush properties - if ctx[LOSE] <= ctx[INIT]: - _property, ctx[LINENO] = _strip_selprop(codestr[ctx[LOSE]:ctx[INIT]], ctx[LINENO]) - if _property: - yield ctx[LINENO], _property, None - ctx[SELPROP] = _property - ctx[LOSE] = ctx[INIT] - return - yield - - -def _start_block1(codestr, ctx, i, c): - if DEBUG: print "_start_block1" - # Start level-1 block - if i > 0 and codestr[i - 1] == '#': # Do not process #{...} as blocks! - ctx[SKIP] = True - else: - ctx[START] = i - if ctx[THIN] is not None and _strip(codestr[ctx[THIN]:i]): - ctx[INIT] = ctx[THIN] - for y in _flush_properties(codestr, ctx, i, c): - yield y - ctx[THIN] = None - ctx[DEPTH] += 1 - return - yield - - -def _start_block(codestr, ctx, i, c): - if DEBUG: print "_start_block" - # Start blocks: - ctx[DEPTH] += 1 - return - yield - - -def _end_block1(codestr, ctx, i, c): - if DEBUG: print "_end_block1" - # End level-1 block: - ctx[DEPTH] -= 1 - if not ctx[SKIP]: - ctx[END] = i - _selectors, ctx[LINENO] = _strip_selprop(codestr[ctx[INIT]:ctx[START]], ctx[LINENO]) - _codestr = codestr[ctx[START] + 1:ctx[END]] - if _selectors: - yield ctx[LINENO], _selectors, _codestr - ctx[SELPROP] = _selectors - ctx[INIT] = ctx[SAFE] = ctx[LOSE] = ctx[END] + 1 - ctx[THIN] = None - ctx[SKIP] = False - return - yield - - -def _end_block(codestr, ctx, i, c): - if DEBUG: print "_end_block" - # Block ends: - ctx[DEPTH] -= 1 - return - yield - - -def _end_property(codestr, ctx, i, c): - if DEBUG: print "_end_property" - # End of property (or block): - ctx[INIT] = i - if ctx[LOSE] <= ctx[INIT]: - _property, ctx[LINENO] = _strip_selprop(codestr[ctx[LOSE]:ctx[INIT]], ctx[LINENO]) - if _property: - yield ctx[LINENO], _property, None - ctx[SELPROP] = _property - ctx[INIT] = ctx[SAFE] = ctx[LOSE] = i + 1 - ctx[THIN] = None - return - yield - - -def _mark_safe(codestr, ctx, i, c): - if DEBUG: print "_mark_safe" - # We are on a safe zone - if ctx[THIN] is not None and _strip(codestr[ctx[THIN]:i]): - ctx[INIT] = ctx[THIN] - ctx[THIN] = None - ctx[SAFE] = i + 1 - return - yield - - -def _mark_thin(codestr, ctx, i, c): - if DEBUG: print "_mark_thin" - # Step on thin ice, if it breaks, it breaks here - if ctx[THIN] is not None and _strip(codestr[ctx[THIN]:i]): - ctx[INIT] = ctx[THIN] - ctx[THIN] = i + 1 - elif ctx[THIN] is None and _strip(codestr[ctx[SAFE]:i]): - ctx[THIN] = i + 1 - return - yield - - -scss_function_map = { - # (c, instr, par, depth) - ('"', None, False, 0): _start_string, - ("'", None, False, 0): _start_string, - ('"', None, True, 0): _start_string, - ("'", None, True, 0): _start_string, - ('"', None, False, 1): _start_string, - ("'", None, False, 1): _start_string, - ('"', None, True, 1): _start_string, - ("'", None, True, 1): _start_string, - ('"', None, False, 2): _start_string, - ("'", None, False, 2): _start_string, - ('"', None, True, 2): _start_string, - ("'", None, True, 2): _start_string, - - ('"', '"', False, 0): _end_string, - ("'", "'", False, 0): _end_string, - ('"', '"', True, 0): _end_string, - ("'", "'", True, 0): _end_string, - ('"', '"', False, 1): _end_string, - ("'", "'", False, 1): _end_string, - ('"', '"', True, 1): _end_string, - ("'", "'", True, 1): _end_string, - ('"', '"', False, 2): _end_string, - ("'", "'", False, 2): _end_string, - ('"', '"', True, 2): _end_string, - ("'", "'", True, 2): _end_string, - - ("(", None, False, 0): _start_parenthesis, - ("(", None, True, 0): _start_parenthesis, - ("(", None, False, 1): _start_parenthesis, - ("(", None, True, 1): _start_parenthesis, - ("(", None, False, 2): _start_parenthesis, - ("(", None, True, 2): _start_parenthesis, - - (")", None, True, 0): _end_parenthesis, - (")", None, True, 1): _end_parenthesis, - (")", None, True, 2): _end_parenthesis, - - ("{", None, False, 0): _start_block1, - ("{", None, False, 1): _start_block, - ("{", None, False, 2): _start_block, - - ("}", None, False, 1): _end_block1, - ("}", None, False, 2): _end_block, - - (";", None, False, 0): _end_property, - - (",", None, False, 0): _mark_safe, - - ("\n", None, False, 0): _mark_thin, - - (None, None, False, 0): _flush_properties, - (None, None, False, 1): _flush_properties, - (None, None, False, 2): _flush_properties, -} - - -def _locate_blocks_a(codestr): - """ - For processing CSS like strings. - - Either returns all selectors (that can be "smart" multi-lined, as - long as it's joined by `,`, or enclosed in `(` and `)`) with its code block - (the one between `{` and `}`, which can be nested), or the "lose" code - (properties) that doesn't have any blocks. - - threshold is the number of blank lines before selectors are broken into - pieces (properties). - """ - ctx = [0, None, 0, False, None, 0, 0, 0, None, None, 0, '??'] - - for m in _blocks_re.finditer(codestr): - c = m.group() - - fn = scss_function_map.get((c, ctx[INSTR], ctx[PAR] != 0, 2 if ctx[DEPTH] > 1 else ctx[DEPTH])) - if DEBUG: print fn and ' > ' or ' ', fn and fn.__name__, (c, ctx[INSTR], ctx[PAR] != 0, 2 if ctx[DEPTH] > 1 else ctx[DEPTH]) - if fn: - for y in fn(codestr, ctx, m.start(), c): - yield y - - codestr_end = len(codestr) - exc = None - if ctx[PAR]: - exc = exc or "Missing closing parenthesis somewhere in block: '%s'" % ctx[SELPROP] - elif ctx[INSTR]: - exc = exc or "Missing closing string somewhere in block: '%s'" % ctx[SELPROP] - elif ctx[DEPTH]: - exc = exc or "Block never closed: '%s'" % ctx[SELPROP] - while ctx[DEPTH] > 0 and ctx[INIT] < codestr_end: - c = '}' - fn = scss_function_map.get((c, ctx[INSTR], ctx[PAR] != 0, 2 if ctx[DEPTH] > 1 else ctx[DEPTH])) - if DEBUG: print fn and ' > ' or ' ! ', fn and fn.__name__, (c, ctx[INSTR], ctx[PAR] != 0, 2 if ctx[DEPTH] > 1 else ctx[DEPTH]) - if fn: - for y in fn(codestr, ctx, m.start(), c): - yield y - - if ctx[INIT] < codestr_end: - ctx[INIT] = codestr_end - c = None - fn = scss_function_map.get((c, ctx[INSTR], ctx[PAR] != 0, 2 if ctx[DEPTH] > 1 else ctx[DEPTH])) - if DEBUG: print fn and ' > ' or ' ! ', fn and fn.__name__, (c, ctx[INSTR], ctx[PAR] != 0, 2 if ctx[DEPTH] > 1 else ctx[DEPTH]) - if fn: - for y in fn(codestr, ctx, m.start(), c): - yield y - - if exc: - raise Exception(exc) - - -################################################################################ -# Algorithm using Regexps in pure Python (fastest pure python): - - -def _locate_blocks_b(codestr): - """ - For processing CSS like strings. - - Either returns all selectors (that can be "smart" multi-lined, as - long as it's joined by `,`, or enclosed in `(` and `)`) with its code block - (the one between `{` and `}`, which can be nested), or the "lose" code - (properties) that doesn't have any blocks. - - threshold is the number of blank lines before selectors are broken into - pieces (properties). - """ - lineno = 0 - - par = 0 - instr = None - depth = 0 - skip = False - thin = None - i = init = safe = lose = 0 - start = end = None - - for m in _blocks_re.finditer(codestr): - i = m.start(0) - c = codestr[i] - if instr is not None: - if c == instr: - instr = None # A string ends (FIXME: needs to accept escaped characters) - elif c in ('"', "'"): - instr = c # A string starts - elif c == '(': # parenthesis begins: - par += 1 - thin = None - safe = i + 1 - elif c == ')': # parenthesis ends: - par -= 1 - elif not par and not instr: - if c == '{': # block begins: - if depth == 0: - if i > 0 and codestr[i - 1] == '#': # Do not process #{...} as blocks! - skip = True - else: - start = i - if thin is not None and _strip(codestr[thin:i]): - init = thin - if lose <= init: - _property, lineno = _strip_selprop(codestr[lose:init], lineno) - if _property: - yield lineno, _property, None - lose = init - thin = None - depth += 1 - elif c == '}': # block ends: - if depth > 0: - depth -= 1 - if depth == 0: - if not skip: - end = i - _selectors, lineno = _strip_selprop(codestr[init:start], lineno) - _codestr = codestr[start + 1:end].strip() - if _selectors: - yield lineno, _selectors, _codestr - init = safe = lose = end + 1 - thin = None - skip = False - elif depth == 0: - if c == ';': # End of property (or block): - init = i - if lose <= init: - _property, lineno = _strip_selprop(codestr[lose:init], lineno) - if _property: - yield lineno, _property, None - init = safe = lose = i + 1 - thin = None - elif c == ',': - if thin is not None and _strip(codestr[thin:i]): - init = thin - thin = None - safe = i + 1 - elif c == '\n': - if thin is not None and _strip(codestr[thin:i]): - init = thin - thin = i + 1 - elif thin is None and _strip(codestr[safe:i]): - thin = i + 1 # Step on thin ice, if it breaks, it breaks here - if depth > 0: - if not skip: - _selectors, lineno = _strip_selprop(codestr[init:start], lineno) - _codestr = codestr[start + 1:].strip() - if _selectors: - yield lineno, _selectors, _codestr - if par: - raise Exception("Missing closing parenthesis somewhere in block: '%s'" % _selectors) - elif instr: - raise Exception("Missing closing string somewhere in block: '%s'" % _selectors) - else: - raise Exception("Block never closed: '%s'" % _selectors) - losestr = codestr[lose:] - for _property in losestr.split(';'): - _property, lineno = _strip_selprop(_property, lineno) - if _property: - yield lineno, _property, None - - -################################################################################ -# Algorithm implemented in C: - - -try: - from _speedups import locate_blocks as _locate_blocks_c -except ImportError: - _locate_blocks_c = None - print >>sys.stderr, "Scanning acceleration disabled (_speedups not found)!" - - - -################################################################################ - - -codestr = """ -simple { - block; -} -#{ignored}; -some, -selectors, -and multi-lined, -selectors -with more -{ - the block in here; - can have, nested, selectors { - and properties in nested blocks; - and stuff with #{ ignored blocks }; - } - properties-can: "have strings with stuff like this: }"; -} -and other, -selectors -can be turned into "lose" -properties -if no commas are found -however this is a selector ( - as well as these things, - which are parameters - and can expand - any number of - lines) { - and this is its block;; -} -""" -verify = '\t----------------------------------------------------------------------\n\t>[1] \'simple\'\n\t----------------------------------------------------------------------\n\t>\t[3] \'block\'\n\t----------------------------------------------------------------------\n\t>[5] \'#{ignored}\'\n\t----------------------------------------------------------------------\n\t>[6] \'some,\\nselectors,\\nand multi-lined,\\nselectors\'\n\t----------------------------------------------------------------------\n\t>[10] \'with more\'\n\t----------------------------------------------------------------------\n\t>\t[12] \'the block in here\'\n\t----------------------------------------------------------------------\n\t>\t[13] \'can have, nested, selectors\'\n\t----------------------------------------------------------------------\n\t>\t\t[14] \'and properties in nested blocks\'\n\t----------------------------------------------------------------------\n\t>\t\t[15] \'and stuff with #{ ignored blocks }\'\n\t----------------------------------------------------------------------\n\t>\t[17] \'properties-can: "have strings with stuff like this: }"\'\n\t----------------------------------------------------------------------\n\t>[19] \'and other,\\nselectors\\ncan be turned into "lose"\\nproperties\'\n\t----------------------------------------------------------------------\n\t>[23] \'if no commas are found\\nhowever this is a selector (\\nas well as these things,\\nwhich are parameters\\nand can expand\\nany number of\\nlines)\'\n\t----------------------------------------------------------------------\n\t>\t[30] \'and this is its block\'\n' - - -def process_block(locate_blocks, codestr, level=0, dump=False): - ret = '' if dump else None - for lineno, selprop, block in locate_blocks(codestr): - if dump: - ret += '\t%s\n\t>%s[%s] %s\n' % ('-' * 70, '\t' * level, lineno, repr(selprop)) - if block: - _ret = process_block(locate_blocks, block, level + 1, dump) - if dump: - ret += _ret - return ret - - -def process_blocks(locate_blocks, codestr): - for q in xrange(20000): - process_block(locate_blocks, codestr) -profiled_process_blocks = profile(process_blocks) - -if __name__ == "__main__": - codestr = load_string(codestr) - - for locate_blocks, desc in ( - (_locate_blocks_a, "Pure Python, Full algorithm (_locate_blocks_a)"), - (_locate_blocks_b, "Pure Python, Condensed algorithm (_locate_blocks_b)"), - (_locate_blocks_c, "Builtin C Function, Full algorithm (_locate_blocks_c)"), - ): - if locate_blocks: - ret = process_block(locate_blocks, codestr, dump=True) - # print "This is what %s returned:" % desc - # print ret - # print repr(ret) - assert ret == verify, '\nFrom %s, got:\n%s\nShould be:\n%s' % (desc, ret, verify) - - start = datetime.now() - print >>sys.stderr, "Timing: %s..." % desc, - process_blocks(locate_blocks, codestr) - elap = datetime.now() - start - - elapms = elap.seconds * 1000.0 + elap.microseconds / 1000.0 - print >>sys.stderr, "Done! took %06.3fms" % elapms diff --git a/scss/src/build.py b/scss/src/build.py deleted file mode 100755 index 0d9efd3..0000000 --- a/scss/src/build.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python -import sys -from distutils.core import setup, Extension -from distutils.command.build_ext import build_ext as _build_ext -import os - -abspath = os.path.abspath(__file__) -dname = os.path.dirname(abspath) -os.chdir(dname) - -class build_ext(_build_ext): - def finalize_options(self): - _build_ext.finalize_options(self) - self.build_temp = './' - self.build_lib = '../' - -if len(sys.argv) == 1: - sys.argv.append('build') - -setup(ext_modules=[ - Extension( - '_speedups', - sources=['_speedups.c', 'block_locator.c', 'scanner.c'], - libraries=['pcre'], - ), -], cmdclass={'build_ext': build_ext}) diff --git a/scss/src/scanner.py b/scss/src/scanner.py deleted file mode 100755 index 777fd67..0000000 --- a/scss/src/scanner.py +++ /dev/null @@ -1,288 +0,0 @@ -#!/usr/bin/env python - -## locate_blocks() needs heavy optimizations... is way too slow right now! -## Any suggestion from python wizards? :-) - -import re -import sys -from datetime import datetime - -from six.moves import xrange - -import pstats -import cProfile -from cStringIO import StringIO -def profile(fn): - def wrapper(*args, **kwargs): - profiler = cProfile.Profile() - stream = StringIO() - profiler.enable() - try: - res = fn(*args, **kwargs) - finally: - profiler.disable() - stats = pstats.Stats(profiler, stream=stream) - stats.sort_stats('time') - print >>stream, "" - print >>stream, "=" * 100 - print >>stream, "Stats:" - stats.print_stats() - - print >>stream, "=" * 100 - print >>stream, "Callers:" - stats.print_callers() - - print >>stream, "=" * 100 - print >>stream, "Callees:" - stats.print_callees() - print >>sys.stderr, stream.getvalue() - stream.close() - return res - return wrapper - - -DEBUG = False -################################################################################ -# Helpers - -_units = ['em', 'ex', 'px', 'cm', 'mm', 'in', 'pt', 'pc', 'deg', 'rad' - 'grad', 'ms', 's', 'hz', 'khz', '%'] -PATTERNS = [ - ('":"', ':'), - ('[ \r\t\n]+', '[ \r\t\n]+'), - ('COMMA', ','), - ('LPAR', '\\(|\\['), - ('RPAR', '\\)|\\]'), - ('END', '$'), - ('MUL', '[*]'), - ('DIV', '/'), - ('ADD', '[+]'), - ('SUB', '-\\s'), - ('SIGN', '-(?![a-zA-Z_])'), - ('AND', '(?<![-\\w])and(?![-\\w])'), - ('OR', '(?<![-\\w])or(?![-\\w])'), - ('NOT', '(?<![-\\w])not(?![-\\w])'), - ('NE', '!='), - ('INV', '!'), - ('EQ', '=='), - ('LE', '<='), - ('GE', '>='), - ('LT', '<'), - ('GT', '>'), - ('STR', "'[^']*'"), - ('QSTR', '"[^"]*"'), - ('UNITS', '(?<!\\s)(?:' + '|'.join(_units) + ')(?![-\\w])'), - ('NUM', '(?:\\d+(?:\\.\\d*)?|\\.\\d+)'), - ('BOOL', '(?<![-\\w])(?:true|false)(?![-\\w])'), - ('COLOR', '#(?:[a-fA-F0-9]{6}|[a-fA-F0-9]{3})(?![a-fA-F0-9])'), - ('VAR', '\\$[-a-zA-Z0-9_]+'), - ('FNCT', '[-a-zA-Z_][-a-zA-Z0-9_]*(?=\\()'), - ('ID', '[-a-zA-Z_][-a-zA-Z0-9_]*'), -] - - -################################################################################ - -class NoMoreTokens(Exception): - """ - Another exception object, for when we run out of tokens - """ - pass - - -class Scanner(object): - def __init__(self, patterns, ignore, input=None): - """ - Patterns is [(terminal,regex)...] - Ignore is [terminal,...]; - Input is a string - """ - self.reset(input) - self.ignore = ignore - # The stored patterns are a pair (compiled regex,source - # regex). If the patterns variable passed in to the - # constructor is None, we assume that the class already has a - # proper .patterns list constructed - if patterns is not None: - self.patterns = [] - for k, r in patterns: - self.patterns.append((k, re.compile(r))) - - def reset(self, input): - self.tokens = [] - self.restrictions = [] - self.input = input - self.pos = 0 - - def __repr__(self): - """ - Print the last 10 tokens that have been scanned in - """ - output = '' - for t in self.tokens[-10:]: - output = "%s\n (@%s) %s = %s" % (output, t[0], t[2], repr(t[3])) - return output - - def _scan(self, restrict): - """ - Should scan another token and add it to the list, self.tokens, - and add the restriction to self.restrictions - """ - # Keep looking for a token, ignoring any in self.ignore - token = None - while True: - best_pat = None - # Search the patterns for a match, with earlier - # tokens in the list having preference - best_pat_len = 0 - for tok, regex in self.patterns: - if DEBUG: - print("\tTrying %s: %s at pos %d -> %s" % (repr(tok), repr(regex.pattern), self.pos, repr(self.input))) - # First check to see if we're restricting to this token - if restrict and tok not in restrict and tok not in self.ignore: - if DEBUG: - print "\tSkipping %s!" % repr(tok) - continue - m = regex.match(self.input, self.pos) - if m: - # We got a match - best_pat = tok - best_pat_len = len(m.group(0)) - if DEBUG: - print("Match OK! %s: %s at pos %d" % (repr(tok), repr(regex.pattern), self.pos)) - break - - # If we didn't find anything, raise an error - if best_pat is None: - msg = "Bad Token" - if restrict: - msg = "Trying to find one of " + ", ".join(restrict) - raise SyntaxError("SyntaxError[@ char %s: %s]" % (repr(self.pos), msg)) - - # If we found something that isn't to be ignored, return it - if best_pat in self.ignore: - # This token should be ignored... - self.pos += best_pat_len - else: - end_pos = self.pos + best_pat_len - # Create a token with this data - token = ( - self.pos, - end_pos, - best_pat, - self.input[self.pos:end_pos] - ) - break - if token is not None: - self.pos = token[1] - # Only add this token if it's not in the list - # (to prevent looping) - if not self.tokens or token != self.tokens[-1]: - self.tokens.append(token) - self.restrictions.append(restrict) - return 1 - return 0 - - def token(self, i, restrict=None): - """ - Get the i'th token, and if i is one past the end, then scan - for another token; restrict is a list of tokens that - are allowed, or 0 for any token. - """ - tokens_len = len(self.tokens) - if i == tokens_len: # We are at the end, get the next... - tokens_len += self._scan(restrict) - elif i >= 0 and i < tokens_len: - if restrict and self.restrictions[i] and restrict > self.restrictions[i]: - raise NotImplementedError("Unimplemented: restriction set changed") - if i >= 0 and i < tokens_len: - return self.tokens[i] - raise NoMoreTokens() - - def rewind(self, i): - tokens_len = len(self.tokens) - if i <= tokens_len: - token = self.tokens[i] - self.tokens = self.tokens[:i] - self.restrictions = self.restrictions[:i] - self.pos = token[0] - - -class _Scanner_a(Scanner): - patterns = None - _patterns = PATTERNS - - def __init__(self, input=None): - if hasattr(self, 'setup_patterns'): - self.setup_patterns(self._patterns) - elif self.patterns is None: - self.__class__.patterns = [] - for t, p in self._patterns: - self.patterns.append((t, re.compile(p))) - super(_Scanner_a, self).__init__(None, ['[ \r\t\n]+'], input) - - -################################################################################ - -try: - from _speedups import Scanner - - class _Scanner_b(Scanner): - patterns = None - _patterns = PATTERNS - - def __init__(self, input=None): - if hasattr(self, 'setup_patterns'): - self.setup_patterns(self._patterns) - elif self.patterns is None: - self.__class__.patterns = [] - for t, p in self._patterns: - self.patterns.append((t, re.compile(p))) - super(_Scanner_b, self).__init__(None, ['[ \r\t\n]+'], input) - -except ImportError: - _Scanner_b = None - - -def process_scan(Scanner, level=0, dump=False): - ret = '' if dump else None - s = Scanner('[(5px - 3) * (5px - 3)]') - i = 0 - while True: - try: - s.token(i) - i += 1 - if dump: - ret += '%s\n%s\n' % ('-' * 70, repr(s)) - except: - break - return ret - - -verify = "----------------------------------------------------------------------\n\n (@0) LPAR = '['\n----------------------------------------------------------------------\n\n (@0) LPAR = '['\n (@1) LPAR = '('\n----------------------------------------------------------------------\n\n (@0) LPAR = '['\n (@1) LPAR = '('\n (@2) NUM = '5'\n----------------------------------------------------------------------\n\n (@0) LPAR = '['\n (@1) LPAR = '('\n (@2) NUM = '5'\n (@3) UNITS = 'px'\n----------------------------------------------------------------------\n\n (@0) LPAR = '['\n (@1) LPAR = '('\n (@2) NUM = '5'\n (@3) UNITS = 'px'\n (@6) SUB = '- '\n----------------------------------------------------------------------\n\n (@0) LPAR = '['\n (@1) LPAR = '('\n (@2) NUM = '5'\n (@3) UNITS = 'px'\n (@6) SUB = '- '\n (@8) NUM = '3'\n----------------------------------------------------------------------\n\n (@0) LPAR = '['\n (@1) LPAR = '('\n (@2) NUM = '5'\n (@3) UNITS = 'px'\n (@6) SUB = '- '\n (@8) NUM = '3'\n (@9) RPAR = ')'\n----------------------------------------------------------------------\n\n (@0) LPAR = '['\n (@1) LPAR = '('\n (@2) NUM = '5'\n (@3) UNITS = 'px'\n (@6) SUB = '- '\n (@8) NUM = '3'\n (@9) RPAR = ')'\n (@11) MUL = '*'\n----------------------------------------------------------------------\n\n (@0) LPAR = '['\n (@1) LPAR = '('\n (@2) NUM = '5'\n (@3) UNITS = 'px'\n (@6) SUB = '- '\n (@8) NUM = '3'\n (@9) RPAR = ')'\n (@11) MUL = '*'\n (@13) LPAR = '('\n----------------------------------------------------------------------\n\n (@0) LPAR = '['\n (@1) LPAR = '('\n (@2) NUM = '5'\n (@3) UNITS = 'px'\n (@6) SUB = '- '\n (@8) NUM = '3'\n (@9) RPAR = ')'\n (@11) MUL = '*'\n (@13) LPAR = '('\n (@14) NUM = '5'\n----------------------------------------------------------------------\n\n (@1) LPAR = '('\n (@2) NUM = '5'\n (@3) UNITS = 'px'\n (@6) SUB = '- '\n (@8) NUM = '3'\n (@9) RPAR = ')'\n (@11) MUL = '*'\n (@13) LPAR = '('\n (@14) NUM = '5'\n (@15) UNITS = 'px'\n----------------------------------------------------------------------\n\n (@2) NUM = '5'\n (@3) UNITS = 'px'\n (@6) SUB = '- '\n (@8) NUM = '3'\n (@9) RPAR = ')'\n (@11) MUL = '*'\n (@13) LPAR = '('\n (@14) NUM = '5'\n (@15) UNITS = 'px'\n (@18) SUB = '- '\n----------------------------------------------------------------------\n\n (@3) UNITS = 'px'\n (@6) SUB = '- '\n (@8) NUM = '3'\n (@9) RPAR = ')'\n (@11) MUL = '*'\n (@13) LPAR = '('\n (@14) NUM = '5'\n (@15) UNITS = 'px'\n (@18) SUB = '- '\n (@20) NUM = '3'\n----------------------------------------------------------------------\n\n (@6) SUB = '- '\n (@8) NUM = '3'\n (@9) RPAR = ')'\n (@11) MUL = '*'\n (@13) LPAR = '('\n (@14) NUM = '5'\n (@15) UNITS = 'px'\n (@18) SUB = '- '\n (@20) NUM = '3'\n (@21) RPAR = ')'\n----------------------------------------------------------------------\n\n (@8) NUM = '3'\n (@9) RPAR = ')'\n (@11) MUL = '*'\n (@13) LPAR = '('\n (@14) NUM = '5'\n (@15) UNITS = 'px'\n (@18) SUB = '- '\n (@20) NUM = '3'\n (@21) RPAR = ')'\n (@22) RPAR = ']'\n----------------------------------------------------------------------\n\n (@9) RPAR = ')'\n (@11) MUL = '*'\n (@13) LPAR = '('\n (@14) NUM = '5'\n (@15) UNITS = 'px'\n (@18) SUB = '- '\n (@20) NUM = '3'\n (@21) RPAR = ')'\n (@22) RPAR = ']'\n (@23) END = ''\n" - - -def process_scans(Scanner): - for q in xrange(20000): - process_scan(Scanner) -profiled_process_scans = profile(process_scans) - -if __name__ == "__main__": - for scanner, desc in ( - (_Scanner_a, "Pure Python, Full algorithm (_Scanner_a)"), - (_Scanner_b, "Builtin C Function, Full algorithm (_Scanner_b)"), - ): - if scanner: - ret = process_scan(scanner, dump=True) - # print "This is what %s returned:" % desc - # print ret - # print repr(ret) - assert ret == verify, '\nFrom %s, got:\n%s\nShould be:\n%s' % (desc, ret, verify) - - start = datetime.now() - print >>sys.stderr, "Timing: %s..." % desc, - process_scans(scanner) - elap = datetime.now() - start - - elapms = elap.seconds * 1000.0 + elap.microseconds / 1000.0 - print >>sys.stderr, "Done! took %06.3fms" % elapms |