# -*- coding: utf-8 -*- """ scanner ~~~~~~~ This library implements a regex based scanner. :copyright: 2006-2007 by Armin Ronacher, Georg Brandl. :license: BSD license. """ import re class EndOfText(RuntimeError): """ Raise if end of text is reached and the user tried to call a match function. """ class Scanner(object): """ Simple scanner All method patterns are regular expression strings (not compiled expressions!) """ def __init__(self, text, flags=0): """ :param text: The text which should be scanned :param flags: default regular expression flags """ self.data = text self.data_length = len(text) self.start_pos = 0 self.pos = 0 self.flags = flags self.last = None self.match = None self._re_cache = {} def eos(self): """`True` if the scanner reached the end of text.""" return self.pos >= self.data_length eos = property(eos, eos.__doc__) def check(self, pattern): """ Apply `pattern` on the current position and return the match object. (Doesn't touch pos). Use this for lookahead. """ if self.eos: raise EndOfText() if pattern not in self._re_cache: self._re_cache[pattern] = re.compile(pattern, self.flags) return self._re_cache[pattern].match(self.data, self.pos) def test(self, pattern): """Apply a pattern on the current position and check if it patches. Doesn't touch pos.""" return self.check(pattern) is not None def scan(self, pattern): """ Scan the text for the given pattern and update pos/match and related fields. The return value is a boolen that indicates if the pattern matched. The matched value is stored on the instance as ``match``, the last value is stored as ``last``. ``start_pos`` is the position of the pointer before the pattern was matched, ``pos`` is the end position. """ if self.eos: raise EndOfText() if pattern not in self._re_cache: self._re_cache[pattern] = re.compile(pattern, self.flags) self.last = self.match m = self._re_cache[pattern].match(self.data, self.pos) if m is None: return False self.start_pos = m.start() self.pos = m.end() self.match = m return True def get_char(self): """Scan exactly one char.""" self.scan('.') def __repr__(self): return '<%s %d/%d>' % ( self.__class__.__name__, self.pos, self.data_length )