diff options
Diffstat (limited to 'Cython/Plex/Traditional.py')
-rw-r--r-- | Cython/Plex/Traditional.py | 158 |
1 files changed, 0 insertions, 158 deletions
diff --git a/Cython/Plex/Traditional.py b/Cython/Plex/Traditional.py deleted file mode 100644 index ec7252dae..000000000 --- a/Cython/Plex/Traditional.py +++ /dev/null @@ -1,158 +0,0 @@ -#======================================================================= -# -# Python Lexical Analyser -# -# Traditional Regular Expression Syntax -# -#======================================================================= - -from __future__ import absolute_import - -from .Regexps import Alt, Seq, Rep, Rep1, Opt, Any, AnyBut, Bol, Eol, Char -from .Errors import PlexError - - -class RegexpSyntaxError(PlexError): - pass - - -def re(s): - """ - Convert traditional string representation of regular expression |s| - into Plex representation. - """ - return REParser(s).parse_re() - - -class REParser(object): - def __init__(self, s): - self.s = s - self.i = -1 - self.end = 0 - self.next() - - def parse_re(self): - re = self.parse_alt() - if not self.end: - self.error("Unexpected %s" % repr(self.c)) - return re - - def parse_alt(self): - """Parse a set of alternative regexps.""" - re = self.parse_seq() - if self.c == '|': - re_list = [re] - while self.c == '|': - self.next() - re_list.append(self.parse_seq()) - re = Alt(*re_list) - return re - - def parse_seq(self): - """Parse a sequence of regexps.""" - re_list = [] - while not self.end and not self.c in "|)": - re_list.append(self.parse_mod()) - return Seq(*re_list) - - def parse_mod(self): - """Parse a primitive regexp followed by *, +, ? modifiers.""" - re = self.parse_prim() - while not self.end and self.c in "*+?": - if self.c == '*': - re = Rep(re) - elif self.c == '+': - re = Rep1(re) - else: # self.c == '?' - re = Opt(re) - self.next() - return re - - def parse_prim(self): - """Parse a primitive regexp.""" - c = self.get() - if c == '.': - re = AnyBut("\n") - elif c == '^': - re = Bol - elif c == '$': - re = Eol - elif c == '(': - re = self.parse_alt() - self.expect(')') - elif c == '[': - re = self.parse_charset() - self.expect(']') - else: - if c == '\\': - c = self.get() - re = Char(c) - return re - - def parse_charset(self): - """Parse a charset. Does not include the surrounding [].""" - char_list = [] - invert = 0 - if self.c == '^': - invert = 1 - self.next() - if self.c == ']': - char_list.append(']') - self.next() - while not self.end and self.c != ']': - c1 = self.get() - if self.c == '-' and self.lookahead(1) != ']': - self.next() - c2 = self.get() - for a in range(ord(c1), ord(c2) + 1): - char_list.append(chr(a)) - else: - char_list.append(c1) - chars = ''.join(char_list) - if invert: - return AnyBut(chars) - else: - return Any(chars) - - def next(self): - """Advance to the next char.""" - s = self.s - i = self.i = self.i + 1 - if i < len(s): - self.c = s[i] - else: - self.c = '' - self.end = 1 - - def get(self): - if self.end: - self.error("Premature end of string") - c = self.c - self.next() - return c - - def lookahead(self, n): - """Look ahead n chars.""" - j = self.i + n - if j < len(self.s): - return self.s[j] - else: - return '' - - def expect(self, c): - """ - Expect to find character |c| at current position. - Raises an exception otherwise. - """ - if self.c == c: - self.next() - else: - self.error("Missing %s" % repr(c)) - - def error(self, mess): - """Raise exception to signal syntax error in regexp.""" - raise RegexpSyntaxError("Syntax error in regexp %s at position %d: %s" % ( - repr(self.s), self.i, mess)) - - - |