summaryrefslogtreecommitdiff
path: root/Cython/Plex/Traditional.py
diff options
context:
space:
mode:
Diffstat (limited to 'Cython/Plex/Traditional.py')
-rw-r--r--Cython/Plex/Traditional.py158
1 files changed, 0 insertions, 158 deletions
diff --git a/Cython/Plex/Traditional.py b/Cython/Plex/Traditional.py
deleted file mode 100644
index ec7252dae..000000000
--- a/Cython/Plex/Traditional.py
+++ /dev/null
@@ -1,158 +0,0 @@
-#=======================================================================
-#
-# Python Lexical Analyser
-#
-# Traditional Regular Expression Syntax
-#
-#=======================================================================
-
-from __future__ import absolute_import
-
-from .Regexps import Alt, Seq, Rep, Rep1, Opt, Any, AnyBut, Bol, Eol, Char
-from .Errors import PlexError
-
-
-class RegexpSyntaxError(PlexError):
- pass
-
-
-def re(s):
- """
- Convert traditional string representation of regular expression |s|
- into Plex representation.
- """
- return REParser(s).parse_re()
-
-
-class REParser(object):
- def __init__(self, s):
- self.s = s
- self.i = -1
- self.end = 0
- self.next()
-
- def parse_re(self):
- re = self.parse_alt()
- if not self.end:
- self.error("Unexpected %s" % repr(self.c))
- return re
-
- def parse_alt(self):
- """Parse a set of alternative regexps."""
- re = self.parse_seq()
- if self.c == '|':
- re_list = [re]
- while self.c == '|':
- self.next()
- re_list.append(self.parse_seq())
- re = Alt(*re_list)
- return re
-
- def parse_seq(self):
- """Parse a sequence of regexps."""
- re_list = []
- while not self.end and not self.c in "|)":
- re_list.append(self.parse_mod())
- return Seq(*re_list)
-
- def parse_mod(self):
- """Parse a primitive regexp followed by *, +, ? modifiers."""
- re = self.parse_prim()
- while not self.end and self.c in "*+?":
- if self.c == '*':
- re = Rep(re)
- elif self.c == '+':
- re = Rep1(re)
- else: # self.c == '?'
- re = Opt(re)
- self.next()
- return re
-
- def parse_prim(self):
- """Parse a primitive regexp."""
- c = self.get()
- if c == '.':
- re = AnyBut("\n")
- elif c == '^':
- re = Bol
- elif c == '$':
- re = Eol
- elif c == '(':
- re = self.parse_alt()
- self.expect(')')
- elif c == '[':
- re = self.parse_charset()
- self.expect(']')
- else:
- if c == '\\':
- c = self.get()
- re = Char(c)
- return re
-
- def parse_charset(self):
- """Parse a charset. Does not include the surrounding []."""
- char_list = []
- invert = 0
- if self.c == '^':
- invert = 1
- self.next()
- if self.c == ']':
- char_list.append(']')
- self.next()
- while not self.end and self.c != ']':
- c1 = self.get()
- if self.c == '-' and self.lookahead(1) != ']':
- self.next()
- c2 = self.get()
- for a in range(ord(c1), ord(c2) + 1):
- char_list.append(chr(a))
- else:
- char_list.append(c1)
- chars = ''.join(char_list)
- if invert:
- return AnyBut(chars)
- else:
- return Any(chars)
-
- def next(self):
- """Advance to the next char."""
- s = self.s
- i = self.i = self.i + 1
- if i < len(s):
- self.c = s[i]
- else:
- self.c = ''
- self.end = 1
-
- def get(self):
- if self.end:
- self.error("Premature end of string")
- c = self.c
- self.next()
- return c
-
- def lookahead(self, n):
- """Look ahead n chars."""
- j = self.i + n
- if j < len(self.s):
- return self.s[j]
- else:
- return ''
-
- def expect(self, c):
- """
- Expect to find character |c| at current position.
- Raises an exception otherwise.
- """
- if self.c == c:
- self.next()
- else:
- self.error("Missing %s" % repr(c))
-
- def error(self, mess):
- """Raise exception to signal syntax error in regexp."""
- raise RegexpSyntaxError("Syntax error in regexp %s at position %d: %s" % (
- repr(self.s), self.i, mess))
-
-
-