1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
|
# -*- coding: utf-8 -*-
"""
scanner
~~~~~~~
This library implements a regex based scanner.
:copyright: 2006-2007 by Armin Ronacher, Georg Brandl.
:license: BSD license.
"""
import re
class EndOfText(RuntimeError):
"""
Raise if end of text is reached and the user
tried to call a match function.
"""
class Scanner(object):
"""
Simple scanner
All method patterns are regular expression strings (not
compiled expressions!)
"""
def __init__(self, text, flags=0):
"""
:param text: The text which should be scanned
:param flags: default regular expression flags
"""
self.data = text
self.data_length = len(text)
self.start_pos = 0
self.pos = 0
self.flags = flags
self.last = None
self.match = None
self._re_cache = {}
def eos(self):
"""`True` if the scanner reached the end of text."""
return self.pos >= self.data_length
eos = property(eos, eos.__doc__)
def check(self, pattern):
"""
Apply `pattern` on the current position and return
the match object. (Doesn't touch pos). Use this for
lookahead.
"""
if self.eos:
raise EndOfText()
if pattern not in self._re_cache:
self._re_cache[pattern] = re.compile(pattern, self.flags)
return self._re_cache[pattern].match(self.data, self.pos)
def test(self, pattern):
"""Apply a pattern on the current position and check
if it patches. Doesn't touch pos."""
return self.check(pattern) is not None
def scan(self, pattern):
"""
Scan the text for the given pattern and update pos/match
and related fields. The return value is a boolen that
indicates if the pattern matched. The matched value is
stored on the instance as ``match``, the last value is
stored as ``last``. ``start_pos`` is the position of the
pointer before the pattern was matched, ``pos`` is the
end position.
"""
if self.eos:
raise EndOfText()
if pattern not in self._re_cache:
self._re_cache[pattern] = re.compile(pattern, self.flags)
self.last = self.match
m = self._re_cache[pattern].match(self.data, self.pos)
if m is None:
return False
self.start_pos = m.start()
self.pos = m.end()
self.match = m
return True
def get_char(self):
"""Scan exactly one char."""
self.scan('.')
def __repr__(self):
return '<%s %d/%d>' % (
self.__class__.__name__,
self.pos,
self.data_length
)
|