summaryrefslogtreecommitdiff
path: root/pygments/lexer.py
diff options
context:
space:
mode:
authorTim Hatch <tim@timhatch.com>2010-12-21 08:07:13 -0800
committerTim Hatch <tim@timhatch.com>2010-12-21 08:07:13 -0800
commit132b2b954b9ae62b6c895bf48109de16ce2e1b1b (patch)
treed0725d2a9a23c7437688e1da888cf0b97aca5cd9 /pygments/lexer.py
parent89c859f902df3f7f3a3a7696e78d5b5dc78b4ce9 (diff)
downloadpygments-132b2b954b9ae62b6c895bf48109de16ce2e1b1b.tar.gz
Patch to allow Jgments to pull out the raw regexes more easily (#477)
Diffstat (limited to 'pygments/lexer.py')
-rw-r--r--pygments/lexer.py94
1 file changed, 56 insertions, 38 deletions
diff --git a/pygments/lexer.py b/pygments/lexer.py
index fbcc39a6..a22768a3 100644
--- a/pygments/lexer.py
+++ b/pygments/lexer.py
@@ -349,7 +349,53 @@ class RegexLexerMeta(LexerMeta):
self.tokens on the first instantiation.
"""
+ def _process_regex(cls, regex, rflags):
+ """Preprocess the regular expression component of a token definition."""
+ return re.compile(regex, rflags).match
+
def _process_token(cls, token):
    """Preprocess the token component of a token definition.

    A rule's token must be either a plain token type or a callback;
    anything else is a lexer-definition error caught here, at
    class-construction time, rather than during tokenization.
    """
    assert type(token) is _TokenType or callable(token), \
        'token type must be simple type or callable, not %r' % (token,)
    return token
+
+ def _process_new_state(cls, new_state, unprocessed, processed):
+ """Preprocess the state transition action of a token definition."""
+ if isinstance(new_state, str):
+ # an existing state
+ if new_state == '#pop':
+ return -1
+ elif new_state in unprocessed:
+ return (new_state,)
+ elif new_state == '#push':
+ return new_state
+ elif new_state[:5] == '#pop:':
+ return -int(new_state[5:])
+ else:
+ assert False, 'unknown new state %r' % new_state
+ elif isinstance(new_state, combined):
+ # combine a new state from existing ones
+ tmp_state = '_tmp_%d' % cls._tmpname
+ cls._tmpname += 1
+ itokens = []
+ for istate in new_state:
+ assert istate != new_state, 'circular state ref %r' % istate
+ itokens.extend(cls._process_state(unprocessed,
+ processed, istate))
+ processed[tmp_state] = itokens
+ return (tmp_state,)
+ elif isinstance(new_state, tuple):
+ # push more than one state
+ for istate in new_state:
+ assert (istate in unprocessed or
+ istate in ('#pop', '#push')), \
+ 'unknown new state ' + istate
+ return new_state
+ else:
+ assert False, 'unknown new state def %r' % new_state
+
def _process_state(cls, unprocessed, processed, state):
+ """Preprocess a single state definition."""
assert type(state) is str, "wrong state name %r" % state
assert state[0] != '#', "invalid state name %r" % state
if state in processed:
@@ -360,60 +406,31 @@ class RegexLexerMeta(LexerMeta):
if isinstance(tdef, include):
# it's a state reference
assert tdef != state, "circular state reference %r" % state
- tokens.extend(cls._process_state(unprocessed, processed, str(tdef)))
+ tokens.extend(cls._process_state(unprocessed, processed,
+ str(tdef)))
continue
assert type(tdef) is tuple, "wrong rule def %r" % tdef
try:
- rex = re.compile(tdef[0], rflags).match
+ rex = cls._process_regex(tdef[0], rflags)
except Exception, err:
raise ValueError("uncompilable regex %r in state %r of %r: %s" %
(tdef[0], state, cls, err))
- assert type(tdef[1]) is _TokenType or callable(tdef[1]), \
- 'token type must be simple type or callable, not %r' % (tdef[1],)
+ token = cls._process_token(tdef[1])
if len(tdef) == 2:
new_state = None
else:
- tdef2 = tdef[2]
- if isinstance(tdef2, str):
- # an existing state
- if tdef2 == '#pop':
- new_state = -1
- elif tdef2 in unprocessed:
- new_state = (tdef2,)
- elif tdef2 == '#push':
- new_state = tdef2
- elif tdef2[:5] == '#pop:':
- new_state = -int(tdef2[5:])
- else:
- assert False, 'unknown new state %r' % tdef2
- elif isinstance(tdef2, combined):
- # combine a new state from existing ones
- new_state = '_tmp_%d' % cls._tmpname
- cls._tmpname += 1
- itokens = []
- for istate in tdef2:
- assert istate != state, 'circular state ref %r' % istate
- itokens.extend(cls._process_state(unprocessed,
- processed, istate))
- processed[new_state] = itokens
- new_state = (new_state,)
- elif isinstance(tdef2, tuple):
- # push more than one state
- for state in tdef2:
- assert (state in unprocessed or
- state in ('#pop', '#push')), \
- 'unknown new state ' + state
- new_state = tdef2
- else:
- assert False, 'unknown new state def %r' % tdef2
- tokens.append((rex, tdef[1], new_state))
+ new_state = cls._process_new_state(tdef[2],
+ unprocessed, processed)
+
+ tokens.append((rex, token, new_state))
return tokens
def process_tokendef(cls, name, tokendefs=None):
+ """Preprocess a dictionary of token definitions."""
processed = cls._all_tokens[name] = {}
tokendefs = tokendefs or cls.tokens[name]
for state in tokendefs.keys():
@@ -421,6 +438,7 @@ class RegexLexerMeta(LexerMeta):
return processed
def __call__(cls, *args, **kwds):
+ """Instantiate cls after preprocessing its token definitions."""
if not hasattr(cls, '_tokens'):
cls._all_tokens = {}
cls._tmpname = 0