1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
|
# -*- coding: utf-8 -*-
"""
Basic Grammar Notation Tests
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
:copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
import pytest
from pygments.token import Token
from pygments.lexers import PegLexer
@pytest.fixture(scope='module')
def lexer_peg():
    """Provide a single ``PegLexer`` instance, created once per test module."""
    peg_lexer = PegLexer()
    yield peg_lexer
def test_peg_basic(lexer_peg):
    """Lex one compact rule and compare the full token stream.

    The input packs an arrow definition, grouping parentheses, a
    double-quoted terminal, ordered-choice slashes, a nonterminal, a
    bracketed character class and a repetition star into a single line
    with no whitespace other than the trailing newline.
    """
    source = u'rule<-("terminal"/nonterminal/[cls])*\n'
    expected = [
        (Token.Name.Class, u'rule'),
        (Token.Operator, u'<-'),
        (Token.Punctuation, u'('),
        (Token.String.Double, u'"terminal"'),
        (Token.Operator, u'/'),
        (Token.Name.Class, u'nonterminal'),
        (Token.Operator, u'/'),
        (Token.Punctuation, u'['),
        (Token.String, u'cls'),
        (Token.Punctuation, u']'),
        (Token.Punctuation, u')'),
        (Token.Operator, u'*'),
        (Token.Text, u'\n'),
    ]
    actual = list(lexer_peg.get_tokens(source))
    assert actual == expected
def test_peg_operators(lexer_peg):
    """Check that ``=``, ``|``, ``:`` and ``~`` are each lexed as operators.

    Every case pairs an input line with the exact token stream expected
    from the lexer; the cases are checked in order.
    """
    # see for example:
    # - https://github.com/gvanrossum/pegen
    # - https://nim-lang.org/docs/pegs.html
    cases = (
        (
            u"rule = 'a' | 'b'\n",
            [
                (Token.Name.Class, u'rule'),
                (Token.Text, u' '),
                (Token.Operator, u'='),
                (Token.Text, u' '),
                (Token.String.Single, u"'a'"),
                (Token.Text, u' '),
                (Token.Operator, u'|'),
                (Token.Text, u' '),
                (Token.String.Single, u"'b'"),
                (Token.Text, u'\n'),
            ],
        ),
        (
            u"rule: 'a' ~ 'b'\n",
            [
                (Token.Name.Class, u'rule'),
                (Token.Operator, u':'),
                (Token.Text, u' '),
                (Token.String.Single, u"'a'"),
                (Token.Text, u' '),
                (Token.Operator, u'~'),
                (Token.Text, u' '),
                (Token.String.Single, u"'b'"),
                (Token.Text, u'\n'),
            ],
        ),
    )
    for source, expected in cases:
        assert list(lexer_peg.get_tokens(source)) == expected
def test_peg_modified_strings(lexer_peg):
    """Check string literals that carry prefix or suffix modifier letters.

    A prefix letter (``i"..."``) and trailing letters (``"..."ilx``) are
    expected to be part of the string token, while a string directly
    followed by ``)`` stays a plain string.
    """
    # see for example:
    # - http://textx.github.io/Arpeggio/
    # - https://nim-lang.org/docs/pegs.html
    # - https://github.com/erikrose/parsimonious
    source = u'~"regex" i"insensitive" "multimod"ilx ("not modified")\n'
    expected = [
        # parsimonious-style ~"regex" can't be handled as one string while
        # ~ is a cut operator, so the tilde is expected as its own token
        (Token.Operator, u'~'),
        (Token.String.Double, u'"regex"'),
        (Token.Text, u' '),
        (Token.String.Double, u'i"insensitive"'),
        (Token.Text, u' '),
        (Token.String.Double, u'"multimod"ilx'),
        (Token.Text, u' '),
        (Token.Punctuation, u'('),
        (Token.String.Double, u'"not modified"'),
        (Token.Punctuation, u')'),
        (Token.Text, u'\n'),
    ]
    actual = list(lexer_peg.get_tokens(source))
    assert actual == expected
|