summaryrefslogtreecommitdiff
path: root/src/examples/select_parser.py
blob: f969f028b8203b0058415aaa4925c36f0ce9888b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# select_parser.py
# Copyright 2010, Paul McGuire
#
# a simple SELECT statement parser, taken from SQLite's SELECT statement
# definition at http://www.sqlite.org/lang_select.html
#
from pyparsing import *
ParserElement.enablePackrat()

LPAR,RPAR,COMMA = map(Suppress,"(),")
select_stmt = Forward().setName("select statement")

# keywords
(UNION, ALL, AND, INTERSECT, EXCEPT, COLLATE, ASC, DESC, ON, USING, NATURAL, INNER, 
 CROSS, LEFT, OUTER, JOIN, AS, INDEXED, NOT, SELECT, DISTINCT, FROM, WHERE, GROUP, BY,
 HAVING, ORDER, BY, LIMIT, OFFSET, OR) =  map(CaselessKeyword, """UNION, ALL, AND, INTERSECT, 
 EXCEPT, COLLATE, ASC, DESC, ON, USING, NATURAL, INNER, CROSS, LEFT, OUTER, JOIN, AS, INDEXED, NOT, SELECT, 
 DISTINCT, FROM, WHERE, GROUP, BY, HAVING, ORDER, BY, LIMIT, OFFSET, OR""".replace(",","").split())
(CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE, END, CASE, WHEN, THEN, EXISTS,
 COLLATE, IN, LIKE, GLOB, REGEXP, MATCH, ESCAPE, CURRENT_TIME, CURRENT_DATE, 
 CURRENT_TIMESTAMP) = map(CaselessKeyword, """CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE, 
 END, CASE, WHEN, THEN, EXISTS, COLLATE, IN, LIKE, GLOB, REGEXP, MATCH, ESCAPE, 
 CURRENT_TIME, CURRENT_DATE, CURRENT_TIMESTAMP""".replace(",","").split())
keyword = MatchFirst((UNION, ALL, INTERSECT, EXCEPT, COLLATE, ASC, DESC, ON, USING, NATURAL, INNER, 
 CROSS, LEFT, OUTER, JOIN, AS, INDEXED, NOT, SELECT, DISTINCT, FROM, WHERE, GROUP, BY,
 HAVING, ORDER, BY, LIMIT, OFFSET, CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE, END, CASE, WHEN, THEN, EXISTS,
 COLLATE, IN, LIKE, GLOB, REGEXP, MATCH, ESCAPE, CURRENT_TIME, CURRENT_DATE, 
 CURRENT_TIMESTAMP))
 
identifier = ~keyword + Word(alphas, alphanums+"_")
collation_name = identifier.copy()
column_name = identifier.copy()
column_alias = identifier.copy()
table_name = identifier.copy()
table_alias = identifier.copy()
index_name = identifier.copy()
function_name = identifier.copy()
parameter_name = identifier.copy()
database_name = identifier.copy()

# expression
expr = Forward().setName("expression")

integer = Regex(r"[+-]?\d+")
numeric_literal = Regex(r"\d+(\.\d*)?([eE][+-]?\d+)?")
string_literal = QuotedString("'")
blob_literal = Regex(r"[xX]'[0-9A-Fa-f]+'")
literal_value = ( numeric_literal | string_literal | blob_literal |
    NULL | CURRENT_TIME | CURRENT_DATE | CURRENT_TIMESTAMP )
bind_parameter = (
    Word("?",nums) |
    Combine(oneOf(": @ $") + parameter_name)
    )
type_name = oneOf("TEXT REAL INTEGER BLOB NULL")

expr_term = (
    CAST + LPAR + expr + AS + type_name + RPAR |
    EXISTS + LPAR + select_stmt + RPAR |
    function_name.setName("function_name") + LPAR + Optional("*" | delimitedList(expr)) + RPAR |
    literal_value |
    bind_parameter |
    Combine(identifier+('.'+identifier)*(0,2)).setName("ident")
    )

UNARY,BINARY,TERNARY=1,2,3
expr << infixNotation(expr_term,
    [
    (oneOf('- + ~') | NOT, UNARY, opAssoc.RIGHT),
    (ISNULL | NOTNULL | NOT + NULL, UNARY, opAssoc.LEFT),
    ('||', BINARY, opAssoc.LEFT),
    (oneOf('* / %'), BINARY, opAssoc.LEFT),
    (oneOf('+ -'), BINARY, opAssoc.LEFT),
    (oneOf('<< >> & |'), BINARY, opAssoc.LEFT),
    (oneOf('< <= > >='), BINARY, opAssoc.LEFT),
    (oneOf('= == != <>') | IS | IN | LIKE | GLOB | MATCH | REGEXP, BINARY, opAssoc.LEFT),
    ((BETWEEN,AND), TERNARY, opAssoc.LEFT),
    (IN + LPAR + Group(select_stmt | delimitedList(expr)) + RPAR, UNARY, opAssoc.LEFT),
    (AND, BINARY, opAssoc.LEFT),
    (OR, BINARY, opAssoc.LEFT),
    ])

compound_operator = (UNION + Optional(ALL) | INTERSECT | EXCEPT)

ordering_term = Group(expr('order_key') + Optional(COLLATE + collation_name('collate')) + Optional(ASC | DESC)('direction'))

join_constraint = Group(Optional(ON + expr | USING + LPAR + Group(delimitedList(column_name)) + RPAR))

join_op = COMMA | Group(Optional(NATURAL) + Optional(INNER | CROSS | LEFT + OUTER | LEFT | OUTER) + JOIN)

join_source = Forward()
single_source = ( (Group(database_name("database") + "." + table_name("table*")) | table_name("table*")) + 
                    Optional(Optional(AS) + table_alias("table_alias*")) +
                    Optional(INDEXED + BY + index_name("name") | NOT + INDEXED)("index") | 
                  (LPAR + select_stmt + RPAR + Optional(Optional(AS) + table_alias)) | 
                  (LPAR + join_source + RPAR) )

join_source << (Group(single_source + OneOrMore(join_op + single_source + join_constraint)) | 
                single_source)

result_column = "*" | table_name + "." + "*" | Group(expr + Optional(Optional(AS) + column_alias))
select_core = (SELECT + Optional(DISTINCT | ALL) + Group(delimitedList(result_column))("columns") +
                Optional(FROM + join_source("from*")) +
                Optional(WHERE + expr("where_expr")) +
                Optional(GROUP + BY + Group(delimitedList(ordering_term)("group_by_terms")) + 
                        Optional(HAVING + expr("having_expr"))))

select_stmt << (select_core + ZeroOrMore(compound_operator + select_core) +
                Optional(ORDER + BY + Group(delimitedList(ordering_term))("order_by_terms")) +
                Optional(LIMIT + (Group(expr + OFFSET + expr) | Group(expr + COMMA + expr) | expr)("limit")))

tests = """\
    select * from xyzzy where z > 100
    select * from xyzzy where z > 100 order by zz
    select * from xyzzy
    select z.* from xyzzy
    select a, b from test_table where 1=1 and b='yes'
    select a, b from test_table where 1=1 and b in (select bb from foo)
    select z.a, b from test_table where 1=1 and b in (select bb from foo)
    select z.a, b from test_table where 1=1 and b in (select bb from foo) order by b,c desc,d
    select z.a, b from test_table left join test2_table where 1=1 and b in (select bb from foo)
    select a, db.table.b as BBB from db.table where 1=1 and BBB='yes'
    select a, db.table.b as BBB from test_table,db.table where 1=1 and BBB='yes'
    select a, db.table.b as BBB from test_table,db.table where 1=1 and BBB='yes' limit 50
    """

select_stmt.runTests(tests)