summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul McGuire <ptmcg@austin.rr.com>2019-07-06 15:11:30 -0500
committerPaul McGuire <ptmcg@austin.rr.com>2019-07-06 15:11:30 -0500
commitccfc76ac49189de3fe7de5062008fe12d981e5da (patch)
treefb57f8aed575098f3b3701259aea2995427e154a
parent9ceaeb15f3054ba798392374437444815ac45d17 (diff)
downloadpyparsing-git-ccfc76ac49189de3fe7de5062008fe12d981e5da.tar.gz
Code reorg/reformat, added results names
-rw-r--r--examples/select_parser.py278
1 files changed, 151 insertions, 127 deletions
diff --git a/examples/select_parser.py b/examples/select_parser.py
index 79a3366..7f9273c 100644
--- a/examples/select_parser.py
+++ b/examples/select_parser.py
@@ -1,127 +1,151 @@
-# select_parser.py
-# Copyright 2010, Paul McGuire
-#
-# a simple SELECT statement parser, taken from SQLite's SELECT statement
-# definition at https://www.sqlite.org/lang_select.html
-#
-from pyparsing import *
-ParserElement.enablePackrat()
-
-LPAR,RPAR,COMMA = map(Suppress,"(),")
-select_stmt = Forward().setName("select statement")
-
-# keywords
-(UNION, ALL, AND, INTERSECT, EXCEPT, COLLATE, ASC, DESC, ON, USING, NATURAL, INNER,
- CROSS, LEFT, OUTER, JOIN, AS, INDEXED, NOT, SELECT, DISTINCT, FROM, WHERE, GROUP, BY,
- HAVING, ORDER, BY, LIMIT, OFFSET, OR) = map(CaselessKeyword, """UNION, ALL, AND, INTERSECT,
- EXCEPT, COLLATE, ASC, DESC, ON, USING, NATURAL, INNER, CROSS, LEFT, OUTER, JOIN, AS, INDEXED, NOT, SELECT,
- DISTINCT, FROM, WHERE, GROUP, BY, HAVING, ORDER, BY, LIMIT, OFFSET, OR""".replace(",","").split())
-(CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE, END, CASE, WHEN, THEN, EXISTS,
- COLLATE, IN, LIKE, GLOB, REGEXP, MATCH, ESCAPE, CURRENT_TIME, CURRENT_DATE,
- CURRENT_TIMESTAMP) = map(CaselessKeyword, """CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE,
- END, CASE, WHEN, THEN, EXISTS, COLLATE, IN, LIKE, GLOB, REGEXP, MATCH, ESCAPE,
- CURRENT_TIME, CURRENT_DATE, CURRENT_TIMESTAMP""".replace(",","").split())
-keyword = MatchFirst((UNION, ALL, INTERSECT, EXCEPT, COLLATE, ASC, DESC, ON, USING, NATURAL, INNER,
- CROSS, LEFT, OUTER, JOIN, AS, INDEXED, NOT, SELECT, DISTINCT, FROM, WHERE, GROUP, BY,
- HAVING, ORDER, BY, LIMIT, OFFSET, CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE, END, CASE, WHEN, THEN, EXISTS,
- COLLATE, IN, LIKE, GLOB, REGEXP, MATCH, ESCAPE, CURRENT_TIME, CURRENT_DATE,
- CURRENT_TIMESTAMP))
-
-identifier = ~keyword + Word(alphas, alphanums+"_")
-collation_name = identifier.copy()
-column_name = identifier.copy()
-column_alias = identifier.copy()
-table_name = identifier.copy()
-table_alias = identifier.copy()
-index_name = identifier.copy()
-function_name = identifier.copy()
-parameter_name = identifier.copy()
-database_name = identifier.copy()
-
-# expression
-expr = Forward().setName("expression")
-
-integer = Regex(r"[+-]?\d+")
-numeric_literal = Regex(r"\d+(\.\d*)?([eE][+-]?\d+)?")
-string_literal = QuotedString("'")
-blob_literal = Regex(r"[xX]'[0-9A-Fa-f]+'")
-literal_value = ( numeric_literal | string_literal | blob_literal |
- NULL | CURRENT_TIME | CURRENT_DATE | CURRENT_TIMESTAMP )
-bind_parameter = (
- Word("?",nums) |
- Combine(oneOf(": @ $") + parameter_name)
- )
-type_name = oneOf("TEXT REAL INTEGER BLOB NULL")
-
-expr_term = (
- CAST + LPAR + expr + AS + type_name + RPAR |
- EXISTS + LPAR + select_stmt + RPAR |
- function_name.setName("function_name") + LPAR + Optional("*" | delimitedList(expr)) + RPAR |
- literal_value |
- bind_parameter |
- Combine(identifier+('.'+identifier)*(0,2)).setName("ident")
- )
-
-UNARY,BINARY,TERNARY=1,2,3
-expr << infixNotation(expr_term,
- [
- (oneOf('- + ~') | NOT, UNARY, opAssoc.RIGHT),
- (ISNULL | NOTNULL | NOT + NULL, UNARY, opAssoc.LEFT),
- ('||', BINARY, opAssoc.LEFT),
- (oneOf('* / %'), BINARY, opAssoc.LEFT),
- (oneOf('+ -'), BINARY, opAssoc.LEFT),
- (oneOf('<< >> & |'), BINARY, opAssoc.LEFT),
- (oneOf('< <= > >='), BINARY, opAssoc.LEFT),
- (oneOf('= == != <>') | IS | IN | LIKE | GLOB | MATCH | REGEXP, BINARY, opAssoc.LEFT),
- ((BETWEEN,AND), TERNARY, opAssoc.LEFT),
- (IN + LPAR + Group(select_stmt | delimitedList(expr)) + RPAR, UNARY, opAssoc.LEFT),
- (AND, BINARY, opAssoc.LEFT),
- (OR, BINARY, opAssoc.LEFT),
- ])
-
-compound_operator = (UNION + Optional(ALL) | INTERSECT | EXCEPT)
-
-ordering_term = Group(expr('order_key') + Optional(COLLATE + collation_name('collate')) + Optional(ASC | DESC)('direction'))
-
-join_constraint = Group(Optional(ON + expr | USING + LPAR + Group(delimitedList(column_name)) + RPAR))
-
-join_op = COMMA | Group(Optional(NATURAL) + Optional(INNER | CROSS | LEFT + OUTER | LEFT | OUTER) + JOIN)
-
-join_source = Forward()
-single_source = ( (Group(database_name("database") + "." + table_name("table*")) | table_name("table*")) +
- Optional(Optional(AS) + table_alias("table_alias*")) +
- Optional(INDEXED + BY + index_name("name") | NOT + INDEXED)("index") |
- (LPAR + select_stmt + RPAR + Optional(Optional(AS) + table_alias)) |
- (LPAR + join_source + RPAR) )
-
-join_source << (Group(single_source + OneOrMore(join_op + single_source + join_constraint)) |
- single_source)
-
-result_column = "*" | table_name + "." + "*" | Group(expr + Optional(Optional(AS) + column_alias))
-select_core = (SELECT + Optional(DISTINCT | ALL) + Group(delimitedList(result_column))("columns") +
- Optional(FROM + join_source("from*")) +
- Optional(WHERE + expr("where_expr")) +
- Optional(GROUP + BY + Group(delimitedList(ordering_term))("group_by_terms") +
- Optional(HAVING + expr("having_expr"))))
-
-select_stmt << (select_core + ZeroOrMore(compound_operator + select_core) +
- Optional(ORDER + BY + Group(delimitedList(ordering_term))("order_by_terms")) +
- Optional(LIMIT + (Group(expr + OFFSET + expr) | Group(expr + COMMA + expr) | expr)("limit")))
-
-tests = """\
- select * from xyzzy where z > 100
- select * from xyzzy where z > 100 order by zz
- select * from xyzzy
- select z.* from xyzzy
- select a, b from test_table where 1=1 and b='yes'
- select a, b from test_table where 1=1 and b in (select bb from foo)
- select z.a, b from test_table where 1=1 and b in (select bb from foo)
- select z.a, b from test_table where 1=1 and b in (select bb from foo) order by b,c desc,d
- select z.a, b from test_table left join test2_table where 1=1 and b in (select bb from foo)
- select a, db.table.b as BBB from db.table where 1=1 and BBB='yes'
- select a, db.table.b as BBB from test_table,db.table where 1=1 and BBB='yes'
- select a, db.table.b as BBB from test_table,db.table where 1=1 and BBB='yes' limit 50
- select a, b from test_table where (1=1 or 2=3) and b='yes' group by zx having b=2 order by 1
- """
-
-select_stmt.runTests(tests)
+# select_parser.py
+# Copyright 2010,2019 Paul McGuire
+#
+# a simple SELECT statement parser, taken from SQLite's SELECT statement
+# definition at https://www.sqlite.org/lang_select.html
+#
+from pyparsing import *
+ParserElement.enablePackrat()
+
+LPAR,RPAR,COMMA = map(Suppress,"(),")
+DOT,STAR = map(Literal, ".*")
+select_stmt = Forward().setName("select statement")
+
+# keywords
+(UNION, ALL, AND, INTERSECT, EXCEPT, COLLATE, ASC, DESC, ON, USING, NATURAL, INNER,
+ CROSS, LEFT, OUTER, JOIN, AS, INDEXED, NOT, SELECT, DISTINCT, FROM, WHERE, GROUP, BY,
+ HAVING, ORDER, BY, LIMIT, OFFSET, OR) = map(CaselessKeyword, """UNION, ALL, AND, INTERSECT,
+ EXCEPT, COLLATE, ASC, DESC, ON, USING, NATURAL, INNER, CROSS, LEFT, OUTER, JOIN, AS, INDEXED, NOT, SELECT,
+ DISTINCT, FROM, WHERE, GROUP, BY, HAVING, ORDER, BY, LIMIT, OFFSET, OR""".replace(",","").split())
+(CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE, END, CASE, WHEN, THEN, EXISTS, IN, LIKE, GLOB, REGEXP,
+ MATCH, ESCAPE, CURRENT_TIME, CURRENT_DATE, CURRENT_TIMESTAMP) = map(CaselessKeyword,
+ """CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE, END, CASE, WHEN, THEN, EXISTS, IN, LIKE, GLOB,
+ REGEXP, MATCH, ESCAPE, CURRENT_TIME, CURRENT_DATE, CURRENT_TIMESTAMP""".replace(",","").split())
+keyword = MatchFirst((UNION, ALL, INTERSECT, EXCEPT, COLLATE, ASC, DESC, ON, USING, NATURAL, INNER,
+ CROSS, LEFT, OUTER, JOIN, AS, INDEXED, NOT, SELECT, DISTINCT, FROM, WHERE, GROUP, BY,
+ HAVING, ORDER, BY, LIMIT, OFFSET, CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE, END,
+ CASE, WHEN, THEN, EXISTS, COLLATE, IN, LIKE, GLOB, REGEXP, MATCH, ESCAPE,
+ CURRENT_TIME, CURRENT_DATE, CURRENT_TIMESTAMP))
+
+identifier = ~keyword + Word(alphas, alphanums+"_")
+collation_name = identifier.copy()
+column_name = identifier.copy()
+column_alias = identifier.copy()
+table_name = identifier.copy()
+table_alias = identifier.copy()
+index_name = identifier.copy()
+function_name = identifier.copy()
+parameter_name = identifier.copy()
+database_name = identifier.copy()
+
+# expression
+expr = Forward().setName("expression")
+
+integer = Regex(r"[+-]?\d+")
+numeric_literal = Regex(r"\d+(\.\d*)?([eE][+-]?\d+)?")
+string_literal = QuotedString("'")
+blob_literal = Regex(r"[xX]'[0-9A-Fa-f]+'")
+literal_value = (
+ numeric_literal
+ | string_literal
+ | blob_literal
+ | NULL
+ | CURRENT_TIME
+ | CURRENT_DATE
+ | CURRENT_TIMESTAMP
+ )
+bind_parameter = (
+ Word("?",nums)
+ | Combine(oneOf(": @ $") + parameter_name)
+ )
+type_name = oneOf("TEXT REAL INTEGER BLOB NULL")
+
+expr_term = (
+ CAST + LPAR + expr + AS + type_name + RPAR
+ | EXISTS + LPAR + select_stmt + RPAR
+ | function_name.setName("function_name") + LPAR + Optional(STAR | delimitedList(expr)) + RPAR
+ | literal_value
+ | bind_parameter
+ | Group(identifier('col_db') + DOT + identifier('col_tab') + DOT + identifier('col'))
+ | Group(identifier('col_tab') + DOT + identifier('col'))
+ | Group(identifier('col'))
+ )
+
+UNARY,BINARY,TERNARY=1,2,3
+expr << infixNotation(expr_term,
+ [
+ (oneOf('- + ~') | NOT, UNARY, opAssoc.RIGHT),
+ (ISNULL | NOTNULL | NOT + NULL, UNARY, opAssoc.LEFT),
+ ('||', BINARY, opAssoc.LEFT),
+ (oneOf('* / %'), BINARY, opAssoc.LEFT),
+ (oneOf('+ -'), BINARY, opAssoc.LEFT),
+ (oneOf('<< >> & |'), BINARY, opAssoc.LEFT),
+ (oneOf('< <= > >='), BINARY, opAssoc.LEFT),
+ (oneOf('= == != <>') | IS | IN | LIKE | GLOB | MATCH | REGEXP, BINARY, opAssoc.LEFT),
+ ((BETWEEN,AND), TERNARY, opAssoc.LEFT),
+ (IN + LPAR + Group(select_stmt | delimitedList(expr)) + RPAR, UNARY, opAssoc.LEFT),
+ (AND, BINARY, opAssoc.LEFT),
+ (OR, BINARY, opAssoc.LEFT),
+ ])
+
+compound_operator = (UNION + Optional(ALL) | INTERSECT | EXCEPT)
+
+ordering_term = Group(expr('order_key')
+ + Optional(COLLATE + collation_name('collate'))
+ + Optional(ASC | DESC)('direction'))
+
+join_constraint = Group(Optional(ON + expr | USING + LPAR + Group(delimitedList(column_name)) + RPAR))
+
+join_op = COMMA | Group(Optional(NATURAL) + Optional(INNER | CROSS | LEFT + OUTER | LEFT | OUTER) + JOIN)
+
+join_source = Forward()
+single_source = (
+ Group(database_name("database") + DOT + table_name("table*")
+ | table_name("table*"))
+ + Optional(Optional(AS) + table_alias("table_alias*"))
+ + Optional(INDEXED + BY + index_name("name") | NOT + INDEXED)("index")
+ | (LPAR + select_stmt + RPAR + Optional(Optional(AS) + table_alias))
+ | (LPAR + join_source + RPAR)
+ )
+
+join_source <<= (Group(single_source + OneOrMore(join_op + single_source + join_constraint))
+ | single_source)
+
+# result_column = "*" | table_name + "." + "*" | Group(expr + Optional(Optional(AS) + column_alias))
+result_column = Group(STAR('col')
+ | table_name('col_table') + DOT + STAR('col')
+ | expr('col') + Optional(Optional(AS) + column_alias('alias'))
+ )
+
+select_core = (SELECT + Optional(DISTINCT | ALL) + Group(delimitedList(result_column))("columns")
+ + Optional(FROM + join_source("from*"))
+ + Optional(WHERE + expr("where_expr"))
+ + Optional(GROUP + BY + Group(delimitedList(ordering_term))("group_by_terms")
+ + Optional(HAVING + expr("having_expr"))))
+
+select_stmt << (select_core + ZeroOrMore(compound_operator + select_core)
+ + Optional(ORDER + BY + Group(delimitedList(ordering_term))("order_by_terms"))
+ + Optional(LIMIT + (Group(expr + OFFSET + expr) | Group(expr + COMMA + expr) | expr)("limit"))
+ )
+
+tests = """\
+ select * from xyzzy where z > 100
+ select * from xyzzy where z > 100 order by zz
+ select * from xyzzy
+ select z.* from xyzzy
+ select a, b from test_table where 1=1 and b='yes'
+ select a, b from test_table where 1=1 and b in (select bb from foo)
+ select z.a, b from test_table where 1=1 and b in (select bb from foo)
+ select z.a, b from test_table where 1=1 and b in (select bb from foo) order by b,c desc,d
+ select z.a, b from test_table left join test2_table where 1=1 and b in (select bb from foo)
+ select a, db.table.b as BBB from db.table where 1=1 and BBB='yes'
+ select a, db.table.b as BBB from test_table,db.table where 1=1 and BBB='yes'
+ select a, db.table.b as BBB from test_table,db.table where 1=1 and BBB='yes' limit 50
+ select a, b from test_table where (1=1 or 2=3) and b='yes' group by zx having b=2 order by 1
+ SELECT emp.ename as e FROM scott.employee as emp
+ SELECT ename as e, fname as f FROM scott.employee as emp
+ SELECT emp.eid, fname,lname FROM scott.employee as emp
+ SELECT ename, lname, emp.eid FROM scott.employee as emp
+ select emp.salary * (1.0 + emp.bonus) as salary_plus_bonus from scott.employee as emp
+"""
+select_stmt.runTests(tests)