# select_parser.py # Copyright 2010,2019 Paul McGuire # # a simple SELECT statement parser, taken from SQLite's SELECT statement # definition at https://www.sqlite.org/lang_select.html # from pyparsing import * ParserElement.enablePackrat() LPAR, RPAR, COMMA = map(Suppress, "(),") DOT, STAR = map(Literal, ".*") select_stmt = Forward().setName("select statement") # keywords ( UNION, ALL, AND, INTERSECT, EXCEPT, COLLATE, ASC, DESC, ON, USING, NATURAL, INNER, CROSS, LEFT, OUTER, JOIN, AS, INDEXED, NOT, SELECT, DISTINCT, FROM, WHERE, GROUP, BY, HAVING, ORDER, BY, LIMIT, OFFSET, OR, ) = map( CaselessKeyword, """UNION, ALL, AND, INTERSECT, EXCEPT, COLLATE, ASC, DESC, ON, USING, NATURAL, INNER, CROSS, LEFT, OUTER, JOIN, AS, INDEXED, NOT, SELECT, DISTINCT, FROM, WHERE, GROUP, BY, HAVING, ORDER, BY, LIMIT, OFFSET, OR""".replace( ",", "" ).split(), ) ( CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE, END, CASE, WHEN, THEN, EXISTS, IN, LIKE, GLOB, REGEXP, MATCH, ESCAPE, CURRENT_TIME, CURRENT_DATE, CURRENT_TIMESTAMP, ) = map( CaselessKeyword, """CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE, END, CASE, WHEN, THEN, EXISTS, IN, LIKE, GLOB, REGEXP, MATCH, ESCAPE, CURRENT_TIME, CURRENT_DATE, CURRENT_TIMESTAMP""".replace( ",", "" ).split(), ) keyword = MatchFirst( ( UNION, ALL, INTERSECT, EXCEPT, COLLATE, ASC, DESC, ON, USING, NATURAL, INNER, CROSS, LEFT, OUTER, JOIN, AS, INDEXED, NOT, SELECT, DISTINCT, FROM, WHERE, GROUP, BY, HAVING, ORDER, BY, LIMIT, OFFSET, CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE, END, CASE, WHEN, THEN, EXISTS, COLLATE, IN, LIKE, GLOB, REGEXP, MATCH, ESCAPE, CURRENT_TIME, CURRENT_DATE, CURRENT_TIMESTAMP, ) ) identifier = ~keyword + Word(alphas, alphanums + "_") collation_name = identifier.copy() column_name = identifier.copy() column_alias = identifier.copy() table_name = identifier.copy() table_alias = identifier.copy() index_name = identifier.copy() function_name = identifier.copy() parameter_name = identifier.copy() database_name = identifier.copy() # expression expr = Forward().setName("expression") integer = Regex(r"[+-]?\d+") numeric_literal = Regex(r"\d+(\.\d*)?([eE][+-]?\d+)?") string_literal = QuotedString("'") blob_literal = Regex(r"[xX]'[0-9A-Fa-f]+'") literal_value = ( numeric_literal | string_literal | blob_literal | NULL | CURRENT_TIME | CURRENT_DATE | CURRENT_TIMESTAMP ) bind_parameter = Word("?", nums) | Combine(oneOf(": @ $") + parameter_name) type_name = oneOf("TEXT REAL INTEGER BLOB NULL") expr_term = ( CAST + LPAR + expr + AS + type_name + RPAR | EXISTS + LPAR + select_stmt + RPAR | function_name.setName("function_name") + LPAR + Optional(STAR | delimitedList(expr)) + RPAR | literal_value | bind_parameter | Group( identifier("col_db") + DOT + identifier("col_tab") + DOT + identifier("col") ) | Group(identifier("col_tab") + DOT + identifier("col")) | Group(identifier("col")) ) UNARY, BINARY, TERNARY = 1, 2, 3 expr << infixNotation( expr_term, [ (oneOf("- + ~") | NOT, UNARY, opAssoc.RIGHT), (ISNULL | NOTNULL | NOT + NULL, UNARY, opAssoc.LEFT), ("||", BINARY, opAssoc.LEFT), (oneOf("* / %"), BINARY, opAssoc.LEFT), (oneOf("+ -"), BINARY, opAssoc.LEFT), (oneOf("<< >> & |"), BINARY, opAssoc.LEFT), (oneOf("< <= > >="), BINARY, opAssoc.LEFT), ( oneOf("= == != <>") | IS | IN | LIKE | GLOB | MATCH | REGEXP, BINARY, opAssoc.LEFT, ), ((BETWEEN, AND), TERNARY, opAssoc.LEFT), ( IN + LPAR + Group(select_stmt | delimitedList(expr)) + RPAR, UNARY, opAssoc.LEFT, ), (AND, BINARY, opAssoc.LEFT), (OR, BINARY, opAssoc.LEFT), ], ) compound_operator = UNION + Optional(ALL) | INTERSECT | EXCEPT ordering_term = Group( expr("order_key") + Optional(COLLATE + collation_name("collate")) + Optional(ASC | DESC)("direction") ) join_constraint = Group( Optional(ON + expr | USING + LPAR + Group(delimitedList(column_name)) + RPAR) ) join_op = COMMA | Group( Optional(NATURAL) + Optional(INNER | CROSS | LEFT + OUTER | LEFT | OUTER) + JOIN ) join_source = Forward() single_source = ( Group(database_name("database") + DOT + table_name("table*") | table_name("table*")) + Optional(Optional(AS) + table_alias("table_alias*")) + Optional(INDEXED + BY + index_name("name") | NOT + INDEXED)("index") | (LPAR + select_stmt + RPAR + Optional(Optional(AS) + table_alias)) | (LPAR + join_source + RPAR) ) join_source <<= ( Group(single_source + OneOrMore(join_op + single_source + join_constraint)) | single_source ) # result_column = "*" | table_name + "." + "*" | Group(expr + Optional(Optional(AS) + column_alias)) result_column = Group( STAR("col") | table_name("col_table") + DOT + STAR("col") | expr("col") + Optional(Optional(AS) + column_alias("alias")) ) select_core = ( SELECT + Optional(DISTINCT | ALL) + Group(delimitedList(result_column))("columns") + Optional(FROM + join_source("from*")) + Optional(WHERE + expr("where_expr")) + Optional( GROUP + BY + Group(delimitedList(ordering_term))("group_by_terms") + Optional(HAVING + expr("having_expr")) ) ) select_stmt << ( select_core + ZeroOrMore(compound_operator + select_core) + Optional(ORDER + BY + Group(delimitedList(ordering_term))("order_by_terms")) + Optional( LIMIT + (Group(expr + OFFSET + expr) | Group(expr + COMMA + expr) | expr)("limit") ) ) tests = """\ select * from xyzzy where z > 100 select * from xyzzy where z > 100 order by zz select * from xyzzy select z.* from xyzzy select a, b from test_table where 1=1 and b='yes' select a, b from test_table where 1=1 and b in (select bb from foo) select z.a, b from test_table where 1=1 and b in (select bb from foo) select z.a, b from test_table where 1=1 and b in (select bb from foo) order by b,c desc,d select z.a, b from test_table left join test2_table where 1=1 and b in (select bb from foo) select a, db.table.b as BBB from db.table where 1=1 and BBB='yes' select a, db.table.b as BBB from test_table,db.table where 1=1 and BBB='yes' select a, db.table.b as BBB from test_table,db.table where 1=1 and BBB='yes' limit 50 select a, b from test_table where (1=1 or 2=3) and b='yes' group by zx having b=2 order by 1 SELECT emp.ename as e FROM scott.employee as emp SELECT ename as e, fname as f FROM scott.employee as emp SELECT emp.eid, fname,lname FROM scott.employee as emp SELECT ename, lname, emp.eid FROM scott.employee as emp select emp.salary * (1.0 + emp.bonus) as salary_plus_bonus from scott.employee as emp """ select_stmt.runTests(tests)