# This example illustrates how to extract table names from nested # SELECT statements. # See: # http://groups.google.com/group/sqlparse/browse_thread/thread/b0bd9a022e9d4895 sql = """ select K.a from (select H.b from (select G.c from (select F.d from (select E.e from A, B, C, D, E), F), G), H), I, J, K; """ import sqlparse from sqlparse.sql import IdentifierList, Identifier from sqlparse.tokens import Keyword, DML def is_subselect(parsed): if not parsed.is_group(): return False for item in parsed.tokens: if item.ttype is DML and item.value.upper() == 'SELECT': return True return False def extract_from_part(parsed): from_seen = False for item in parsed.tokens: if from_seen: if is_subselect(item): for x in extract_from_part(item): yield x else: yield item elif item.ttype is Keyword and item.value.upper() == 'FROM': from_seen = True def extract_table_identifiers(token_stream): for item in token_stream: if isinstance(item, IdentifierList): for identifier in item.get_identifiers(): yield identifier.get_name() elif isinstance(item, Identifier): yield item.get_name() # It's a bug to check for Keyword here, but in the example # above some tables names are identified as keywords... elif item.ttype is Keyword: yield item.value def extract_tables(): stream = extract_from_part(sqlparse.parse(sql)[0]) return list(extract_table_identifiers(stream)) if __name__ == '__main__': print 'Tables: %s' % ', '.join(extract_tables())