Cython/Parser/ConcreteSyntaxTree.pyx


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91

cdef extern from "graminit.c":
    ctypedef struct grammar:
        pass
    cdef grammar _PyParser_Grammar
    cdef int Py_file_input

cdef extern from "node.h":
    ctypedef struct node
    void PyNode_Free(node* n)
    int NCH(node* n)
    node* CHILD(node* n, int ix)
    node* RCHILD(node* n, int ix)
    short TYPE(node* n)
    char* STR(node* n)

cdef extern from "parsetok.h":
    ctypedef struct perrdetail:
        pass
    cdef void PyParser_SetError(perrdetail *err) except *
    cdef node * PyParser_ParseStringFlagsFilenameEx(
        const char * s,
        const char * filename,
        grammar * g,
        int start,
        perrdetail * err_ret,
        int * flags)

import distutils.sysconfig
import os
import re

def extract_names(path):
    # All parse tree types are #defined in these files as ints.
    type_names = {}
    with open(path) as fid:
        for line in fid:
            if line.startswith('#define'):
                try:
                    _, name, value = line.strip().split()
                    type_names[int(value)] = name
                except:
                    pass
    return type_names

cdef dict type_names = {}

cdef print_tree(node* n, indent=""):
    if not type_names:
        type_names.update(extract_names(
            os.path.join(distutils.sysconfig.get_python_inc(), 'token.h')))
        type_names.update(extract_names(
            os.path.join(os.path.dirname(__file__), 'graminit.h')))

    print indent, type_names.get(TYPE(n), 'unknown'), <object>STR(n) if NCH(n) == 0 else NCH(n)
    indent += "  "
    for i in range(NCH(n)):
        print_tree(CHILD(n, i), indent)

def handle_includes(source, path):
    # TODO: Use include directory.
    def include_here(include_line):
        included = os.path.join(os.path.dirname(path), include_line.group(1)[1:-1])
        if not os.path.exists(included):
            return include_line.group(0) + ' # no such path: ' + included
        with open(included) as fid:
            return handle_includes(fid.read(), path)
    # TODO: Proper string tokenizing.
    return re.sub(r'^include\s+([^\n]+[\'"])\s*(#.*)?$', include_here, source, flags=re.M)

def p_module(path):
    cdef perrdetail err
    cdef int flags
    cdef node* n
    with open(path) as fid:
        source = fid.read()
    if '\ninclude ' in source:
        # TODO: Tokanizer needs to understand includes.
        source = handle_includes(source, path)
        path = "preparse(%s)" % path
    n = PyParser_ParseStringFlagsFilenameEx(
        source,
        path,
        &_PyParser_Grammar,
        Py_file_input,
        &err,
        &flags)
    if n:
#        print_tree(n)
        PyNode_Free(n)
    else:
        PyParser_SetError(&err)