Cython/Plex/DFA.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149

# cython: auto_cpdef=True
"""
Python Lexical Analyser

Converting NFA to DFA
"""
from __future__ import absolute_import

from . import Machines
from .Machines import LOWEST_PRIORITY
from .Transitions import TransitionMap


def nfa_to_dfa(old_machine, debug=None):
    """
    Given a nondeterministic Machine, return a new equivalent
    Machine which is deterministic.
    """
    # We build a new machine whose states correspond to sets of states
    # in the old machine. Initially we add a new state corresponding to
    # the epsilon-closure of each initial old state. Then we give transitions
    # to each new state which are the union of all transitions out of any
    # of the corresponding old states. The new state reached on a given
    # character is the one corresponding to the set of states reachable
    # on that character from any of the old states. As new combinations of
    # old states are created, new states are added as needed until closure
    # is reached.
    new_machine = Machines.FastMachine()
    state_map = StateMap(new_machine)

    # Seed the process using the initial states of the old machine.
    # Make the corresponding new states into initial states of the new
    # machine with the same names.
    for (key, old_state) in old_machine.initial_states.items():
        new_state = state_map.old_to_new(epsilon_closure(old_state))
        new_machine.make_initial_state(key, new_state)

    # Tricky bit here: we add things to the end of this list while we're
    # iterating over it. The iteration stops when closure is achieved.
    for new_state in new_machine.states:
        transitions = TransitionMap()
        for old_state in state_map.new_to_old(new_state):
            for event, old_target_states in old_state.transitions.items():
                if event and old_target_states:
                    transitions.add_set(event, set_epsilon_closure(old_target_states))
        for event, old_states in transitions.items():
            new_machine.add_transitions(new_state, event, state_map.old_to_new(old_states))

    if debug:
        debug.write("\n===== State Mapping =====\n")
        state_map.dump(debug)
    return new_machine


def set_epsilon_closure(state_set):
    """
    Given a set of states, return the union of the epsilon
    closures of its member states.
    """
    result = {}
    for state1 in state_set:
        for state2 in epsilon_closure(state1):
            result[state2] = 1
    return result


def epsilon_closure(state):
    """
    Return the set of states reachable from the given state
    by epsilon moves.
    """
    # Cache the result
    result = state.epsilon_closure
    if result is None:
        result = {}
        state.epsilon_closure = result
        add_to_epsilon_closure(result, state)
    return result


def add_to_epsilon_closure(state_set, state):
    """
    Recursively add to |state_set| states reachable from the given state
    by epsilon moves.
    """
    if not state_set.get(state, 0):
        state_set[state] = 1
        state_set_2 = state.transitions.get_epsilon()
        if state_set_2:
            for state2 in state_set_2:
                add_to_epsilon_closure(state_set, state2)


class StateMap(object):
    """
    Helper class used by nfa_to_dfa() to map back and forth between
    sets of states from the old machine and states of the new machine.
    """

    def __init__(self, new_machine):
        self.new_machine = new_machine  # Machine
        self.old_to_new_dict = {}  # {(old_state,...) : new_state}
        self.new_to_old_dict = {}  # {id(new_state) : old_state_set}

    def old_to_new(self, old_state_set):
        """
        Return the state of the new machine corresponding to the
        set of old machine states represented by |state_set|. A new
        state will be created if necessary. If any of the old states
        are accepting states, the new state will be an accepting state
        with the highest priority action from the old states.
        """
        key = self.make_key(old_state_set)
        new_state = self.old_to_new_dict.get(key, None)
        if not new_state:
            action = self.highest_priority_action(old_state_set)
            new_state = self.new_machine.new_state(action)
            self.old_to_new_dict[key] = new_state
            self.new_to_old_dict[id(new_state)] = old_state_set
        return new_state

    def highest_priority_action(self, state_set):
        best_action = None
        best_priority = LOWEST_PRIORITY
        for state in state_set:
            priority = state.action_priority
            if priority > best_priority:
                best_action = state.action
                best_priority = priority
        return best_action

    def new_to_old(self, new_state):
        """Given a new state, return a set of corresponding old states."""
        return self.new_to_old_dict[id(new_state)]

    def make_key(self, state_set):
        """
        Convert a set of states into a uniquified
        sorted tuple suitable for use as a dictionary key.
        """
        return tuple(sorted(state_set))

    def dump(self, file):
        from .Transitions import state_set_str

        for new_state in self.new_machine.states:
            old_state_set = self.new_to_old_dict[id(new_state)]
            file.write("   State %s <-- %s\n" % (
                new_state['number'], state_set_str(old_state_set)))