-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathNFA.py
More file actions
executable file
·115 lines (103 loc) · 4.63 KB
/
NFA.py
File metadata and controls
executable file
·115 lines (103 loc) · 4.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
""" Class to create a NFA from a given regular expression """
from AutomatonFactory import AutomatonFactory
class NFA:
"""A class used to represent a DFA
See also https://en.wikipedia.org/wiki/Nondeterministic_finite_automaton
Attributes
----------
nfa : Automaton
the NFA represented by the class instance
"""
def __init__(self, regex):
self.nfa_stack = []
self.op_stack = []
self.nfa = None
self.star = '*'
self.choice = '|'
self.concat = '&'
self.operators = [self.star, self.concat, self.choice]
self.regex = regex
self.alphabet = [chr(i) for i in range(33, 127)]
self.alphabet.extend(
['−', 'Å', '°', 'µ', 'μ', 'Â', '∞', '¢', '∑', '·', 'Δ', 'γ', '∼', 'α', 'β', '≥', '′', '×', '±', 'Њ',
u'\u202B', u'\u202C', 'ע', 'ω', 'λ', 'π', '→', 'θ', '•', 'À', 'º', '∈', 'Ϯ', 'ϭ', 'þ', '˚', 'Ϫ', 'σ', 'χ',
'²', '≤', 'τ', '', '¼', 'à', '〈', '〉', 'Ã', 'δ', 'ζ', 'ο', 'Ν', 'Π', 'ψ', 'ε', 'Ι', 'Σ', 'Κ', 'ι', 'Ζ',
'Χ', 'Φ', 'ν', 'Γ', 'ξ', 'η', 'Λ', 'Μ', 'Υ', 'υ', 'Θ', 'Τ', 'Ξ', 'Ε', 'φ', 'Ω', 'Η', 'ρ', 'Ψ', 'Ο', 'Ρ',
'κ', 'Ͻ', 'Ͼ', '£', '≈'])
self.build_nfa()
def get_nfa(self):
return self.nfa
def print_nfa(self):
self.nfa.print_automaton()
def build_nfa(self):
"""Builds a NFA from a given regular expression (in shunting yard format) using thompson's construction.
See also https://en.wikipedia.org/wiki/Thompson%27s_construction
Returns
-------
Automaton
the converted Automaton
"""
lang = set()
escaped = False
command = False
escaped_class = False
char_store = ''
for char in self.regex:
if escaped_class:
if char == ']' and char_store[-1] != '\\':
char_store += char
lang.add(char_store)
self.nfa_stack.append(AutomatonFactory.default_automaton(char_store))
char_store = ''
escaped_class = False
else:
char_store += char
elif command:
if char == 'e':
self.nfa_stack.append(AutomatonFactory.empty_automaton())
elif char == '[':
escaped_class = True
char_store = '!['
else:
lang.add('!' + char)
self.nfa_stack.append(AutomatonFactory.default_automaton('!' + char))
command = False
elif char == '!' and not escaped:
if escaped:
lang.add(char)
self.nfa_stack.append(AutomatonFactory.default_automaton(char))
else:
command = True
elif char == '\\' and not escaped:
escaped = True
elif char in self.operators and not escaped:
if len(self.nfa_stack) < 1:
raise Exception("Can't apply %s on empty NFA stack" % char)
if char == self.star:
nfa = self.nfa_stack.pop()
self.nfa_stack.append(AutomatonFactory.star_automaton(nfa))
elif len(self.nfa_stack) < 2:
raise Exception("Can't apply %s on less than two NFAs" % char)
elif char == self.choice:
nfa1 = self.nfa_stack.pop()
nfa2 = self.nfa_stack.pop()
self.nfa_stack.append(AutomatonFactory.choice_automaton(nfa2, nfa1))
elif char == self.concat:
nfa1 = self.nfa_stack.pop()
nfa2 = self.nfa_stack.pop()
self.nfa_stack.append(AutomatonFactory.concat_automaton(nfa2, nfa1))
else:
raise Exception("Unhandled operator %s!" % char)
elif char in self.alphabet or escaped:
if escaped and char == 'e':
self.nfa_stack.append(AutomatonFactory.empty_automaton())
else:
lang.add(char)
self.nfa_stack.append(AutomatonFactory.default_automaton(char))
escaped = False
else:
raise Exception("Unrecognized letter %s! Consider adding it to your alphabet." % char)
if len(self.nfa_stack) > 1: # invalid regex
raise Exception("Invalid regex.")
self.nfa = self.nfa_stack.pop()
self.nfa.SIGMA = lang