环境:Python 3.6 + Jupyter Notebook
词法分析中的NFA 转DFA
原理介绍(a case):
代码实现:
import numpy as np
K = []  # NFA state names in file order; appended to by data_input()
Sgm = []  # column symbols from the header line of nfa.txt; assigned by data_input()
F = []  # NFA transition table; data_input() replaces this with a NumPy int32 array
Z = []  # declared global in data_input() but never assigned in the code shown here
K_num = {}  # state name -> row index in F; data_input() maps '#' to -1
def data_input():
    """Load the NFA transition table from ``nfa.txt`` into the module globals.

    File format (whitespace separated, blank lines ignored):

    * line 1: the column symbols (``e_closure`` treats column 0 as the
      epsilon column);
    * every other line: a state name (its first character) followed by one
      cell per symbol.  A cell is ``#`` (no transition), a single state
      name, or several state names joined with ``_``.

    Globals written: ``Sgm`` (symbols), ``K`` (state names), ``K_num``
    (name -> row index, ``'#'`` -> -1) and ``F`` (int32 array, one row per
    state and one column per symbol).

    Cell encoding in ``F``: ``-1`` means no transition, a value ``>= 0`` is
    a single target index, and a value ``< -1`` is the negated decimal
    concatenation of several target indices (``ns2n`` decodes it digit by
    digit).

    Raises:
        ValueError: if the file has no content, or a multi-target cell
            cannot be represented by the digit encoding (an index above 9,
            a leading index 0, or a value that does not fit in int32).
        KeyError: if a cell names an unknown state.
    """
    global K, Sgm, F, K_num
    with open('nfa.txt', 'r', encoding='utf-8') as f:
        # Blank lines would otherwise be registered as bogus states.
        lines = [line for line in f if line.strip()]
    if not lines:
        raise ValueError("nfa.txt contains no header line")

    Sgm = lines[0].split()
    rows = lines[1:]

    # Rebuild the state tables from scratch so that calling this function
    # again (e.g. re-running a notebook cell) does not duplicate states.
    K = [row[0] for row in rows]
    K_num = {'#': -1}
    for index, name in enumerate(K):
        K_num[name] = index

    # Cells missing from a short row must mean "no transition" (-1), not a
    # transition to state 0.
    F = np.full((len(rows), len(Sgm)), -1, dtype=np.int32)
    int32_max = np.iinfo(np.int32).max

    for r, row in enumerate(rows):
        for c, cell in enumerate(row[1:].split()):
            if len(cell) > 1:  # several targets, e.g. "B_F_E"
                targets = [K_num[name] for name in cell.split('_')]
                # One decimal digit per target: indices above 9 would be
                # split into two states and a leading 0 would vanish.
                if targets[0] == 0 or any(not 0 <= t <= 9 for t in targets):
                    raise ValueError(
                        "cannot encode transition %r of state %r: each "
                        "target index must be 0-9 and the first must not "
                        "be 0" % (cell, row[0])
                    )
                code = int(''.join(str(t) for t in targets))
                if code > int32_max:
                    raise ValueError(
                        "transition %r of state %r has too many targets "
                        "to encode" % (cell, row[0])
                    )
                F[r, c] = -code  # negative marks a multi-target cell
            else:
                F[r, c] = K_num[cell]
def ns2n(n):
    """Decode one transition-table cell into a list of state indices.

    * ``n < -1``: ``-n`` is read as a string of decimal digits, one state
      index per digit; the digits are returned least-significant first.
    * ``n == -1``: no transition, so an empty list is returned (callers
      expect a list in every case).
    * anything else: a single target, returned as ``[n]``.
    """
    if n < -1:
        remaining = -n
        digits = []
        while remaining > 0:
            remaining, digit = divmod(remaining, 10)
            digits.append(int(digit))
        return digits
    return [] if n == -1 else [n]
def e_closure(K):
    """Return the epsilon-closure of the NFA states in *K*.

    *K* is any iterable of state indices (it shadows the module-level
    ``K``).  The closure contains every state of *K* plus every state
    reachable from them through column 0 of ``F`` (the epsilon column),
    decoded with ``ns2n``.

    The closure is computed from scratch with a local worklist, so the
    result no longer depends on the caller having reset the global ``II``
    beforehand, and long epsilon chains cannot exhaust the recursion limit.
    For callers that read ``II`` after the call, the result is still
    published there, and the returned set is that same object.
    """
    global II
    closure = set()
    pending = list(K)
    while pending:
        state = pending.pop()
        if state in closure:
            continue  # already expanded; this also breaks epsilon cycles
        closure.add(state)
        pending.extend(
            target for target in ns2n(F[state][0]) if target not in closure
        )
    II = closure
    return closure
def move(k, sgm):
    """Return the list of states reached from state *k* on symbol column *sgm*.

    *k* is a row index and *sgm* a column index of the transition table
    ``F``; the cell is decoded by ``ns2n``, so the result is a (possibly
    empty) list of state indices.
    """
    return ns2n(F[k, sgm])
pre_I_queue = []  # worklist: DFA states (sets of NFA state indices) still to expand
I_queue = []  # every DFA state discovered so far, in discovery order
II = set()  # accumulator used by e_closure(); reset before every call
I = set()  # DFA state currently being expanded; assigned by main()
F_DFA = np.zeros(3).reshape(-1, 3)  # not filled in by the code shown here


def main():
    """Run the subset construction on the loaded NFA and print the result.

    ``data_input()`` must have been called first so that ``F`` and ``Sgm``
    are populated.  The start state is the epsilon-closure of NFA state 0;
    it is computed here rather than at module level, because at module
    level the transition table has not been loaded yet.  The worklists are
    re-initialised on every call, so ``main()`` can be run repeatedly.

    For every DFA state the function prints the state followed by, for each
    symbol column except column 0 (the epsilon column), the column index
    and the set of NFA states reached.  Finally it prints the number of DFA
    states found.
    """
    global I, II

    II = set()
    I = e_closure({0})
    II = set()
    pre_I_queue[:] = [I]
    I_queue[:] = [I]

    while pre_I_queue:
        I = pre_I_queue.pop(0)
        print(I, end='')
        for sgm in range(1, len(Sgm)):
            I_next = set()
            print(" SGM:", sgm, end=',')
            for i in I:
                II = set()  # e_closure() may accumulate into the global II
                I_next |= e_closure(move(i, sgm))
            II = set()
            print(I_next, end='')
            # An empty set means "no transition" and is not a DFA state.
            if I_next and I_next not in I_queue:
                I_queue.append(I_next)
                pre_I_queue.append(I_next)
        print()
    print("DFA状态数为:", len(I_queue))


data_input()
main()
input (nfa.txt):
eps a b
X # # A
A B_F_E # #
B D C #
C # # D
D B_E # #
E A I #
F # # G
G # # H
H E # #
I # # Y
Y # # #
data_input 结果:
转换完结果:
语法分析中求 FIRST 集
利用算法:
代码实现:
NonTermSet = set() # set of non-terminal symbols
TermSet = set() # set of terminal symbols
First = {} # FIRST sets, keyed by grammar symbol
GramaDict = {} # parsed productions: non-terminal -> set of right-hand sides
Code = [] # productions as read from the grammar file
StartSym = "" # start symbol
EndSym = '#' # the end-of-input symbol is "#"
Epsilon = "~" # "~" stands in for epsilon, which has no plain-text symbol
"""
(1)如果 X 是终结符,则 FIRST(X) = {X}
(2)如果 X → ε 是一个产生式,则 ε ∈ FIRST(X)
(3)如果 X 是非终结符,且 X → Y1 Y2 … Yk,则
a)FIRST(Y1) 中的所有非 ε 符号都在 FIRST(X) 中
b)如果 Y1 Y2 … Yi-1 => ε,则 FIRST(Yi) 中的所有非 ε 符号
都在 FIRST(X) 中
c)如果 Y1 Y2 … Yk => ε,则 ε ∈ FIRST(X)
"""
def getFirst():
    """Compute the FIRST set of every grammar symbol into the global ``First``.

    Reads ``NonTermSet``, ``TermSet``, ``GramaDict`` (non-terminal -> set of
    right-hand-side strings, one character per symbol) and ``Epsilon``.

    Rules applied until nothing changes:

    1. If X is a terminal, FIRST(X) = {X}.
    2. If X -> epsilon is a production, epsilon is in FIRST(X).
    3. If X is a non-terminal and X -> Y1 Y2 ... Yk, then
       a. every non-epsilon symbol of FIRST(Y1) is in FIRST(X);
       b. if Y1 ... Yi-1 can all derive epsilon, every non-epsilon symbol
          of FIRST(Yi) is in FIRST(X);
       c. if Y1 ... Yk can all derive epsilon, epsilon is in FIRST(X).

    Every addition, including adding epsilon, triggers another pass, so the
    result does not depend on the iteration order of the sets.
    """
    for X in NonTermSet:
        First[X] = set()  # non-terminals start with an empty FIRST set
    for X in TermSet:
        First[X] = {X}  # a terminal's FIRST set is the terminal itself

    epsilon_only = {Epsilon}
    changed = True
    while changed:  # stop once a full pass adds nothing
        changed = False
        for X in NonTermSet:
            first_x = First[X]
            for production in GramaDict[X]:
                all_nullable = True
                for symbol in production:
                    first_symbol = First[symbol]
                    missing = first_symbol - epsilon_only - first_x
                    if missing:
                        first_x |= missing
                        changed = True
                    if Epsilon not in first_symbol:
                        # Later symbols cannot start a derivation of X.
                        all_nullable = False
                        break
                if all_nullable and Epsilon not in first_x:
                    # X -> epsilon, or every symbol of the production is
                    # nullable.
                    first_x.add(Epsilon)
                    changed = True
# Output formatting
def display(show_list):
    """Print the items of *show_list* on one line as fixed-width cells.

    Each item is rendered with ``str()``, left-aligned in a 25-character
    field and preceded by one space; the line ends with a newline.  Items
    are wrapped in a 1-tuple before ``%`` formatting so that an item which
    is itself a tuple is printed as a whole instead of being unpacked as
    format arguments.
    """
    for item in show_list:
        print(' %-25s' % (item,), end='')
    print()
# Read the grammar
def readGrammar():
    """Read the productions from ``grammar.txt`` and return them as a list.

    Each non-blank line becomes one entry, without its trailing newline.
    The global ``Code`` is rebound to the freshly read list (and that list
    is returned), so calling the function again does not accumulate
    duplicates.

    Raises:
        SystemExit: if the file cannot be opened or read; the error is
            printed first and the exit status is 1.
    """
    global Code
    try:
        # The context manager closes the file, and nothing tries to close
        # a handle that was never opened when open() itself fails.
        with open('grammar.txt', 'r') as file:
            productions = [line.rstrip('\n') for line in file]
    except OSError as e:
        print(e)
        raise SystemExit(1)
    # Blank lines are not productions.
    Code = [line for line in productions if line.strip()]
    return Code
# Initialisation and driver
def main():
    """Read the grammar, classify its symbols and print the FIRST sets.

    Steps: reset the module-level grammar state, read the productions with
    ``readGrammar()``, take the first character of the first production as
    the start symbol, split every production ``X->a|b`` into its left-hand
    side and alternatives, derive the terminal set as "every right-hand
    side character that is not a left-hand side", then call ``getFirst()``
    and print FIRST for each non-terminal.

    Raises:
        ValueError: if the grammar file contains no productions, or a line
            does not contain ``->``.
    """
    global NonTermSet, TermSet, First, StartSym, Code

    # Start from a clean slate so that re-running (e.g. in a notebook after
    # editing grammar.txt) does not merge the old grammar into the new one.
    del Code[:]
    NonTermSet.clear()
    TermSet.clear()
    GramaDict.clear()
    First.clear()

    Code = [line for line in readGrammar() if line.strip()]
    n = len(Code)
    print('产生式个数:', n)
    if n == 0:
        raise ValueError("grammar.txt contains no productions")

    StartSym = Code[0][0]
    print("开始符号:", StartSym)
    print('产生式:G[', StartSym, ']:')
    for production in Code:
        # Split on the first arrow only; the rest is the right-hand side.
        X, Y = production.split('->', 1)
        print(' ', production)
        NonTermSet.add(X)
        alternatives = Y.split('|')
        for alternative in alternatives:
            TermSet.update(alternative)  # one symbol per character
        GramaDict.setdefault(X, set()).update(alternatives)
    # Anything that appears on a left-hand side is not a terminal.
    TermSet.difference_update(NonTermSet)

    print('非终结符:', NonTermSet)
    print('终结符:', TermSet)
    print('文法字典:',GramaDict)
    getFirst()
    print("FIRST集:")
    for k in NonTermSet:
        print(' FIRST[', k, ']: ', First[k])


main()
input (grammar.txt):
E->eBaA
A->a|bAcB
B->aC|dEd
C->e|dC
output: