概述
# Character classes shared by the lexer.  The backslash escapes below were
# lost when this code was pasted ('\n' had become 'n' and '\t' had become 't').
delimiters = '{},;\n'  # block/statement delimiters plus the newline
backspace = ' \t'  # blank characters (space, tab); name kept for compatibility
operator = '()[]!*/%+-<>=.~&|^'  # every character that can start an operator
# Characters that may directly follow a numeric literal.  '.' is excluded
# because inside a number it is the decimal point, not a separator.
fen = delimiters + backspace + operator
fen = fen.replace('.', '')
keywords = [
    'int', 'char', 'float', 'break', 'const', 'return', 'void',
    'continue', 'do', 'while', 'if', 'else', 'for', 'long', 'typedef',
    'static', 'short', 'switch', 'case', 'default', 'double', 'struct',
    'sizeof', 'string', 'bool', 'gets', 'getchar', 'true',
]


class Word:
    """Lexical analyser (tokenizer) for a small C-like language.

    Build it with the source text and call :meth:`l_analysis`.  Results are
    accumulated on the instance:

    * ``token`` -- list of ``(kind, text, row)`` tuples, ``kind`` being one of
      ``'keyword'``, ``'identifier'``, ``'const'``, ``'op'``, ``'bounder'``,
      ``'string'`` or ``'character'``.
    * ``error`` -- list of messages shaped like ``'error:(row N): ...'``.

    Every ``get_*`` method takes the index ``n`` of the first character of
    the lexeme and returns the index of the first character after it.
    """

    # Two-character operators emitted as one token ('/=' is produced by
    # get_comments because '/' is routed there first).
    _TWO_CHAR_OPERATORS = frozenset(
        ('<=', '>=', '!=', '&&', '||', '++', '+=', '--', '-=', '=='))

    _DEC_DIGITS = '0123456789'
    _OCT_DIGITS = '01234567'
    _HEX_DIGITS = '0123456789abcdefABCDEF'

    # Numeric-literal automaton: state -> ((accepted characters, next state), ...)
    _NUM_TRANSITIONS = {
        'dec': ((_DEC_DIGITS, 'dec'), ('eE', 'exp_sign'), ('.', 'frac_first')),
        'zero': (('xX', 'hex_first'), (_OCT_DIGITS, 'oct'), ('.', 'frac_first')),
        'oct': ((_OCT_DIGITS, 'oct'),),
        'hex_first': ((_HEX_DIGITS, 'hex'),),
        'hex': ((_HEX_DIGITS, 'hex'),),
        'frac_first': ((_DEC_DIGITS, 'frac'),),
        'frac': ((_DEC_DIGITS, 'frac'), ('eE', 'exp_sign')),
        'exp_sign': (('+-', 'exp_first'), (_DEC_DIGITS, 'exp')),
        'exp_first': ((_DEC_DIGITS, 'exp'),),
        'exp': ((_DEC_DIGITS, 'exp'),),
    }
    # States in which the literal read so far is complete and may be ended by
    # a separator from ``fen``.
    _NUM_ACCEPTING = frozenset(('dec', 'zero', 'oct', 'hex', 'frac', 'exp'))

    def __init__(self, s):
        """Store the source text ``s`` and reset the result lists."""
        self.row = 1  # current (1-based) line number
        self.token = []  # recognised tokens
        self.error = []  # error messages
        # A trailing blank and newline act as a sentinel so every lexeme is
        # followed by at least one separator.
        self.s = s + ' \n'

    def _report(self, message, row=None):
        """Append an error message tagged with ``row`` (default: current row)."""
        if row is None:
            row = self.row
        self.error.append('error:' + '(row ' + str(row) + '): ' + message)

    def judge_row(self, i):
        """Step past character ``i``, counting a line if it is a newline.

        Returns ``i + 1``.
        """
        if self.s[i] == '\n':
            self.row += 1
        return i + 1

    def l_analysis(self):
        """Tokenize the whole source, filling ``self.token`` and ``self.error``."""
        i = 0
        while i < len(self.s):
            ch = self.s[i]
            if ch == ' ' or ch == '\t':  # blanks are skipped
                i += 1
            elif ch == '\n':
                i = self.judge_row(i)
            elif ch == '/':  # comment, '/=' or plain division
                i = self.get_comments(i)
            elif ch == '_' or ch.isalpha():
                i = self.get_identifier(i)
            elif '0' <= ch <= '9':
                # ASCII digits only: str.isdigit() also accepts characters
                # such as superscripts that get_num cannot start on.
                i = self.get_num(i)
            elif ch in operator:
                i = self.get_operator(i)
            elif ch in '{};,':
                self.token.append(('bounder', ch, self.row))
                i += 1
            elif ch == '"':
                i = self.get_string(i)
            elif ch == "'":
                i = self.get_char(i)
            else:
                self._report("unknown character, not use:" + ch)
                i += 1

    def judge_key(self, str1):
        """Return the 1-based position of ``str1`` in ``keywords``, or 0."""
        if str1 in keywords:
            return keywords.index(str1) + 1
        return 0

    def get_identifier(self, n):
        """Read an identifier or keyword starting at ``n``.

        The first character is taken as-is (the caller has already checked
        it); following letters, digits and underscores are consumed.
        """
        s = self.s
        if n >= len(s):
            return n
        i = n + 1
        while i < len(s) and (s[i] == '_' or s[i].isalpha() or s[i].isdigit()):
            i += 1
        text = s[n:i]
        kind = 'keyword' if self.judge_key(text) else 'identifier'
        self.token.append((kind, text, self.row))
        return i

    def get_num(self, n):
        """Read a numeric literal starting at ``n``.

        Accepts decimal, octal (leading ``0``) and hexadecimal (``0x``)
        integers, decimals, and scientific notation.  A complete literal
        followed by a separator from ``fen`` is stored as a ``'const'`` token.
        Otherwise an error is recorded and scanning resumes at the offending
        character.

        Raises:
            ValueError: if ``self.s[n]`` is not an ASCII digit.
        """
        s = self.s
        if n >= len(s) or s[n] not in self._DEC_DIGITS:
            raise ValueError('get_num() must start on an ASCII digit')
        state = 'zero' if s[n] == '0' else 'dec'
        i = n + 1
        while i < len(s):
            ch = s[i]
            if state in self._NUM_ACCEPTING and ch in fen:
                self.token.append(('const', s[n:i], self.row))
                return i
            for accepted, target in self._NUM_TRANSITIONS[state]:
                if ch in accepted:
                    state = target
                    break
            else:
                self._report(s[n:i] + "number error!")
                return i
            i += 1
        # Only reachable if the sentinel was removed from self.s.
        if state in self._NUM_ACCEPTING:
            self.token.append(('const', s[n:i], self.row))
        else:
            self._report(s[n:i] + "number error!")
        return i

    def get_comments(self, n):
        """Handle a '/' at ``n``: skip a comment or emit '/' or '/='.

        ``//`` comments are skipped up to (not including) the newline.
        ``/* ... */`` comments are skipped entirely, counting the lines they
        span.  An unterminated block comment is reported on the row where it
        opens and consumes the rest of the input.
        """
        s = self.s
        follower = s[n + 1:n + 2]
        if follower == '/':
            end = s.find('\n', n + 2)
            return len(s) if end == -1 else end
        if follower == '*':
            # Search after the opener so that "/*/" is not seen as closed.
            close = s.find('*/', n + 2)
            if close == -1:
                self._report("symbol mismatch!")
                self.row += s.count('\n', n)
                return len(s)
            self.row += s.count('\n', n, close)
            return close + 2
        if follower == '=':
            self.token.append(('op', '/=', self.row))
            return n + 2
        self.token.append(('op', '/', self.row))
        return n + 1

    def judge_operator(self, str1):
        """Return the 1-based position of the character ``str1`` in
        ``operator``, or 0 when it is not a single operator character."""
        if len(str1) != 1:
            return 0
        return operator.find(str1) + 1

    def get_operator(self, n):
        """Read one operator starting at ``n`` using longest match.

        A known two-character operator is emitted as one token; otherwise the
        single character is emitted.  Adjacent operators (``x=(y)``,
        ``a=-1``, ``a&&!b``) are therefore tokenized separately instead of
        being reported as an error.

        Raises:
            ValueError: if ``self.s[n]`` is not an operator character.
        """
        pair = self.s[n:n + 2]
        if pair in self._TWO_CHAR_OPERATORS:
            self.token.append(('op', pair, self.row))
            return n + 2
        single = self.s[n:n + 1]
        if not self.judge_operator(single):
            raise ValueError('get_operator() must start on an operator')
        self.token.append(('op', single, self.row))
        return n + 1

    def get_string(self, n):
        """Read a double-quoted string literal starting at ``n``.

        A backslash escapes the following character, so ``\\"`` does not end
        the string while ``\\\\`` followed by ``"`` does.  A string that
        reaches the end of the line is reported as a mismatch and scanning
        resumes at the newline.
        """
        s = self.s
        i = n + 1
        while i < len(s):
            ch = s[i]
            if ch == '"':
                self.token.append(('string', s[n:i + 1], self.row))
                return i + 1
            if ch == '\n':
                break
            if ch == '\\' and s[i + 1:i + 2] not in ('', '\n'):
                i += 2  # skip the backslash and the character it escapes
            else:
                i += 1
        self._report("symbol mismatch!")
        return i

    def get_char(self, n):
        """Read a single-quoted character literal starting at ``n``.

        Accepts one plain character or one backslash escape between the
        quotes.  Anything else is reported as a mismatch; scanning then
        skips to the next quote or separator to resynchronise.
        """
        s = self.s
        i = n + 1
        if i >= len(s) or s[i] == '\n':
            self._report("symbol mismatch!")
            return i
        if s[i] == "'":  # empty literal ''
            self._report("symbol mismatch!")
            return i + 1
        if s[i] == '\\':
            if s[i + 1:i + 2] in ('', '\n'):
                self._report("symbol mismatch!")
                return i + 1
            i += 2  # backslash plus the escaped character
        else:
            i += 1
        if i < len(s) and s[i] == "'":
            self.token.append(('character', s[n:i + 1], self.row))
            return i + 1
        # Too many characters or no closing quote: skip to a sync point.
        while i < len(s) and s[i] not in ",;'\n{}()[]":
            i += 1
        if i < len(s) and s[i] == "'":
            i += 1
        self._report("symbol mismatch!")
        return i
最后
以上就是专注小白菜为你收集整理的“用 Python 写词法分析器”的全部内容,希望本文能够帮你解决用 Python 编写词法分析器时遇到的程序开发问题。
如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。
本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
发表评论 取消回复