词法分析器
实验要求
1、根据以下的正规式,编制正规文法,画出状态图;
标识符 <字母>(<字母>|<数字字符>)*
十进制整数 0 | ((1|2|3|4|5|6|7|8|9)(0|1|2|3|4|5|6|7|8|9)*)
八进制整数 0(1|2|3|4|5|6|7)(0|1|2|3|4|5|6|7)*
十六进制整数 0x(0|1|2|3|4|5|6|7|8|9|a|b|c|d|e|f)(0|1|2|3|4|5|6|7|8|9|a|b|c|d|e|f)*
运算符和界符 + - * / > < = ( ) ;
关键字 if then else while do
2、根据状态图,设计词法分析函数int scan( ),完成以下功能:
1) 从文本文件中读入测试源代码,根据状态转换图,分析出一个单词,
2) 以二元式形式输出单词<单词种类,单词属性>
其中单词种类用整数表示:
0:标识符
1:十进制整数
2:八进制整数
3:十六进制整数
运算符和界符,关键字采用一字一符,不编码
其中单词属性表示如下:
标识符,整数由于采用一类一符,属性用单词表示
运算符和界符,关键字采用一字一符,属性为空
实验分析
1、 词法的正规式描述;
标识符 <字母>(<字母>|<数字字符>)*
十进制整数 0 | ((1|2|3|4|5|6|7|8|9)(0|1|2|3|4|5|6|7|8|9)*)
八进制整数 0(1|2|3|4|5|6|7)(0|1|2|3|4|5|6|7)*
十六进制整数 0x(0|1|2|3|4|5|6|7|8|9|a|b|c|d|e|f)(0|1|2|3|4|5|6|7|8|9|a|b|c|d|e|f)*
运算符和界符 + - * / > < = ( ) ;
关键字 if then else while do
由于实验描述中已经包含相关的正规式描述,所以我们并不需要做过多处理。
2、 变换后的状态图;
3、 词法分析程序的数据结构与算法。
以上为整体代码框架部分。构造函数负责读入输入文件中的数据:一次性读到结束符'#'为止,然后把读入的字符串作为参数反复调用scan函数进行扫描。scan依次通过isCal(运算符和界符)、isKey(关键字)、isTen(十进制整数)、isEight(八进制整数)、isSixteen(十六进制整数)来区分单词,而标识符直接使用isalpha()判断首字符。同时还有一个结构体Twotuples用于记录结果,内含两个分量,分别对应输出二元式的左、右两部分。
实验代码
#include <cctype>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <memory>
#include <string>
#include <vector>
using namespace std;
// General-purpose size limits (ten thousand each) available to the program's
// buffers and tables.
constexpr int maxn = 10000;
constexpr int maxlen = 10000;
/**
 * Lexical analyser for the small language of this exercise.
 *
 * Recognised tokens:
 *   - operators / delimiters:  + - * / > < = ( ) ;
 *   - keywords:                if then else while do
 *   - identifiers:             letter (letter | digit)*
 *   - decimal integers:        0 | [1-9][0-9]*
 *   - octal integers:          0[1-7][0-7]*
 *   - hexadecimal integers:    0x[0-9a-f][0-9a-f]*
 *
 * Constructing an object runs the whole analysis: the input file is read up
 * to the first '#' (or to end of file), tokenised, and the resulting
 * <kind,property> pairs are written both to the output file and to stdout.
 */
class lexical {
private:
	// Operators and delimiters; every entry is a single character.
	char operate[10][10] = { "+","-","*","/",">","<","=","(",")",";" };
	const int rowope = 10;
	// Reserved words of the language.
	char keyword[5][10] = { "if","then","else","while","do" };
	const int rowkey = 5;

	// One output pair. `kind` is the token class ("0".."3") or, for operators
	// and keywords, the lexeme itself; `proper` is the attribute ("-" if none).
	// std::string is used so that long lexemes cannot overflow a fixed buffer.
	struct Twotuples {
		std::string kind;
		std::string proper;
	};
	// Grows on demand, so the number of tokens is not limited.
	std::vector<Twotuples> tuples;
	std::string filename;

	// <cctype> classifiers must not receive a negative char value, hence the
	// conversion to unsigned char.
	static bool isLetter(char c) {
		return std::isalpha(static_cast<unsigned char>(c)) != 0;
	}

	static bool isDecimalDigit(char c) {
		return std::isdigit(static_cast<unsigned char>(c)) != 0;
	}

	// Lower-case hexadecimal digit, as required by the token grammar.
	static bool isHexDigit(char c) {
		return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f');
	}

	// Stores one pair at index `slot` (growing the table if needed) and
	// advances `slot` to the next free position. `slot` must be non-negative.
	void record(int &slot, const std::string &kind, const std::string &proper) {
		const std::size_t index = static_cast<std::size_t>(slot);
		if (index >= tuples.size()) {
			tuples.resize(index + 1);
		}
		tuples[index].kind = kind;
		tuples[index].proper = proper;
		++slot;
	}

	// Writes one pair as "<kind ,prop >" with both fields left-aligned in a
	// width of 5, so the file and the console show the same layout.
	static void writeTuple(std::ostream &os, const Twotuples &t) {
		os << std::left << "<" << std::setw(5) << t.kind << ","
		   << std::setw(5) << t.proper << ">" << '\n';
	}

public:
	/**
	 * Tests whether `s` starts with an operator or delimiter.
	 * @param s       NUL-terminated text to inspect.
	 * @param length  receives the operator length on success; untouched otherwise.
	 * @return true if an operator/delimiter starts at `s`.
	 */
	bool isCal(char *s, int &length) {
		for (int i = 0; i < rowope; i++) {
			const std::size_t len = std::strlen(operate[i]);
			// strncmp stops at the terminator of `s`, so no over-read occurs.
			if (std::strncmp(s, operate[i], len) == 0) {
				length = static_cast<int>(len);
				return true;
			}
		}
		return false;
	}

	/**
	 * Tests whether the word starting at `s` is a keyword.
	 * The whole identifier-shaped word (letter followed by letters/digits) must
	 * equal the keyword, so "if1" or "dox" are not keywords.
	 * @param s       NUL-terminated text to inspect.
	 * @param length  receives the length of the word at `s` (0 if `s` does not
	 *                start with a letter), whether or not it is a keyword.
	 * @return true if that word is one of the reserved words.
	 */
	bool isKey(char *s, int &length) {
		int run = 0;
		while (isLetter(s[run]) || (run > 0 && isDecimalDigit(s[run]))) {
			run++;
		}
		length = run;
		for (int i = 0; i < rowkey; i++) {
			if (std::strlen(keyword[i]) == static_cast<std::size_t>(run) &&
				std::strncmp(s, keyword[i], static_cast<std::size_t>(run)) == 0) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Tests whether a decimal integer starts at `s`.
	 * A leading '0' is the decimal number 0 unless it begins a valid octal or
	 * hexadecimal literal; this way inputs such as "08", "00" or "0x" followed
	 * by a non-hex character still yield a token instead of matching nothing.
	 * @param s       NUL-terminated text to inspect.
	 * @param length  receives the literal length on success.
	 * @return true if a decimal integer starts at `s`.
	 */
	bool isTen(char *s, int &length) {
		if (s[0] == '0') {
			int unused = 0;
			if (isEight(s, unused) || isSixteen(s, unused)) {
				return false;
			}
			length = 1;
			return true;
		}
		if (s[0] >= '1' && s[0] <= '9') {
			int num = 1;
			while (isDecimalDigit(s[num])) {
				num++;
			}
			length = num;
			return true;
		}
		return false;
	}

	/**
	 * Tests whether an octal integer (0 followed by 1-7, then 0-7 digits)
	 * starts at `s`.
	 * @param length  receives the literal length, including the leading '0'.
	 */
	bool isEight(char *s, int &length) {
		if (s[0] == '0' && (s[1] >= '1' && s[1] <= '7')) {
			int num = 2;
			while (s[num] >= '0' && s[num] <= '7') {
				num++;
			}
			length = num;
			return true;
		}
		return false;
	}

	/**
	 * Tests whether a hexadecimal integer ("0x" followed by at least one
	 * lower-case hex digit) starts at `s`.
	 * @param length  receives the literal length, including the "0x" prefix.
	 */
	bool isSixteen(char *s, int &length) {
		if (s[0] == '0' && s[1] == 'x' && isHexDigit(s[2])) {
			int num = 3;
			while (isHexDigit(s[num])) {
				num++;
			}
			length = num;
			return true;
		}
		return false;
	}

	/**
	 * Recognises the single token that starts at str[p1] and records it.
	 * @param str  NUL-terminated source text.
	 * @param p1   in: offset of the token start; out: offset just past the token.
	 * @param p2   in: index at which to store the pair; out: next free index.
	 *
	 * If no token starts at str[p1], neither p1 nor p2 is changed; the caller
	 * is responsible for skipping such input.
	 */
	void scan(char *str, int &p1, int &p2) {
		char *cur = str + p1;
		int len = 0;
		if (isCal(cur, len)) {
			// Operators and delimiters: the lexeme is the kind, no attribute.
			record(p2, std::string(cur, static_cast<std::size_t>(len)), "-");
		}
		else if (isKey(cur, len)) {
			// Keywords: the lexeme is the kind, no attribute.
			record(p2, std::string(cur, static_cast<std::size_t>(len)), "-");
		}
		else if (isLetter(*cur)) {
			// Identifier: a letter followed by letters and digits.
			len = 1;
			while (isLetter(cur[len]) || isDecimalDigit(cur[len])) {
				len++;
			}
			record(p2, "0", std::string(cur, static_cast<std::size_t>(len)));
		}
		else if (isTen(cur, len)) {
			record(p2, "1", std::string(cur, static_cast<std::size_t>(len)));
		}
		else if (isEight(cur, len)) {
			// The attribute omits the leading '0'.
			record(p2, "2", std::string(cur + 1, static_cast<std::size_t>(len - 1)));
		}
		else if (isSixteen(cur, len)) {
			// The attribute omits the "0x" prefix.
			record(p2, "3", std::string(cur + 2, static_cast<std::size_t>(len - 2)));
		}
		else {
			return;
		}
		p1 += len;
	}

	/**
	 * Tokenises `inputfile` and writes the result to `outputfile` and stdout.
	 * Input is read up to the first '#' (or end of file). If the input file
	 * cannot be opened, a message is printed and the process exits with
	 * status 1.
	 */
	lexical(std::string inputfile, std::string outputfile) {
		this->filename = inputfile;
		std::ifstream in(filename);
		if (!in.is_open()) {
			std::cout << "文件打开失败" << std::endl;
			std::exit(1);
		}

		// Read everything before the '#' end marker; the marker itself is
		// consumed by getline and not stored.
		std::string text;
		std::getline(in, text, '#');
		// Echo the source followed by the end marker.
		std::cout << text << '#' << std::endl;

		// scan() takes a mutable, NUL-terminated character buffer.
		std::vector<char> buffer(text.begin(), text.end());
		buffer.push_back('\0');
		const int total = static_cast<int>(text.size());

		int buf_ptr = 0;
		int tup_ptr = 0;
		while (buf_ptr < total) {
			// Every whitespace character (space, tab, CR, LF, ...) is exactly
			// one character wide.
			if (std::isspace(static_cast<unsigned char>(buffer[buf_ptr]))) {
				buf_ptr++;
				continue;
			}
			const int before = buf_ptr;
			scan(buffer.data(), buf_ptr, tup_ptr);
			if (buf_ptr == before) {
				// Nothing matched: report and skip one byte so that the loop
				// always makes progress.
				std::cerr << "lexical: skipping unrecognized byte "
					<< static_cast<int>(static_cast<unsigned char>(buffer[before]))
					<< " at offset " << before << std::endl;
				buf_ptr++;
			}
		}

		std::ofstream out(outputfile);
		if (!out.is_open()) {
			std::cerr << "lexical: cannot open output file " << outputfile << std::endl;
		}
		for (int i = 0; i < tup_ptr; i++) {
			const Twotuples &entry = tuples[static_cast<std::size_t>(i)];
			writeTuple(out, entry);
			writeTuple(std::cout, entry);
		}
		std::cout.flush();
	}
};
int main()
{
string filename1 = "D:\\c++Project\\fundamentals_of_compiling\\Parsing\\test.txt";
string filename2= "D:\\c++Project\\fundamentals_of_compiling\\Parsing\\out.txt";
lexical *text = new lexical(filename1,filename2);
system("pause");
return 0;
}