编译原理-词法分析器

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/Jamence/article/details/84502269

词法分析器

实验要求

1、根据以下的正规式,编制正规文法,画出状态图;
标识符 <字母>(<字母>|<数字字符>)*
十进制整数 0 | ((1|2|3|4|5|6|7|8|9)(0|1|2|3|4|5|6|7|8|9)*)
八进制整数 0(1|2|3|4|5|6|7)(0|1|2|3|4|5|6|7)*

十六进制整数 0x(0|1|2|3|4|5|6|7|8|9|a|b|c|d|e|f)(0|1|2|3|4|5|6|7|8|9|a|b|c|d|e|f)*
运算符和界符 + - * / > < = ( ) ;
关键字 if then else while do
2、根据状态图,设计词法分析函数int scan( ),完成以下功能:
1) 从文本文件中读入测试源代码,根据状态转换图,分析出一个单词,
2) 以二元式形式输出单词<单词种类,单词属性>
其中单词种类用整数表示:
0:标识符
1:十进制整数
2:八进制整数
3:十六进制整数
运算符和界符,关键字采用一字一符,不编码
其中单词属性表示如下:
标识符,整数由于采用一类一符,属性用单词表示
运算符和界符,关键字采用一字一符,属性为空

实验分析

1、 词法的正规式描述;
标识符 <字母>(<字母>|<数字字符>)*
十进制整数 0 | ((1|2|3|4|5|6|7|8|9)(0|1|2|3|4|5|6|7|8|9)*)
八进制整数 0(1|2|3|4|5|6|7)(0|1|2|3|4|5|6|7)*

十六进制整数 0x(0|1|2|3|4|5|6|7|8|9|a|b|c|d|e|f)(0|1|2|3|4|5|6|7|8|9|a|b|c|d|e|f)*
运算符和界符 + - * / > < = ( ) ;
关键字 if then else while do
由于实验描述中已经包含相关的正规式描述,所以我们并不需要做过多处理。
2、 变换后的状态图;
状态图
3、 词法分析程序的数据结构与算法。
在这里插入图片描述
以上为整体代码框架部分。构造函数读入输入文件中的数据(一次性读取到结束符“#”为止),然后以读入的串作为参数反复调用scan函数进行扫描。scan函数通过isCal(运算符和界符)、isKey(关键字)、isTen(十进制整数)、isEight(八进制整数)、isSixteen(十六进制整数)来区分每一个单词,而标识符直接使用isalpha()进行判断。此外还有一个结构体Twotuples用于记录结果,内含两个分量,分别对应输出二元式的左、右两部分。

实验代码

#include <cctype>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <memory>
#include <string>
#include <vector>
using namespace std;
// Size limits used by the analyser (both are ten thousand).
constexpr int maxn = 10000;
constexpr int maxlen = 10000;
/**
 * Hand-written lexical analyser.
 *
 * Token classes and the <kind, proper> pair emitted for each:
 *   - operators / delimiters  + - * / > < = ( ) ;   -> <lexeme, "-">
 *   - keywords                if then else while do -> <lexeme, "-">
 *   - identifiers             letter (letter|digit)* -> <"0", lexeme>
 *   - decimal integers        0 | [1-9][0-9]*        -> <"1", lexeme>
 *   - octal integers          0[1-7][0-7]*           -> <"2", digits after the leading 0>
 *   - hexadecimal integers    0x[0-9a-f][0-9a-f]*    -> <"3", digits after the 0x prefix>
 *
 * All the work happens in the constructor: it reads the input file up to the
 * first '#' (or end of file), tokenises it, and writes the pairs both to the
 * output file and to standard output.
 */
class lexical {
private:
	// Operators and delimiters recognised by isCal().
	char operate[10][10] = { "+","-","*","/",">","<","=","(",")",";" };
	// Reserved words recognised by isKey().
	char keyword[5][10] = { "if","then","else","while","do" };

	// One result pair. std::string is used so that lexemes of any length fit.
	struct Twotuples {
		std::string kind;
		std::string proper;
	};
	// Result table; grows on demand, so there is no fixed token limit.
	std::vector<Twotuples> tuples;
	std::string filename;

	// The <cctype> classifiers have undefined behaviour for negative char
	// values (e.g. bytes of UTF-8 text), hence the unsigned char casts.
	static bool isLetter(char c) { return std::isalpha(static_cast<unsigned char>(c)) != 0; }
	static bool isDigit(char c) { return std::isdigit(static_cast<unsigned char>(c)) != 0; }
	static bool isOctDigit(char c) { return c >= '0' && c <= '7'; }
	static bool isHexDigit(char c) { return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f'); }

	// Stores <kind, proper> at index `slot` (growing the table if needed) and
	// advances `slot`. `slot` must be non-negative.
	void record(int &slot, const std::string &kind, const std::string &proper) {
		const std::size_t index = static_cast<std::size_t>(slot);
		if (index >= tuples.size()) {
			tuples.resize(index + 1);
		}
		tuples[index].kind = kind;
		tuples[index].proper = proper;
		++slot;
	}

public:
	/**
	 * Tests whether `s` starts with an operator or delimiter.
	 * @param s       NUL-terminated text to inspect.
	 * @param length  Receives the operator length on success; untouched otherwise.
	 * @return true if an operator/delimiter starts at `s`.
	 */
	bool isCal(char *s, int &length) {
		for (const auto &op : operate) {
			const std::size_t len = std::strlen(op);
			// strncmp stops at the NUL of `s`, so short inputs are safe.
			if (std::strncmp(s, op, len) == 0) {
				length = static_cast<int>(len);
				return true;
			}
		}
		return false;
	}

	/**
	 * Tests whether `s` starts with a keyword that is a complete word.
	 * @param s       NUL-terminated text to inspect.
	 * @param length  Always receives the length of the leading run of letters.
	 * @return true if that run of letters is exactly a keyword and is not
	 *         continued by a digit (so "if1" is an identifier, not "if" + "1").
	 */
	bool isKey(char *s, int &length) {
		int num = 0;
		while (isLetter(s[num])) {
			num++;
		}
		length = num;
		if (isDigit(s[num])) {
			return false;
		}
		for (const auto &word : keyword) {
			if (std::strlen(word) == static_cast<std::size_t>(num)
				&& std::strncmp(s, word, static_cast<std::size_t>(num)) == 0) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Tests whether `s` starts with a decimal integer: a lone "0" (not followed
	 * by a digit or 'x') or a run of digits beginning with 1-9.
	 * @param length  Receives the lexeme length on success; untouched otherwise.
	 */
	bool isTen(char *s, int &length) {
		if (s[0] == '0' && !isDigit(s[1]) && s[1] != 'x') {
			length = 1;
			return true;
		}
		if (s[0] >= '1' && s[0] <= '9') {
			int num = 0;
			while (isDigit(s[num])) {
				num++;
			}
			length = num;
			return true;
		}
		return false;
	}

	/**
	 * Tests whether `s` starts with an octal integer: '0', then a digit 1-7,
	 * then any number of digits 0-7.
	 * @param length  Receives the lexeme length (including the leading '0') on
	 *                success; untouched otherwise.
	 */
	bool isEight(char *s, int &length) {
		if (s[0] == '0' && s[1] >= '1' && s[1] <= '7') {
			int num = 0;
			while (isOctDigit(s[num])) {
				num++;
			}
			length = num;
			return true;
		}
		return false;
	}

	/**
	 * Tests whether `s` starts with a hexadecimal integer: "0x" followed by at
	 * least one digit from 0-9 / a-f (lower case only).
	 * @param length  Receives the lexeme length (including "0x") on success;
	 *                untouched otherwise.
	 */
	bool isSixteen(char *s, int &length) {
		if (s[0] == '0' && s[1] == 'x' && isHexDigit(s[2])) {
			int num = 2;
			while (isHexDigit(s[num])) {
				num++;
			}
			length = num;
			return true;
		}
		return false;
	}

	/**
	 * Recognises exactly one token at str[p1] and records it.
	 *
	 * @param str  NUL-terminated source text.
	 * @param p1   Read position in `str`; advanced past whatever was consumed.
	 * @param p2   Index of the next result slot (>= 0); incremented when a
	 *             token is recorded.
	 *
	 * A character that starts no token is reported on std::cerr and skipped,
	 * so repeated calls always make progress. At the terminating NUL nothing
	 * is consumed. Whitespace is not handled here; the caller skips it.
	 */
	void scan(char *str, int &p1, int &p2) {
		char *cur = str + p1;
		int len = 0;
		if (isCal(cur, len)) {
			// operator / delimiter
			record(p2, std::string(cur, static_cast<std::size_t>(len)), "-");
		}
		else if (isKey(cur, len)) {
			// keyword
			record(p2, std::string(cur, static_cast<std::size_t>(len)), "-");
		}
		else if (isLetter(*cur)) {
			// identifier
			len = 0;
			while (isLetter(cur[len]) || isDigit(cur[len])) {
				len++;
			}
			record(p2, "0", std::string(cur, static_cast<std::size_t>(len)));
		}
		else if (isTen(cur, len)) {
			// decimal integer
			record(p2, "1", std::string(cur, static_cast<std::size_t>(len)));
		}
		else if (isEight(cur, len)) {
			// octal integer: the attribute omits the leading '0'
			record(p2, "2", std::string(cur + 1, static_cast<std::size_t>(len - 1)));
		}
		else if (isSixteen(cur, len)) {
			// hexadecimal integer: the attribute omits the "0x" prefix
			record(p2, "3", std::string(cur + 2, static_cast<std::size_t>(len - 2)));
		}
		else {
			if (*cur == '\0') {
				return; // end of text: never step past the terminator
			}
			std::cerr << "lexical: skipping unrecognized character '" << *cur
				<< "' at offset " << p1 << std::endl;
			len = 1;
		}
		p1 += len;
	}

	/**
	 * Tokenises `inputfile` and writes the result pairs to `outputfile` and to
	 * standard output.
	 *
	 * The input is read up to the first '#' (or end of file); that text,
	 * followed by a '#', is echoed to standard output before the pairs.
	 * If the input file cannot be opened a message is printed and the program
	 * exits with status 1.
	 */
	lexical(std::string inputfile, std::string outputfile) {
		this->filename = inputfile;
		std::ifstream in(filename);
		if (!in.is_open()) {
			std::cout << "文件打开失败" << std::endl;
			std::exit(1);
		}

		// Read everything before the '#' sentinel; std::string removes the
		// fixed-size buffer and its overflow risk.
		std::string text;
		std::getline(in, text, '#');
		text += '#'; // sentinel that stops the scanning loop below
		std::cout << text << std::endl;

		int buf_ptr = 0;
		int tup_ptr = 0;
		while (text[buf_ptr] != '#') {
			const char c = text[buf_ptr];
			// Every whitespace character (including a tab or '\r') is exactly
			// one character wide; stray NUL bytes are skipped as well.
			if (c == '\0' || std::isspace(static_cast<unsigned char>(c))) {
				buf_ptr++;
				continue;
			}
			scan(&text[0], buf_ptr, tup_ptr);
		}

		// Left alignment is applied to std::cout only; the file stream keeps
		// the default (right) alignment.
		std::cout.setf(std::ios::left);
		std::ofstream out(outputfile);
		if (!out.is_open()) {
			std::cerr << "lexical: cannot open output file " << outputfile << std::endl;
		}
		for (int i = 0; i < tup_ptr; i++) {
			out << "<" << std::setw(5) << tuples[i].kind << "," << std::setw(5) << tuples[i].proper << ">" << '\n';
			std::cout << "<" << std::setw(5) << tuples[i].kind << "," << std::setw(5) << tuples[i].proper << ">" << std::endl;
		}
	}
};
int main()
{
	string filename1 = "D:\\c++Project\\fundamentals_of_compiling\\Parsing\\test.txt";
	string filename2= "D:\\c++Project\\fundamentals_of_compiling\\Parsing\\out.txt";
	lexical *text = new lexical(filename1,filename2);
	system("pause");
	return 0;
}

猜你喜欢

转载自blog.csdn.net/Jamence/article/details/84502269