编译原理词法分析器

单词种类	单词符号	种别码	单词种类	单词符号	种别码
整型常数	digit digit*	1	运算符	*	20
字符串（标识符ID）	letter(letter|digit)*	2	运算符	/	21
关键字	main	3	运算符	=	22
关键字	if	4	运算符	>=	23
关键字	else	5	运算符	<	24
关键字	do	6	运算符	<=	25
关键字	while	7	运算符	==	26
关键字	for	8	界符	;	27
关键字	switch	9	界符	"	28
关键字	case	10	运算符	++	29
关键字	int	11	运算符	--	30
关键字	double	12		/*	31
关键字	float	13		*/	32
关键字	long	14	界符	{	33
关键字	void	15	界符	}	34
界符	(	16	界符	,	35
界符	)	17	运算符	!	36
运算符	+	18	运算符	!=	37
运算符	-	19		EOF	-1
运算符	*	20	界符	#	-2
	其他	-10

下面的实现除词法分析外，还包括编译预处理：去除注释、宏替换、去除多余空格，预处理后的结果会写入新文件 new.txt。

#include <stdio.h>
#include <string.h>

#include <cstdio>
#include <cstring>
#include <fstream>
#include <iostream>
#include <stack>
#include <string>

using namespace std;
// Not referenced anywhere in the visible part of this file.
const int max_word = 505;
// Lexeme of the token most recently scanned by judge_token (11 characters plus terminator).
char token[12];
// Not referenced anywhere in the visible part of this file.
char in[105];
// fin: the source file opened in main and read by judge_token.
// fout: not used in the visible code (preprocess declares its own local stream of the same name).
FILE *fin,*fout;
// token_num: write index into token while a lexeme is being collected.
// cnt: not referenced in the visible code.
int cnt = 0,token_num = 0;
// Incremented by judge_token for every '\n' it skips; reported by getWord.
int row = 1;
// 0 until judge_token has read its first character from fin, 1 afterwards.
int flag = 0;
// One-character lookahead carried over between judge_token calls.
char ch;
// Keywords recognised by the lexer, in the same order as the token-code table (main .. void).
const char keyWord[13][20] = {"main","if","else","do","while","for","switch",
"case","int","double","float","long","void"};
// Forward declarations of the preprocessing passes defined further down.
string defination(string content);
string removezhushi(string oldpath);
string removespace(string content);
// Resets the global token buffer to all '\0' so the next lexeme starts from an empty string.
void init_token(){
	memset(token, '\0', sizeof(token));
}
// Preprocessing driver.
// Runs the three passes in order on the file at oldpath - comment removal,
// macro replacement, space squeezing - then echoes the result to stdout,
// writes it to "new.txt" and returns it.
string preprocess(string oldpath)
{
	const string withoutComments = removezhushi(oldpath);
	const string expanded = defination(withoutComments);
	string result = removespace(expanded);

	cout << result << endl;

	// Persist the preprocessed text; the stream is closed when it goes out of scope.
	ofstream newfile("new.txt");
	newfile << result;

	return result;
}
// Squeezes redundant spaces.
// Every run of two or more consecutive ' ' characters in content is reduced
// to a single space, so neighbouring tokens always stay separated. Other
// characters (including tabs) are copied unchanged. An empty string is
// returned as is.
string removespace(string content)
{
	string result;
	result.reserve(content.size());

	bool previousWasSpace = false;
	for (char c : content)
	{
		const bool isSpace = (c == ' ');
		// Drop a space only when the character before it was a space too.
		if (!(isSpace && previousWasSpace))
		{
			result += c;
		}
		previousWasSpace = isSpace;
	}
	return result;
}
// Comment removal.
// Reads the file at oldpath line by line, cuts each line at its first "//",
// and joins the lines with a single space (newlines are not preserved).
// Afterwards every "/* ... */" comment in the joined text is overwritten
// with spaces of the same length.
//
// Returns the resulting text. If the file cannot be opened a message is
// printed and an empty string is returned. If a "/*" has no matching "*/",
// "illegal notation format!" is printed and the text from that point on is
// left untouched.
//
// Known limitation: "//" is cut before block comments are handled, so a
// "//" inside a /* ... */ comment or a string literal still truncates the line.
string removezhushi(string oldpath)
{
	string content;
	ifstream oldfile(oldpath);
	if (oldfile.is_open())
	{
		string line;
		// std::getline on a std::string has no line-length limit.
		while (getline(oldfile, line))
		{
			const string::size_type lineComment = line.find("//");
			if (lineComment != string::npos)
			{
				line.erase(lineComment);
			}
			content += line;
			content += " ";
		}
	}
	else
	{
		cout << "源文件无法打开或无法创建新文件，请重试！\n";
	}

	// Blank out block comments.
	string::size_type open = content.find("/*");
	while (open != string::npos)
	{
		// Start after the opener so that "/*/" is not treated as already closed.
		const string::size_type close = content.find("*/", open + 2);
		if (close == string::npos)
		{
			cout<<"illegal notation format!"<<endl;
			break;
		}
		const string::size_type length = close + 2 - open;
		content.replace(open, length, length, ' ');
		open = content.find("/*", close + 2);
	}
	return content;
}
// Macro replacement.
// Processes directives of the form "#define NAME VALUE;" in content. The
// input is expected to be the newline-free text produced by removezhushi, so
// a directive ends at the first ';' rather than at a line break.
//
// For each directive: NAME is the text up to the next space, VALUE is the
// text between NAME and the ';'. The directive (and one space after it, if
// present) is erased, then every occurrence of NAME that stands alone as an
// identifier is replaced by VALUE across the whole text. Replaced text is
// not rescanned for the same name, so a VALUE that mentions NAME cannot loop
// forever.
//
// A directive with no NAME, no space after NAME, or no terminating ';' ends
// processing; the remaining text is returned unchanged.
string defination(string content)
{
	const string directive = "#define";
	// Characters that may belong to an identifier; used to match whole names only.
	auto isIdentChar = [](char c) {
		return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
		       (c >= '0' && c <= '9') || c == '_';
	};

	string::size_type pos = content.find(directive);
	while (pos != string::npos)
	{
		const string::size_type nameBegin = content.find_first_not_of(' ', pos + directive.length());
		if (nameBegin == string::npos)
		{
			break;
		}
		const string::size_type nameEnd = content.find(' ', nameBegin);
		if (nameEnd == string::npos)
		{
			break;
		}
		const string::size_type valueEnd = content.find(';', nameEnd);
		if (valueEnd == string::npos)
		{
			break;
		}
		// ';' is itself a non-space character, so valueBegin <= valueEnd always holds here.
		const string::size_type valueBegin = content.find_first_not_of(' ', nameEnd);

		const string name = content.substr(nameBegin, nameEnd - nameBegin);
		const string value = content.substr(valueBegin, valueEnd - valueBegin);

		// Remove the directive, plus the single separating space after ';' if there is one.
		string::size_type eraseEnd = valueEnd + 1;
		if (eraseEnd < content.length() && content[eraseEnd] == ' ')
		{
			eraseEnd++;
		}
		content.erase(pos, eraseEnd - pos);

		string::size_type position = content.find(name);
		while (position != string::npos)
		{
			const string::size_type after = position + name.length();
			const bool leftOk = (position == 0) || !isIdentChar(content[position - 1]);
			const bool rightOk = (after >= content.length()) || !isIdentChar(content[after]);
			if (leftOk && rightOk)
			{
				content.replace(position, name.length(), value);
				// Continue after the inserted text so the replacement is not rescanned.
				position = content.find(name, position + value.length());
			}
			else
			{
				position = content.find(name, position + 1);
			}
		}
		pos = content.find(directive);
	}
	return content;
}
// True for ASCII letters, the only characters that may start an identifier.
static bool is_letter(char c){
	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}

// True for ASCII decimal digits.
static bool is_digit(char c){
	return c >= '0' && c <= '9';
}

// Stores c in the global token buffer. Characters that would not leave room
// for the terminating '\0' are dropped, so an over-long lexeme is truncated
// instead of overflowing the buffer.
static void append_to_token(char c){
	if(token_num < static_cast<int>(sizeof(token)) - 1){
		token[token_num++] = c;
	}
}

// Called after the first character of an operator has been stored in token.
// Reads the next character; if it equals `second` it is consumed as part of
// the operator and pair_code is returned, otherwise it stays in ch as the
// lookahead and single_code is returned.
static int one_or_two_char_operator(char second,int pair_code,int single_code){
	ch = getc(fin);
	if(ch == second){
		append_to_token(ch);
		ch = getc(fin);
		return pair_code;
	}
	return single_code;
}

// Scans the next token from fin.
// The lexeme is left in the global `token` (truncated to what fits) and the
// token's kind code is returned:
//   1        number (digits, '.' accepted anywhere after the first digit)
//   2        identifier
//   3..15    keyword, 3 + its index in keyWord (main=3 ... void=15)
//   16..37   delimiters and operators, see the switch below
//   -1       end of input (also returned when fin is not open)
//   -2       '#'
//   -10      any other character
// Blanks, tabs and newlines are skipped; `row` is incremented per newline.
// `ch` always holds the first character not yet consumed.
int judge_token(){
	init_token();
	if(fin == nullptr){
		return -1;
	}
	// Prime the one-character lookahead on the very first call only.
	if(flag == 0){
		ch = getc(fin);
	}
	flag = 1;
	while(ch == ' ' || ch == '\t' || ch == '\n'){
		if(ch == '\n'){
			row++;
		}
		ch = getc(fin);
	}
	token_num = 0;

	// ch is a plain char, so EOF cannot be told apart from the byte 0xFF by
	// value alone; ask the stream whether it has really ended or failed.
	if(ch == static_cast<char>(EOF) && (feof(fin) || ferror(fin))){
		return -1;
	}

	if(is_letter(ch)){
		while(is_letter(ch) || is_digit(ch)){
			append_to_token(ch);
			ch = getc(fin);
		}
		token[token_num] = '\0';
		const int keyword_count = static_cast<int>(sizeof(keyWord) / sizeof(keyWord[0]));
		for(int i = 0;i < keyword_count;i++){
			if(strcmp(token,keyWord[i]) == 0){
				// Keyword codes start at 3 and follow the order of keyWord.
				return 3 + i;
			}
		}
		return 2;
	}

	if(is_digit(ch)){
		while(is_digit(ch) || ch == '.'){
			append_to_token(ch);
			ch = getc(fin);
		}
		token[token_num] = '\0';
		return 1;
	}

	append_to_token(ch);
	switch(ch){
		case '(': ch = getc(fin); return 16;
		case ')': ch = getc(fin); return 17;
		case '{': ch = getc(fin); return 33;
		case '}': ch = getc(fin); return 34;
		case '+': return one_or_two_char_operator('+',29,18);
		case '-': return one_or_two_char_operator('-',30,19);
		case '*': return one_or_two_char_operator('/',32,20);
		case '/':
			ch = getc(fin);
			// Both comment openers, "/*" and "//", are reported with code 31.
			if(ch == '*' || ch == '/'){
				append_to_token(ch);
				ch = getc(fin);
				return 31;
			}
			return 21;
		// NOTE(review): the codes below are kept exactly as before, but "=="
		// and ">" both yield 23, and the values differ from the token table
		// (which has no entry for ">"). They need to be re-assigned once the
		// intended numbering is confirmed.
		case '=': return one_or_two_char_operator('=',23,22);
		case '>': return one_or_two_char_operator('=',24,23);
		case '<': return one_or_two_char_operator('=',26,25);
		case ';': ch = getc(fin); return 27;
		case '"': ch = getc(fin); return 28;
		case '!': return one_or_two_char_operator('=',37,36);
		case '#': ch = getc(fin); return -2;
		case ',': ch = getc(fin); return 35;
		default: ch = getc(fin); return -10;
	}
}

// Drives the lexer: calls judge_token until it reports end of input (-1).
// Prints "(code,lexeme)" for every token, or an error line with the current
// row for an unrecognised character (-10), then the final row count.
void getWord(){
	for(int code = judge_token(); code != -1; code = judge_token()){
		if(code == -10){
			cout<<"第 "<<row<<" 行出现错误."<<endl;
		}
		else{
			cout<<"("<<code<<","<<token<<")"<<endl;
		}
	}
	cout<<"一共有"<<row<<"行"<<endl;
}

// Entry point: preprocesses "compiler.txt" (result echoed and written to
// "new.txt"), then tokenises "compiler.txt" and prints the token list.
// Returns 1 if the source file cannot be opened, 0 otherwise.
int main(){
	fin = fopen("compiler.txt","r");
	if(fin == nullptr){
		// Without this check the lexer would call getc on a null stream.
		cout << "无法打开源文件 compiler.txt\n";
		return 1;
	}
	// NOTE(review): the lexer below reads the original file through fin, not
	// the preprocessed text written to new.txt - confirm this is intended.
	preprocess("compiler.txt");
	getWord();
	fclose(fin);
	fin = nullptr;
	return 0;
}

乐多

原创文章 159 获赞 4 访问量 1万+

关注私信

编译原理 词法分析器

猜你喜欢

编译原理词法分析器