编译原理 词法分析器

单词种类 单词符号 种别码 单词种类 单词符号 种别码
整型常数 digit digit* 1 运算符 * 20
字符串(标识符ID) letter(letter|digit)* 2 运算符 / 21
关键字 main 3 运算符 = 22
关键字 if 4 运算符 >= 23
关键字 else 5 运算符 < 24
关键字 do 6 运算符 <= 25
关键字 while 7 运算符 == 26
关键字 for 8 界符 ; 27
关键字 switch 9 界符 " 28
关键字 case 10 运算符 ++ 29
关键字 int 11 运算符 -- 30
关键字 double 12 注释符 /* 31
关键字 float 13 注释符 */ 32
关键字 long 14 界符 { 33
关键字 void 15 界符 } 34
界符 ( 16 界符 , 35
界符 ) 17 运算符 ! 36
运算符 + 18 运算符 != 37
运算符 - 19 EOF -1
运算符 * 20 界符 # -2
其他 -10

我的实现包括编译预处理(去注释、宏替换、去多余空格),并将预处理结果写入新文件 new.txt。

#include <stdio.h>
#include <string.h>
#include <cctype>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <iostream>
#include <stack>
#include <string>
using namespace std;
// Not referenced by any code visible in this file.
const int max_word = 505;
// Text of the token most recently scanned by judge_token(); cleared by
// init_token() at the start of every scan.
char token[12];
// Not referenced by any code visible in this file.
char in[105];
// fin: input stream opened in main() and read by judge_token().
// fout: not referenced by the visible code (preprocess() declares its own
// local stream with the same name).
FILE *fin,*fout;
// token_num: write index into token, reset by judge_token() on each call.
// cnt: not referenced by any code visible in this file.
int cnt = 0,token_num = 0;
// Line counter; judge_token() increments it for every '\n' it skips.
int row = 1;
// 0 until judge_token() has read the first character from fin, 1 afterwards.
int flag = 0;
// One-character lookahead shared between successive judge_token() calls.
char ch;
// Keywords recognised by judge_token()
const char keyWord[13][20] = {"main","if","else","do","while","for","switch",
"case","int","double","float","long","void"};
// Forward declarations of the preprocessing passes used by preprocess().
string defination(string content);
string removezhushi(string oldpath);
string removespace(string content);
// Resets the global token buffer so that every byte is '\0'.
void init_token(){
	memset(token, '\0', sizeof(token));
}
// Preprocesses the source file at `oldpath`.
//
// The text goes through three passes in this order: comment removal
// (removezhushi), macro substitution (defination) and space clean-up
// (removespace). The result is echoed to standard output, written to
// "new.txt", and returned to the caller.
string preprocess(string oldpath)
{
	string processed = removespace(defination(removezhushi(oldpath)));
	cout << processed << endl;

	// Persist the preprocessed text next to the original input.
	ofstream newFile("new.txt");
	newFile << processed;
	return processed;
}
// Collapses every run of consecutive spaces in `content` into a single space.
//
// Only the ' ' character is treated as a space; tabs and newlines are copied
// through unchanged. An empty string is returned unchanged.
//
// The previous implementation erased a run of exactly two spaces completely
// (gluing the neighbouring tokens together, e.g. "int  x" -> "intx") and
// indexed past the end of an empty string because `length() - 1` wraps around
// for unsigned sizes.
std::string removespace(std::string content)
{
	std::string collapsed;
	collapsed.reserve(content.length());

	bool previousWasSpace = false;
	for (const char c : content)
	{
		const bool isSpace = (c == ' ');
		// Keep the first space of a run, drop the rest.
		if (!(isSpace && previousWasSpace))
		{
			collapsed += c;
		}
		previousWasSpace = isSpace;
	}
	return collapsed;
}
// Reads the file at `oldpath` and returns its text with comments removed.
//
// Pass 1 (per line): everything from the first "//" to the end of the line is
// dropped, then the line and a single separating space are appended to the
// result, so the returned text contains no newlines.
// Pass 2 (whole text): every "/* ... */" region is overwritten with spaces,
// which keeps the text length unchanged. If a "/*" has no matching "*/", a
// diagnostic is printed and the remaining text is left as it is.
//
// If the file cannot be opened a diagnostic is printed and an empty string is
// returned.
//
// Note: "//" is stripped before block comments are examined, so a "//" that
// appears inside a block comment or a string literal also truncates the line.
std::string removezhushi(std::string oldpath)
{
	std::string content;
	std::ifstream oldfile(oldpath.c_str());
	if (oldfile.is_open())
	{
		// std::getline on std::string has no fixed line-length limit and
		// never exposes stale bytes from a previous, longer line.
		std::string line;
		while (std::getline(oldfile, line))
		{
			const std::string::size_type slashes = line.find("//");
			if (slashes != std::string::npos)
			{
				line.erase(slashes);
			}
			content += line;
			content += " ";
		}
	}
	else
	{
		std::cout << "源文件无法打开或无法创建新文件,请重试!\n";
	}

	// Blank out block comments.
	std::string::size_type open = content.find("/*");
	while (open != std::string::npos)
	{
		// Start after the opener so that "/*/" does not close itself.
		const std::string::size_type close = content.find("*/", open + 2);
		if (close == std::string::npos)
		{
			std::cout << "illegal notation format!" << std::endl;
			break;
		}
		const std::string::size_type span = close + 2 - open;
		content.replace(open, span, span, ' ');
		open = content.find("/*", close + 2);
	}
	return content;
}
// True for characters that can be part of an identifier (letter, digit, '_').
static bool isMacroIdentChar(char c)
{
	return std::isalnum(static_cast<unsigned char>(c)) != 0 || c == '_';
}

// Expands object-like macros written as "#define NAME VALUE;" in `content`.
//
// For each directive, in order of appearance:
//   * NAME is the first space-delimited word after "#define";
//   * VALUE is the text between NAME and the next ';', with surrounding
//     spaces trimmed;
//   * the directive (through the ';' and one following space, if present) is
//     removed from the text;
//   * every remaining occurrence of NAME is replaced by VALUE.
//
// Differences from the previous implementation, all of which were defects:
//   * scanning for the space / ';' is bounds-checked; a malformed directive
//     (no separator, no ';') is left in place instead of reading past the end;
//   * the search resumes after each inserted VALUE, so a VALUE that contains
//     NAME no longer loops forever;
//   * NAME is not replaced when it is only part of a longer identifier
//     (e.g. N inside Num);
//   * only a space is swallowed after the ';', not an arbitrary character.
std::string defination(std::string content)
{
	const std::string directive = "#define";
	const std::string::size_type npos = std::string::npos;

	std::string::size_type pos = content.find(directive);
	while (pos != npos)
	{
		const std::string::size_type afterDirective = pos + directive.length();
		const std::string::size_type nameBegin = content.find_first_not_of(' ', afterDirective);
		const std::string::size_type nameEnd =
			(nameBegin == npos) ? npos : content.find(' ', nameBegin);
		const std::string::size_type semicolon =
			(nameBegin == npos) ? npos : content.find(';', nameBegin);

		const bool wellFormed = nameBegin != npos && nameBegin > afterDirective &&
			nameEnd != npos && semicolon != npos && nameEnd < semicolon;
		if (!wellFormed)
		{
			// Leave the malformed directive alone and look for the next one.
			pos = content.find(directive, afterDirective);
			continue;
		}

		const std::string name = content.substr(nameBegin, nameEnd - nameBegin);

		// VALUE: text between the name and ';', trimmed of spaces.
		std::string value = content.substr(nameEnd, semicolon - nameEnd);
		const std::string::size_type valueBegin = value.find_first_not_of(' ');
		if (valueBegin == npos)
		{
			value.clear();
		}
		else
		{
			value = value.substr(valueBegin, value.find_last_not_of(' ') - valueBegin + 1);
		}

		// Remove the directive itself, plus the separating space after ';'.
		std::string::size_type eraseEnd = semicolon + 1;
		if (eraseEnd < content.length() && content[eraseEnd] == ' ')
		{
			++eraseEnd;
		}
		content.erase(pos, eraseEnd - pos);

		// Substitute whole-word occurrences of the macro name.
		std::string::size_type at = content.find(name);
		while (at != npos)
		{
			const std::string::size_type after = at + name.length();
			const bool leftOk = at == 0 ||
				!(isMacroIdentChar(content[at - 1]) && isMacroIdentChar(name.front()));
			const bool rightOk = after >= content.length() ||
				!(isMacroIdentChar(content[after]) && isMacroIdentChar(name.back()));
			if (leftOk && rightOk)
			{
				content.replace(at, name.length(), value);
				// Skip the inserted text so a self-referential value terminates.
				at = content.find(name, at + value.length());
			}
			else
			{
				at = content.find(name, at + 1);
			}
		}

		pos = content.find(directive);
	}
	return content;
}
// Appends one character to the global token buffer. Characters that do not
// fit are dropped so that the last byte of the buffer always stays '\0'
// (the buffer is zero-filled by init_token() before each scan).
static void append_to_token(char c){
	if(token_num < static_cast<int>(sizeof(token)) - 1){
		token[token_num++] = c;
	}
}

// True for ASCII letters.
static bool is_token_letter(char c){
	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}

// True for ASCII decimal digits.
static bool is_token_digit(char c){
	return c >= '0' && c <= '9';
}

// Consumes the current one-character token and returns `code`.
static int consume_single(int code){
	ch = getc(fin);
	return code;
}

// Called with the first character of an operator already stored in token.
// Reads the next character; if it equals `second` the two-character operator
// is completed and `pairCode` is returned, otherwise the character is kept as
// lookahead and `singleCode` is returned.
static int match_second_char(char second, int pairCode, int singleCode){
	ch = getc(fin);
	if(ch == second){
		append_to_token(ch);
		ch = getc(fin);
		return pairCode;
	}
	return singleCode;
}

// Scans the next token from `fin` into the global `token` buffer and returns
// its category code.
//
// Return values:
//    1   number (digits and '.' characters)
//    2   identifier
//    3   keyword (any entry of keyWord)
//   16.. operator / delimiter codes, see the switch below
//   -1   end of input (also returned when `fin` is not open or a read fails)
//   -2   '#'
//  -10   any other character
//
// Side effects: advances `fin`, keeps one character of lookahead in `ch`,
// increments `row` for every '\n' skipped, and overwrites `token`.
// Lexemes longer than the token buffer are truncated rather than overflowing.
//
// NOTE(review): the codes returned for '=', '==', '>', '>=', '<' and '<=' are
// kept exactly as they were; they do not match the table published with this
// program, and '>' and '==' both return 23. Likewise "//" and "/*" both
// return 31, and every keyword returns 3. Confirm the intended numbering
// before changing them.
int judge_token(){
	init_token();
	if(fin == nullptr){
		return -1;
	}
	// The very first call has no lookahead yet.
	if(flag == 0){
		ch = getc(fin);
	}
	flag = 1;
	// Skip whitespace, counting lines. '\r' is skipped as well so CRLF files
	// are not reported as errors.
	while(ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n'){
		if(ch == '\n'){
			row++;
		}
		ch = getc(fin);
	}
	token_num = 0;
	// `ch` is a char, so comparing it with EOF is not portable; ask the stream
	// instead. The indicator is only set once getc() has actually failed, i.e.
	// exactly when the lookahead holds no real character.
	if(feof(fin) || ferror(fin)){
		return -1;
	}
	if(is_token_letter(ch)){
		// Identifier or keyword: letter (letter | digit)*
		while(is_token_letter(ch) || is_token_digit(ch)){
			append_to_token(ch);
			ch = getc(fin);
		}
		token[token_num++] = '\0';
		for(int i = 0;i < 13;i++){
			if(strcmp(token,keyWord[i]) == 0){
				return 3;
			}
		}
		return 2;
	}
	if(is_token_digit(ch)){
		// Number: a digit followed by digits and '.' characters.
		while(is_token_digit(ch) || ch == '.'){
			append_to_token(ch);
			ch = getc(fin);
		}
		return 1;
	}
	append_to_token(ch);
	switch(ch){
		case '(': return consume_single(16);
		case ')': return consume_single(17);
		case '{': return consume_single(33);
		case '}': return consume_single(34);
		case '+': return match_second_char('+', 29, 18);
		case '-': return match_second_char('-', 30, 19);
		case '*': return match_second_char('/', 32, 20);
		case '/':
			ch = getc(fin);
			// Both comment openers share code 31.
			if(ch == '*' || ch == '/'){
				append_to_token(ch);
				ch = getc(fin);
				return 31;
			}
			return 21;
		case '=': return match_second_char('=', 23, 22);
		case '>': return match_second_char('=', 24, 23);
		case '<': return match_second_char('=', 26, 25);
		case ';': return consume_single(27);
		case '"': return consume_single(28);
		case '!': return match_second_char('=', 37, 36);
		case '#': return consume_single(-2);
		case ',': return consume_single(35);
		default:  return consume_single(-10);
	}
}

// Drives the lexer: calls judge_token() until it reports end of input (-1),
// printing "(code,lexeme)" for each token and an error line (with the current
// row) for code -10. Finishes by printing the value of the row counter.
void getWord(){
	for(int code = judge_token(); code != -1; code = judge_token()){
		if(code == -10){
			cout<<"第 "<<row<<" 行出现错误."<<endl;
		}
		else{
			cout<<"("<<code<<","<<token<<")"<<endl;
		}
	}
	cout<<"一共有"<<row<<"行"<<endl;
}

int main(){
	fin = fopen("compiler.txt","r");
	//去掉单行注释
	string content=preprocess("compiler.txt");
	//去多行注释
    //string newcontent=deletemore(content);
	getWord();
	return 0;
}
原创文章 159 获赞 4 访问量 1万+

猜你喜欢

转载自blog.csdn.net/weixin_44769592/article/details/105113600