【编译原理】基于Flex构造词法分析器

【问题描述】设计c语言常见单词的正规式,编制lex源文件,利用flex编译得到词法分析的.c文件,继而对该文件编译得到词法分析器。

【输入形式】输入一段c语言程序

【输出形式】各类单词的token字,或者给出程序中的单词错误。

【样例输入】

int main(){

    int a = 10;

    double b = 20.9;

    if(a <= b)

        a+=b;

    else a = 0;

    return a;

}



【样例输出】

line1:(type, int)

line1:(keyword, main)

line1:(bracket, ()

line1:(bracket, ))

line1:(bracket, {)

line2:(type, int)

line2:(identify, a)

line2:(OPT, =)

line2:(integer, 10)

line2:(bracket, ;)

line3:(type, double)

line3:(identify, b)

line3:(OPT, =)

line3:(decimal, 20.9)

line3:(bracket, ;)

line4:(keyword, if)

line4:(bracket, ()

line4:(identify, a)

line4:(OPT, <=)

line4:(identify, b)

line4:(bracket, ))

line5:(identify, a)

line5:(OPT, +=)

line5:(identify, b)

line5:(bracket, ;)

line6:(keyword, else)

line6:(identify, a)

line6:(OPT, =)

line6:(integer, 0)

line6:(bracket, ;)

line7:(keyword, return)

line7:(identify, a)

line7:(bracket, ;)

line8:(bracket, })



【样例说明】需要识别的关键字包括:void, int, main, double, return, float, if, else, do, while, for, scanf, printf, char, sqrt, abs;需要识别的运算符包括算术、关系、逻辑、位运算符;需要识别的其他单词有标识符、整数(十进制形式、指数形式)、实数(十进制形式、指数形式)、字符串;过滤注释及空格。

【评分标准】根据设计文档的质量、lex文件的正确性,代码的正确性、代码的时间空间复杂度、识别单词的种类等综合评分

相关代码

%option noyywrap

%{
    
    
     #include <stdio.h>
     #include <stdlib.h>
     #include <string.h>
     /* 1-based number of the input line currently being scanned. */
     int line = 1;

     /*
      * One recognised token. Tokens are chained into a singly linked list so
      * that printing can be deferred until the whole input has been scanned.
      */
     typedef struct msg {
         int line;            /* line the token was found on */
         char type[100];      /* category label, e.g. "keyword" or "OPT" */
         char text[100];      /* the lexeme; truncated if it does not fit */
         struct msg *next;    /* following token, or NULL at the tail */
     } msg, *Msg;

     /* m: head node of the list; p: cursor, the tail while store() appends. */
     Msg m, p;

     /*
      * Append a token record after the node p points at and make p point to
      * the new node.
      *
      * line: line number recorded for the token
      * type: category label, copied into the record
      * text: lexeme, copied into the record
      *
      * p must already point at a valid node (main sets it to the head node).
      * Strings longer than the fixed-size fields are truncated rather than
      * written past the end of the record. The process exits with a failure
      * status if memory cannot be allocated.
      */
     void store(int line, const char *type, const char *text)
     {
         Msg newmsg = malloc(sizeof *newmsg);

         if (newmsg == NULL) {
             fprintf(stderr, "store: out of memory\n");
             exit(EXIT_FAILURE);
         }

         newmsg->line = line;
         /* snprintf always NUL-terminates and never writes past the field. */
         snprintf(newmsg->type, sizeof newmsg->type, "%s", type);
         snprintf(newmsg->text, sizeof newmsg->text, "%s", text);
         /* The new node is the tail: list traversal stops on this NULL. */
         newmsg->next = NULL;

         p->next = newmsg;
         p = newmsg;
     }

     /* Set to 1 once a lexical error has been reported; main checks it. */
     int error = 0;
%}
/*
 * Named patterns referenced from the rules section.
 *
 * integer : optionally signed decimal integer; a lone 0 is accepted.
 * decimal : an integer followed by a literal '.' and at least one digit.
 * float   : exponent form; the digits after e/E are optional in the
 *           pattern, which lets the rule action see a dangling exponent.
 *
 * NOTE(review): integer and decimal include an optional leading sign, so
 * with longest-match scanning "a+5" yields the tokens "a" and "+5" rather
 * than "a", "+", "5". Left unchanged here; confirm the intended output.
 */
digital      [0-9]
integer      ("+"|"-")?(0|[1-9]{digital}*)
decimal      {integer}\.{digital}+
float        [0-9]*([0-9]\.?|\.[0-9])[0-9]*([Ee]([-+]?[0-9]+)?)
alphabet     [A-Za-z]
identifier   ({alphabet}|"_")({alphabet}|"_"|{digital})*
operator     ("+"|"-"|"*"|"/"|"%"|"+="|"-="|"*="|"/="|"%="|"++"|"--"|">"|"<"|">="|"<="|"=="|"!="|"="|"&&"|"||"|"!"|"&"|"|"|"^"|"~"|"<<"|">>")
type         ("int"|"void"|"char"|"double"|"short")
keyword      ("if"|"else"|"do"|"while"|"scanf"|"for"|"printf"|"return"|"sqrt"|"abs"|"main"|"float")
typeidentify ("%"|"&"){alphabet}
%%
 /* A newline is not a token; it only advances the line counter. */
\n              { ++line; }

 /* type and keyword are listed before identifier: on a match of equal
    length the earlier rule wins, so reserved words are not identifiers. */
{type}          { store(line, "type", yytext); }
{keyword}       { store(line, "keyword", yytext); }

{integer}       { store(line, "integer", yytext); }
{decimal}       { store(line, "decimal", yytext); }
{float}         {
                    /* The pattern allows the exponent digits to be absent,
                       so a dangling exponent marker ("1e", "2.5E") ends up
                       here and is reported as an error. */
                    char last = yytext[yyleng - 1];

                    if (last == 'e' || last == 'E') {
                        error = 1;
                        printf("Error at Line %d: Illegal floating point number \"%s\".\n", line, yytext);
                    }
                    store(line, "float", yytext);
                }

{typeidentify}  { store(line, "typeidentify", yytext); }
{identifier}    { store(line, "identify", yytext); }

("("|")"|"{"|"}"|"["|"]"|"\""|","|";") { store(line, "bracket", yytext); }

{operator}      { store(line, "OPT", yytext); }

 /* Line comment: discarded. */
"//".*          { }

 /* Block comment: discarded, but the newlines it contains still count
    towards the line number of the tokens that follow. */
[/][*][^*]*[*]+([^*/][^*]*[*]+)*[/] {
                    int i;

                    for (i = 0; i < yyleng; ++i) {
                        if (yytext[i] == '\n')
                            ++line;
                    }
                }

 /* Whitespace and any other unmatched character are dropped. */
.               { }
%%
/*
 * Tokens are stored first and printed together afterwards, so that nothing
 * else is printed once an error has been reported.
 */

/*
 * Scan standard input and print one "lineN:(type, text)" line per stored
 * token. If the scanner set the error flag, no tokens are printed.
 *
 * Returns 0 after scanning (also when a lexical error was reported) and
 * EXIT_FAILURE if the list head cannot be allocated.
 */
int main(void)
{
    /* m is a head node that carries no token; tokens start at m->next. */
    m = malloc(sizeof *m);
    if (m == NULL) {
        fprintf(stderr, "main: out of memory\n");
        return EXIT_FAILURE;
    }
    /* Without this, an input with no tokens would leave next uninitialised. */
    m->next = NULL;
    p = m;

    yyin = stdin;
    yylex();

    if (error == 1)
        return 0;

    /* The list is not freed: it is needed until the process exits. */
    for (p = m->next; p != NULL; p = p->next)
        printf("line%d:(%s, %s)\n", p->line, p->type, p->text);

    return 0;
}

猜你喜欢

转载自blog.csdn.net/weixin_44795952/article/details/112407065