一.算符优先分析法即是一种针对算符优先文法的分析方法。
二.如果一个文法的任一产生式的右部都不存在两个相邻的非终结符,则称这个文法为算符文法(OG)。
三.假定文法G是一个不含ε的算符文法,a,b∈Vt,P,Q,R∈Vn,满足:
(一)a=b当且仅当文法G含有形如P->...ab...或者P->...aQb...的产生式
(二)a<b当且仅当文法G含有形如P->...aR...的产生式,且R加号推导b...或者R加号推导Qb...
(三)a>b当且仅当文法G含有形如P->...Rb...的产生式,且R加号推导...a或者R加号推导...aQ
如果文法G中任何终结符号对(a,b)至多满足上述三种关系之一,则称文法G为算符优先文法(operator precedence grammar)
四.(一)FirstVt(P)={a|P加号推导a...或者P加号推导Qa...,a∈Vt,Q∈Vn}
LastVt(P)={a|P加号推导...a或者P加号推导...aQ,a∈Vt,Q∈Vn}
(二)假如一个产生式存在Q->...aP...的形式,那么对于任意b∈FirstVt(P),有a<b
假如一个产生式存在Q->...Pa...的形式,那么对于任意b∈LastVt(P),有b>a
(三)若存在产生式P->a...或者P->Qa...,则a∈FirstVt(P)
若存在产生式P->Q...,对于任意a∈FirstVt(Q),有a∈FirstVt(P)
若存在产生式P->...a或者P->...aQ,则a∈LastVt(P)
若存在产生式P->...Q,对于任意a∈LastVt(Q),有a∈LastVt(P)
五.求出算符优先关系,构造算符优先分析表,#<任意其它算符,任意其它算符>#,#=#
六.1.初始化:#入栈,在输入串尾部加上#,栈顶指针p,输入串头指针为s
2.p指向的符号为非终结符则下移,为终结符(记为a)则执行步骤3
3.s指向的符号记为b,查算符优先分析表,若(a,b)为<或者=,则将s指向的符号入栈,若为>则执行步骤4
4.从栈顶开始向下找两个终结符c,d(c靠近栈顶,c和d之间不存在其它终结符,c和d相邻或者其间只有非终结符),查表若(d,c)为=,则继续向下找终结符e,将d,e重新记为c,d,重复步骤4,若(d,c)为<则执行步骤5,若为空则不匹配,输入串不是该文法的句子
5.d上面到栈顶的所有符号为最左素短语,可规约为非终结符,栈顶指针p指向这个非终结符,s指向下一个符号,栈中只存在一个非终结符和#,输入串只剩下#,则规约完毕输入串是该文法的句子,否则执行步骤2
/*
 * Operator-precedence parser.
 *
 * Reads a grammar from "OPG.txt" (one rule per line, written as X->alpha|beta,
 * where every character is one symbol), computes the FirstVt/LastVt sets of
 * every non-terminal, builds the operator-precedence table, then reads one
 * input string from stdin and reports whether it is a sentence of the grammar
 * while printing every shift/reduce step.
 */
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define TERMINAL_SYMBOL_MAX_NUM 10
#define FIRST_VT_SET_MAX_NUM TERMINAL_SYMBOL_MAX_NUM
#define LAST_VT_SET_MAX_NUM TERMINAL_SYMBOL_MAX_NUM
#define STRING_MAX_LENGTH 10

/* One symbol of a production's right-hand side; also used as a parse-stack cell. */
typedef struct right {
    char c;
    int label;              /* -1: non-terminal; >= 0: index into terminal_symbol[] */
    struct right *next;
    struct right *pre;
} *PRight, Right;

/* One production: left_symbol -> right_head ... right_tail. */
typedef struct grammar {
    char left_symbol;
    Right *right_head;
    Right *right_tail;
    struct grammar *next;
} *PGrammar, Grammar;

/* One character of the input string (singly linked). */
typedef struct str {
    char c;
    struct str *next;
} *PStr, Str;

/* A non-terminal together with its FirstVt/LastVt sets (indexed like terminal_symbol[]). */
typedef struct non_terminal {
    char c;
    bool isFirstInvariant;  /* false when firstVtSet grew during the current iteration */
    bool isLastInvariant;   /* false when lastVtSet grew during the current iteration */
    bool firstVtSet[FIRST_VT_SET_MAX_NUM];
    bool lastVtSet[LAST_VT_SET_MAX_NUM];
    struct non_terminal *next;
} *PNon_terminal, Non_terminal;

typedef Right Stack;
typedef Stack *PStack;

char terminal_symbol[TERMINAL_SYMBOL_MAX_NUM];
/* Operator-precedence table: '<', '=', '>' or '\0' (no relation). */
char operator_precedence[TERMINAL_SYMBOL_MAX_NUM][TERMINAL_SYMBOL_MAX_NUM];
Grammar *head_Grammar, *tail_Grammar;
Non_terminal *head_Non_terminal, *tail_Non_terminal;

void ReadGrammar(FILE *fp);
void FindAllNonTerminal(void);
bool IsNonTerminal(char c);
void FindAndMark(void);
void FindAllTerminal(void);
bool IsNewTerminal(Right *pR);
void FirstLastVtSet(void);  /* compute FirstVt and LastVt of every non-terminal */
void AddFirstLast(Non_terminal *pN, Right *pR, bool type);  /* false: FirstVt, true: LastVt */
void OPATable(void);        /* build the operator-precedence table */
void Input(Str *s);
void Judge(Str *s);
void PrintGrammar(void);
void PrintSymbol(void);
void PrintFirstLastVtSet(void);
void PrintOPATable(void);
void PrintJudge(Stack *pS, Stack *last_terminal, Str *p, int i);

/* malloc wrapper: terminates the program instead of returning NULL. */
static void *AllocOrDie(size_t size)
{
    void *p = malloc(size);

    if (p == NULL) {
        fprintf(stderr, "Out of memory!\n");
        exit(1);
    }
    return p;
}

/* Returns the non-terminal record for c, or NULL when c is not a non-terminal. */
static Non_terminal *FindNonTerminal(char c)
{
    for (Non_terminal *pN = head_Non_terminal; pN != NULL; pN = pN->next) {
        if (pN->c == c)
            return pN;
    }
    return NULL;
}

/* Returns the index of c in terminal_symbol[], or TERMINAL_SYMBOL_MAX_NUM when absent. */
static int TerminalIndex(char c)
{
    for (int i = 0; i < TERMINAL_SYMBOL_MAX_NUM; i++) {
        if (terminal_symbol[i] == c)
            return i;
    }
    return TERMINAL_SYMBOL_MAX_NUM;
}

/* Releases the production list and the non-terminal list. */
static void FreeGrammarLists(void)
{
    while (head_Grammar != NULL) {
        Grammar *next_production = head_Grammar->next;
        Right *symbol = head_Grammar->right_head;

        while (symbol != NULL) {
            Right *next_symbol = symbol->next;
            free(symbol);
            symbol = next_symbol;
        }
        free(head_Grammar);
        head_Grammar = next_production;
    }
    tail_Grammar = NULL;

    while (head_Non_terminal != NULL) {
        Non_terminal *next_nt = head_Non_terminal->next;
        free(head_Non_terminal);
        head_Non_terminal = next_nt;
    }
    tail_Non_terminal = NULL;
}

int main(int argc, char *argv[])
{
    FILE *fp;
    Str *s;

    (void)argc;
    (void)argv;

    if ((fp = fopen("OPG.txt", "r")) == NULL) {
        printf("File open failed!\n");
        exit(1);
    }

    /* s is a sentinel head node; the input characters hang off s->next. */
    s = AllocOrDie(sizeof *s);
    s->c = '\0';
    s->next = NULL;

    head_Grammar = NULL;
    tail_Grammar = NULL;
    head_Non_terminal = NULL;
    tail_Non_terminal = NULL;
    memset(operator_precedence, '\0', sizeof operator_precedence);

    ReadGrammar(fp);
    fclose(fp);

    PrintGrammar();
    FindAllNonTerminal();
    FindAndMark();
    FindAllTerminal();
    PrintSymbol();
    FirstLastVtSet();
    PrintFirstLastVtSet();
    OPATable();
    PrintOPATable();
    Input(s);
    Judge(s);

    free(s);
    FreeGrammarLists();
    return 0;
}

/* Discards the rest of the current line (up to and including '\n' or '\r', or EOF). */
static void SkipLine(FILE *fp)
{
    int ch;

    do {
        ch = fgetc(fp);
    } while (ch != EOF && ch != '\n' && ch != '\r');
}

/*
 * Reads one character and checks that it is `expected`.  On mismatch the rest
 * of the line is discarded (unless the mismatching character already ended it).
 */
static bool ExpectChar(FILE *fp, int expected)
{
    int ch = fgetc(fp);

    if (ch == expected)
        return true;
    if (ch != EOF && ch != '\n' && ch != '\r')
        SkipLine(fp);
    return false;
}

/* Appends symbol c to the right-hand side of production pG. */
static void AppendSymbol(Grammar *pG, char c)
{
    Right *pR = AllocOrDie(sizeof *pR);

    pR->c = c;
    pR->label = 0;          /* classified later by FindAndMark() */
    pR->next = NULL;
    pR->pre = pG->right_tail;
    if (pG->right_head == NULL)
        pG->right_head = pR;
    else
        pG->right_tail->next = pR;
    pG->right_tail = pR;
}

/* Appends production pG to the global production list. */
static void AppendProduction(Grammar *pG)
{
    if (head_Grammar == NULL)
        head_Grammar = pG;
    else
        tail_Grammar->next = pG;
    tail_Grammar = pG;
}

/*
 * Reads the grammar from fp into the global production list.
 *
 * Each line has the form X->alpha|beta|...; every alternative becomes its own
 * production with left symbol X.  Blank lines (including the "\n" half of a
 * CRLF pair and a trailing newline at end of file) are skipped, so no phantom
 * production is created.  Lines without "->" and empty alternatives are
 * reported on stderr and ignored, because the later passes require a
 * non-empty right-hand side.
 */
void ReadGrammar(FILE *fp)
{
    int ch;     /* int, not char, so EOF stays distinguishable from data */

    while ((ch = fgetc(fp)) != EOF) {
        char left_symbol;
        bool line_done = false;

        if (ch == '\n' || ch == '\r')
            continue;
        left_symbol = (char)ch;

        if (!ExpectChar(fp, '-') || !ExpectChar(fp, '>')) {
            fprintf(stderr, "Skipping malformed production line starting with '%c'.\n",
                    left_symbol);
            continue;
        }

        while (!line_done) {
            Grammar *pG = AllocOrDie(sizeof *pG);

            pG->left_symbol = left_symbol;
            pG->right_head = NULL;
            pG->right_tail = NULL;
            pG->next = NULL;

            for (;;) {
                ch = fgetc(fp);
                if (ch == EOF || ch == '\n' || ch == '\r') {
                    line_done = true;   /* the whole rule line has been read */
                    break;
                }
                if (ch == '|')
                    break;              /* another alternative for the same left symbol follows */
                AppendSymbol(pG, (char)ch);
            }

            if (pG->right_head == NULL) {
                fprintf(stderr, "Ignoring empty alternative for '%c'.\n", left_symbol);
                free(pG);
                continue;
            }
            AppendProduction(pG);
        }
    }
    printf("算符优先文法文法读取完毕!\n");
}

/* Prints every production as X->alpha, one per line. */
void PrintGrammar(void)
{
    for (Grammar *pG = head_Grammar; pG != NULL; pG = pG->next) {
        printf("%c->", pG->left_symbol);
        for (Right *pR = pG->right_head; pR != NULL; pR = pR->next)
            printf("%c", pR->c);
        printf("\n");
    }
}

/* Collects the distinct left-hand symbols of all productions as the non-terminals. */
void FindAllNonTerminal(void)
{
    for (Grammar *pG = head_Grammar; pG != NULL; pG = pG->next) {
        Non_terminal *pN;

        if (IsNonTerminal(pG->left_symbol))
            continue;       /* already recorded */

        pN = AllocOrDie(sizeof *pN);
        pN->c = pG->left_symbol;
        pN->isFirstInvariant = false;
        pN->isLastInvariant = false;
        memset(pN->firstVtSet, 0, sizeof pN->firstVtSet);
        memset(pN->lastVtSet, 0, sizeof pN->lastVtSet);
        pN->next = NULL;

        if (head_Non_terminal == NULL)
            head_Non_terminal = pN;
        else
            tail_Non_terminal->next = pN;
        tail_Non_terminal = pN;
    }
    printf("Have found all non_terminals!\n");
}

/* Returns true when c is one of the recorded non-terminals. */
bool IsNonTerminal(char c)
{
    return FindNonTerminal(c) != NULL;
}

/* Labels every right-hand-side symbol: -1 for non-terminals, 0 (provisional) for terminals. */
void FindAndMark(void)
{
    for (Grammar *pG = head_Grammar; pG != NULL; pG = pG->next) {
        for (Right *pR = pG->right_head; pR != NULL; pR = pR->next)
            pR->label = IsNonTerminal(pR->c) ? -1 : 0;
    }
    printf("Find and Mark finished!\n");
}

/*
 * Fills terminal_symbol[] with the distinct terminals in order of appearance,
 * rewrites each terminal's label to its index, and appends the end marker '#'.
 * Exits with an error when the grammar has more terminals than the table can
 * hold (one slot is reserved for '#').
 */
void FindAllTerminal(void)
{
    int count = 0;

    for (Grammar *pG = head_Grammar; pG != NULL; pG = pG->next) {
        for (Right *pR = pG->right_head; pR != NULL; pR = pR->next) {
            if (pR->label != 0)
                continue;   /* non-terminal */
            if (!IsNewTerminal(pR))
                continue;   /* known terminal: IsNewTerminal stored its index in label */
            if (count >= TERMINAL_SYMBOL_MAX_NUM - 1) {
                fprintf(stderr, "Too many terminal symbols (at most %d besides '#').\n",
                        TERMINAL_SYMBOL_MAX_NUM - 1);
                exit(1);
            }
            terminal_symbol[count] = pR->c;
            pR->label = count;
            count++;
        }
    }
    terminal_symbol[count] = '#';
    printf("Have found all terminal symbols!\n");
}

/*
 * Returns true when pR->c is not yet in terminal_symbol[].  When it is already
 * there, stores its index in pR->label and returns false.
 */
bool IsNewTerminal(Right *pR)
{
    for (int i = 0; i < TERMINAL_SYMBOL_MAX_NUM && terminal_symbol[i] != '\0'; i++) {
        if (pR->c == terminal_symbol[i]) {
            pR->label = i;
            return false;
        }
    }
    return true;
}

/* Adds terminal `label` to `set`, clearing *invariant when the set actually grows. */
static void MarkVt(bool *set, int label, bool *invariant)
{
    if (!set[label]) {
        set[label] = true;
        *invariant = false;
    }
}

/*
 * Computes FirstVt and LastVt of every non-terminal by iterating to a fixed
 * point:
 *   P->a... or P->Qa...  =>  a in FirstVt(P);   P->Q...  =>  FirstVt(Q) in FirstVt(P)
 *   P->...a or P->...aQ  =>  a in LastVt(P);    P->...Q  =>  LastVt(Q) in LastVt(P)
 * The loop stops after the first pass in which no set grew.
 */
void FirstLastVtSet(void)
{
    bool changed;

    do {
        /* Assume nothing changes; MarkVt/AddFirstLast clear the flags on growth. */
        for (Non_terminal *pN = head_Non_terminal; pN != NULL; pN = pN->next) {
            pN->isFirstInvariant = true;
            pN->isLastInvariant = true;
        }

        for (Non_terminal *pN = head_Non_terminal; pN != NULL; pN = pN->next) {
            for (Grammar *pG = head_Grammar; pG != NULL; pG = pG->next) {
                Right *first;
                Right *last;

                if (pG->left_symbol != pN->c || pG->right_head == NULL)
                    continue;

                first = pG->right_head;
                if (first->label != -1) {
                    MarkVt(pN->firstVtSet, first->label, &pN->isFirstInvariant);
                } else {
                    AddFirstLast(pN, first, false);
                    if (first->next != NULL && first->next->label != -1)
                        MarkVt(pN->firstVtSet, first->next->label, &pN->isFirstInvariant);
                }

                last = pG->right_tail;
                if (last->label != -1) {
                    MarkVt(pN->lastVtSet, last->label, &pN->isLastInvariant);
                } else {
                    AddFirstLast(pN, last, true);
                    if (last->pre != NULL && last->pre->label != -1)
                        MarkVt(pN->lastVtSet, last->pre->label, &pN->isLastInvariant);
                }
            }
        }

        changed = false;
        for (Non_terminal *pN = head_Non_terminal; pN != NULL; pN = pN->next) {
            if (!pN->isFirstInvariant || !pN->isLastInvariant)
                changed = true;
        }
    } while (changed);

    printf("FirstVtSet and LastVtSet finished!\n");
}

/*
 * Merges the FirstVt (type == false) or LastVt (type == true) set of the
 * non-terminal named by pR->c into the corresponding set of pN, clearing pN's
 * invariant flag when the set grows.  Does nothing when pR is not a known
 * non-terminal.
 */
void AddFirstLast(Non_terminal *pN, Right *pR, bool type)
{
    Non_terminal *source = FindNonTerminal(pR->c);

    if (source == NULL)
        return;

    if (type) {
        for (int i = 0; i < LAST_VT_SET_MAX_NUM; i++) {
            if (source->lastVtSet[i])
                MarkVt(pN->lastVtSet, i, &pN->isLastInvariant);
        }
    } else {
        for (int i = 0; i < FIRST_VT_SET_MAX_NUM; i++) {
            if (source->firstVtSet[i])
                MarkVt(pN->firstVtSet, i, &pN->isFirstInvariant);
        }
    }
}

/*
 * Stores `relation` for the terminal pair (row, col).  A different relation
 * already present means the grammar is not an operator-precedence grammar;
 * this is reported on stderr and the newer relation wins.
 */
static void SetRelation(int row, int col, char relation)
{
    char previous = operator_precedence[row][col];

    if (previous != '\0' && previous != relation) {
        fprintf(stderr,
                "Warning: conflicting precedence for ('%c','%c'): '%c' replaced by '%c'.\n",
                terminal_symbol[row], terminal_symbol[col], previous, relation);
    }
    operator_precedence[row][col] = relation;
}

/*
 * Builds the operator-precedence table from the productions:
 *   ...ab... or ...aQb...  =>  a = b
 *   ...aR...               =>  a < every terminal in FirstVt(R)
 *   ...Rb...               =>  every terminal in LastVt(R) > b
 * and finally adds the end-marker rules  # < x,  x > #,  # = #.
 */
void OPATable(void)
{
    for (Grammar *pG = head_Grammar; pG != NULL; pG = pG->next) {
        Right *prev;
        Right *cur;

        if (pG->right_head == NULL)
            continue;

        for (prev = pG->right_head, cur = prev->next; cur != NULL;
             prev = cur, cur = cur->next) {
            if (prev->label != -1) {
                if (cur->label != -1) {
                    SetRelation(prev->label, cur->label, '=');
                } else {
                    Non_terminal *pN = FindNonTerminal(cur->c);

                    if (pN != NULL) {
                        for (int i = 0; i < FIRST_VT_SET_MAX_NUM; i++) {
                            if (pN->firstVtSet[i])
                                SetRelation(prev->label, i, '<');
                        }
                    }
                    /* terminal, non-terminal, terminal: the two terminals are equal */
                    if (cur->next != NULL && cur->next->label != -1)
                        SetRelation(prev->label, cur->next->label, '=');
                }
            } else if (cur->label != -1) {
                Non_terminal *pN = FindNonTerminal(prev->c);

                if (pN != NULL) {
                    for (int i = 0; i < LAST_VT_SET_MAX_NUM; i++) {
                        if (pN->lastVtSet[i])
                            SetRelation(i, cur->label, '>');
                    }
                }
            }
        }
    }

    for (int i = 0; i < TERMINAL_SYMBOL_MAX_NUM; i++) {
        if (terminal_symbol[i] != '#')
            continue;
        for (int j = 0; j < TERMINAL_SYMBOL_MAX_NUM; j++) {
            if (i == j) {
                operator_precedence[i][j] = '=';
            } else if (terminal_symbol[j] != '\0') {
                operator_precedence[i][j] = '<';
                operator_precedence[j][i] = '>';
            }
        }
    }
    printf("operator priority analysis table finished!\n");
}

/*
 * Reads one whitespace-delimited token from stdin and appends its characters
 * as list nodes after s.  At most STRING_MAX_LENGTH-1 characters are kept; a
 * longer token is truncated with a note on stderr.  On EOF nothing is appended.
 */
void Input(Str *s)
{
    int ch;
    int length = 0;

    printf("Please input the string(length<%d):", STRING_MAX_LENGTH-1);
    fflush(stdout);

    do {
        ch = getchar();
    } while (ch != EOF && isspace(ch));

    while (ch != EOF && !isspace(ch)) {
        Str *t;

        if (length == STRING_MAX_LENGTH - 1) {
            fprintf(stderr, "Input truncated to %d characters.\n", STRING_MAX_LENGTH - 1);
            break;
        }
        t = AllocOrDie(sizeof *t);
        t->c = (char)ch;
        t->next = NULL;
        s->next = t;
        s = t;
        length++;
        ch = getchar();
    }
}

/* Returns the nearest terminal cell below `cell` on the stack, or NULL if there is none. */
static Stack *PrevTerminal(Stack *cell)
{
    Stack *p = cell->pre;

    while (p != NULL && p->label == -1)
        p = p->pre;
    return p;
}

/* Frees every cell of a parse stack starting at its bottom cell. */
static void FreeStack(Stack *bottom)
{
    while (bottom != NULL) {
        Stack *next = bottom->next;
        free(bottom);
        bottom = next;
    }
}

/* Frees every node of an input list. */
static void FreeInput(Str *head)
{
    while (head != NULL) {
        Str *next = head->next;
        free(head);
        head = next;
    }
}

/*
 * Runs the operator-precedence shift/reduce analysis on the input list that
 * hangs off the sentinel s, printing one trace row per step and the verdict.
 *
 * Judge takes ownership of the input nodes: s->next is NULL on return and all
 * input and stack nodes have been freed.  The string is accepted only when
 * the stack is exactly "#V" and the only input left is the appended end
 * marker, so an empty input or a '#' typed inside the input is rejected.
 */
void Judge(Str *s)
{
    Stack *head_stack, *tail_stack;
    Str *head_str, *tail_str, *end_mark;
    int end_index;

    /* Append the end marker '#' to the input. */
    end_mark = AllocOrDie(sizeof *end_mark);
    end_mark->c = '#';
    end_mark->next = NULL;
    tail_str = s;
    while (tail_str->next != NULL)
        tail_str = tail_str->next;
    tail_str->next = end_mark;
    head_str = s->next;
    s->next = NULL;

    end_index = TerminalIndex('#');
    if (end_index == TERMINAL_SYMBOL_MAX_NUM) {
        fprintf(stderr, "Terminal table has no '#' end marker.\n");
        FreeInput(head_str);
        return;
    }

    /* The stack starts with '#' at the bottom. */
    head_stack = AllocOrDie(sizeof *head_stack);
    head_stack->c = '#';
    head_stack->label = end_index;
    head_stack->next = NULL;
    head_stack->pre = NULL;
    tail_stack = head_stack;

    printf("符号栈\t关系\t输入串\t\t最左素短语\n");

    while (head_str != NULL) {
        Stack *top_terminal = tail_stack;
        Stack *phrase_left, *phrase_right, *cell;
        char relation;
        int i;

        /* Topmost terminal on the stack; the bottom '#' guarantees one exists. */
        while (top_terminal->label == -1)
            top_terminal = top_terminal->pre;

        i = TerminalIndex(head_str->c);
        if (i == TERMINAL_SYMBOL_MAX_NUM) {
            printf("输入串中有无法识别符号!\n");
            break;
        }

        relation = operator_precedence[top_terminal->label][i];

        if (relation == '\0') {
            PrintJudge(head_stack, top_terminal, head_str, i);
            printf("\t\t无匹配!\n");
            printf("该串不是该文法的句子!\n");
            break;
        }

        if (top_terminal->c == '#' && terminal_symbol[i] == '#') {
            bool accepted = top_terminal == head_stack
                         && head_stack->next == tail_stack
                         && tail_stack->label == -1
                         && head_str->next == NULL;

            PrintJudge(head_stack, top_terminal, head_str, i);
            if (accepted) {
                printf("\t\t匹配成功!\n");
                printf("该串是该文法的句子!\n");
            } else {
                printf("\t\t无匹配!\n");
                printf("该串不是该文法的句子!\n");
            }
            break;
        }

        if (relation == '<' || relation == '=') {
            /* Shift: move the current input symbol onto the stack. */
            Str *consumed = head_str;

            PrintJudge(head_stack, top_terminal, head_str, i);
            printf("\t\t入栈\n");

            cell = AllocOrDie(sizeof *cell);
            cell->c = head_str->c;
            cell->label = i;
            cell->next = NULL;
            cell->pre = tail_stack;
            tail_stack->next = cell;
            tail_stack = cell;

            head_str = head_str->next;
            free(consumed);
            continue;
        }

        /* Reduce: walk down over '=' pairs to the terminal that opens the leftmost prime phrase. */
        PrintJudge(head_stack, top_terminal, head_str, i);
        printf("\t");

        phrase_right = top_terminal;
        phrase_left = PrevTerminal(phrase_right);
        while (phrase_left != NULL
               && operator_precedence[phrase_left->label][phrase_right->label] == '=') {
            phrase_right = phrase_left;
            phrase_left = PrevTerminal(phrase_right);
        }
        if (phrase_left == NULL) {
            printf("\t匹配错误!\n");
            printf("该串不是该文法的句子!\n");
            break;
        }

        /* Everything above phrase_left is the prime phrase: print and pop it. */
        printf("\t");
        cell = phrase_left->next;
        phrase_left->next = NULL;
        tail_stack = phrase_left;
        while (cell != NULL) {
            Stack *next = cell->next;
            printf("%c", cell->c);
            free(cell);
            cell = next;
        }
        printf("\n");

        /* Replace the phrase by the generic non-terminal V. */
        cell = AllocOrDie(sizeof *cell);
        cell->c = 'V';
        cell->label = -1;
        cell->pre = tail_stack;
        cell->next = NULL;
        tail_stack->next = cell;
        tail_stack = cell;
    }

    FreeStack(head_stack);
    FreeInput(head_str);
}

/*
 * Prints one trace row prefix: the stack contents from pS upward, the relation
 * between last_terminal and terminal index i, and the remaining input from p.
 */
void PrintJudge(Stack *pS, Stack *last_terminal, Str *p, int i)
{
    char relation = operator_precedence[last_terminal->label][i];

    for (; pS != NULL; pS = pS->next)
        printf("%c", pS->c);

    if (relation == '\0')
        printf("\t无\t");
    else
        printf("\t%c\t", relation);

    for (; p != NULL; p = p->next)
        printf("%c", p->c);
}

/* Prints the non-terminals and the terminals found in the grammar. */
void PrintSymbol(void)
{
    printf("-------------------------------------------------\n");
    printf("非终结符:\n");
    for (Non_terminal *pN = head_Non_terminal; pN != NULL; pN = pN->next)
        printf("%c ", pN->c);

    printf("\n终结符:\n");
    for (int i = 0; i < TERMINAL_SYMBOL_MAX_NUM; i++) {
        if (terminal_symbol[i] != '\0')
            printf("%c ", terminal_symbol[i]);
    }
    printf("\n-------------------------------------------------\n");
}

/* Prints the FirstVt and LastVt sets of every non-terminal. */
void PrintFirstLastVtSet(void)
{
    printf("-------------------------------------------------\n");
    printf("FirstVtSet:\n");
    for (Non_terminal *pN = head_Non_terminal; pN != NULL; pN = pN->next) {
        printf("firstVtSet[%c]={", pN->c);
        for (int i = 0; i < FIRST_VT_SET_MAX_NUM; i++) {
            if (pN->firstVtSet[i])
                printf("%c ", terminal_symbol[i]);
        }
        printf("}\n");
    }

    printf("LastVtSet:\n");
    for (Non_terminal *pN = head_Non_terminal; pN != NULL; pN = pN->next) {
        printf("lastVtSet[%c]={", pN->c);
        for (int i = 0; i < LAST_VT_SET_MAX_NUM; i++) {
            if (pN->lastVtSet[i])
                printf("%c ", terminal_symbol[i]);
        }
        printf("}\n");
    }
    printf("\n-------------------------------------------------\n");
}

/* Prints the operator-precedence table with terminals as row and column headers. */
void PrintOPATable(void)
{
    printf("-------------------------------------------------\n");
    printf("operator priority analysis table:\n");

    for (int i = 0; i < TERMINAL_SYMBOL_MAX_NUM; i++) {
        if (terminal_symbol[i] != '\0')
            printf("\t%c", terminal_symbol[i]);
    }

    for (int i = 0; i < TERMINAL_SYMBOL_MAX_NUM; i++) {
        if (terminal_symbol[i] == '\0')
            continue;
        printf("\n%c", terminal_symbol[i]);
        for (int j = 0; j < TERMINAL_SYMBOL_MAX_NUM; j++) {
            if (terminal_symbol[j] == '\0')
                continue;
            if (operator_precedence[i][j] != '\0')
                printf("\t%c", operator_precedence[i][j]);
            else
                printf("\t");   /* keep empty cells aligned */
        }
    }
    printf("\n-------------------------------------------------\n");
}
OPG.txt中的文法为:
E->E+T|T
T->T*F|F
F->(E)|i
代码的数据结构和上次实验基本一致,读取文法、输入判别符号串、找出并标记非终结符、终结符和部分输出函数均和上次相同,求FirstVt和LastVt采用迭代,直到所有非终结符的FirstVt和LastVt不再增大(不变)时终止。
3次测试结果:
输出对齐有问题,因为制表符\t对应8个空格的问题,前面字符长度对8求余然后和8相减取绝对值,则为实际补齐空格数
代码写得比较冗余,需要多多练习,代码风格和可读性需要努力改善和提高,程序可能存在部分未发现的bug,需要完善