该方案实现了一个分析C语言的词法分析+解析。
注意:
1.简单语法,部分秕。它可以在本文法的基础上进行扩展,此过程使用自上而下LL(1)语法。
2.自己主动能达到求First 集和 Follow 集。
3.处终结符外(有些硬编码的成分),终结符的文法能够自己定义,也就是说读者能够自己定义文法。
4.为方便理解。C语言的文法描写叙述写成中文。
5.程序将词法分析和语法分析结合起来。词法分析的结果作为语法分析的输入。
6.终于结果在控制台显示的有:词法分析、First集、Follow集、Select集,在preciateResult.txt 中写入了语法分析结果。在preciateTable.txt 中写入了预測分析表。
7.文法的词素之间必须有空格分开。
项目结构例如以下:
文法例如以下:
wenfa.txt:
-> ( ) { }
-> | $
-> describe
-> type
->
-> | $
->
->
-> id
-> 【 】 | $
-> ( ) | |
-> digit
->
->
-> | / | $
-> + | - | $
-> | $
->
-> = | $
-> | { }
->
-> , | $
-> , | $
->
-> | $
-> ;
-> | | | | $
->
-> = ; | ( ) ;
->
-> , | $
-> | |
-> string
-> for ( ; ) { }
->
-> | == | !=
->
-> ++ | --
-> if ( ) { }
-> else { } | $
-> return ;
词法分析头文件:
LexAnalysis.h
//LexAnalysis.h
#ifndef _LEXANALYSIS_H
#define _LEXANALYSIS_H
//keyword
#define AUTO 1
#define BREAK 2
#define CASE 3
#define CHAR 4
#define CONST 5
#define CONTINUE 6
#define DEFAULT 7
#define DO 8
#define DOUBLE 9
#define ELSE 10
#define ENUM 11
#define EXTERN 12
#define FLOAT 13
#define FOR 14
#define GOTO 15
#define IF 16
#define INT 17
#define LONG 18
#define REGISTER 19
#define RETURN 20
#define SHORT 21
#define SIGNED 22
#define SIZEOF 23
#define STATIC 24
#define STRUCT 25
#define SWITCH 26
#define TYPEDEF 27
#define UNION 28
#define UNSIGNED 29
#define VOID 30
#define VOLATILE 31
#define WHILE 32
#define KEY_DESC "keyword"
//标志符
#define IDENTIFER 40
#define IDENTIFER_DESC "标志符"
//常量
#define INT_VAL 51 //整形常量
#define CHAR_VAL 52 //字符常量
#define FLOAT_VAL 53 //浮点数常量
#define STRING_VAL 54 //双精度浮点数常量
#define MACRO_VAL 55 //宏常量
#define CONSTANT_DESC "常量"
//运算符
#define //代码效果参考:http://hnjlyzjd.com/xl/wz_24225.html
NOT 61 // !#define BYTE_AND 62 //&
#define COMPLEMENT 63 // ~
#define BYTE_XOR 64 // ^
#define MUL 65 //
#define DIV 66// /
#define MOD 67 // %
#define ADD 68 // +
#define SUB 69 // -
#define LES_THAN 70 //
#define ASG 72 // =
#define ARROW 73 // ->
#define SELF_ADD 74 // ++
#define SELF_SUB 75 // --
#define LEFT_MOVE 76 // [
#define RIGHT_MOVE 77 // ]
#define LES_EQUAL 78 // =
#define EQUAL 80 // ==
#define NOT_EQUAL 81 // !=
#define AND 82 // &&
#define OR 83 // ||
#define COMPLETE_ADD 84 // +=
#define COMPLETE_SUB 85 // -=
#define COMPLETE_MUL 86 // =
#define COMPLETE_DIV 87 // /=
#define COMPLETE_BYTE_XOR 88 ////代码效果参考:http://hnjlyzjd.com/hw/wz_24223.html
^=#define COMPLETE_BYTE_AND 89 // &=
#define COMPLETE_COMPLEMENT 90 // ~=
#define COMPLETE_MOD 91 //%=
#define BYTE_OR 92 // |
#define OPE_DESC "运算符"
//限界符
#define LEFT_BRA 100 // (
#define RIGHT_BRA 101 // )
#define LEFT_INDEX 102 // 【
#define RIGHT_INDEX 103 // 】
#define L_BOUNDER 104 // {
#define R_BOUNDER 105 // }
#define POINTER 106 // .
#define JING 107 // #
#define UNDERLINE 108 //
#define COMMA 109 // ,
#define SEMI 110 // ;
#define SIN_QUE 111 // '
#define DOU_QUE 112 // "
#define CLE_OPE_DESC "限界符"
#define NOTE1 120 // "//"凝视
#define NOTE2 121 // "//"凝视
#define NOTE_DESC "凝视"
#define HEADER 130 //头文件
#define HEADER_DESC "头文件"
//错误类型
#define FLOAT_ERROR "float表示错误"
#define FLOAT_ERROR_NUM 1
#define DOUBLE_ERROR "double表示错误"
#define DOUBLE_ERROR_NUM 2
#define NOTE_ERROR "凝视没有结束符"
#define NOTE_ERROR_NUM 3
#define STRING_ERROR "字符串常量没有结束符"
#define STRING_ERROR_NUM 4
#define CHARCONST_ERROR "字符常量没有结束符"
#define CHARCONST_ERROR_NUM 5
#define CHAR_ERROR "非法字符"
#define CHAR_ERROR_NUM 6
#define LEFT_BRA_ERROR "'('没有相应项"
#define LEFT_BRA_ERROR_NUM 7
#define RIGHT_BRA_ERROR "')'没有相应项"
#define RIGHT_BRA_ERROR_NUM 8
#define LEFT_INDEX_ERROR "'【'没有相应项"
#define LEFT_INDEX_ERROR_NUM 9
#define RIGHT_INDEX_ERROR "'】'没有相应项"
#define RIGHT_INDEX_ERROR_NUM 10
#define L_BOUNDER_ERROR "'{'没有相应项"
#define L_BOUNDER_ERROR_NUM 11
#define R_BOUNDER_ERROR "'}'没有相应项"
#define R_BOUNDER_ERROR_NUM 12
#define PRE_PROCESS_ERROR "预处理错误" //头文件或者宏定义错误
#define PRE_PROCESS_ERROR_NUM 13
#define _NULL "无"
#define DESCRIBE 4000
#define TYPE 4001
#define STRING 4002
#define DIGIT 4003
struct NormalNode
{
char content【30】;//内容
char describe【30】;//描写叙述
int type;//种别码
int addr;//入口地址
int line;//所在行数
NormalNode next;//下一个节点
};
void initKeyMapping();
void initOperMapping();
void initLimitMapping();
void initNode();
void createNewNode(char content,char descirbe,int type,int addr,int line);
void createNewError(char content,char descirbe,int type,int line);
int createNewIden(char content,char descirbe,int type,int addr,int line);
void printNodeLink();
void printErrorLink();
void printIdentLink();
int mystrlen(char word);
void preProcess(char word,int line);
void close();
int seekKey(char word);
void scanner();
void BraMappingError();
#endif
语法分析头文件:
SynAnalysis.h
//SynAnalysis.h
#ifndef _SYNANALYSIS_H
#define _SYNANALYSIS_H
#define GRAMMAR_ARROW 2000 //->
#define GRAMMAR_OR 2001 // |
#define GRAMMAR_NULL 2002 //空值
#define GRAMMAR_SPECIAL 2003 //特殊符号#
#define GRAMMAR_BASE 2010 //动态生成的基值
#define Stack_Size 5000
typedef struct
{
int elem【Stack_Size】;
int top;
} SeqStack;
void initGrammer();
int seekCodeNum(char word);
void ceshi();
void First();
void Follow();
void Select();
void MTable();
void Analysis();
#endif
词法分析Cpp文件:(与先前写过的一片博客相类似,改了部分)
LexAnalysis.cpp
//LexAnalysis.cpp
#include
#include
#include
#include
#include
#include
#include
#include "LexAnalysis.h"
using namespace std;
int leftSmall = 0;//左小括号
int rightSmall = 0;//右小括号
int leftMiddle = 0;//左中括号
int rightMiddle = 0;//右中括号
int leftBig = 0;//左大括号
int rightBig = 0;//右大括号
int lineBra【6】【1000】 = {0};//括号和行数的相应关系,第一维代表左右6种括号
int static_iden_number = 0;//模拟标志符的地址,自增
//Token节点
NormalNode normalHead;//首结点
//错误节点
struct ErrorNode
{
char content【30】;//错误内容
char describe【30】;//错误描写叙述
int type;
int line;//所在行数
ErrorNode next;//下一个节点
};
ErrorNode errorHead;//首节点
//标志符节点
struct IdentiferNode
{
char content【30】;//内容
char describe【30】;//描写叙述
int type;//种别码
int addr;//入口地址
int line;//所在行数
IdentiferNode next;//下一个节点
};
IdentiferNode idenHead;//首节点
vector
keyMap;
vector
operMap;
vector
limitMap;
//初始化C语言的keyword的集合
void initKeyMapping()
{
keyMap.clear();
keyMap.push_back(make_pair("auto",AUTO));
keyMap.push_back(make_pair("break",BREAK));
keyMap.push_back(make_pair("case",CASE));
keyMap.push_back(make_pair("char",CHAR));
keyMap.push_back(make_pair("const",CONST));
keyMap.push_back(make_pair("continue",CONTINUE));
keyMap.push_back(make_pair("default",DEFAULT));
keyMap.push_back(make_pair("do",DO));
keyMap.push_back(make_pair("double",DOUBLE));
keyMap.push_back(make_pair("else",ELSE));
keyMap.push_back(make_pair("enum",ENUM));
keyMap.push_back(make_pair("extern",EXTERN));
keyMap.push_back(make_pair("float",FLOAT));
keyMap.push_back(make_pair("for",FOR));
keyMap.push_back(make_pair("goto",GOTO));
keyMap.push_back(make_pair("if",IF));
keyMap.push_back(make_pair("int",INT));
keyMap.push_back(make_pair("long",LONG));
keyMap.push_back(make_pair("register",REGISTER));
keyMap.push_back(make_pair("return",RETURN));
keyMap.push_back(make_pair("short",SHORT));
keyMap.push_back(make_pair("signed",SIGNED));
keyMap.push_back(make_pair("sizeof",SIZEOF));
keyMap.push_back(make_pair("static",STATIC));
keyMap.push_back(make_pair("struct",STRUCT));
keyMap.push_back(make_pair("switch",SWITCH));
keyMap.push_back(make_pair("typedef",TYPEDEF));
keyMap.push_back(make_pair("union",UNION));
keyMap.push_back(make_pair("unsigned",UNSIGNED));
keyMap.push_back(make_pair("void",VOID));
keyMap.push_back(make_pair("volatile",VOLATILE));
keyMap.push_back(make_pair("while",WHILE));
keyMap.push_back(make_pair("describe",DESCRIBE));
keyMap.push_back(make_pair("type",TYPE));
keyMap.push_back(make_pair("string",STRING));
keyMap.push_back(make_pair("digit",DIGIT));
}
void initOperMapping()
{
operMap.clear();
operMap.push_back(make_pair("!",NOT));
operMap.push_back(make_pair("&",BYTE_AND));
operMap.push_back(make_pair("~",COMPLEMENT));
operMap.push_back(make_pair("^",BYTE_XOR));
operMap.push_back(make_pair("",MUL));
operMap.push_back(make_pair("/",DIV));
operMap.push_back(make_pair("%",MOD));
operMap.push_back(make_pair("+",ADD));
operMap.push_back(make_pair("-",SUB));
operMap.push_back(make_pair("",GRT_THAN));
operMap.push_back(make_pair("=",ASG));
operMap.push_back(make_pair("->",ARROW));
operMap.push_back(make_pair("++",SELF_ADD));
operMap.push_back(make_pair("--",SELF_SUB));
operMap.push_back(make_pair("[",LEFT_MOVE));
operMap.push_back(make_pair("]",RIGHT_MOVE));
operMap.push_back(make_pair("=",GRT_EQUAL));
operMap.push_back(make_pair("==",EQUAL));
operMap.push_back(make_pair("!=",NOT_EQUAL));
operMap.push_back(make_pair("&&",AND));
operMap.push_back(make_pair("||",OR));
operMap.push_back(make_pair("+=",COMPLETE_ADD));
operMap.push_back(make_pair("-=",COMPLETE_SUB));
operMap.push_back(make_pair("=",COMPLETE_MUL));
operMap.push_back(make_pair("/=",COMPLETE_DIV));
operMap.push_back(make_pair("^=",COMPLETE_BYTE_XOR));
operMap.push_back(make_pair("&=",COMPLETE_BYTE_AND));
operMap.push_back(make_pair("~=",COMPLETE_COMPLEMENT));
operMap.push_back(make_pair("%=",COMPLETE_MOD));
operMap.push_back(make_pair("|",BYTE_OR));
}
void initLimitMapping()
{
limitMap.clear();
limitMap.push_back(make_pair("(",LEFT_BRA));
limitMap.push_back(make_pair(")",RIGHT_BRA));
limitMap.push_back(make_pair("【",LEFT_INDEX));
limitMap.push_back(make_pair("】",RIGHT_INDEX));
limitMap.push_back(make_pair("{",L_BOUNDER));
limitMap.push_back(make_pair("}",R_BOUNDER));
limitMap.push_back(make_pair(".",POINTER));
limitMap.push_back(make_pair("#",JING));
limitMap.push_back(makepair("",UNDER_LINE));
limitMap.push_back(make_pair(",",COMMA));
limitMap.push_back(make_pair(";",SEMI));
limitMap.push_back(make_pair("'",SIN_QUE));
limitMap.push_back(make_pair("\"",DOU_QUE));
}
void initNode()
{
normalHead = new NormalNode();
strcpy(normalHead->content,"");
strcpy(normalHead->describe,"");
normalHead->type = -1;
normalHead->addr = -1;
normalHead->line = -1;
normalHead->next = NULL;
errorHead = new ErrorNode();
strcpy(errorHead->content,"");
strcpy(errorHead->describe,"");
errorHead->line = -1;
errorHead->next = NULL;
idenHead = new IdentiferNode();
strcpy(idenHead->content,"");
strcpy(idenHead->describe,"");
idenHead->type = -1;
idenHead->addr = -1;
idenHead->line = -1;
idenHead->next = NULL;
}
void createNewNode(char content,char descirbe,int type,int addr,int line)
{
NormalNode p = normalHead;
NormalNode temp = new NormalNode();
while(p->next!=NULL)
{
p = p->next;
}
strcpy(temp->content,content);
strcpy(temp->describe,descirbe);
temp->type = type;
temp->addr = addr;
temp->line = line;
temp->next = NULL;
p->next = temp;
}
void createNewError(char content,char descirbe,int type,int line)
{
ErrorNode p = errorHead;
ErrorNode temp = new ErrorNode();
strcpy(temp->content,content);
strcpy(temp->describe,descirbe);
temp->type = type;
temp->line = line;
temp->next = NULL;
while(p->next!=NULL)
{
p = p->next;
}
p->next = temp;
}
//返回值是新的标志符的入口地址
int createNewIden(char content,char descirbe,int type,int addr,int line)
{
IdentiferNode p = idenHead;
IdentiferNode temp = new IdentiferNode();
int flag = 0;
int addr_temp = -2;
while(p->next!=NULL)
{
if(strcmp(content,p->next->content) == 0)
{
flag = 1;
addr_temp = p->next->addr;
}
p = p->next;
}
if(flag == 0)
{
addr_temp = ++static_iden_number;//用自增来模拟入口地址
}
strcpy(temp->content,content);
strcpy(temp->describe,descirbe);
temp->type = type;
temp->addr = addr_temp;
temp->line = line;
temp->next = NULL;
p->next = temp;
return addr_temp;
}
void printNodeLink()
{
NormalNode p = normalHead;
p = p->next;
cout["*分析表**"[endl[endl;
cout[setw(30)["内容"[setw(10)["描写叙述"["\t"["种别码"["\t"["地址"["\t"["行号"[endl;
while(p!=NULL)
{
if(p->type == IDENTIFER)
{
cout[setw(30)[p->content[setw(10)[p->describe["\t"[p->type["\t"[p->addr["\t"[p->line[endl;
}
else
{
cout[setw(30)[p->content[setw(10)[p->describe["\t"[p->type["\t"["\t"[p->line[endl;
}
p = p->next;
}
cout[endl[endl;
}
/
错误种类:
1.float表示错误
2.double表示错误
3.凝视没有结束符
4.字符串常量没有结束符
5.字符常量没有结束符
6.非法字符
7.'('没有相应项
8.预处理错误
/
void printErrorLink()
{
ErrorNode p = errorHead;
p = p->next;
cout["**错误表**"[endl[endl;
cout[setw(10)["内容"[setw(30)["描写叙述"["\t"["类型"["\t"["行号"[endl;
while(p!=NULL)
{
cout[setw(10)[p->content[setw(30)[p->describe["\t"[p->type["\t"[p->line[endl;
p = p->next;
}
cout[endl[endl;
}
//标志符表,有反复部分。暂不考虑
void printIdentLink()
{
IdentiferNode p = idenHead;
p = p->next;
cout["**标志符表**"[endl[endl;
cout[setw(30)["内容"[setw(10)["描写叙述"["\t"["种别码"["\t"["地址"["\t"["行号"[endl;
while(p!=NULL)
{
cout[setw(30)[p->content[setw(10)[p->describe["\t"[p->type["\t"[p->addr["\t"[p->line[endl;
p = p->next;
}
cout[endl[endl;
}
int mystrlen(char word)
{
if(word == '\0')
{
return 0;
}
else
{
return 1+mystrlen(word+1);
}
}
//预处理,处理头文件和宏定义
void preProcess(char word,int line)
{
const char include_temp = "include";
const char define_temp = "define";
char p_include,p_define;
int flag = 0;
p_include = strstr(word,include_temp);
if(p_include!=NULL)
{
flag = 1;
int i;
for(i=7;;)
{
if((p_include+i) == ' ' || (p_include+i) == '\t')
{
i++;
}
else
{
break;
}
}
createNewNode(p_include+i,HEADER_DESC,HEADER,-1,line);
}
else
{
p_define = strstr(word,define_temp);
if(p_define!=NULL)
{
flag = 1;
int i;
for(i=7;;)
{
if((p_define+i) == ' ' || (p_define+i) == '\t')
{
i++;
}
else
{
break;
}
}
createNewNode(p_define+i,CONSTANT_DESC,MACRO_VAL,-1,line);
}
}
if(flag == 0)
{
createNewError(word,PRE_PROCESS_ERROR,PRE_PROCESS_ERROR_NUM,line);
}
}
void close()
{
//delete idenHead;
//delete errorHead;
//delete normalHead;
}
int seekKey(char word)
{
for(int i=0; i='A' && ch='a' && ch='A' && ch='a' && ch='0' && ch='0' && ch='0' && ch='0' && ch='0' && ch='0' && ch='0' && ch='0' && ch='0' && ch='0' && ch='0' && ch')
{
createNewNode("->",OPE_DESC,ARROW,-1,line);
}
else if(ch == '-')
{
createNewNode("--",OPE_DESC,SELF_SUB,-1,line);
}
else if(ch == '=')
{
createNewNode("--",OPE_DESC,SELF_SUB,-1,line);
}
else
{
createNewNode("-",OPE_DESC,SUB,-1,line);
fseek(infile,-1L,SEEK_CUR);
}
}
//处理+开头的运算符
else if(ch == '+')
{
ch = fgetc(infile);
if(ch == '+')
{
createNewNode("++",OPE_DESC,SELF_ADD,-1,line);
}
else if(ch == '=')
{
createNewNode("+=",OPE_DESC,COMPLETE_ADD,-1,line);
}
else
{
createNewNode("+",OPE_DESC,ADD,-1,line);
fseek(infile,-1L,SEEK_CUR);
}
}
//处理开头的运算符
else if(ch == '')
{
ch = fgetc(infile);
if(ch == '=')
{
createNewNode("=",OPE_DESC,COMPLETE_MUL,-1,line);
}
else
{
createNewNode("",OPE_DESC,MUL,-1,line);
fseek(infile,-1L,SEEK_CUR);
}
}
//处理按^开头的运算符
else if(ch == '^')
{
ch = fgetc(infile);
if(ch == '=')
{
createNewNode("^=",OPE_DESC,COMPLETE_BYTE_XOR,-1,line);
}
else
{
createNewNode("^",OPE_DESC,BYTE_XOR,-1,line);
fseek(infile,-1L,SEEK_CUR);
}
}
//处理%开头的运算符
else if(ch == '%')
{
ch = fgetc(infile);
if(ch == '=')
{
createNewNode("%=",OPE_DESC,COMPLETE_MOD,-1,line);
}
else
{
createNewNode("%",OPE_DESC,MOD,-1,line);
fseek(infile,-1L,SEEK_CUR);
}
}
//处理&开头的运算符
else if(ch == '&')
{
ch = fgetc(infile);
if(ch == '=')
{
createNewNode("&=",OPE_DESC,COMPLETE_BYTE_AND,-1,line);
}
else if(ch == '&')
{
createNewNode("&&",OPE_DESC,AND,-1,line);
}
else
{
createNewNode("&",OPE_DESC,BYTE_AND,-1,line);
fseek(infile,-1L,SEEK_CUR);
}
}
//处理~开头的运算符
else if(ch == '~')
{
ch = fgetc(infile);
if(ch == '=')
{
createNewNode("~=",OPE_DESC,COMPLETE_COMPLEMENT,-1,line);
}
else
{
createNewNode("~",OPE_DESC,COMPLEMENT,-1,line);
fseek(infile,-1L,SEEK_CUR);
}
}
//处理!开头的运算符
else if(ch == '!')
{
ch = fgetc(infile);
if(ch == '=')
{
createNewNode("!=",OPE_DESC,NOT_EQUAL,-1,line);
}
else
{
createNewNode("!",OPE_DESC,NOT,-1,line);
fseek(infile,-1L,SEEK_CUR);
}
}
//处理<开头的运算符
else if(ch == '<')
{
ch = fgetc(infile);
if(ch == '<')
{
createNewNode("[",OPE_DESC,LEFT_MOVE,-1,line);
}
else if(ch == '=')
{
createNewNode("<=",OPE_DESC,LES_EQUAL,-1,line);
}
else
{
createNewNode("开头的运算符
else if(ch == '>')
{
ch = fgetc(infile);
if(ch == '>')
{
createNewNode("]",OPE_DESC,RIGHT_MOVE,-1,line);
}
else if(ch == '=')
{
createNewNode(">=",OPE_DESC,GRT_EQUAL,-1,line);
}
else
{
createNewNode(">",OPE_DESC,GRT_THAN,-1,line);
fseek(infile,-1L,SEEK_CUR);
}
}
//处理|开头的运算符
else if(ch == '|')
{
ch = fgetc(infile);
if(ch == '|')
{
createNewNode("||",OPE_DESC,OR,-1,line);
}
else
{
createNewNode("|",OPE_DESC,BYTE_OR,-1,line);
fseek(infile,-1L,SEEK_CUR);
}
}
else if(ch == '=')
{
ch = fgetc(infile);
if(ch == '=')
{
createNewNode("==",OPE_DESC,EQUAL,