一个简单的C语言词法分析与语法分析器-阿里云开发者社区

词法分析
可识别内容:
标识符：id
数字：num
关键字：int,char,if,else,while,do,for
标号：, ， . ， ;
算术运算符号：=，+，-，*，/，&，!，|，&&，||
关系运算符：<，<=，>=，>，==，!=
注释：//

内码定义：
单个符号，如{，+，*，> 等，均使用其ascii码做内码，占双或多个字节的符号（包括保留字，标号，数字，运算符等）为其取名如下：
Enum { END=0,INT,CHAR,IF,ELSE,WHILE=5,
DO,FOR,ARGAND,ARGOR,NUM=10,
ID,LESSEQUAL,EQUAL,GREATEQUAL,NOTEQUAL=15 };
其中NUM代表数字，ID代表标识符.

测试程序1-1的词法分析结果如下：

内码表
123 { 11 X 61 = 10 12 43 + 43 + 11 b 47 / 10 13 45 - 10 5 42 * 10 4 42 * 10 9 59 ; 11 Y 61 =	10 4 42 * 10 1024 59 ; 3 if 40 ( 11 X 14 >= 11 Y 41 ) 123 { 3 if 40 ( 11 i 13 == 11 i	41 ) 123 { 11 X 61 = 11 Y 125 } 125 } 59 ; 59 ; 59 ; 59 ; 5 while 40 ( 11 X 60 <	11 Y 41 ) 123 { 11 X 61 = 11 X 43 + 10 1 59 ; 125 } 125 }

语法分析
C语言子集，可支持
语句块，语句，条件语句，While循环语句，赋值语句，基本算术表达式等。例如：
{
// Comment Supported : This is only a Test ^_^

X = 12 + b / 13 - 5 * 4 * 9; // A AssignmentStatement
Y = 4 * 1024;
if( X >= Y){
if( i == i){   // This is nested if Statement
X=Y;
}
}
;;;;      // This is Null Statement
while( X < Y){   // This is while Statement
X = X +1;
}
}
测试程序1-1

支持错误检测，如将上面例子中X = 12 + b / 13 - 5 * 4 * 9;
故意修改为：X = 12 ++ b / 13 - 5 * 4 * 9; 则会出现如下错误提示，指示了出错行数和行内偏移位置：

规则如下：
<StatementBlock> ::= '{'<StatementSequence>'}'
<StatementSequence> ::= {<NullStatement>|<CommonStatement>|<VariantStatement>}

<AssignmentStatement> ::= ID=<Expression>
<ConditionStatement> ::= if(<Condition> <StatementBlock>
<LoopWhileStatement> ::= while(<Condition> <StatementBlock>

<Condition> ::= <Expression><RelationOperator><Expression>
<Expression> ::= <Item>{+<Item>|-<Item>}
<Item> ::= <Factor>{*<Factor>|/<Factor>}
<Factor> ::= ID|NUM|(<Expression>)
<RelationOperator> ::= <|<=|>=|>|==|!=

//非终结符的英文定义
void StatementBlock(); //语句块
void StatementSequence(); //语句串

// XxxxxStatement() 为三类语句
void NullStatement(); //空语句--仅仅含有一个;号
void CommonStatement(); //语句
void VariantStatement(); //变种语句--包括 if(){},while{},他们都不以;结尾

// 下面的属于CommonStatement
void AssignmentStatement(); //赋值语句

// 下面两种属于VariantStatement
void ConditionStatement(); //条件语句
void LoopWhileStatement(); //while循环语句

void Condition();   //条件
void Expression();   //表达式
void Item();    //项
void Factor();    //因子
void RelationOperator();  //关系运算符

不能支持的主要方面：函数调用的识别，逗号表达式，for循环，switch语句。

词法分析：

// LexAly.cpp :C子集词法分析程序

/**/ /*

支持内容:

标识符:id

关键字: int,char,if,else,while,do,for

标号: ,, ., ;

算术运算符号: =,+,-,&,!,|,&&,||

全局字符串:

instr 记录待解析的字符串

string 存贮当前被解析到的ID

接口:

gettoken();

Sample:

输入:

instr =

for(i=0;i<10;i++){

j=i+10;

printf("%d",j);

}

输出:

for

(

……

}

注意:

要记得处理程序中未显示出来的符号，如空白(' ')，回车(' ')，值表符(' ')

#include " stdafx.h "

#include < ctype.h >

#include < stdlib.h >

#include < string.h >

#include " Constant.h "

extern void grammar_check();

// 注意：这里累计数目(最大值)不能大于32 (10 0000B)TOKEN

// enum {END=0,INT=1,CHAR,IF,ELSE,WHILE,DO,FOR,ARGAND,ARGOR,NUM,ID} ;

char index[][ 20 ] = ... {

...{"END OF File"}, /**//* 0 END*/

...{"int"}, /**//* 1 INT*/

...{"char"}, /**//* 2 CHAR*/

...{"if"}, /**//* 3 IF*/

...{"else"}, /**//* 4 ELSE*/

...{"while"}, /**//* 5 WHILE*/

...{"do"}, /**//* 6 DO*/

...{"for"}, /**//* 7 FOR*/

...{"&&"}, /**//* 8 ARGAND*/

...{"||"}, /**//* 9 ARGOR*/

...{""}, /**//* 10 NUM */

...{""}, /**//* 11 ID */

...{"<="}, /**//* 12 LESSEQUAL */

...{"=="}, /**//* 13 EQUAL */

...{">="}, /**//* 14 GREATEQUAL */

...{"!="}, /**//* 15 NOTEQUAL */

...{""} /**//* 16 ID */

} ;

char input[ 10000 ] = ... {0} ;

char * instr = input;

char * const start_of_instr = input;

// string 包含gettoken最新得到的id等串

// gym包含 gettoken得到的内容的代号

// current_line 包含当前行号

char string[MAX_INDENT];

int sym;

int current_line = 1 ;

int start_pos_of_current_line;

char * strstart; // 用于辅助识别num,id

int gettoken();

int _gettoken();

void error( char * cur);

char * getlinestring( int line, char * in_buf);

int nextline();

int getline();

int getcurrentpos();

int nextline() ... { return ++current_line; }

int getline() ... { return current_line; }

int getcurrentpos() ... { return (int)instr; }

char * getlinestring( int line, char * in_buf)

... {

char * t = input;

int i = 1;

while(*t != 0 && i < line )...{

if( *t==' ' ) i++;

t++;

}

int len = 0;

while ( *t != ' ' )...{

in_buf[len] = *t;

len++;

t++;

}

in_buf[len] = 0;

return in_buf;

}

void error( char * cur)

... {

printf("Spell Error found at line %d ",getline());

exit(0);

}

// 语法分析

int main_grammar( char * filename)

... {

int i;

FILE *f;

if(!(f=fopen(filename,"r")))...{

printf("Fail to open source file %s! ",filename);

exit(0);

}

int k=0;

char c;

while((c=fgetc(f))!=EOF)

...{

input[k]=c;

k++;

}

input[k] = 0;

//打印出程序

printf("%s ",start_of_instr);

//开始语法检查

grammar_check();

printf("Success! ");

return 0;

}

// 词法分析

int main_spell( char * filename)

... {

int i;

FILE *f;

if(!(f=fopen(filename,"r")))...{

printf("Fail to open source file %s! ",filename);

exit(0);

}

int k=0;

char c;

while((c=fgetc(f))!=EOF)

...{

input[k]=c;

k++;

}

input[k] = 0;

printf("%s ",start_of_instr);

while((i=gettoken())!=END)

...{

if(i == ID)...{

printf("%d %s ",i,string);

continue;

}

if(i == NUM)...{

printf("%d %s ",i,string);

continue;

}

if(i<20)...{

printf("%d %s ",i,index[i]);

}else...{

printf("%d %c ",i,i);

}

return 0;

}

int gettoken()

... {

int i= (sym = _gettoken());

#if 0

if(i == ID)...{

printf("%s",string);

}

if(i == NUM)...{

printf("%s",string);

}

if(i<20)...{

printf("%s",index[i]);

}else...{

printf("%c",i);

}

#endif

return sym;

}

int _gettoken()

... {

char *cp = instr;

for(;;)...{

if( *instr == 0)

return END;

/**//*

if( 可能读入的字符 > 当前可用缓冲区大小 )

扩展缓冲区

//int,char,if,else,while,do,for

switch ( *instr )

...{

case 'i':

if( instr[1] == 'f' && notda(instr[2]) )

...{

instr+=2;return IF;

}

if( instr[1] == 'n' && instr[2] == 't' && notda(instr[3]) )

...{

instr+=3; return INT;

}

// not a keyword. but an id.

strstart = instr;

instr++;

goto id_label;

case 'c':

if( instr[1] == 'h' && instr[2] == 'a' && instr[3] == 'r' && notda(instr[4]) )

...{instr+=4;return CHAR; }

strstart = instr;

instr++;

goto id_label;

break;

case 'e':

if( instr[1] == 'l' && instr[2] == 's' && instr[3] == 'e' && notda(instr[4]) )

...{instr+=4;return ELSE; }

strstart = instr;

instr++;

goto id_label;

break;

case 'w':

if( instr[1] == 'h' && instr[2] == 'i' && instr[3] == 'l' && instr[4] == 'e' && notda(instr[5]) )

...{instr+=5;return WHILE; }

strstart = instr;

instr++;

goto id_label;

case 'd':

if( instr[1] == 'o' && notda(instr[4]) )

...{instr+=2;return DO; }

strstart = instr;

instr++;

goto id_label;

case 'f':

if( instr[1] == 'o' && instr[2] == 'r' && notda(instr[3]) )

...{instr+=3;return FOR; }

strstart = instr;

instr++;

goto id_label;

// deal with IDs.

// EXCLUDE:i,c,d,e,w,f

case 'a': ; case 'b': ;

case 'g': ; case 'h': ;

case 'j': ; case 'k': ; case 'l': ;

case 'm': ; case 'n': ; case 'o': ;

case 'p': ; case 'q': ; case 'r': ;

case 's': ; case 't': ; case 'u': ;

case 'v': ; case 'x': ;

case 'y': ; case 'z': ;

case 'A': ; case 'B': ;

case 'C': ; case 'D': ; case 'E': ;

case 'F': ; case 'G': ; case 'H': ;

case 'I': ; case 'J': ; case 'K': ;

case 'L': ; case 'M': ; case 'N': ;

case 'O': ; case 'P': ; case 'Q': ;

case 'R': ; case 'S': ; case 'T': ;

case 'U': ; case 'V': ; case 'W': ;

case 'X': ; case 'Y': ; case 'Z': ;

strstart = instr;

instr++;

goto id_label;

case '0': ;

case '1': ; case '2': ; case '3': ;

case '4': ; case '5': ; case '6': ;

case '7': ; case '8': ; case '9': ;

strstart = instr;

instr++;

goto num_label;

case '{':

instr++;

return '{';

case '}':

instr++;

return '}';

case '(':

instr++;

return '(';

case ')':

instr++;

return ')';

case '+':

instr++;

return '+';

case '-':

instr++;

return '-';

case '*':

instr++;

return '*';

case '/':

if( instr[1] == '/' )...{ // ‘//’形式的注释

instr += 2;

while( *(instr) != 10 && *(instr) != 0 )

instr ++;

// instr ++;

}else...{ // 除号'/'

instr++;

return '/';

}

break;

case '=':

if( instr[1] == '=') ...{instr+=2; return EQUAL; }

else ...{instr++; return '=';}

break;

case '<':

if( instr[1] == '=') ...{instr+=2; return LESSEQUAL; }

else ...{instr++; return '<';}

break;

case '>':

if( instr[1] == '=') ...{instr+=2; return GREATEQUAL; }

else ...{instr++; return '>';}

break;

case '!':

if( instr[1] == '=') ...{instr+=2; return NOTEQUAL; }

else ...{instr++; return '!';}

break;

case '&':

if( instr[1] == '&') ...{instr+=2; return ARGAND; }

if( instr[1] == '&' && (isid(instr[2]) || isspace(instr[2])) )...{ instr++; return '&'; }

error(instr);

break;

case '|':

if( instr[1] == '|') ...{instr+=2; return ARGAND; }

if( instr[1] == '|' && (isid(instr[2]) || isspace(instr[2])) )...{ instr++; return '|'; }

error(instr);

break;

case ';':

instr++;

return ';';

case ' ':

//printf("new line (%d) ",getline());

nextline();

instr++;

start_pos_of_current_line = (int)instr;

break;

default:

instr++;

break;

id_label:

while( isid(*instr) )

instr++;

strncpy(string,strstart,instr-strstart);

string[instr-strstart]=0;

return ID;

num_label:

while( isdigit(*instr) )

instr++;

// if(isalpha(*(instr+1))

// error(instr); 让语法分析来做吧~

strncpy(string,strstart,instr-strstart);

string[instr-strstart]=0;

return NUM;

}

int main( int argc, char * argv[])

... {

if(argc <= 1 || argc >=4) ...{

printf("Usage:>LexAly [g|s] [filename] ");

exit(0);

}

if(argc == 3 )...{

argv[1][0] == 'g'? main_grammar(argv[2]): main_spell(argv[2]);

}else if(argc == 2)...{

argv[1][0] == 'g'? main_grammar("source2.txt") : main_spell("source2.txt");

;

}

return 0;

}

// grammar.cpp :C子集语法分析程序

/**/ /*

C语言子集，可支持

语句块，语句，条件语句，While循环语句，赋值语句，基本算术表达式等。例如：

{

// Comment Supported : This is only a Test ^_^

X = 12 + b / 13 - 5 * 4 * 9; // A AssignmentStatement

Y = 4 * 1024;

if( X >= Y){

if( i == i){ // This is nested if Statement

X=Y;

}

;;;; // This is Null Statement

while( X < Y){ // This is while Statement

X = X +1;

}

规则如下：

<StatementBlock> ::= '{'<StatementSequence>'}'

<StatementSequence> ::= {<NullStatement>|<CommonStatement>|<VariantStatement>}

<NullStatement> ::= ';'

<LoopWhileStatement> ::= while(<Condition> <StatementBlock>

<Expression> ::= <Item>{+<Item>|-<Item>}

<Item> ::= <Factor>{*<Factor>|/<Factor>}

<Factor> ::= ID|NUM|(<Expression>)

<RelationOperator> ::= <|<=|>=|>|==|!=

#include " stdafx.h "

#include < ctype.h >

#include < conio.h >

#include < stdlib.h >

#include < string.h >

#include " Constant.h "

extern int gettoken();

extern int getcurrentpos();

extern char * string;

extern int sym;

extern int current_line;

extern int start_pos_of_current_line;

extern char * getlinestring( int line, char * in_buf);

extern char input[];

// 非终结符的英文定义

void StatementBlock(); // 语句块

void StatementSequence(); // 语句串

// XxxxxStatement() 为三类语句

void NullStatement(); // 空语句--仅仅含有一个;号

void CommonStatement(); // 语句

void VariantStatement(); // 变种语句--包括 if(){},while{},他们都不以;结尾

// 下面的属于CommonStatement

void AssignmentStatement(); // 赋值语句

// 下面两种属于VariantStatement

void ConditionStatement(); // 条件语句

void LoopWhileStatement(); // while循环语句

void Condition(); // 条件

void Expression(); // 表达式

void Item(); // 项

void Factor(); // 因子

void RelationOperator(); // 关系运算符

/**/ /*

注：以上未考虑函数调用表达式

void match_error( char * c)

... {

char error_buf[1024];

int in_line_pos = getcurrentpos()-start_pos_of_current_line;

printf("Grammar Error! ");

printf("Line %d[%d] : %s expected ",current_line,in_line_pos,c);

//获取错误行并打印出来

getlinestring(current_line,error_buf);

printf("%s ",error_buf);

//输出错误指示点(Not Exact!)

for(int i=1; i< in_line_pos; i++)

printf("%c",'^');

printf(" ");

exit(0);

}

// expecetedSym 期望符号

// msg 出错时给出的消息

void match( int expecetedSym, char * msg)

... {

if(expecetedSym != sym)...{

#if 0

if(sym < 0x20 )

printf(" Fail: ExpecetedSym=%d sym=%d ",expecetedSym,sym);

else

printf(" Fail: ExpecetedSym=%d sym=%c ",expecetedSym,sym);

#endif

match_error(msg);

}

gettoken(); //预读一个符号

}

void grammar_check()

... {

// printf("%s",input);

gettoken(); //开始检查，填充预读区

// match('-',"DK");

StatementBlock();

if(sym != END) match(END,"End Of File");

}

void StatementBlock() // 语句块

... {

match('{',"{"); //和预读符号比较

StatementSequence();

match('}',"}");

}

void StatementSequence() // 语句串

... {

while(sym == ID ||

sym == IF ||

sym == WHILE||

sym == ';')

...{

while(sym == ID) //也可以用if(),但从统计角度看,while效率会略高。

//因为一般普通CommonStatement()出现概率较大

...{

CommonStatement();

match(';',";");

}

while(sym == IF ||

sym == WHILE)

...{

VariantStatement();

}

while(sym == ';')

...{

NullStatement();

}

void VariantStatement() // 变种语句--包括 if(){},while{},他们都不以;结尾

... {

switch(sym)...{ //若sym与下面两种均不匹配，什么也不做，空语句是也~

case IF:

ConditionStatement(); //条件语句

break;

case WHILE:

LoopWhileStatement();

break;

}

return;

}

void NullStatement() // 空语句--仅仅含有一个;号

... {

match(';',";");

}

void CommonStatement() // 语句,以;结尾,但不以;开头

... {

switch(sym)...{ //若sym与下面任何一种均不匹配，什么也不做，空语句是也~

case ID:

AssignmentStatement();

break;

}

return;

}

void AssignmentStatement() // 赋值语句

... {

match(ID,"ID");

match('=',"=");

Expression();

}

void ConditionStatement() // 条件语句

... {

match(IF,"if");

match('(',"(");

Condition();

match(')',")");

StatementBlock();

}

void LoopWhileStatement() // 循环语句

... {

match(WHILE,"while");

match('(',"(");

Condition();

match(')',")");

StatementBlock();

}

void Condition() // 条件

... {

Expression();

RelationOperator();

Expression();

}

// <Expression> ::= <Item>{+<Item>|-<Item>}

void Expression() // 表达式

... {

Item();

while(true)...{ // {+<Item>|-<Item>} 可以有多个

switch(sym)...{

case '+':

match('+',"+");

Item();

break;

case '-':

match('-',"-");

Item();

break;

default:

return;

}

return;

}

// <Item> ::= <Factor>{*<Factor>|/<Factor>}

void Item() // 项

... {

Factor();

while(true)...{ // {*<Factor>|/<Factor>} -- 可以有多个

switch(sym)...{

case '*':

match('*',"*");

Factor();

break;

case '/':

match('/',"/");

Factor();

break;

default:

return;

}

return;

}

// <Factor> ::= ID|NUM|(<Expression>)

void Factor() // 因子

... {

switch(sym)...{

case ID:

match(ID,"ID");

break;

case NUM:

match(NUM,"NUM");

break;

case '(':

match('(',"(");

Expression();

match(')',")");

break;

default:

match(ID,"A factor "); // ID在这里肯定不match，利用它来报错（找不到因子）

break;

}

// <RelationOperator> ::= <|<=|>=|>|==|!=

void RelationOperator() // 关系运算符

... {

switch(sym)...{

case '<':

match('<',"<");

break;

case '>':

match('>',">");

break;

case LESSEQUAL:

match(LESSEQUAL,"<=");

break;

case GREATEQUAL:

match(GREATEQUAL,">=");

break;

case EQUAL:

match(EQUAL,"==");

break;

case NOTEQUAL:

match(NOTEQUAL,"!=");

break;

}

核心：规则----

<StatementBlock> ::= '{'<StatementSequence>'}'
<StatementSequence> ::= {<NullStatement>|<CommonStatement>|<VariantStatement>}

<AssignmentStatement> ::= ID=<Expression>
<ConditionStatement> ::= if(<Condition> <StatementBlock>
<LoopWhileStatement> ::= while(<Condition> <StatementBlock>

一个简单的C语言词法分析与语法分析器

热门文章

最新文章

相关课程

相关电子书

热门

活动广场

任务中心

开发者评测

高校计划

乘风者计划

训练营

阿里云MVP

话题

直播

下载

镜像站

技术资料

插件

一个简单的C语言词法分析与语法分析器

热门文章

最新文章

相关课程

相关电子书