词法分析
可识别内容:
标识符:id
数字:num
关键字:int,char,if,else,while,do,for
标号:, , . , ;
算术运算符号:=,+,-,*,/,&,!,|,&&,||
关系运算符:<,<=,>=,>,==,!=
注释://
内码定义:
单个符号,如{,+,*,> 等,均使用其ascii码做内码,占双或多个字节的符号(包括保留字,标号,数字,运算符等)为其取名如下:
Enum { END=0,INT,CHAR,IF,ELSE,WHILE=5,
DO,FOR,ARGAND,ARGOR,NUM=10,
ID,LESSEQUAL,EQUAL,GREATEQUAL,NOTEQUAL=15 };
其中NUM代表数字,ID代表标识符.
测试程序1-1的词法分析结果如下:
内码表
123 { 11 X 61 = 10 12 43 + 43 + 11 b 47 / 10 13 45 - 10 5 42 * 10 4 42 * 10 9 59 ; 11 Y 61 = |
10 4 42 * 10 1024 59 ; 3 if 40 ( 11 X 14 >= 11 Y 41 ) 123 { 3 if 40 ( 11 i 13 == 11 i |
41 ) 123 { 11 X 61 = 11 Y 125 } 125 } 59 ; 59 ; 59 ; 59 ; 5 while 40 ( 11 X 60 <
|
11 Y 41 ) 123 { 11 X 61 = 11 X 43 + 10 1 59 ; 125 } 125 } |
语法分析
C语言子集,可支持
语句块,语句,条件语句,While循环语句,赋值语句,基本算术表达式等。例如:
{
// Comment Supported : This is only a Test ^_^
X = 12 + b / 13 - 5 * 4 * 9; // A AssignmentStatement
Y = 4 * 1024;
if( X >= Y){
if( i == i){ // This is nested if Statement
X=Y;
}
}
;;;; // This is Null Statement
while( X < Y){ // This is while Statement
X = X +1;
}
}
测试程序1-1
支持错误检测,如将上面例子中X = 12 + b / 13 - 5 * 4 * 9;
故意修改为:X = 12 ++ b / 13 - 5 * 4 * 9; 则会出现如下错误提示,指示了出错行数和行内偏移位置:
规则如下:
<StatementBlock> ::= '{'<StatementSequence>'}'
<StatementSequence> ::= {<NullStatement>|<CommonStatement>|<VariantStatement>}
<NullStatement> ::= ';'
<CommonStatement> ::= <AssignmentStatement>
<VariantStatement> ::= <ConditionStatement>| <LoopWhileStatement>
<AssignmentStatement> ::= ID=<Expression>
<ConditionStatement> ::= if(<Condition> <StatementBlock>
<LoopWhileStatement> ::= while(<Condition> <StatementBlock>
<Condition> ::= <Expression><RelationOperator><Expression>
<Expression> ::= <Item>{+<Item>|-<Item>}
<Item> ::= <Factor>{*<Factor>|/<Factor>}
<Factor> ::= ID|NUM|(<Expression>)
<RelationOperator> ::= <|<=|>=|>|==|!=
//非终结符的英文定义
void StatementBlock(); //语句块
void StatementSequence(); //语句串
// XxxxxStatement() 为三类语句
void NullStatement(); //空语句--仅仅含有一个;号
void CommonStatement(); //语句
void VariantStatement(); //变种语句--包括 if(){},while{},他们都不以;结尾
// 下面的属于CommonStatement
void AssignmentStatement(); //赋值语句
// 下面两种属于VariantStatement
void ConditionStatement(); //条件语句
void LoopWhileStatement(); //while循环语句
void Condition(); //条件
void Expression(); //表达式
void Item(); //项
void Factor(); //因子
void RelationOperator(); //关系运算符
不能支持的主要方面:函数调用的识别,逗号表达式,for循环,switch语句。
词法分析:
//
LexAly.cpp :C子集词法分析程序

/**/
/*
支持内容:
标识符:id
关键字: int,char,if,else,while,do,for
标号: ,, ., ;
算术运算符号: =,+,-,&,!,|,&&,||

全局字符串:
instr 记录待解析的字符串
string 存贮当前被解析到的ID

接口:
gettoken();



Sample:
输入:
instr =
for(i=0;i<10;i++){
j=i+10;
printf("%d",j);
}
输出:
for
(
i
……
}


注意:
要记得处理程序中未显示出来的符号,如空白(' '),回车(' '),值表符(' ')
*/

#include
"
stdafx.h
"
#include
<
ctype.h
>
#include
<
stdlib.h
>
#include
<
string.h
>
#include
"
Constant.h
"

extern
void
grammar_check();


//
注意:这里累计数目(最大值)不能大于32 (10 0000B)TOKEN
//
enum {END=0,INT=1,CHAR,IF,ELSE,WHILE,DO,FOR,ARGAND,ARGOR,NUM,ID} ;

char
index[][
20
]
=
...
{

...{"END OF File"}, /**//* 0 END*/

...{"int"}, /**//* 1 INT*/

...{"char"}, /**//* 2 CHAR*/

...{"if"}, /**//* 3 IF*/

...{"else"}, /**//* 4 ELSE*/

...{"while"}, /**//* 5 WHILE*/

...{"do"}, /**//* 6 DO*/

...{"for"}, /**//* 7 FOR*/

...{"&&"}, /**//* 8 ARGAND*/

...{"||"}, /**//* 9 ARGOR*/

...{""}, /**//* 10 NUM */

...{""}, /**//* 11 ID */

...{"<="}, /**//* 12 LESSEQUAL */

...{"=="}, /**//* 13 EQUAL */

...{">="}, /**//* 14 GREATEQUAL */

...{"!="}, /**//* 15 NOTEQUAL */

...{""} /**//* 16 ID */
}
;


char
input[
10000
]
=
...
{0}
;

char
*
instr
=
input;
char
*
const
start_of_instr
=
input;

//
string 包含gettoken最新得到的id等串
//
gym包含 gettoken得到的内容的代号
//
current_line 包含当前行号
char
string[MAX_INDENT];
int
sym;
int
current_line
=
1
;
int
start_pos_of_current_line;

char
*
strstart;
//
用于辅助识别num,id
int
gettoken();
int
_gettoken();

void
error(
char
*
cur);
char
*
getlinestring(
int
line,
char
*
in_buf);
int
nextline();
int
getline();
int
getcurrentpos();



int
nextline()
...
{ return ++current_line; }

int
getline()
...
{ return current_line; }

int
getcurrentpos()
...
{ return (int)instr; }

char
*
getlinestring(
int
line,
char
*
in_buf)

...
{
char * t = input;
int i = 1;


while(*t != 0 && i < line )...{
if( *t==' ' ) i++;
t++;
}

int len = 0;

while ( *t != ' ' )...{
in_buf[len] = *t;
len++;
t++;
}
in_buf[len] = 0;
return in_buf;
}

void
error(
char
*
cur)

...
{
printf("Spell Error found at line %d ",getline());
exit(0);
}


//
语法分析
int
main_grammar(
char
*
filename)

...
{
int i;

FILE *f;

if(!(f=fopen(filename,"r")))...{
printf("Fail to open source file %s! ",filename);
exit(0);
}
int k=0;
char c;
while((c=fgetc(f))!=EOF)

...{
input[k]=c;
k++;
}
input[k] = 0;
//打印出程序
printf("%s ",start_of_instr);

//开始语法检查
grammar_check();

printf("Success! ");
return 0;
}

//
词法分析
int
main_spell(
char
*
filename)

...
{
int i;

FILE *f;

if(!(f=fopen(filename,"r")))...{
printf("Fail to open source file %s! ",filename);
exit(0);
}


int k=0;
char c;
while((c=fgetc(f))!=EOF)

...{
input[k]=c;
k++;
}

input[k] = 0;

printf("%s ",start_of_instr);


while((i=gettoken())!=END)

...{

if(i == ID)...{
printf("%d %s ",i,string);
continue;
}

if(i == NUM)...{
printf("%d %s ",i,string);
continue;
}


if(i<20)...{
printf("%d %s ",i,index[i]);

}else...{
printf("%d %c ",i,i);
}
}

return 0;
}

int
gettoken()

...
{
int i= (sym = _gettoken());

#if 0

if(i == ID)...{
printf("%s",string);
}

if(i == NUM)...{
printf("%s",string);
}


if(i<20)...{
printf("%s",index[i]);

}else...{
printf("%c",i);
}
#endif
return sym;
}


int
_gettoken()

...
{
char *cp = instr;


for(;;)...{
if( *instr == 0)
return END;

/**//*
if( 可能读入的字符 > 当前可用缓冲区大小 )
扩展缓冲区
*/
//int,char,if,else,while,do,for
switch ( *instr )

...{
case 'i':
if( instr[1] == 'f' && notda(instr[2]) )

...{
instr+=2;return IF;
}
if( instr[1] == 'n' && instr[2] == 't' && notda(instr[3]) )

...{
instr+=3; return INT;
}
// not a keyword. but an id.
strstart = instr;
instr++;
goto id_label;
case 'c':
if( instr[1] == 'h' && instr[2] == 'a' && instr[3] == 'r' && notda(instr[4]) )

...{instr+=4;return CHAR; }
strstart = instr;
instr++;
goto id_label;
break;
case 'e':
if( instr[1] == 'l' && instr[2] == 's' && instr[3] == 'e' && notda(instr[4]) )

...{instr+=4;return ELSE; }
strstart = instr;
instr++;
goto id_label;
break;
case 'w':
if( instr[1] == 'h' && instr[2] == 'i' && instr[3] == 'l' && instr[4] == 'e' && notda(instr[5]) )

...{instr+=5;return WHILE; }
strstart = instr;
instr++;
goto id_label;
case 'd':
if( instr[1] == 'o' && notda(instr[4]) )

...{instr+=2;return DO; }
strstart = instr;
instr++;
goto id_label;
case 'f':
if( instr[1] == 'o' && instr[2] == 'r' && notda(instr[3]) )

...{instr+=3;return FOR; }
strstart = instr;
instr++;
goto id_label;
// deal with IDs.
// EXCLUDE:i,c,d,e,w,f
case 'a': ; case 'b': ;
case 'g': ; case 'h': ;
case 'j': ; case 'k': ; case 'l': ;
case 'm': ; case 'n': ; case 'o': ;
case 'p': ; case 'q': ; case 'r': ;
case 's': ; case 't': ; case 'u': ;
case 'v': ; case 'x': ;
case 'y': ; case 'z': ;
case 'A': ; case 'B': ;
case 'C': ; case 'D': ; case 'E': ;
case 'F': ; case 'G': ; case 'H': ;
case 'I': ; case 'J': ; case 'K': ;
case 'L': ; case 'M': ; case 'N': ;
case 'O': ; case 'P': ; case 'Q': ;
case 'R': ; case 'S': ; case 'T': ;
case 'U': ; case 'V': ; case 'W': ;
case 'X': ; case 'Y': ; case 'Z': ;
strstart = instr;
instr++;
goto id_label;

case '0': ;
case '1': ; case '2': ; case '3': ;
case '4': ; case '5': ; case '6': ;
case '7': ; case '8': ; case '9': ;
strstart = instr;
instr++;
goto num_label;

case '{':
instr++;
return '{';
case '}':
instr++;
return '}';
case '(':
instr++;
return '(';
case ')':
instr++;
return ')';
case '+':
instr++;
return '+';
case '-':
instr++;
return '-';
case '*':
instr++;
return '*';
case '/':

if( instr[1] == '/' )...{ // ‘//’形式的注释
instr += 2;
while( *(instr) != 10 && *(instr) != 0 )
instr ++;
// instr ++;

}else...{ // 除号'/'
instr++;
return '/';
}
break;
case '=':

if( instr[1] == '=') ...{instr+=2; return EQUAL; }

else ...{instr++; return '=';}
break;
case '<':

if( instr[1] == '=') ...{instr+=2; return LESSEQUAL; }

else ...{instr++; return '<';}
break;
case '>':

if( instr[1] == '=') ...{instr+=2; return GREATEQUAL; }

else ...{instr++; return '>';}
break;
case '!':

if( instr[1] == '=') ...{instr+=2; return NOTEQUAL; }

else ...{instr++; return '!';}
break;

case '&':

if( instr[1] == '&') ...{instr+=2; return ARGAND; }

if( instr[1] == '&' && (isid(instr[2]) || isspace(instr[2])) )...{ instr++; return '&'; }
error(instr);
break;

case '|':

if( instr[1] == '|') ...{instr+=2; return ARGAND; }

if( instr[1] == '|' && (isid(instr[2]) || isspace(instr[2])) )...{ instr++; return '|'; }
error(instr);
break;

case ';':
instr++;
return ';';
case ' ':
//printf("new line (%d) ",getline());
nextline();
instr++;
start_pos_of_current_line = (int)instr;
break;
default:
instr++;
break;
id_label:
while( isid(*instr) )
instr++;
strncpy(string,strstart,instr-strstart);
string[instr-strstart]=0;
return ID;
num_label:
while( isdigit(*instr) )
instr++;

// if(isalpha(*(instr+1))
// error(instr); 让语法分析来做吧~

strncpy(string,strstart,instr-strstart);
string[instr-strstart]=0;
return NUM;
}



}
}


int
main(
int
argc,
char
*
argv[])

...
{

if(argc <= 1 || argc >=4) ...{
printf("Usage:>LexAly [g|s] [filename] ");
exit(0);
}


if(argc == 3 )...{
argv[1][0] == 'g'? main_grammar(argv[2]): main_spell(argv[2]);

}else if(argc == 2)...{
argv[1][0] == 'g'? main_grammar("source2.txt") : main_spell("source2.txt");
;
}
return 0;
}
//
grammar.cpp :C子集语法分析程序

/**/
/*
C语言子集,可支持
语句块,语句,条件语句,While循环语句,赋值语句,基本算术表达式等。例如:
{
// Comment Supported : This is only a Test ^_^

X = 12 + b / 13 - 5 * 4 * 9; // A AssignmentStatement
Y = 4 * 1024;

if( X >= Y){
if( i == i){ // This is nested if Statement
X=Y;
}
}

;;;; // This is Null Statement

while( X < Y){ // This is while Statement
X = X +1;
}
}

规则如下:

<StatementBlock> ::= '{'<StatementSequence>'}'
<StatementSequence> ::= {<NullStatement>|<CommonStatement>|<VariantStatement>}

<NullStatement> ::= ';'
<CommonStatement> ::= <AssignmentStatement>
<VariantStatement> ::= <ConditionStatement>| <LoopWhileStatement>

<AssignmentStatement> ::= ID=<Expression>
<ConditionStatement> ::= if(<Condition> <StatementBlock>
<LoopWhileStatement> ::= while(<Condition> <StatementBlock>

<Condition> ::= <Expression><RelationOperator><Expression>
<Expression> ::= <Item>{+<Item>|-<Item>}
<Item> ::= <Factor>{*<Factor>|/<Factor>}
<Factor> ::= ID|NUM|(<Expression>)
<RelationOperator> ::= <|<=|>=|>|==|!=

*/



#include
"
stdafx.h
"
#include
<
ctype.h
>
#include
<
conio.h
>
#include
<
stdlib.h
>
#include
<
string.h
>

#include
"
Constant.h
"

extern
int
gettoken();
extern
int
getcurrentpos();
extern
char
*
string;
extern
int
sym;
extern
int
current_line;
extern
int
start_pos_of_current_line;
extern
char
*
getlinestring(
int
line,
char
*
in_buf);
extern
char
input[];

//
非终结符的英文定义
void
StatementBlock();
//
语句块
void
StatementSequence();
//
语句串

//
XxxxxStatement() 为三类语句
void
NullStatement();
//
空语句--仅仅含有一个;号
void
CommonStatement();
//
语句
void
VariantStatement();
//
变种语句--包括 if(){},while{},他们都不以;结尾

//
下面的属于CommonStatement
void
AssignmentStatement();
//
赋值语句

//
下面两种属于VariantStatement
void
ConditionStatement();
//
条件语句
void
LoopWhileStatement();
//
while循环语句
void
Condition();
//
条件
void
Expression();
//
表达式
void
Item();
//
项
void
Factor();
//
因子
void
RelationOperator();
//
关系运算符

/**/
/*
注:以上未考虑函数调用表达式
*/


void
match_error(
char
*
c)

...
{
char error_buf[1024];
int in_line_pos = getcurrentpos()-start_pos_of_current_line;

printf("Grammar Error! ");
printf("Line %d[%d] : %s expected ",current_line,in_line_pos,c);
//获取错误行并打印出来
getlinestring(current_line,error_buf);
printf("%s ",error_buf);
//输出错误指示点(Not Exact!)
for(int i=1; i< in_line_pos; i++)
printf("%c",'^');
printf(" ");

exit(0);
}

//
expecetedSym 期望符号
//
msg 出错时给出的消息
void
match(
int
expecetedSym,
char
*
msg)

...
{

if(expecetedSym != sym)...{
#if 0
if(sym < 0x20 )
printf(" Fail: ExpecetedSym=%d sym=%d ",expecetedSym,sym);
else
printf(" Fail: ExpecetedSym=%d sym=%c ",expecetedSym,sym);
#endif
match_error(msg);
}
gettoken(); //预读一个符号

}
void
grammar_check()

...
{

// printf("%s",input);
gettoken(); //开始检查,填充预读区
// match('-',"DK");
StatementBlock();
if(sym != END) match(END,"End Of File");
}

void
StatementBlock()
//
语句块

...
{
match('{',"{"); //和预读符号比较
StatementSequence();
match('}',"}");
}

void
StatementSequence()
//
语句串

...
{

while(sym == ID ||
sym == IF ||
sym == WHILE||
sym == ';')

...{
while(sym == ID) //也可以用if(),但从统计角度看,while效率会略高。
//因为一般普通CommonStatement()出现概率较大

...{
CommonStatement();
match(';',";");
}
while(sym == IF ||
sym == WHILE)

...{
VariantStatement();
}
while(sym == ';')

...{
NullStatement();
}
}
}

void
VariantStatement()
//
变种语句--包括 if(){},while{},他们都不以;结尾

...
{

switch(sym)...{ //若sym与下面两种均不匹配,什么也不做,空语句是也~
case IF:
ConditionStatement(); //条件语句
break;
case WHILE:
LoopWhileStatement();
break;
}
return;
}

void
NullStatement()
//
空语句--仅仅含有一个;号

...
{
match(';',";");
}

void
CommonStatement()
//
语句,以;结尾,但不以;开头

...
{

switch(sym)...{ //若sym与下面任何一种均不匹配,什么也不做,空语句是也~
case ID:
AssignmentStatement();
break;
}
return;
}

void
AssignmentStatement()
//
赋值语句

...
{
match(ID,"ID");
match('=',"=");
Expression();
}

void
ConditionStatement()
//
条件语句

...
{
match(IF,"if");
match('(',"(");
Condition();
match(')',")");
StatementBlock();
}

void
LoopWhileStatement()
//
循环语句

...
{
match(WHILE,"while");
match('(',"(");
Condition();
match(')',")");
StatementBlock();
}

void
Condition()
//
条件

...
{
Expression();
RelationOperator();
Expression();
}

//
<Expression> ::= <Item>{+<Item>|-<Item>}
void
Expression()
//
表达式

...
{
Item();

while(true)...{ // {+<Item>|-<Item>} 可以有多个

switch(sym)...{
case '+':
match('+',"+");
Item();
break;
case '-':
match('-',"-");
Item();
break;
default:
return;
}
}
return;

}
//
<Item> ::= <Factor>{*<Factor>|/<Factor>}
void
Item()
//
项

...
{
Factor();

while(true)...{ // {*<Factor>|/<Factor>} -- 可以有多个

switch(sym)...{
case '*':
match('*',"*");
Factor();
break;
case '/':
match('/',"/");
Factor();
break;
default:
return;
}
}
return;
}
//
<Factor> ::= ID|NUM|(<Expression>)
void
Factor()
//
因子

...
{

switch(sym)...{
case ID:
match(ID,"ID");
break;
case NUM:
match(NUM,"NUM");
break;
case '(':
match('(',"(");
Expression();
match(')',")");
break;
default:
match(ID,"A factor "); // ID在这里肯定不match,利用它来报错(找不到因子)
break;
}
}
//
<RelationOperator> ::= <|<=|>=|>|==|!=
void
RelationOperator()
//
关系运算符

...
{

switch(sym)...{
case '<':
match('<',"<");
break;
case '>':
match('>',">");
break;
case LESSEQUAL:
match(LESSEQUAL,"<=");
break;
case GREATEQUAL:
match(GREATEQUAL,">=");
break;
case EQUAL:
match(EQUAL,"==");
break;
case NOTEQUAL:
match(NOTEQUAL,"!=");
break;
}
}
核心:规则----
<StatementBlock> ::= '{'<StatementSequence>'}'
<StatementSequence> ::= {<NullStatement>|<CommonStatement>|<VariantStatement>}
<NullStatement> ::= ';'
<CommonStatement> ::= <AssignmentStatement>
<VariantStatement> ::= <ConditionStatement>| <LoopWhileStatement>
<AssignmentStatement> ::= ID=<Expression>
<ConditionStatement> ::= if(<Condition> <StatementBlock>
<LoopWhileStatement> ::= while(<Condition> <StatementBlock>
<Condition> ::= <Expression><RelationOperator><Expression>
<Expression> ::= <Item>{+<Item>|-<Item>}
<Item> ::= <Factor>{*<Factor>|/<Factor>}
<Factor> ::= ID|NUM|(<Expression>)
<RelationOperator> ::= <|<=|>=|>|==|!=