lex bison

218 阅读 0 评论 144 点赞

我是靠谱客的博主飘逸春天，这篇文章主要介绍lex bison，现在分享给大家，希望可以做个参考。

还是人家的例子，自己亲手输入，虽然这样麻烦了不少，但是一字一句地输入让我对建立一个编译器的过程有了最
为生动的理解。我们的目标就是建立一个这样的程序，可以运行如下的代码：
a = 1;
b = 6 - 2 * (3 - 1);
if(a > b){
print a;
}
else
{
print b;
}
// 斐波那契数列
c = 1;
d = 1;
e = 1;
while(a < 10){
f = d;
//临时变量
d = d + e;
e = f; a = a + 1;
//增加计数
print d;
//打印斐波那契数列
}
语法有些熟悉吧！虽然这个脚本还只支持整数的运算，只有简单的流程控制（已经包含了程序设计的所有必要
流程了）和一个简单的打印输出语句。

言归正传，看看这个程序由哪些部分组成呢？
- calc.h 语法分析的结构声明
- calc.lex 词法分析文件
- calc.y 语法分析文件
- ex_parser.c 解释方式运行程序的引擎（还有编译为汇编和输出语法树方式的输出，这里没有列出）

________________________________________
下面还是让代码直接说话吧！
-- calc.h --
/*
 * Node type definitions for the simple calculator's syntax tree.
 */

/* Discriminator that tells which kind of node a nodeType holds. */
typedef enum {
    typeCon,    /* constant */
    typeId,     /* variable */
    typeOpr     /* operator */
} nodeEnum;
/* Payload of a constant node. */
typedef struct {
    int value;  /* the constant's integer value */
} conNodeType;
/* Payload of a variable node. */
typedef struct {
    int i;  /* index into the variable symbol table */
} idNodeType;
/* Payload of an operator node. */
typedef struct {
    int oper;                   /* operator code */
    int nops;                   /* number of operands */
    struct nodeTypeTag *op[1];  /* operands; declared with one slot, expandable */
} oprNodeType;
/* A syntax tree node: a type tag followed by the payload that tag selects. */
typedef struct nodeTypeTag {
    nodeEnum type;          /* kind of node */

    /* Payload shared by all node kinds (anonymous union). */
    union {
        conNodeType con;    /* constant payload */
        idNodeType  id;     /* variable payload */
        oprNodeType opr;    /* operator payload */
    };
} nodeType;
/* Variable symbol table (26 slots); the array itself is defined in calc.y. */
extern int sym[26];
--calc.lex-- 是一个相当简单的文件，这里支持的关键词比较少。
%{
/*
* Lexical analyzer (scanner) for the simple calculator.
*/
#include <stdlib.h>
#include "calc.h"
#include "calc.tab.h"
/* Error reporter; the definition lives in calc.y. */
void yyerror(char*);
%}
%%
[a-z]   {
            /* A single lowercase letter is a variable; 'a'..'z' maps to 0..25. */
            yylval.sIndex = *yytext - 'a';
            return VARIABLE;
        }
[0-9]+  {
            /* Decimal integer literal. */
            yylval.iValue = atoi(yytext);
            return INTEGER;
        }
[-()<>=+*/;{}.]     return *yytext;     /* single-character tokens stand for themselves */
">="                return GE;
"<="                return LE;
"=="                return EQ;
"!="                return NE;
"while"             return WHILE;
"if"                return IF;
"else"              return ELSE;
"print"             return PRINT;
[ \t\n]+            ;                   /* skip whitespace */
"//".*              ;                   /* skip a line comment; longest match beats the '/' token */
.                   yyerror("未知字符");
%%
/*
 * End-of-input hook for the scanner.
 * Always returns 1.
 */
int yywrap(void)
{
    const int result = 1;

    return result;
}
--calc.y-- 这里是构建语法的核心，如果想自己构建一个编译器什么的，还是首先弄明白自己的语法怎么
使用BNF表达式来表示吧，写成Yacc格式的文件反而不是那么复杂的事情。
%{
/*
*----------------------------.
*| 简单计算程序语法分析程序 |
*`---------------------------/
*/
#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include "calc.h"
/*
* Forward declarations of the functions used by the grammar actions.
*/
nodeType* opr(int oper, int nops, ...);
nodeType* id(int i);
nodeType* con(int value);
void freeNode(nodeType* p);
int ex(nodeType* p);
int yylex(void);
void yyerror(char*s);
int sym[26]; /* variable symbol table */
%}
%union {
    int iValue;         /* integer value */
    char sIndex;        /* symbol table index */
    nodeType* nPtr;     /* pointer to a syntax tree node */
};

/* The <tag> names must match the %union member names exactly (case-sensitive). */
%token <iValue> INTEGER
%token <sIndex> VARIABLE
%token WHILE IF PRINT

/* IFX ranks below ELSE, so an `else` binds to the nearest `if`. */
%nonassoc IFX
%nonassoc ELSE

/* Operator precedence, lowest first. */
%left GE LE EQ NE '>' '<'
%left '+' '-'
%left '*' '/'
%nonassoc UMINUS

%type <nPtr> stmt expr stmt_list
%%

/* Start symbol: once the whole input has been reduced, terminate with status 0. */
program:
        function                { exit(0); }
        ;
/* A possibly empty sequence of statements; each one is executed and then freed as soon as it is parsed. */
function:
          function stmt         { ex($2); freeNode($2); }
        | /* NULL */
        ;
/* A statement; the semantic value ($$) is the root of the statement's syntax tree. */
stmt:
      ';'                               { $$ = opr(';', 2, NULL, NULL); }
    | expr ';'                          { $$ = $1; }
    | PRINT expr ';'                    { $$ = opr(PRINT, 1, $2); }
    | VARIABLE '=' expr ';'             { $$ = opr('=', 2, id($1), $3); }
    | WHILE '(' expr ')' stmt           { $$ = opr(WHILE, 2, $3, $5); }
    | IF '(' expr ')' stmt %prec IFX    { $$ = opr(IF, 2, $3, $5); }
    | IF '(' expr ')' stmt ELSE stmt    { $$ = opr(IF, 3, $3, $5, $7); }
    | '{' stmt_list '}'                 { $$ = $2; }
    ;
/* One or more statements, chained left to right with ';' operator nodes. */
stmt_list:
      stmt                      { $$ = $1; }
    | stmt_list stmt            { $$ = opr(';', 2, $1, $2); }
    ;
/* An integer expression; the semantic value ($$) is the root of the expression's syntax tree. */
expr:
      INTEGER                   { $$ = con($1); }
    | VARIABLE                  { $$ = id($1); }
    | '-' expr %prec UMINUS     { $$ = opr(UMINUS, 1, $2); }
    | expr '+' expr             { $$ = opr('+', 2, $1, $3); }
    | expr '-' expr             { $$ = opr('-', 2, $1, $3); }
    | expr '*' expr             { $$ = opr('*', 2, $1, $3); }
    | expr '/' expr             { $$ = opr('/', 2, $1, $3); }
    | expr '<' expr             { $$ = opr('<', 2, $1, $3); }
    | expr '>' expr             { $$ = opr('>', 2, $1, $3); }
    | expr GE expr              { $$ = opr(GE, 2, $1, $3); }
    | expr LE expr              { $$ = opr(LE, 2, $1, $3); }
    | expr NE expr              { $$ = opr(NE, 2, $1, $3); }
    | expr EQ expr              { $$ = opr(EQ, 2, $1, $3); }
    | '(' expr ')'              { $$ = $2; }
    ;
%%
/*
 * Size in bytes of the nodeType header, i.e. everything that precedes the
 * payload union. Computed with offsetof so that it neither requires nor
 * reads a local variable named `p`.
 */
#define SIZEOF_NODETYPE offsetof(nodeType, con)
/*
 * Create a constant node.
 *
 * value: the integer stored in the node.
 *
 * Returns a heap-allocated node that the caller releases with freeNode().
 * Never returns NULL: if allocation fails the error is reported through
 * yyerror() and the program exits with EXIT_FAILURE.
 */
nodeType* con(int value){
    nodeType* p;
    size_t nodeSize;

    /* Allocate only the header plus the constant payload, not the whole union. */
    nodeSize = SIZEOF_NODETYPE + sizeof(conNodeType);
    if ((p = malloc(nodeSize)) == NULL) {
        yyerror("out of memory");
        exit(EXIT_FAILURE);     /* yyerror() returns; p must not be dereferenced */
    }

    p->type = typeCon;
    p->con.value = value;
    return p;
}
/*
 * Create a variable node.
 *
 * i: symbol table index stored in the node.
 *
 * Returns a heap-allocated node that the caller releases with freeNode().
 * Never returns NULL: if allocation fails the error is reported through
 * yyerror() and the program exits with EXIT_FAILURE.
 */
nodeType* id(int i){
    nodeType* p;
    size_t nodeSize;

    /* Allocate only the header plus the variable payload, not the whole union. */
    nodeSize = SIZEOF_NODETYPE + sizeof(idNodeType);
    if ((p = malloc(nodeSize)) == NULL) {
        yyerror("out of memory");
        exit(EXIT_FAILURE);     /* yyerror() returns; p must not be dereferenced */
    }

    p->type = typeId;
    p->id.i = i;
    return p;
}
/*
 * Create an operator node.
 *
 * oper: operator code stored in the node.
 * nops: number of operands that follow.
 * ...:  exactly `nops` arguments of type nodeType*, stored in order in
 *       op[0..nops-1]; NULL operands are stored as given.
 *
 * Returns a heap-allocated node that the caller releases with freeNode(),
 * which also frees the operands. Never returns NULL: if allocation fails
 * the error is reported through yyerror() and the program exits with
 * EXIT_FAILURE.
 */
nodeType* opr(int oper, int nops, ...){
    va_list ap;
    nodeType* p;
    size_t nodeSize;
    int i;

    /* oprNodeType already holds one operand slot; add room for the rest. */
    nodeSize = SIZEOF_NODETYPE + sizeof(oprNodeType) +
        (nops - 1) * sizeof(nodeType*);
    if ((p = malloc(nodeSize)) == NULL) {
        yyerror("out of memory");
        exit(EXIT_FAILURE);     /* yyerror() returns; p must not be dereferenced */
    }

    p->type = typeOpr;
    p->opr.oper = oper;
    p->opr.nops = nops;
    va_start(ap, nops);
    for (i = 0; i < nops; i++) {
        p->opr.op[i] = va_arg(ap, nodeType*);
    }
    va_end(ap);
    return p;
}
/*
 * Release a syntax tree.
 *
 * Frees `p` itself and, for operator nodes, every operand subtree first.
 * Passing NULL is allowed and does nothing.
 */
void freeNode(nodeType* p){
    if (p == NULL)
        return;

    if (p->type == typeOpr) {
        nodeType **child = p->opr.op;
        int remaining = p->opr.nops;

        /* Walk the operand array front to back. */
        while (remaining-- > 0)
            freeNode(*child++);
    }

    free(p);
}
/*
 * Report an error message on stderr as one line prefixed with "Error: ".
 *
 * s: the message text.
 */
void yyerror(char*s){
    /* "\n" ends the line; the former "/n" printed a literal slash and 'n'. */
    fprintf(stderr, "Error: %s \n", s);
}
/*
 * Entry point: run the parser over standard input.
 *
 * A fully successful parse ends inside the `program` rule, which calls
 * exit(0). If yyparse() returns instead, its status is propagated so that
 * a failed parse does not exit with status 0.
 */
int main(void){
    return yyparse() == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
}

--ex_parser.c-- 包含了ex()函数的一个版本：如何解释执行脚本。这是一个以递归方式执行的函数，
一次单独的调用返回可以看作是一个独立的语法上下文。
/*
* 这是一个使用解释方式运行程序的编译处理ex(nodeType*)函数版本。
*/
#include <stdio.h>
#include "calc.h"
#include "calc.tab.h"
/*
 * Interpret (execute) the syntax tree rooted at `p`.
 *
 * p: node to evaluate; NULL is allowed and evaluates to 0.
 *
 * Returns the value of the node:
 *   - constant: its value;
 *   - variable: the current content of its sym[] slot;
 *   - arithmetic / comparison operator: the result of the operation
 *     (comparisons give 1 or 0);
 *   - '=': the assigned value, after storing it into sym[];
 *   - ';': the value of the second operand, after running the first;
 *   - WHILE, IF, PRINT and unrecognized operators: 0.
 *
 * Division by zero is reported on stderr and evaluates to 0 instead of
 * invoking undefined behavior.
 */
int ex(nodeType* p){
    if(!p){
        return 0;
    }
    switch(p->type){
    case typeCon:
        return p->con.value;
    case typeId:
        return sym[p->id.i];
    case typeOpr:
        switch(p->opr.oper){
        case WHILE:
            while(ex(p->opr.op[0]))
                ex(p->opr.op[1]);
            return 0;
        case IF:
            if(ex(p->opr.op[0]))
                ex(p->opr.op[1]);
            else if(p->opr.nops > 2)    /* the else branch exists only in the 3-operand form */
                ex(p->opr.op[2]);
            return 0;
        case PRINT:
            /* "\n" ends the line; the former "/n" printed a literal slash and 'n'. */
            printf("%d\n",ex(p->opr.op[0]));
            return 0;
        case ';':
            ex(p->opr.op[0]);
            return ex(p->opr.op[1]);
        case '=':
            /* op[0] is the variable node being assigned to. */
            return sym[p->opr.op[0]->id.i] = ex(p->opr.op[1]);
        case UMINUS:
            return -ex(p->opr.op[0]);
        case '+':
            return ex(p->opr.op[0]) + ex(p->opr.op[1]);
        case '-':
            return ex(p->opr.op[0]) - ex(p->opr.op[1]);
        case '*':
            return ex(p->opr.op[0]) * ex(p->opr.op[1]);
        case '/': {
            int dividend = ex(p->opr.op[0]);
            int divisor = ex(p->opr.op[1]);

            if(divisor == 0){
                fprintf(stderr, "Error: division by zero\n");
                return 0;
            }
            return dividend / divisor;
        }
        case '<':
            return ex(p->opr.op[0]) < ex(p->opr.op[1]);
        case '>':
            return ex(p->opr.op[0]) > ex(p->opr.op[1]);
        case GE:
            return ex(p->opr.op[0]) >= ex(p->opr.op[1]);
        case LE:
            return ex(p->opr.op[0]) <= ex(p->opr.op[1]);
        case NE:
            return ex(p->opr.op[0]) != ex(p->opr.op[1]);
        case EQ:
            return ex(p->opr.op[0]) == ex(p->opr.op[1]);
        default:
            break;      /* unrecognized operator: falls through to return 0 */
        }
        break;
    default:
        break;
    }
    return 0;
}
如何编译呢？也是非常的简单：
>bison -d calc.y     (生成calc.tab.c和calc.tab.h两个文件)
>flex calc.lex         (生成lex.yy.c这个文件)
>gcc -g -Wall calc.tab.c lex.yy.c ex_parser.c -o calc
如果将最上面提到的那个脚本存为 test.txt那么执行它就可以用：
>cat test.txt | ./calc
那么你就可以看到出现一排数字（每行一个），从第二行开始到结束就是那个斐波那契数列啦！！
转自:http://blog.chinaunix.net/u/30708/showart_434574.html