I've tried to make a mini C compiler with the help of lex and yacc.
Below is the yacc/bison code:
%{
/* C prologue: copied verbatim to the top of the generated parser. */
#include <stdio.h>
#include <stdlib.h>
extern FILE *fp;      /* declared here but not referenced anywhere in the visible code */
extern int line;      /* current input line; defined and incremented by the lex file */
extern int yylex();   /* scanner entry point generated by lex */
int yyerror(char *);  /* error reporter, defined in the epilogue below */
%}
/* Terminal symbols. NOTE(review): ROP is never returned by the lexer shown
   with this grammar, and AND/OR (introduced by %left below) are not returned
   either: the lexer reports both && and || as LOG. */
%token FOR WHILE IF ELSE PRINTF NUM ID STR ROP LOG INC RETURN STRUCT INT FLOAT CHAR DOUBLE VOID LT GT LE GE NE EQ
/* Operator precedence, lowest first; symbols on one line share a level. */
%right '='
%left LOG
%left AND OR
%left LT GT LE GE NE EQ
%left '+' '-'
%left '*' '/' '%'
%left INC
/* XYZ is a precedence-only placeholder attached with %prec to the ELSE-less
   IF rule. ELSE is declared after it and therefore ranks higher, so the
   parser shifts ELSE and binds it to the nearest IF. */
%nonassoc XYZ
%nonassoc ELSE
%%
/* Translation unit: a possibly empty sequence of functions and declarations.
   Written left-recursively so each item is reduced as soon as it is complete;
   the right-recursive form kept every item on the parser stack until end of
   input. The accepted language is unchanged. */
start: start Function
| start Declaration
|
;
/* A ';'-terminated simple statement: an optionally typed assignment or
   expression, an array declaration, a struct definition, or a function call.
   NOTE(review): Arr already begins with Type, so "Type Arr ';'" only matches
   input with two type keywords in a row. */
Declaration: Type Ass ';'
| Ass ';'
| Type Arr ';'
| Arr ';'
| StructStmt ';'
| FunctionCall ';'
;
/* Assignment / arithmetic expression.
   Every binary rule has a single ID1 or NUM on the left and a full Ass on the
   right, so chains always nest to the right (a - b - c groups as a - (b - c))
   and a parenthesised expression cannot be followed by an operator.
   NOTE(review): RETURN is an ordinary alternative here, so it is accepted
   anywhere an expression is, e.g. on the right-hand side of '='. The comma
   rules also make comma-separated lists part of the expression itself. */
Ass: ID1 '=' Ass
| ID1 '=' FunctionCall
| ID1 '=' Arr
| Arr '=' Ass
| ID1 ',' Ass
| NUM ',' Ass
| ID1 '+' Ass
| ID1 '-' Ass
| ID1 '*' Ass
| ID1 '/' Ass
| ID1 '%' Ass
| NUM '+' Ass
| NUM '-' Ass
| NUM '*' Ass
| NUM '/' Ass
| NUM '%' Ass
| RETURN Ass
| '\'' Ass '\''
| '\"' Ass '\"'
| '(' Ass ')'
| '-' ID1
| '-' NUM
| INC ID1
| ID1 INC
| INC NUM
| NUM INC
| ID1
| NUM
;
/* Thin wrapper around the ID token. */
ID1: ID
;
/* Call with zero arguments, or with one Ass (which may itself be a comma list). */
FunctionCall: ID1 '(' Ass ')'
| ID1 '(' ')'
;
/* Function definition. The body is any Stmt, not necessarily a compound one. */
Function: Type ID1 '(' ArgListOpt ')' Stmt
;
/* Optional formal parameter list: either empty or a comma-separated ArgList.
   The previous form "ArgList Arg" required an extra parameter with no comma
   after the list, so ordinary lists such as (int a) or (int a, int b) were
   rejected. */
ArgListOpt: ArgList
|
;
/* One or more parameters separated by commas (left-recursive). */
ArgList: ArgList ',' Arg
| Arg
;
/* A single typed parameter. */
Arg: Type ID1
;
/* Brace-delimited block of statements. */
CompoundStmt: '{' StmtList '}'
;
/* Zero or more statements (left-recursive). */
StmtList: StmtList Stmt
|
;
/* Any single statement; a lone ';' is the empty statement. */
Stmt: WhileStmt
| ForStmt
| IfStmt
| CompoundStmt
| Declaration
| PrintFunc
| ';'
;
/* printf with a format string and, optionally, one Ass (possibly a comma list). */
PrintFunc: PRINTF '(' STR ',' Ass ')' ';'
| PRINTF '(' STR ')' ';'
;
WhileStmt: WHILE '(' Expr ')' Stmt
;
/* Each of the three for-clauses may be empty (see Expr1). */
ForStmt: FOR '(' Expr1 ';' Expr1 ';' Expr1 ')' Stmt
;
/* Optional expression. */
Expr1: Expr
|
;
/* The ELSE-less form carries the low XYZ precedence so that a following ELSE
   is shifted rather than the shorter rule being reduced. */
IfStmt: IF '(' Expr ')' Stmt %prec XYZ
| IF '(' Expr ')' Stmt ELSE Stmt
;
/* NOTE(review): as written, a struct body is exactly one "Type Ass" with no
   ';' inside the braces. */
StructStmt: STRUCT ID1 '{' Type Ass '}'
;
/* Relational / logical expression over Ass and Arr operands; the ambiguity of
   the binary rules is resolved by the %left declarations. */
Expr: Expr LE Expr
| Expr GE Expr
| Expr NE Expr
| Expr EQ Expr
| Expr GT Expr
| Expr LT Expr
| Expr LOG Expr
| Ass
| Arr
;
/* Array element form. NOTE(review): it always requires a leading Type, so an
   untyped use such as a[i] does not match this rule. */
Arr: Type ID1 '[' Ass ']'
;
/* Built-in type keywords. */
Type: INT
| FLOAT
| CHAR
| DOUBLE
| VOID
;
%%
#include <ctype.h>
#include "lex.yy.c"
/*
 * Entry point: parse one source file and print "Success" or "Error".
 *
 * The file to parse is argv[1] when given; otherwise the original default
 * "tests/test1.c" is used. If the file cannot be opened the reason is
 * reported and the program exits with EXIT_FAILURE. Previously a failed
 * fopen() left yyin NULL, which makes a lex scanner read standard input.
 *
 * Returns 0 once parsing has run, whether or not the input was accepted.
 */
int main(int argc, char *argv[])
{
    const char *path = (argc > 1) ? argv[1] : "tests/test1.c";
    int status;

    yyin = fopen(path, "r");
    if (yyin == NULL)
    {
        perror(path);
        return EXIT_FAILURE;
    }

    status = yyparse();
    fclose(yyin);

    if (status == 0)
    {
        printf("Success\n");
    }
    else
    {
        printf("Error\n");
    }
    return 0;
}
/*
 * Parser error callback: writes the current line number, the parser's
 * message and the text of the token being scanned to standard output.
 * Always returns 0.
 */
int yyerror (char *s)
{
    fprintf(stdout, "%d : %s %s\n", line, s, yytext);
    return 0;
}
Below is the lex file that I use to produce the tokens:
%{
/* C prologue: copied verbatim into the generated scanner. */
#include <stdio.h>
int line = 1;   /* current line number, starting at 1; incremented by the newline rules */
%}
/* Named patterns: L is a letter or underscore, D is a decimal digit. */
L [a-zA-Z_]
D [0-9]
/* Exclusive start conditions used while inside a block comment (mcomment)
   or a line comment (slcomment). */
%x mcomment
%x slcomment
%%
"/*" BEGIN(mcomment);
"//" BEGIN(slcomment);
<mcomment>"/*" printf("Error Mcomment");
<mcomment>. ;
<mcomment>\n line++;
<mcomment>"*/" BEGIN(INITIAL);
<slcomment>. ;
<slcomment>\n {line++; BEGIN(INITIAL);}
"printf" return PRINTF;
"for" return FOR;
"while" return WHILE;
"if" return IF;
"else" return ELSE;
"return" return RETURN;
"struct" return STRUCT;
"int" return INT;
"float" return FLOAT;
"char" return CHAR;
"double" return DOUBLE;
"void" return VOID;
{L}({L}|{D})* {return ID;}
{D}+([^{L};])* {return NUM;}
"+"|"-"|"*"|"/"|"%" {;}
"=" {;}
"<=" return LE;
">=" return GE;
"==" return EQ;
"!=" return NE;
">" return GT;
"<" return LT;
"!" ;
"&&"|"||" {return LOG;}
"++"|"--" {return INC;}
\"(\\.|[^\\"])*\" {return STR;}
^"#include ".+ ;
^"#include".+ ;
"("|")"|"["|"]"|"{"|"}" {;}
[ \t] return yytext[0];
";" ;
\n {line++;}
. return yytext[0];
%%
No matter which file I give as input, the parser always reports a syntax error on line 1. Parsing succeeds only when the input file is blank, which is useless. Is there a problem with the grammar, or is the lex code wrong?
`start` should be left-recursive, as should all other recursive productions. `Ass` allows `a = RETURN a;` among other nonsense. I suggest you clean it up to properly express your intentions. – user207421