I want to write a compiler for a mini C language using flex and bison. An example of my language would look like this:
/* This is an example uC program. */
int fac(int n)
{
if (n < 2)
return n;
return n * fac(n - 1);
}
int sum(int n, int a[])
{
int i;
int s;
i = 0;
s = 0;
while (i < n) {
s = s + a[i];
i = i + 1;
}
return s;
}
int main(void)
{
int a[2];
a[0] = fac(5);
a[1] = 27;
return 0;
}
Here is my grammer to parse this kind of language:
program ::= topdec_list
topdec_list ::= /empty/ | topdec topdec_list
topdec ::= vardec ";"
| funtype ident "(" formals ")" funbody
vardec ::= scalardec | arraydec
scalardec ::= typename ident
arraydec ::= typename ident "[" intconst "]"
typename ::= "int" | "char"
funtype ::= typename | "void"
funbody ::= "{" locals stmts "}" | ";"
formals ::= "void" | formal_list
formal_list ::= formaldec | formaldec "," formal_list
formaldec ::= scalardec | typename ident "[" "]"
locals ::= /empty/ | vardec ";" locals
stmts ::= /empty/ | stmt stmts
stmt ::= expr ";"
| "return" expr ";" | "return" ";"
| "while" condition stmt
| "if" condition stmt else_part
| "{" stmts "}"
| ";"
else_part ::= /empty/ | "else" stmt
condition ::= "(" expr ")"
expr ::= intconst
| ident | ident "[" expr "]"
| unop expr
| expr binop expr
| ident "(" actuals ")"
| "(" expr ")"
unop ::= "-" | "!"
binop ::= "+" | "-" | "*" | "/"
| "<" | ">" | "<=" | ">=" | "!=" | "=="
| "&&"
| "="
actuals ::= /empty/ | expr_list
expr_list ::= expr | expr "," expr_list
The flex module works fine and returns the values as desired, but bison keeps giving me strange errors that indicate my grammer is wrong (which is not). Here is my bison file:
%{
#include <stdio.h>
#include <stdlib.h>
extern int yylex();
extern int yyparse();
FILE* yyin;
extern int lineNumber;
void yyerror(const char* s);
%}
%union {
int intvalue;
}
%token<intvalue> INTCONST
%token IDENT
%token CHARK
%token ELSEK
%token IFK
%token INTK
%token RETURNK
%token VOIDK
%token WHILEK
%token NOTK
%token ANDK
%token COMMAK
%token DIVIDEK
%token MULTIPLYK
%token MINUSK
%token PLUSK
%token SEMICOLONK
%token NEQUALK
%token EQUALK
%token ASSIGNMENTK
%token RECOMPARATORK
%token LECOMPARATORK
%token RCOMPARATORK
%token LCOMPARATORK
%token RPARANTESESK
%token LPARANTESESK
%token RBRACKETK
%token LBRACKETK
%token RCURLY
%token LCURLY
%right ASSIGNMENTK
%left ANDK
%left EQUALK NEQUALK
%left LCOMPARATORK RCOMPARATORK LECOMPARATORK RECOMPARATORK
%left PLUSK
%left MULTIPLYK DIVIDEK
%left MINUSK NOTK
%start program
%%
program: topdec_list
;
topdec_list: /*empty*/
| topdec topdec_list
;
topdec: vardec SEMICOLONK
| funtype IDENT LPARANTESESK formals RPARANTESESK funbody
;
vardec: scalardec
| arraydec
;
scalardec: typename IDENT
;
arraydec: typename IDENT LBRACKETK INTCONST RBRACKETK
;
typename: INTK
| CHARK
;
funtype: typename
| VOIDK
;
funbody: LCURLY locals stmts RCURLY
| SEMICOLONK
;
formals: VOIDK
| formal_list
;
formal_list: formaldec
| formaldec COMMAK formal_list
;
formaldec: scalardec
| typename IDENT LBRACKETK RBRACKETK
;
locals: /*empty*/
| vardec SEMICOLONK locals
;
stmts: /*empty*/
| stmt stmts
;
stmt: expr SEMICOLONK
| RETURNK expr SEMICOLONK
| RETURNK SEMICOLONK
| WHILEK condition stmt
| IFK condition stmt else_part
| LCURLY stmts RCURLY
| SEMICOLONK
;
else_part: /*empty*/ | ELSEK stmt
;
condition: LPARANTESESK expr RPARANTESESK
;
expr: INTCONST
| IDENT
| IDENT LBRACKETK expr RBRACKETK
| unop expr
| expr binop expr
| IDENT LPARANTESESK actuals RPARANTESESK
| LPARANTESESK expr RPARANTESESK
;
unop: MINUSK | NOTK
;
binop: PLUSK
| MINUSK
| MULTIPLYK
| DIVIDEK
| LCOMPARATORK
| RCOMPARATORK
| LECOMPARATORK
| RECOMPARATORK
| NEQUALK
| EQUALK
| ANDK
| ASSIGNMENTK
;
actuals: /*empty*/
| expr_list
;
expr_list: expr
| expr COMMAK expr_list
;
%%
int main(int argc, char **argv)
{
yyin = fopen("input.c", "r");
do
{
yyparse();
} while (!feof(yyin));
fclose(yyin);
return 0;
}
void yyerror(const char* error)
{
fprintf(stderr, "Parse error in line %d: %s\n", lineNumber, error);
}
for example for this input:
int i;
i = 0;
i get an error that a syntax error happened right after it distinguishes the second i
( i print the tokens in my flex file so i know it has no problem untill it reaches the assignment character ).
Or as another example when passing this line:
int fac(int n);
i get the same syntax error ( which exactly is Parse error in line 1: syntax error
) right after the first paranteses, meaning that it sees the second int
as a syntax error, which it shouldn't because my grammer looks fine.
Also the warnings produced by bison is as follows (flex and gcc are fine):
semantic_analyzer.y: warning: 26 shift/reduce conflicts [-Wconflicts-sr]
semantic_analyzer.y:78.10-17: warning: rule useless in parser due to conflicts [-Wother]
funtype: typename
^^^^^^^^
Any suggestions or corrections are appreciated :) thanks in advance.