1
votes

I've tried to make a mini c compiler with the help of lex and yacc

Below is the yacc/bison code:

%{
#include <stdio.h>
#include <stdlib.h>

extern FILE *fp;

extern int line;
extern int yylex();
int yyerror(char *);
%}

%token FOR WHILE IF ELSE PRINTF NUM ID STR ROP LOG INC RETURN STRUCT INT      FLOAT CHAR DOUBLE VOID LT GT LE GE NE EQ

%right '='
%left LOG
%left AND OR
%left LT GT LE GE NE EQ
%left '+' '-'   
%left '*' '/' '%'
%left INC

%nonassoc XYZ
%nonassoc ELSE

%%

start: Function start
  | Declaration start
  |
  ;

Declaration: Type Ass ';'
   | Ass ';'
   | Type Arr ';'
   | Arr ';'
   | StructStmt ';'
   | FunctionCall ';'

   ;

Ass: ID1 '=' Ass
| ID1 '=' FunctionCall
| ID1 '=' Arr
| Arr '=' Ass
| ID1 ',' Ass
|  NUM ',' Ass
| ID1 '+' Ass
| ID1 '-' Ass
| ID1 '*' Ass
| ID1 '/' Ass
| ID1 '%' Ass
| NUM '+' Ass
| NUM '-' Ass
| NUM '*' Ass
| NUM '/' Ass
| NUM '%' Ass
| RETURN Ass
| '\'' Ass '\''
| '\"' Ass '\"'
| '(' Ass ')'
| '-' ID1
| '-' NUM
| INC ID1
| ID1 INC
| INC NUM
| NUM INC
| ID1
| NUM
;

ID1: ID
;

FunctionCall: ID1 '(' Ass ')' 
    | ID1 '(' ')'
    ;

Function: Type ID1 '(' ArgListOpt ')' Stmt
 ;

ArgListOpt: ArgList Arg
  |
  ;

ArgList: ArgList ',' Arg
   | Arg
   ;

Arg: Type ID1
 ;

CompoundStmt: '{' StmtList '}'
      ;

StmtList: StmtList Stmt
|
;

Stmt: WhileStmt
| ForStmt
| IfStmt
| CompoundStmt
| Declaration
| PrintFunc
| ';'
;

PrintFunc: PRINTF '(' STR ',' Ass ')' ';'
 | PRINTF '(' STR ')' ';'
 ;

WhileStmt: WHILE '(' Expr ')' Stmt
 ;

ForStmt: FOR '(' Expr1 ';' Expr1 ';' Expr1 ')' Stmt
   ;

Expr1: Expr
 | 
 ;

IfStmt: IF '(' Expr ')' Stmt %prec XYZ
  | IF '(' Expr ')' Stmt ELSE Stmt
  ;

StructStmt: STRUCT ID1 '{' Type Ass '}' 
  ;

Expr: Expr LE Expr 
| Expr GE Expr
| Expr NE Expr
| Expr EQ Expr
| Expr GT Expr
| Expr LT Expr
| Expr LOG Expr
| Ass
| Arr
;

Arr: Type ID1 '[' Ass ']'
; 

Type: INT
| FLOAT
| CHAR
| DOUBLE
| VOID
;

%%

#include <ctype.h>
#include "lex.yy.c"

int main(int argc, char *argv[])
{
    yyin = fopen("tests/test1.c", "r");  
    if (!yyparse())
    {
        printf("Success\n");
    }
    else
    {
         printf("Error\n");
    }
    return 0;
}

int yyerror (char *s)
{
    printf("%d : %s %s\n", line, s, yytext);
    return 0;
}

Below is the lex file which I've used to collect the tokens

%{
#include <stdio.h>
int line = 1;
%}

L [a-zA-Z_]
D [0-9]

%x mcomment  
%x slcomment

%%

"/*"    BEGIN(mcomment);
"//"    BEGIN(slcomment);


<mcomment>"/*"  printf("Error Mcomment");
<mcomment>. ;
<mcomment>\n    line++;
<mcomment>"*/"  BEGIN(INITIAL);
<slcomment>.    ;
<slcomment>\n   {line++; BEGIN(INITIAL);}

"printf"    return PRINTF;
"for"   return FOR;
"while" return WHILE;
"if"    return IF;
"else"  return ELSE;
"return" return RETURN;
"struct" return STRUCT;
"int" return INT;
"float" return FLOAT;
"char" return CHAR;
"double" return DOUBLE;
"void" return VOID;

{L}({L}|{D})*   {return ID;}
{D}+([^{L};])*  {return NUM;}
"+"|"-"|"*"|"/"|"%" {;}
"=" {;} 
"<="    return LE;
">="    return GE;
"=="    return EQ;
"!="    return NE;
">" return GT;
"<" return LT;
"!" ; 
"&&"|"||"   {return LOG;}
"++"|"--"   {return INC;}
\"(\\.|[^\\"])*\"   {return STR;}
^"#include ".+  ;
^"#include".+   ;
"("|")"|"["|"]"|"{"|"}" {;}
[ \t]   return yytext[0];
";" ;
\n  {line++;}
.   return yytext[0];

%%

No matter which file I give as input, it always shows a syntax error in line 1. It shows successful parsing only when I give a blank input file, which is useless. Is there a problem with the grammar? Or is the lex code wrong?

2
Strange grammar. start should be left-recursive, as should all other recursive productions. Ass allows a = RETURN a; among other nonsense. I suggest you clean it up to properly express your intentions.user207421
@EJP left recursion adds 7 shift-reduce conflicts plus one supposedly useless grammar (NULL of ArgListOpt) which is actually very usefulPrateek Kembhavi
So fix them. Your productions are largely nonsense anyway, as I already indicated. Right recursion adds stack overflows at runtime. Which do you really prefer?user207421

2 Answers

1
votes

Your lexer returns all white space characters as themselves, except for newlines, which it eats (does not return).

Your grammar accepts no white space characters. So if you have any white space in your input stream, there will be a syntax error.

Your grammar generally requires semicolon terminators, but your lexer eats semicolons (does not return them). So if you have any productions that must end with a semicolon, there will be a syntax error here as well.

1
votes

Your grammar is expecting whitespace to be ignored and semicolons to be sent as ';' tokens. Your lexer, on tbe other hand:

[ \t]   return yytext[0]; // Send every soace and tab as a token
";" ;                       // Ignore semicolons

You also ignore arithmetic operators. Undoubtedly there are other problems as well.

Suggestion: since this is your very first bison/flex project, start with something much simpler and work up to a full grammar a little bit at a time. Make extensive use of flex's trace facility (just add -d to the command line which processes your lex file.)

Good luck.