I'm making a parser with Lex&Yacc for a school project, and I have some unexplained issues with my syntax analysis.
First of all, this is my yacc file that doesn't work.
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
extern FILE *yyin;
extern char* yytext;
%}
%token <code> IDENTIFIER CONSTANT SIZEOF
%token <code> PTR_OP LE_OP GE_OP LES_OP GES_OP EQ_OP NE_OP
%token <code> AND_OP OR_OP
%token <code> EXTERN
%token <code> INT VOID
%token <code> STRUCT
%token <code> IF ELSE WHILE FOR RETURN
%union
{
int number;
char* code;
}
%start program
%type <code> primary_expression postfix_expression argument_expression_list unary_expression
%type <code> unary_operator multiplicative_expression additive_expression relational_expression
%type <code> equality_expression logical_and_expression logical_or_expression expression
%type <code> declaration declaration_specifiers type_specifier struct_specifier
%type <code> struct_declaration_list struct_declaration declarator direct_declarator
%type <code> parameter_list parameter_declaration statement compound_statement
%type <code> declaration_list statement_list expression_statement selection_statement
%type <code> iteration_statement jump_statement program external_declaration
%type <code> function_definition
%%
primary_expression
: IDENTIFIER {strcpy($$,$1);}
| CONSTANT {strcpy($$,$1);}
| '(' expression ')' {
char* temp = (char*)malloc((2 + strlen($2))*sizeof(char));
sprintf(temp,"(%s)",$2);
strcpy($$,temp);
free(temp);
}
| SIZEOF '(' type_specifier ')' {
char* temp = (char*)malloc((8 + strlen($3))*sizeof(char));
sprintf(temp,"sizeof(%s)",$3);
strcpy($$,temp);
free(temp);
}
| SIZEOF '(' IDENTIFIER ')' {
char* temp = (char*)malloc((8 + strlen($3))*sizeof(char));
sprintf(temp,"sizeof(%s)",$3);
strcpy($$,temp);
free(temp);
}
;
postfix_expression
: primary_expression {strcpy($$,$1);}
| postfix_expression '(' ')' {
char* temp = (char*)malloc((2 + strlen($1))*sizeof(char));
sprintf(temp,"%s()",$1);
strcpy($$,temp);
free(temp);
}
| postfix_expression '(' argument_expression_list ')' {
char* temp = (char*)malloc((2 + strlen($1) + strlen($3))*sizeof(char));
sprintf(temp,"%s(%s)",$1,$3);
strcpy($$,temp);
free(temp);
}
| postfix_expression PTR_OP IDENTIFIER {
char* temp = (char*)malloc((2 + strlen($1) + strlen($3))*sizeof(char));
sprintf(temp,"%s->%s",$1,$3);
strcpy($$,temp);
free(temp);
}
;
argument_expression_list
: expression {strcpy($$,$1);}
| argument_expression_list ',' expression {
char* temp = (char*)malloc((1 + strlen($1)+strlen($1))*sizeof(char));
sprintf(temp,"%s,%s",$1,$3);
strcpy($$,temp);
free(temp);
}
;
unary_expression
: postfix_expression {strcpy($$,$1);}
| unary_operator unary_expression {
char* temp = (char*)malloc((strlen($1) + strlen($2))*sizeof(char));
sprintf(temp,"%s%s",$1,$2);
strcpy($$,temp);
free(temp);}
;
unary_operator
: '&' {strcpy($$,"&");}
| '*' {strcpy($$,"*");}
| '-' {strcpy($$,"-");}
;
multiplicative_expression
: unary_expression {strcpy($$,$1);}
| multiplicative_expression '*' unary_expression {
char* temp = (char*)malloc((1 + strlen($1) + strlen($3))*sizeof(char));
sprintf(temp,"%s*%s",$1,$3);
strcpy($$,temp);
free(temp);
}
| multiplicative_expression '/' unary_expression {
char* temp = (char*)malloc((1 + strlen($1) + strlen($3))*sizeof(char));
sprintf(temp,"%s/%s",$1,$3);
strcpy($$,temp);
free(temp);
}
;
additive_expression
: multiplicative_expression {strcpy($$,$1);}
| additive_expression '+' multiplicative_expression {
char* temp = (char*)malloc((1 + strlen($1) + strlen($3))*sizeof(char));
sprintf(temp,"%s+%s",$1,$3);
strcpy($$,temp);
free(temp);
}
| additive_expression '-' multiplicative_expression {
char* temp = (char*)malloc((1 + strlen($1) + strlen($3))*sizeof(char));
sprintf(temp,"%s-%s",$1,$3);
strcpy($$,temp);
free(temp);
}
;
relational_expression
: additive_expression {strcpy($$,$1);}
| relational_expression LES_OP additive_expression {
char* temp = (char*)malloc((2 + strlen($1) + strlen($3))*sizeof(char));
sprintf(temp,"%s<=%s",$1,$3);
strcpy($$,temp);
free(temp);
}
| relational_expression GES_OP additive_expression {
char* temp = (char*)malloc((2 + strlen($1) + strlen($3))*sizeof(char));
sprintf(temp,"%s>=%s",$1,$3);
strcpy($$,temp);
free(temp);
}
| relational_expression LE_OP additive_expression {
char* temp = (char*)malloc((1 + strlen($1) + strlen($3))*sizeof(char));
sprintf(temp,"%s<%s",$1,$3);
strcpy($$,temp);
free(temp);
}
| relational_expression GE_OP additive_expression {
char* temp = (char*)malloc((1 + strlen($1) + strlen($3))*sizeof(char));
sprintf(temp,"%s>%s",$1,$3);
strcpy($$,temp);
free(temp);
}
;
equality_expression
: relational_expression {strcpy($$,$1);}
| equality_expression EQ_OP relational_expression {
char* temp = (char*)malloc((2 + strlen($1) + strlen($3))*sizeof(char));
sprintf(temp,"%s==%s",$1,$3);
strcpy($$,temp);
free(temp);
}
| equality_expression NE_OP relational_expression {
char* temp = (char*)malloc((2 + strlen($1) + strlen($3))*sizeof(char));
sprintf(temp,"%s!=%s",$1,$3);
strcpy($$,temp);
free(temp);
}
;
logical_and_expression
: equality_expression {strcpy($$,$1);}
| logical_and_expression AND_OP equality_expression {
char* temp = (char*)malloc((2 + strlen($1) + strlen($3))*sizeof(char));
sprintf(temp,"%s&&%s",$1,$3);
strcpy($$,temp);
free(temp);
}
;
logical_or_expression
: logical_and_expression {strcpy($$,$1);}
| logical_or_expression OR_OP logical_and_expression {
char* temp = (char*)malloc((2 + strlen($1) + strlen($3))*sizeof(char));
sprintf(temp,"%s||%s",$1,$3);
strcpy($$,temp);
free(temp);
}
;
expression
: logical_or_expression {strcpy($$,$1);}
| unary_expression '=' expression {
char* temp = (char*)malloc((1 + strlen($1) + strlen($3))*sizeof(char));
sprintf(temp,"%s=%s",$1,$3);
strcpy($$,temp);
free(temp);
}
;
declaration
: declaration_specifiers declarator ';' {
char* temp = (char*)malloc((1 + strlen($1) + strlen($2))*sizeof(char));
sprintf(temp,"%s%s;",$1,$2);
strcpy($$,temp);
free(temp);
}
| struct_specifier ';' {
char* temp = (char*)malloc((1 + strlen($1))*sizeof(char));
sprintf(temp,"%s;",$1);
strcpy($$,temp);
free(temp);
}
;
declaration_specifiers
: EXTERN type_specifier {
char* temp = (char*)malloc((strlen($1)+ strlen($2))*sizeof(char));
sprintf(temp,"%s %s",$1,$2);
strcpy($$,temp);
free(temp);
}
| type_specifier {strcpy($$,$1);}
;
type_specifier
: VOID {strcpy($$,$1);}
| INT {strcpy($$,$1);}
| struct_specifier {strcpy($$,$1);}
;
struct_specifier
: STRUCT IDENTIFIER '{' struct_declaration_list '}' {
char* temp = (char*)malloc((2 + strlen($1) + strlen($2) + strlen($4))*sizeof(char));
sprintf(temp,"%s %s {%s}",$1,$2,$4);
strcpy($$,temp);
free(temp);
}
| STRUCT '{' struct_declaration_list '}' {
char* temp = (char*)malloc((2 + strlen($1) + strlen($3))*sizeof(char));
sprintf(temp,"%s {%s}",$1,$3);
strcpy($$,temp);
free(temp);
}
| STRUCT IDENTIFIER {
char* temp = (char*)malloc((strlen($1) + strlen($2))*sizeof(char));
sprintf(temp,"%s %s",$1,$2);
strcpy($$,temp);
free(temp);
}
;
struct_declaration_list
: struct_declaration {strcpy($$,$1);}
| struct_declaration_list struct_declaration {
char* temp = (char*)malloc((strlen($1) + strlen($2))*sizeof(char));
sprintf(temp,"%s %s",$1,$2);
strcpy($$,temp);
free(temp);
}
;
struct_declaration
: type_specifier declarator ';' {
char* temp = (char*)malloc((1 + strlen($1) + strlen($2))*sizeof(char));
sprintf(temp,"%s %s;",$1,$2);
strcpy($$,temp);
free(temp);
}
;
declarator
: '*' direct_declarator {
char* temp = (char*)malloc((1 + strlen($2))*sizeof(char));
sprintf(temp,"*%s",$2);
strcpy($$,temp);
free(temp);
}
| direct_declarator {strcpy($$,$1);}
;
direct_declarator
: IDENTIFIER {strcpy($$,$1);}
| '(' declarator ')' {
char* temp = (char*)malloc((2 + strlen($2))*sizeof(char));
sprintf(temp,"(%s)",$2);
strcpy($$,temp);
free(temp);
}
| direct_declarator '(' parameter_list ')' {
char* temp = (char*)malloc((2 + strlen($1) + strlen($3))*sizeof(char));
sprintf(temp,"%s(%s)",$1,$3);
strcpy($$,temp);
free(temp);
}
| direct_declarator '(' ')' {
char* temp = (char*)malloc((2 + strlen($1))*sizeof(char));
sprintf(temp,"%s()",$1);
strcpy($$,temp);
free(temp);
}
;
parameter_list
: parameter_declaration {strcpy($$,$1);}
| parameter_list ',' parameter_declaration {
char* temp = (char*)malloc((1 + strlen($1) + strlen($3))*sizeof(char));
sprintf(temp,"%s, %s",$1,$3);
strcpy($$,temp);
free(temp);
}
;
parameter_declaration
: declaration_specifiers declarator {
char* temp = (char*)malloc((strlen($1) + strlen($2))*sizeof(char));
sprintf(temp,"%s %s",$1,$2);
strcpy($$,temp);
free(temp);
}
;
statement
: compound_statement {strcpy($$,$1);}
| expression_statement {strcpy($$,$1);}
| selection_statement {strcpy($$,$1);}
| iteration_statement {strcpy($$,$1);}
| jump_statement {strcpy($$,$1);}
;
compound_statement
: '{' '}' {strcpy($$,"{}");}
| '{' statement_list '}' {
char* temp = (char*)malloc((2 + strlen($2))*sizeof(char));
sprintf(temp,"{%s}",$2);
strcpy($$,temp);
free(temp);
}
| '{' declaration_list '}' {
char* temp = (char*)malloc((2 + strlen($2))*sizeof(char));
sprintf(temp,"{%s}",$2);
strcpy($$,temp);
free(temp);
}
| '{' declaration_list statement_list '}' {
char* temp = (char*)malloc((2 + strlen($2) + strlen($3))*sizeof(char));
sprintf(temp,"{%s%s}",$2,$3);
strcpy($$,temp);
free(temp);
}
;
declaration_list
: declaration {strcpy($$,$1);}
| declaration_list declaration {
char* temp = (char*)malloc((strlen($1) + strlen($2))*sizeof(char));
sprintf(temp,"%s %s",$1,$2);
strcpy($$,temp);
free(temp);
}
;
statement_list
: statement {strcpy($$,$1);}
| statement_list statement {
char* temp = (char*)malloc((strlen($1) + strlen($2))*sizeof(char));
sprintf(temp,"%s %s",$1,$2);
strcpy($$,temp);
free(temp);
}
;
expression_statement
: ';' {strcpy($$,";");}
| expression ';' {
char* temp = (char*)malloc((1 + strlen($1))*sizeof(char));
sprintf(temp,"%s;",$1);
strcpy($$,temp);
free(temp);
}
;
selection_statement
: IF '(' expression ')' statement {
char* temp = (char*)malloc((2 + strlen($1) + strlen($3) + strlen($5))*sizeof(char));
sprintf(temp,"%s (%s) %s",$1,$3,$5);
strcpy($$,temp);
free(temp);
}
| IF '(' expression ')' statement ELSE statement {
char* temp = (char*)malloc((2 + strlen($1) + strlen($3) + strlen($5) + strlen($6) + strlen($7))*sizeof(char));
sprintf(temp,"%s (%s) %s %s %s",$1,$3,$5,$6,$7);
strcpy($$,temp);
free(temp);
}
;
iteration_statement
: WHILE '(' expression ')' statement {
char* temp = (char*)malloc((1 + strlen($1) + strlen($3) + strlen($5))*sizeof(char));
sprintf(temp,"%s (%s) %s",$1,$3,$5);
strcpy($$,temp);
free(temp);
}
| FOR '(' expression_statement expression_statement expression ')' statement {
char* temp = (char*)malloc((2 + strlen($1) + strlen($3) + strlen($4) + strlen($5) + strlen($7))*sizeof(char));
sprintf(temp,"%s (%s %s %s) %s",$1,$3,$4,$5,$7);
strcpy($$,temp);
free(temp);
}
;
jump_statement
: RETURN ';' {strcpy($$,$1);}
| RETURN expression ';' {
char* temp = (char*)malloc((strlen($1) + strlen($2))*sizeof(char));
sprintf(temp,"%s %s;",$1,$2);
strcpy($$,temp);
}
;
program
: external_declaration {strcpy($$,$1);}
| program external_declaration {
char* temp = (char*)malloc((strlen($1) + strlen($2))*sizeof(char));
sprintf(temp,"%s %s",$1,$2);
strcpy($$,temp);
free(temp);
}
;
external_declaration
: function_definition {strcpy($$,$1);}
| declaration {strcpy($$,$1);}
;
function_definition
: declaration_specifiers declarator compound_statement {
char* temp = (char*)malloc((strlen($1) + strlen($2) + strlen($3))*sizeof(char));
sprintf(temp,"%s %s %s",$1,$2,$3);
strcpy($$,temp);
free(temp);
}
;
%%
int yyerror(char *s) {
printf("%s\n",s);
printf("%s",yytext);
}
int main(int argc, char *argv[]) {
yyin = fopen(argv[1],"r");
yyparse();
return 0;
}
The final purpose of this parser is to generate a 3 address code with an initial code (near to an old C language).
For now, I'm just creating a simple parser to test if the input file is correctly written in my first language.
In my actual code, semantic actions are here to build the same code as the input, to then write it in an external file.
It may seems completely idiot for you but it's my first compiler project and I think it's a crucial step for me if I want to generate code by the future (since I just want a code translation).
So this is the issue. Before the implementation of my semantic actions, the parser was perfectly parsing test files, no syntax error. But now, it doesn't work anymore.
Are we ok that semantic actions don't influence the syntax parse ?
PS: There is my lex file :
chiffre [0-9]
lettre [a-zA-Z]
exposant [Ee][+-]?{D}+
commentaire [\/][\*][^\/\*]*[\*][\/]
identificateur {lettre}({lettre}|_|{chiffre})*
entier {chiffre}+
pointeur [-][>]
%{
#include <stdio.h>
#include "y.tab.h"
/* Ne gère pas les commentaires. A rajouter */
/* Supprimer les lignes dont vous n'avez pas besoin. */
%}
%%
"else" {yylval.code=yytext;return ELSE;}
"extern" {yylval.code=yytext;return EXTERN;}
"for" {yylval.code=yytext;return FOR;}
"if" {yylval.code=yytext;return IF;}
"int" {yylval.code=yytext;return INT;}
"return" {yylval.code=yytext;return RETURN;}
"sizeof" {yylval.code=yytext;return SIZEOF;}
"struct" {yylval.code=yytext;return STRUCT;}
"void" {yylval.code=yytext;return VOID;}
"while" {yylval.code=yytext;return WHILE;}
{entier} {yylval.code=yytext;return CONSTANT; }
{identificateur} {yylval.code=yytext;return IDENTIFIER;}
"<" {yylval.code=yytext;return LE_OP;}
">" {yylval.code=yytext;return GE_OP;}
"<=" {yylval.code=yytext;return LES_OP;}
">=" {yylval.code=yytext;return GES_OP;}
"==" {yylval.code=yytext;return EQ_OP;}
"!=" {yylval.code=yytext;return NE_OP;}
"&&" {yylval.code=yytext;return AND_OP;}
"||" {yylval.code=yytext;return OR_OP;}
"*" {yylval.code=yytext;return '*';}
"-" {yylval.code=yytext;return '-';}
"&" {yylval.code=yytext;return '&';}
{pointeur} {yylval.code=yytext;return PTR_OP;}
"+" {yylval.code=yytext;return '+';}
"/" {yylval.code=yytext;return '/';}
{commentaire} {/* commentaire ne rien faire */}
"," {yylval.code=yytext;return ',';}
";" {yylval.code=yytext;return ';';}
"(" {yylval.code=yytext;return '(';}
")" {yylval.code=yytext;return ')';}
"{" {yylval.code=yytext;return '{';}
"}" {yylval.code=yytext;return '}';}
"=" {yylval.code=yytext;return '=';}
[ \t\n\v\r]+ { /* rien faire */ }
. { printf("erreur : b");printf("%s",yytext);printf("a\n");printf("Erreur lexicale. \n"); }
%%
and finally one of my test files
extern int printd( int i );
int main() {
int i;
int j;
i = 45000;
j = -123;
printd(i+j);
printd(45000+j);
printd(i+123);
printd(45000+123);
printd(i+(j+0));
printd((i+0)+j);
printd((i+0)+(j+0));
printd((i+0)+123);
printd(45000+(j+0));
return 0;
}
After those changes, the parser is blocking at the first "int" keyword.
Thanks for your help...