1
votes

I am writing a simple Yacc program that takes program code as input and reports the number of int and double variables and the number of functions.

I ran into a bizarre problem: the parser reports a syntax error on a line even though there is a rule that matches it, because the parser picks a different rule for that line. Below are the parts of the code that reproduce the error. (If you see unused variables, that's because I deleted other parts that are irrelevant to this error.)

yacc code

%{
/* Compile the parser's tracing code in; main() turns it on by setting yydebug. */
#define YYDEBUG 1
#include <stdio.h>
#include <stdlib.h>
/*
 * Counters incremented by grammar actions and printed by main().
 * Only func_count, int_count, double_count and array_count are touched by
 * the rules in this excerpt; the others are unused here.
 */
int func_count=0;
int int_count=0;
int char_count=0;
int double_count=0;
int float_count=0;
int pointer_count=0;
int array_count=0;
int condition_count=0;
int for_count=0;
int return_count=0;
int numeric_count=0;
%}

/*
 * Named tokens returned by the scanner. Single-character punctuation is
 * passed as character literals and therefore is not declared here.
 * The rules in this excerpt use only INT_KEYWORD, DOUBLE_KEYWORD,
 * CHAR_KEYWORD, FLOAT_KEYWORD, VARIABLE and DIGIT.
 */
%token INT_KEYWORD DOUBLE_KEYWORD CHAR_KEYWORD RETURN_KEYWORD FLOAT_KEYWORD IF_KEYWORD VARIABLE OPERATOR COMPARE DIGIT FOR_KEYWORD POINTER_VARIABLE 
%start program
%%

/* A program is a possibly empty sequence of statements, each followed by a '\n' token. */
program:
    program statement '\n'
    |
    ;

/* A statement is a variable declaration or a function header; only a function header increments func_count. */
statement:
    declaration_statement |
    function_declaration_statement {func_count++;}

    ;

/*
 * Function header with exactly one parameter, up to and including the
 * opening brace, e.g. "int a(int a) {".
 * NOTE: this rule begins with `datatype`, whose INT_KEYWORD and
 * DOUBLE_KEYWORD alternatives are also the first token of the declaration
 * rules, so the parser must decide between reducing to `datatype` and
 * shifting VARIABLE after seeing only the keyword.
 */
function_declaration_statement: 
    datatype VARIABLE '(' datatype VARIABLE ')' '{'
    ;   

/* Variable declarations are split by base type so each kind can be counted separately. */
declaration_statement:
    int_declaration_statement |
    double_declaration_statement 
    ;

/*
 * int declarations. The array form increments array_count; the scalar forms
 * (with or without a DIGIT initializer) increment int_count.
 */
int_declaration_statement:
    INT_KEYWORD VARIABLE '[' DIGIT ']' ';'{array_count++;}
    |
    INT_KEYWORD VARIABLE ';' {int_count++;}
    |
    INT_KEYWORD VARIABLE '=' DIGIT ';' {int_count++;}


/*
 * double declarations. The array form increments array_count; the scalar
 * forms (with or without a DIGIT initializer) increment double_count.
 */
double_declaration_statement:
    DOUBLE_KEYWORD VARIABLE '[' DIGIT ']' ';' {array_count++;}
    |
    DOUBLE_KEYWORD VARIABLE ';' {double_count++;}
    |
    DOUBLE_KEYWORD VARIABLE '=' DIGIT ';' {double_count++;}


/* Any one of the four type keywords; used for the return type and the parameter type of a function header. */
datatype:
    INT_KEYWORD
    |
    DOUBLE_KEYWORD
    |
    CHAR_KEYWORD
    |
    FLOAT_KEYWORD
    ;
%%

/*
 * Error callback: writes the message s to stderr followed by a newline.
 * Always returns 0.
 */
int yyerror(char *s)
{
    fprintf(stderr, "%s\n", s);
    return 0;
}

/*
 * Entry point: parses standard input with tracing enabled, prints the
 * collected counters, and returns the parser's status so that a syntax
 * error is visible in the process exit code instead of being discarded.
 */
int main (void){
    int parse_status;

    /* Trace output is available because YYDEBUG is defined as 1 in the prologue. */
    yydebug=1;
    /* yyparse() yields 0 on success and a non-zero value on failure. */
    parse_status = yyparse();

    /* The counters are printed even after a failed parse, as before. */
    printf("#int variable=%d, #double variable=%d",int_count,double_count);
    printf("#array=%d\n",array_count);
    printf("#function=%d\n",func_count);

    return parse_status;
}

lex

%{
#include <stdio.h>
#include <stdlib.h>
#include "y.tab.h"
/* Must match the definition in the yacc file: int yyerror(char *s). */
int yyerror(char *);
%}

%%
    /* Keywords. They precede the identifier rule, so they win when both match the same text. */
"int"           {return INT_KEYWORD;}
"double"        {return DOUBLE_KEYWORD;}
"char"          {return CHAR_KEYWORD;}
"float"         {return FLOAT_KEYWORD;} 
"if"            {return IF_KEYWORD;}
"for"           {return FOR_KEYWORD;}
"return"        {return RETURN_KEYWORD;}
    /* Every comparison operator is reported as the single token COMPARE. */
"=="            {return COMPARE;}
">"         {return COMPARE;}
"<"         {return COMPARE;}
">="            {return COMPARE;}
"<="            {return COMPARE;}
    /* Every arithmetic operator is reported as the single token OPERATOR. */
"+"         {return OPERATOR;}
"-"         {return OPERATOR;}
"/"         {return OPERATOR;}
"*"         {return OPERATOR;}
"%"         {return OPERATOR;}
    /* One or more decimal digits. */
[0-9]+          {return DIGIT;}
    /* Identifiers: one or more lower-case letters. */
[a-z]+          {return VARIABLE;}
    /* An asterisk, an optional single space, then one or more letters. */
"*"" "?[a-zA-Z]+    {return POINTER_VARIABLE;}
    /* Punctuation (and newline) is returned as its own character code. */
"["         {return *yytext;}
"="         {return *yytext;}
"]"         {return *yytext;}
[;\n(){}]       {return *yytext;}
    /* Spaces and tabs are discarded. */
[ \t]           ;
    /* Any other character: print it and report it through yyerror(). */
.           {printf("%s\n",yytext); yyerror("invalid charactor");} 
%%

/*
 * Called by the scanner when it reaches end of input.
 * Always returns 1.
 */
int yywrap(void)
{
    return 1;
}

test file:

int a;
int a[3];
int a(int a) {

Expected output

#int variable=1, #double variable=0 #array=1
#function=1

But instead it fails at the third line, int a(int a) {, because the parser seems to choose the int variable declaration rule, and then it fails when it sees the '(' token, generating a syntax error.

The debug error message says...

....
Reading a token: Next token is token INT_KEYWORD ()
Shifting token INT_KEYWORD ()
Entering state 3
Reading a token: Next token is token VARIABLE ()
Shifting token VARIABLE ()
Entering state 13
Reading a token: Next token is token '(' ()
syntax error
....

Could anyone please point out what I did wrong? Thanks.

1
Don't you get a warning about shift/reduce conflicts and a useless production? You need to fix those. – rici

1 Answer

1
votes

You have two shift/reduce conflicts in your grammar. You can see where they occur in the output file generated by yacc:

State 3

  8 int_declaration_statement: INT_KEYWORD . VARIABLE '[' DIGIT ']' ';'
  9                          | INT_KEYWORD . VARIABLE ';'
 10                          | INT_KEYWORD . VARIABLE '=' DIGIT ';'
 14 datatype: INT_KEYWORD .

   VARIABLE  shift, and go to state 13

   VARIABLE  [reduce using rule 14 (datatype)]

State 4

 11 double_declaration_statement: DOUBLE_KEYWORD . VARIABLE '[' DIGIT ']' ';'
 12                             | DOUBLE_KEYWORD . VARIABLE ';'
 13                             | DOUBLE_KEYWORD . VARIABLE '=' DIGIT ';'
 15 datatype: DOUBLE_KEYWORD .

  VARIABLE  shift, and go to state 14

  VARIABLE  [reduce using rule 15 (datatype)]

Here, when yacc has read an INT_KEYWORD or a DOUBLE_KEYWORD and the next token is VARIABLE, it does not know whether it needs to shift or reduce (i.e. it does not know whether it is in a declaration or has just seen a datatype). By default, yacc will shift.

Also, your function_declaration_statement begins with a datatype, so to parse a function yacc would first have to reduce INT_KEYWORD (or DOUBLE_KEYWORD) to datatype. Because the conflict is resolved in favour of shifting, yacc instead shifts the VARIABLE and commits to an int_declaration_statement (or a double_declaration_statement). The syntax error happens when yacc then encounters a '('.

To solve this, you can remove the function_declaration_statement rule and add an alternative for it to your int_declaration_statement (and likewise to your double_declaration_statement). Something like:

/*
 * Revised rules: the function header becomes an extra alternative of the
 * declaration rule, so a statement never begins with `datatype`.
 * `datatype` is now needed only for the parameter inside the parentheses.
 * double_declaration_statement is meant to be extended in the same way
 * (not shown here).
 */
statement: int_declaration_statement
         | double_declaration_statement
         ;

int_declaration_statement: INT_KEYWORD VARIABLE '[' DIGIT ']' ';'{array_count++;}
                         | INT_KEYWORD VARIABLE ';' {int_count++;}
                         | INT_KEYWORD VARIABLE '=' DIGIT ';' {int_count++;}
                         | INT_KEYWORD VARIABLE '(' datatype VARIABLE ')' '{' {func_count++;}
                         ;

That will remove your shift/reduce conflicts and give you the result you want, for instance:

--- ~ » ./a.out
int a;
int a[3];
int a(int a) {
#int variable=1, #double variable=0#array=1
#function=1

Hope it helps.