0
votes

I have to build a compiler that translates the Java language into Python. I'm using the Flex and Bison tools. I created the Flex file and defined the syntactic grammar in Bison for the subset of the language that I have to implement (such as arrays, loops, classes, logical and arithmetic operators, etc.). I'm having trouble understanding how to handle semantic rules. For example, I need to handle the semantics of the import statement and of variable declarations, add each variable to the symbol table, and then handle the translation. This is the structure of the symbol table in the symboltable.h module:

/* Top-level symbol table entry: one per scope. Each entry points to a
 * secondary hash table that holds the symbols declared in that scope. */
struct symtable {
  char *scopename;            // scope name; string key of the top-level table
  struct symtable2 *subtable; // head of the secondary symbol table for this scope
  UT_hash_handle hh;          // uthash handle; makes the structure hashable
};

struct symtable2            // secondary symbol structure
{

  char *name;               // Name of the symbol (key)
  char *element;            // it can be a variable or an array
  char *type;               // Indicates the type assumed by the token 
  (int, float, char, bool)
  char *value;              // value assigned to the variable
  int dim;                  // Array size, it is zero in the case of a variable.
  UT_hash_handle hh;        // to make the structure hash

};

And this is the add symbol function:

/*
 * Add a symbol to the symbol table, or update the value of an existing one.
 *
 * The scope entry named current_scopename is looked up in the global
 * `symbols` table and created if it does not exist yet. The symbol is then
 * looked up by name inside that scope's own table:
 *   - not found: a new entry is created from the arguments;
 *   - found with the same type: only its value is replaced;
 *   - found with a different type: a semantic error is printed for line nr.
 *
 * The strings are stored by pointer, not copied, so the caller must keep
 * name, current_scopename, element, current_type and current_value alive
 * and unchanged for as long as the table is in use.
 *
 * Nothing is inserted when name or current_scopename is NULL, or when an
 * allocation fails.
 */
void add_symbol( char *name, char *current_scopename, char *element, char *current_type, char *current_value, int dim, int nr) {
  if (name == NULL || current_scopename == NULL) {
     /* both strings are hash keys and are passed to strlen() below */
     return;
  }

  /* Keys are inserted with HASH_ADD_KEYPTR + strlen, i.e. as strings, so the
   * lookup must hash the string contents (HASH_FIND_STR); HASH_FIND_PTR
   * hashes a pointer value and does not match those entries. */
  struct symtable *s = NULL;
  HASH_FIND_STR(symbols, current_scopename, s);
  if (s == NULL) {
     s = malloc(sizeof *s);
     if (s == NULL) {
        fprintf(stderr, "add_symbol: out of memory\n");
        return;
     }
     s->scopename = current_scopename;
     s->subtable = NULL;
     HASH_ADD_KEYPTR(hh, symbols, s->scopename, strlen(s->scopename), s);
  }

  /* Search the scope's own sub-table: that is where symbols are inserted. */
  struct symtable2 *s2 = NULL;
  HASH_FIND_STR(s->subtable, name, s2);
  if (s2 == NULL) {
     s2 = malloc(sizeof *s2);
     if (s2 == NULL) {
        fprintf(stderr, "add_symbol: out of memory\n");
        return;
     }
     s2->name = name;
     s2->element = element;
     s2->type = current_type;
     s2->value = current_value;
     s2->dim = dim;
     HASH_ADD_KEYPTR(hh, s->subtable, s2->name, strlen(s2->name), s2);
     return;
  }

  /* Existing symbol: a new value is accepted only if the type is unchanged. */
  if (strcmp(s2->type, current_type) == 0) {
     s2->value = current_value;
  } else {
     printf("\033[01;31mRiga %i. [FATALE] SEMANTIC ERROR: assignment violates the primitive type of the variable.\033[00m\n", nr);
     printf("\n\n\033[01;31mParsing failed.\033[00m\n");
  }
}

This is a part of the bison file with the grammar to handle import statement and the variable declaration:

%{
  #include <ctype.h>
  #include <stdio.h>
  #include <string.h>
  #include <symboltable.h>
  /* Stream assigned from open_file() in the ImportStatement action.
   * <stdio.h> names the stream type FILE; `file` is not a type. */
  FILE *f_ptr;
%}

/* Start symbol of the grammar. */
%start program

/* Terminals returned by the lexer. */
%token NUMBER
%token ID
%token INT
%token FLOAT
%token DOUBLE
%token CHAR
%token BOOLEAN   /* used by the `type` rule, so it must be declared as a token */
%token IMPORT

/* Operator precedence and associativity: later lines bind more tightly. */
%right ASSIGNOP
%left SCOR
%left  SCAND
%left  EQ NE
%left  LT GT LE GE
%left ADD SUB
%left MULT DIV MOD
%right NOT
%left '(' ')' '[' ']'

%%

/* Entry point: an import statement followed by a typed declaration list.
 * The second symbol must be GlobalVariableFieldDeclarations: it is the only
 * rule that consumes the leading `type` and the closing ';'. Referencing
 * GlobalVariableDeclarations directly would leave that rule unreachable. */
program
        : ImportStatement GlobalVariableFieldDeclarations
        ;

/* Import statement. Its action calls delete_file(), stores the result of
 * open_file() in f_ptr and writes the Python import line to that stream. */
ImportStatement
               :  IMPORT LibraryName ';'
                  {
                    delete_file ();
                    f_ptr = open_file ();
                    /* assumes open_file() returns NULL on failure — confirm */
                    if (f_ptr == NULL) {
                      yyerror ("cannot open the output file");
                      YYABORT;
                    }
                    /* the stream variable is f_ptr; `fptr` is not declared */
                    fprintf(f_ptr, "import array \n");
                  }
               ;

/* Name of the imported library.
 * NOTE(review): 'java.util.*' is written as a character literal but holds
 * more than one character. Bison literal character tokens are single
 * characters, so this most likely needs a dedicated token returned by the
 * lexer — confirm against the Flex file. */
LibraryName
           : 'java.util.*'
           ;

/* A typed declaration: a type, a comma-separated list of declarators, ';'. */
GlobalVariableFieldDeclarations
                               : type GlobalVariableDeclarations ';' 
                               ;


/* One or more declarators separated by commas (left-recursive list). */
GlobalVariableDeclarations
                          : GlobalVariableDeclaration
                          | GlobalVariableDeclarations ',' GlobalVariableDeclaration
                          ;

/* A single declarator: a bare name, or a name with an initializer.
 * For the initialized form the action calls typeChecking($1, $3); when it
 * returns 0 the initializer's semantic value is copied into $1 and becomes
 * the value of the rule.
 * NOTE(review): neither alternative calls add_symbol, so nothing is inserted
 * into the symbol table here. */
GlobalVariableDeclaration
                         : VariableName 
                         | VariableName ASSIGNOP VariableInitializer {if (typeChecking($1,$3)== 0) {$1= $3; $$=$1;}}
                         ;

/* A variable name: forwards the semantic value of the ID token. */
VariableName
            : ID {$$ = $1 ;}
            ;

/* Type keywords accepted at the start of a declaration. No semantic action
 * is attached, so the rule carries only Bison's default value. */
type
    : INT
    | CHAR
    | FLOAT
    | DOUBLE
    | BOOLEAN
    ;

/* Comma-separated list of initializers (left-recursive).
 * NOTE(review): no rule in the grammar shown here references this
 * nonterminal — confirm whether it is used elsewhere. */
VariableInitializers
                    : VariableInitializer
                    | VariableInitializers ',' VariableInitializer
                    ;
/* A single initializer is any expression. */
VariableInitializer
                    : ExpressionStatement
                    ;

/* An expression: a name, a number, or an arithmetic, relational or boolean
 * expression. The three operator rules below recurse back into this rule
 * for their operands. None of these rules has a semantic action. */
ExpressionStatement
                   : VariableName
                   | NUMBER
                   | ArithmeticExpression
                   | RelationalExpression
                   | BooleanExpression
                   ; 
/* Binary arithmetic operators. */
ArithmeticExpression
                    : ExpressionStatement ADD ExpressionStatement
                    | ExpressionStatement SUB ExpressionStatement
                    | ExpressionStatement MULT ExpressionStatement
                    | ExpressionStatement DIV ExpressionStatement
                    | ExpressionStatement MOD ExpressionStatement
                    ;

/* Binary comparison operators. */
RelationalExpression
                    : ExpressionStatement GT ExpressionStatement
                    | ExpressionStatement LT ExpressionStatement
                    | ExpressionStatement GE ExpressionStatement
                    | ExpressionStatement LE ExpressionStatement
                    | ExpressionStatement EQ ExpressionStatement
                    | ExpressionStatement NE ExpressionStatement
                    ;

/* Logical operators: binary SCAND / SCOR and unary NOT. */
BooleanExpression
                 : ExpressionStatement SCAND ExpressionStatement
                 | ExpressionStatement SCOR ExpressionStatement
                 | NOT ExpressionStatement
                 ;

%%

/*
 * Error-reporting callback for the parser.
 * Prints the message s followed by a space and a newline on stdout.
 * Returns 0; the function is declared int, so it must not fall off the end
 * without returning a value.
 */
int yyerror (char *s)
{
  printf ("%s \n", s);
  return 0;
}

/* Program entry point: runs the parser and returns its result as the exit status. */
int main (void) {
  int parse_status = yyparse();
  return parse_status;
}

/*
 * Check that two symbols are both plain variables of the same type.
 *
 * variable1, variable2: names of the symbols. Each is looked up with
 *                       find_symbol() in the scope named by `scopename`
 *                       (declared outside this function).
 *
 * Returns  0 when both symbols are variables with the same type,
 *          1 when both are variables but their types differ,
 *         -1 when either symbol is undefined or is not a variable.
 */
int typeChecking (char *variable1, char *variable2) {
   struct symtable2 *first = find_symbol (scopename, variable1);
   if (first == NULL) {
       printf("\n\n\033[01;31mVariable 1 not defined.\033[00m\n");
       return -1;
   }

   struct symtable2 *second = find_symbol (scopename, variable2);
   if (second == NULL) {
       printf("\n\n\033[01;31mVariable 2 not defined.\033[00m\n");
       return -1;
   }

   /* element and type are C strings: compare their contents with strcmp().
    * `==` would compare pointers, and 'variable' is not a string literal. */
   if (strcmp(first->element, "variable") != 0 ||
       strcmp(second->element, "variable") != 0) {
       printf("\n\n\033[01;31m Different elements.\033[00m\n");
       return -1;
   }

   return strcmp(first->type, second->type) == 0 ? 0 : 1;
}

I am a beginner with Bison's syntax for handling semantics, and I have doubts about the semantic rules for the following productions:

  /* A typed declaration: a type, a comma-separated list of declarators, ';'. */
  GlobalVariableFieldDeclarations
                               : type GlobalVariableDeclarations ';' 
                               ;


  /* One or more declarators separated by commas. */
  GlobalVariableDeclarations
                          : GlobalVariableDeclaration
                          | GlobalVariableDeclarations ',' GlobalVariableDeclaration
                          ;

  /* A single declarator; the initialized form calls typeChecking($1, $3) and,
   * when it returns 0, copies $3 into $1 and sets $$ from it. */
  GlobalVariableDeclaration
                          : VariableName 
                          | VariableName ASSIGNOP VariableInitializer {if (typeChecking($1,$3)== 0) {$1= $3; $$=$1;}}
                         ;

  /* A variable name: forwards the semantic value of the ID token. */
  VariableName
             : ID {$$ = $1 ;}
             ;

Is it correct to manage semantics in this way for a GlobalVariableDeclaration production? And how can I insert the required parameter values, in the symbol table, via the add_symbol function? (Or better, how can I acquire the required parameters starting from productions to insert them in the add_symbol function that I have implemented?) Forgive me but I am a beginner, and many things about the semantics are not clear to me. I hope you have the patience to help me, I thank you in advance.

1

1 Answers

1
votes

You should use Bison to build an AST and then perform semantic analysis on the tree instead of in the grammar. Building an AST allows you to perform analysis on more complex data structures than just the grammar rules you built in Bison.

Once you have your AST for the input you can then make rules for how to convert that AST into a python program with the same syntax.

Here is an example of a Bison/Flex compiler for the Decaf language that might give you some ideas https://github.com/davidcox143/Decaf-Compiler