2
votes

I have the following code in Bison, which extends the mfcalc proposed in the guide, implementing some functions like yylex() externally with FLEX.

To understand my problem, the key rules are in the non-terminal called line at the beginning of the grammar. Concretely, the rules EVAL CLOSED_STRING '\n' and END (this token is sent by FLEX when EOF is detected). The first opens a file and points the input to that file. The second closes the file and points the input back to stdin.

I'm trying to make a rule eval "file_path" to load tokens from a file and evaluate them. Initially I have yyin = stdin (I use the function setStandardInput() to do this).

When a user enters eval "file_path", the parser swaps yyin from stdin to the file pointer (with the function setFileInput()) and the tokens are read correctly.

When the END rule is reached by the parser, it tries to restore the stdin input, but it gets bugged: the calculator doesn't exit, but what I write in the input isn't evaluated.

Note: I assume there are no errors in the grammar, because error recovery is not complete yet. In the file at file_path you can use simple arithmetic operations.

In summary, I want to swap between stdin and file pointers as inputs, but when I swap back to stdin it gets bugged, except when I start the calculator with stdin as the default.

%{


/* Library includes */

#include <stdio.h>
#include <math.h>
#include "utils/fileutils.h"
#include "lex.yy.h"
#include "utils/errors.h"
#include "utils/stringutils.h"
#include "table.h"


/* Input-source helpers; both are defined in the epilogue of this file. */
void setStandardInput();
void setFileInput(char * filePath);


/* External functions and variables from flex */

extern size_t yyleng;
extern FILE * yyin;         /* stream the scanner reads from; reassigned by the helpers above */
extern int parsing_line;    /* printed by yyerror() as the line number of the error */
extern char * yytext;
//extern int yyerror(char *s);
extern int yyparse();
extern int yylex();
/* Error reporter used by the grammar actions; defined at the end of this file. */
int yyerror(char * s);

%}





/***** TOKEN DEFINITION *****/

/* Semantic value of a grammar symbol: a string (text) or a number (value). */
%union{
    char * text;
    double value;
}

/* Non-terminals that carry a numeric value. */
%type <value> exp asig



/* Command tokens: LS prints the symbol table, EVAL starts an eval "file" line. */
%token LS
%token EVAL
/* Tokens whose semantic value is a string. */
%token <text> ID
%token <text> VAR
/* Tokens whose semantic value is a number. FUNCTION, LEXEME and RESERVED_WORD
   are also used in the actions as symbol-kind tags (table_search / insertLexeme). */
%token <value> FUNCTION
%token <value> LEXEME
%token <value> RESERVED_WORD
%token <value> NUMBER
%token <value> INTEGER
%token <value> FLOAT
%token <value> BINARY
%token <value> SCIENTIFIC_NOTATION
%token <text> CLOSED_STRING
/* Tokens without a semantic value; none of these is referenced by the rules below except END. */
%token DOCUMENTATION
%token COMMENT
%token POW
%token UNRECOGNIZED_CHAR 
%token MALFORMED_STRING_ERROR 
%token STRING_NOT_CLOSED_ERROR
%token COMMENT_ERROR 
%token DOCUMENTATION_ERROR
/* END: per the description above, sent by the scanner when it detects EOF. */
%token END
/* Operator precedence, lowest first. NEG_MINUS is a pseudo-token used only
   through %prec to give unary minus its own precedence (see exp). */
%right '='
%left '+' '-'
%left '/' '*'
%left NEG_MINUS
%right '^'
%right '('
%%




/* Start symbol: a possibly empty sequence of lines, built left-recursively. */
input:
            /* empty_expression */
        |   input line
;

/* One unit of input: a blank line, an expression/assignment whose value is
   printed, the LS command, an eval "file" command, or the END token. */
line:       '\n'                        /* blank line: no action */

            /* An expression or assignment terminated by a newline or by END. */
        |   asig    '\n'                {   printf("\t%f\n", $1);                                   }
        |   asig    END                 {   printf("\t%f\n", $1);                                   }
        |   LS                          {   print_table();                                          }
        |   EVAL CLOSED_STRING  '\n'    {
                                            // Getting the file path
                                            // (presumably strips the quotes around the string token; confirm in stringutils)
                                            char * filePath = deleteStringSorroundingQuotes($2);
                                            // Redirect the scanner input (yyin) to that file
                                            setFileInput(filePath);
                                        }

            /* END: close the current input and go back to stdin.
               Note that setStandardInput() itself calls yyparse(). */
        | END                           { closeFile(yyin);  setStandardInput();}

;

/* Arithmetic expression. Every action assigns $$, so a failed lookup or a
   reported error yields 0 instead of an indeterminate value. */
exp:        NUMBER                      {   $$ = $1;                                                }
        |   VAR                         {
                                            /* Variable reference: a name not found in the table evaluates to 0. */
                                            lex * result = table_search($1, LEXEME);
                                            $$ = (result != NULL) ? result->value : 0;
                                        }
        |   VAR '(' exp ')'             {
                                            lex * result = table_search($1, FUNCTION);

                                            // If the result is a function, then invokes it
                                            if(result != NULL){
                                                $$ = (*(result->function))($3);
                                            }else{
                                                yyerror("That identifier is not a function.");
                                                $$ = 0;
                                            }
                                        }
        |   exp '+' exp                 {   $$ = $1 + $3;                                           }
        |   exp '-' exp                 {   $$ = $1 - $3;                                           }
        |   exp '*' exp                 {   $$ = $1 * $3;                                           }
        |   exp '/' exp                 {
                                            /* Report the error only when the divisor really is zero. */
                                            if($3 != 0){
                                                $$ = $1 / $3;
                                            }else{
                                                yyerror("You can't divide a number by zero");
                                                $$ = 0;
                                            }
                                        }
        |   '-' exp %prec NEG_MINUS     {   $$ = -$2;                                               }
        |   exp '^' exp                 {   $$ = pow($1, $3);                                       }
        |   '(' exp ')'                 {   $$ = $2;                                                }
        |   '(' error ')'               {
                                            /* Error recovery inside parentheses: report, resume normal parsing. */
                                            yyerror("An error has ocurred between the parenthesis.");   yyerrok; yyclearin;
                                            $$ = 0;
                                        }

;


/* Assignment (right-recursive, so a = b = 1 chains) or a plain expression.
   The value of an assignment is the value that was assigned. */
asig:       exp                         {   $$ = $1;    }
        |   VAR '=' asig                {
                                            /* insertLexeme reports what kind of symbol the name refers to;
                                               reserved words and functions cannot be assigned to. */
                                            switch(insertLexeme($1, $3)){
                                                case RESERVED_WORD:
                                                    yyerror("You tried to assign a value to a reserved word.");
                                                    YYERROR;

                                                case FUNCTION:
                                                    yyerror("You tried to assign a value to a function.");
                                                    YYERROR;

                                                default:
                                                    $$ = $3;
                                                    break;
                                            }
                                        }
;

%%


/*
 * Announces the switch on stdout, points the scanner input (yyin) at stdin
 * and then runs yyparse().
 *
 * NOTE(review): the END action of `line` calls this function from inside a
 * running yyparse(), so the call below starts a parse nested in another one.
 */
void setStandardInput(){
    fputs("Starting standard input:\n", stdout);

    yyin = stdin;
    (void) yyparse();
}

/*
 * Redirects the scanner input (yyin) to the file at filePath.
 * If openFile() returns NULL, prints a notice and falls back to
 * setStandardInput() instead.
 */
void setFileInput(char * filePath){
    FILE * source = openFile(filePath);

    if(source != NULL){
        yyin = source;
        return;
    }

    printf("The file couldn't be loaded. Redirecting to standard input: \n");
    setStandardInput();
}



/*
 * Entry point: sets up the symbol table, selects stdin as the input, keeps
 * calling yyparse() until it returns 1, then prints and frees the table.
 * argc and argv are not used.
 */
int main(int argc, char ** argv) {
    int status;

    /* Symbol table creation and initial contents */
    create_table();
    initTable();

    /* Start reading from stdin */
    setStandardInput();

    /* Parse again and again; only a return value of 1 ends the loop */
    do {
        status = yyparse();
    } while(status != 1);

    print_table();

    /* Release the table memory */
    destroyTable();

    return 0;
}


/*
 * Error reporter: writes the message s to stdout together with the current
 * value of parsing_line. Always returns 0.
 */
int yyerror(char * s){
    fprintf(stdout,
            "---------- Error in line %d --> %s ----------------\n",
            parsing_line, s);
    return 0;
}
1

1 Answers

2
votes

It's not too difficult to create a parser and a scanner which can be called recursively. (See below for an example.) But neither the default bison-generated parser nor the flex-generated scanner are designed to be reentrant. So with the default parser/scanner, you shouldn't call yyparse() inside SetStandardInput, because that function is itself called by yyparse.

If you had a recursive parser and scanner, on the other hand, you could significantly simplify your logic. You could get rid of the END token (which is, in any case, practically never a good idea) and just recursively call yyparse in your action for EVAL CLOSED_STRING '\n'.

If you want to use the default parser and scanner, then your best solution is to use Flex's buffer stack to push and later pop a "buffer" corresponding to the file to be evaluated. (The word "buffer" here is a bit confusing, I think. A Flex "buffer" is actually an input source, such as a file; it's called a buffer because only a part of it is in memory, but Flex will read the entire input source as part of processing a "buffer".)

You can read about the buffer stack usage in the flex manual, which includes sample code. Note that in the sample code, the end of file condition is entirely handled inside the scanner, which is usual for this architecture.

It is possible in this case to manufacture an end-of-file indicator (although you cannot use END because that is used to indicate the end of all input). That has the advantage of ensuring that the contents of the evaluated file are parsed as a whole, without leaking a partial parse back to the including file, but you will still want to pop the buffer stack inside the scanner because it is annoyingly tricky to get end-of-file handling correct without violating any of the API constraints (one of which is that you cannot reliably read EOF twice on the same "buffer").

In this case, I would recommend generating a reentrant parser and scanner and simply doing a recursive call. It's a clean and simple solution, and avoiding global variables is always good.

A simple example. The simple language below only has echo and eval statements, both of which require a quoted string argument.

There are a variety of ways to hook together a reentrant scanner and reentrant parser. All of them have some quirks and the documentation (although definitely worth reading) has some holes. This is a solution which I've found useful. Note that most of the externally visible functions are defined in the scanner file, because they rely on interfaces defined in that file for manipulating the reentrant scanner context object. You can get flex to export a header with the appropriate definitions, but I've generally found it simpler to write my own wrapper functions and export those. (I don't usually export yyscan_t either; normally I create a context object of my own which has a yyscan_t member.)

There is an annoying circularity which is largely the result of bison not allowing for the possibility to introduce user code at the top of yyparse. Consequently, it is necessary to pass the yyscan_t used to call the lexer as an argument to yyparse, which means that it is necessary to declare yyscan_t in the bison file. yyscan_t is actually declared in the scanner generated file (or the flex-generated header, if you've asked for one), but you can't include the flex-generated header in the bison-generated header because the flex-generated header requires YYSTYPE which is declared in the bison-generated header.

I normally avoid this circularity by using a push parser, but that's pushing the boundaries for this question, so I just resorted to the usual work-around, which is to insert

typedef void* yyscan_t;

in the bison file. (That's the actual definition of yyscan_t, whose actual contents are supposed to be opaque.)

I hope the rest of the example is self-evident, but please feel free to ask for clarification if there is anything which you don't understand.

file recur.l

%{
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #include "recur.tab.h"
%}

/* reentrant: scanner state lives in a yyscan_t object instead of globals.
   bison-bridge: yylex receives a pointer to the semantic value (yylval->...). */
%option reentrant bison-bridge 
/* Do not generate input()/unput() or a default rule, and do not call yywrap() at EOF. */
%option noinput nounput nodefault noyywrap
/* Track line numbers; yyerror() reads them through yyget_lineno(). */
%option yylineno

%%
"echo"       { /* keyword */ return T_ECHO; }
"eval"       { /* keyword */ return T_EVAL; }
[[:alpha:]][[:alnum:]]*  {
               /* Identifier: hand a heap copy of the text to the parser.
                  NOTE(review): the strdup result is not checked. */
               yylval->text = strdup(yytext);
               return ID;
             }
["]          { /* A quote that does not start a complete string on this line:
                  report it and keep scanning (no token is returned). */
               yyerror(yyscanner, "Unterminated string constant"); }
["][^"\n]*["] {
               /* Quoted string on one line: copy the yyleng - 2 characters
                  between the quotes and NUL-terminate the copy.
                  NOTE(review): the malloc result is not checked. */
               yylval->text = malloc(yyleng - 1);
               memcpy(yylval->text, yytext + 1, yyleng - 2);
               yylval->text[yyleng - 2] = '\0';
               return STRING;
             }
"."          { /* A lone dot is returned as itself. This rule comes before the
                  number rule, whose pattern also matches a single ".". */
               return yytext[0]; }
[[:digit:]]*("."[[:digit:]]*)? {
               /* Number with optional integer and fractional digits, converted with strtod. */
               yylval->number = strtod(yytext, NULL);
               return NUMBER;
             }
[ \t]+       ; /* skip spaces and tabs */
.|\n         { /* any other character, including newline, is its own token */
               return yytext[0]; }

%%
/* Use "-" or NULL to parse stdin */
/*
 * Parses one input source with its own scanner instance, so calls may nest.
 *
 * path: file to parse; "-" or NULL selects stdin.
 * Returns the result of yyparse(), or 1 when the file cannot be opened or
 * the scanner cannot be created (a message is written to stderr).
 */
int parseFile(const char* path) {
  FILE* in = stdin;
  if (path && strcmp(path, "-") != 0) {
    in = fopen(path, "r");
    if (!in) {
      fprintf(stderr, "Could not open file '%s'\n", path);
      return 1;
    }
  }

  int rv = 1;
  yyscan_t scanner;
  /* yylex_init reports failure through a non-zero return; the scanner handle
     must not be used in that case. */
  if (yylex_init(&scanner) != 0) {
    fprintf(stderr, "Could not initialize the scanner\n");
  } else {
    yyset_in(in, scanner);
    rv = yyparse(scanner);
    yylex_destroy(scanner);
  }

  /* Close only what was opened here, on every path. */
  if (in != stdin) fclose(in);
  return rv;
}
/* Writes msg to stderr, prefixed with the scanner's current line number. */
void yyerror(yyscan_t yyscanner, const char* msg) {
  const int line = yyget_lineno(yyscanner);
  fprintf(stderr, "At line %d: %s\n", line, msg);
}

file recur.y

%code {
  #include <stdio.h>
  #include <stdlib.h>

  /* Scanner entry point generated by flex (reentrant, bison-bridge form). */
  int yylex(YYSTYPE* yylval_param, yyscan_t yyscanner);
}
/* Pure (reentrant) parser: no global yylval; the scanner object is passed
   to yyparse, which forwards it to yylex and yyerror. */
%define api.pure full
%param { yyscan_t scanner }
%union {
  char*  text;
  double number;
}
%code requires {
  /* Opaque scanner handle. Guarded with the macro flex's generated scanner
     uses for its own typedef, so the two definitions do not clash. */
  #ifndef YY_TYPEDEF_YY_SCANNER_T
  #define YY_TYPEDEF_YY_SCANNER_T
  typedef void* yyscan_t;
  #endif
}
%code provides {
  /* Both functions are defined in recur.l. */
  int parseFile(const char* path);
  void yyerror(yyscan_t yyscanner, const char* msg);
}
/* Keyword tokens use the names the scanner returns (T_ECHO / T_EVAL). */
%token T_ECHO "echo" T_EVAL "eval"
/* String-valued tokens; the scanner allocates their text on the heap. */
%token <text> STRING ID
%token <number> NUMBER
%%
program: %empty | program command '\n'
command: echo | eval | %empty
echo: "echo" STRING  { printf("%s\n", $2);
                       free($2);   /* the string was allocated by the scanner */
                     }
eval: "eval" STRING  { /* Parse the named file with a nested parser/scanner pair.
                          parseFile reports an unopenable file itself. */
                       int rv = parseFile($2);
                       free($2);
                       if (rv != 0) {
                         YYABORT;
                       }
                     }

%%