0
votes

I am trying to write a scanner and parser, using flex and bison, to parse a simple script file. I am able to parse only the first line of the script file. After that, the program terminates with the error ‘yyerror : syntax error’. How do I make my program proceed to the next line of the script?

I am using Windows 7 with the compilation commands below.

compilation:

flex lex.l  
bison -d yacc.y  
g++ lex.yy.c yacc.tab.c -lfl -o scanner.exe

Here I am attaching my .l and .y files and my script.txt file.

file:lex.l

%{
#include <iostream>
#include <stdio.h>
#include "yacc.tab.h"
#define YY_DECL extern "C" int yylex()

using namespace std;
%}

/*
 * Named pattern definitions; a rule expands one by writing its name in braces.
 * SQUARE_CLOSE and QUOTE_CLOSE each match TWO characters: one character that
 * is not a backslash, followed by the closing delimiter.
 * QUOTED_PAIR matches a backslash followed by any character except CR or LF.
 * QTEXT is the set of characters accepted unescaped inside a quoted string.
 */
DOT             "."
COLON           ":"
SEMICOLON       ";"
COMMA           ","
ANGLE_LEFT      "<"
ANGLE_RIGHT     ">"
AT              "@"
EQUAL           "="
SQUARE_OPEN     "["
SQUARE_CLOSE    [^\\]"]"
OPENBRACE       "\("
CLOSEBRACE      "\)"
QUOTE           "\""
QUOTE_OPEN      "\""
QUOTE_CLOSE     [^\\]"\""
SPACE           " "
TAB             "\t"
CRLF            "\r\n"
QUOTED_PAIR     "\\"[^\r\n]
DIGIT           [0-9]
ALPHA           [a-zA-Z]
QTEXT           [0-9a-zA-Z!#$%&'()*+,\-.\/:;<=>?@\[\]^_`{|}~]
%%

{SPACE}*{OPENBRACE}{SPACE}*     { return TOK_OPENBRACE; } /* "(" with optional spaces on either side */

{SPACE}*{CLOSEBRACE}{SPACE}*    { return TOK_CLOSEBRACE; } /* ")" with optional spaces on either side */

{SPACE}*{SEMICOLON}{SPACE}*     { return TOK_SEMICOLON; } /* ";" with optional spaces on either side */

{SPACE}*{COMMA}{SPACE}*         { return TOK_COMMA; } /* "," with optional spaces on either side */

{QUOTE_OPEN}({SPACE}*{QTEXT}*{QUOTED_PAIR}*)*{QUOTE_CLOSE}  {
                                /* Double-quoted string. sval is pointed just past the
                                   opening quote INSIDE yytext (no copy is made), and the
                                   closing quote is then overwritten with a NUL. */
                                yylval.sval = &yytext[1];
                                yylval.sval[strlen(yylval.sval) - 1] = '\0';
                                return TOK_QUOTED_STRING;
                                }

{DIGIT}+                        {
                                /* Run of decimal digits, converted with atoi() and
                                   stored in the long member of yylval. */
                                yylval.lval = atoi(yytext);
                                return TOK_LONG;
                                }

"true"|"false"                  {
                                /* Boolean literal: ival is 1 for "true", 0 for "false". */
                                yylval.ival = ((0 == strcmp(yytext, "true")) ? 1 : 0 );
                                return TOK_BOOL;
                                }
^"function1"                    { return TOK_FUNC1; } /* keyword, recognised only at the start of a line */
^"function2"                    { return TOK_FUNC2; } /* keyword, recognised only at the start of a line */
^"function3"                    { return TOK_FUNC3; } /* keyword, recognised only at the start of a line */

^{CRLF}                         { return TOK_EMPTY_LINE; } /* CRLF at the very start of a line */
{CRLF}                          {} /* any other CRLF is discarded */
.                               {}/* ignore unknown chars */

file: yacc.y

%{
#include <iostream>
#include <stdio.h>

using namespace std;

extern "C" int yylex();
extern "C" FILE *yyin;

int yyerror(const char *s);
%}

// Symbols.
// Semantic value type: one member per kind of value a token can carry.
%union
{
    /* text carried by TOK_QUOTED_STRING */
    char    *sval;
    /* number carried by TOK_LONG */
    long    lval;
    /* flag carried by TOK_BOOL */
    int        ival;
};

// Keyword tokens (no semantic value).
%token TOK_FUNC1
%token TOK_FUNC2
%token TOK_FUNC3

// Literal tokens; <member> names the %union field holding the value.
%token <sval> TOK_QUOTED_STRING
%token <lval> TOK_LONG
%token <ival> TOK_BOOL

// Punctuation and line-structure tokens (no semantic value).
%token TOK_SEMICOLON
%token TOK_OPENBRACE
%token TOK_CLOSEBRACE
%token TOK_COMMA
%token TOK_EMPTY_LINE

// The grammar's start symbol.
%start program
%%

/* Start symbol: a program is exactly ONE func1, func2, func3 or empty_line. */
program    :    func1
        |    func2
        |    func3
        |   empty_line
        ;

/* function1 ( "string" , long , bool ) ;
   Prints "function1:" followed by the three argument values ($3, $5, $7),
   separated by single spaces, with no trailing newline. */
func1 :   TOK_FUNC1 TOK_OPENBRACE TOK_QUOTED_STRING TOK_COMMA TOK_LONG TOK_COMMA TOK_BOOL TOK_CLOSEBRACE TOK_SEMICOLON
                    {
                    cout << "function1:" << $3 << " " << $5 << " " << $7;
                    }

/* function2 ( "string" , long , bool ) ;  -- same shape as func1, prefix "function2:". */
func2 : TOK_FUNC2 TOK_OPENBRACE TOK_QUOTED_STRING TOK_COMMA TOK_LONG TOK_COMMA TOK_BOOL TOK_CLOSEBRACE TOK_SEMICOLON
                    {
                        cout << "function2:" << $3 << " " << $5 << " " << $7;
                    }

/* function3 ( "string" , long , bool ) ;  -- same shape as func1, prefix "function3:". */
func3 : TOK_FUNC3 TOK_OPENBRACE TOK_QUOTED_STRING TOK_COMMA TOK_LONG TOK_COMMA TOK_BOOL TOK_CLOSEBRACE TOK_SEMICOLON
                    {
                        cout << "function3:" << $3 << " " << $5 << " " << $7;
                    }

/* A TOK_EMPTY_LINE token on its own; the action does nothing. */
empty_line : TOK_EMPTY_LINE
                    {
                    }
%%

/*
 * Error-reporting callback for the generated parser.
 *
 * Writes "yyerror : <message>" followed by a newline to standard output.
 *
 * s: NUL-terminated description of the error.
 * Returns 0 always. The function is declared to return int, and flowing off
 * the end of a non-void function is undefined behaviour in C++, so a value
 * must be returned explicitly.
 */
int yyerror(const char *s) {
  std::cout << "yyerror : " << s << std::endl;
  return 0;
}

/*
 * Entry point: opens "script.txt", makes it the scanner's input and runs the
 * parser over it.
 *
 * Returns 1 if the script cannot be opened; otherwise returns the status
 * reported by yyparse().
 */
int main(void) {
    FILE *script = fopen("script.txt", "r");
    if (script == NULL) {
        // Fail loudly: with a NULL yyin the flex scanner would silently read
        // from standard input instead of the script.
        perror("script.txt");
        return 1;
    }

    yyin = script;
    const int status = yyparse();

    fclose(script);  // release the handle once parsing has finished
    return status;
}

file: script.txt

function1("scanner1", 1234, true );
function2("scanner2", 4321, false );
function3("scanner3", 0123, true );

output:

function1:scanner1 1234 1
yyerror : syntax error

expected output:

function1:scanner1 1234 1
function2:scanner2 4321 0
function3:scanner3 0123 1
2

2 Answers

0
votes

Your grammar's start production, program, only accepts a single line (a func1, a func2, a func3, or an empty line). So it doesn't accept a second line.

You have a number of other problems.

First of all, flex uses the standard C I/O library, opening files in ASCII (text) mode, so I don't believe it will ever see the \r in a CRLF, even on Windows.

Second, yylval.sval = &yytext[1]; does not make a copy of the contents of yytext. yytext belongs to flex, so you shouldn't modify it; also, you need to be aware that flex may modify it as soon as your lexer returns a value. So you do need to make a copy, perhaps using strdup, and then you need to make sure that you free the copy. (See this bison FAQ entry)

0
votes

Here is the modified bison file, yacc.y:

    %{
#include <iostream>
#include <stdio.h>

using namespace std;

extern "C" int yylex();
extern "C" FILE *yyin;

int yyerror(const char *s);
%}

// Symbols.
// Semantic value type: one member per kind of value a token can carry.
%union
{
    /* text carried by TOK_QUOTED_STRING */
    char    *sval;
    /* number carried by TOK_LONG */
    long    lval;
    /* flag carried by TOK_BOOL */
    int        ival;
};

// Keyword tokens (no semantic value).
%token TOK_FUNC1
%token TOK_FUNC2
%token TOK_FUNC3

// Literal tokens; <member> names the %union field holding the value.
%token <sval> TOK_QUOTED_STRING
%token <lval> TOK_LONG
%token <ival> TOK_BOOL

// Punctuation and line-structure tokens (no semantic value).
%token TOK_SEMICOLON
%token TOK_OPENBRACE
%token TOK_CLOSEBRACE
%token TOK_COMMA
%token TOK_EMPTY_LINE

// The grammar's start symbol.
%start program
%%

/* Start symbol: a program is a non-empty sequence of statements. */
program : funcs
        ;

/* Left-recursive list: one or more func items, reduced one at a time. */
funcs   : funcs func
        | func
        ;

/* A single statement: a call to function1/function2/function3 of the form
   NAME ( "string" , long , bool ) ;  or an empty line.
   Each call prints its name, a colon, and the three argument values
   ($3, $5, $7) separated by single spaces, with no trailing newline. */
func :  TOK_FUNC1 TOK_OPENBRACE TOK_QUOTED_STRING TOK_COMMA TOK_LONG TOK_COMMA TOK_BOOL TOK_CLOSEBRACE TOK_SEMICOLON
        {
        cout << "function1:" << $3 << " " << $5 << " " << $7;
        }
        | TOK_FUNC2 TOK_OPENBRACE TOK_QUOTED_STRING TOK_COMMA TOK_LONG TOK_COMMA TOK_BOOL TOK_CLOSEBRACE TOK_SEMICOLON
        {
            cout << "function2:" << $3 << " " << $5 << " " << $7;
        }
        | TOK_FUNC3 TOK_OPENBRACE TOK_QUOTED_STRING TOK_COMMA TOK_LONG TOK_COMMA TOK_BOOL TOK_CLOSEBRACE TOK_SEMICOLON
        {
            cout << "function3:" << $3 << " " << $5 << " " << $7;
        }
        /* An empty line is accepted and produces no output. */
        | TOK_EMPTY_LINE
        {
        }
%%

/*
 * Error-reporting callback for the generated parser.
 *
 * Writes "yyerror : <message>" followed by a newline to standard output.
 *
 * s: NUL-terminated description of the error.
 * Returns 0 always. The function is declared to return int, and flowing off
 * the end of a non-void function is undefined behaviour in C++, so a value
 * must be returned explicitly.
 */
int yyerror(const char *s) {
  std::cout << "yyerror : " << s << std::endl;
  return 0;
}

/*
 * Entry point: opens "script.txt", makes it the scanner's input and runs the
 * parser over it.
 *
 * Returns 1 if the script cannot be opened; otherwise returns the status
 * reported by yyparse().
 */
int main(void) {
    FILE *script = fopen("script.txt", "r");
    if (script == NULL) {
        // Fail loudly: with a NULL yyin the flex scanner would silently read
        // from standard input instead of the script.
        perror("script.txt");
        return 1;
    }

    yyin = script;
    const int status = yyparse();

    fclose(script);  // release the handle once parsing has finished
    return status;
}