1
votes

I am parsing the following file:

BEGIN BLOCK BLK_ROWDEC            
        NAME                          cell_rowdec
        SIZE                          UNI_rowdecSize
        ITERATE                       itr_rows
        DIRECTION                     lgDir_rowdec
        STRAP                         STRD1,STRD3,STRD2
        WRAP                          WRD1
        VIA                           VIAB,VIAC,VIAD
ENDS BLK_ROWDEC

My flex and bison files are as follows:

lexa.l

%{
#include <iostream>
#include <stdio.h>
const char s[2] = " ";
#include "yacc.tab.h"
char *token;
#define YY_DECL extern "C" int yylex()
int line_num = 1;



using namespace std;
%}

/* Named pattern definitions. Of these, only SPACE and CRLF are referenced
   by the rules of this lexer; the others are declared but not used here. */
DOT             "."
COLON           ":"
SEMICOLON       ";"
COMMA           ","
ANGLE_LEFT      "<"
ANGLE_RIGHT     ">"
AT              "@"
EQUAL           "="
SQUARE_OPEN     "["
SQUARE_CLOSE    [^\\]"]"
OPENBRACE       "\("
CLOSEBRACE      "\)"
QUOTE           "\""
QUOTE_OPEN      "\""
QUOTE_CLOSE     [^\\]"\""
SPACE           " "
TAB             "\t"
CRLF            "\r\n"
QUOTED_PAIR     "\\"[^\r\n]
DIGIT           [0-9]
ALPHA           [a-zA-Z]
QTEXT           [0-9a-zA-Z!#$%&'()*+,\-.\/:;<=>?@\[\]^_`{|}~]


%%



[ \t]         ;    /* discard one blank or tab; no token is returned */


    /* Error rule: a line starting with BEGIN that is followed, after one or
       more newlines, by another BEGIN. Prints a message and returns no token.
       NOTE(review): flex has no \s class (an unknown escape \X is a literal
       X), so \s* presumably matches 's' characters, not whitespace -- confirm. */
^BEGIN(.*)\r?\n+\s*BEGIN(.*)     {   printf("\nError : two continous BEGIN is not allowed : "); }

    /* Same check for two ENDS lines. */
^ENDS(.*)\r?\n+\s*ENDS(.*)   {   printf("\nError : two continous END is not allowed : \n"); }




    /* Keyword rules: each stores a strdup() copy of the matched text in
       yylval.sval and returns the keyword's token. */
NAME          {  yylval.sval = strdup(yytext);
    return TOK_NAME; }

SIZE          { yylval.sval = strdup(yytext);
      return TOK_SIZE; }

ITERATE       { yylval.sval = strdup(yytext);
       return TOK_ITERATE; }

DIRECTION   { yylval.sval = strdup(yytext);
      return TOK_DIRECTION; }

STRAP       { yylval.sval = strdup(yytext);
      return TOK_STRAP; }

WRAP        { yylval.sval = strdup(yytext);
      return TOK_WRAP; }

VIA     { yylval.sval = strdup(yytext);
      return TOK_VIA; }

ENDS        { yylval.sval = strdup(yytext);
      return TOK_END; }

BEGIN       { yylval.sval = strdup(yytext);
      return TOK_BEGIN; }

BLOCK       { yylval.sval = strdup(yytext);
      return TOK_BLOCK; }




    /* A run of letters, digits, '_' or ','. A comma-separated list such as
       STRD1,STRD3,STRD2 therefore arrives as a single TOK_STRING. */
[a-zA-Z0-9_,]+    { yylval.sval = strdup(yytext);
      return TOK_STRING; }


    /* Zero or more blanks, returned as TOK_SPACE.
       NOTE(review): the first rule also matches a single blank and is listed
       earlier, so a lone blank presumably never produces TOK_SPACE; only a
       longer run of blanks would reach this rule -- confirm. */
{SPACE}*          { return TOK_SPACE; }




    /* A whole line that starts with ENDS, with an empty action.
       NOTE(review): flex prefers the longest match, so at the start of a line
       this presumably wins over the plain ENDS rule above -- verify. */
^ENDS(.*)$        {}


    /* CR LF at the start of a line is reported as an empty line; any other
       CR LF is dropped. */
^{CRLF}                         { return TOK_EMPTY_LINE; }
{CRLF}                          {}
.                               {}/* ignore unknown chars */
    /* A newline advances the line counter and is returned as ENDL. */
\n                      { ++line_num; return ENDL; }

yacca.y

%{
#include <cstdio> 
#include <cstring>
#include <iostream>
#include <stdio.h>

#define YYDEBUG 1

using namespace std;

extern "C" int yylex();
extern "C" FILE *yyin;
extern int line_num;


void yyerror(const char* s);
%}

// Symbols.
// Semantic value type: a typed token carries a char* in `sval`.
%union
{
    char* sval;
};

// Tokens tagged <sval> carry a string value.
%token <sval> TOK_NAME
%token <sval> TOK_SIZE
%token <sval> TOK_STRING
%token <sval> TOK_ITERATE
%token <sval> TOK_DIRECTION
%token <sval> TOK_STRAP
%token <sval> TOK_WRAP
%token <sval> TOK_VIA
%token <sval> TOK_EMPTY_LINE 
%token <sval> TOK_BLOCK
%token <sval> TOK_LINE
%token <sval> TOK_BEGIN
%token <sval> TOK_END
// TOK_SPACE, END and ENDL are declared without a value type.
%token TOK_SPACE
%token END ENDL



%%

// Start symbol: the input is one or more blocks.
language : program ;


program : block
| program block
;

// A block: BEGIN BLOCK <space> <name> <items...> ENDS <space> <name>.
// Prints the opening name ($4) when the block is reduced.
// NOTE(review): each TOK_SPACE below must actually be delivered by the lexer;
// confirm the lexer does not discard the blanks this rule expects.
block   : TOK_BEGIN TOK_BLOCK TOK_SPACE TOK_STRING blockcontents TOK_END TOK_SPACE TOK_STRING 
  {
    cout << endl << "SHAILAVI" << $4 << " ";

  }
;


// One or more items.
blockcontents : item
      | blockcontents item
      ;


// One item: <space> KEYWORD <space> value. $2 is the keyword text, $4 the value;
// each alternative prints them as "keyword->value".
item    :   TOK_SPACE TOK_NAME TOK_SPACE TOK_STRING         
    { 
        cout << endl << "Value:" << $2 << "->" << $4 << "  "; 
    }
    | TOK_SPACE TOK_SIZE TOK_SPACE TOK_STRING        { cout << $2 << "->" << $4 << "  "; }
    | TOK_SPACE TOK_ITERATE TOK_SPACE TOK_STRING     { cout << $2 << "->" << $4 << "  ";  }
    | TOK_SPACE TOK_DIRECTION TOK_SPACE TOK_STRING   { cout << $2 << "->" << $4 << "  " << endl; }
    | TOK_SPACE TOK_STRAP TOK_SPACE TOK_STRING       { cout << "ref:" << $2 << "->" << $4 << "  "; }
    | TOK_SPACE TOK_WRAP TOK_SPACE TOK_STRING    { cout << $2 << "->" << $4 << "  "; }
    | TOK_SPACE TOK_VIA TOK_SPACE TOK_STRING     { cout << $2 << "->" << $4 << "  " << endl; }
    ;



%%



// Entry point: opens the file named "file" in the working directory and runs
// the parser on it with tracing enabled.
// Returns -1 if the file cannot be opened, 0 otherwise. A syntax error does
// not return here: yyerror() terminates the process.
int main(void) {
    FILE * pt = fopen("file", "r" );
    if(!pt)
    {
        cout << "Bad Input.Noexistant file" << endl;
        return -1;
    }
    yyin = pt;
    yydebug = 1;        // parser tracing; set once, nothing in the loop changes it
    do
    {
        yyparse();
    }while (!feof(yyin));
    fclose(pt);         // release the input stream once parsing is finished
    return 0;
}
void yyerror(const char *s) {
    cout << "parse error on line " << line_num << "!  Message: " << s << endl;
    exit(-1);
}


#include "lex.yy.c"

Compilation steps:

flex lexa.l
bison -d yacca.y
g++ yacca.tab.c -lfl -o scanner.exe

At run time, the parser reports a syntax error near blockcontents.

Please help me to identify the mistake I have done.

Thank You so much.

1

1 Answers

2
votes

It took me a while, but I've found the flaw.

In your lexer, you skip all sequences of tabs and blanks (first rule). But your parser expects white space every now and then. Hence the syntax error.

Since you don't do anything with the white space, simply discard it in the lexer (as you already do, although it is better to remove the {SPACE}* rule as well) and remove TOK_SPACE from the parser.

---- edit to give some hints ----

What I did to track down the bug is:

  1. Make the lexer verbose. I added the following macro definitions:

         /* RETURN(x): return token x from a lexer action; when compiled with
            -DDEBUG, first log the token's name on stderr. The body is wrapped
            in do { } while (0) so that "RETURN(x);" is one statement and is
            safe inside an unbraced if/else. */
         #ifdef DEBUG
         #define RETURN(x)       do { cerr << "\n--> found " << #x << "\n"; return x; } while (0)
         #else
         #define RETURN(x)       do { return x; } while (0)
         #endif

and replaced every "return something" with RETURN(something).

  2. I compile the bison/flex files separately and link them afterwards:

    flex lexa.l && \
    bison -d yacca.y && \
    g++ -c -DDEBUG -I . lex.yy.c && \
    g++ -c -I . yacca.tab.c && \
    g++ lex.yy.o yacca.tab.o -o scanner

(working on linux here)

  3. As requested, the working example (the parser first):

    %{
    #include <cstdio>
    #include <cstring>
    #include <iostream>
    #include <stdio.h>

    #define YYDEBUG 1

    using namespace std;

    extern "C" int yylex();
    extern "C" FILE *yyin;
    extern int line_num;


    void yyerror(const char* s);
    %}

    // Symbols.
    // Semantic value type: a typed token carries a char* in `sval`.
    %union
    {
        char* sval;
    };

    // The <sval> tag is required on these tokens: the grammar actions print
    // $1/$2/$3, and bison rejects a $n reference to a symbol that has no
    // declared value type.
    %token <sval> TOK_NAME
    %token <sval> TOK_SIZE
    %token <sval> TOK_STRING
    %token <sval> TOK_ITERATE
    %token <sval> TOK_DIRECTION
    %token <sval> TOK_STRAP
    %token <sval> TOK_WRAP
    %token <sval> TOK_VIA
    %token <sval> TOK_EMPTY_LINE
    %token <sval> TOK_BLOCK
    %token <sval> TOK_LINE
    %token <sval> TOK_BEGIN
    %token <sval> TOK_END
    %token END ENDL

    %%

    // Start symbol: the input is one or more blocks.
    language : program ;


    program : block
    | program block
    ;

    // A block: BEGIN BLOCK <name> <items...> ENDS <name>.
    // Prints the opening name ($3) when the block is reduced.
    block   : TOK_BEGIN TOK_BLOCK TOK_STRING blockcontents TOK_END TOK_STRING 
      {
        cout << endl << "SHAILAVI" << $3 << " ";

      }
    ;


    // One or more items.
    blockcontents : item
          | blockcontents item
          ;


    // One item: KEYWORD value. $1 is the keyword text, $2 the value; each
    // alternative prints them as "keyword->value". There is one alternative
    // per keyword the lexer can return inside a block.
    item    :   TOK_NAME TOK_STRING         { cout << endl << "Value:" << $1 << "->" << $2 << "  "; }
        | TOK_SIZE TOK_STRING        { cout << $1 << "->" << $2 << "  "; }
        | TOK_ITERATE TOK_STRING     { cout << $1 << "->" << $2 << "  ";  }
        | TOK_DIRECTION TOK_STRING   { cout << $1 << "->" << $2 << "  " << endl; }
        | TOK_STRAP TOK_STRING       { cout << "ref:" << $1 << "->" << $2 << "  "; }
        | TOK_WRAP TOK_STRING    { cout << $1 << "->" << $2 << "  "; }
        | TOK_VIA TOK_STRING     { cout << $1 << "->" << $2 << "  " << endl; }
        ;

    %%

    // Entry point: opens "./input" and runs the parser on it with tracing
    // enabled.
    // Returns -1 if the file cannot be opened, 0 otherwise. A syntax error
    // does not return here: yyerror() terminates the process.
    int main(void) {
        FILE * pt = fopen("./input", "r" );
        if(!pt)
        {
            cout << "Bad Input.Nonexistent file" << endl;
            return -1;
        }
        yyin = pt;
        yydebug = 1;        // parser tracing; set once, nothing in the loop changes it
        do
        {
            yyparse();
        }while (!feof(yyin));
        fclose(pt);         // release the input stream once parsing is finished
        return 0;
    }
    void yyerror(const char *s) {
        cout << "parse error on line " << line_num << "!  Message: " << s << endl;
        exit(-1);
    }
    // End-of-input hook for the scanner. Always returns 1; a non-zero result
    // is how flex is told that no further input follows.
    extern "C" int yywrap()
    {
        const int no_more_input = 1;
        return no_more_input;
    }

And the lexer


    %{
    #include <iostream>
    #include <stdio.h>
    const char s[2] = " ";
    #include "yacca.tab.h"
    char *token;
    #define YY_DECL extern "C" int yylex()
    int line_num = 1;

    /* RETURN(x): return token x from a lexer action; when compiled with
       -DDEBUG, first log the token's name on stderr. The body is wrapped in
       do { } while (0) so that "RETURN(x);" is one statement. */
    #ifdef DEBUG
    #define RETURN(x)       do { cerr << "\n--> found " << #x << "\n"; return x; } while (0)
    #else
    #define RETURN(x)       do { return x; } while (0)
    #endif


    using namespace std;
    %}

    /* Named pattern definitions. Of these, only CRLF is referenced by the
       rules of this lexer; the others are declared but not used here. */
    DOT             "."
    COLON           ":"
    SEMICOLON       ";"
    COMMA           ","
    ANGLE_LEFT      "<"
    ANGLE_RIGHT     ">"
    AT              "@"
    EQUAL           "="
    SQUARE_OPEN     "["
    SQUARE_CLOSE    [^\\]"]"
    OPENBRACE       "\("
    CLOSEBRACE      "\)"
    QUOTE           "\""
    QUOTE_OPEN      "\""
    QUOTE_CLOSE     [^\\]"\""
    SPACE           " "
    TAB             "\t"
    CRLF            "\r\n"
    QUOTED_PAIR     "\\"[^\r\n]
    DIGIT           [0-9]
    ALPHA           [a-zA-Z]
    QTEXT           [0-9a-zA-Z!#$%&'()*+,\-.\/:;<=>?@\[\]^_`{|}~]


    /* [ \t]         ; */
    %%

        /* Error rule: a line starting with BEGIN that is followed, after one
           or more newlines, by another BEGIN. Prints a message and returns no
           token.
           NOTE(review): flex has no \s class (an unknown escape \X is a
           literal X), so \s* presumably matches 's' characters, not
           whitespace -- confirm. */
    ^BEGIN(.*)\r?\n+\s*BEGIN(.*)     {   printf("\nError : two continous BEGIN is not allowed : "); }

        /* Same check for two ENDS lines. */
    ^ENDS(.*)\r?\n+\s*ENDS(.*)   {   printf("\nError : two continous END is not allowed : \n"); }




        /* Keyword rules: each stores a strdup() copy of the matched text in
           yylval.sval and returns the keyword's token through RETURN(). */
    NAME          {  yylval.sval = strdup(yytext);
        RETURN(TOK_NAME); }

    SIZE          { yylval.sval = strdup(yytext);
          RETURN(TOK_SIZE); }

    ITERATE       { yylval.sval = strdup(yytext);
           RETURN(TOK_ITERATE); }

    DIRECTION   { yylval.sval = strdup(yytext);
          RETURN(TOK_DIRECTION); }

    STRAP       { yylval.sval = strdup(yytext);
          RETURN(TOK_STRAP); }

    WRAP        { yylval.sval = strdup(yytext);
          RETURN(TOK_WRAP); }

    VIA     { yylval.sval = strdup(yytext);
          RETURN(TOK_VIA); }

    ENDS        { yylval.sval = strdup(yytext);
          RETURN(TOK_END); }

    BEGIN       { yylval.sval = strdup(yytext);
          RETURN(TOK_BEGIN); }

    BLOCK       { yylval.sval = strdup(yytext);
          RETURN(TOK_BLOCK); }


        /* A run of letters, digits, '_' or ','. A comma-separated list such
           as STRD1,STRD3,STRD2 therefore arrives as a single TOK_STRING. */
    [a-zA-Z0-9_,]+    { yylval.sval = strdup(yytext); RETURN(TOK_STRING); }

        /* A whole line that starts with ENDS, with an empty action.
           NOTE(review): flex prefers the longest match, so at the start of a
           line this presumably wins over the plain ENDS rule above and the
           parser never receives TOK_END -- verify; this may be the remaining
           end-of-file problem. */
    ^ENDS(.*)$        {}


        /* CR LF at the start of a line is reported as an empty line; any
           other CR LF is dropped. Every other single character, including
           blanks and tabs now that the [ \t] rule is commented out, falls
           through to the "." rule and is ignored. */
    ^{CRLF}                         { RETURN(TOK_EMPTY_LINE); }
    {CRLF}                          {}
    .                               {}/* ignore unknown chars */
        /* A newline only advances the line counter; no token is returned. */
    \n                      { ++line_num; /* RETURN(ENDL); */ }

There's only one problem left. It doesn't really like the EOF. I'll leave that as an exercise.