2
votes

I'm trying to write a simple parser for a meta programming language. Everything works fine, but I want ';' to be the mandatory statement delimiter: a newline should not end a statement, and omitting the semicolon should be an error.

So this is the expected behaviour:

// good code
v1 = v2;
v3 = 23;

should parse without errors

But:

// bad code
v1 = v2
v3 = 23;

should fail

Yet if I remove the 'empty' rule from separator, both code samples fail like this:

ID to ID
Error detected in parsing: syntax error, unexpected ID, expecting SEMICOLON
;

If I leave the 'empty' rule active, then both code samples are accepted, which is not desired.

ID to ID   // should raise error
ID to NUM;

Any help is welcome here, as most tutorials do not cover delimiters at all.

Here is a simplified version of my parser/lexer:

parser.l:

%{
/* Token codes and yylval are taken from the header that bison -d generates. */
/* NOTE(review): the rules call strdup, but string.h is not included here. */
#include "parser.tab.h"
#include<stdio.h>
%}

    /* Named pattern fragments; rules refer to them as {name}. */
num      [0-9]
alpha    [a-zA-Z_]
alphanum [a-zA-Z_0-9]
    /* A line comment: two slashes, then everything up to and including the newline. */
comment "//"[^\n]*"\n"
    /* A double-quoted string with no escape handling: it ends at the next double quote. */
string  \"[^\"]*\"
    /* NOTE(review): defined but not referenced by any rule in this scanner. */
whitespace [ \t\n]

    /* Exclusive start condition that is active while scanning a block comment. */
%x ML_COMMENT


%%
    /* Block comments: switch to ML_COMMENT on the opener, back to INITIAL on the closer, echoing both markers. */
<INITIAL>"/*"               {BEGIN(ML_COMMENT); printf("/*");}
<ML_COMMENT>"*/"            {BEGIN(INITIAL); printf("*/");}  
    /* NOTE(review): inside brackets the dot is a literal character, so this rule only discards runs of dots. */
<ML_COMMENT>[.]+            { }
    /* A run of newlines inside a block comment is printed as a single newline. */
<ML_COMMENT>[\n]+           { printf("\n"); }
    /* One or more consecutive line comments are echoed unchanged. */
{comment}+                  {printf("%s",yytext);}

    /* Value-carrying tokens: the matched text is copied with strdup and passed to the parser in yylval.str. */
    /* NOTE(review): because of the trailing +, an identifier needs at least two characters. */
{alpha}{alphanum}+          { yylval.str= strdup(yytext); return ID;}
{num}+                      { yylval.str= strdup(yytext); return NUM;}
{string}                    { yylval.str= strdup(yytext); return STRING;}

    /* NOTE(review): flex quotes literal text with double quotes only. Written with single quotes, this pattern matches the three-character sequence quote, semicolon, quote and never a bare semicolon. */
';'                         {return SEMICOLON;}
"="                         {return ASSIGNMENT;}
    /* Runs of spaces are skipped; tabs and newlines have no rule of their own here. */
" "+                        { }
    /* At end of input the whole process exits instead of reporting end of file to the parser. */
<<EOF>>                     {exit(0); /* this is suboptimal */}
%%

parser.y:

%{
/* Headers for the printf calls used in the grammar actions and in the epilogue. */
#include<stdio.h>
#include<string.h>
%}

/* Request detailed syntax-error messages that name the unexpected and the expected token. */
%error-verbose

/* Semantic value type: a C string, filled in by the scanner for valued tokens. */
%union{
        char *str;
}

/* Tokens whose text is delivered through the str member of the union. */
%token  <str> ID
%token  <str> NUM
%token  <str> STRING
/* %left both declares these tokens and makes them left-associative;
   ASSIGNMENT is declared later and therefore has the higher precedence. */
%left SEMICOLON
%left ASSIGNMENT

/* Parsing begins at the input rule. */
%start input

%%
/* A program is a possibly empty, right-recursive list of expression/separator pairs. */
input:  /* empty */
        | expression separator input
;

/* A statement is an assignment. The error alternative lets the parser
   recover after a syntax error instead of giving up. */
expression: assign 
            | error {}
;

/* Statement terminator. Because the empty alternative is accepted,
   a statement may also be followed by no SEMICOLON at all. */
separator:  SEMICOLON
            | empty
;

/* Matches nothing; it only gives the empty alternative a name. */
empty:      
;

/* Assignment of an identifier, a string or a number to an identifier.
   Each action only prints which form was recognised. */
assign:   ID ASSIGNMENT ID      { printf("ID to ID"); }
        | ID ASSIGNMENT STRING  { printf("ID to STRING"); }
        | ID ASSIGNMENT NUM     { printf("ID to NUM"); }
;

%%

/*
 * Error-reporting callback: prints the parser's diagnostic on standard output.
 *
 * str  - human-readable description of the problem.
 *
 * Returns 0 in every case. The return type is spelled out as int (the
 * original relied on the implicit-int rule that C99 removed) and is kept as
 * int rather than void so the definition still agrees with an undeclared
 * call made earlier in the generated parser file.
 */
int yyerror(const char *str)
{
        printf("Error detected in parsing: %s\n", str);
        return 0;
}

/*
 * Program entry point: runs the generated parser once.
 *
 * The result of yyparse() is deliberately ignored, as in the original.
 * Returns 0 if yyparse() comes back at all; the scanner's end-of-file rule
 * calls exit(0), so the process may end before this return is reached.
 */
int main(void)
{
        yyparse();
        return 0;
}

Compiled like this:

$>flex -t parser.l > parser.lex.yy.c
$>bison -v -d parser.y
$>cc parser.tab.c parser.lex.yy.c -lfl -o parser
1

1 Answers

3
votes

Never mind... the problematic line was this one:

';'                         {return SEMICOLON;}

which needed to be changed to

";"                         {return SEMICOLON;}

Now the behaviour is correct. :-)