I am writing a compiler for a formatting language and I am writing the bison file. My grammar is correct I think but when I added a recursion rule it and then read the test source file it says that it accepts the rule for the ending tag but that the token is unexpected... The thing is though is that before I added the recursion rule (for some tags inbetween the start and end tag) it worked fine... Here are some details
This is the source file
\begin{document}
\title{test}
\author{test}
\date{21/02/1985}
\pagesetup{35, 80}
\end{document}
This is the bison file
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
extern int yylex();
extern int yyparse();
extern FILE *yyin;
extern FILE *yyout;
extern int yylineno;
void yyerror(const char*);
int header_status(int,int,int,int,int);
// counters to check nubmer or document properties used, must all become 1
int title = 0;
int author = 0;
int date = 0;
int pgsetup = 0;
int tabsz = 0;
%}
%union{
int iVal;
char* sVal;
}
%error-verbose
%start source
%token <sVal> SLASH
%token <sVal> BLOCK_S BLOCK_E
%token <sVal> DOC LIST ENUM
%token <sVal> TITLE AUTHOR DATE PG_SETUP TAB_SZ SECTION PARAGRAPH ITEM LINE
%token <sVal> LBRACE RBRACE LPAREN RPAREN
%token <sVal> DOCUMENT DIMENSIONS DATE_VAL STRING
%token <iVal> NUMBER
%token <sVal> ERROR_UN ERROR_IL WORD
%%
source
: /* empty */
| entry_point doc_properties txt_properties exit_point
{
if ( header_status(title, author, date, pgsetup, tabsz) == 0 )
printf("\nfail\n"); //YYABORT;
}
;
entry_point
: SLASH BLOCK_S LBRACE DOC RBRACE
;
doc_properties
: /* empty */
| doc_properties header_properties
;
header_properties
: title_property { title++; }
| author_property { author++; }
| date_property { date++; }
| pg_setup_property { pgsetup++; }
| tab_sz_property { tabsz++; }
;
txt_properties
: /* empty */
;
title_property
: SLASH TITLE LBRACE STRING RBRACE
;
author_property
: SLASH AUTHOR LBRACE STRING RBRACE
;
date_property
: SLASH DATE LBRACE DATE_VAL RBRACE
;
pg_setup_property
: SLASH PG_SETUP LBRACE DIMENSIONS RBRACE
;
tab_sz_property
: SLASH TAB_SZ LPAREN NUMBER RPAREN
;
exit_point
: SLASH BLOCK_E LBRACE DOC RBRACE
;
%%
int main (int argc, char* argv[])
{
if ( argc < 2 || argc > 3)
{
fprintf(stdout, "%s: fatal error: needs one or two arguments\n\n\t%s inputFileName [outputFileName]\n\n", argv[0], argv[0]);
}
else if ( argc == 2 )
{
char* fn = (char *)calloc(strlen(argv[1])+12, sizeof(char));
strcpy(fn, argv[1]);
strcat(fn, ".output.txt");
fprintf(stderr, "%s: using default output naming: <%s>\n\n", argv[0], fn);
yyin = fopen(argv[1], "r");
yyout = fopen(fn, "w");
yyparse();
fclose(yyin);
fclose(yyout);
}
else if ( argc == 3 )
{
yyin = fopen(argv[1], "r");
yyout = fopen(argv[2], "w");
yyparse();
fclose(yyin);
fclose(yyout);
}
return 0;
}
void yyerror(const char* str)
{
fprintf(stderr,"syntax error[%d]: %s\n",yylineno, str);
}
int header_status(int title, int author, int date, int pgsetup, int tabsz)
{
if ( title == 1 && author == 1 && date == 1 && pgsetup == 1 && tabsz == 1 )
{
return 1;
}
else
{
if ( title > 1 ) fprintf(stderr,"syntax error: title property was declared more than once\n");
else if ( title < 1 ) fprintf(stderr,"syntax error: title property was not declared (all document properties must be present)\n");
if ( author > 1 ) fprintf(stderr,"syntax error: author property was declared more than once\n");
else if ( author < 1 ) fprintf(stderr,"syntax error: author property was not declared (all document properties must be present)\n");
if ( date > 1 ) fprintf(stderr,"syntax error: date property was declared more than once\n");
else if ( date < 1 ) fprintf(stderr,"syntax error: date property was not declared (all document properties must be present)\n");
if ( pgsetup > 1 ) fprintf(stderr,"syntax error: pagesetup property was declared more than once\n");
else if ( pgsetup < 1 ) fprintf(stderr,"syntax error: pagesetup property was not declared (all document properties must be present)\n");
if ( tabsz > 1 ) fprintf(stderr,"syntax error: title tabsize was declared more than once\n");
else if ( tabsz < 1 ) fprintf(stderr,"syntax error: title tabsize was not declared (all document properties must be present)\n");
return 0;
}
}
My problem I think is in
doc_properties
: /* empty */
| doc_properties header_properties
;
When I had it empty and just
\begin{document}
\end{document}
for the source file it was fine. Specifically the tokens would be
SLASH BLOCK_S LBRACE DOC RBRACE
SLASH BLOCK_E LBRACE DOC RBRACE
When I added the rule with the recursion though when it reached at the 'end' the trace would say that it accepted the rule (lexical) and then it generated a syntax error "unexpected BLOCK_E". The only thing I can think of is that it is expecting some other tag but in the recursion I have the empty as an alternative so why...
Also when I added the final tag
\begin{document}
\title{test}
\author{test}
\date{21/02/1985}
\pagesetup{35, 80}
\tabsize(4)
\end{document}
when it reached at 4 it says that is accepts the rule in the lex file and that rule
return NUMBER;
but it says unexpected $undefined, expecting NUMBER when it just said that it accepts the rule and frankly I don't think it could read anything else...
My question is for the first part though...
If it is any help this is the flex file
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "UnicTextLang.y.tab.h"
#define SAVE_S yylval.sVal = strdup(yytext)
#define SAVE_I yylval.iVal = atoi(yytext)
%}
WS [ \t\n\r]
TAG [a-zA-Z_][a-zA-Z0-9\-_]+
WORD [a-zA-Z0-9`~!@#$%\^&*()\-_=+[\]{}\\|;:'",<.>/?]
NUMBER ([1-9])|([1-9][0-9])|([1-3][0-9][0-9])
DIMEN {NUMBER}{WS}*,{WS}*{NUMBER}
DAY (0[1-9])|([12][0-9])|(3[01])
MONTH (0[1-9])|(1[0-2])
YEAR (19|20)[0-9]{2}
DATE {DAY}\/{MONTH}\/{YEAR}
%option yylineno
%option noyywrap
%option noinput
%option nounput
%option debug
%x PROPERTY
%x VALUE
%x BLOCK
%x NUMBER
%%
^\\|{WS}\\ { BEGIN(PROPERTY); /* fprintf(stdout, "FLEX> BEGINING PROPERTY [%d]: %s|\n", yylineno, yytext); */ SAVE_S; return SLASH; }
{WS}?\{ { BEGIN(VALUE); /* fprintf(stdout, "FLEX> READING PROPERTY VALUE [%d]: %s|\n", yylineno, yytext); */ SAVE_S; return LBRACE; }
{WS}?\( { BEGIN(NUMBER); /* fprintf(stdout, "FLEX> READING NUMBER VALUE [%d]: %s|\n", yylineno, yytext); */ SAVE_S; return LPAREN; }
{WS} { /* fprintf(stdout, "FLEX> EATING WHITESPACE(i)\n"); */ }
[^ \t\n\r\{(\\][^ \t\n\r]+ { fprintf(stderr, "lexical error[%d]: hingeless word: %s\n", yylineno, yytext); SAVE_S; return WORD; }
. { fprintf(stderr, "lexical error[%d]: illegal character detected: %s\n", yylineno, yytext); SAVE_S; return ERROR_IL; }
<PROPERTY>begin { BEGIN(BLOCK); /* fprintf(stdout, "FLEX> \n\t%s\n\n", yytext); */ SAVE_S; return BLOCK_S; }
<PROPERTY>end { BEGIN(BLOCK); /* fprintf(stdout, "FLEX> \n\t%s\n\n", yytext); */ SAVE_S; return BLOCK_E; }
<PROPERTY>title { BEGIN(INITIAL); /* fprintf(stdout, "FLEX> \n\t%s\n\n", yytext); */ SAVE_S; return TITLE; }
<PROPERTY>author { BEGIN(INITIAL); /* fprintf(stdout, "FLEX> \n\t%s\n\n", yytext); */ SAVE_S; return AUTHOR; }
<PROPERTY>date { BEGIN(INITIAL); /* fprintf(stdout, "FLEX> \n\t%s\n\n", yytext); */ SAVE_S; return DATE; }
<PROPERTY>pagesetup { BEGIN(INITIAL); /* fprintf(stdout, "FLEX> \n\t%s\n\n", yytext); */ SAVE_S; return PG_SETUP; }
<PROPERTY>tabsize { BEGIN(INITIAL); /* fprintf(stdout, "FLEX> \n\t%s\n\n", yytext); */ SAVE_S; return TAB_SZ; }
<PROPERTY>section { BEGIN(INITIAL); /* fprintf(stdout, "FLEX> \n\t%s\n\n", yytext); */ SAVE_S; return SECTION; }
<PROPERTY>paragraph { BEGIN(INITIAL); /* fprintf(stdout, "FLEX> \n\t%s\n\n", yytext); */ SAVE_S; return PARAGRAPH; }
<PROPERTY>item { BEGIN(INITIAL); /* fprintf(stdout, "FLEX> \n\t%s\n\n", yytext); */ SAVE_S; return ITEM; }
<PROPERTY>newline { BEGIN(INITIAL); /* fprintf(stdout, "FLEX> \n\t%s\n\n", yytext); */ SAVE_S; return LINE; }
<PROPERTY>{TAG} { BEGIN(INITIAL); fprintf(stderr, "lexical error[%d]: |%s| undefined property: expecting property\n", yylineno, yytext); SAVE_S; return ERROR_UN; }
<PROPERTY>{WS} { BEGIN(INITIAL); /* fprintf(stdout, "FLEX> EATING WHITESPACE(p)\n"); */ }
<PROPERTY>[^ \t\n\r\{(]+ { BEGIN(INITIAL); fprintf(stderr, "lexical error[%d]: |%s| undefined property: illegal character detected\n", yylineno, yytext); SAVE_S; return ERROR_IL; }
<PROPERTY>. { fprintf(stderr, "lexical error[%d]: illegal character detected: %s\n", yylineno, yytext); SAVE_S; return ERROR_IL; }
<VALUE>{WS}*{DIMEN}{WS}* { /* fprintf(stdout, "FLEX> \n\tdims: %s\n\n", yytext); */ SAVE_S; return DIMENSIONS; }
<VALUE>{WS}*{DATE}{WS}* { /* fprintf(stdout, "FLEX> \n\tdate: %s\n\n", yytext); */ SAVE_S; return DATE_VAL; }
<VALUE>[^}]* { /* fprintf(stdout, "FLEX> \n\tstrg: %s\n\n", yytext); */ SAVE_S; return STRING; }
<VALUE>\} { BEGIN(INITIAL); /* fprintf(stdout, "FLEX> FINISHED READING PROPERTY VALUE [%d]: %s|\n", yylineno, yytext); */ SAVE_S; return RBRACE; }
<VALUE>. { fprintf(stderr, "lexical error[%d]: illegal character detected: %s\n", yylineno, yytext); SAVE_S; return ERROR_IL; }
<NUMBER>{WS}*{NUMBER}{WS}* { /* fprintf(stdout, "FLEX> \n\tnumb: %s\n\n", yytext); */ SAVE_I; return NUMBER; }
<NUMBER>[^)]* { fprintf(stderr, "lexical error[%d]: |%s| illegal value: expecting number(1-399)\n", yylineno, yytext); SAVE_S; return STRING; }
<NUMBER>\) { BEGIN(INITIAL); /* fprintf(stdout, "FLEX> FINISHED READING NUMBER VALUE [%d]: %s|\n", yylineno, yytext); */ SAVE_S; return RPAREN; }
<NUMBER>. { fprintf(stderr, "lexical error[%d]: illegal character detected: %s\n", yylineno, yytext); SAVE_S; return ERROR_IL; }
<BLOCK>{WS}?\{ { /* fprintf(stdout, "FLEX> READING BLOCK TYPE [%d]: %s|\n", yylineno, yytext); */ SAVE_S; return LBRACE; }
<BLOCK>{WS}*document{WS}* { /* fprintf(stdout, "FLEX> \n\tresv: %s\n\n", yytext); */ SAVE_S; return DOC; }
<BLOCK>{WS}*itemize{WS}* { /* fprintf(stdout, "FLEX> \n\tresv: %s\n\n", yytext); */ SAVE_S; return LIST; }
<BLOCK>{WS}*enumerate{WS}* { /* fprintf(stdout, "FLEX> \n\tresv: %s\n\n", yytext); */ SAVE_S; return ENUM; }
<BLOCK>[^{}]* { fprintf(stderr, "lexical error[%d]: |%s| undefined block type: expecting block type\n", yylineno, yytext); SAVE_S; return ERROR_UN; }
<BLOCK>\} { BEGIN(INITIAL); /* fprintf(stdout, "FLEX> FINISHED READING BLOCK TYPE [%d]: %s|\n", yylineno, yytext); */ SAVE_S; return RBRACE;}
<BLOCK>. { fprintf(stderr, "lexical error[%d]: illegal character detected: %s\n", yylineno, yytext); SAVE_S; return ERROR_IL; }
%%