4
votes

Consider this lex.l file:

%{
/*
 * Scanner specification: maps operators, punctuation, keywords,
 * identifiers and numbers to the token codes returned by the actions below.
 */
#include "y.tab.h"  /* presumably the yacc-generated header that declares the token codes and yylval -- confirm */
%}

digit         [0-9]
letter        [a-zA-Z]

%%
    /*
     * Rule layout: fixed operator and punctuation strings, then keywords,
     * then the general identifier and number patterns, then whitespace,
     * and finally a catch-all for any other character.
     *
     * The keyword rules are listed ahead of the identifier pattern on
     * purpose: when two patterns match text of the same length, lex uses
     * the rule that appears first, so "begin", "if", etc. are returned as
     * keyword tokens rather than IDENT.  Longer matches still win, which is
     * why "<=" and ">=" are recognized even though "<" and ">" come first.
     */
"+"                  { return PLUS;       }
"-"                  { return MINUS;      }
"*"                  { return TIMES;      }
"/"                  { return SLASH;      }
"("                  { return LPAREN;     }
")"                  { return RPAREN;     }
";"                  { return SEMICOLON;  }
","                  { return COMMA;      }
"."                  { return PERIOD;     }
":="                 { return BECOMES;    }
"="                  { return EQL;        }
"<>"                 { return NEQ;        }
"<"                  { return LSS;        }
">"                  { return GTR;        }
"<="                 { return LEQ;        }
">="                 { return GEQ;        }
"begin"              { return BEGINSYM;   }
"call"               { return CALLSYM;    }
"const"              { return CONSTSYM;   }
"do"                 { return DOSYM;      }
"end"                { return ENDSYM;     }
"if"                 { return IFSYM;      }
"odd"                { return ODDSYM;     }
"procedure"          { return PROCSYM;    }
"then"               { return THENSYM;    }
"var"                { return VARSYM;     }
"while"              { return WHILESYM;   }
{letter}({letter}|{digit})* {
                       /* Pass the parser a heap copy of the matched text.
                        * NOTE(review): the copy is not freed in this file;
                        * presumably the parser owns it -- confirm. */
                       yylval.id = strdup(yytext);
                       return IDENT;      }
{digit}+             { yylval.num = atoi(yytext);  /* decimal text -> int; atoi reports no error on overflow */
                       return NUMBER;     }
[ \t\n\r]            /* skip whitespace */
.                    { printf("Unknown character [%c]\n",yytext[0]);
                       /* Any other single character: report it, then still return a token. */
                       return UNKNOWN;    }
%%

/*
 * lex end-of-input hook.  Always returns 1, which tells the scanner that
 * no further input follows.
 */
int yywrap(void)
{
    return 1;
}

In this example an identifier cannot have the same spelling as a reserved word, because the reserved-word rules above the identifier rule match first. Is there a technique that would allow a reserved word to be used as an identifier in this case?

1

1 Answer

5
votes

I think what you are looking for is a way for the parser to tell the lexer which words are reserved in a given context. That's not easy, though, particularly since the parser frequently reads the lookahead token before any action takes place.

A simpler solution is to have the lexer set yylval appropriately for any semi-reserved word (for example, to a copy of yytext, as the identifier rule already does), and then use productions like this in your parser:

/* Each nonterminal accepts either a plain identifier or one of the
 * keywords that should also be usable as a name in that context.
 * NOTE(review): IDENTIFIER is presumably the identifier token that the
 * lexer in the question returns as IDENT -- confirm the token name. */
id_or_procedure: IDENTIFIER | PROCSYM;

id_or_conditional: IDENTIFIER | THENSYM | ODDSYM;

That's not easy to maintain, because it requires you to figure out which semi-reserved words are applicable in which contexts. But if you only have a couple of semi-reserved words, and they are only reserved in some very specific contexts, then it is quite workable.