0
votes

I am using flex and bison to create the lexical analyzer and parser for a programming language, but when I compile my program I get the following warning:

rule cannot be matched

The warning is reported for lex.l, specifically for the rules that match the "var", "function", "if", "else", and "while" keywords.

I am attaching both my lex.l and parse.y files, which are my lexer and parser respectively. Any insight would be much appreciated.

lex.l

%{
#include "util.h"
#include "node.h"
#include "parse.tab.h"

// Forward declarations for functions used by the rule actions below.
// yyerror takes a format string plus variadic arguments; create_token is
// defined in the user-code section at the end of this file.
void yyerror(const char *fmt, ...);
int create_token(int tag, const char *lexeme);

// Name of the input file. It is assigned in lexer_set_source_file() and
// printed by yyerror() as the prefix of its message.
// NOTE(review): the two lines below look like leftover sample input for the
// language rather than documentation - confirm whether they can be removed.
//var a b
//a + b
char *g_srcfile;
%}

%option yylineno
%option noyywrap

%%

  /* Identifiers: a letter followed by any number of letters or digits. */
[A-Za-z][A-Za-z0-9]*         { return create_token(TOK_IDENTIFIER, yytext); }
       
  /* Numeric literals: optional leading "-", an integer part without leading
     zeros, an optional fraction, and an optional exponent. */
"-"?(0|[1-9][0-9]*)("."[0-9]*)?((e|E)("+"|"-")?[0-9]+)? {
                      return create_token(TOK_NUMBER, yytext); }

  /* Grouping punctuation. */
"("                 { return create_token(TOK_LPARENTHESIS, yytext); }
")"                 { return create_token(TOK_RPARENTHESIS, yytext); }
"{"                 { return create_token(TOK_LCURLY_BRACE, yytext); }
"}"                 { return create_token(TOK_RCURLY_BRACE, yytext); }

  /* Separators. NOTE(review): ";" is a semicolon but is reported as
     TOK_COLON - confirm the token name is intended. */
";"                 { return create_token(TOK_COLON, yytext); }
","                 { return create_token(TOK_COMMA, yytext); }

  /* Arithmetic operators. */
"+"                 { return create_token(TOK_ADD, yytext); }
"-"                 { return create_token(TOK_SUBTRACT, yytext); }
"*"                 { return create_token(TOK_MULTIPLY, yytext); }
"/"                 { return create_token(TOK_DIVIDE, yytext); }


  /* Comparison operators. */
"=="                 { return create_token(TOK_CHECK_EQUAL, yytext); }
"!="                 { return create_token(TOK_NOT_EQUAL, yytext); }
"<"                 { return create_token(TOK_LESS, yytext); }
">"                 { return create_token(TOK_GREATER, yytext); }
"<="                 { return create_token(TOK_LESS_EQUAL, yytext); }
">="                 { return create_token(TOK_GREATER_EQUAL, yytext); }

  /* Logical operators and assignment. */
"&&"                 { return create_token(TOK_AND, yytext); }
"||"                 { return create_token(TOK_OR, yytext); }
"="                 { return create_token(TOK_EQUAL, yytext); }

  /* Reserved words of the language. */
"var"                 { return create_token(TOK_VAR, yytext); }
"function"                 { return create_token(TOK_FUNCTION, yytext); }
"if"                 { return create_token(TOK_IF, yytext); }
"else"                 { return create_token(TOK_ELSE, yytext); }
"while"                 { return create_token(TOK_WHILE, yytext); }

  /* Comment marker: this rule matches only the two slashes themselves; the
     text after them is scanned by the other rules. */
"//"                 { return create_token(TOK_COMMENT, yytext); }

[ \t\r\n\v]+        { /* ignore whitespace */ }

  /* Any other single character is reported through yyerror. */
.                            { yyerror("Unknown character: %c\n", yytext[0]); }


%%

// Remember the name of the file being scanned.
//
// filename is duplicated with xstrdup and the copy is stored in the global
// g_srcfile, so the caller keeps ownership of the string it passed in.
void lexer_set_source_file(const char *filename) {
  char *name_copy = xstrdup(filename);

  g_srcfile = name_copy;
}

// Build the semantic value for a token and return its token code.
//
// tag    - token code (one of the TOK_* values) to hand back to the parser
// lexeme - text of the matched token
//
// A node is obtained from node_alloc_str_copy(tag, lexeme) and stored in
// yylval.node. The return value is tag, unchanged.
int create_token(int tag, const char *lexeme) {
  // Use the lexeme argument instead of reaching for the global yytext, so the
  // function honours its own signature. Every rule action passes yytext, so
  // existing callers see the same result.
  yylval.node = node_alloc_str_copy(tag, lexeme);

  return tag;
}

Here is my parser

parse.y

%{
#include <stdio.h>
#include <stdarg.h>
#include "util.h"
#include "node.h"
#include "grammar_symbols.h"

int yylex(void);
void yyerror(const char *fmt, ...);

// Global node pointers filled in by grammar actions.
// g_translation_unit is assigned by the translation_unit rule's action.
struct Node *g_translation_unit;
// NOTE(review): no assignment to g_expression appears in the rules shown
// here - confirm it is set elsewhere or still needed.
struct Node *g_expression;
// g_definition is assigned by both alternatives of the definition rule.
struct Node *g_definition;
// NOTE(review): no assignment to g_parse_tree appears in the rules shown
// here - confirm it is set elsewhere or still needed.
struct Node *g_parse_tree;
%}

%union {
    struct Node *node;
}

/* TODO: define terminal and nonterminal symbols */

%token<node> TOK_IDENTIFIER

%type<node> translation_unit
%type<node> definition

%type<node> expression variable_declaration_statement if_statement
%type<node> if_else_statement while_statement unary_expression primary_expression
%type<node> parenthesized_subexpression function_call opt_argument_list
%type<node> statement opt_parameter_list statement_list function
%type<node> expression_statement 

%token<node> TOK_LPARENTHESIS TOK_RPARENTHESIS TOK_LCURLY_BRACE TOK_RCURLY_BRACE 
%token<node> TOK_COLON TOK_COMMA
%token<node> TOK_NUMBER TOK_STRING_LITERAL
%token<node> TOK_ADD TOK_SUBTRACT TOK_MULTIPLY TOK_DIVIDE 
%token<node> TOK_CHECK_EQUAL TOK_NOT_EQUAL TOK_LESS TOK_GREATER TOK_LESS_EQUAL TOK_GREATER_EQUAL TOK_AND TOK_OR TOK_EQUAL
%token<node> TOK_VAR TOK_FUNCTION TOK_IF TOK_ELSE TOK_WHILE 
%token<node> TOK_COMMENT


%%

/* TODO: add actual grammar rules */
/* translation_unit: wraps one definition in a NODE_translation_unit node and
   records that node in g_translation_unit. */
translation_unit
    : definition
        {
            $$ = node_build1(NODE_translation_unit, $1);
            g_translation_unit = $$;
        }
    ;

/* definition: either a statement or a function. Both alternatives wrap the
   child in a NODE_definition node and record it in g_definition. */
definition
    : statement
        {
            $$ = node_build1(NODE_definition, $1);
            g_definition = $$;
        }
    | function
        {
            $$ = node_build1(NODE_definition, $1);
            g_definition = $$;
        }
    ;


... deleted not related code



%%

// Report an error at the current scanner line.
//
// fmt - format string for the message; it and the variadic arguments are
//       forwarded to verr_fatal as a va_list.
//
// Writes "<file>:<line>:<col>: Error: " to stderr and then calls verr_fatal.
// NOTE(review): verr_fatal presumably terminates the program - confirm.
void yyerror(const char *fmt, ...) {
  extern char *g_srcfile;
  extern int yylineno, g_col; // g_col is declared but not read yet (see TODO)

  // g_srcfile is NULL until lexer_set_source_file() has been called, and
  // passing NULL for a %s conversion is undefined behavior, so fall back to a
  // placeholder name.
  const char *srcfile = g_srcfile ? g_srcfile : "<unknown>";
  int error_col = 1; // TODO: determine column number

  va_list args;

  va_start(args, fmt);
  fprintf(stderr, "%s:%d:%d: Error: ", srcfile, yylineno, error_col);
  verr_fatal(fmt, args);
  va_end(args);
}

1
You can make life a lot simpler for yourself by treating all the single special characters literally and removing all their rules. Just use `. return yytext[0];` at the bottom of the lexer, and use '=', '<', '>' etc. literally in the grammar rules. – user207421

1 Answer

2
votes

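The keywords can't be matched because your identifier rule comes first in your file. Flex always takes the longest match, and when two rules match text of the same length it picks the rule that appears first in the file. Every keyword is also a valid identifier; you need the keyword rule to take priority, which it will do if it comes earlier.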