I'm trying to learn how to build a C++-based lexer/parser. I'm following multiple tutorials and guides online; my code is mostly built off of two of those guides (1, 2), although I added and tweaked things as I saw fit.
First, I had a header file for the lexer:
#ifndef SHELL_VARIABLELEXER_HPP
#define SHELL_VARIABLELEXER_HPP
#if ! defined(yyFlexLexerOnce)
#include <FlexLexer.h>
#endif
#include "ShellVariableParser.hpp"
#include "location.hh"
namespace SHELL
{
/// Scanner class for shell variables.
///
/// Derives from flex's yyFlexLexer and adds a yylex overload that receives
/// the parser's semantic-value and location pointers.
class ShellVariableLexer : public yyFlexLexer
{
private:
    /// Semantic-value pointer; stays null until the scanner assigns it.
    ShellVariableParser::semantic_type *yylval = nullptr;
    /// Location pointer; allocated in the constructor, released in the destructor.
    ShellVariableParser::location_type *loc = nullptr;

public:
    /// Creates a scanner reading from `in` and allocates the location object.
    ShellVariableLexer(std::istream *in) : yyFlexLexer(in)
    {
        loc = new ShellVariableParser::location_type();
    }

    /// Releases the location object allocated by the constructor
    /// (it was previously never freed).
    virtual ~ShellVariableLexer()
    {
        delete loc;
    }

    /// Not copyable: a copy would share `loc` and delete it twice.
    ShellVariableLexer(const ShellVariableLexer &) = delete;
    ShellVariableLexer &operator=(const ShellVariableLexer &) = delete;

    /// Keeps the inherited FlexLexer::yylex visible next to the overload
    /// declared below, which would otherwise hide it. As a consequence a
    /// call such as `lexer.yylex()` with no arguments resolves to the
    /// inherited function rather than to the overload below.
    using FlexLexer::yylex;

    /// Scanning routine used with the bison parser; its body is generated by
    /// flex from the .l file (via YY_DECL).
    /// @param lval      where semantic values are to be stored
    /// @param location  location object supplied by the caller
    /// @return the next token as a complete bison symbol
    virtual ShellVariableParser::symbol_type yylex(ShellVariableParser::semantic_type* const lval, ShellVariableParser::location_type* location);
};
}
#endif // ShellVariableLEXER_HPP
Then I defined my .l file, which builds fine through flex and g++:
%{
// Prologue copied verbatim into the generated scanner source.
#include <string>
#include "Shell/ShellVariableLexer.hpp"
// Replace flex's default scanner prototype: the generated scanning routine is
// emitted as ShellVariableLexer::yylex(lval, location) returning a complete
// bison symbol (symbol_type) instead of an int.
#undef YY_DECL
#define YY_DECL SHELL::ShellVariableParser::symbol_type SHELL::ShellVariableLexer::yylex(SHELL::ShellVariableParser::semantic_type * const lval, SHELL::ShellVariableParser::location_type *location)
// yyterminate() is what the scanner executes when it stops (e.g. at end of
// input): return an END symbol carrying the current location.
#define yyterminate() return SHELL::ShellVariableParser::make_END(*loc)
// Executed before the action of every matched rule: step() moves the start of
// the location to its previous end, columns(yyleng) extends the end by the
// length of the matched text. Note that this uses the member `loc`, not the
// `location` parameter of yylex.
#define YY_USER_ACTION loc->step(); loc->columns(yyleng);
%}
%option yylineno
%option yyclass="SHELL::ShellVariableLexer"
%option outfile="src/Shell/ShellVariableLexer.cpp"
%option noyywrap
%option c++
%%
%{
    /* Code placed before the first rule runs on every entry to yylex():
       remember where the caller wants semantic values stored. */
    yylval = lval;
%}
[0-9]+\.[0-9]+ {
    /* Floating-point literal: digits '.' digits. */
    const std::string text(yytext, yyleng);
    return SHELL::ShellVariableParser::make_FLOAT(std::stod(text), *loc);
}
[0-9]+ {
    /* INTEGER is declared as int64_t in the grammar, so convert with the
       64-bit std::stoll; std::stoi throws std::out_of_range for literals
       that do not fit in an int. */
    const std::string text(yytext, yyleng);
    return SHELL::ShellVariableParser::make_INTEGER(std::stoll(text), *loc);
}
"$" return SHELL::ShellVariableParser::make_DOLLARSIGN(*loc);
"{" return SHELL::ShellVariableParser::make_LBRACE(*loc);
"}" return SHELL::ShellVariableParser::make_RBRACE(*loc);
"+" return SHELL::ShellVariableParser::make_PLUS(*loc);
"-" return SHELL::ShellVariableParser::make_MINUS(*loc);
"*" return SHELL::ShellVariableParser::make_MULTIPLY(*loc);
"/" return SHELL::ShellVariableParser::make_DIVIDE(*loc);
"(" return SHELL::ShellVariableParser::make_LPARAN(*loc);
")" return SHELL::ShellVariableParser::make_RPARAN(*loc);
"\"" return SHELL::ShellVariableParser::make_DQUOTE(*loc);
(?i:ON) return SHELL::ShellVariableParser::make_ON(*loc);
(?i:OFF) return SHELL::ShellVariableParser::make_OFF(*loc);
(?i:TRUE) return SHELL::ShellVariableParser::make_TRUE(*loc);
(?i:FALSE) return SHELL::ShellVariableParser::make_FALSE(*loc);
[ \t] { /* a single blank or tab is discarded without producing a token */ }
[a-zA-Z_][0-9a-zA-Z_]+ {
    /* Identifier of at least two characters.
       NOTE(review): a one-character name does not match this pattern and
       falls through to the STRING rule below - confirm this is intended. */
    const std::string text(yytext, yyleng);
    return SHELL::ShellVariableParser::make_VARIABLE(text, *loc);
}
[0-9a-zA-Z_ :/\t]+ {
    /* Free-form text (may contain blanks, ':' and '/').
       NOTE(review): flex prefers the longest match, so this rule wins over
       the rules above whenever it can match more characters. */
    const std::string text(yytext, yyleng);
    return SHELL::ShellVariableParser::make_STRING(text, *loc);
}
%%
I then created my .y file. It goes through bison OK but fails when compiled with g++. I will not show the whole file here, just the first part; the rules are irrelevant to my error anyway:
// ---- Output files, skeleton and generated class ---------------------------
%defines "include/Shell/ShellVariableParser.hpp"
%skeleton "lalr1.cc"
%define api.namespace {SHELL}
%define parser_class_name {ShellVariableParser}
%output "src/Shell/ShellVariableParser.cpp"
// Semantic values are stored in bison's variant type, so C++ classes such as
// std::string can be used directly as token/nonterminal types.
%define api.value.type variant
// Makes the parser expect complete symbols from the scanner: the generated
// parse() calls yylex() with only the %lex-param arguments and expects a
// symbol_type result (built with the make_<TOKEN> functions).
%define api.token.constructor
%define parse.assert
// %lex-param {semantic_type* const yylval}
// %lex-param {location_type* location}

// ---- Code emitted into the generated header -------------------------------
%code requires
{
    // int64_t / uint8_t are used in the %token / %type declarations below;
    // include the header explicitly instead of relying on a transitive include.
    #include <cstdint>
    #include <utility>
    #include <string>
    #include <vector>
    #include "ShellVariables.hpp"
    #include "Util/IO.hpp"
    namespace SHELL
    {
        // Forward declaration: the parser only needs a reference to the lexer here.
        class ShellVariableLexer;
    }
    // The following definition is missing when %locations isn't used
    # ifndef YY_NULLPTR
    # if defined __cplusplus && 201103L <= __cplusplus
    # define YY_NULLPTR nullptr
    # else
    # define YY_NULLPTR 0
    # endif
    # endif
}
// The parser is constructed with a reference to the lexer it pulls tokens from.
%parse-param {ShellVariableLexer &lexer}
%define parse.trace
%define parse.error verbose

// ---- Code emitted into the generated implementation file only --------------
%code
{
    #include "ShellVariableLexer.hpp"
    // Redirect every yylex call made by the generated parser to the lexer
    // passed in through %parse-param.
    #undef yylex
    #define yylex lexer.yylex
}

// ---- Tokens carrying a semantic value --------------------------------------
%token<double> FLOAT
%token<int64_t> INTEGER
%token<std::string> STRING
%token<std::string> VARIABLE
// ---- Tokens without a semantic value ---------------------------------------
%token DOLLARSIGN
%token LBRACE
%token RBRACE
%token PLUS
%token MINUS
%token MULTIPLY
%token DIVIDE
%token LPARAN
%token RPARAN
%token DQUOTE
%token ON
%token OFF
%token TRUE
%token FALSE
%token END
// Track source locations for every symbol.
%locations
// ---- Semantic types of the nonterminals ------------------------------------
// VariableType is not declared in this file; presumably it comes from
// ShellVariables.hpp - confirm.
%type <std::pair<uint8_t,VariableType>> All;
%type <std::pair<uint8_t,VariableType>> Bool;
%type <std::pair<uint8_t,VariableType>> Integer;
%type <std::pair<uint8_t,VariableType>> Float;
%type <std::pair<uint8_t,VariableType>> String;
%type <std::string> __string__;
%type <VariableType> variable;
%%
When I compile this I get the following error:
src/Shell/ShellVariableParser.cpp: In member function ‘virtual int SHELL::ShellVariableParser::parse()’:
src/Shell/ShellVariableParser.cpp:494:46: error: no matching function for call to ‘SHELL::ShellVariableParser::basic_symbol<SHELL::ShellVariableParser::by_type>::basic_symbol(int)’
symbol_type yylookahead (yylex ());
According to this link, I expected the yylex call generated by bison to match the signature declared in the lexer header file, but this is not the case. If I try to manually add those parameters by uncommenting the two `%lex-param` lines in the .y file, I get the following error:
src/Shell/ShellVariableParser.cpp: In member function ‘virtual int SHELL::ShellVariableParser::parse()’:
src/Shell/ShellVariableParser.cpp:494:45: error: ‘yylval’ was not declared in this scope
symbol_type yylookahead (yylex (yylval, location));
I'm guessing this might be the right way to do it, except for the variable names `yylval` and `location`? Or am I missing some other options?
I'm using gcc version 6.2.0 with Ubuntu GLIBC 2.24-3ubuntu2, flex version 2.6.1 and bison version 3.0.4.
EDIT
I still have no idea what is causing this. I tried converting `yylex` in the .l file to return `int`, and instead of calling the `make_*` functions I returned the actual tokens:
%{
// Prologue copied verbatim into the generated scanner source.
#include <string>
#include "Shell/ShellVariableLexer.hpp"
// Replace flex's default scanner prototype: the generated scanning routine is
// emitted as ShellVariableLexer::yylex(lval, location) and returns the token
// kind as a plain int.
#undef YY_DECL
#define YY_DECL int SHELL::ShellVariableLexer::yylex(SHELL::ShellVariableParser::semantic_type * const lval, SHELL::ShellVariableParser::location_type *location)
// yyterminate() is what the scanner executes when it stops (e.g. at end of
// input): return the END token kind.
#define yyterminate() return SHELL::ShellVariableParser::token::END
// Executed before the action of every matched rule: step() moves the start of
// the location to its previous end, columns(yyleng) extends the end by the
// length of the matched text. Note that this uses the member `loc`, not the
// `location` parameter of yylex.
#define YY_USER_ACTION loc->step(); loc->columns(yyleng);
%}
%option yylineno
%option yyclass="SHELL::ShellVariableLexer"
%option outfile="src/Shell/ShellVariableLexer.cpp"
%option noyywrap
%option c++
%%
%{
    /* Code placed before the first rule runs on every entry to yylex():
       remember where the caller wants semantic values stored. */
    yylval = lval;
%}
[0-9]+\.[0-9]+ {
    /* Floating-point literal: digits '.' digits. */
    const std::string text(yytext, yyleng);
    yylval->build<double>(std::stod(text));
    return SHELL::ShellVariableParser::token::FLOAT;
}
[0-9]+ {
    /* The value is stored as int64_t, so convert with the 64-bit std::stoll;
       std::stoi throws std::out_of_range for literals that do not fit in an
       int. */
    const std::string text(yytext, yyleng);
    yylval->build<int64_t>(std::stoll(text));
    return SHELL::ShellVariableParser::token::INTEGER;
}
"$" return SHELL::ShellVariableParser::token::DOLLARSIGN;
"{" return SHELL::ShellVariableParser::token::LBRACE;
"}" return SHELL::ShellVariableParser::token::RBRACE;
"+" return SHELL::ShellVariableParser::token::PLUS;
"-" return SHELL::ShellVariableParser::token::MINUS;
"*" return SHELL::ShellVariableParser::token::MULTIPLY;
"/" return SHELL::ShellVariableParser::token::DIVIDE;
"(" return SHELL::ShellVariableParser::token::LPARAN;
")" return SHELL::ShellVariableParser::token::RPARAN;
"\"" return SHELL::ShellVariableParser::token::DQUOTE;
(?i:ON) return SHELL::ShellVariableParser::token::ON;
(?i:OFF) return SHELL::ShellVariableParser::token::OFF;
(?i:TRUE) return SHELL::ShellVariableParser::token::TRUE;
(?i:FALSE) return SHELL::ShellVariableParser::token::FALSE;
[ \t] { /* a single blank or tab is discarded without producing a token */ }
[a-zA-Z_][0-9a-zA-Z_]+ {
    /* Identifier of at least two characters.
       NOTE(review): a one-character name does not match this pattern and
       falls through to the STRING rule below - confirm this is intended. */
    yylval->build<std::string>(yytext);
    return SHELL::ShellVariableParser::token::VARIABLE;
}
[0-9a-zA-Z_ :/\t]+ {
    /* Free-form text (may contain blanks, ':' and '/').
       NOTE(review): flex prefers the longest match, so this rule wins over
       the rules above whenever it can match more characters. */
    yylval->build<std::string>(yytext);
    return SHELL::ShellVariableParser::token::STRING;
}
%%
In the bison file I just removed the `%define api.token.constructor` line and the two `%lex-param` lines. It works fine.
So basically all I did was change the return type of yylex, not its arguments! Why on earth does this work when the first implementation doesn't? Why does returning `int` work when returning `symbol_type` does not?!