3
votes

I'm trying to learn how to build a C++-based lexer/parser. I'm following multiple tutorials and guides online; my code is mostly built off of these two guides: 1, 2, although I added and tweaked things as I saw fit.

First, I had a header file for the lexer:

#ifndef SHELL_VARIABLELEXER_HPP
#define SHELL_VARIABLELEXER_HPP

#if ! defined(yyFlexLexerOnce)
#include <FlexLexer.h>
#endif

#include "ShellVariableParser.hpp"
#include "location.hh"

namespace SHELL
{
    /// Scanner class layered on top of flex's yyFlexLexer.
    ///
    /// It adds a yylex overload that returns a Bison symbol object and holds
    /// the semantic-value and location pointers used while scanning.
    class ShellVariableLexer : public yyFlexLexer
    {
        private:
           /// Pointer to the semantic value slot; null until it is assigned.
           ShellVariableParser::semantic_type *yylval = nullptr;
           /// Pointer to the current location; allocated by the constructor and owned by this object.
           ShellVariableParser::location_type *loc    = nullptr;
        public:
            /// Creates a scanner that reads from `in` and allocates the location object.
            ShellVariableLexer(std::istream *in) : yyFlexLexer(in)
            {
                loc = new ShellVariableParser::location_type();
            }

            /// Releases the location object allocated by the constructor
            /// (without this destructor it was leaked).
            virtual ~ShellVariableLexer()
            {
                delete loc;
            }

            // `loc` is an owning raw pointer: a copy would delete it twice,
            // so copying is disabled.
            ShellVariableLexer(const ShellVariableLexer &) = delete;
            ShellVariableLexer &operator=(const ShellVariableLexer &) = delete;

            // Keep the base-class yylex overloads visible next to the one declared below.
            using FlexLexer::yylex;

            /// Scanning routine with a Bison-aware signature; its body is
            /// expected to come from the flex-generated source via YY_DECL.
            /// @param lval     semantic value slot supplied by the caller
            /// @param location location slot supplied by the caller
            /// @return the next token as a complete Bison symbol
            virtual ShellVariableParser::symbol_type yylex(ShellVariableParser::semantic_type* const lval, ShellVariableParser::location_type* location);
    };
}

#endif // ShellVariableLEXER_HPP

Then I defined my .l file, which builds fine through flex and g++:

%{
/*
 * Scanner for shell variable expressions.
 * Every rule returns a complete Bison symbol built with the make_TOKEN helpers.
 */
#include <string>
#include "Shell/ShellVariableLexer.hpp"

/* Give the generated scanning routine the signature declared in ShellVariableLexer. */
#undef YY_DECL
#define YY_DECL SHELL::ShellVariableParser::symbol_type SHELL::ShellVariableLexer::yylex(SHELL::ShellVariableParser::semantic_type * const lval, SHELL::ShellVariableParser::location_type *location)
/* At end of input return an END symbol instead of the default integer 0. */
#define yyterminate() return SHELL::ShellVariableParser::make_END(*loc)
/* Runs before each action: start a new location span and extend it by the match length. */
#define YY_USER_ACTION loc->step(); loc->columns(yyleng);

%}

%option yylineno
%option yyclass="SHELL::ShellVariableLexer"
%option outfile="src/Shell/ShellVariableLexer.cpp"
%option noyywrap
%option c++

%%
%{
    /* Executed on entry to yylex: remember the semantic value slot passed by the caller. */
    yylval = lval;
%}

[0-9]+\.[0-9]+          {
                            /* Decimal literal with a fractional part. */
                            std::string Tmp(yytext, yyleng);
                            return SHELL::ShellVariableParser::make_FLOAT(std::stod(Tmp), *loc);
                        }
[0-9]+                  {
                            /*
                             * INTEGER carries an int64_t in the grammar, so parse with
                             * std::stoll; std::stoi throws std::out_of_range for values
                             * that do not fit in a plain int.
                             */
                            std::string Tmp(yytext, yyleng);
                            return SHELL::ShellVariableParser::make_INTEGER(std::stoll(Tmp), *loc);
                        }
"$"                     return SHELL::ShellVariableParser::make_DOLLARSIGN(*loc);
"{"                     return SHELL::ShellVariableParser::make_LBRACE(*loc);
"}"                     return SHELL::ShellVariableParser::make_RBRACE(*loc);
"+"                     return SHELL::ShellVariableParser::make_PLUS(*loc);
"-"                     return SHELL::ShellVariableParser::make_MINUS(*loc);
"*"                     return SHELL::ShellVariableParser::make_MULTIPLY(*loc);
"/"                     return SHELL::ShellVariableParser::make_DIVIDE(*loc);
"("                     return SHELL::ShellVariableParser::make_LPARAN(*loc);
")"                     return SHELL::ShellVariableParser::make_RPARAN(*loc);
"\""                    return SHELL::ShellVariableParser::make_DQUOTE(*loc);
(?i:ON)                 return SHELL::ShellVariableParser::make_ON(*loc);
(?i:OFF)                return SHELL::ShellVariableParser::make_OFF(*loc);
(?i:TRUE)               return SHELL::ShellVariableParser::make_TRUE(*loc);
(?i:FALSE)              return SHELL::ShellVariableParser::make_FALSE(*loc);
[ \t]                   { /* discard a single blank or tab */ }
[a-zA-Z_][0-9a-zA-Z_]+  {
                            /* Identifier of two or more characters. */
                            std::string Tmp(yytext, yyleng);
                            return SHELL::ShellVariableParser::make_VARIABLE(Tmp, *loc);
                        }
[0-9a-zA-Z_ :/\t]+      {
                            /* Any other run of word characters, blanks, colons and slashes. */
                            std::string Tmp(yytext, yyleng);
                            return SHELL::ShellVariableParser::make_STRING(Tmp, *loc);
                        }

%%

I then created my .y file. The file goes through bison OK, but fails when compiled using g++. I will not show the whole file here, just the first part; the rules are irrelevant to my error anyway:

// Output locations, skeleton and naming of the generated C++ parser.
%defines "include/Shell/ShellVariableParser.hpp"
%skeleton "lalr1.cc"
%define api.namespace {SHELL}
%define parser_class_name {ShellVariableParser}
%output "src/Shell/ShellVariableParser.cpp"
// Semantic values are held in a variant, so %token and %type name real C++ types.
%define api.value.type variant
// Request the make_<TOKEN> helpers; with this enabled the generated parser
// builds its lookahead as a symbol_type directly from the value returned by yylex.
%define api.token.constructor
%define parse.assert
// %lex-param {semantic_type* const yylval}
// %lex-param {location_type* location}

%code requires
{
    // int64_t and uint8_t are used by the %token and %type declarations below,
    // so include <cstdint> here instead of relying on a transitive include.
    #include <cstdint>
    #include <utility>
    #include <string>
    #include <vector>

    #include "ShellVariables.hpp"
    #include "Util/IO.hpp"

    // Forward declaration: the parser only needs the name for its %parse-param.
    namespace SHELL
    {
        class ShellVariableLexer;
    }

// The following definition is missing when %locations is not used
# ifndef YY_NULLPTR
#  if defined __cplusplus && 201103L <= __cplusplus
#   define YY_NULLPTR nullptr
#  else
#   define YY_NULLPTR 0
#  endif
# endif
}

// The scanner object is handed to the parser constructor and kept as `lexer`.
%parse-param {ShellVariableLexer &lexer}
%define parse.trace
%define parse.error verbose

%code
{
#include "ShellVariableLexer.hpp"
// Route every yylex call made by the generated parser to the scanner object.
#undef yylex
#define yylex lexer.yylex
}

// Tokens that carry a semantic value.
%token<double>              FLOAT
%token<int64_t>             INTEGER
%token<std::string>         STRING
%token<std::string>         VARIABLE
// Tokens without a semantic value.
%token                      DOLLARSIGN
%token                      LBRACE
%token                      RBRACE
%token                      PLUS
%token                      MINUS
%token                      MULTIPLY
%token                      DIVIDE
%token                      LPARAN
%token                      RPARAN
%token                      DQUOTE
%token                      ON
%token                      OFF
%token                      TRUE
%token                      FALSE
%token                      END

%locations

// Semantic value types of the nonterminals.
%type   <std::pair<uint8_t,VariableType>> All;
%type   <std::pair<uint8_t,VariableType>> Bool;
%type   <std::pair<uint8_t,VariableType>> Integer;
%type   <std::pair<uint8_t,VariableType>> Float;
%type   <std::pair<uint8_t,VariableType>> String;
%type   <std::string> __string__;
%type   <VariableType> variable;

%%

When I compile this I get the following error:

src/Shell/ShellVariableParser.cpp: In member function ‘virtual int SHELL::ShellVariableParser::parse()’:
src/Shell/ShellVariableParser.cpp:494:46: error: no matching function for call to ‘SHELL::ShellVariableParser::basic_symbol<SHELL::ShellVariableParser::by_type>::basic_symbol(int)’
             symbol_type yylookahead (yylex ());

According to this link I expected the yylex function generated by bison to be the same as the one defined in the lexer header file. But this is not the case. If I try and manually add those parameters by uncommenting the two %lex-param lines from the .y file, I get the following error:

src/Shell/ShellVariableParser.cpp: In member function ‘virtual int SHELL::ShellVariableParser::parse()’:
src/Shell/ShellVariableParser.cpp:494:45: error: ‘yylval’ was not declared in this scope
             symbol_type yylookahead (yylex (yylval, location));

I'm guessing this might be the right way to do it, except for the variable names yylval and location? Or am I missing some other options?

I'm using gcc version 6.2.0 with Ubuntu GLIBC 2.24-3ubuntu2, flex version 2.6.1 and bison version 3.0.4.

EDIT

I still have no idea what is causing this. I tried converting yylex in the .l file to return int, and instead of calling the make_ functions I returned the actual token values:

%{
/*
 * Scanner variant that returns plain integer token codes and stores
 * semantic values through the variant pointed to by yylval.
 */
#include <string>
#include "Shell/ShellVariableLexer.hpp"

/* Give the generated scanning routine a member-function signature that returns int. */
#undef YY_DECL
#define YY_DECL int SHELL::ShellVariableLexer::yylex(SHELL::ShellVariableParser::semantic_type * const lval, SHELL::ShellVariableParser::location_type *location)
/* At end of input return the END token code. */
#define yyterminate() return SHELL::ShellVariableParser::token::END
/* Runs before each action: start a new location span and extend it by the match length. */
#define YY_USER_ACTION loc->step(); loc->columns(yyleng);

%}

%option yylineno
%option yyclass="SHELL::ShellVariableLexer"
%option outfile="src/Shell/ShellVariableLexer.cpp"
%option noyywrap
%option c++

%%
%{
    /* Executed on entry to yylex: remember the semantic value slot passed by the caller. */
    yylval = lval;
%}

[0-9]+\.[0-9]+          {
                            /* Decimal literal with a fractional part. */
                            std::string Tmp(yytext, yyleng);
                            yylval->build<double>(std::stod(Tmp));
                            return SHELL::ShellVariableParser::token::FLOAT;
                        }
[0-9]+                  {
                            /*
                             * The value is stored as int64_t, so parse with std::stoll;
                             * std::stoi throws std::out_of_range for values that do not
                             * fit in a plain int.
                             */
                            std::string Tmp(yytext, yyleng);
                            yylval->build<int64_t>(std::stoll(Tmp));
                            return SHELL::ShellVariableParser::token::INTEGER;
                        }
"$"                     return SHELL::ShellVariableParser::token::DOLLARSIGN;
"{"                     return SHELL::ShellVariableParser::token::LBRACE;
"}"                     return SHELL::ShellVariableParser::token::RBRACE;
"+"                     return SHELL::ShellVariableParser::token::PLUS;
"-"                     return SHELL::ShellVariableParser::token::MINUS;
"*"                     return SHELL::ShellVariableParser::token::MULTIPLY;
"/"                     return SHELL::ShellVariableParser::token::DIVIDE;
"("                     return SHELL::ShellVariableParser::token::LPARAN;
")"                     return SHELL::ShellVariableParser::token::RPARAN;
"\""                    return SHELL::ShellVariableParser::token::DQUOTE;
(?i:ON)                 return SHELL::ShellVariableParser::token::ON;
(?i:OFF)                return SHELL::ShellVariableParser::token::OFF;
(?i:TRUE)               return SHELL::ShellVariableParser::token::TRUE;
(?i:FALSE)              return SHELL::ShellVariableParser::token::FALSE;
[ \t]                   { /* discard a single blank or tab */ }
[a-zA-Z_][0-9a-zA-Z_]+  {
                            /* Identifier of two or more characters. */
                            yylval->build<std::string>(yytext);
                            return SHELL::ShellVariableParser::token::VARIABLE;
                        }
[0-9a-zA-Z_ :/\t]+      {
                            /* Any other run of word characters, blanks, colons and slashes. */
                            yylval->build<std::string>(yytext);
                            return SHELL::ShellVariableParser::token::STRING;
                        }
%%

In the bison file I just removed the %define api.token.constructor line and the two %lex-param lines. It works fine.

So basically all I did was change the return type of yylex, not its arguments! Why on earth does this work when the first implementation doesn't? Why does returning int work but returning symbol_type does not?!

1

1 Answer

3
votes

You're misreading the C++ error message. The compiler is not complaining about the arguments to yylex. The error message (with SHELL::ShellVariableParser condensed for readability) says:

 error: no matching function for call to 
‘SH...er::basic_symbol<SH...er::by_type>::basic_symbol(int)’
         symbol_type yylookahead (yylex ());

In fact, symbol_type is a type alias for SHELL::ShellVariableParser::basic_symbol<SHELL::ShellVariableParser::by_type>, so an even more readable rendering would be

error: no matching function for call to ’symbol_type::symbol_type(int)’

In other words, yylex() returns an int, but since symbol_type has no constructor which takes an int, the declaration of yylookahead is invalid.

So the problem is the return type of yylex, which you presumably intended to be symbol_type.

Unfortunately, the (in my opinion clunky) flex C++ interface is working against you. In the base class FlexLexer, yylex() is declared as a virtual function which returns int. Since your ShellVariableLexer is derived from FlexLexer, you cannot define yylex() as returning a different type.

You could, of course, add an unnecessary argument, which is more or less what your declaration of ShellVariableLexer::yylex does:

virtual ShellVariableParser::symbol_type yylex(ShellVariableParser::semantic_type* const lval, ShellVariableParser::location_type* location);

But since the parameters are only necessary to avoid confusing the method with the one declared in the base class, you could use something simpler, like:

ShellVariableParser::symbol_type yylex(int);

and then arrange for it to be the version called with

#define yylex() lexer.yylex(0)

(There's not much point declaring that as virtual. The base classes do not have a similar prototype, so the only reason to declare it virtual would be if you were going to subclass ShellVariableLexer.)