Spirit Fails to Parse After only Appearing to get First symbol From the Lexer

Question

Recently, I asked a question here: Boost Spirit Segfault In Parser

In this post it was pointed out the grammar I was working with was absolutely left recursive and that spirit is a PEG parser generator, meaning left recursion is impossible.

I converted the grammar to a non-left recursive grammar, using the rule from the section dealing with left recursion in the Dragon Book.

Given a left recursive grammar

A -> A >> alpha | beta

It can be converted to the equivalent right recursive grammar by doing the following:

A -> beta >> A'

A' -> alpha >> A' | epsilon

Here is the resulting parser, with what I believe to be the the non-left recursive productions:

namespace interpreter {
namespace qi = boost::spirit::qi;
template <typename Iterator, typename Skipper>
struct InterpreterGrammar : qi::grammar<Iterator, Skipper>
{           
    template <typename TokenDef>
    InterpreterGrammar(TokenDef const& tok)
        : InterpreterGrammar::base_type(start)
    {
        using boost::phoenix::ref;

        start %= functionList >> endList >> qi::eoi;

        // different expressions
        exp %= 
               qi::token(k_alphaTerminal) >> qi::token(k_equalTo) >> qi::token(k_alphaTerminal) >> expPrime
               |
               qi::token(k_numericTerminal) >> expPrime
               |
               qi::token(k_trueTok) >> expPrime
               |
               qi::token(k_falseTok) >> expPrime;

        expPrime %=     
               qi::token(k_equalTo) >> exp >> expPrime
               |
               qi::token(k_notEq) >> exp >> expPrime
               |
               qi::token(k_less) >> exp >> expPrime
               |
               qi::token(k_lessEq) >> exp >> expPrime
               |
               qi::token(k_greater) >> exp >> expPrime
               |
               qi::token(k_greaterEq) >> exp >> expPrime
               |
               qi::token(k_andTok) >> exp >> expPrime
               |
               qi::token(k_orTok) >> exp >> expPrime
               |
               qi::token(k_notTok) >> exp 
               |
               qi::token(k_plues) >> exp >> expPrime
               |
               qi::token(k_minus) >> exp >> expPrime
               |
               qi::token(k_mult) >> exp >> expPrime
               |
               qi::token(k_minus) >> exp
               |
               qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
               |
               qi::token(k_alphaTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket) 
               |
               qi::token(k_alphaTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
               |
               qi::token(k_alphaTerminal) >> qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
               | 
               qi::eps;

        // parameter list
        paramList %= exp >> paramListPrime;

        paramListPrime %= qi::token(k_comma) >> exp >> paramListPrime
                          |
                          qi::eps;

        // return statements
        returnStatement %= qi::token(k_returnTok) >> exp
                           |
                           qi::token(k_returnTok);

        // function call statements
        callStatement %= qi::token(k_alphaTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
                         |
                         qi::token(k_alphaTerminal) >> qi::token(k_leftParen) >> paramList >> qi::token(k_rightParen);

        // variable assignment
        assignmentStatement %= qi::token(k_alphaTerminal) >> qi::token(k_assign) >> exp
                               |
                               qi::token(k_alphaTerminal) >> qi::token(k_leftBracket) >> exp
                                   >> qi::token(k_rightBracket) >> qi::token(k_assign) >> exp;

        // list of integers
        intList %= qi::token(k_numericTerminal) >> intListPrime;

        intListPrime %= 
                  qi::token(k_comma) >> qi::token(k_numericTerminal) >> intListPrime
                  |
                  qi::eps;

        // print out a variable
        printStatement %= qi::token(k_print) >> exp;

        // take input
        inputStatement %= qi::token(k_alphaTerminal) >> qi::token(k_input);

        // conditional statement
        conditionStatement %= qi::token(k_ifTok) >> exp >> qi::token(k_colon) >> statements >> optionalElse;

        // consitions have optional else
        optionalElse %= qi::token(k_elseTok) >> qi::token(k_colon) >> statements
                        |
                        qi::eps;

        // while loop
        whileStatement %= qi::token(k_whileTok) >> exp >> qi::token(k_colon) >> statements >> qi::token(k_elihw);

        // actual program statements
        endList %= end >> endListPrime;

        endListPrime %= end >> endListPrime
                        |
                        qi::eps;

        // end possibilities of program in global space
        end %= callStatement
               |
               printStatement
               |
               qi::token(k_alphaTerminal) >> qi::token(k_assign) >> qi::token(k_input)
               |
               qi::token(k_alphaTerminal) >> qi::token(k_assign) >> exp
               |
               qi::token(k_alphaTerminal) >> qi::token(k_assign) >> qi::token(k_leftBracket) >> intList
                   >> qi::token(k_rightBracket)
               |
               qi::token(k_alphaTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket)
                   >> qi::token(k_assign) >> exp;

        // function parameters
        param %=
                qi::token(k_alphaTerminal) >> paramPrime
                |
                qi::token(k_alphaTerminal) >> qi::token(k_leftBracket) >> qi::token(k_rightBracket)
                    >> paramPrime;

        // for handling left recursion in paramlist
        paramPrime %= 
                    qi::token(k_comma) >> qi::token(k_alphaTerminal) >> paramPrime
                    | 
                    qi::eps;


        // define a statement as assignment print input condition while or call
        statement %= 
                    assignmentStatement
                    |
                    printStatement
                    |
                    inputStatement
                    |
                    conditionStatement
                    |
                    whileStatement
                    |
                    callStatement
                    |
                    returnStatement;

        // general statement list
        statements %= statement >> statementsPrime;

        // for handling left recursion in statements
        statementsPrime %= statement >> statementsPrime
                           |
                           qi::eps;

        // functions
        functionList %= qi::token(k_def) >> qi::token(k_alphaTerminal) >> qi::token(k_leftParen)
                            >> param >> qi::token(k_rightParen) >> qi::token(k_colon)
                            >> statements >> qi::token(k_fed)
                        |
                        qi::token(k_def) >> qi::token(k_alphaTerminal) >> qi::token(k_leftParen)
                            >> qi::token(k_rightParen) >> qi::token(k_colon) >> statements >> qi::token(k_fed)
                        | qi::eps;

        BOOST_SPIRIT_DEBUG_NODES((start)(functionList));
        debug(start);
    }
    qi::rule<Iterator, Skipper> start;
    qi::rule<Iterator, Skipper> functionList;
    qi::rule<Iterator, Skipper> endList;
    qi::rule<Iterator, Skipper> endListPrime;
    qi::rule<Iterator, Skipper> param;
    qi::rule<Iterator, Skipper> paramPrime;
    qi::rule<Iterator, Skipper> paramList;
    qi::rule<Iterator, Skipper> paramListPrime;
    qi::rule<Iterator, Skipper> statements;
    qi::rule<Iterator, Skipper> statementsPrime;
    qi::rule<Iterator, Skipper> statement;
    qi::rule<Iterator, Skipper> assignmentStatement;
    qi::rule<Iterator, Skipper> printStatement;
    qi::rule<Iterator, Skipper> inputStatement;
    qi::rule<Iterator, Skipper> conditionStatement;
    qi::rule<Iterator, Skipper> whileStatement;
    qi::rule<Iterator, Skipper> callStatement;
    qi::rule<Iterator, Skipper> returnStatement;
    qi::rule<Iterator, Skipper> exp;
    qi::rule<Iterator, Skipper> expPrime;
    qi::rule<Iterator, Skipper> intList;
    qi::rule<Iterator, Skipper> intListPrime;
    qi::rule<Iterator, Skipper> optionalElse;
    qi::rule<Iterator, Skipper> end;
};

}

Here is the lexer

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_container.hpp>
#include <iostream>
#include <fstream>
#include <streambuf>

#include <boost/bind.hpp>
#include <boost/ref.hpp>

namespace interpreter
{
    namespace lex = boost::spirit::lex;

    enum Tokens
    {
        k_andTok = 1,
        k_def = 2,
        k_elihw = 3,
        k_elseTok = 4,
        k_falseTok = 5,
        k_fed = 6,
        k_fi = 7,
        k_ifTok = 8,
        k_input = 9,
        k_notTok = 10,
        k_orTok = 11,
        k_print = 12,
        k_returnTok = 13,
        k_trueTok = 14,
        k_whileTok = 15,
        k_plues = 16,
        k_minus = 17,
        k_mult = 18,
        k_div = 19,
        k_bang = 20,
        k_equalTo = 21,
        k_greaterEq = 22,
        k_lessEq = 23,
        k_notEq = 24,
        k_less = 25,
        k_greater = 26,
        k_assign = 27,
        k_comma = 28,
        k_colon = 29,
        k_leftParen = 30,
        k_rightParen = 31,
        k_leftBracket = 32,
        k_rightBracket = 33,
        k_alphaTerminal = 34,
        k_numericTerminal = 35
    };

    template <typename Lexer>
    struct LexerTokens : lex::lexer<Lexer>
    {
        LexerTokens() :
           whiteSpace("[ \\t\\n]"),
           andTok("and"),
           def("def"),
           elihw("elihw"),
           elseTok("else"),
           falseTok("false"),
           fed("fed"),
           fi("fi"),
           ifTok("if"),
           input("input"),
           notTok("not"),
           orTok("or"),
           print("print"),
           returnTok("return"),
           trueTok("true"),
           whileTok("while"),
           plus("\\+"),
           minus("\\-"),
           mult("\\*"),
           div("\\/"),
           bang("\\!"),
           equalTo("=="),
           greaterEq(">="),
           lessEq("<="),
           notEq("!="),
           less("<"),
           greater(">"),
           assign("="),
           comma(","),
           colon(":"),
           leftParen("\\("),
           rightParen("\\)"),
           leftBracket("\\["),
           rightBracket("\\["),
           alphaTerminal("[a-z][a-zA-Z0-9]*"),
           numericTerminal("[0-9]*")
        {
            this->self("WHITESPACE") = whiteSpace;

            this->self.add
                (andTok, k_andTok)
                (def, k_def)
                (elihw, k_elihw)
                (elseTok, k_elseTok)
                (falseTok, k_falseTok)
                (fed, k_fed)
                (fi, k_fi)
                (ifTok, k_ifTok)
                (andTok, k_andTok)
                (input, k_input)
                (notTok, k_notTok)
                (orTok, k_orTok)
                (print, k_print)
                (returnTok, k_returnTok)
                (trueTok, k_trueTok)
                (whileTok, k_whileTok)
                (plus, k_plues)
                (minus, k_minus)
                (mult, k_mult)
                (div, k_div)
                (bang, k_bang)
                (equalTo, k_equalTo)
                (greaterEq, k_greaterEq)
                (lessEq, k_lessEq)
                (notEq, k_notEq)
                (less, k_less)
                (greater, k_greater)
                (assign, k_assign)
                (comma, k_comma)
                (colon, k_colon)
                (leftParen, k_leftParen)
                (rightParen, k_rightParen)
                (leftBracket, k_leftBracket)
                (rightBracket, k_rightBracket)
                (alphaTerminal, k_alphaTerminal)
                (numericTerminal, k_numericTerminal);
        }

        lex::token_def<lex::omit> whiteSpace;
        lex::token_def<std::string> andTok;
        lex::token_def<std::string> def;
        lex::token_def<std::string> elihw;
        lex::token_def<std::string> elseTok;
        lex::token_def<std::string> falseTok;
        lex::token_def<std::string> fed;
        lex::token_def<std::string> fi;
        lex::token_def<std::string> ifTok;
        lex::token_def<std::string> input;
        lex::token_def<std::string> notTok;
        lex::token_def<std::string> orTok;
        lex::token_def<std::string> print;
        lex::token_def<std::string> returnTok;
        lex::token_def<std::string> trueTok;
        lex::token_def<std::string> whileTok;
        lex::token_def<std::string> plus;
        lex::token_def<std::string> minus;
        lex::token_def<std::string> mult;
        lex::token_def<std::string> div;
        lex::token_def<std::string> bang;
        lex::token_def<std::string> equalTo;
        lex::token_def<std::string> greaterEq;
        lex::token_def<std::string> lessEq;
        lex::token_def<std::string> notEq;
        lex::token_def<std::string> less;
        lex::token_def<std::string> greater;
        lex::token_def<std::string> assign;
        lex::token_def<std::string> comma;
        lex::token_def<std::string> colon;
        lex::token_def<std::string> leftParen;
        lex::token_def<std::string> rightParen;
        lex::token_def<std::string> leftBracket;
        lex::token_def<std::string> rightBracket;
        lex::token_def<std::string> alphaTerminal;
        lex::token_def<std::string> numericTerminal;
    };

And here is my example test program

    int main(int argc, char** argv)
    {
        namespace lex = boost::spirit::lex;
        namespace qi = boost::spirit::qi;

        typedef lex::lexertl::token< char const*, lex::omit, boost::mpl::true_ > token_type;
        typedef lex::lexertl::lexer<token_type> lexer_type;
        typedef interpreter::LexerTokens<lexer_type>::iterator_type iterator_type;
        typedef qi::in_state_skipper<interpreter::LexerTokens<lexer_type>::lexer_def> skipper_type;

        interpreter::LexerTokens< lexer_type > lexer;
        interpreter::InterpreterGrammar< iterator_type, skipper_type > parser(lexer);
        std::string sourceCode("def print_it(x, y): print 3*x + y return fed print_it(8,1) x = 3 print_it(x, x)"); 

        char const* first = sourceCode.c_str();
        char const* last = &first[sourceCode.size()];
        bool r = lex::tokenize_and_phrase_parse(first, last, lexer, parser, qi::in_state("WHITESPACE")[lexer.self]);

        std::cout << "Remaining " << std::string(first,last) << std::endl;
        std::cout << "R is " << r << std::endl;
    }

These revisions have given rise to a few questions, first, by looking at the grammar informally, without constructing the full LL(1) parsing table, I don't believe this grammar is LL(1). I still need to verify this, but I was wondering will spirit, be able to parse this grammar? I know PEGs typically use the / operator to do lookahead, does spirit do this currently? I read in another post that spirit may not?

Second, this grammar fails, I notice that when, in the start production, I simplify the start production and make it:

start %= functionList;

and then change the input to be:

def print_it(x, y): print 3*x + y return fed

the grammar debug statement states that the parse was successful. However, I see the string remaining is:

print_it(x, y): print 3*x + y return fed

So only the first token was actually parsed. After a bit of debugging I am unsure why the parse is successful and why only a single symbol is consumed, could this be an issue with the lexer?

Additionally, I see similar results when I change the start production to be:

start %= endList;

and using the input

y = x

This however, fails to parse and only consumes the character y.

Finally, the output of my debug statement is not very helpful, when running with the debug statement the output that is produced is:

<start>
  <try>[][][][][][][][][][][][][][][][][][][][]</try>
  <fail/>
</start>
Remaining  print_it(x, y): print 3*x + y return fed print_it(8,1) x = 3 print_it(x, x)
R is 0

Which I assume to mean twenty productions are attempted in the grammar, from the twenty empty [], is that a correct assumption? Also why are the [] empty? I typically see them with some text that is useful in debugging. Is it because the grammar is automatically a success if the regular expression is matched? If that is the case, is there a way to get the debug statement to print helpful output when using the token enum token as opposed to adding expressions using macros?

Any help or points in the correct direction is appreciated, thank you.

start %= functionList; should really not be valid. I suppose it might work because of %=, but to assign rules to other rules without implying a parser expression, use functionList.alias() — sehe

sehe sehe · Accepted Answer · 2018-06-23T00:16:23

Which I assume to mean twenty productions are attempted in the grammar, from the twenty empty [], is that a correct assumption?

No. The [] indicate input tokens.

Also why are the [] empty?

There's likely no useful way to print them so they show up as empty.

If that is the case, is there a way to get the debug statement to print helpful output when using the token enum token as opposed to adding expressions using macros?

I'd think so. But I never use Lex. So, It could be a while to figure it out.

First thing that catches my attention is this:

typedef lex::lexertl::token< char const*, lex::omit, boost::mpl::true_ > token_type;

Your AttributeTypes says omit. Indeed, changing to

typedef lex::lexertl::token< char const*, boost::mpl::vector<lex::omit, std::string>, boost::mpl::true_ > token_type;

Does show signs of life. For the input x=y (no whitespace!) it prints:

Live On Coliru

<start>
  <try>[y][=][x][][][][][][][][][][][][][][][][][]</try>
  <fail/>
</start>
Remaining 

R is false

Now, for the def print_it(x, y): print 3*x + y return fed input, the output still is:

<start>
  <try>[def][][][][][][][][][][][][][][][][][][][]</try>
  <fail/>
</start>
Remaining  print_it(x, y): print 3*x + y return fed

R is false

Slightly more informative. Interestingly, it also seems to fail on the first whitespace. The whiteSpace regex looks okay, so I searched for lex in_state in my answers to remember what I once learned about skipping and Lex.

I played around with some suggestions. Following the second post lead me to this comment:

Nice! You can add to your answer that having a separate state inside the lexer for skipping white spaces has another considerable drawback: lack of debugging support. If you use a separate state for skipping and then use BOOST_SPIRIT_DEBUG_NODE, you won't get the full output of the token stream. This is because the default debug handler advances the lexer iterator to get tokens, the lexer is of course in the INITIAL state. As soon as it meets a white space, the iteration stops, because the lexer cannot find a match. The token stream will be cut at the first white space in the rule's trace. – noxmetus Sep 20 '17 at 18:28

Let's, just for fun, try without the separate state and instead use the one from Troubles with boost::spirit::lex & whitespace.

Now, the expression doesn't parse because

print_it is not a valid identifier. Let's add _ to alphaTerminal

alphaTerminal requires = to follow. Fixing the expression a bit:

exp = (
            qi::token(k_alphaTerminal)
          | qi::token(k_numericTerminal)
          | qi::token(k_trueTok)
          | qi::token(k_falseTok))
    >> expPrime
    ;

In fact, those token ids arent valid. You need to start with min_token_id (which is 0x10000):

enum Tokens
{
    k_andTok = lex::tokenids::min_token_id + 1,
    k_def, k_elihw, k_elseTok, k_falseTok, k_fed, k_fi, k_ifTok, k_input,
    k_notTok, k_orTok, k_print, k_returnTok, k_trueTok, k_whileTok,
    k_plues, k_minus, k_mult, k_div, k_bang, k_equalTo, k_greaterEq,
    k_lessEq, k_notEq, k_less, k_greater, k_assign, k_comma, k_colon,
    k_leftParen, k_rightParen, k_leftBracket, k_rightBracket,
    k_alphaTerminal, k_numericTerminal,
};

Why even do you want to dictate the token IDs? Guessing from the fact you are passing a reference to the token definitions to the parser, I would think you were going to use them?
```
exp
    = (tok.alphaTerminal | tok.numericTerminal | tok.trueTok | tok.falseTok) >> expPrime
    ;
```

Also, let's enable debugging for all the rules:

BOOST_SPIRIT_DEBUG_NODES(
    (start) (functionList) (endList) (endListPrime) (param)
    (paramPrime) (paramList) (paramListPrime) (statements)
    (statementsPrime) (statement) (assignmentStatement)
    (printStatement) (inputStatement) (conditionStatement)
    (whileStatement) (callStatement) (returnStatement) (exp)
    (expPrime) (intList) (intListPrime) (optionalElse) (end)
)

And, after spending entirely too much time trying to understand why the state-switching skipper seems not to work, I'm giving up. Here's the upshot of my tinkering:

Live On Coliru

//#define USE_STATES
#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/lex.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/phoenix.hpp>
namespace lex = boost::spirit::lex;

namespace interpreter {
    enum Tokens
    {
        k_andTok = lex::tokenids::min_token_id + 1,
        k_def, k_elihw, k_elseTok, k_falseTok, k_fed, k_fi, k_ifTok, k_input,
        k_notTok, k_orTok, k_print, k_returnTok, k_trueTok, k_whileTok,
        k_plues, k_minus, k_mult, k_div, k_bang, k_equalTo, k_greaterEq,
        k_lessEq, k_notEq, k_less, k_greater, k_assign, k_comma, k_colon,
        k_leftParen, k_rightParen, k_leftBracket, k_rightBracket,
        k_alphaTerminal, k_numericTerminal,
    };

    template <typename Lexer>
    struct LexerTokens : lex::lexer<Lexer>
    {
        LexerTokens() :
           whiteSpace("[ \\t\\n]"),
           andTok("and"),
           def("def"),
           elihw("elihw"),
           elseTok("else"),
           falseTok("false"),
           fed("fed"),
           fi("fi"),
           ifTok("if"),
           input("input"),
           notTok("not"),
           orTok("or"),
           print("print"),
           returnTok("return"),
           trueTok("true"),
           whileTok("while"),
           plus("\\+"),
           minus("\\-"),
           mult("\\*"),
           div("\\/"),
           bang("\\!"),
           equalTo("=="),
           greaterEq(">="),
           lessEq("<="),
           notEq("!="),
           less("<"),
           greater(">"),
           assign("="),
           comma(","),
           colon(":"),
           leftParen("\\("),
           rightParen("\\)"),
           leftBracket("\\["),
           rightBracket("\\["),
           alphaTerminal("[a-z][a-zA-Z0-9_]*"),
           numericTerminal("[0-9]*")
        {
            this->self.add
                (andTok, k_andTok)
                (def, k_def)
                (elihw, k_elihw)
                (elseTok, k_elseTok)
                (falseTok, k_falseTok)
                (fed, k_fed)
                (fi, k_fi)
                (ifTok, k_ifTok)
                (andTok, k_andTok)
                (input, k_input)
                (notTok, k_notTok)
                (orTok, k_orTok)
                (print, k_print)
                (returnTok, k_returnTok)
                (trueTok, k_trueTok)
                (whileTok, k_whileTok)
                (plus, k_plues)
                (minus, k_minus)
                (mult, k_mult)
                (div, k_div)
                (bang, k_bang)
                (equalTo, k_equalTo)
                (greaterEq, k_greaterEq)
                (lessEq, k_lessEq)
                (notEq, k_notEq)
                (less, k_less)
                (greater, k_greater)
                (assign, k_assign)
                (comma, k_comma)
                (colon, k_colon)
                (leftParen, k_leftParen)
                (rightParen, k_rightParen)
                (leftBracket, k_leftBracket)
                (rightBracket, k_rightBracket)
                (alphaTerminal, k_alphaTerminal)
                (numericTerminal, k_numericTerminal);

#ifdef USE_STATES
            this->self("WHITESPACE") = whiteSpace;
#else
            this->self += whiteSpace [ lex::_pass = lex::pass_flags::pass_ignore ];
#endif
        }

        lex::token_def<lex::omit> whiteSpace;
        lex::token_def<std::string> andTok;
        lex::token_def<std::string> def;
        lex::token_def<std::string> elihw;
        lex::token_def<std::string> elseTok;
        lex::token_def<std::string> falseTok;
        lex::token_def<std::string> fed;
        lex::token_def<std::string> fi;
        lex::token_def<std::string> ifTok;
        lex::token_def<std::string> input;
        lex::token_def<std::string> notTok;
        lex::token_def<std::string> orTok;
        lex::token_def<std::string> print;
        lex::token_def<std::string> returnTok;
        lex::token_def<std::string> trueTok;
        lex::token_def<std::string> whileTok;
        lex::token_def<std::string> plus;
        lex::token_def<std::string> minus;
        lex::token_def<std::string> mult;
        lex::token_def<std::string> div;
        lex::token_def<std::string> bang;
        lex::token_def<std::string> equalTo;
        lex::token_def<std::string> greaterEq;
        lex::token_def<std::string> lessEq;
        lex::token_def<std::string> notEq;
        lex::token_def<std::string> less;
        lex::token_def<std::string> greater;
        lex::token_def<std::string> assign;
        lex::token_def<std::string> comma;
        lex::token_def<std::string> colon;
        lex::token_def<std::string> leftParen;
        lex::token_def<std::string> rightParen;
        lex::token_def<std::string> leftBracket;
        lex::token_def<std::string> rightBracket;
        lex::token_def<std::string> alphaTerminal;
        lex::token_def<std::string> numericTerminal;
    };

    namespace qi = boost::spirit::qi;
    template <typename Iterator, typename Skipper>
    struct InterpreterGrammar : qi::grammar<Iterator, Skipper>
    {
        template <typename TokenDef>
        InterpreterGrammar(TokenDef const& tok)
            : InterpreterGrammar::base_type(start)
        {
            start
                = functionList >> endList >> qi::eoi
                ;

            // different expressions
            exp
                = (tok.alphaTerminal | tok.numericTerminal | tok.trueTok | tok.falseTok) >> expPrime
                ;

            expPrime
                = tok.equalTo   >> exp >> expPrime
                | tok.notEq     >> exp >> expPrime
                | tok.less      >> exp >> expPrime
                | tok.lessEq    >> exp >> expPrime
                | tok.greater   >> exp >> expPrime
                | tok.greaterEq >> exp >> expPrime
                | tok.andTok    >> exp >> expPrime
                | tok.orTok     >> exp >> expPrime
                | tok.notTok    >> exp
                | tok.plus      >> exp >> expPrime
                | tok.minus     >> exp >> expPrime
                | tok.mult      >> exp >> expPrime
                | tok.minus >> exp
                | tok.leftParen >> exp >> tok.rightParen
                | tok.alphaTerminal >> tok.leftBracket >> exp >> tok.rightBracket
                | tok.alphaTerminal >> tok.leftParen >> tok.rightParen
                | tok.alphaTerminal >> tok.leftParen >> exp >> tok.rightParen
                | qi::eps
                ;

            // parameter list
            paramList
                = exp >> paramListPrime
                ;

            paramListPrime
                = tok.comma >> exp >> paramListPrime
                | qi::eps
                ;

            // return statements
            returnStatement
                = tok.returnTok >> exp
                | tok.returnTok
                ;

            // function call statements
            callStatement
                = tok.alphaTerminal >> tok.leftParen >> tok.rightParen
                | tok.alphaTerminal >> tok.leftParen >> paramList >> tok.rightParen
                ;

            // variable assignment
            assignmentStatement
                = tok.alphaTerminal >> tok.assign >> exp
                | tok.alphaTerminal >> tok.leftBracket >> exp
                    >> tok.rightBracket >> tok.assign >> exp
                ;

            // list of integers
            intList
                = tok.numericTerminal >> intListPrime
                ;

            intListPrime
                = tok.comma >> tok.numericTerminal >> intListPrime
                | qi::eps
                ;

            // print out a variable
            printStatement
                = tok.print >> exp
                ;

            // take input
            inputStatement
                = tok.alphaTerminal >> tok.input
                ;

            // conditional statement
            conditionStatement
                = tok.ifTok >> exp >> tok.colon >> statements >> optionalElse
                ;

            // consitions have optional else
            optionalElse
                = tok.elseTok >> tok.colon >> statements
                | qi::eps
                ;

            // while loop
            whileStatement
                = tok.whileTok >> exp >> tok.colon >> statements >> tok.elihw
                ;

            // actual program statements
            endList
                = end >> endListPrime
                ;

            endListPrime
                = end >> endListPrime
                | qi::eps
                ;

            // end possibilities of program in global space
            end
                = callStatement
                | printStatement
                | tok.alphaTerminal >> tok.assign >> tok.input
                | tok.alphaTerminal >> tok.assign >> exp
                | tok.alphaTerminal >> tok.assign >> tok.leftBracket >> intList
                    >> tok.rightBracket
                | tok.alphaTerminal >> tok.leftBracket >> exp >> tok.rightBracket
                    >> tok.assign >> exp
                ;

            // function parameters
            param
                = tok.alphaTerminal >> paramPrime
                | tok.alphaTerminal >> tok.leftBracket >> tok.rightBracket
                    >> paramPrime
                ;

            // for handling left recursion in paramlist
            paramPrime
                = tok.comma >> tok.alphaTerminal >> paramPrime
                | qi::eps
                ;

            // define a statement as assignment print input condition while or call
            statement
                = assignmentStatement
                | printStatement
                | inputStatement
                | conditionStatement
                | whileStatement
                | callStatement
                | returnStatement
                ;

            // general statement list
            statements
                = statement >> statementsPrime
                ;

            // for handling left recursion in statements
            statementsPrime
                = statement >> statementsPrime
                | qi::eps
                ;

            // functions
            functionList
                = tok.def >> tok.alphaTerminal >> tok.leftParen
                    >> param >> tok.rightParen >> tok.colon
                    >> statements >> tok.fed
                | tok.def >> tok.alphaTerminal >> tok.leftParen
                    >> tok.rightParen >> tok.colon >> statements >> tok.fed
                | qi::eps
                ;

            BOOST_SPIRIT_DEBUG_NODES(
                (start) (functionList) (endList) (endListPrime) (param)
                (paramPrime) (paramList) (paramListPrime) (statements)
                (statementsPrime) (statement) (assignmentStatement)
                (printStatement) (inputStatement) (conditionStatement)
                (whileStatement) (callStatement) (returnStatement) (exp)
                (expPrime) (intList) (intListPrime) (optionalElse) (end)
            )

        }
      private:
        qi::rule<Iterator, Skipper> start;
        qi::rule<Iterator, Skipper> functionList;
        qi::rule<Iterator, Skipper> endList;
        qi::rule<Iterator, Skipper> endListPrime;
        qi::rule<Iterator, Skipper> param;
        qi::rule<Iterator, Skipper> paramPrime;
        qi::rule<Iterator, Skipper> paramList;
        qi::rule<Iterator, Skipper> paramListPrime;
        qi::rule<Iterator, Skipper> statements;
        qi::rule<Iterator, Skipper> statementsPrime;
        qi::rule<Iterator, Skipper> statement;
        qi::rule<Iterator, Skipper> assignmentStatement;
        qi::rule<Iterator, Skipper> printStatement;
        qi::rule<Iterator, Skipper> inputStatement;
        qi::rule<Iterator, Skipper> conditionStatement;
        qi::rule<Iterator, Skipper> whileStatement;
        qi::rule<Iterator, Skipper> callStatement;
        qi::rule<Iterator, Skipper> returnStatement;
        qi::rule<Iterator, Skipper> exp;
        qi::rule<Iterator, Skipper> expPrime;
        qi::rule<Iterator, Skipper> intList;
        qi::rule<Iterator, Skipper> intListPrime;
        qi::rule<Iterator, Skipper> optionalElse;
        qi::rule<Iterator, Skipper> end;
    };
}

#include <fstream>
#include <iterator>

int main(int argc, char** argv) {
    namespace lex = boost::spirit::lex;
    namespace qi = boost::spirit::qi;

    typedef lex::lexertl::token<char const*, boost::mpl::vector<lex::omit, std::string>, boost::mpl::true_> token_type;
#ifdef USE_STATES
    typedef lex::lexertl::actor_lexer<token_type> lexer_type;
    typedef qi::in_state_skipper<interpreter::LexerTokens<lexer_type>::lexer_def> skipper_type;
    typedef interpreter::LexerTokens<lexer_type>::iterator_type iterator_type;
#else
    typedef lex::lexertl::actor_lexer<token_type> lexer_type;
    typedef interpreter::LexerTokens<lexer_type>::iterator_type iterator_type;
    typedef qi::unused_type skipper_type;
#endif

    interpreter::LexerTokens<lexer_type> lexer;
    interpreter::InterpreterGrammar<iterator_type, skipper_type> parser(lexer);

    // read the file
    if (argc != 2)
    {
        std::cout << "File required" << std::endl;
        return 1;
    }

    std::ifstream t(argv[1]);
    std::string const sourceCode { std::istreambuf_iterator<char>(t), {} };

    char const* first = sourceCode.data();
    char const* last = first + sourceCode.size();
#ifdef USE_STATES
    bool ok = lex::tokenize_and_phrase_parse(first, last, lexer, parser, qi::in_state("WHITESPACE")[lexer.self]);
#else
    bool ok = lex::tokenize_and_parse(first, last, lexer, parser);
#endif

    std::cout << "Remaining '" << std::string(first,last) << "'" << std::endl;
    std::cout << "R is " << std::boolalpha << ok << std::endl;
}

Prints

<start>
  <try>[def][print_it][(][x][,][y][)][:][print][3][*][x][+][y][return][fed]</try>
  <functionList>
    <try>[def][print_it][(][x][,][y][)][:][print][3][*][x][+][y][return][fed]</try>
    <param>
      <try>[x][,][y][)][:][print][3][*][x][+][y][return][fed]</try>
      <paramPrime>
        <try>[,][y][)][:][print][3][*][x][+][y][return][fed]</try>
        <paramPrime>
          <try>[)][:][print][3][*][x][+][y][return][fed]</try>
          <success>[)][:][print][3][*][x][+][y][return][fed]</success>
          <attributes>[]</attributes>
        </paramPrime>
        <success>[)][:][print][3][*][x][+][y][return][fed]</success>
        <attributes>[]</attributes>
      </paramPrime>
      <success>[)][:][print][3][*][x][+][y][return][fed]</success>
      <attributes>[]</attributes>
    </param>
    <statements>
      <try>[print][3][*][x][+][y][return][fed]</try>
      <statement>
        <try>[print][3][*][x][+][y][return][fed]</try>
        <assignmentStatement>
          <try>[print][3][*][x][+][y][return][fed]</try>
          <fail/>
        </assignmentStatement>
        <printStatement>
          <try>[print][3][*][x][+][y][return][fed]</try>
          <exp>
            <try>[3][*][x][+][y][return][fed]</try>
            <expPrime>
              <try>[*][x][+][y][return][fed]</try>
              <exp>
                <try>[x][+][y][return][fed]</try>
                <expPrime>
                  <try>[+][y][return][fed]</try>
                  <exp>
                    <try>[y][return][fed]</try>
                    <expPrime>
                      <try>[return][fed]</try>
                      <success>[return][fed]</success>
                      <attributes>[]</attributes>
                    </expPrime>
                    <success>[return][fed]</success>
                    <attributes>[]</attributes>
                  </exp>
                  <expPrime>
                    <try>[return][fed]</try>
                    <success>[return][fed]</success>
                    <attributes>[]</attributes>
                  </expPrime>
                  <success>[return][fed]</success>
                  <attributes>[]</attributes>
                </expPrime>
                <success>[return][fed]</success>
                <attributes>[]</attributes>
              </exp>
              <expPrime>
                <try>[return][fed]</try>
                <success>[return][fed]</success>
                <attributes>[]</attributes>
              </expPrime>
              <success>[return][fed]</success>
              <attributes>[]</attributes>
            </expPrime>
            <success>[return][fed]</success>
            <attributes>[]</attributes>
          </exp>
          <success>[return][fed]</success>
          <attributes>[]</attributes>
        </printStatement>
        <success>[return][fed]</success>
        <attributes>[]</attributes>
      </statement>
      <statementsPrime>
        <try>[return][fed]</try>
        <statement>
          <try>[return][fed]</try>
          <assignmentStatement>
            <try>[return][fed]</try>
            <fail/>
          </assignmentStatement>
          <printStatement>
            <try>[return][fed]</try>
            <fail/>
          </printStatement>
          <inputStatement>
            <try>[return][fed]</try>
            <fail/>
          </inputStatement>
          <conditionStatement>
            <try>[return][fed]</try>
            <fail/>
          </conditionStatement>
          <whileStatement>
            <try>[return][fed]</try>
            <fail/>
          </whileStatement>
          <callStatement>
            <try>[return][fed]</try>
            <fail/>
          </callStatement>
          <returnStatement>
            <try>[return][fed]</try>
            <exp>
              <try>[fed]</try>
              <fail/>
            </exp>
            <success>[fed]</success>
            <attributes>[]</attributes>
          </returnStatement>
          <success>[fed]</success>
          <attributes>[]</attributes>
        </statement>
        <statementsPrime>
          <try>[fed]</try>
          <statement>
            <try>[fed]</try>
            <assignmentStatement>
              <try>[fed]</try>
              <fail/>
            </assignmentStatement>
            <printStatement>
              <try>[fed]</try>
              <fail/>
            </printStatement>
            <inputStatement>
              <try>[fed]</try>
              <fail/>
            </inputStatement>
            <conditionStatement>
              <try>[fed]</try>
              <fail/>
            </conditionStatement>
            <whileStatement>
              <try>[fed]</try>
              <fail/>
            </whileStatement>
            <callStatement>
              <try>[fed]</try>
              <fail/>
            </callStatement>
            <returnStatement>
              <try>[fed]</try>
              <fail/>
            </returnStatement>
            <fail/>
          </statement>
          <success>[fed]</success>
          <attributes>[]</attributes>
        </statementsPrime>
        <success>[fed]</success>
        <attributes>[]</attributes>
      </statementsPrime>
      <success>[fed]</success>
      <attributes>[]</attributes>
    </statements>
    <success></success>
    <attributes>[]</attributes>
  </functionList>
  <endList>
    <try></try>
    <end>
      <try></try>
      <callStatement>
        <try></try>
        <fail/>
      </callStatement>
      <printStatement>
        <try></try>
        <fail/>
      </printStatement>
      <fail/>
    </end>
    <fail/>
  </endList>
  <fail/>
</start>
Remaining ''
R is false

Spirit Fails to Parse After only Appearing to get First symbol From the Lexer

1 Answers