3
votes

I have pieced together a parser using boost libraries from various sources on the net. It works (although is not as clean as I would have liked) but I ran into a particular problem. In the first part of the parser I first parse the function name, then a set of arguments enclosed in parenthesis. Later on, when parsing the actual expression, in the parse factor I allow for numbers and variables to be parsed. However, I would like to only parse those variables that have been previously declared in the vars parser. Here is my grammar:

template<typename Iterator>
  struct exp_parser : qi::grammar<Iterator, expression(), ascii::space_type>
  {
    exp_parser() : exp_parser::base_type(all)
    {
      using qi::_val;
      using qi::_1;
      using qi::char_;
      using qi::double_;
      using qi::lit;
      using phoenix::at_c;
      using phoenix::push_back;
      using phoenix::bind;

      all =
        name [at_c<0>(_val) = _1] >> '(' >> vars [at_c<1>(_val) = _1] >> ')' >> '='
        >> expr [at_c<2>(_val) = _1];

      // Parsing of actual expression
      expr =
          term                            [_val = _1]
          >> *(   ('+' >> term            [_val += _1])
              |   ('-' >> term            [_val -= _1])
            );

      term =
          factor                          [_val = _1]
          >> *(   ('*' >> factor          [_val *= _1])
              |   ('/' >> factor          [_val /= _1])
            );

      factor =
          simple                          [_val = _1]
          |   '(' >> expr                 [_val = _1] >> ')'
          |   ('-' >> factor              [_val = bind(make_unary, UN_OP::MIN, _1)])
          |   ("sin" >> factor            [_val = bind(make_unary, UN_OP::SIN, _1)])
          |   ("cos" >> factor            [_val = bind(make_unary, UN_OP::COS, _1)])
          |   ("tan" >> factor            [_val = bind(make_unary, UN_OP::TAN, _1)])
          |   ('+' >> factor              [_val = _1]);

      // Prototyping of expression
      prtctd %= lit("sin") | lit("cos") | lit("tan");
      var    %= !prtctd >> char_('a','z');
      num    %= double_;
      simple %= var | num | ('(' >> expr >> ')');
      name   %= ((char_('a','z') | char_('A','Z') ) >> *(char_('a','z') | char_('A','Z') | char_('0','9') ));
      vars   %= (char_('a','z') >> *(',' >> char_('a','z')));
    }
    qi::rule<Iterator, ast(), ascii::space_type> expr, term, factor, simple;

    qi::rule<Iterator, expression(), ascii::space_type> all;
    qi::rule<Iterator, std::string(), ascii::space_type> name, prtctd;
    qi::rule<Iterator, std::vector<char>(), ascii::space_type> vars;
    qi::rule<Iterator, char(), ascii::space_type> var;
    qi::rule<Iterator, double(), ascii::space_type> num;
  };

And this is the struct that I use to store it all:

  struct expression {
    std::string name;
    std::vector<char> arguments;
    ast syntax_tree;
  };

Now, how can I access the std::vector<char> in the factor parser so that I only parse the right variables.

Also, I am new to using boost and using this as an exercise to myself to start learning a little bit about. If anyone has any advice, please let me know on how I can clean up this code.

Thanks in advance!

1

1 Answers

2
votes

This is a big anti-pattern in Spirit:

  all =
    name [at_c<0>(_val) = _1] >> '(' >> vars [at_c<1>(_val) = _1] >> ')' >> '='
    >> expr [at_c<2>(_val) = _1];

In fact, I'm convinced the samples you've been looking at show better approaches. Also, I note that you have picked code from conflicting approaches (you cannot synthesize a syntax tree when the semantic actions evaluate expression values on the fly).

First off, get rid of semantic-action thinking: Boost Spirit: "Semantic actions are evil"?

BOOST_FUSION_ADAPT_STRUCT(expression, name, arguments, syntax_tree)

all = name >> '(' >> vars >> ')' >> '=' >> expr;

There are many other "sicknesses":

  • prtctd should be a lexeme, so si\nn doesn't match
  • *(char_('a','z') | char_('A','Z') | char_('0','9') ) is simply *alnum
  • name should also be a lexeme, so simply

    name   = alpha >> *alnum;
    
  • vars doesn't even use var?

All in all, here's a simplifix of those rules (assuming you dropped the skipper from prtctd and name):

  prtctd = lit("sin") | "cos" | "tan";
  var    = !prtctd >> ascii::lower;
  num    = double_;
  simple = var | num | '(' >> expr >> ')';
  name   = ascii::alpha >> *ascii::alnum;
  vars   = var % ',';

A Self Contained Example

Let's add a few mock parts to the above and have something we can test:

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/adapted.hpp>

namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;

struct ast {
    template <typename T> ast& operator+=(T&&) { return *this; }
    template <typename T> ast& operator*=(T&&) { return *this; }
    template <typename T> ast& operator/=(T&&) { return *this; }
    template <typename T> ast& operator-=(T&&) { return *this; }
    ast() = default;
    template <typename T> ast(T&&) { }
    template <typename T> ast& operator =(T&&) { return *this; }

    friend std::ostream& operator<<(std::ostream& os, ast) { return os << "syntax_tree"; }
};

struct expression {
    std::string name;
    std::vector<std::string> arguments;
    ast syntax_tree;

    friend std::ostream& operator<<(std::ostream& os, expression const& e) { 
        os << e.name << "(";
        for (auto arg : e.arguments) os << arg << ", ";
        return os << ") = " << e.syntax_tree;
    }
};

BOOST_FUSION_ADAPT_STRUCT(expression, name, arguments, syntax_tree)

enum UN_OP { MIN, SIN, COS, TAN };

struct make_unary_f {
    template <typename... Ts> qi::unused_type operator()(Ts&&...) const { return qi::unused; }
} static const make_unary = {};

template<typename Iterator>
  struct exp_parser : qi::grammar<Iterator, expression(), ascii::space_type>
  {
    exp_parser() : exp_parser::base_type(all)
    {
      using qi::_val;
      using qi::_1;
      using qi::char_;
      using qi::double_;
      using qi::lit;
      using phoenix::at_c;
      using phoenix::push_back;
      using phoenix::bind;

      all = name >> '(' >> vars >> ')' >> '=' >> expr;

      // Parsing of actual expression
      expr =
          term                   [_val = _1]
          >> *(   ('+' >> term   [_val += _1])
              |   ('-' >> term   [_val -= _1])
            );

      term =
          factor                 [_val = _1]
          >> *(   ('*' >> factor [_val *= _1])
              |   ('/' >> factor [_val /= _1])
            );

      factor =
          simple                 [_val = _1]
          |   '(' >> expr        [_val = _1] >> ')'
          |   ('-' >> factor     [_val = bind(make_unary, UN_OP::MIN, _1)])
          |   ("sin" >> factor   [_val = bind(make_unary, UN_OP::SIN, _1)])
          |   ("cos" >> factor   [_val = bind(make_unary, UN_OP::COS, _1)])
          |   ("tan" >> factor   [_val = bind(make_unary, UN_OP::TAN, _1)])
          |   ('+' >> factor     [_val = _1]);

      // Prototyping of expression
      prtctd = lit("sin") | "cos" | "tan";
      var    = !prtctd >> ascii::lower;
      num    = double_;
      simple = var | num | '(' >> expr >> ')';
      name   = ascii::alpha >> *ascii::alnum;
      vars   = var % ',';
    }

  private:
    qi::rule<Iterator, ast(), ascii::space_type> expr, term, factor, simple;
    qi::rule<Iterator, expression(), ascii::space_type> all;
    qi::rule<Iterator, std::vector<std::string>(), ascii::space_type> vars;

    // lexemes
    qi::rule<Iterator, std::string()> name, prtctd;
    qi::rule<Iterator, std::string()> var;
    qi::rule<Iterator, double()> num;
  };

int main() {
    for (std::string const& input : {
            "",
            "foo (a) = 3*8+a",
            "bar (x, y) = (sin(x) + y*y) / (x + y)",
            "oops (x, y) = (sin(x) + y*y) / (x + a)",
        })
    try {
        using It = std::string::const_iterator;
        It f = input.begin(), l = input.end();

        expression e;
        bool ok = qi::phrase_parse(f, l, exp_parser<It>{} >> qi::eoi, ascii::space, e);

        if (ok) {
            std::cout << "Parse success: '" << input << "' -> " << e << "\n";
        } else {
            std::cout << "Parse failed: '" << input << "'\n";
        }

        if (f != l)
            std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n";
    } catch(std::exception const& e) {
        std::cout << "Exception: '" << e.what() << "'\n";
    }
}

As expected, it still parses all non-empty lines, including oops which mistakenly uses a instead of y:

Parse failed: ''
Parse success: 'foo (a) = 3*8+a' -> foo(a, ) = syntax_tree
Parse success: 'bar (x, y) = (sin(x) + y*y) / (x + y)' -> bar(x, y, ) = syntax_tree
Parse success: 'oops (x, y) = (sin(x) + y*y) / (x + a)' -> oops(x, y, ) = syntax_tree

Declaring and Checking

To match declared variables, I'd use qi::symbols<>:

qi::symbols<char> _declared;

simple = _declared | num | '(' >> expr >> ')';

Now, to add declared items, we'll devise a Phoenix function,

struct add_declaration_f {
    add_declaration_f(qi::symbols<char>& ref) : _p(std::addressof(ref)) {}
    qi::symbols<char>* _p;
    void operator()(std::string const& arg) const { _p->add(arg); }
};

phoenix::function<add_declaration_f> _declare { _declared };

And use it:

  vars  %= var [ _declare(_1) ] % ',';

Integrating Demo

Live On Coliru

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/adapted.hpp>

namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;

struct ast {
    template <typename T> ast& operator+=(T&&) { return *this; }
    template <typename T> ast& operator*=(T&&) { return *this; }
    template <typename T> ast& operator/=(T&&) { return *this; }
    template <typename T> ast& operator-=(T&&) { return *this; }
    ast() = default;
    template <typename T> ast(T&&) { }
    template <typename T> ast& operator =(T&&) { return *this; }

    friend std::ostream& operator<<(std::ostream& os, ast) { return os << "syntax_tree"; }
};

struct expression {
    std::string name;
    std::vector<std::string> arguments;
    ast syntax_tree;

    friend std::ostream& operator<<(std::ostream& os, expression const& e) { 
        os << e.name << "(";
        for (auto arg : e.arguments) os << arg << ", ";
        return os << ") = " << e.syntax_tree;
    }
};

BOOST_FUSION_ADAPT_STRUCT(expression, name, arguments, syntax_tree)

enum UN_OP { MIN, SIN, COS, TAN };

struct make_unary_f {
    template <typename... Ts> qi::unused_type operator()(Ts&&...) const { return qi::unused; }
} static const make_unary = {};

template<typename Iterator>
  struct exp_parser : qi::grammar<Iterator, expression(), ascii::space_type>
  {
    exp_parser() : exp_parser::base_type(all)
    {
      using qi::_val;
      using qi::_1;
      using qi::char_;
      using qi::double_;
      using qi::lit;
      using phoenix::at_c;
      using phoenix::push_back;
      using phoenix::bind;

      all = name >> '(' >> vars >> ')' >> '=' >> expr;

      // Parsing of actual expression
      expr =
          term                   [_val = _1]
          >> *(   ('+' >> term   [_val += _1])
              |   ('-' >> term   [_val -= _1])
            );

      term =
          factor                 [_val = _1]
          >> *(   ('*' >> factor [_val *= _1])
              |   ('/' >> factor [_val /= _1])
            );

      factor =
          simple                 [_val = _1]
          |   '(' >> expr        [_val = _1] >> ')'
          |   ('-' >> factor     [_val = bind(make_unary, UN_OP::MIN, _1)])
          |   ("sin" >> factor   [_val = bind(make_unary, UN_OP::SIN, _1)])
          |   ("cos" >> factor   [_val = bind(make_unary, UN_OP::COS, _1)])
          |   ("tan" >> factor   [_val = bind(make_unary, UN_OP::TAN, _1)])
          |   ('+' >> factor     [_val = _1]);

      // Prototyping of expression
      prtctd = lit("sin") | "cos" | "tan";
      var    = !prtctd >> ascii::lower;
      num    = double_;
      simple = _declared | num | '(' >> expr >> ')';
      name   = ascii::alpha >> *ascii::alnum;
      vars  %= var [ _declare(_1) ] % ',';
    }

  private:
    qi::symbols<char> _declared;

    struct add_declaration_f {
        add_declaration_f(qi::symbols<char>& ref) : _p(std::addressof(ref)) {}
        qi::symbols<char>* _p;
        void operator()(std::string const& arg) const { _p->add(arg); }
    };

    phoenix::function<add_declaration_f> _declare { _declared };

    qi::rule<Iterator, ast(), ascii::space_type> expr, term, factor, simple;
    qi::rule<Iterator, expression(), ascii::space_type> all;
    qi::rule<Iterator, std::vector<std::string>(), ascii::space_type> vars;

    // lexemes
    qi::rule<Iterator, std::string()> name, prtctd;
    qi::rule<Iterator, std::string()> var;
    qi::rule<Iterator, double()> num;
  };

int main() {
    for (std::string const& input : {
            "",
            "foo (a) = 3*8+a",
            "bar (x, y) = (sin(x) + y*y) / (x + y)",
            "oops (x, y) = (sin(x) + y*y) / (x + a)",
        })
    try {
        using It = std::string::const_iterator;
        It f = input.begin(), l = input.end();

        expression e;
        bool ok = qi::phrase_parse(f, l, exp_parser<It>{}, ascii::space, e);

        if (ok) {
            std::cout << "Parse success: '" << input << "' -> " << e << "\n";
        } else {
            std::cout << "Parse failed: '" << input << "'\n";
        }

        if (f != l)
            std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n";
    } catch(std::exception const& e) {
        std::cout << "Exception: '" << e.what() << "'\n";
    }
}

Which prints:

Parse failed: ''
Parse success: 'foo (a) = 3*8+a' -> foo(a, ) = syntax_tree
Parse success: 'bar (x, y) = (sin(x) + y*y) / (x + y)' -> bar(x, y, ) = syntax_tree
Parse success: 'oops (x, y) = (sin(x) + y*y) / (x + a)' -> oops(x, y, ) = syntax_tree
Remaining unparsed: '/ (x + a)'

Adding >> qi::eoi to the parser expression we get: Live On Coliru

Parse failed: ''
Parse success: 'foo (a) = 3*8+a' -> foo(a, ) = syntax_tree
Parse success: 'bar (x, y) = (sin(x) + y*y) / (x + y)' -> bar(x, y, ) = syntax_tree
Parse failed: 'oops (x, y) = (sin(x) + y*y) / (x + a)'
Remaining unparsed: 'oops (x, y) = (sin(x) + y*y) / (x + a)'