2
votes

I have a list of key-value pairs, one per line (separated by EOL). I got Boost Spirit to do what I want for properly formatted lines (e.g. "MyKey : MyValue \r\n MyKey2 : MyValue2").

Now my problem is that I want to skip lines that do not comply. For example:

This is some title line! 
Key1:Value1
Some more gibberish to skip
Key2:Value2

I came up with the following code that I thought would work, but instead, the resulting map is empty and parsing fails.

  • In my KeyRule, I added '- qi::eol' to avoid eating up the invalid line until the first KeyValue separator is encountered.
  • In my ItemRule, both occurrences of PairRule are made optional, and the eol is matched one or more times to handle consecutive line breaks.

I read the following thread: Why does parsing a blank line with Spirit produce an empty key value pair in map? It skips the comment line (starting with #) via a custom skipper but in my case, I want to skip ANY lines not containing the Key Value separator :. There has to be something elegant.

#include <iostream>
#include <string>
#include <map>

#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/std_pair.hpp>

namespace qi = boost::spirit::qi;

// Qi grammar that parses EOL-separated "key : value" lines into a
// std::map<std::string, std::string>.
//
// Template parameters:
//   Iterator - iterator type of the input being parsed.
//   Skipper  - skip parser type used by every rule; defaults to qi::blank_type.
//
// NOTE(review): no rule here consumes a line that lacks ':'. When PairRule
// fails on such a line, the optional (-PairRule) still succeeds without
// consuming anything, and +qi::eol cannot match at the start of that line's
// text, so the parse appears to stop at the first non-conforming line.
// Confirm with BOOST_SPIRIT_DEBUG.
template <typename Iterator, typename Skipper = qi::blank_type>
struct KeyValueParser : qi::grammar<Iterator, std::map<std::string, std::string>(), Skipper> {
    KeyValueParser() : KeyValueParser::base_type(ItemRule) {
        // An optional pair, then any number of (one or more EOLs followed by
        // an optional pair), then an optional trailing EOL.
        ItemRule = -PairRule >> *(+qi::eol >> -PairRule) >> -qi::eol;
        // A pair is: key, a literal ':', value.
        PairRule = KeyRule >> ':' >> ValueRule;
        // Key: raw input span of one or more characters that are neither ':'
        // nor an EOL, so a key never runs past the end of its line.
        KeyRule = qi::raw[+(qi::char_ - ':' - qi::eol)];
        // Value: raw input span of one or more characters that are not an EOL.
        ValueRule = qi::raw[+(qi::char_ - qi::eol)];
    }
    // Start rule; its attribute is the resulting map.
    qi::rule<Iterator, std::map<std::string, std::string>(), Skipper> ItemRule;
    // One key/value entry, exposed as a std::pair (adapted through
    // boost/fusion/include/std_pair.hpp).
    qi::rule<Iterator, std::pair<std::string, std::string>(), Skipper> PairRule;
    // Text before the ':' separator.
    qi::rule<Iterator, std::string(), Skipper> KeyRule;
    // Text after the ':' separator, up to the end of the line.
    qi::rule<Iterator, std::string(), Skipper> ValueRule;
};

// Runs KeyValueParser over a fixed three-line sample (one non-pair line
// followed by two "key : value" lines) and prints either the parsed entries
// or the part of the input that was left unparsed.
int main() {
    typedef std::map<std::string, std::string> MyMap;

    const std::string input = " Line To Skip! \r\n  My Key : Value \r\n My2ndKey : Long    Value \r\n";
    std::string::const_iterator iter = input.begin();
    std::string::const_iterator end = input.end();

    MyMap parsed_map;
    KeyValueParser<std::string::const_iterator> parser;

    // phrase_parse advances `iter`; blanks between tokens are skipped.
    const bool parsed_ok = qi::phrase_parse(iter, end, parser, qi::blank, parsed_map);
    const bool consumed_all = (iter == end);

    if (!parsed_ok || !consumed_all) {
        // Either the grammar did not match or it stopped before the end.
        std::cout << "Something failed. Unparsed: ->|" << std::string(iter, end) << "|<-" << std::endl;
    } else {
        std::cout << "Success." << std::endl;
        MyMap::const_iterator entry = parsed_map.begin();
        while (entry != parsed_map.end()) {
            std::cout << "\"" << entry->first << "\" : \"" << entry->second << "\"" << std::endl;
            ++entry;
        }
    }

    // Block on stdin before exiting (presumably to keep a console window open).
    getchar();
    return 0;
}
1

1 Answer

2
votes

The most elegant I can think of is to parse a keyvalue pair /optionally/, followed by any gibberish till the end of the line.

You could write:

ItemRule  = -PairRule % (*~char_("\r\n") >> eol);

The only caveat is that on gibberish lines, the "default" pair (empty key & value) will be inserted, so you'd have to remove that post-parse.

An equivalent way to write it (but less elegant) would be:

ItemRule  = (hold[PairRule] | omit[ *~char_("\r\n") ]) % eol;

DEMO

Here's a full demo. Note I also moved the skipper knowledge inside the grammar (it's essential to the correct operation of the grammar).

Finally, I used BOOST_SPIRIT_DEBUG to print debug output.

Live On Coliru

#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/std_pair.hpp>
#include <map>

namespace qi = boost::spirit::qi;

// Qi grammar that collects "key : value" lines into a
// std::map<std::string, std::string> while tolerating lines that are not
// key/value pairs.
//
// The grammar itself is declared without a skipper: blank skipping is applied
// internally through skip(blank)[...], so callers use qi::parse rather than
// qi::phrase_parse.
//
// Template parameters:
//   Iterator - iterator type of the input being parsed.
template <typename Iterator>
struct KeyValueParser : qi::grammar<Iterator, std::map<std::string, std::string>()> {
    KeyValueParser() : KeyValueParser::base_type(ItemRule) {

        using namespace qi;

        // Every EOL-separated line is either a key/value pair or arbitrary
        // text that is consumed and discarded (omit[]). hold[] parses into a
        // copy of the attribute and only commits it when PairRule succeeds, so
        // a line that merely starts like a key leaves nothing behind when the
        // alternative falls through to the "gibberish" branch.
        //
        // Equivalent formulation, kept for reference only. It used to be
        // assigned here and was then immediately overwritten by the
        // definition below, so it never took effect:
        //   ItemRule = skip(blank) [ -PairRule % (*~char_("\r\n") >> eol) ];
        ItemRule  = skip(blank) [ hold[PairRule] | omit[ *~char_("\r\n") ] ] % eol;
        // A pair is: key, a literal ':', value.
        PairRule  = KeyRule >> ':' >> ValueRule;
        // Key: one or more characters other than CR, LF or ':'.
        KeyRule   = +~char_("\r\n:");
        // Value: one or more characters other than CR or LF.
        ValueRule = +~char_("\r\n");

        BOOST_SPIRIT_DEBUG_NODES((ItemRule)(PairRule)(KeyRule)(ValueRule))
    }
  private:
    // Start rule; no skipper in its signature (skipping is set up inside it).
    qi::rule<Iterator, std::map<std::string, std::string>()> ItemRule;
    // One key/value entry; blanks around the tokens are skipped.
    qi::rule<Iterator, std::pair<std::string, std::string>(), qi::blank_type> PairRule;
    // lexemes: declared without a skipper, so blanks inside (and trailing)
    // a key or value are kept as part of the text.
    qi::rule<Iterator, std::string()> KeyRule, ValueRule;
};

int main() {
    const std::string input = R"(
 Line To Skip! 
  My Key : Value 
Some more gibberish to skip
 My2ndKey : Long    Value 
)";

    std::string::const_iterator iter = input.begin(), end = input.end();

    KeyValueParser<std::string::const_iterator> parser;
    std::map<std::string, std::string> parsed_map;

    bool result = qi::parse(iter, end, parser, parsed_map);

    if (result && (iter == end)) {
        std::cout << "Success.\n";

        // drop empty lines:
        parsed_map.erase("");

        for (auto& p : parsed_map)
            std::cout << "\"" << p.first << "\" : \"" << p.second << "\"\n";
    } else {
        std::cout << "Something failed. Unparsed: ->|" << std::string(iter, end) << "|<-\n";
    }
}

Prints

Success.
"My Key " : "Value "
"My2ndKey " : "Long    Value "

With debug information

<ItemRule>
  <try>\n Line To Skip! \n  M</try>
  <PairRule>
    <try>\n Line To Skip! \n  M</try>
    <KeyRule>
      <try>\n Line To Skip! \n  M</try>
      <fail/>
    </KeyRule>
    <fail/>
  </PairRule>
  <PairRule>
    <try> Line To Skip! \n  My</try>
    <KeyRule>
      <try>Line To Skip! \n  My </try>
      <success>\n  My Key : Value \nS</success>
      <attributes>[[L, i, n, e,  , T, o,  , S, k, i, p, !,  ]]</attributes>
    </KeyRule>
    <fail/>
  </PairRule>
  <PairRule>
    <try>  My Key : Value \nSo</try>
    <KeyRule>
      <try>My Key : Value \nSome</try>
      <success>: Value \nSome more g</success>
      <attributes>[[M, y,  , K, e, y,  ]]</attributes>
    </KeyRule>
    <ValueRule>
      <try>Value \nSome more gib</try>
      <success>\nSome more gibberish</success>
      <attributes>[[V, a, l, u, e,  ]]</attributes>
    </ValueRule>
    <success>\nSome more gibberish</success>
    <attributes>[[[M, y,  , K, e, y,  ], [V, a, l, u, e,  ]]]</attributes>
  </PairRule>
  <PairRule>
    <try>Some more gibberish </try>
    <KeyRule>
      <try>Some more gibberish </try>
      <success>\n My2ndKey : Long   </success>
      <attributes>[[S, o, m, e,  , m, o, r, e,  , g, i, b, b, e, r, i, s, h,  , t, o,  , s, k, i, p]]</attributes>
    </KeyRule>
    <fail/>
  </PairRule>
  <PairRule>
    <try> My2ndKey : Long    </try>
    <KeyRule>
      <try>My2ndKey : Long    V</try>
      <success>: Long    Value \n</success>
      <attributes>[[M, y, 2, n, d, K, e, y,  ]]</attributes>
    </KeyRule>
    <ValueRule>
      <try>Long    Value \n</try>
      <success>\n</success>
      <attributes>[[L, o, n, g,  ,  ,  ,  , V, a, l, u, e,  ]]</attributes>
    </ValueRule>
    <success>\n</success>
    <attributes>[[[M, y, 2, n, d, K, e, y,  ], [L, o, n, g,  ,  ,  ,  , V, a, l, u, e,  ]]]</attributes>
  </PairRule>
  <PairRule>
    <try></try>
    <KeyRule>
      <try></try>
      <fail/>
    </KeyRule>
    <fail/>
  </PairRule>
  <success></success>
  <attributes>[[[[], []], [[M, y,  , K, e, y,  ], [V, a, l, u, e,  ]], [[M, y, 2, n, d, K, e, y,  ], [L, o, n, g,  ,  ,  ,  , V, a, l, u, e,  ]]]]</attributes>
</ItemRule>