2
votes

Problem:

  1. Cannot find the reason for missing parsed attributes in a Qi JSON parser. The parser successfully parses the input string, but the output data structure, json_object, contains only the first attribute (attribute_a) and is missing the others (attribute_b and attribute_c).

Software: Boost Spirit Qi using Boost 1.52

Platform: Windows 7 (64-bit)

Compiler: Visual Studio 2010

Request:

  1. Help finding out why the parser is not finding all attributes.

  2. Looking at the debugging output, I see that the attributes are not being put into a single std::vector object. I am using the JSON grammar I found on http://www.json.org/ as a reference. What I would like to see as the output of 'members' is a single std::vector containing all json_pair objects found for that JSON object.

Limitations:

  1. Parser does not support Unicode strings.

Code:

    #define BOOST_SPIRIT_DEBUG
    #include <boost/spirit/include/qi.hpp>
    #include <boost/spirit/include/phoenix_core.hpp>
    #include <boost/spirit/include/phoenix_container.hpp>
    #include <boost/spirit/include/phoenix_statement.hpp>
    #include <boost/spirit/include/phoenix_operator.hpp>
    #include <boost/fusion/include/adapt_struct.hpp>
    #include <boost/fusion/include/boost_tuple.hpp>
    #include <boost/variant/recursive_variant.hpp>

    #include <boost/make_shared.hpp>

    #include <vector>

    namespace signal_processing {
      namespace parsing {

        struct json_object;
        struct json_array;

        typedef boost::variant < std::string,
            double,
            boost::recursive_wrapper<json_object>,
            boost::recursive_wrapper<json_array>,
            bool > json_value;

        typedef boost::tuple < std::string, json_value> json_pair;

        // Attribute type of the `members` rule: the list of key/value pairs.
        // The member name `items` is referenced by BOOST_FUSION_ADAPT_STRUCT
        // later in this listing.
        struct json_members
        {
            std::vector < json_pair > items;
        };

        // Attribute type of the `object` rule (and of the grammar as a whole).
        // Holds one json_members entry per successful match of `members`.
        struct json_object
        {
            std::vector < json_members > children;
        };

        // Attribute type of the `element_list` and `elements` rules:
        // the values of a JSON array.
        struct json_array
        {
            std::vector < json_value > list;
        };

        using boost::spirit::qi::bool_;
        using boost::spirit::qi::char_;
        using boost::spirit::qi::double_;
        using boost::spirit::qi::eol;
        using boost::spirit::qi::float_;
        using boost::spirit::qi::int_;
        using boost::spirit::qi::lexeme;
        using boost::spirit::qi::lit;   
        using boost::spirit::qi::space;
        using boost::spirit::qi::_val;
        using boost::spirit::qi::_1;

        // Qi grammar for a JSON document whose top-level element is an object.
        //
        // Template parameters:
        //   Iterator - iterator type over the input text.
        //   Skipper  - skip parser applied between tokens (the caller in this
        //              listing passes boost::spirit::ascii::space).
        //
        // The synthesized attribute of the grammar is a json_object.
        template <typename Iterator, typename Skipper>
        struct json_grammar : boost::spirit::qi::grammar < Iterator, json_object(), Skipper>
        {
            // Defines every rule; `object` is the start rule.
            json_grammar() : json_grammar::base_type(object)
            {
                // '{' followed by zero or more `members` matches, then '}'.
                object = '{' > *members > '}';

                // "key" : value
                pair = string > ':' > value;

                // NOTE(review): `members` refers to itself after each ','.
                // Its attribute is json_members, not json_pair, so the tail of
                // the list is presumably not appended to `items` as individual
                // pairs -- this looks like the cause of the missing attributes
                // described in the question; confirm.
                members = pair > *( ',' > members );

                // '[' followed by zero or more `elements` matches, then ']'.
                element_list = '[' > *elements > ']';

                // NOTE(review): same self-referencing shape as `members`.
                elements = value > *( ',' > elements );

                // Alternatives are tried in the order written.
                value = string |
                    number |
                    object |
                    element_list |
                    bool_ |
                    lit("null");

                // Characters that may not appear inside a string: space,
                // parentheses, semicolon, double quote, newline, carriage
                // return and tab. No escape sequences are handled.
                char const* exclude = " ();\"\n\r\t";
                string = '"'
                    > +lexeme[char_ - char_(exclude)]
                    > '"';

                // The rule's attribute is double, whichever alternative matches.
                // NOTE(review): float_ and int_ are only tried after double_
                // has failed; they look largely redundant -- confirm.
                number = double_ |
                    float_ |
                    int_;

                // Register the rules for BOOST_SPIRIT_DEBUG tracing.
                BOOST_SPIRIT_DEBUG_NODE(object);
                BOOST_SPIRIT_DEBUG_NODE(pair);
                BOOST_SPIRIT_DEBUG_NODE(members);
                BOOST_SPIRIT_DEBUG_NODE(element_list);
                BOOST_SPIRIT_DEBUG_NODE(elements);
                BOOST_SPIRIT_DEBUG_NODE(value);
                BOOST_SPIRIT_DEBUG_NODE(string);
                BOOST_SPIRIT_DEBUG_NODE(number);
            }

            // Rule declarations; the second template argument is the
            // attribute each rule synthesizes.
            boost::spirit::qi::rule < Iterator, json_object(), Skipper > object;
            boost::spirit::qi::rule < Iterator, json_pair(), Skipper > pair;
            boost::spirit::qi::rule < Iterator, json_members(), Skipper > members;
            boost::spirit::qi::rule < Iterator, json_array(), Skipper > element_list;
            boost::spirit::qi::rule < Iterator, json_array(), Skipper > elements;
            boost::spirit::qi::rule < Iterator, json_value(), Skipper > value;
            boost::spirit::qi::rule < Iterator, std::string(), Skipper > string;
            boost::spirit::qi::rule < Iterator, double(), Skipper > number;
        };
      }
    }

    // Adapt json_object as a Boost.Fusion sequence with the single
    // member `children`.
    BOOST_FUSION_ADAPT_STRUCT(
        signal_processing::parsing::json_object,
        (std::vector < signal_processing::parsing::json_members >, children)
    )

    // Adapt json_members as a Boost.Fusion sequence with the single
    // member `items`.
    BOOST_FUSION_ADAPT_STRUCT(
        signal_processing::parsing::json_members,
        (std::vector < signal_processing::parsing::json_pair >, items)
    )

    // Adapt json_array as a Boost.Fusion sequence with the single
    // member `list`.
    BOOST_FUSION_ADAPT_STRUCT(
        signal_processing::parsing::json_array,
        (std::vector < signal_processing::parsing::json_value >, list)
    )

    /// Parses `file` with json_grammar, skipping ASCII whitespace, and prints a
    /// short diagnostic to std::cout when the parse fails or stops before the
    /// end of the input.
    ///
    /// @param file  Complete text to parse.
    ///
    /// The resulting json_object is local to this function and is discarded
    /// on return.
    ///
    /// NOTE(review): the grammar is written with Qi's expectation operator `>`;
    /// a failed expectation presumably surfaces as an exception that is not
    /// caught here -- confirm.
    void parse ( std::string const& file )
    {
        typedef signal_processing::parsing::json_grammar < std::string::const_iterator, boost::spirit::ascii::space_type > configuration_grammar;
        configuration_grammar input; // Input grammar
        signal_processing::parsing::json_object parsed_data;

        std::string::const_iterator iter = file.begin();
        std::string::const_iterator end = file.end();
        bool const r = boost::spirit::qi::phrase_parse ( iter, end, input, boost::spirit::ascii::space, parsed_data );

        if ( ! r || iter != end )
        {
            // Report at most the next 30 characters. The remaining length is
            // measured first: forming `iter + 30` when fewer than 30 characters
            // are left moves the iterator past `end`, which is undefined
            // behaviour even if the result is only compared.
            std::string::const_iterator const some = ( end - iter > 30 ) ? iter + 30 : end;

            std::string context(iter, some);
            std::cout << "-------------------------\n";
            std::cout << "Parsing failed\n";
            std::cout << "stopped at: \": " << context << "...\"\n";
            std::cout << "-------------------------\n";
        }
    }

    // Entry point: feeds the sample document from the question to parse().
    int main(int,char**)
    {
        // One literal per line of the document; adjacent literals are
        // concatenated by the compiler into the same text as a single literal.
        std::string const input =
            "{\r\n"
            "       \"Event\": {\r\n"
            "                \"attribute_a\": 0.0002,\r\n"
            "                \"attribute_b\": 2e-005,\r\n"
            "                \"attribute_c\": 0.022\r\n"
            "        }\r\n"
            "}";

        parse ( input );
        return 0;
    }
2
You can find here a really similar problem. You can sidestep the problem using a temporary rule with an attribute of vector. If you change the rules members and elements as suggested by FatalFlaw and make their attributes std::vector<json_pair> and std::vector<json_value> respectively, you will get the result you want with that input. – user1252091
+1 Nice grammar. It looks clean. You should add to the limitations: it has no support for character escapes (\b\f\r\n\0\") and it accepts non-JSON-compliant numeric formats (including but not limited to ±∞ and NaN). – sehe

2 Answers

1
votes

The parser

The answer to your first question, in general, is that there are probably a few ways to achieve what you want. Specifically, I can show you a way that seems to work.

The key thing is correctly setting up the attribute types for the rules that match one or more instances of a given phrase or token, by making sure that what you declare actually is a container, rather than a struct that has a member that is a container.

Once you've done that you need to compose the rule so that Qi knows that you are dealing with a container, and that it should back fill as appropriate.

Looking at the members rule first: you supplied this as the rule:

members = pair > *( ',' > members );

I don't believe this in itself is enough to tell Qi that you want it to back-stuff a json_members container with pairs, especially since pair and members don't have the same attribute type. So I suggest you replace the rule with:

members = pair > *( ',' > pair );

Or even:

members = pair % ',';

You need to determine whether these rules amount to the same thing, but you get the idea.

As to the json_members type: I changed your struct definition to make the struct derive from the container instead of having the container as a property of the struct:

struct json_members : std::vector < json_pair > {};

And you don't need to do the fusion-isation of the struct.

Here's what appears to me to be a working version of your code:

#define BOOST_SPIRIT_DEBUG
    #include <boost/spirit/include/qi.hpp>
    #include <boost/spirit/include/phoenix_core.hpp>
    #include <boost/spirit/include/phoenix_container.hpp>
    #include <boost/spirit/include/phoenix_statement.hpp>
    #include <boost/spirit/include/phoenix_operator.hpp>
    #include <boost/fusion/include/adapt_struct.hpp>
    #include <boost/fusion/include/boost_tuple.hpp>
    #include <boost/variant/recursive_variant.hpp>

    #include <boost/make_shared.hpp>

    #include <vector>

    namespace signal_processing {
      namespace parsing {

        struct json_object;
        struct json_array;

        typedef boost::variant < std::string,
            double,
            boost::recursive_wrapper<json_object>,
            boost::recursive_wrapper<json_array>,
            bool > json_value;

        typedef boost::tuple < std::string, json_value> json_pair;

//        struct json_members
//        {
//            std::vector < json_pair > items;
//        };
//
//        struct json_object
//        {
//            std::vector < json_members > children;
//        };
//
//        struct json_array
//        {
//            std::vector < json_value > list;
//        };

        // Container-derived replacements for the commented-out wrapper structs
        // above: each type now *is* the std::vector instead of holding one as a
        // member, so the rule attributes are containers.
        struct json_members : std::vector < json_pair > {};     // key/value pairs
        struct json_object : std::vector < json_members > {};   // one entry per `members` match
        struct json_array : std::vector < json_value > {};      // array values

        using boost::spirit::qi::bool_;
        using boost::spirit::qi::char_;
        using boost::spirit::qi::double_;
        using boost::spirit::qi::eol;
        using boost::spirit::qi::float_;
        using boost::spirit::qi::int_;
        using boost::spirit::qi::lexeme;
        using boost::spirit::qi::lit;
        using boost::spirit::qi::space;
        using boost::spirit::qi::_val;
        using boost::spirit::qi::_1;

        // Qi grammar for a JSON document whose top-level element is an object.
        //
        // Template parameters:
        //   Iterator - iterator type over the input text.
        //   Skipper  - skip parser applied between tokens (the caller in this
        //              listing passes boost::spirit::ascii::space).
        //
        // The synthesized attribute of the grammar is a json_object.
        template <typename Iterator, typename Skipper>
        struct json_grammar : boost::spirit::qi::grammar < Iterator, json_object(), Skipper>
        {
            // Defines every rule; `object` is the start rule.
            json_grammar() : json_grammar::base_type(object)
            {
                // '{' followed by zero or more `members` matches, then '}'.
                // A single `members` match already consumes the whole
                // comma-separated list, so the repetition ends with one
                // failed extra attempt (visible in the debug output).
                object = '{' > *members > '}';

                // "key" : value
                pair = string > ':' > value;

                // One pair followed by any number of ", pair": every pair
                // is an element of the json_members container.
                members = pair > *( ',' > pair );

                // '[' followed by zero or more `elements` matches, then ']'.
                element_list = '[' > *elements > ']';

                // One value followed by any number of ", value".
                elements = value > *( ',' > value );

                // Alternatives are tried in the order written.
                value = string |
                    number |
                    object |
                    element_list |
                    bool_ |
                    lit("null");

                // Characters that may not appear inside a string: space,
                // parentheses, semicolon, double quote, newline, carriage
                // return and tab. No escape sequences are handled.
                char const* exclude = " ();\"\n\r\t";
                string = '"'
                    > +lexeme[char_ - char_(exclude)]
                    > '"';

                // The rule's attribute is double, whichever alternative matches.
                // NOTE(review): float_ and int_ are only tried after double_
                // has failed; they look largely redundant -- confirm.
                number = double_ |
                    float_ |
                    int_;

                // Register the rules for BOOST_SPIRIT_DEBUG tracing.
                BOOST_SPIRIT_DEBUG_NODE(object);
                BOOST_SPIRIT_DEBUG_NODE(pair);
                BOOST_SPIRIT_DEBUG_NODE(members);
                BOOST_SPIRIT_DEBUG_NODE(element_list);
                BOOST_SPIRIT_DEBUG_NODE(elements);
                BOOST_SPIRIT_DEBUG_NODE(value);
                BOOST_SPIRIT_DEBUG_NODE(string);
                BOOST_SPIRIT_DEBUG_NODE(number);
            }

            // Rule declarations; the second template argument is the
            // attribute each rule synthesizes.
            boost::spirit::qi::rule < Iterator, json_object(), Skipper > object;
            boost::spirit::qi::rule < Iterator, json_pair(), Skipper > pair;
            boost::spirit::qi::rule < Iterator, json_members(), Skipper > members;
            boost::spirit::qi::rule < Iterator, json_array(), Skipper > element_list;
            boost::spirit::qi::rule < Iterator, json_array(), Skipper > elements;
            boost::spirit::qi::rule < Iterator, json_value(), Skipper > value;
            boost::spirit::qi::rule < Iterator, std::string(), Skipper > string;
            boost::spirit::qi::rule < Iterator, double(), Skipper > number;
        };
      }
    }

//    BOOST_FUSION_ADAPT_STRUCT(
//        signal_processing::parsing::json_object,
//        (std::vector < signal_processing::parsing::json_members >, children)
//    )
//
//    BOOST_FUSION_ADAPT_STRUCT(
//        signal_processing::parsing::json_members,
//        (std::vector < signal_processing::parsing::json_pair >, items)
//    )
//
//    BOOST_FUSION_ADAPT_STRUCT(
//        signal_processing::parsing::json_array,
//        (std::vector < signal_processing::parsing::json_value >, list)
//    )

    /// Parses `file` with json_grammar, skipping ASCII whitespace, and prints a
    /// short diagnostic to std::cout when the parse fails or stops before the
    /// end of the input.
    ///
    /// @param file  Complete text to parse.
    ///
    /// The resulting json_object is local to this function and is discarded
    /// on return.
    ///
    /// NOTE(review): the grammar is written with Qi's expectation operator `>`;
    /// a failed expectation presumably surfaces as an exception that is not
    /// caught here -- confirm.
    void parse ( std::string const& file )
    {
        typedef signal_processing::parsing::json_grammar < std::string::const_iterator, boost::spirit::ascii::space_type > configuration_grammar;
        configuration_grammar input; // Input grammar
        signal_processing::parsing::json_object parsed_data;

        std::string::const_iterator iter = file.begin();
        std::string::const_iterator end = file.end();
        bool const r = boost::spirit::qi::phrase_parse ( iter, end, input, boost::spirit::ascii::space, parsed_data );

        if ( ! r || iter != end )
        {
            // Report at most the next 30 characters. The remaining length is
            // measured first: forming `iter + 30` when fewer than 30 characters
            // are left moves the iterator past `end`, which is undefined
            // behaviour even if the result is only compared.
            std::string::const_iterator const some = ( end - iter > 30 ) ? iter + 30 : end;

            std::string context(iter, some);
            std::cout << "-------------------------\n";
            std::cout << "Parsing failed\n";
            std::cout << "stopped at: \": " << context << "...\"\n";
            std::cout << "-------------------------\n";
        }
    }

    // Entry point: feeds the sample document from the question to parse().
    int main(int,char**)
    {
        // One literal per line of the document; adjacent literals are
        // concatenated by the compiler into the same text as a single literal.
        std::string const input =
            "{\r\n"
            "       \"Event\": {\r\n"
            "                \"attribute_a\": 0.0002,\r\n"
            "                \"attribute_b\": 2e-005,\r\n"
            "                \"attribute_c\": 0.022\r\n"
            "        }\r\n"
            "}";

        parse ( input );
        return 0;
    }

Which outputs:

localhost stov # ./stov
<object>
  <try>{\r\n       "Event": {</try>
  <members>
    <try>\r\n       "Event": {\r</try>
    <pair>
      <try>\r\n       "Event": {\r</try>
      <string>
        <try>\r\n       "Event": {\r</try>
        <success>: {\r\n               </success>
        <attributes>[[E, v, e, n, t]]</attributes>
      </string>
      <value>
        <try> {\r\n                </try>
        <string>
          <try> {\r\n                </try>
          <fail/>
        </string>
        <number>
          <try> {\r\n                </try>
          <fail/>
        </number>
        <object>
          <try>{\r\n                "</try>
          <members>
            <try>\r\n                "a</try>
            <pair>
              <try>\r\n                "a</try>
              <string>
                <try>\r\n                "a</try>
                <success>: 0.0002,\r\n         </success>
                <attributes>[[a, t, t, r, i, b, u, t, e, _, a]]</attributes>
              </string>
              <value>
                <try> 0.0002,\r\n          </try>
                <string>
                  <try> 0.0002,\r\n          </try>
                  <fail/>
                </string>
                <number>
                  <try> 0.0002,\r\n          </try>
                  <success>,\r\n                "</success>
                  <attributes>[0.0002]</attributes>
                </number>
                <success>,\r\n                "</success>
                <attributes>[0.0002]</attributes>
              </value>
              <success>,\r\n                "</success>
              <attributes>[[[a, t, t, r, i, b, u, t, e, _, a], 0.0002]]</attributes>
            </pair>
            <pair>
              <try>\r\n                "a</try>
              <string>
                <try>\r\n                "a</try>
                <success>: 2e-005,\r\n         </success>
                <attributes>[[a, t, t, r, i, b, u, t, e, _, b]]</attributes>
              </string>
              <value>
                <try> 2e-005,\r\n          </try>
                <string>
                  <try> 2e-005,\r\n          </try>
                  <fail/>
                </string>
                <number>
                  <try> 2e-005,\r\n          </try>
                  <success>,\r\n                "</success>
                  <attributes>[2e-05]</attributes>
                </number>
                <success>,\r\n                "</success>
                <attributes>[2e-05]</attributes>
              </value>
              <success>,\r\n                "</success>
              <attributes>[[[a, t, t, r, i, b, u, t, e, _, b], 2e-05]]</attributes>
            </pair>
            <pair>
              <try>\r\n                "a</try>
              <string>
                <try>\r\n                "a</try>
                <success>: 0.022\r\n        }\r\n</success>
                <attributes>[[a, t, t, r, i, b, u, t, e, _, c]]</attributes>
              </string>
              <value>
                <try> 0.022\r\n        }\r\n}</try>
                <string>
                  <try> 0.022\r\n        }\r\n}</try>
                  <fail/>
                </string>
                <number>
                  <try> 0.022\r\n        }\r\n}</try>
                  <success>\r\n        }\r\n}</success>
                  <attributes>[0.022]</attributes>
                </number>
                <success>\r\n        }\r\n}</success>
                <attributes>[0.022]</attributes>
              </value>
              <success>\r\n        }\r\n}</success>
              <attributes>[[[a, t, t, r, i, b, u, t, e, _, c], 0.022]]</attributes>
            </pair>
            <success>\r\n        }\r\n}</success>
            <attributes>[[[[a, t, t, r, i, b, u, t, e, _, a], 0.0002], [[a, t, t, r, i, b, u, t, e, _, b], 2e-05], [[a, t, t, r, i, b, u, t, e, _, c], 0.022]]]</attributes>
          </members>
          <members>
            <try>\r\n        }\r\n}</try>
            <pair>
              <try>\r\n        }\r\n}</try>
              <string>
                <try>\r\n        }\r\n}</try>
                <fail/>
              </string>
              <fail/>
            </pair>
            <fail/>
          </members>
          <success>\r\n}</success>
          <attributes>[[[[[a, t, t, r, i, b, u, t, e, _, a], 0.0002], [[a, t, t, r, i, b, u, t, e, _, b], 2e-05], [[a, t, t, r, i, b, u, t, e, _, c], 0.022]]]]</attributes>
        </object>
        <success>\r\n}</success>
        <attributes>[[[[[a, t, t, r, i, b, u, t, e, _, a], 0.0002], [[a, t, t, r, i, b, u, t, e, _, b], 2e-05], [[a, t, t, r, i, b, u, t, e, _, c], 0.022]]]]</attributes>
      </value>
      <success>\r\n}</success>
      <attributes>[[[E, v, e, n, t], [[[[a, t, t, r, i, b, u, t, e, _, a], 0.0002], [[a, t, t, r, i, b, u, t, e, _, b], 2e-05], [[a, t, t, r, i, b, u, t, e, _, c], 0.022]]]]]</attributes>
    </pair>
    <success>\r\n}</success>
    <attributes>[[[[E, v, e, n, t], [[[[a, t, t, r, i, b, u, t, e, _, a], 0.0002], [[a, t, t, r, i, b, u, t, e, _, b], 2e-05], [[a, t, t, r, i, b, u, t, e, _, c], 0.022]]]]]]</attributes>
  </members>
  <members>
    <try>\r\n}</try>
    <pair>
      <try>\r\n}</try>
      <string>
        <try>\r\n}</try>
        <fail/>
      </string>
      <fail/>
    </pair>
    <fail/>
  </members>
  <success></success>
  <attributes>[[[[[E, v, e, n, t], [[[[a, t, t, r, i, b, u, t, e, _, a], 0.0002], [[a, t, t, r, i, b, u, t, e, _, b], 2e-05], [[a, t, t, r, i, b, u, t, e, _, c], 0.022]]]]]]]</attributes>
</object>

Unicode

I understand that the SVN build of boost::spirit now supports UTF-8; try googling BOOST_SPIRIT_UNICODE. You can support 'Unicode' (as Microsoft calls it) right now using the wide-string support in the library.

Disclaimer

I am working on Linux. YMMV.

0
votes

I happen to have written a UNICODE-aware JSON parser in Spirit v2 just recently, here's a testcase that parses your sample:

#include <sstream>
#include "JSON.hpp"

// Test utility: serializes `given` with to_wstring and parses the result
// back with JSON::parse, returning the re-parsed value.
static JSON::Value roundtrip(JSON::Value const& given) {
    return JSON::parse(to_wstring(given));
}

// Reads the sample document from the question through JSON::readFrom,
// round-trips it, then prints the round-tripped value followed by two
// comparisons of the original against the copy: by value (operator==) and
// by serialized text (to_string).
void roundtrip_test()
{
    auto 
        document = JSON::readFrom(std::istringstream(
                    "{\r\n"
                    "       \"Event\": {\r\n"
                    "             \"attribute_a\": 0.0002,\r\n"
                    "\"attribute_b\": 2e-005,\r\n"
                    "\"attribute_c\": 0.022\r\n"
                    "}\r\n}")),
        verify = roundtrip(document);  // same `auto` declaration, so both must deduce to one type

    std::cout << verify << "\n";
    std::cout << "document <=> verify equal:     \t" << std::boolalpha << (document == verify)                       << "\n";
    std::cout << "document <=> verify text match:\t" << std::boolalpha << (to_string(document) == to_string(verify)) << "\n";
}

This prints:

{"Event":{"attribute_a":0.0002,"attribute_b":2e-05,"attribute_c":0.022}}
document <=> verify equal:      true
document <=> verify text match: true

Some more API samples:

  1. Object initializer expressions for 'immediate' JSON documents:

    // Builds JSON values directly from brace initializers, reads one array
    // element back as a double, and streams several members of the
    // resulting document (and the document itself) to std::cout.
    void initializer_test()
    {
        using namespace JSON;
    
        // Array holding a wide string, an integer and a nested object.
        const Array arr { 
            L"text", 
            42,
            Object { { L"dummy", Null() } } 
        };
    
        // Second array element (42) read back through as_double.
        auto radius = as_double(arr[1]);
    
        // Object built from { key, value } pairs; values include a number,
        // a string, the array above, a boolean and computed doubles.
        auto const document = Object {
                { L"number", 314e-2 },
                { L"string", L"hello\ngoodbye" },
                { L"array" , arr },
                { L"bool" , False() },
                { L"radius", radius },
                { L"area", radius * radius * 3.14 },
                { String { 10, L'=' }, String { 10, L'*' } }
        };
    
        // Member lookup by key; note that "bool" is printed twice.
        std::cout << document[L"bool"]   << std::endl;
        std::cout << document[L"number"] << std::endl;
        std::cout << document[L"string"] << std::endl;
        std::cout << document[L"array"]  << std::endl;
        std::cout << document[L"bool"]   << std::endl;
        std::cout << document[L"radius"] << std::endl;
        std::cout << document[L"area"]   << std::endl;
        std::cout << document            << std::endl;
    }
    
  2. e.g. this sample input which passes the roundtrip test:

    {
        "Image": {
            "Width":  800,
            "Height": 600,
            "Title":  "View from 15th Floor",
            "Thumbnail": {
                "Russian":  "На берегу пустынных волн",
                "Escapes": "Ha \"\u0431\u0435\u0440\u0435\u0433\u0443\" shows up \\similar\\.\r\b\n",
                "берегу": "Russian",
                "Dummy": null,
                "Yummy": false,
                "Tummy": true,
                "Url":    "http://www.example.com/image/481989943",
                "Height": 125,
                "Width":  "100"
            },
            "IDs": [116, 943, 234, 38793]
    
        }
    }
    
  3. This visitor example that transforms some JSON nodes: How to manipulate leaves of a JSON tree