diff --git a/.gitignore b/.gitignore index 75206b5..a77db75 100644 --- a/.gitignore +++ b/.gitignore @@ -62,6 +62,7 @@ nbproject .settings composer.lock *.code-workspace +.vscode #Cmake CMakeCache.txt diff --git a/VERSIONS.md b/VERSIONS.md index 8ba3053..472dce6 100644 --- a/VERSIONS.md +++ b/VERSIONS.md @@ -1,3 +1,17 @@ +2._ Series +========== + +2.0 +--- + + - [2.0.0](https://github.com/tgockel/json-voorhees/milestone/12): 2020 March 13 + - Core + - Major refactoring of the parsing from the pull-based `tokenizer` into the flat-structured `ast_index` + - Removed support for more lax parser settings -- a parsed `ast_index` has been validated + - Serialization + - Extraction to C++ objects now occurs directly from `ast_index` instead of going through the `value` middle man, + saving time and memory + 1._ Series ========== @@ -121,7 +135,7 @@ The focus of this release was the creation of tools to traverse and manipulate t ------------------------------------------------------------------------- The main focus of this release is access and modification of the low-level parsing and encoding system. 
- + - [0.3.1](https://github.com/tgockel/json-voorhees/releases/tag/v0.3.1): 2014 September 27 - Greatly expands the flexibility of `parse_options` - Adds all the tests from [JSON_Checker](http://json.org/JSON_checker/) diff --git a/doc/conversions.dot b/doc/conversions.dot index 65dfae2..afbda0b 100644 --- a/doc/conversions.dot +++ b/doc/conversions.dot @@ -1,14 +1,14 @@ digraph jsonv { label="Conversions" labelloc="t" - - str [label="std::string"] + + str [label="std::string\nstd::string_view"] istream [label="std::istream"] ostream [label="std::ostream"] value [label="value", URL="\ref jsonv::value"] class [label="C++ class"] dsl [label="C++ DSL"] - + value -> str [label="to_string", URL="\ref jsonv::to_string(jsonv::value)"] value -> ostream [label="operator<<"] str -> value [label="parse", URL="\ref jsonv::parse"] diff --git a/doc/conversions.png b/doc/conversions.png index e6b6c08..4f32316 100644 Binary files a/doc/conversions.png and b/doc/conversions.png differ diff --git a/include/jsonv/algorithm.hpp b/include/jsonv/algorithm.hpp index 57464fa..9b483df 100644 --- a/include/jsonv/algorithm.hpp +++ b/include/jsonv/algorithm.hpp @@ -1,6 +1,6 @@ /** \file jsonv/algorithm.hpp * A collection of algorithms a la `<algorithm>`. - * + * * Copyright (c) 2014-2018 by Travis Gockel. All rights reserved. * * This program is free software: you can redistribute it and/or modify it under the terms of the Apache License @@ -32,7 +32,7 @@ class path; /** Traits describing how to perform various aspects of comparison. This implementation for comparison is strict and is * ultimately the one used by \c value::compare. - * + * * \see compare **/ struct JSONV_PUBLIC compare_traits @@ -47,7 +47,7 @@ struct JSONV_PUBLIC compare_traits int vb = kindval(b); return va == vb ? 0 : va < vb ? -1 : 1; } - + /** Compare two boolean values. **/ static int compare_booleans(bool a, bool b) { @@ -55,7 +55,7 @@ struct JSONV_PUBLIC compare_traits : a ? 
1 : -1; } - + /** Compare two integer values. **/ static int compare_integers(std::int64_t a, std::int64_t b) { @@ -71,19 +71,19 @@ struct JSONV_PUBLIC compare_traits : (a < b) ? -1 : 1; } - + /** Compare two string values. **/ static int compare_strings(const std::string& a, const std::string& b) { return a.compare(b); } - + /** Compare two strings used for the keys of objects. **/ static int compare_object_keys(const std::string& a, const std::string& b) { return a.compare(b); } - + /** Compare two objects \e before comparing the values. The \c compare function will only check the contents of an * object if this function returns 0. **/ @@ -91,7 +91,7 @@ struct JSONV_PUBLIC compare_traits { return 0; } - + private: static int kindval(kind k) { @@ -117,7 +117,7 @@ struct JSONV_PUBLIC compare_traits }; /** Compare the values \a a and \a b using the comparison \a traits. - * + * * \tparam TCompareTraits A type which should be compatible with the public signatures on the \c compare_traits class. **/ template @@ -125,10 +125,10 @@ int compare(const value& a, const value& b, const TCompareTraits& traits) { if (&a == &b) return 0; - + if (int kindcmp = traits.compare_kinds(a.kind(), b.kind())) return kindcmp; - + switch (a.kind()) { case jsonv::kind::null: @@ -158,7 +158,7 @@ int compare(const value& a, const value& b, const TCompareTraits& traits) { if (int objmetacmp = traits.compare_objects_meta(a, b)) return objmetacmp; - + auto aiter = a.begin_object(); auto biter = b.begin_object(); for ( ; aiter != a.end_object() && biter != b.end_object(); ++aiter, ++biter) @@ -176,30 +176,28 @@ int compare(const value& a, const value& b, const TCompareTraits& traits) } } -/** Compare the values \a a and \a b with strict comparison traits. - * - * \see value::compare - * \see compare_icase -**/ +/// Compare the values \a a and \a b with strict comparison traits. 
+/// +/// \see value::compare +/// \see compare_icase JSONV_PUBLIC int compare(const value& a, const value& b); -/** Compare the values \a a and \a b, but use case-insensitive matching on \c kind::string values. This does \e not use - * case-insensitive matching on the keys of objects! - * - * \see compare -**/ +/// Compare the values \a a and \a b, but use case-insensitive matching on \c kind::string values. This does \e not use +/// case-insensitive matching on the keys of objects! +/// +/// \see compare JSONV_PUBLIC int compare_icase(const value& a, const value& b); -/** The results of the \c diff operation. **/ +/// The results of the \c diff operation. struct JSONV_PUBLIC diff_result { - /** Elements that were the same between the two halves of the diff. **/ + /// Elements that were the same between the two halves of the diff. value same; - /** Elements that were unique to the left hand side of the diff. **/ + /// Elements that were unique to the left hand side of the diff. value left; - /** Elements that were unique to the right hand side of the diff. **/ + /// Elements that were unique to the right hand side of the diff. value right; }; @@ -217,7 +215,7 @@ JSONV_PUBLIC diff_result diff(value left, value right); * \a input is \c kind::array, \c func is called for every value in the array and the output will be an array with each * element transformed by \a func. If \a input is \c kind::object, the result will be an object with each key * transformed by \a func. - * + * * \param func The function to apply to the element or elements of \a input. * \param input The value to transform. **/ @@ -230,10 +228,10 @@ JSONV_PUBLIC value map(const std::function& func, * \a input is \c kind::array, \c func is called for every value in the array and the output will be an array with each * element transformed by \a func. If \a input is \c kind::object, the result will be an object with each key * transformed by \a func. 
- * + * * \param func The function to apply to the element or elements of \a input. * \param input The value to transform. - * + * * \note * This version of \c map provides only a basic exception-safety guarantee. If an exception is thrown while * transforming a non-scalar \c kind, there is no rollback action, so \a input is left in a usable, but @@ -245,7 +243,7 @@ JSONV_PUBLIC value map(const std::function& func, ); /** Recursively walk the provided \a tree and call \a func for each item in the tree. - * + * * \param tree The JSON value to traverse. * \param func The function to call for each element in the tree. * \param base_path The path to prepend to each output path to \a func. This can be useful if beginning traversal from @@ -261,7 +259,7 @@ JSONV_PUBLIC void traverse(const value& ); /** Recursively walk the provided \a tree and call \a func for each item in the tree. - * + * * \param tree The JSON value to traverse. * \param func The function to call for each element in the tree. * \param leafs_only If true, call \a func only when the current path is a "leaf" value (\c string, \c integer, diff --git a/include/jsonv/all.hpp b/include/jsonv/all.hpp index 9f0a6b7..4329f22 100644 --- a/include/jsonv/all.hpp +++ b/include/jsonv/all.hpp @@ -1,743 +1,738 @@ -/** \file jsonv/all.hpp - * A head which includes all other JSON Voorhees headers. - * - * Copyright (c) 2012-2015 by Travis Gockel. All rights reserved. - * - * This program is free software: you can redistribute it and/or modify it under the terms of the Apache License - * as published by the Apache Software Foundation, either version 2 of the License, or (at your option) any later - * version. - * - * \author Travis Gockel (travis@gockelhut.com) -**/ -#ifndef __JSONV_ALL_HPP_INCLUDED__ -#define __JSONV_ALL_HPP_INCLUDED__ +/// \file jsonv/all.hpp +/// A header which includes all other JSON Voorhees headers. +/// +/// Copyright (c) 2012-2020 by Travis Gockel. All rights reserved. 
+/// +/// This program is free software: you can redistribute it and/or modify it under the terms of the Apache License +/// as published by the Apache Software Foundation, either version 2 of the License, or (at your option) any later +/// version. +/// +/// \author Travis Gockel (travis@gockelhut.com) +#pragma once namespace jsonv { -/** \mainpage Overview - * JSON Voorhees is a JSON library written for the C++ programmer who wants to be productive in this modern world. What - * does that mean? There are a ton of JSON libraries floating around touting how they are "modern" C++ and so on. But - * who really cares? JSON Voorhees puts the focus more on the resulting C++ than any "modern" feature set. This means - * the library does not skip on string encoding details like having full support for UTF-8. Are there "modern" - * features? Sure, but this library is not meant to be a gallery of them -- a good API should get out of your way and - * let you work. It is hosted on GitHub and sports an Apache - * License, so use it anywhere you need. - * - * Features include (but are not necessarily limited to): - * - * - Simple - * - A `value` should not feel terribly different from a C++ Standard Library container - * - Write valid JSON with `operator<<` - * - Simple JSON parsing with `parse` - * - Reasonable error messages when parsing fails - * - Full support for Unicode-filled JSON (encoded in UTF-8 in C++) - * - Efficient - * - Minimal overhead to store values (a `value` is 16 bytes on a 64-bit platform) - * - No-throw move semantics wherever possible - * - Easy - * - Convert a `value` into a C++ type using `extract` - * - Encode a C++ type into a value using `to_json` - * - Safe - * - In the best case, illegal code should fail to compile - * - An illegal action should throw an exception - * - Almost all utility functions have a strong exception guarantee. 
- * - Stable - * - Worry less about upgrading -- the API and ABI will not change out from under you - * - Documented - * - Consumable by human beings - * - Answers questions you might actually ask - * - * \dotfile doc/conversions.dot - * - * JSON Voorhees is designed with ease-of-use in mind. So let's look at some code! - * - * \section demo_value The jsonv::value - * - * The central class of JSON Voorhees is the \c jsonv::value. This class represents a JSON AST and is somewhat of a - * dynamic type. This can make things a little bit awkward for C++ programmers who are used to static typing. Don't - * worry about it -- you can learn to love it. - * - * Putting values of different types is super-easy. - * - * \code - * #include - * #include - * - * int main() - * { - * jsonv::value x = jsonv::null; - * std::cout << x << std::endl; - * x = 5.9; - * std::cout << x << std::endl; - * x = -100; - * std::cout << x << std::endl; - * x = "something else"; - * std::cout << x << std::endl; - * x = jsonv::array({ "arrays", "of", "the", 7, "different", "types"}); - * std::cout << x << std::endl; - * x = jsonv::object({ - * { "objects", jsonv::array({ - * "Are fun, too.", - * "Do what you want." - * }) - * }, - * { "compose like", "standard library maps" }, - * }); - * std::cout << x << std::endl; - * } - * \endcode - * - * Output: - * - * \code - * null - * 5.9 - * -100 - * "something else" - * ["arrays","of","the",7,"different","types"] - * {"compose like":"standard library maps","objects":["Are fun, too.","Do what you want."]} - * \endcode - * - * If that isn't convenient enough for you, there is a user-defined literal \c _json in the \c jsonv namespace you can - * use - * - * \code - * // You can use this hideous syntax if you do not want to bring in the whole jsonv namespace: - * using jsonv::operator"" _json; - * - * jsonv::value x = R"({ - * "objects": [ "Are fun, too.", - * "Do what you want." 
- * ], - * "compose like": "You are just writing JSON", - * "which I guess": ["is", "also", "neat"] - * })"_json; - * \endcode - * - * JSON is dynamic, which makes value access a bit more of a hassle, but JSON Voorhees aims to make it not too - * horrifying for you. A \c jsonv::value has a number of accessor methods named things like \c as_integer and - * \c as_string which let you access the value as if it was that type. But what if it isn't that type? In that case, - * the function will throw a \c jsonv::kind_error with a bit more information as to what rule you violated. - * - * \code - * #include - * #include - * - * int main() - * { - * jsonv::value x = jsonv::null; - * try - * { - * x.as_string(); - * } - * catch (const jsonv::kind_error& err) - * { - * std::cout << err.what() << std::endl; - * } - * - * x = "now make it a string"; - * std::cout << x.as_string().size() << std::endl; - * std::cout << x.as_string() << "\tis not the same as\t" << x << std::endl; - * } - * \endcode - * - * Output: - * - * \code - * Unexpected type: expected string but found null. - * 20 - * now make it a string is not the same as "now make it a string" - * \endcode - * - * You can also deal with container types in a similar manner that you would deal with the equivalent STL container - * type, with some minor caveats. Because the \c value_type of a JSON object and JSON array are different, they have - * different iterator types in JSON Voorhees. They are aptly-named \c object_iterator and \c array_iterator. The access - * methods for these iterators are \c begin_object / \c end_object and \c begin_array / \c end_array, respectively. - * The object interface behaves exactly like you would expect a \c std::map to, while the - * array interface behaves just like a \c std::deque would. 
- * - * \code - * #include - * #include - * - * int main() - * { - * jsonv::value x = jsonv::object({ { "one", 1 }}); - * auto iter = x.find("one"); - * if (iter != x.end_object()) - * std::cout << iter->first << ": " << iter->second << std::endl; - * else - * std::cout << "Nothing..." << std::end; - * - * iter = x.find("two"); - * if (iter != x.end_object()) - * std::cout << iter->first << ": " << iter->second << std::endl; - * else - * std::cout << "Nothing..." << std::end; - * - * x["two"] = 2; - * iter = x.find("two"); - * if (iter != x.end_object()) - * std::cout << iter->first << ": " << iter->second << std::endl; - * else - * std::cout << "Nothing..." << std::end; - * - * x["two"] = jsonv::array({ "one", "+", x.at("one") }); - * iter = x.find("two"); - * if (iter != x.end_object()) - * std::cout << iter->first << ": " << iter->second << std::endl; - * else - * std::cout << "Nothing..." << std::end; - * - * x.erase("one"); - * iter = x.find("one"); - * if (iter != x.end_object()) - * std::cout << iter->first << ": " << iter->second << std::endl; - * else - * std::cout << "Nothing..." << std::end; - * } - * \endcode - * - * Output: - * - * \code - * one: 1 - * Nothing... - * two: 2 - * two: ["one","+",1] - * Nothing... - * \endcode - * - * The iterator types \e work. This means you are free to use all of the C++ things just like you would a regular - * container. To use a ranged-based for, simply call \c as_array or \c as_object. Everything from \c and - * \c or any other library works great with JSON Voorhees. Bring those templates on! 
- * - * \code - * #include - * #include - * #include - * - * int main() - * { - * jsonv::value arr = jsonv::array({ "taco", "cat", 3, -2, jsonv::null, "beef", 4.8, 5 }); - * std::cout << "Initial: "; - * for (const auto& val : arr.as_array()) - * std::cout << val << '\t'; - * std::cout << std::endl; - * - * std::sort(arr.begin_array(), arr.end_array()); - * std::cout << "Sorted: "; - * for (const auto& val : arr.as_array()) - * std::cout << val << '\t'; - * std::cout << std::endl; - * } - * \endcode - * - * Output: - * - * \code - * Initial: "taco" "cat" 3 -2 null "beef" 4.8 5 - * Sorted: null -2 3 4.8 5 "beef" "cat" "taco" - * \endcode - * - * \section demo_parsing Encoding and decoding - * - * Usually, the reason people are using JSON is as a data exchange format, either for communicating with other services - * or storing things in a file or a database. To do this, you need to \e encode your \c json::value into an - * \c std::string and \e parse it back. JSON Voorhees makes this very easy for you. - * - * \code - * #include - * #include - * #include - * - * #include - * #include - * #include - * - * int main() - * { - * jsonv::value obj = jsonv::object(); - * obj["taco"] = "cat"; - * obj["array"] = jsonv::array({ 1, 2, 3, 4, 5 }); - * obj["infinity"] = std::numeric_limits::infinity(); - * - * { - * std::cout << "Saving \"file.json\"... " << obj << std::endl; - * std::ofstream file("file.json"); - * file << obj; - * } - * - * jsonv::value loaded; - * { - * std::cout << "Loading \"file.json\"..."; - * std::ifstream file("file.json"); - * loaded = jsonv::parse(file); - * } - * std::cout << loaded << std::endl; - * - * return obj == loaded ? 0 : 1; - * } - * \endcode - * - * Output: - * - * \code - * Saving "file.json"... 
{"array":[1,2,3,4,5],"infinity":null,"taco":"cat"} - * Loading "file.json"...{"array":[1,2,3,4,5],"infinity":null,"taco":"cat"} - * \endcode - * - * If you are paying close attention, you might have noticed that the value for the \c "infinity" looks a little bit - * more \c null than \c infinity. This is because, much like mathematicians before Anaximander, JSON has no concept of - * infinity, so it is actually \e illegal to serialize a token like \c infinity anywhere. By default, when an encoder - * encounters an unrepresentable value in the JSON it is trying to encode, it outputs \c null instead. If you wish to - * change this behavior, implement your own \c jsonv::encoder (or derive from \c jsonv::ostream_encoder). If you ran - * the example program, you might have noticed that the return code was 1, meaning the value you put into the file and - * what you got from it were not equal. This is because all the type and value information is still kept around in the - * in-memory \c obj. It is only upon encoding that information is lost. - * - * Getting tired of all this compact rendering of your JSON strings? Want a little more whitespace in your life? Then - * \c jsonv::ostream_pretty_encoder is the class for you! Unlike our standard \e compact encoder, this guy will put - * newlines and indentation in your JSON so you can present it in a way more readable format. - * - * \code - * #include - * #include - * #include - * - * #include - * - * int main() - * { - * // Make a pretty encoder and point to std::cout - * jsonv::ostream_pretty_encoder prettifier(std::cout); - * prettifier.encode(jsonv::parse(std::cin)); - * } - * \endcode - * - * Compile that code and you now have your own little JSON prettification program! - * - * \section serialization Serialization - * - * Most of the time, you do not want to deal with \c jsonv::value instances directly. Instead, most people prefer to - * convert \c jsonv::value instances into their own strong C++ \c class or \c struct. 
JSON Voorhees provides utilities - * to make this easy for you to use. At the end of the day, you should be able to create an arbitrary C++ type with - * jsonv::extract<my_type>(value) and create a \c jsonv::value from your arbitrary C++ type with - * jsonv::to_json(my_instance). - * - * \subsection serialization_encoding Extracting with extract - * - * Let's start with converting a \c jsonv::value into a custom C++ type with jsonv::extract<T>. - * - * \code - * #include - * #include - * #include - * - * #include - * - * int main() - * { - * jsonv::value val = jsonv::parse(R"({ "a": 1, "b": 2, "c": "Hello!" })"); - * std::cout << "a=" << jsonv::extract(val.at("a")) << std::endl; - * std::cout << "b=" << jsonv::extract(val.at("b")) << std::endl; - * std::cout << "c=" << jsonv::extract(val.at("c")) << std::endl; - * } - * \endcode - * - * Output: - * - * \code - * a=1 - * b=2 - * c=Hello! - * \endcode - * - * Overall, this is not very complicated. We did not do anything that could not have been done through a little use of - * \c as_integer and \c as_string. So what is this \c extract giving us? - * - * The real power comes in when we start talking about \c jsonv::formats. These objects provide a set of rules to - * encode and decode arbitrary types. So let's make a C++ \c class for our JSON object and write a special constructor - * for it. 
- * - * \code - * #include - * #include - * #include - * #include - * - * #include - * - * class my_type - * { - * public: - * my_type(const jsonv::value& from, const jsonv::extraction_context& context) : - * a(context.extract_sub(from, "a")), - * b(context.extract_sub(from, "b")), - * c(context.extract_sub(from, "c")) - * { } - * - * static const jsonv::extractor* get_extractor() - * { - * static jsonv::extractor_construction instance; - * return &instance; - * } - * - * friend std::ostream& operator<<(std::ostream& os, const my_type& self) - * { - * return os << "{ a=" << self.a << ", b=" << self.b << ", c=" << self.c << " }"; - * } - * - * private: - * int a; - * int b; - * std::string c; - * }; - * - * int main() - * { - * jsonv::formats local_formats; - * local_formats.register_extractor(my_type::get_extractor()); - * jsonv::formats format = jsonv::formats::compose({ jsonv::formats::defaults(), local_formats }); - * - * jsonv::value val = jsonv::parse(R"({ "a": 1, "b": 2, "c": "Hello!" })"); - * my_type x = jsonv::extract(val, format); - * std::ostream << x << std::endl; - * } - * \endcode - * - * Output: - * - * \code - * { a=1, b=2, c=Hello! } - * \endcode - * - * There is a lot going on in that example, so let's take it one step at a time. First, we are creating a \c my_type - * object to store our values, which is nice. Then, we gave it a funny-looking constructor: - * - * \code - * my_type(const jsonv::value& from, const jsonv::extraction_context& context) : - * a(context.extract_sub(from, "a")), - * b(context.extract_sub(from, "b")), - * c(context.extract_sub(from, "c")) - * { } - * \endcode - * - * This is an extracting constructor. All that means is that it has those two arguments: a \c jsonv::value and - * a \c jsonv::extraction_context. The \c jsonv::extraction_context is an optional, but extremely helpful class. 
Inside - * the constructor, we use the \c jsonv::extraction_context to access the values of the incoming JSON object in order - * to build our object. - * - * \code - * static const jsonv::extractor* get_extractor() - * { - * static jsonv::extractor_construction instance; - * return &instance; - * } - * \endcode - * - * A \c jsonv::extractor is a type that knows how to take a \c jsonv::value and create some C++ type out of it. In this - * case, we are creating a \c jsonv::extractor_construction, which is a subtype that knows how to call the constructor - * of a type. There are all sorts of \c jsonv::extractor implementations in \c jsonv/serialization.hpp, so you should - * be able to find one that fits your needs. - * - * \code - * jsonv::formats local_formats; - * local_formats.register_extractor(my_type::get_extractor()); - * jsonv::formats format = jsonv::formats::compose({ jsonv::formats::defaults(), local_formats }); - * \endcode - * - * Now things are starting to get interesting. The \c jsonv::formats object is a collection of - * jsonv::extractors, so we create one of our own and add the \c jsonv::extractor* from the static function of - * \c my_type. Now, \c local_formats \e only knows how to extract instances of \c my_type -- it does \e not know even - * the most basic things like how to extract an \c int. We use \c jsonv::formats::compose to create a new instance of - * \c jsonv::formats that combines the qualities of \c local_formats (which knows how to deal with \c my_type) and the - * \c jsonv::formats::defaults (which knows how to deal with things like \c int and \c std::string). The \c formats - * instance now has the power to do everything we need! - * - * \code - * my_type x = jsonv::extract(val, format); - * \endcode - * - * This is not terribly different from the example before, but now we are explicitly passing a \c jsonv::formats object - * to the function. 
If we had not provided \c format as an argument here, the function would have thrown a - * \c jsonv::extraction_error complaining about how it did not know how to extract a \c my_type. - * - * \subsection serialization_to_json Serialization with to_json - * - * JSON Voorhees also allows you to convert from your C++ structures into JSON values, using \c jsonv::to_json. It - * should feel like a mirror \c jsonv::extract, with similar argument types and many shared concepts. Just like - * extraction, \c jsonv::to_json uses the \c jsonv::formats class, but it uses a \c jsonv::serializer to convert from - * C++ into JSON. - * - * \code - * #include - * #include - * #include - * - * #include - * - * class my_type - * { - * public: - * my_type(int a, int b, std::string c) : - * a(a), - * b(b), - * c(std::move(c)) - * { } - * - * static const jsonv::serializer* get_serializer() - * { - * static auto instance = jsonv::make_serializer - * ( - * [] (const jsonv::serialization_context& context, const my_type& self) - * { - * return jsonv::object({ { "a", context.to_json(self.a) }, - * { "b", context.to_json(self.b) }, - * { "c", context.to_json(self.c) } - * } - * ); - * } - * ); - * return &instance; - * } - * - * private: - * int a; - * int b; - * std::string c; - * }; - * - * int main() - * { - * jsonv::formats local_formats; - * local_formats.register_serializer(my_type::get_serializer()); - * jsonv::formats format = jsonv::formats::compose({ jsonv::formats::defaults(), local_formats }); - * - * my_type x(5, 6, "Hello"); - * std::ostream << jsonv::to_json(x, format) << std::endl; - * } - * \endcode - * - * Output: - * - * \code - * {"a":5,"b":6,"c":"Hello"} - * \endcode - * - * \subsection serialization_composition Composing Type Adapters - * - * Does all this seem a little bit \e manual to you? Creating an \c extractor and \c serializer for every single type - * can get a little bit tedious. 
Unfortunately, until C++ has a standard way to do reflection, we must specify the - * conversions manually. However, there \e is an easier way! That way is the - * \ref serialization_builder_dsl "Serialization Builder DSL". - * - * Let's start with a couple of simple structures: - * - * \code - * struct foo - * { - * int a; - * int b; - * std::string c; - * }; - * - * struct bar - * { - * foo x; - * foo y; - * std::string z; - * std::string w; - * }; - * \endcode - * - * Let's make a \c formats for them using the DSL: - * - * \code - * jsonv::formats formats = - * jsonv::formats_builder() - * .type() - * .member("a", &foo::a) - * .member("b", &foo::b) - * .default_value(10) - * .member("c", &foo::c) - * .type() - * .member("x", &bar::x) - * .member("y", &bar::y) - * .member("z", &bar::z) - * .since(jsonv::version(2, 0)) - * .member("w", &bar::w) - * .until(jsonv::version(5, 0)) - * ; - * \endcode - * - * What is going on there? The giant chain of function calls is building up a collection of type adapters into a - * \c formats for you. The indentation shows the intent -- the .member("a", &foo::a) is attached to the type - * \c adapter for \c foo (if you tried to specify \c &bar::y in that same place, it would fail to compile). Each - * function call returns a reference back to the builder so you can chain as many of these together as you want to. The - * \c jsonv::formats_builder is a proper object, so if you wish to spread out building your type adapters into multiple - * functions, you can do that by passing around an instance. - * - * The two most-used functions are \c type and \c member. \c type defines a \c jsonv::adapter for the C++ class - * provided at the template parameter. All of the calls before the second \c type call modify the adapter for \c foo. - * There, we attach members with the \c member function. This tells the \c formats how to encode and extract each of - * the specified members to and from a JSON object using the provided string as the key. 
The extra function calls like - * \c default_value, \c since and \c until are just a could of the many functions available to modify how the members - * of the type get transformed. - * - * The \c formats we built would be perfectly capable of serializing to and extracting from this JSON document: - * - * \code - * { - * "x": { "a": 50, "b": 20, "c": "Blah" }, - * "y": { "a": 10, "c": "No B?" }, - * "z": "Only serialized in 2.0+", - * "w": "Only serialized before 5.0" - * } - * \endcode - * - * For a more in-depth reference, see the \ref serialization_builder_dsl "Serialization Builder DSL page". - * - * \section demo_algorithm Algorithms - * - * JSON Voorhees takes a "batteries included" approach. A few building blocks for powerful operations can be found in - * the \c algorithm.hpp header file. - * - * One of the simplest operations you can perform is the \c map operation. This operation takes in some \c jsonv::value - * and returns another. Let's try it. - * - * \code - * #include - * #include - * - * #include - * - * int main() - * { - * jsonv::value x = 5; - * std::cout << jsonv::map([] (const jsonv::value& y) { return y.as_integer() * 2; }, x) << std::endl; - * } - * \endcode - * - * If everything went right, you should see a number: - * - * \code - * 10 - * \endcode - * - * Okay, so that was not very interesting. To be fair, that is not the most interesting example of using \c map, but it - * is enough to get the general idea of what is going on. This operation is so common that it is a member function of - * \c value as \c jsonv::value::map. Let's make things a bit more interesting and \c map an \c array... - * - * \code - * #include - * - * #include - * - * int main() - * { - * std::cout << jsonv::array({ 1, 2, 3, 4, 5 }) - * .map([] (const jsonv::value& y) { return y.as_integer() * 2; }) - * << std::endl; - * } - * \endcode - * - * Now we're starting to get somewhere! 
- * - * \code - * [2,4,6,8,10] - * \endcode - * - * The \c map function maps over whatever the contents of the \c jsonv::value happens to be and returns something for - * you based on the \c kind. This simple concept is so ubiquitous that - * Eugenio Moggi named it a monad. If you're - * feeling adventurous, try using \c map with an \c object or chaining multiple \c map operations together. - * - * Another common building block is the function \c jsonv::traverse. This function walks a JSON structure and calls a - * some user-provided function. - * - * \code - * #include - * #include - * #include - * - * #include - * - * int main() - * { - * jsonv::traverse(jsonv::parse(std::cin), - * [] (const jsonv::path& path, const jsonv::value& value) - * { - * std::cout << path << "=" << value << std::endl; - * }, - * true - * ); - * } - * \endcode - * - * Now we have a tiny little program! Here's what happens when I pipe { "bar": [1, 2, 3], "foo": "hello" } - * into the program: - * - * \code - * .bar[0]=1 - * .bar[1]=2 - * .bar[2]=3 - * .foo="hello" - * \endcode - * - * Imagine the possibilities! - * - * All of the \e really powerful functions can be found in \c util.hpp. My personal favorite is \c jsonv::merge. The - * idea is simple: it merges two (or more) JSON values into one. - * - * \code - * #include - * #include - * - * #include - * - * int main() - * { - * jsonv::value a = jsonv::object({ { "a", "taco" }, { "b", "cat" } }); - * jsonv::value b = jsonv::object({ { "c", "burrito" }, { "d", "dog" } }); - * jsonv::value merged = jsonv::merge(std::move(a), std::move(b)); - * std::cout << merged << std::endl; - * } - * \endcode - * - * Output: - * - * \code - * {"a":"taco","b":"cat","c":"burrito","d":"dog"} - * \endcode - * - * You might have noticed the use of \c std::move into the \c merge function. Like most functions in JSON Voorhees, - * \c merge takes advantage of move semantics. 
In this case, the implementation will move the contents of the values - * instead of copying them around. While it may not matter in this simple case, if you have large JSON structures, the - * support for movement will save you a ton of memory. - * - * \see https://github.com/tgockel/json-voorhees - * \see http://json.org/ -**/ +/// \mainpage Overview +/// JSON Voorhees is a JSON library written for the C++ programmer who wants to be productive in this modern world. What +/// does that mean? There are a ton of JSON libraries floating around touting how they are "modern" C++ and so on. But +/// who really cares? JSON Voorhees puts the focus more on the resulting C++ than any "modern" feature set. This means +/// the library does not skip on string encoding details like having full support for UTF-8. Are there "modern" +/// features? Sure, but this library is not meant to be a gallery of them -- a good API should get out of your way and +/// let you work. It is hosted on GitHub and sports an Apache +/// License, so use it anywhere you need. +/// +/// Features include (but are not necessarily limited to): +/// +/// - Simple +/// - A `value` should not feel terribly different from a C++ Standard Library container +/// - Write valid JSON with `operator<<` +/// - Simple JSON parsing with `parse` +/// - Reasonable error messages when parsing fails +/// - Full support for Unicode-filled JSON (encoded in UTF-8 in C++) +/// - Efficient +/// - Minimal overhead to store values (a `value` is 16 bytes on a 64-bit platform) +/// - No-throw move semantics wherever possible +/// - Easy +/// - Convert a `value` into a C++ type using `extract` +/// - Encode a C++ type into a value using `to_json` +/// - Safe +/// - In the best case, illegal code should fail to compile +/// - An illegal action should throw an exception +/// - Almost all utility functions have a strong exception guarantee. 
+/// - Stable +/// - Worry less about upgrading -- the API and ABI will not change out from under you +/// - Documented +/// - Consumable by human beings +/// - Answers questions you might actually ask +/// +/// \dotfile doc/conversions.dot +/// +/// JSON Voorhees is designed with ease-of-use in mind. So let's look at some code! +/// +/// \section demo_value The jsonv::value +/// +/// The central class of JSON Voorhees is the \c jsonv::value. This class represents a JSON AST and is somewhat of a +/// dynamic type. This can make things a little bit awkward for C++ programmers who are used to static typing. Don't +/// worry about it -- you can learn to love it. +/// +/// Putting values of different types is super-easy. +/// +/// \code +/// #include +/// #include +/// int main() +/// { +/// jsonv::value x = jsonv::null; +/// std::cout << x << std::endl; +/// x = 5.9; +/// std::cout << x << std::endl; +/// x = -100; +/// std::cout << x << std::endl; +/// x = "something else"; +/// std::cout << x << std::endl; +/// x = jsonv::array({ "arrays", "of", "the", 7, "different", "types"}); +/// std::cout << x << std::endl; +/// x = jsonv::object({ +/// { "objects", jsonv::array({ +/// "Are fun, too.", +/// "Do what you want." +/// }) +/// }, +/// { "compose like", "standard library maps" }, +/// }); +/// std::cout << x << std::endl; +/// } +/// \endcode +/// +/// Output: +/// +/// \code +/// null +/// 5.9 +/// -100 +/// "something else" +/// ["arrays","of","the",7,"different","types"] +/// {"compose like":"standard library maps","objects":["Are fun, too.","Do what you want."]} +/// \endcode +/// +/// If that isn't convenient enough for you, there is a user-defined literal \c _json in the \c jsonv namespace you can +/// use +/// +/// \code +/// // You can use this hideous syntax if you do not want to bring in the whole jsonv namespace: +/// using jsonv::operator"" _json; +/// +/// jsonv::value x = R"({ +/// "objects": [ "Are fun, too.", +/// "Do what you want." 
+/// ], +/// "compose like": "You are just writing JSON", +/// "which I guess": ["is", "also", "neat"] +/// })"_json; +/// \endcode +/// +/// JSON is dynamic, which makes value access a bit more of a hassle, but JSON Voorhees aims to make it not too +/// horrifying for you. A \c jsonv::value has a number of accessor methods named things like \c as_integer and +/// \c as_string which let you access the value as if it was that type. But what if it isn't that type? In that case, +/// the function will throw a \c jsonv::kind_error with a bit more information as to what rule you violated. +/// +/// \code +/// #include +/// #include +/// +/// int main() +/// { +/// jsonv::value x = jsonv::null; +/// try +/// { +/// x.as_string(); +/// } +/// catch (const jsonv::kind_error& err) +/// { +/// std::cout << err.what() << std::endl; +/// } +/// +/// x = "now make it a string"; +/// std::cout << x.as_string().size() << std::endl; +/// std::cout << x.as_string() << "\tis not the same as\t" << x << std::endl; +/// } +/// \endcode +/// +/// Output: +/// +/// \code +/// Unexpected type: expected string but found null. +/// 20 +/// now make it a string is not the same as "now make it a string" +/// \endcode +/// +/// You can also deal with container types in a similar manner that you would deal with the equivalent STL container +/// type, with some minor caveats. Because the \c value_type of a JSON object and JSON array are different, they have +/// different iterator types in JSON Voorhees. They are aptly-named \c object_iterator and \c array_iterator. The access +/// methods for these iterators are \c begin_object / \c end_object and \c begin_array / \c end_array, respectively. +/// The object interface behaves exactly like you would expect a \c std::map to, while the +/// array interface behaves just like a \c std::deque would. 
+/// +/// \code +/// #include +/// #include +/// +/// int main() +/// { +/// jsonv::value x = jsonv::object({ { "one", 1 }}); +/// auto iter = x.find("one"); +/// if (iter != x.end_object()) +/// std::cout << iter->first << ": " << iter->second << std::endl; +/// else +/// std::cout << "Nothing..." << std::endl; +/// +/// iter = x.find("two"); +/// if (iter != x.end_object()) +/// std::cout << iter->first << ": " << iter->second << std::endl; +/// else +/// std::cout << "Nothing..." << std::endl; +/// +/// x["two"] = 2; +/// iter = x.find("two"); +/// if (iter != x.end_object()) +/// std::cout << iter->first << ": " << iter->second << std::endl; +/// else +/// std::cout << "Nothing..." << std::endl; +/// +/// x["two"] = jsonv::array({ "one", "+", x.at("one") }); +/// iter = x.find("two"); +/// if (iter != x.end_object()) +/// std::cout << iter->first << ": " << iter->second << std::endl; +/// else +/// std::cout << "Nothing..." << std::endl; +/// +/// x.erase("one"); +/// iter = x.find("one"); +/// if (iter != x.end_object()) +/// std::cout << iter->first << ": " << iter->second << std::endl; +/// else +/// std::cout << "Nothing..." << std::endl; +/// } +/// \endcode +/// +/// Output: +/// +/// \code +/// one: 1 +/// Nothing... +/// two: 2 +/// two: ["one","+",1] +/// Nothing... +/// \endcode +/// +/// The iterator types \e work. This means you are free to use all of the C++ things just like you would a regular +/// container. To use a range-based for, simply call \c as_array or \c as_object. Everything from \c and +/// \c or any other library works great with JSON Voorhees. Bring those templates on! 
+/// +/// \code +/// #include +/// #include +/// #include +/// +/// int main() +/// { +/// jsonv::value arr = jsonv::array({ "taco", "cat", 3, -2, jsonv::null, "beef", 4.8, 5 }); +/// std::cout << "Initial: "; +/// for (const auto& val : arr.as_array()) +/// std::cout << val << '\t'; +/// std::cout << std::endl; +/// +/// std::sort(arr.begin_array(), arr.end_array()); +/// std::cout << "Sorted: "; +/// for (const auto& val : arr.as_array()) +/// std::cout << val << '\t'; +/// std::cout << std::endl; +/// } +/// \endcode +/// +/// Output: +/// +/// \code +/// Initial: "taco" "cat" 3 -2 null "beef" 4.8 5 +/// Sorted: null -2 3 4.8 5 "beef" "cat" "taco" +/// \endcode +/// +/// \section demo_parsing Encoding and decoding +/// +/// Usually, the reason people are using JSON is as a data exchange format, either for communicating with other services +/// or storing things in a file or a database. To do this, you need to \e encode your \c jsonv::value into an +/// \c std::string and \e parse it back. JSON Voorhees makes this very easy for you. +/// +/// \code +/// #include +/// #include +/// #include +/// +/// #include +/// #include +/// #include +/// +/// int main() +/// { +/// jsonv::value obj = jsonv::object(); +/// obj["taco"] = "cat"; +/// obj["array"] = jsonv::array({ 1, 2, 3, 4, 5 }); +/// obj["infinity"] = std::numeric_limits::infinity(); +/// +/// { +/// std::cout << "Saving \"file.json\"... " << obj << std::endl; +/// std::ofstream file("file.json"); +/// file << obj; +/// } +/// +/// jsonv::value loaded; +/// { +/// std::cout << "Loading \"file.json\"..."; +/// std::ifstream file("file.json"); +/// loaded = jsonv::parse(file); +/// } +/// std::cout << loaded << std::endl; +/// +/// return obj == loaded ? 0 : 1; +/// } +/// \endcode +/// +/// Output: +/// +/// \code +/// Saving "file.json"... 
{"array":[1,2,3,4,5],"infinity":null,"taco":"cat"} +/// Loading "file.json"...{"array":[1,2,3,4,5],"infinity":null,"taco":"cat"} +/// \endcode +/// +/// If you are paying close attention, you might have noticed that the value for the \c "infinity" looks a little bit +/// more \c null than \c infinity. This is because, much like mathematicians before Anaximander, JSON has no concept of +/// infinity, so it is actually \e illegal to serialize a token like \c infinity anywhere. By default, when an encoder +/// encounters an unrepresentable value in the JSON it is trying to encode, it outputs \c null instead. If you wish to +/// change this behavior, implement your own \c jsonv::encoder (or derive from \c jsonv::ostream_encoder). If you ran +/// the example program, you might have noticed that the return code was 1, meaning the value you put into the file and +/// what you got from it were not equal. This is because all the type and value information is still kept around in the +/// in-memory \c obj. It is only upon encoding that information is lost. +/// +/// Getting tired of all this compact rendering of your JSON strings? Want a little more whitespace in your life? Then +/// \c jsonv::ostream_pretty_encoder is the class for you! Unlike our standard \e compact encoder, this guy will put +/// newlines and indentation in your JSON so you can present it in a way more readable format. +/// +/// \code +/// #include +/// #include +/// #include +/// +/// #include +/// +/// int main() +/// { +/// // Make a pretty encoder and point to std::cout +/// jsonv::ostream_pretty_encoder prettifier(std::cout); +/// prettifier.encode(jsonv::parse(std::cin)); +/// } +/// \endcode +/// +/// Compile that code and you now have your own little JSON prettification program! +/// +/// \section serialization Serialization +/// +/// Most of the time, you do not want to deal with \c jsonv::value instances directly. 
Instead, most people prefer to +/// convert \c jsonv::value instances into their own strong C++ \c class or \c struct. JSON Voorhees provides utilities +/// to make this easy for you to use. At the end of the day, you should be able to create an arbitrary C++ type with +/// jsonv::extract<my_type>(value) and create a \c jsonv::value from your arbitrary C++ type with +/// jsonv::to_json(my_instance). +/// +/// \subsection serialization_encoding Extracting with extract +/// +/// Let's start with converting a \c jsonv::value into a custom C++ type with jsonv::extract<T>. +/// +/// \code +/// #include +/// #include +/// #include +/// +/// #include +/// +/// int main() +/// { +/// jsonv::value val = jsonv::parse(R"({ "a": 1, "b": 2, "c": "Hello!" })"); +/// std::cout << "a=" << jsonv::extract(val.at("a")) << std::endl; +/// std::cout << "b=" << jsonv::extract(val.at("b")) << std::endl; +/// std::cout << "c=" << jsonv::extract(val.at("c")) << std::endl; +/// } +/// \endcode +/// +/// Output: +/// +/// \code +/// a=1 +/// b=2 +/// c=Hello! +/// \endcode +/// +/// Overall, this is not very complicated. We did not do anything that could not have been done through a little use of +/// \c as_integer and \c as_string. So what is this \c extract giving us? +/// +/// The real power comes in when we start talking about \c jsonv::formats. These objects provide a set of rules to +/// encode and decode arbitrary types. So let's make a C++ \c class for our JSON object and write a special constructor +/// for it. 
+/// +/// \code +/// #include +/// #include +/// #include +/// #include +/// +/// #include +/// +/// class my_type +/// { +/// public: +/// my_type(const jsonv::value& from, const jsonv::extraction_context& context) : +/// a(context.extract_sub(from, "a")), +/// b(context.extract_sub(from, "b")), +/// c(context.extract_sub(from, "c")) +/// { } +/// +/// static const jsonv::extractor* get_extractor() +/// { +/// static jsonv::extractor_construction instance; +/// return &instance; +/// } +/// +/// friend std::ostream& operator<<(std::ostream& os, const my_type& self) +/// { +/// return os << "{ a=" << self.a << ", b=" << self.b << ", c=" << self.c << " }"; +/// } +/// +/// private: +/// int a; +/// int b; +/// std::string c; +/// }; +/// +/// int main() +/// { +/// jsonv::formats local_formats; +/// local_formats.register_extractor(my_type::get_extractor()); +/// jsonv::formats format = jsonv::formats::compose({ jsonv::formats::defaults(), local_formats }); +/// +/// jsonv::value val = jsonv::parse(R"({ "a": 1, "b": 2, "c": "Hello!" })"); +/// my_type x = jsonv::extract(val, format); +/// std::cout << x << std::endl; +/// } +/// \endcode +/// +/// Output: +/// +/// \code +/// { a=1, b=2, c=Hello! } +/// \endcode +/// +/// There is a lot going on in that example, so let's take it one step at a time. First, we are creating a \c my_type +/// object to store our values, which is nice. Then, we gave it a funny-looking constructor: +/// +/// \code +/// my_type(const jsonv::value& from, const jsonv::extraction_context& context) : +/// a(context.extract_sub(from, "a")), +/// b(context.extract_sub(from, "b")), +/// c(context.extract_sub(from, "c")) +/// { } +/// \endcode +/// +/// This is an extracting constructor. All that means is that it has those two arguments: a \c jsonv::value and +/// a \c jsonv::extraction_context. The \c jsonv::extraction_context is an optional, but extremely helpful class. 
Inside +/// the constructor, we use the \c jsonv::extraction_context to access the values of the incoming JSON object in order +/// to build our object. +/// +/// \code +/// static const jsonv::extractor* get_extractor() +/// { +/// static jsonv::extractor_construction instance; +/// return &instance; +/// } +/// \endcode +/// +/// A \c jsonv::extractor is a type that knows how to take a \c jsonv::value and create some C++ type out of it. In this +/// case, we are creating a \c jsonv::extractor_construction, which is a subtype that knows how to call the constructor +/// of a type. There are all sorts of \c jsonv::extractor implementations in \c jsonv/serialization.hpp, so you should +/// be able to find one that fits your needs. +/// +/// \code +/// jsonv::formats local_formats; +/// local_formats.register_extractor(my_type::get_extractor()); +/// jsonv::formats format = jsonv::formats::compose({ jsonv::formats::defaults(), local_formats }); +/// \endcode +/// +/// Now things are starting to get interesting. The \c jsonv::formats object is a collection of +/// jsonv::extractors, so we create one of our own and add the \c jsonv::extractor* from the static function of +/// \c my_type. Now, \c local_formats \e only knows how to extract instances of \c my_type -- it does \e not know even +/// the most basic things like how to extract an \c int. We use \c jsonv::formats::compose to create a new instance of +/// \c jsonv::formats that combines the qualities of \c local_formats (which knows how to deal with \c my_type) and the +/// \c jsonv::formats::defaults (which knows how to deal with things like \c int and \c std::string). The \c formats +/// instance now has the power to do everything we need! +/// +/// \code +/// my_type x = jsonv::extract(val, format); +/// \endcode +/// +/// This is not terribly different from the example before, but now we are explicitly passing a \c jsonv::formats object +/// to the function. 
If we had not provided \c format as an argument here, the function would have thrown a +/// \c jsonv::extraction_error complaining about how it did not know how to extract a \c my_type. +/// +/// \subsection serialization_to_json Serialization with to_json +/// +/// JSON Voorhees also allows you to convert from your C++ structures into JSON values, using \c jsonv::to_json. It +/// should feel like a mirror of \c jsonv::extract, with similar argument types and many shared concepts. Just like +/// extraction, \c jsonv::to_json uses the \c jsonv::formats class, but it uses a \c jsonv::serializer to convert from +/// C++ into JSON. +/// +/// \code +/// #include +/// #include +/// #include +/// +/// #include +/// +/// class my_type +/// { +/// public: +/// my_type(int a, int b, std::string c) : +/// a(a), +/// b(b), +/// c(std::move(c)) +/// { } +/// +/// static const jsonv::serializer* get_serializer() +/// { +/// static auto instance = jsonv::make_serializer +/// ( +/// [] (const jsonv::serialization_context& context, const my_type& self) +/// { +/// return jsonv::object({ { "a", context.to_json(self.a) }, +/// { "b", context.to_json(self.b) }, +/// { "c", context.to_json(self.c) } +/// } +/// ); +/// } +/// ); +/// return &instance; +/// } +/// +/// private: +/// int a; +/// int b; +/// std::string c; +/// }; +/// +/// int main() +/// { +/// jsonv::formats local_formats; +/// local_formats.register_serializer(my_type::get_serializer()); +/// jsonv::formats format = jsonv::formats::compose({ jsonv::formats::defaults(), local_formats }); +/// +/// my_type x(5, 6, "Hello"); +/// std::cout << jsonv::to_json(x, format) << std::endl; +/// } +/// \endcode +/// +/// Output: +/// +/// \code +/// {"a":5,"b":6,"c":"Hello"} +/// \endcode +/// +/// \subsection serialization_composition Composing Type Adapters +/// +/// Does all this seem a little bit \e manual to you? Creating an \c extractor and \c serializer for every single type +/// can get a little bit tedious. 
Unfortunately, until C++ has a standard way to do reflection, we must specify the +/// conversions manually. However, there \e is an easier way! That way is the +/// \ref serialization_builder_dsl "Serialization Builder DSL". +/// +/// Let's start with a couple of simple structures: +/// +/// \code +/// struct foo +/// { +/// int a; +/// int b; +/// std::string c; +/// }; +/// +/// struct bar +/// { +/// foo x; +/// foo y; +/// std::string z; +/// std::string w; +/// }; +/// \endcode +/// +/// Let's make a \c formats for them using the DSL: +/// +/// \code +/// jsonv::formats formats = +/// jsonv::formats_builder() +/// .type() +/// .member("a", &foo::a) +/// .member("b", &foo::b) +/// .default_value(10) +/// .member("c", &foo::c) +/// .type() +/// .member("x", &bar::x) +/// .member("y", &bar::y) +/// .member("z", &bar::z) +/// .since(jsonv::version(2, 0)) +/// .member("w", &bar::w) +/// .until(jsonv::version(5, 0)) +/// ; +/// \endcode +/// +/// What is going on there? The giant chain of function calls is building up a collection of type adapters into a +/// \c formats for you. The indentation shows the intent -- the .member("a", &foo::a) is attached to the type +/// \c adapter for \c foo (if you tried to specify \c &bar::y in that same place, it would fail to compile). Each +/// function call returns a reference back to the builder so you can chain as many of these together as you want to. The +/// \c jsonv::formats_builder is a proper object, so if you wish to spread out building your type adapters into multiple +/// functions, you can do that by passing around an instance. +/// +/// The two most-used functions are \c type and \c member. \c type defines a \c jsonv::adapter for the C++ class +/// provided at the template parameter. All of the calls before the second \c type call modify the adapter for \c foo. +/// There, we attach members with the \c member function. 
This tells the \c formats how to encode and extract each of +/// the specified members to and from a JSON object using the provided string as the key. The extra function calls like +/// \c default_value, \c since and \c until are just a couple of the many functions available to modify how the members +/// of the type get transformed. +/// +/// The \c formats we built would be perfectly capable of serializing to and extracting from this JSON document: +/// +/// \code +/// { +/// "x": { "a": 50, "b": 20, "c": "Blah" }, +/// "y": { "a": 10, "c": "No B?" }, +/// "z": "Only serialized in 2.0+", +/// "w": "Only serialized before 5.0" +/// } +/// \endcode +/// +/// For a more in-depth reference, see the \ref serialization_builder_dsl "Serialization Builder DSL page". +/// +/// \section demo_algorithm Algorithms +/// +/// JSON Voorhees takes a "batteries included" approach. A few building blocks for powerful operations can be found in +/// the \c algorithm.hpp header file. +/// +/// One of the simplest operations you can perform is the \c map operation. This operation takes in some \c jsonv::value +/// and returns another. Let's try it. +/// +/// \code +/// #include +/// #include +/// +/// #include +/// +/// int main() +/// { +/// jsonv::value x = 5; +/// std::cout << jsonv::map([] (const jsonv::value& y) { return y.as_integer() * 2; }, x) << std::endl; +/// } +/// \endcode +/// +/// If everything went right, you should see a number: +/// +/// \code +/// 10 +/// \endcode +/// +/// Okay, so that was not very interesting. To be fair, that is not the most interesting example of using \c map, but it +/// is enough to get the general idea of what is going on. This operation is so common that it is a member function of +/// \c value as \c jsonv::value::map. Let's make things a bit more interesting and \c map an \c array... 
+/// +/// \code +/// #include +/// +/// #include +/// +/// int main() +/// { +/// std::cout << jsonv::array({ 1, 2, 3, 4, 5 }) +/// .map([] (const jsonv::value& y) { return y.as_integer() * 2; }) +/// << std::endl; +/// } +/// \endcode +/// +/// Now we're starting to get somewhere! +/// +/// \code +/// [2,4,6,8,10] +/// \endcode +/// +/// The \c map function maps over whatever the contents of the \c jsonv::value happens to be and returns something for +/// you based on the \c kind. This simple concept is so ubiquitous that +/// Eugenio Moggi named it a monad. If you're +/// feeling adventurous, try using \c map with an \c object or chaining multiple \c map operations together. +/// +/// Another common building block is the function \c jsonv::traverse. This function walks a JSON structure and calls +/// some user-provided function. +/// +/// \code +/// #include +/// #include +/// #include +/// +/// #include +/// +/// int main() +/// { +/// jsonv::traverse(jsonv::parse(std::cin), +/// [] (const jsonv::path& path, const jsonv::value& value) +/// { +/// std::cout << path << "=" << value << std::endl; +/// }, +/// true +/// ); +/// } +/// \endcode +/// +/// Now we have a tiny little program! Here's what happens when I pipe { "bar": [1, 2, 3], "foo": "hello" } +/// into the program: +/// +/// \code +/// .bar[0]=1 +/// .bar[1]=2 +/// .bar[2]=3 +/// .foo="hello" +/// \endcode +/// +/// Imagine the possibilities! +/// +/// All of the \e really powerful functions can be found in \c util.hpp. My personal favorite is \c jsonv::merge. The +/// idea is simple: it merges two (or more) JSON values into one. 
+/// +/// \code +/// #include +/// #include +/// +/// #include +/// +/// int main() +/// { +/// jsonv::value a = jsonv::object({ { "a", "taco" }, { "b", "cat" } }); +/// jsonv::value b = jsonv::object({ { "c", "burrito" }, { "d", "dog" } }); +/// jsonv::value merged = jsonv::merge(std::move(a), std::move(b)); +/// std::cout << merged << std::endl; +/// } +/// \endcode +/// +/// Output: +/// +/// \code +/// {"a":"taco","b":"cat","c":"burrito","d":"dog"} +/// \endcode +/// +/// You might have noticed the use of \c std::move into the \c merge function. Like most functions in JSON Voorhees, +/// \c merge takes advantage of move semantics. In this case, the implementation will move the contents of the values +/// instead of copying them around. While it may not matter in this simple case, if you have large JSON structures, the +/// support for movement will save you a ton of memory. +/// +/// \see https://github.com/tgockel/json-voorhees +/// \see http://json.org/ } #include "algorithm.hpp" +#include "ast.hpp" #include "coerce.hpp" #include "config.hpp" #include "demangle.hpp" #include "encode.hpp" #include "forward.hpp" #include "functional.hpp" +#include "kind.hpp" #include "parse.hpp" #include "path.hpp" #include "serialization.hpp" #include "serialization_builder.hpp" #include "serialization_util.hpp" #include "string_view.hpp" -#include "tokenizer.hpp" #include "util.hpp" #include "value.hpp" - -#endif/*__JSONV_ALL_HPP_INCLUDED__*/ diff --git a/include/jsonv/ast.hpp b/include/jsonv/ast.hpp new file mode 100644 index 0000000..bede4a5 --- /dev/null +++ b/include/jsonv/ast.hpp @@ -0,0 +1,633 @@ +/// \file jsonv/ast.hpp +/// Utilities for directly dealing with a JSON AST. For most cases, it is more convenient to use \c jsonv::value. +/// Copyright (c) 2020 by Travis Gockel. All rights reserved. 
+/// +/// This program is free software: you can redistribute it and/or modify it under the terms of the Apache License +/// as published by the Apache Software Foundation, either version 2 of the License, or (at your option) any later +/// version. +/// +/// \author Travis Gockel (travis@gockelhut.com) +#pragma once + +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace jsonv +{ + +class parse_options; +class value; + +namespace detail +{ + +/// Get the text of a string \a token that needs no unescaping, as a non-owning view into the same buffer. The first and +/// last characters of \a token are dropped (presumably the enclosing quotation marks -- confirm against the parser). +/// NOTE(review): a \a token shorter than 2 characters makes `token.size() - 2U` wrap around; callers must not pass one. +inline string_view string_from_token(string_view token, std::false_type is_escaped JSONV_UNUSED) +{ + return string_view(token.data() + 1, token.size() - 2U); +} + +/// Overload chosen by the \c std::true_type tag, meant for escaped tokens; it returns an owning \c std::string instead +/// of a view. Only the declaration appears at this point. +std::string string_from_token(string_view token, std::true_type is_escaped); + +} + +/// \ingroup Value +/// \{ + +/// Marker type for an encountered token type. +/// +/// - \c document_end +/// The end of a document. +/// - \c document_start +/// The beginning of a document. +/// - \c object_begin +/// The beginning of an \c kind::object (`{`). +/// - \c object_end +/// The end of an \c kind::object (`}`). +/// - \c array_begin +/// The beginning of an \c kind::array (`[`). +/// - \c array_end +/// The end of an \c kind::array (`]`). +/// - \c string_canonical +/// A \c kind::string whose JSON-encoded format matches the canonical UTF-8 representation. There is no need to +/// translate JSON escape sequences to extract a \c std::string value, so accessing the raw text is safe. +/// - \c string_escaped +/// A \c kind::string whose JSON-encoded format contains escape sequences, so it must be translated to extract a +/// \c std::string value. +/// - \c key_canonical +/// The \c ast_node_type::string_canonical key of an \c kind::object. +/// - \c key_escaped +/// The \c ast_node_type::string_escaped key of an \c kind::object. +/// - \c literal_true +/// The \c kind::boolean literal \c true. +/// - \c literal_false +/// The \c kind::boolean literal \c false. +/// - \c literal_null +/// The \c kind::null literal \c null. 
+/// - \c integer +/// An \c kind::integer value. No decimals or exponent symbols were encountered during parsing. Note that integers are +/// \e not bounds-checked by the AST -- values outside of \c std::int64_t are still \c integer values. +/// - \c decimal +/// A \c kind::decimal value. +/// - \c error +/// An AST parsing error. +/// +/// \see ast_index +/// \see ast_node +enum class ast_node_type : std::uint8_t +{ + document_end = 0, + document_start = 1, + object_begin = 2, + object_end = 3, + array_begin = 4, + array_end = 5, + string_canonical = 6, + string_escaped = 7, + key_canonical = 8, + key_escaped = 9, + literal_true = 10, + literal_false = 11, + literal_null = 12, + integer = 13, + decimal = 14, + error = 15, +}; + +/// \{ +/// Write the single-character code for \a type listed in the "Output" column of this table (that \c to_string yields the +/// same text is presumed -- confirm in the implementation): +/// +/// +--------------------+--------+ +/// | `ast_node_type` | Output | +/// +--------------------+--------+ +/// | `document_start` | `^` | +/// | `document_end` | `$` | +/// | `object_begin` | `{` | +/// | `object_end` | `}` | +/// | `array_begin` | `[` | +/// | `array_end` | `]` | +/// | `string_canonical` | `s` | +/// | `string_escaped` | `S` | +/// | `key_canonical` | `k` | +/// | `key_escaped` | `K` | +/// | `literal_true` | `t` | +/// | `literal_false` | `f` | +/// | `literal_null` | `n` | +/// | `integer` | `i` | +/// | `decimal` | `d` | +/// | `error` | `!` | +/// +--------------------+--------+ +JSONV_PUBLIC std::ostream& operator<<(std::ostream&, const ast_node_type& type); +JSONV_PUBLIC std::string to_string(const ast_node_type& type); +/// \} + +/// Error code encountered while building the AST. Only \c none has an explicit value (0); the remaining enumerators are +/// numbered implicitly in declaration order, so inserting or reordering entries changes their numeric values. +enum class ast_error : std::uint64_t +{ + none = 0, + unexpected_token, + eof, + expected_eof, + depth_exceeded, + extra_close, + mismatched_close, + close_after_comma, + unexpected_comma, + expected_string, + expected_key_delimiter, + invalid_literal, + invalid_number, + invalid_string, + invalid_comment, + internal, +}; + +/// \{ +/// Get a description of the error \a code. 
+JSONV_PUBLIC std::ostream& operator<<(std::ostream&, const ast_error& code); +JSONV_PUBLIC std::string to_string(const ast_error& code); +/// \} + +/// Represents an entry in a JSON AST. +/// +/// \see ast_index +class ast_node final +{ +public: + /// Shared foundation of the concrete node classes: holds a pointer to the first character of the token and reports the + /// node type fixed at compile time. The length used by \c token_raw is obtained from the derived class's + /// \c token_size(). + template + class base + { + public: + /// Get the \c ast_node_type type. + constexpr ast_node_type type() const + { + return KIndexToken; + } + + /// \see ast_node::token_raw + string_view token_raw() const + { + return string_view(_token_begin, static_cast(*this).token_size()); + } + + /// Allow implicit conversion to the more generic \c ast_node. + operator ast_node() const; + + protected: + explicit constexpr base(const char* token_begin) : + _token_begin(token_begin) + { } + + private: + const char* _token_begin; + }; + + /// A node whose token length is the compile-time constant \c KTokenSize. + template + class basic_fixed_size_token : public base + { + public: + explicit constexpr basic_fixed_size_token(const char* token_begin) : + base(token_begin) + { } + + constexpr std::size_t token_size() const + { + return KTokenSize; + } + }; + + /// A node whose token length is supplied at construction and stored in the instance. + template + class basic_dynamic_size_token : public base + { + public: + explicit constexpr basic_dynamic_size_token(const char* token_begin, std::size_t token_size) : + base(token_begin), + _token_size(token_size) + { } + + constexpr std::size_t token_size() const + { + return _token_size; + } + + private: + std::size_t _token_size; + }; + + /// The start of a document. + class document_start final : public basic_fixed_size_token + { + public: + using basic_fixed_size_token::basic_fixed_size_token; + }; + + /// The end of a document. + class document_end final : public basic_fixed_size_token + { + public: + using basic_fixed_size_token::basic_fixed_size_token; + }; + + /// The beginning of an \c kind::object (`{`). 
+ class object_begin final : public basic_fixed_size_token + { + public: + explicit constexpr object_begin(const char* token_begin, std::size_t element_count) : + basic_fixed_size_token(token_begin), + _element_count(element_count) + { } + + /// Get the number of elements in the object this token starts. This is useful for reserving memory. + constexpr std::size_t element_count() const + { + return _element_count; + } + + private: + std::size_t _element_count; + }; + + /// The end of an \c kind::object (`}`). + class object_end final : public basic_fixed_size_token + { + public: + using basic_fixed_size_token::basic_fixed_size_token; + }; + + /// The beginning of an \c kind::array (`[`). + class array_begin final : public basic_fixed_size_token + { + public: + explicit constexpr array_begin(const char* token_begin, std::size_t element_count) : + basic_fixed_size_token(token_begin), + _element_count(element_count) + { } + + /// Get the number of elements in the array this token starts. This is useful for reserving memory. + constexpr std::size_t element_count() const + { + return _element_count; + } + + private: + std::size_t _element_count; + }; + + /// The end of an \c kind::array (`]`). + class array_end final : public basic_fixed_size_token + { + public: + using basic_fixed_size_token::basic_fixed_size_token; + }; + + /// Common implementation of the string and key node types. \c KEscaped selects whether \c value has to translate + /// escape sequences. + /// NOTE(review): \c _token_size and \c token_size() here repeat what \c basic_dynamic_size_token already stores and + /// returns (the same constructor argument is forwarded to it), so each string node appears to keep its length twice. + /// Consider dropping the duplicate after checking for size assumptions elsewhere. + template + class basic_string_token : public basic_dynamic_size_token + { + public: + /// The return type of \c value is based on if the string is \c canonical or \c escaped. Strings in canonical + /// representation can be returned directly from the text through a \c string_view. + using value_type = std::conditional_t; + + public: + explicit constexpr basic_string_token(const char* token_begin, std::size_t token_size) : + basic_dynamic_size_token(token_begin, token_size), + _token_size(token_size) + { } + + /// Was the source JSON for this string encoded in the canonical UTF-8 representation? 
If this is \c true, there + /// is no need to translate JSON escape sequences to extract a \c std::string value. This is the opposite of + /// \c escaped. + constexpr bool canonical() const noexcept + { + return !KEscaped; + } + + /// Did the source JSON for this string contain escape sequences? If this is \c true, JSON escape sequences must + /// be translated into their canonical UTF-8 representation on extraction. This is the opposite of \c canonical. + constexpr bool escaped() const noexcept + { + return KEscaped; + } + + /// Length of the raw token; returns the copy of the size kept by this class. + constexpr std::size_t token_size() const + { + return _token_size; + } + + /// Get the string contents of this token by passing the raw token text to \c detail::string_from_token. + value_type value() const + { + return detail::string_from_token(this->token_raw(), std::integral_constant()); + } + + private: + std::size_t _token_size; + }; + + class string_canonical final : public basic_string_token + { + public: + using basic_string_token::basic_string_token; + }; + + class string_escaped final : public basic_string_token + { + public: + using basic_string_token::basic_string_token; + }; + + class key_canonical final : public basic_string_token + { + public: + using basic_string_token::basic_string_token; + }; + + class key_escaped final : public basic_string_token + { + public: + using basic_string_token::basic_string_token; + }; + + class literal_true final : public basic_fixed_size_token + { + public: + using basic_fixed_size_token::basic_fixed_size_token; + + bool value() const noexcept + { + return true; + } + }; + + class literal_false final : public basic_fixed_size_token + { + public: + using basic_fixed_size_token::basic_fixed_size_token; + + bool value() const noexcept + { + return false; + } + }; + + class literal_null final : public basic_fixed_size_token + { + public: + using basic_fixed_size_token::basic_fixed_size_token; + + jsonv::value value() const; + }; + + class integer final : public basic_dynamic_size_token + { + public: + using basic_dynamic_size_token::basic_dynamic_size_token; + + std::int64_t value() const; + }; + + class 
decimal final : public basic_dynamic_size_token + { + public: + using basic_dynamic_size_token::basic_dynamic_size_token; + + double value() const; + }; + + class error final : public basic_dynamic_size_token + { + public: + explicit constexpr error(const char* token_begin, std::size_t token_size, ast_error error_code) : + basic_dynamic_size_token(token_begin, token_size), + _error_code(error_code) + { } + + constexpr ast_error error_code() const + { + return _error_code; + } + + private: + ast_error _error_code; + }; + + using storage_type = std::variant; + +public: + ast_node(const storage_type& value) : + _impl(value) + { } + + template + explicit ast_node(std::in_place_type_t type, TArgs&&... args) : + _impl(type, std::forward(args)...) + { } + + /// Get the \c std::variant that backs this type. + /// + /// \see visit + const storage_type& storage() const + { + return _impl; + } + + /// Convenience function for calling \c std::visit on the underlying \c storage of this node. + template + auto visit(FVisitor&& visitor) const + { + return std::visit(std::forward(visitor), _impl); + } + + /// Get the \c ast_node_type that tells the underlying type of this instance. + ast_node_type type() const + { + return visit([](const auto& x) { return x.type(); }); + } + + /// Get a view of the raw token. For example, \c "true", \c "{", or \c "1234". Note that this includes the complete + /// source, so string types such as \c ast_node_type::string_canonical include the opening and closing quotations. + string_view token_raw() const + { + return visit([](const auto& x) { return x.token_raw(); }); + } + + /// Get the underlying data of this node as \c T. + /// + /// \throws std::bad_variant_access if the requested \c T is different from the \c type of this instance. 
+ template + const T& as() const + { + return std::get(_impl); + } + +private: + storage_type _impl; +}; + +template +ast_node::base::operator ast_node() const +{ + return ast_node(ast_node::storage_type(static_cast(*this))); +} + +/// Represents the index of a parsed AST. When combined with the original text, can be used to create a \ref value (this +/// happens within \ref parse). See \ref make_from to construct instances from JSON source text. +class JSONV_PUBLIC ast_index final +{ +public: + class iterator final + { + public: + using value_type = ast_node; + + public: + iterator() = default; + + iterator& operator++(); + iterator operator++(int) + { + iterator temp(*this); + ++*this; + return temp; + } + + value_type operator*() const; + + // Note the lack of comparison between `_prefix` -- valid `iterator`s will always have the same `_prefix` + bool operator==(const iterator& other) const { return _iter == other._iter; } + bool operator!=(const iterator& other) const { return _iter != other._iter; } + bool operator< (const iterator& other) const { return _iter < other._iter; } + bool operator<=(const iterator& other) const { return _iter <= other._iter; } + bool operator> (const iterator& other) const { return _iter > other._iter; } + bool operator>=(const iterator& other) const { return _iter >= other._iter; } + + private: + explicit iterator(const std::uintptr_t prefix, const std::uint64_t* iter) : + _prefix(prefix), + _iter(iter) + { } + + friend class ast_index; + + private: + std::uintptr_t _prefix; + const std::uint64_t* _iter; + }; + + using const_iterator = iterator; + +public: + /// Creates an empty not-an-AST instance. 
+ ast_index() noexcept = default; + + ast_index(ast_index&& src) noexcept : + _impl(std::exchange(src._impl, nullptr)) + { } + + ast_index& operator=(ast_index&& src) noexcept + { + if (this == &src) + return *this; + + using std::swap; + swap(_impl, src._impl); + src.reset(); + + return *this; + } + + ~ast_index() noexcept; + + /// Create an \c ast_index from the given \a src JSON. + /// + /// \param initial_buffer_capacity The initial capacity of the underlying buffer. By default (\c nullopt), this will + /// size the buffer according to the length of the \a src string. + static ast_index make_from(string_view src, optional initial_buffer_capacity = nullopt); + + /// Clear the contents of this instance. + void reset(); + + /// Check if this instance represents a valid AST. This will be \c false if the source JSON was not valid JSON text. + /// This will also be \c false if this instance was default-constructed or moved-from. + /// + /// \note + /// Even if this returns true, it is possible that conversion to a \c jsonv::value will throw an exception. For + /// example, if the value of a number exceeds the range of an \c int64_t. This is because JSON does not specify an + /// acceptable range for numbers, but the storage of \c jsonv::value does. + bool success() const noexcept; + + /// Validate that the parse was a \c success. + /// + /// \throws parse_error if the parse was not successful. This will contain additional details about why the parse + /// failed. + /// \throws std::invalid_argument if this instance was default-constructed or moved-from. + void validate() const; + + iterator begin() const; + iterator cbegin() const { return begin(); } + + iterator end() const; + iterator cend() const { return end(); } + + value extract_tree(const parse_options& options) const; + value extract_tree() const; + + /// \{ + /// Get a string representation of the AST. 
+ /// + /// +--------------------+--------+ + /// | `ast_node_type` | Output | + /// +--------------------+--------+ + /// | `document_start` | `^` | + /// | `document_end` | `$` | + /// | `object_begin` | `{` | + /// | `object_end` | `}` | + /// | `array_begin` | `[` | + /// | `array_end` | `]` | + /// | `string_canonical` | `s` | + /// | `string_escaped` | `S` | + /// | `key_canonical` | `k` | + /// | `key_escaped` | `K` | + /// | `literal_true` | `t` | + /// | `literal_false` | `f` | + /// | `literal_null` | `n` | + /// | `integer` | `i` | + /// | `decimal` | `d` | + /// | `error` | `!` | + /// +--------------------+--------+ + /// + /// This exists primarily for debugging purposes. + friend std::ostream& operator<<(std::ostream&, const ast_index&); + friend std::string to_string(const ast_index&); + /// \} + +private: + struct impl; + + explicit ast_index(impl*) noexcept; + +private: + impl* _impl = nullptr; +}; + +/// \} + +} diff --git a/include/jsonv/config.hpp b/include/jsonv/config.hpp index a43a453..f541ec1 100644 --- a/include/jsonv/config.hpp +++ b/include/jsonv/config.hpp @@ -1,19 +1,16 @@ -/** \file jsonv/config.hpp - * - * Copyright (c) 2014-2019 by Travis Gockel. All rights reserved. - * - * This program is free software: you can redistribute it and/or modify it under the terms of the Apache License - * as published by the Apache Software Foundation, either version 2 of the License, or (at your option) any later - * version. - * - * \author Travis Gockel (travis@gockelhut.com) -**/ -#ifndef __JSONV_CONFIG_HPP_INCLUDED__ -#define __JSONV_CONFIG_HPP_INCLUDED__ +/// \file jsonv/config.hpp +/// +/// Copyright (c) 2014-2020 by Travis Gockel. All rights reserved. +/// +/// This program is free software: you can redistribute it and/or modify it under the terms of the Apache License +/// as published by the Apache Software Foundation, either version 2 of the License, or (at your option) any later +/// version. 
+/// +/// \author Travis Gockel (travis@gockelhut.com) +#pragma once -/** \def JSONV_USER_CONFIG - * \brief A user-defined configuration file to be included before all other JSON Voorhees content. -**/ +/// \def JSONV_USER_CONFIG +/// \brief A user-defined configuration file to be included before all other JSON Voorhees content. #ifdef JSONV_USER_CONFIG # include JSONV_USER_CONFIG #endif @@ -22,29 +19,29 @@ #define JSONV_VERSION_MINOR 0 #define JSONV_VERSION_PATCH 0 -/** \def JSONV_DEBUG - * \brief Was JSON Voorhees compiled in debug mode? - * This value must be the same between when the SO was built and when you are compiling. In general, this is not useful - * outside of library maintainers. - * - * \warning - * Keep in mind this value is \e always defined. Use `#if JSONV_DEBUG`, \e not `#ifdef JSONV_DEBUG`. -**/ +/// \def JSONV_VERSION +#define JSONV_VERSION (JSONV_VERSION_MAJOR * 1000000 + JSONV_VERSION_MINOR * 1000 + JSONV_VERSION_PATCH) + +/// \def JSONV_DEBUG +/// \brief Was JSON Voorhees compiled in debug mode? +/// This value must be the same between when the SO was built and when you are compiling. In general, this is not useful +/// outside of library maintainers. +/// +/// \warning +/// Keep in mind this value is \e always defined. Use `#if JSONV_DEBUG`, \e not `#ifdef JSONV_DEBUG`. #ifndef JSONV_DEBUG # define JSONV_DEBUG 0 #endif -/** \def JSONV_SO - * \brief Are you using shared objects (DLLs in Windows)? -**/ +/// \def JSONV_SO +/// \brief Are you using shared objects (DLLs in Windows)? #ifndef JSONV_SO # define JSONV_SO 1 #endif -/** \def JSONV_COMPILING - * \brief Is JSON Voorhees currently compiling? - * You probably do not want to set this by hand. It is set by the build system when the library is compiled. -**/ +/// \def JSONV_COMPILING +/// \brief Is JSON Voorhees currently compiling? +/// You do not want to set this by hand. It is set by the build system when the library is compiled. 
#ifndef JSONV_COMPILING # ifdef jsonv_EXPORTS # define JSONV_COMPILING 1 @@ -53,17 +50,16 @@ # endif #endif -/** \def JSONV_EXPORT - * If using shared objects, this class or function should be exported. - * - * \def JSONV_IMPORT - * If using shared objects, this class or function should be imported. - * - * \def JSONV_HIDDEN - * This symbol is only visible within the same shared object in which the translation unit will end up. Symbols which - * are "hidden" will \e not be put into the global offset table, which means code can be more optimal when it involves - * hidden symbols at the cost that nothing outside of the SO can access it. -**/ +/// \def JSONV_EXPORT +/// If using shared objects, this class or function should be exported. +/// +/// \def JSONV_IMPORT +/// If using shared objects, this class or function should be imported. +/// +/// \def JSONV_HIDDEN +/// This symbol is only visible within the same shared object in which the translation unit will end up. Symbols which +/// are "hidden" will \e not be put into the global offset table, which means code can be more optimal when it involves +/// hidden symbols at the cost that nothing outside of the SO can access it. #if JSONV_SO # if defined(__GNUC__) # define JSONV_EXPORT __attribute__((visibility("default"))) @@ -88,15 +84,14 @@ # define JSONV_HIDDEN #endif -/** \def JSONV_PUBLIC - * \brief This function or class is part of the public API for JsonVoorhees. - * If you are including JsonVoorhees for another library, this will have import semantics (\c JSONV_IMPORT); if you are - * building JsonVoorhees, this will have export semantics (\c JSONV_EXPORT). - * - * \def JSONV_LOCAL - * \brief This function or class is internal-use only. - * \see JSONV_HIDDEN -**/ +/// \def JSONV_PUBLIC +/// \brief This function or class is part of the public API for JSON Voorhees. 
+/// If you are including JsonVoorhees for another library, this will have import semantics (\c JSONV_IMPORT); if you are +/// building JsonVoorhees, this will have export semantics (\c JSONV_EXPORT). +/// +/// \def JSONV_LOCAL +/// \brief This function or class is internal-use only. +/// \see JSONV_HIDDEN #if JSONV_COMPILING # define JSONV_PUBLIC JSONV_EXPORT # define JSONV_LOCAL JSONV_HIDDEN @@ -105,32 +100,33 @@ # define JSONV_LOCAL JSONV_HIDDEN #endif -/** \def JSONV_UNUSED - * \brief Note that you know the variable is unused, but make the compiler stop complaining about it. -**/ +/// \def JSONV_UNUSED +/// \brief Note that you know the variable is unused, but make the compiler stop complaining about it. #ifndef JSONV_UNUSED -# if defined(__GNUC__) +# if __has_cpp_attribute(maybe_unused) +# define JSONV_UNUSED [[maybe_unused]] +# elif defined(__GNUC__) # define JSONV_UNUSED __attribute__((unused)) # else # define JSONV_UNUSED # endif #endif -/** \def JSONV_NO_RETURN - * \brief Mark that a given function will never return control to the caller, either by exiting or throwing an - * exception. -**/ +/// \def JSONV_NO_RETURN +/// \brief Mark that a given function will never return control to the caller, either by exiting or throwing an +/// exception. #ifndef JSONV_NO_RETURN -# if defined(__GNUC__) +# if __has_cpp_attribute(noreturn) +# define JSONV_NO_RETURN [[noreturn]] +# elif defined(__GNUC__) # define JSONV_NO_RETURN __attribute__((noreturn)) # else # define JSONV_NO_RETURN # endif #endif -/** \def JSONV_ALWAYS_INLINE - * \brief Always inline the function this decorates, no matter what the compiler might think is best. -**/ +/// \def JSONV_ALWAYS_INLINE +/// \brief Always inline the function this decorates, no matter what the compiler might think is best. 
#ifndef JSONV_ALWAYS_INLINE # if defined(__GNUC__) # define JSONV_ALWAYS_INLINE __attribute__((always_inline)) @@ -139,11 +135,10 @@ # endif #endif -/** \def JSONV_INTEGER_ALTERNATES_LIST - * \brief An item list of types to also consider as an integer. - * This mostly exists to help resolve the C-induced type ambiguity for the literal \c 0. It most prefers to be an - * \c int, but might also become a \c long or a pointer type. -**/ +/// \def JSONV_INTEGER_ALTERNATES_LIST +/// \brief An item list of types to also consider as an integer. +/// This mostly exists to help resolve the C-induced type ambiguity for the literal \c 0. It most prefers to be an +/// \c int, but might also become a \c long or a pointer type. #ifndef JSONV_INTEGER_ALTERNATES_LIST # define JSONV_INTEGER_ALTERNATES_LIST(item) \ item(int) \ @@ -152,9 +147,8 @@ item(unsigned long long) #endif -/** \def JSONV_COMPILER_SUPPORTS_TEMPLATE_TEMPLATES - * Does the compiler properly support template templates? Most compilers do, MSVC does not. -**/ +/// \def JSONV_COMPILER_SUPPORTS_TEMPLATE_TEMPLATES +/// Does the compiler properly support template templates? Most compilers do, MSVC does not. #ifndef JSONV_COMPILER_SUPPORTS_TEMPLATE_TEMPLATES # ifdef _MSC_VER # define JSONV_COMPILER_SUPPORTS_TEMPLATE_TEMPLATES 0 @@ -163,4 +157,26 @@ # endif #endif -#endif/*__JSONV_CONFIG_HPP_INCLUDED__*/ +/// \def JSONV_LIKELY +/// Mark that a section of code is likely to be reached. +/// +/// \see JSONV_UNLIKELY +#ifndef JSONV_LIKELY +# if __has_cpp_attribute(likely) +# define JSONV_LIKELY [[likely]] +# else +# define JSONV_LIKELY +# endif +#endif + +/// \def JSONV_UNLIKELY +/// Mark that a section of code is not likely to be reached. 
+/// +/// \see JSONV_LIKELY +#ifndef JSONV_UNLIKELY +# if __has_cpp_attribute(unlikely) +# define JSONV_UNLIKELY [[unlikely]] +# else +# define JSONV_UNLIKELY +# endif +#endif diff --git a/include/jsonv/detail/basic_view.hpp b/include/jsonv/detail/basic_view.hpp index 8ac165d..920e42b 100644 --- a/include/jsonv/detail/basic_view.hpp +++ b/include/jsonv/detail/basic_view.hpp @@ -1,33 +1,28 @@ -/** \file jsonv/detail/basic_view.hpp - * - * Copyright (c) 2014 by Travis Gockel. All rights reserved. - * - * This program is free software: you can redistribute it and/or modify it under the terms of the Apache License - * as published by the Apache Software Foundation, either version 2 of the License, or (at your option) any later - * version. - * - * \author Travis Gockel (travis@gockelhut.com) -**/ -#ifndef __JSONV_DETAIL_BASIC_VIEW_HPP_INCLUDED__ -#define __JSONV_DETAIL_BASIC_VIEW_HPP_INCLUDED__ +/// \file jsonv/detail/basic_view.hpp +/// +/// Copyright (c) 2014-2020 by Travis Gockel. All rights reserved. +/// +/// This program is free software: you can redistribute it and/or modify it under the terms of the Apache License +/// as published by the Apache Software Foundation, either version 2 of the License, or (at your option) any later +/// version. +/// +/// \author Travis Gockel (travis@gockelhut.com) +#pragma once #include #include #include -namespace jsonv -{ -namespace detail +namespace jsonv::detail { -/** A view template used for array and object views of a \c value. This class allows traversing a \c value with a range - * based for loop. - * - * \note - * A view does nothing to preserve the lifetime of the underlying container, nor does it remain valid if the container - * is modified. -**/ +/// A view template used for array and object views of a \c value. This class allows traversing a \c value with a range +/// based for loop. 
+/// +/// \note +/// A view does nothing to preserve the lifetime of the underlying container, nor does it remain valid if the container +/// is modified. template @@ -41,37 +36,36 @@ class basic_view typedef typename std::iterator_traits::value_type value_type; typedef typename std::iterator_traits::reference reference; typedef typename std::iterator_traits::pointer pointer; - + public: basic_view(iterator begin_, iterator end_) : _begin(begin_), _end(end_) { } - + iterator begin() { return _begin; } const_iterator begin() const { return _begin; } iterator end() { return _end; } const_iterator end() const { return _end; } - + const_iterator cbegin() const { return _begin; } const_iterator cend() const { return _end; } - + reverse_iterator rbegin() { return reverse_iterator(end()); }; const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); } reverse_iterator rend() { return reverse_iterator(begin()); } const_reverse_iterator rend() const { return reverse_iterator(begin()); } - + const_reverse_iterator crbegin() const { return const_reverse_iterator(end()); } const_reverse_iterator crend() const { return reverse_iterator(begin()); } - + private: iterator _begin; iterator _end; }; -/** Something that owns an object. Used in basic_owning_view to move the value somewhere fixed before getting the - * itertors and constructing the base. -**/ +/// Something that owns an object. Used in basic_owning_view to move the value somewhere fixed before getting the +/// iterators and constructing the base. template class basic_owner { @@ -79,12 +73,12 @@ class basic_owner explicit basic_owner(T&& x) : _value(std::move(x)) { } - + protected: T _value; }; -/** A form of basic_view that owns the object it is iterating over. **/ +/// A form of basic_view that owns the object it is iterating over.
template { using basic_owner::_value; - + public: template basic_owning_view(TContainer&& container, FBegin begin, FEnd end) : @@ -104,6 +98,3 @@ class basic_owning_view : }; } -} - -#endif/*__JSONV_DETAIL_BASIC_VIEW_HPP_INCLUDED__*/ diff --git a/include/jsonv/kind.hpp b/include/jsonv/kind.hpp new file mode 100644 index 0000000..cf77a83 --- /dev/null +++ b/include/jsonv/kind.hpp @@ -0,0 +1,51 @@ +/** \file jsonv/kind.hpp + * + * Copyright (c) 2019-2020 by Travis Gockel. All rights reserved. + * + * This program is free software: you can redistribute it and/or modify it under the terms of the Apache License + * as published by the Apache Software Foundation, either version 2 of the License, or (at your option) any later + * version. + * + * \author Travis Gockel (travis@gockelhut.com) +**/ +#ifndef __JSONV_KIND_HPP_INCLUDED__ +#define __JSONV_KIND_HPP_INCLUDED__ + +#include + +#include +#include + +namespace jsonv +{ + +/** \ingroup Value + * \{ +**/ + +/** Describes the \e kind of data a \c value holds. See \c value for more information. + * + * \see http://json.org/ +**/ +enum class kind : unsigned char +{ + null, + object, + array, + string, + integer, + decimal, + boolean, +}; + +/** Print out the name of the \c kind. **/ +JSONV_PUBLIC std::ostream& operator<<(std::ostream&, const kind&); + +/** Get the name of the \c kind. **/ +JSONV_PUBLIC std::string to_string(const kind&); + +/** \} **/ + +} + +#endif/*__JSONV_KIND_HPP_INCLUDED__*/ diff --git a/include/jsonv/parse.hpp b/include/jsonv/parse.hpp index 13f5239..814e016 100644 --- a/include/jsonv/parse.hpp +++ b/include/jsonv/parse.hpp @@ -1,5 +1,5 @@ /** \file jsonv/parse.hpp - * + * * Copyright (c) 2012-2014 by Travis Gockel. All rights reserved. * * This program is free software: you can redistribute it and/or modify it under the terms of the Apache License @@ -24,98 +24,93 @@ namespace jsonv class tokenizer; -/** An error encountered when parsing. 
- * - * \see parse -**/ +/// An error encountered when parsing. +/// +/// \see parse class JSONV_PUBLIC parse_error : public std::runtime_error { public: typedef std::size_t size_type; - + /** Description of a single parsing problem. **/ struct problem { public: problem(size_type line, size_type column, size_type character, std::string message); - - /** The line of input this error was encountered on. A new "line" is determined by carriage return or line feed. - * If you are in Windows and line breaks are two characters, the line number of the error will appear to be - * twice as high as you would think. - **/ + + /// The line of input this error was encountered on. A new "line" is determined by carriage return or line feed. + /// If you are in Windows and line breaks are two characters, the line number of the error will appear to be + /// twice as high as you would think. size_type line() const { return _line; } - - /** The character index on the current line this error was encountered on. **/ + + /// The character index on the current line this error was encountered on. size_type column() const { return _column; } - - /** The character index into the entire input this error was encountered on. **/ + + /// The character index into the entire input this error was encountered on. size_type character() const { return _character; } - - /** A message from the parser which has user-readable details about the encountered problem. **/ + + /// A message from the parser which has user-readable details about the encountered problem. const std::string& message() const { return _message; } - + private: size_type _line; size_type _column; size_type _character; std::string _message; }; - + typedef std::deque problem_list; - + public: parse_error(problem_list, value partial_result); - + virtual ~parse_error() noexcept; - - /** The list of problems which ultimately contributed to this \c parse_error. There will always be at least one - * \c problem in this list. 
- **/ + + /// The list of problems which ultimately contributed to this \c parse_error. There will always be at least one + /// \c problem in this list. const problem_list& problems() const; - - /** Get the partial result of parsing. There is no guarantee this value even resembles the input JSON as the input - * JSON was malformed. - **/ + + /// Get the partial result of parsing. There is no guarantee this value even resembles the input JSON as the input + /// JSON was malformed. const value& partial_result() const; - + private: problem_list _problems; value _partial_result; }; -/** Get a string representation of a problem. **/ +/// Get a string representation of a problem. JSONV_PUBLIC std::ostream& operator<<(std::ostream& os, const parse_error::problem& p); -/** Get a string representation of a problem. **/ +/// Get a string representation of a problem. JSONV_PUBLIC std::string to_string(const parse_error::problem& p); -/** Get a string representation of a \c parse_error. **/ +/// Get a string representation of a \c parse_error. JSONV_PUBLIC std::ostream& operator<<(std::ostream& os, const parse_error& p); -/** Get a string representation of a \c parse_error. **/ +/// Get a string representation of a \c parse_error. JSONV_PUBLIC std::string to_string(const parse_error& p); -/** Configuration for various parsing options. All parse functions should take in a \c parse_options as a paramter and - * should respect your settings. -**/ +/// Configuration for various parsing options. All parse functions should take in a \c parse_options as a parameter and +/// should respect your settings. class JSONV_PUBLIC parse_options { public: using size_type = value::size_type; - + /** When a parse error is encountered, what should the parser do? **/ enum class on_error { @@ -128,12 +123,12 @@ class JSONV_PUBLIC parse_options **/ ignore, }; - + /** The encoding format for strings. **/ enum class encoding { /** Use UTF-8 like a sane library should.
- * + * * \see http://www.unicode.org/versions/Unicode6.2.0/ch03.pdf#G7404 **/ utf8, @@ -142,15 +137,15 @@ class JSONV_PUBLIC parse_options * while this will reject them. **/ utf8_strict, - /** Use the CESU-8 Compatibility Encoding Scheme for UTF-16? It is generally not recommended unless your + /** Use the CESU-8 Compatibility Encoding Scheme for UTF-16. It is generally not recommended unless your * processing environment requires binary collation with UTF-16. If you do not know you need this, you probably * do not. - * + * * \see http://www.unicode.org/reports/tr26/ **/ cesu8, }; - + /** When dealing with comma separators, how should extra commas be treated? **/ enum class commas { @@ -159,7 +154,7 @@ class JSONV_PUBLIC parse_options /** Allow a single trailing comma at the end of an array or object (similar to C++ \c enum definitions). **/ allow_trailing, }; - + /** How should numbers be dealt with? **/ enum class numbers { @@ -170,21 +165,21 @@ class JSONV_PUBLIC parse_options /** Strictly comply with the JSON specification for numbers -- no leading zeros! **/ strict, }; - + public: - /** Create an instance with the default options. **/ + /// Create an instance with the default options. parse_options(); - + ~parse_options() noexcept; - + /** Create a parser with the default options -- this is the same result as the default constructor, but might be * helpful if you like to be more explicit. **/ static parse_options create_default(); - + /** Create a strict parser. In general, these options are meant to fail on anything that is not a 100% valid JSON * document. More specifically: - * + * * \code * failure_mode() == on_error::fail_immediately * string_encoding() == encoding::utf8_strict @@ -197,70 +192,59 @@ class JSONV_PUBLIC parse_options * \endcode **/ static parse_options create_strict(); - + /** See \c on_error. The default failure mode is \c fail_immediately. 
**/ on_error failure_mode() const; parse_options& failure_mode(on_error mode); - + /** The maximum allowed parsing failures the parser can encounter before throwing an error. This is only applicable * if the \c failure_mode is not \c on_error::fail_immediately. By default, this value is 10. - * + * * You should probably not set this value to an unreasonably high number, as each parse error encountered must be * stored in memory for some period of time. **/ std::size_t max_failures() const; parse_options& max_failures(std::size_t limit); - - /** The output encoding for multi-byte characters in strings. The default value is UTF-8 because UTF-8 is best. Keep - * in mind this changes the output encoding for \e all decoded strings. If you need mixed encodings, you must - * handle that in your application. - **/ + + /// The output encoding for multi-byte characters in strings. The default value is UTF-8 because UTF-8 is best. Keep + /// in mind this changes the output encoding for \e all decoded strings. If you need mixed encodings, you must + /// handle that in your application. encoding string_encoding() const; parse_options& string_encoding(encoding); - - /** How should a parser interpret numbers? By default, this is \c numbers::decimal, which allows any form of decimal - * input. - **/ + + /// How should a parser interpret numbers? By default, this is \c numbers::decimal, which allows any form of decimal + /// input. numbers number_encoding() const; parse_options& number_encoding(numbers); - - /** How should extra commas be treated? By default, this is \c commas::allow_trailing. **/ + + /// How should extra commas be treated? By default, this is \c commas::allow_trailing. commas comma_policy() const; parse_options& comma_policy(commas); - - /** The maximum allowed nesting depth of any structure in the JSON document. 
The JSON specification technically - * limits the depth to 20, but very few implementations actually conform to this, so it is fairly dangerous to set - * this value. By default, the value is 0, which means we should not do any depth checking. - **/ + + /// The maximum allowed nesting depth of any structure in the JSON document. The JSON specification technically + /// limits the depth to 20, but very few implementations actually conform to this, so it is fairly dangerous to set + /// this value. By default, the value is 0, which means we should not do any depth checking. size_type max_structure_depth() const; parse_options& max_structure_depth(size_type depth); - - /** If set to true, the result of a parse is required to have \c kind of \c kind::object or \c kind::array. By - * default, this is turned off, which will allow \c parse to return incomplete documents. - **/ + + /// If set to true, the result of a parse is required to have \c kind of \c kind::object or \c kind::array. By + /// default, this is turned off, which will allow \c parse to return incomplete documents. bool require_document() const; parse_options& require_document(bool); - - /** Should the input be completely parsed to consider the parsing a success? This is on by default. Disabling this - * option can be useful for situations where JSON input is coming from some stream and you wish to process distinct - * objects separately (this technique is used to great effect in jq: http://stedolan.github.io/jq/). - * - * \warning - * When using this option, it is best to construct a \c tokenizer for your input stream and reuse that. The - * \c parse functions all internally buffer your \c istream and while they \e attempt to use \c putback re-put - * characters back into the \c istream, they are not necessarily successful at doing so. - **/ + + /// Should the input be completely parsed to consider the parsing a success? This is on by default. 
Disabling this + /// option can be useful for situations where JSON input is coming from some stream and you wish to process distinct + /// objects separately (this technique is used to great effect in jq: http://stedolan.github.io/jq/). bool complete_parse() const; parse_options& complete_parse(bool); - - /** Are JSON comments allowed? - * - * \warning - * There is no "official" syntax for JSON comments, but this system allows - **/ + + /// Are JSON comments allowed? + /// + /// \warning + /// There is no "official" syntax for JSON comments. bool comments() const; parse_options& comments(bool); - + private: // For the purposes of ABI compliance, most modifications to the variables in this class should bump the minor // version number. @@ -275,54 +259,35 @@ class JSONV_PUBLIC parse_options bool _comments = true; }; -/** Reads a JSON value from the input stream. - * - * \note - * This function is \e not intended for verifying if the input is valid JSON, as it will intentionally correctly parse - * invalid JSON (so long as it resembles valid JSON). See \c parse_options::create_strict for a strict-mode parse. - * - * \example "parse(std::istream&, const parse_options&)" - * Parse JSON from some file. - * \code - * std::ifstream file("file.json"); - * jsonv::value out = parse(file); - * \endcode - * - * \throws parse_error if an error is found in the JSON. If the \a input terminates unexpectedly, a \c parse_error will - * still be thrown with a message like "Unexpected end: unmatched {...". If you suspect the input of going bad, you - * can check the state flags or set the exception mask of the stream (exceptions thrown by \a input while processing - * will be propagated out) -**/ +/// Reads a JSON value from the input stream. +/// +/// \note +/// This function is \e not intended for verifying if the input is valid JSON, as it will intentionally correctly parse +/// invalid JSON (so long as it resembles valid JSON). 
See \c parse_options::create_strict for a strict-mode parse. +/// +/// \example "parse(std::istream&, const parse_options&)" +/// Parse JSON from some file. +/// \code +/// std::ifstream file("file.json"); +/// jsonv::value out = parse(file); +/// \endcode +/// +/// \throws parse_error if an error is found in the JSON. If the \a input terminates unexpectedly, a \c parse_error will +/// still be thrown with a message like "Unexpected end: unmatched {...". If you suspect the input of going bad, you +/// can check the state flags or set the exception mask of the stream (exceptions thrown by \a input while processing +/// will be propagated out) value JSONV_PUBLIC parse(std::istream& input, const parse_options& = parse_options()); -/** Construct a JSON value from the given input. - * - * \throws parse_error if an error is found in the JSON. -**/ +/// Construct a JSON value from the given input. +/// +/// \throws parse_error if an error is found in the JSON. value JSONV_PUBLIC parse(const string_view& input, const parse_options& = parse_options()); -/** Construct a JSON value from the given input in `[begin, end)`. - * - * \throws parse_error if an error is found in the JSON. -**/ +/// Construct a JSON value from the given input in `[begin, end)`. +/// +/// \throws parse_error if an error is found in the JSON. value JSONV_PUBLIC parse(const char* begin, const char* end, const parse_options& = parse_options()); -/** Reads a JSON value from a buffered \c tokenizer. This less convenient function is useful when setting - * \c parse_options::complete_parse to \c false. 
- * - * \see parse(std::istream&, const parse_options&) - * - * \example "parse(tokenizer&, const parse_options&)" - * \code - * tcp_stream input(get_network_stream()); - * jsonv::tokenizer buffered(input); - * jsonv::parse_options options = jsonv::parse_options().complete_parse(false); - * jsonv::value x = parse(buffered, options); - * jsonv::value y = parse(buffered, options); - * \endcode -**/ -value JSONV_PUBLIC parse(tokenizer& input, const parse_options& = parse_options()); - } #endif/*__JSONV_PARSE_HPP_INCLUDED__*/ diff --git a/include/jsonv/path.hpp b/include/jsonv/path.hpp index bcbe052..7397199 100644 --- a/include/jsonv/path.hpp +++ b/include/jsonv/path.hpp @@ -1,14 +1,13 @@ -/** \file jsonv/path.hpp - * Support for [JSONPath](http://goessner.net/articles/JsonPath/). - * - * Copyright (c) 2014 by Travis Gockel. All rights reserved. - * - * This program is free software: you can redistribute it and/or modify it under the terms of the Apache License - * as published by the Apache Software Foundation, either version 2 of the License, or (at your option) any later - * version. - * - * \author Travis Gockel (travis@gockelhut.com) -**/ +/// \file jsonv/path.hpp +/// Support for [JSONPath](http://goessner.net/articles/JsonPath/). +/// +/// Copyright (c) 2014-2020 by Travis Gockel. All rights reserved. +/// +/// This program is free software: you can redistribute it and/or modify it under the terms of the Apache License +/// as published by the Apache Software Foundation, either version 2 of the License, or (at your option) any later +/// version. 
+/// +/// \author Travis Gockel (travis@gockelhut.com) #ifndef __JSONV_PATH_HPP_INCLUDED__ #define __JSONV_PATH_HPP_INCLUDED__ @@ -40,35 +39,35 @@ class JSONV_PUBLIC path_element path_element(std::size_t idx); path_element(int idx); path_element(std::string key); - path_element(string_view key); + path_element(string_view key); path_element(const char* key); path_element(const path_element&); path_element& operator=(const path_element&); path_element(path_element&&) noexcept; path_element& operator=(path_element&&) noexcept; - + ~path_element() noexcept; - + path_element_kind kind() const; - + std::size_t index() const; - + const std::string& key() const; - + bool operator==(const path_element&) const; bool operator!=(const path_element&) const; - + private: - union JSONV_PUBLIC storage + union storage { std::size_t index; std::string key; - + storage(std::size_t idx); storage(std::string&& key); ~storage() noexcept; }; - + private: path_element_kind _kind; storage _data; @@ -78,35 +77,34 @@ JSONV_PUBLIC std::ostream& operator<<(std::ostream&, const path_element&); JSONV_PUBLIC std::string to_string(const path_element&); -/** Represents an exact path in some JSON structure. **/ +/// Represents an exact path in some JSON structure. class JSONV_PUBLIC path : public detail::generic_container> { public: - /** Creates a new, empty path. **/ + /// Creates a new, empty path. path(); - - /** Creates a path with the provided \a elements. **/ + + /// Creates a path with the provided \a elements. path(storage_type elements); - - /** Create a \c path from a string definition. The syntax of this is ECMAScript's syntax for selecting elements, so - * path::create(".foo.bar[1]") is equivalent to path({ "foo", "bar", 1 }). - * - * \throws std::invalid_argument if the \a specification is not valid. - **/ + + /// Create a \c path from a string definition. 
The syntax of this is ECMAScript's syntax for selecting elements, so + /// path::create(".foo.bar[1]") is equivalent to path({ "foo", "bar", 1 }). + /// + /// \throws std::invalid_argument if the \a specification is not valid. static path create(string_view specification); - + path(const path&); path& operator=(const path&); path(path&&) noexcept; path& operator=(path&&) noexcept; ~path() noexcept; - - /** Return a new path with the given \a subpath appended to the back. **/ + + /// Return a new path with the given \a subpath appended to the back. path operator+(const path& subpath) const; path& operator+=(const path& subpath); - - /** Return a new path with the given \a elem appended to the back. **/ + + /// Return a new path with the given \a elem appended to the back. path operator+(path_element elem) const; path& operator+=(path_element elem); }; diff --git a/include/jsonv/serialization.hpp b/include/jsonv/serialization.hpp index cdb9fc8..bfb8ceb 100644 --- a/include/jsonv/serialization.hpp +++ b/include/jsonv/serialization.hpp @@ -1,6 +1,6 @@ /** \file jsonv/serialization.hpp * Conversion between C++ types and JSON values. - * + * * Copyright (c) 2015 by Travis Gockel. All rights reserved. * * This program is free software: you can redistribute it and/or modify it under the terms of the Apache License @@ -46,7 +46,7 @@ struct JSONV_PUBLIC version { public: using version_element = std::uint32_t; - + public: /** Initialize an instance with the given \a major and \a minor version info. **/ constexpr version(version_element major = 0, version_element minor = 0) : @@ -59,7 +59,7 @@ struct JSONV_PUBLIC version { return major == 0 && minor == 0; } - + /** Convert this instance into a \c uint64_t. The \c major version will be in the higher-order bits, while \c minor * will be in the lower-order bits. **/ @@ -68,43 +68,43 @@ struct JSONV_PUBLIC version return static_cast(major) << 32 | static_cast(minor) << 0; } - + /** Test for equality with \a other. 
**/ constexpr bool operator==(const version& other) const { return static_cast(*this) == static_cast(other); } - + /** Test for inequality with \a other. **/ constexpr bool operator!=(const version& other) const { return static_cast(*this) != static_cast(other); } - + /** Check that this version is less than \a other. The comparison is done lexicographically. **/ constexpr bool operator<(const version& other) const { return static_cast(*this) < static_cast(other); } - + /** Check that this version is less than or equal to \a other. The comparison is done lexicographically. **/ constexpr bool operator<=(const version& other) const { return static_cast(*this) <= static_cast(other); } - + /** Check that this version is greater than \a other. The comparison is done lexicographically. **/ constexpr bool operator>(const version& other) const { return static_cast(*this) > static_cast(other); } - + /** Check that this version is greater than or equal to \a other. The comparison is done lexicographically. **/ constexpr bool operator>=(const version& other) const { return static_cast(*this) >= static_cast(other); } - + public: version_element major; version_element minor; @@ -150,12 +150,12 @@ class JSONV_PUBLIC extraction_error : public: /** Create a new \c extraction_error from the given \a context and \a message. **/ explicit extraction_error(const extraction_context& context, const std::string& message); - + virtual ~extraction_error() noexcept; - + /** Get the path this extraction error came from. **/ const jsonv::path& path() const; - + private: jsonv::path _path; }; @@ -168,15 +168,15 @@ class JSONV_PUBLIC no_extractor : /** Create a new exception. **/ explicit no_extractor(const std::type_info& type); explicit no_extractor(const std::type_index& type); - + virtual ~no_extractor() noexcept; - + /** The name of the type. **/ string_view type_name() const; - + /** Get an ID for the type of \c extractor that \c formats::extract could not locate. 
**/ std::type_index type_index() const; - + private: std::type_index _type_index; std::string _type_name; @@ -190,15 +190,15 @@ class JSONV_PUBLIC no_serializer : /** Create a new exception. **/ explicit no_serializer(const std::type_info& type); explicit no_serializer(const std::type_index& type); - + virtual ~no_serializer() noexcept; - + /** The name of the type. **/ string_view type_name() const; - + /** Get an ID for the type of \c serializer that \c formats::to_json could not locate. **/ std::type_index type_index() const; - + private: std::type_index _type_index; std::string _type_name; @@ -209,14 +209,14 @@ class JSONV_PUBLIC extractor { public: virtual ~extractor() noexcept; - + /** Get the run-time type this \c extractor knows how to extract. Once this \c extractor is registered with a * \c formats, it is not allowed to change. **/ virtual const std::type_info& get_type() const = 0; - + /** Extract a the type \a from a \c value \a into a region of memory. - * + * * \param context Extra information to help you decode sub-objects, such as looking up other \c formats. It also * tracks your \c path in the decoding heirarchy, so any exceptions thrown will have \c path * information in the error message. @@ -236,14 +236,14 @@ class JSONV_PUBLIC serializer { public: virtual ~serializer() noexcept; - + /** Get the run-time type this \c serialize knows how to encode. Once this \c serializer is registered with a * \c formats, it is not allowed to change. **/ virtual const std::type_info& get_type() const = 0; - + /** Create a \c value \a from the value in the given region of memory. - * + * * \param context Extra information to help you encode sub-objects for your type, such as the ability to find other * \c formats. It also tracks the progression of types in the encoding heirarchy, so any exceptions * thrown will have \c type information in the error message. 
@@ -268,7 +268,7 @@ class JSONV_PUBLIC adapter : }; /** Simply put, this class is a collection of \c extractor and \c serializer instances. - * + * * Ultimately, \c formats form a directed graph of possible types to load. This allows you to compose formats including * your application with any number of 3rd party libraries an base formats. Imagine an application that has both a user * facing API and an object storage system, both of which speak JSON. When loading from the database, you would like @@ -277,7 +277,7 @@ class JSONV_PUBLIC adapter : * the integer \c 51. You really don't want to have to write two versions of decoders for all of your objects: one * which uses the standard checked \c value::as_integer and one that uses \c coerce_integer -- you want the same object * model. - * + * * \dot * digraph formats { * defaults [label="formats::defaults"] @@ -286,7 +286,7 @@ class JSONV_PUBLIC adapter : * my_app [label="my_app_formats"] * db [label="my_app_database_loader"] * api [label="my_app_api_loader"] - * + * * my_app -> lib * db -> defaults * db -> my_app @@ -294,24 +294,24 @@ class JSONV_PUBLIC adapter : * api -> my_app * } * \enddot - * + * * To do this, you would create your object model (called \c my_app_formats in the chart) with the object models for * your application-specific types. This can use any number of 3rd party libraries to get the job done. To make a - * functional \c formats, you would \c compose different \c formats instances into a single one. From there, - * + * functional \c formats, you would \c compose different \c formats instances into a single one. 
From there, + * * \code * jsonv::formats get_api_formats() * { * static jsonv::formats instance = jsonv::formats::compose({ jsonv::formats::coerce(), get_app_formats() }); * return instance; * } - * + * * jsonv::formats get_db_formats() * { * static jsonv::formats instance = jsonv::formats::compose({ jsonv::formats::defaults(), get_app_formats() }); * return instance; * } - * + * * MyType extract_thing(const jsonv::value& from, bool from_db) * { * return jsonv::extract(from, from_db ? get_db_formats() : get_api_formats()); @@ -322,63 +322,63 @@ class JSONV_PUBLIC formats { public: using list = std::vector; - + public: /** Get the default \c formats instance. This uses \e strict type-checking and behaves by the same rules as the * \c value \c as_ member functions (\c as_integer, \c as_string, etc). - * + * * \note * This function actually returns a \e copy of the default \c formats, so modifications do not affect the actual * instance. **/ static formats defaults(); - + /** Get the global \c formats instance. By default, this is the same as \c defaults, but you can override it with * \c set_global. The \c extract function uses this \c formats instance if none is provided, so this is convenient * if your application only has one type of \c formats to use. - * + * * \note * This function actually returns a \e copy of the global \c formats, so modifications do not affect the actual * instance. If you wish to alter the global formats, use \c set_global. **/ static formats global(); - + /** Set the \c global \c formats instance. * * \returns the previous value of the global formats instance. **/ static formats set_global(formats); - + /** Reset the \c global \c formats instance to \c defaults. * * \returns the previous value of the global formats instance. **/ static formats reset_global(); - + /** Get the coercing \c formats instance. This uses \e loose type-checking and behaves by the same rules as the * \c coerce_ functions in \c coerce.hpp. 
- * + * * \note * This function actually returns a \e copy of the default \c formats, so modifications do not affect the actual * instance. **/ static formats coerce(); - + /** Create a new, empty \c formats instance. By default, this does not know how to extract anything -- not even the * basic types like \c int64_t or \c std::string. **/ formats(); - + ~formats() noexcept; - + /** Create a new (empty) \c formats using the \a bases as backing \c formats. This forms a directed graph of * \c formats objects. When searching for an \c extractor or \c serializer, the \c formats is searched, then each * base is searched depth-first left-to-right. - * + * * \param bases Is the list of \c formats objects to use as bases for the newly-created \c formats. Order matters * -- the \a bases are searched left-to-right, so \c formats that are farther left take precedence * over those more to the right. - * + * * \note * It is impossible to form an endless loop of \c formats objects, since the base of all \c formats are eventually * empty. If there is an existing set of nodes \f$ k \f$ and each new \c format created with \c compose is in @@ -386,11 +386,11 @@ class JSONV_PUBLIC formats * circuit. **/ static formats compose(const list& bases); - + /** Extract the provided \a type \a from a \c value \a into an area of memory. The \a context is passed to the * \c extractor which performs the conversion. In general, this should not be used directly as it is quite painful * to do so -- prefer \c extraction_context::extract or the free function \c jsonv::extract. - * + * * \throws no_extractor if an \c extractor for \a type could not be found. **/ void extract(const std::type_info& type, @@ -398,103 +398,103 @@ class JSONV_PUBLIC formats void* into, const extraction_context& context ) const; - + /** Get the \c extractor for the given \a type. - * + * * \throws no_extractor if an \c extractor for \a type could not be found. 
**/ const extractor& get_extractor(std::type_index type) const; - + /** Get the \c extractor for the given \a type. - * + * * \throws no_extractor if an \c extractor for \a type could not be found. **/ const extractor& get_extractor(const std::type_info& type) const; - + /** Encode the provided value \a from into a JSON \c value. The \a context is passed to the \c serializer which * performs the conversion. In general, this should not be used directly as it is painful to do so -- prefer * \c serialization_context::to_json or the free function \c jsonv::to_json. - * + * * \throws no_serializer if a \c serializer for \a type could not be found. **/ value to_json(const std::type_info& type, const void* from, const serialization_context& context ) const; - + /** Gets the \c serializer for the given \a type. - * + * * \throws no_serializer if a \c serializer for \a type could not be found. **/ const serializer& get_serializer(std::type_index type) const; - + /** Gets the \c serializer for the given \a type. - * + * * \throws no_serializer if a \c serializer for \a type could not be found. **/ const serializer& get_serializer(const std::type_info& type) const; - + /** Register an \c extractor that lives in some unmanaged space. - * + * * \throws duplicate_type_error if this \c formats instance already has an \c extractor that serves the provided * \c extractor::get_type and the \c duplicate_type_action is \c exception. **/ void register_extractor(const extractor*, duplicate_type_action action = duplicate_type_action::exception); - + /** Register an \c extractor with shared ownership between this \c formats instance and anything else. - * + * * \throws duplicate_type_error if this \c formats instance already has an \c extractor that serves the provided * \c extractor::get_type and the \c duplicate_type_action is \c exception. 
**/ void register_extractor(std::shared_ptr, duplicate_type_action action = duplicate_type_action::exception); - + /** Register a \c serializer that lives in some managed space. - * + * * \throws duplicate_type_error if this \c formats instance already has a \c serializer that serves the provided * \c serializer::get_type and the \c duplicate_type_action is \c exception. **/ void register_serializer(const serializer*, duplicate_type_action action = duplicate_type_action::exception); - + /** Register a \c serializer with shared ownership between this \c formats instance and anything else. - * + * * \throws duplicate_type_error if this \c formats instance already has a \c serializer that serves the provided * \c serializer::get_type and the \c duplicate_type_action is \c exception. **/ void register_serializer(std::shared_ptr, duplicate_type_action action = duplicate_type_action::exception); - + /** Register an \c adapter that lives in some unmanaged space. - * + * * \throws duplicate_type_error if this \c formats instance already has either an \c extractor or \c serializer * that serves the provided \c adapter::get_type and the \c duplicate_type_action is * \c exception. **/ void register_adapter(const adapter*, duplicate_type_action action = duplicate_type_action::exception); - + /** Register an \c adapter with shared ownership between this \c formats instance and anything else. - * + * * \throws duplicate_type_error if this \c formats instance already has either an \c extractor or \c serializer * that serves the provided \c adapter::get_type the \c duplicate_type_action is * \c exception. **/ void register_adapter(std::shared_ptr, duplicate_type_action action = duplicate_type_action::exception); - + /** Test for equality between this instance and \a other. If two \c formats are equal, they are the \e exact same * node in the graph. Even if one \c formats has the exact same types for the exact same extractors. 
**/ bool operator==(const formats& other) const; - + /** Test for inequality between this instance and \a other. The opposite of \c operator==. **/ bool operator!=(const formats& other) const; - + private: struct data; - + private: explicit formats(std::vector> bases); - + private: std::shared_ptr _data; }; @@ -505,33 +505,33 @@ class JSONV_PUBLIC context_base public: /** Create a new instance using the default \c formats (\c formats::global). **/ context_base(); - + /** Create a new instance using the given \a fmt, \a ver and \a p. **/ explicit context_base(jsonv::formats fmt, const jsonv::version& ver = jsonv::version(1), const void* userdata = nullptr ); - + virtual ~context_base() noexcept = 0; - + /** Get the \c formats object backing extraction and encoding. **/ const jsonv::formats& formats() const { return _formats; } - + /** Get the version this \c extraction_context was created with. **/ const jsonv::version version() const { return _version; } - + /** Get a pointer to arbitrary user data. **/ const void* user_data() const { return _user_data; } - + private: jsonv::formats _formats; jsonv::version _version; @@ -544,16 +544,16 @@ class JSONV_PUBLIC extraction_context : public: /** Create a new instance using the default \c formats (\c formats::global). **/ extraction_context(); - + /** Create a new instance using the given \a fmt, \a ver and \a p. **/ explicit extraction_context(jsonv::formats fmt, const jsonv::version& ver = jsonv::version(), jsonv::path p = jsonv::path(), const void* userdata = nullptr ); - + virtual ~extraction_context() noexcept; - + /** Get the current \c path this \c extraction_context is extracting for. This is useful when debugging and * generating error messages. **/ @@ -561,11 +561,11 @@ class JSONV_PUBLIC extraction_context : { return _path; } - + /** Attempt to extract a \c T from \a from using the \c formats associated with this context. - * + * * \tparam T is the type to extract from \a from. It must be movable. 
- * + * * \throws extraction_error if anything goes wrong when attempting to extract a value. **/ template @@ -577,16 +577,16 @@ class JSONV_PUBLIC extraction_context : auto destroy = detail::on_scope_exit([ptr] { ptr->~T(); }); return std::move(*ptr); } - + void extract(const std::type_info& type, const value& from, void* into ) const; - + /** Attempt to extract a \c T from from.at_path(subpath) using the \c formats associated with this context. - * + * * \tparam T is the type to extract from \a from. It must be movable. - * + * * \throws extraction_error if anything goes wrong when attempting to extract a value. **/ template @@ -598,13 +598,13 @@ class JSONV_PUBLIC extraction_context : auto destroy = detail::on_scope_exit([ptr] { ptr->~T(); }); return std::move(*ptr); } - + void extract_sub(const std::type_info& type, const value& from, jsonv::path subpath, void* into) const; - + /** Attempt to extract a \c T from from.at_path({elem}) using the \c formats associated with this context. - * + * * \tparam T is the type to extract from \a from. It must be movable. - * + * * \throws extraction_error if anything goes wrong when attempting to extract a value. **/ template @@ -612,12 +612,12 @@ class JSONV_PUBLIC extraction_context : { return extract_sub(from, jsonv::path({ elem })); } - + private: jsonv::path _path; }; -/** Extract a C++ value from \a from using the provided \a fmts. **/ +/// Extract a C++ value from \a from using the provided \a fmts. template T extract(const value& from, const formats& fmts) { @@ -625,7 +625,7 @@ T extract(const value& from, const formats& fmts) return context.extract(from); } -/** Extract a C++ value from \a from using \c jsonv::formats::global(). **/ +/// Extract a C++ value from \a from using \c jsonv::formats::global(). template T extract(const value& from) { @@ -637,31 +637,29 @@ class JSONV_PUBLIC serialization_context : public context_base { public: - /** Create a new instance using the default \c formats (\c formats::global). 
**/ + /// Create a new instance using the default \c formats (\c formats::global). serialization_context(); - - /** Create a new instance using the given \a fmt and \a ver. **/ + + /// Create a new instance using the given \a fmt and \a ver. explicit serialization_context(jsonv::formats fmt, const jsonv::version& ver = jsonv::version(), const void* userdata = nullptr ); - + virtual ~serialization_context() noexcept; - - /** Convenience function for converting a C++ object into a JSON value. - * - * \see formats::to_json - **/ + + /// Convenience function for converting a C++ object into a JSON value. + /// + /// \see formats::to_json template value to_json(const T& from) const { return to_json(typeid(T), static_cast(&from)); } - - /** Dynamically convert a type into a JSON value. - * - * \see formats::to_json - **/ + + /// Dynamically convert a type into a JSON value. + /// + /// \see formats::to_json value to_json(const std::type_info& type, const void* from) const; }; diff --git a/include/jsonv/tokenizer.hpp b/include/jsonv/tokenizer.hpp deleted file mode 100644 index 19aec7e..0000000 --- a/include/jsonv/tokenizer.hpp +++ /dev/null @@ -1,166 +0,0 @@ -/** \file jsonv/tokenizer.hpp - * A stream-based tokenizer meant to help with creating custom parsers. If you are happy with the JSON Voorhees AST - * (\c value and friends), it is probably easier to use the functions in \c jsonv/parse.hpp. - * - * Copyright (c) 2014-2018 by Travis Gockel. All rights reserved. - * - * This program is free software: you can redistribute it and/or modify it under the terms of the Apache License - * as published by the Apache Software Foundation, either version 2 of the License, or (at your option) any later - * version. 
- * - * \author Travis Gockel (travis@gockelhut.com) -**/ -#ifndef __JSONV_TOKENIZER_INCLUDED__ -#define __JSONV_TOKENIZER_INCLUDED__ - -#include -#include - -#include -#include -#include - -namespace jsonv -{ - -/** The kind of token that was encountered in a \c tokenizer. The tokenizer will is parsing this information anyway, so - * it is easy to expose. -**/ -enum class token_kind : unsigned int -{ - /** Unknown value...either uninitialized or a parse error. **/ - unknown = 0x00000, - /** The beginning of an array: \c [. **/ - array_begin = 0x00001, - /** The end of an array: \c ]. **/ - array_end = 0x00002, - /** A boolean: \c true or \c false. **/ - boolean = 0x00004, - /** The literal \c null. **/ - null = 0x00008, - /** A number -- in either integer or decimal type. **/ - number = 0x00010, - /** A separator was encountered: \c ,. **/ - separator = 0x00020, - /** A string was encountered. It could be the key of an object, but it is not the responsibility of the \c tokenizer - * to track this. - **/ - string = 0x00040, - /** The beginning of an object: \c {. **/ - object_begin = 0x00080, - /** The delimiter between an object key and value: \c :. **/ - object_key_delimiter = 0x00100, - /** The end of an object: \c }. **/ - object_end = 0x00200, - /** The whitespace in between things. **/ - whitespace = 0x00400, - /** A JSON comment block. **/ - comment = 0x00800, - /** Indicates that a parse error happened. **/ - parse_error_indicator = 0x10000, -}; - -/** Combine multiple flag values. **/ -constexpr token_kind operator|(token_kind a, token_kind b) -{ - return token_kind(static_cast(a) | static_cast(b)); -} - -/** Filter out flag values. **/ -constexpr token_kind operator&(token_kind a, token_kind b) -{ - return token_kind(static_cast(a) & static_cast(b)); -} - -/** Invert flag values. **/ -constexpr token_kind operator~(token_kind a) -{ - return token_kind(~static_cast(a)); -} - -/** Output the given \c token_kind to the \c std::ostream. 
**/ -JSONV_PUBLIC std::ostream& operator<<(std::ostream&, const token_kind&); - -/** Convert the given \c token_kind to an \c std::string. **/ -JSONV_PUBLIC std::string to_string(const token_kind&); - -/** Splits input into tokens, allowing traversal of JSON without verification. This is the basis for JSON parsers. - * - * An important thing to remember is a \c tokenizer does not perform any real validation of any kind beyond emitting - * a \c token_kind::unknown when it encounters complete garbage. What does this mean? Given the input string: - * - * \code - * [{]{{{{{{}}{{]]]]][][]]][[[[]]]"fdsadf"]]]}}}}}}}}]]] - * \endcode - * - * A \c tokenizer will emit \c token_kind::array_begin, \c token_kind::object_begin, \c token_kind::array_end and so - * on, even though it is illegal JSON. It is up to a higher-level construct to detect such failures. -**/ -class JSONV_PUBLIC tokenizer -{ -public: - using size_type = std::vector::size_type; - - /// \deprecated - /// See \ref buffer_reserve. - static size_type min_buffer_size(); - - /// \deprecated - /// See \ref buffer_reserve. - static void set_min_buffer_size(size_type sz); - - /** A representation of what this tokenizer has. **/ - struct token - { - string_view text; - token_kind kind; - - operator std::pair() - { - return { text, kind }; - } - }; - -public: - /// Construct a tokenizer to read the given non-owned \a input. - explicit tokenizer(string_view input); - - /// Construct a tokenizer from the provided \a input. - explicit tokenizer(std::istream& input); - - ~tokenizer() noexcept; - - /// Get the input this instance is reading from. - const string_view& input() const; - - /** Attempt to go to the next token in the input stream. The contents of \c current will be cleared. - * - * \returns \c true if another token was obtained; \c false if we reached EOF or an I/O failure. Check \c input to - * see which. - **/ - bool next(); - - /** Get the current token and its associated \c token_kind. 
- * - * \returns The current token. - * \throws std::logic_error if \c next has not been called or if it returned \c false. - **/ - const token& current() const; - - /// \deprecated - /// Calling this function has no effect and will be removed in 2.0. - void buffer_reserve(size_type sz); - -private: - explicit tokenizer(std::shared_ptr input); - -private: - string_view _input; - const char* _position; - token _current; //!< The current token - std::shared_ptr _track; //!< Used to track input data when needed (\c std::istream constructor) -}; - -} - -#endif/*__JSONV_TOKENIZER_INCLUDED__*/ diff --git a/include/jsonv/value.hpp b/include/jsonv/value.hpp index 688d31a..ad79ac2 100644 --- a/include/jsonv/value.hpp +++ b/include/jsonv/value.hpp @@ -1,5 +1,5 @@ /** \file jsonv/value.hpp - * + * * Copyright (c) 2012-2018 by Travis Gockel. All rights reserved. * * This program is free software: you can redistribute it and/or modify it under the terms of the Apache License @@ -12,6 +12,7 @@ #define __JSONV_VALUE_HPP_INCLUDED__ #include +#include #include #include @@ -49,7 +50,7 @@ union value_storage int64_t integer; double decimal; bool boolean; - + constexpr value_storage() : object(nullptr) { } @@ -62,78 +63,55 @@ union value_storage * \{ **/ -/** Describes the \e kind of data a \c value holds. See \c value for more information. - * - * \see http://json.org/ -**/ -enum class kind : unsigned char -{ - null, - object, - array, - string, - integer, - decimal, - boolean -}; - -/** Print out the name of the \c kind. **/ -JSONV_PUBLIC std::ostream& operator<<(std::ostream&, const kind&); - -/** Get the name of the \c kind. **/ -JSONV_PUBLIC std::string to_string(const kind&); - /** Get a string representation of the given \c value. **/ JSONV_PUBLIC std::string to_string(const value&); -/** Thrown from various \c value methods when attempting to perform an operation which is not valid for the \c kind of - * value. 
-**/ +/// Thrown from various \c value methods when attempting to perform an operation which is not valid for the \c kind of +/// value. class JSONV_PUBLIC kind_error : public std::logic_error { public: explicit kind_error(const std::string& description); - + virtual ~kind_error() noexcept; }; -/** Represents a single JSON value, which can be any one of a potential \c kind, each behaving slightly differently. - * Instances will vary their behavior based on their kind -- functions will throw a \c kind_error if the operation does - * not apply to the value's kind. For example, it does not make sense to call \c find on an \c integer. - * - * - \c kind::null - * You cannot do anything with this...it is just null. - * - \c kind::boolean - * These values can be \c true or \c false. - * - \c kind::integer - * A numeric value which can be added, subtracted and all the other things you would expect. - * - \c kind::decimal - * Floating-point values should be considered "more general" than integers -- you may request an integer value as a - * decimal, but you cannot request a decimal as an integer, even when doing so would not require rounding. The - * literal \c 20.0 will always have \c kind::decimal. - * - \c kind::string - * A UTF-8 encoded string which is mostly accessed through the \c std::string class. Some random functions work in - * the cases where it makes sense (for example: \c empty and \c size), but in general, string manipulation should be - * done after calling \c as_string. - * - \c kind::array - * An array behaves like a \c std::deque because it is ultimately backed by one. If you feel the documentation is - * lacking, read this: http://en.cppreference.com/w/cpp/container/deque. - * - \c kind::object - * An object behaves lake a \c std::map because it is ultimately backed by one. If you feel the documentation is - * lacking, read this: http://en.cppreference.com/w/cpp/container/map. 
This library follows the recommendation in - * RFC 7159 to not allow for duplicate keys because most other libraries can not deal with it. It would also make - * the AST significantly more painful. - * - * \see http://json.org/ - * \see http://tools.ietf.org/html/rfc7159 -**/ +/// Represents a single JSON value, which can be any one of a potential \c kind, each behaving slightly differently. +/// Instances will vary their behavior based on their kind -- functions will throw a \c kind_error if the operation does +/// not apply to the value's kind. For example, it does not make sense to call \c find on an \c integer. +/// +/// - \c kind::null +/// You cannot do anything with this...it is just null. +/// - \c kind::boolean +/// These values can be \c true or \c false. +/// - \c kind::integer +/// A numeric value which can be added, subtracted and all the other things you would expect. +/// - \c kind::decimal +/// Floating-point values should be considered "more general" than integers -- you may request an integer value as a +/// decimal, but you cannot request a decimal as an integer, even when doing so would not require rounding. The +/// literal \c 20.0 will always have \c kind::decimal. +/// - \c kind::string +/// A UTF-8 encoded string which is mostly accessed through the \c std::string class. Some random functions work in +/// the cases where it makes sense (for example: \c empty and \c size), but in general, string manipulation should be +/// done after calling \c as_string. +/// - \c kind::array +/// An array behaves like a \c std::deque because it is ultimately backed by one. If you feel the documentation is +/// lacking, read this: http://en.cppreference.com/w/cpp/container/deque. +/// - \c kind::object +/// An object behaves like a \c std::map because it is ultimately backed by one. If you feel the documentation is +/// lacking, read this: http://en.cppreference.com/w/cpp/container/map.
This library follows the recommendation in +/// RFC 7159 to not allow for duplicate keys because most other libraries can not deal with it. It would also make +/// the AST significantly more painful. +/// +/// \see http://json.org/ +/// \see http://tools.ietf.org/html/rfc7159 class JSONV_PUBLIC value { public: typedef std::size_t size_type; typedef std::ptrdiff_t difference_type; - + /** The base type for iterating over array values. **/ template struct basic_array_iterator : @@ -144,12 +122,12 @@ class JSONV_PUBLIC value _owner(0), _index(0) { } - + basic_array_iterator(TArrayView* owner, size_type index) : _owner(owner), _index(index) { } - + template basic_array_iterator(const basic_array_iterator& source, typename std::enable_if::value>::type* = 0 @@ -157,106 +135,106 @@ class JSONV_PUBLIC value _owner(source._owner), _index(source._index) { } - + basic_array_iterator& operator++() { ++_index; return *this; } - + basic_array_iterator operator++(int) const { basic_array_iterator clone = *this; ++clone; return clone; } - + basic_array_iterator& operator--() { --_index; return *this; } - + basic_array_iterator operator--(int) const { basic_array_iterator clone = *this; --clone; return clone; } - + template bool operator==(const basic_array_iterator& other) const { return _owner == other._owner && _index == other._index; } - + template bool operator!=(const basic_array_iterator& other) const { return !operator==(other); } - + T& operator*() const { return _owner->operator[](_index); } - + T* operator->() const { return &_owner->operator[](_index); } - + basic_array_iterator& operator+=(size_type n) { _index += n; return *this; } - + basic_array_iterator operator+(size_type n) const { basic_array_iterator clone = *this; clone += n; return clone; } - + basic_array_iterator& operator-=(size_type n) { _index -= n; return *this; } - + basic_array_iterator operator-(size_type n) const { basic_array_iterator clone = *this; clone -= n; return clone; } - + difference_type 
operator-(const basic_array_iterator& other) const { return difference_type(_index) - difference_type(other._index); } - + bool operator<(const basic_array_iterator& rhs) const { return _index < rhs._index; } - + bool operator<=(const basic_array_iterator& rhs) const { return _index <= rhs._index; } - + bool operator>(const basic_array_iterator& rhs) const { return _index > rhs._index; } - + bool operator>=(const basic_array_iterator& rhs) const { return _index >= rhs._index; } - + T& operator[](size_type n) const { return _owner->operator[](_index + n); @@ -264,27 +242,27 @@ class JSONV_PUBLIC value private: template friend struct basic_array_iterator; - + friend class value; - + private: TArrayView* _owner; size_type _index; }; - + /** The \c array_iterator is applicable when \c kind is \c kind::array. It allows you to use algorithms as if * a \c value was a normal sequence container. **/ typedef basic_array_iterator array_iterator; typedef basic_array_iterator const_array_iterator; - + /** If \c kind is \c kind::array, an \c array_view allows you to access a value as a sequence container. This is * most useful for range-based for loops. **/ typedef detail::basic_view array_view; typedef detail::basic_view const_array_view; typedef detail::basic_owning_view owning_array_view; - + /** The base iterator type for iterating over object types. It is a bidirectional iterator similar to a * \c std::map. **/ @@ -296,11 +274,11 @@ class JSONV_PUBLIC value basic_object_iterator() : _impl() { } - + basic_object_iterator(const basic_object_iterator& source) : _impl(source._impl) { } - + /** This allows assignment from an \c object_iterator to a \c const_object_iterator. 
**/ template basic_object_iterator(const basic_object_iterator& source, @@ -308,104 +286,104 @@ class JSONV_PUBLIC value ) : _impl(source._impl) { } - + basic_object_iterator& operator=(const basic_object_iterator& source) { _impl = source._impl; return *this; } - + template typename std::enable_if::value, basic_object_iterator&>::type operator=(const basic_object_iterator& source) { return operator=(basic_object_iterator(source)); } - + basic_object_iterator& operator++() { increment(); return *this; } - + basic_object_iterator operator++(int) const { basic_object_iterator clone(*this); clone.increment(); return clone; } - + basic_object_iterator& operator--() { decrement(); return *this; } - + basic_object_iterator operator--(int) const { basic_object_iterator clone(*this); clone.decrement(); return clone; } - + template bool operator ==(const basic_object_iterator& other) const { return _impl == other._impl; } - + template bool operator !=(const basic_object_iterator& other) const { return _impl != other._impl; } - + T& operator *() const { return current(); } - + T* operator ->() const { return ¤t(); } - + private: friend class value; - + template explicit basic_object_iterator(const UIterator& iter) : _impl(iter) { } - + void increment() { ++_impl; } - + void decrement() { --_impl; } - + T& current() const { return *_impl; } - + private: TIterator _impl; }; - + /** The type of value stored when \c kind is \c kind::object. **/ typedef std::pair object_value_type; - + /** The \c object_iterator is applicable when \c kind is \c kind::object. It allows you to use algorithms as if * a \c value was a normal associative container. **/ typedef basic_object_iterator::iterator> object_iterator; typedef basic_object_iterator::const_iterator> const_object_iterator; - + /** If \c kind is \c kind::object, an \c object_view allows you to access a value as an associative container. * This is most useful for range-based for loops. 
**/ @@ -426,82 +404,81 @@ class JSONV_PUBLIC value /// with the same name. bool inserted; }; - + public: /** Default-construct this to null. **/ constexpr value() : _kind(jsonv::kind::null) { } - - /** The nullptr overload will fail to compile -- use \c null if you want a \c kind::null. **/ + + /// The nullptr overload will fail to compile -- use \c jsonv::null if you want a \c kind::null. value(std::nullptr_t) = delete; - + /** Copy the contents of \a source into a new instance. **/ value(const value& source); - + /** Create a \c kind::string with the given \a value. **/ value(const std::string& value); /** Create a \c kind::string with the given \a value. **/ value(const string_view& value); - - /** Create a \c kind::string with the given \a value. - * - * \param value The value to create with. This must be null-terminated. - **/ + + /// Create a \c kind::string with the given \a value. + /// + /// \param value The value to create with. This must be null-terminated. value(const char* value); - + /// Create a \c kind::string with the given \a value. Keep in mind that it will be converted to and stored as a /// UTF-8 encoded string. value(const std::wstring& value); - + /** Create a \c kind::string with the given \a value. Keep in mind that it will be converted to and stored as a * UTF-8 encoded string. - * + * * \param value The value to create with. This must be null-terminated. **/ value(const wchar_t* value); - + /** Create a \c kind::integer with the given \a value. **/ value(int64_t value); - + /** Create a \c kind::decimal with the given \a value. **/ value(double value); - + /** Create a \c kind::decimal with the given \a value. **/ value(float value); - + /** Create a \c kind::boolean with the given \a value. **/ value(bool value); - + #define JSONV_VALUE_INTEGER_ALTERNATIVE_CTOR_PROTO_GENERATOR(type_) \ value(type_ val); JSONV_INTEGER_ALTERNATES_LIST(JSONV_VALUE_INTEGER_ALTERNATIVE_CTOR_PROTO_GENERATOR) - + /** Destruction will never throw. 
**/ ~value() noexcept; - + /** Copy-assigns \c source to this. - * + * * If an exception is thrown during the copy, it is propagated out. This instance will remain unchanged. **/ value& operator=(const value& source); - + /** Move-construct this instance, leaving \a source as a null value. **/ value(value&& source) noexcept; - + /** Move-assigns \c source to this, leaving \a source as a null value. - * + * * Unlike a copy, this will never throw. **/ value& operator=(value&& source) noexcept; - + /** Get this value as a string. - * + * * \throws kind_error if this value does not represent a string. **/ const std::string& as_string() const; - + /** Tests if this \c kind is \c kind::string. **/ bool is_string() const; @@ -517,55 +494,55 @@ class JSONV_PUBLIC value * \throws kind_error if this value does not represent a string. **/ std::wstring as_wstring() const; - + /** Get this value as an integer. - * + * * \throws kind_error if this value does not represent an integer. **/ int64_t as_integer() const; - + /** Tests if this \c kind is \c kind::integer. **/ bool is_integer() const; - + /** Get this value as a decimal. If the value's underlying kind is actually an integer type, cast the integer to a * double before returning. This ignores the potential loss of precision. - * + * * \throws kind_error if this value does not represent a decimal or integer. **/ double as_decimal() const; - + /** Tests if this \c kind is \c kind::integer or \c kind::decimal. **/ bool is_decimal() const; - + /** Get this value as a boolean. - * + * * \throws kind_error if this value does not represent a boolean. **/ bool as_boolean() const; - + /** Tests if this \c kind is \c kind::boolean. **/ bool is_boolean() const; - + /** Tests if this \c kind is \c kind::array. **/ bool is_array() const; - + /** Tests if this \c kind is \c kind::object. **/ bool is_object() const; - + /** Tests if this \c kind is \c kind::null. **/ bool is_null() const; - + /** Resets this value to null. 
**/ void clear(); - + /** Get this value's kind. **/ inline jsonv::kind kind() const { return _kind; } - + /** Get the value specified by the path \a p. - * + * * \throws std::out_of_range if any path along the chain did not exist. * \throws kind_error if the path traversal is not valid for the value (for example: if the path specifies an array * index when the value is a string). @@ -578,41 +555,41 @@ class JSONV_PUBLIC value const value& at_path(const path& p) const; const value& at_path(string_view p) const; const value& at_path(size_type p) const; - + /** Similar to \c count, but walks the given path \a p to determine its presence. - * + * * \returns \c 1 if the path finds an element; \c 0 if there is no such path in the tree. - * + * * \throws parse_error if a \c string_view was specified that did not have a valid specification (see * \c path::create). **/ size_type count_path(const path& p) const; size_type count_path(string_view p) const; size_type count_path(size_type p) const; - + /** Get or create the value specified by the path \a p. This is the moral equivalent to \c operator[] for paths. If * no value exists at the path, a new one is created as the default (\c null) value. If any path along the way * either does not exist or is \c null, it is created for you, based on the \e implications of the specification * \a p. Unlike \c at_path, which will throw if accessing a non-existent key of an \c object or going past the end * of an \c array, this will simply create that path and fill in the blanks with \c null values. - * + * * \throws kind_error if the path traversal is not valid for the value (for example: if the path specifies an array * index when the value is a string). * \throws parse_error if a \c string_view was specified that did not have a valid specification (see * \c path::create). 
- * + * * \see at_path **/ value& path(const path& p); value& path(string_view p); value& path(size_type p); - + /** Swap the value this instance represents with \a other. **/ void swap(value& other) noexcept; - + /** Compares two JSON values for equality. Two JSON values are equal if and only if all of the following conditions * apply: - * + * * 1. They have the same valid value for \c kind. * - If \c kind is invalid (memory corruption), then two JSON values are \e not equal, even if they have been * corrupted in the same way and even if they share \c this (a corrupt object is not equal to itself). @@ -621,20 +598,20 @@ class JSONV_PUBLIC value * - string, integer, decimal and boolean follow the classic rules for their type. * - objects are equal if they have the same keys and values corresponding with the same key are also equal. * - arrays are equal if they have the same length and the values at each index are also equal. - * + * * \note * The rules for equality are based on Python \c dict and \c list. **/ bool operator==(const value& other) const; - + /** Compares two JSON values for inequality. The rules for inequality are the exact opposite of equality. **/ bool operator!=(const value& other) const; - + /** Used to build a strict-ordering of JSON values. When comparing values of the same kind, the ordering should * align with your intuition. When comparing values of different kinds, some arbitrary rules were created based on * how "complicated" the author thought the type to be. - * + * * - null: less than everything but null, which it is equal to. * - boolean: false is less than true. * - integer, decimal: compared by their numeric value. 
Comparisons between two integers do not cast, but comparison @@ -643,116 +620,116 @@ class JSONV_PUBLIC value * might lead to surprising results) * - array: compared lexicographically by elements (recursively following this same technique) * - object: entries in the object are sorted and compared lexicographically, first by key then by value - * + * * \returns -1 if this is less than other by the rules stated above; 0 if this is equal to other; -1 if otherwise. **/ int compare(const value& other) const; - + bool operator< (const value& other) const; bool operator> (const value& other) const; bool operator<=(const value& other) const; bool operator>=(const value& other) const; - + /** Output this value to a stream. **/ friend std::ostream& operator<<(std::ostream& stream, const value& val); - + /** Get a string representation of the given \c value. **/ friend std::string to_string(const value&); /** Get an iterator to the beginning of this array. - * + * * \throws kind_error if the kind is not an array. **/ array_iterator begin_array(); const_array_iterator begin_array() const; - + /** Get an iterator to the end of this array. - * + * * \throws kind_error if the kind is not an array. **/ array_iterator end_array(); const_array_iterator end_array() const; - + /** View this instance as an array. - * + * * \throws kind_error if the kind is not an array. **/ array_view as_array() &; const_array_view as_array() const &; owning_array_view as_array() &&; - + /** Get the value in this array at the given \a idx. The overloads which accept an \c int are required to resolve * the type ambiguity of the literal \c 0 between a size_type and a char*. - * + * * \throws kind_error if the kind is not an array. 
**/ value& operator[](size_type idx); const value& operator[](size_type idx) const; inline value& operator[](int idx) { return operator[](size_type(idx)); } inline const value& operator[](int idx) const { return operator[](size_type(idx)); } - + /** Get the value in this array at the given \a idx. - * + * * \throws kind_error if the kind is not an array. * \throws std::out_of_range if the provided \a idx is above \c size. **/ value& at(size_type idx); const value& at(size_type idx) const; - + /** Push \a item to the back of this array. - * + * * \throws kind_error if the kind is not an array. **/ void push_back(value item); - + /** Pop an item off the back of this array. - * + * * \throws kind_error if the kind is not an array. * \throws std::logic_error if the array is empty. **/ void pop_back(); - + /** Push \a item to the front of this array. - * + * * \throws kind_error if the kind is not an array. **/ void push_front(value item); - + /** Pop an item from the front of this array. - * + * * \throws kind_error if the kind is not an array. * \throws std::logic_error if the array is empty. **/ void pop_front(); - + /** Insert an item into \a position on this array. - * + * * \throws kind_error if the kind is not an array. **/ array_iterator insert(const_array_iterator position, value item); - + /** Insert the range defined by [\a first, \a last) at \a position in this array. - * + * * \throws kind_error if the kind is not an array. **/ template array_iterator insert(const_array_iterator position, TForwardIterator first, TForwardIterator last) { difference_type orig_offset = std::distance(const_array_iterator(begin_array()), position); - + for (difference_type offset = orig_offset ; first != last; ++first, ++offset) insert(begin_array() + offset, *first); return begin_array() + orig_offset; } - + /** Assign \a count elements to this array with \a val. - * + * * \throws kind_error if the kind is not an array. 
**/ void assign(size_type count, const value& val); - + /** Assign the contents of range [\a first, \a last) to this array. - * + * * \throws kind_error if the kind is not an array. **/ template @@ -767,64 +744,69 @@ class JSONV_PUBLIC value ++first; } } - + /** Assign the given \a items to this array. - * + * * \throws kind_error if the kind is not an array. **/ void assign(std::initializer_list items); - + + /// Reserve at least \a count elements in the array. + /// + /// \throws kind_error if the kind is not an array. + void reserve(size_type count); + /** Resize the length of this array to \a count items. If the resize creates new elements, fill those newly-created * elements with \a val. - * + * * \throws kind_error if the kind is not an array. **/ void resize(size_type count, const value& val = value()); - + /** Erase the item at this array's \a position. - * + * * \throws kind_error if the kind is not an array. **/ array_iterator erase(const_array_iterator position); - + /** Erase the range [\a first, \a last) from this array. - * + * * \throws kind_error if the kind is not an array. **/ array_iterator erase(const_array_iterator first, const_array_iterator last); - + /** Get an iterator to the first key-value pair in this object. - * + * * \throws kind_error if the kind is not an object. **/ object_iterator begin_object(); const_object_iterator begin_object() const; - + /** Get an iterator to the one past the end of this object. - * + * * \throws kind_error if the kind is not an object. **/ object_iterator end_object(); const_object_iterator end_object() const; - + /** View this instance as an object. - * + * * \throws kind_error if the kind is not an object. **/ object_view as_object() &; const_object_view as_object() const &; owning_object_view as_object() &&; - + /** Get the value associated with the given \a key of this object. If the \a key does not exist, it will be created. - * + * * \throws kind_error if the kind is not an object. 
**/ value& operator[](const std::string& key); value& operator[](std::string&& key); value& operator[](const std::wstring& key); - + /** Get the value associated with the given \a key of this object. - * + * * \throws kind_error if the kind is not an object. * \throws std::out_of_range if the \a key is not in this object. **/ @@ -832,23 +814,23 @@ class JSONV_PUBLIC value value& at(const std::wstring& key); const value& at(const std::string& key) const; const value& at(const std::wstring& key) const; - + /** Check if the given \a key exists in this object. - * + * * \throws kind_error if the kind is not an object. **/ size_type count(const std::string& key) const; size_type count(const std::wstring& key) const; - + /** Attempt to locate a key-value pair with the provided \a key in this object. - * + * * \throws kind_error if the kind is not an object. **/ object_iterator find(const std::string& key); object_iterator find(const std::wstring& key); const_object_iterator find(const std::string& key) const; const_object_iterator find(const std::wstring& key) const; - + /// \{ /// Insert \a pair into this object. If \a hint is provided, this insertion could be optimized. /// @@ -858,7 +840,7 @@ class JSONV_PUBLIC value std::pair insert(std::pair pair); object_iterator insert(const_object_iterator hint, std::pair pair); object_iterator insert(const_object_iterator hint, std::pair pair); - + /// Insert range defined by [\a first, \a last) into this object. /// /// \throws kind_error if the kind is not an object. @@ -895,7 +877,7 @@ class JSONV_PUBLIC value void insert(std::initializer_list> items); void insert(std::initializer_list> items); /// \} - + /// \{ /// Erase the item with the given \a key. /// @@ -903,12 +885,12 @@ class JSONV_PUBLIC value /// \throws kind_error if the kind is not an object. size_type erase(const std::string& key); size_type erase(const std::wstring& key); - + /// Erase the item at the given \a position. 
/// /// \throws kind_error if the kind is not an object. object_iterator erase(const_object_iterator position); - + /// Erase the range defined by [\a first, \a last). /// /// \throws kind_error if the kind is not an object. @@ -930,63 +912,63 @@ class JSONV_PUBLIC value /// \} /** Is the underlying structure empty? - * + * * - object: Are there no keys? * - array: Are there no values? * - string: Is the string 0 length? * - null: true (always) * - all other types: false (always) - * + * * \throws nothing **/ bool empty() const noexcept; - + /** Get the number of items in this value. - * + * * - object: The number of key/value pairs. * - array: The number of values. * - string: The number of code points in the string (including \c \\0 values and counting multi-byte encodings as * more than one value). - * + * * \throws kind_error if the kind is not an object, array or string. **/ size_type size() const; - + /** \addtogroup Algorithm * \{ **/ - + /** Run a function over the values of this instance. The behavior of this function is different, depending on the * \c kind. For scalar kinds (\c kind::integer, \c kind::null, etc), \a func is called once with the value. If this * is \c kind::array, \c func is called for every value in the array and the output will be an array with each * element transformed by \a func. If this is \c kind::object, the result will be an object with each key * transformed by \a func. - * + * * \param func The function to apply to the element or elements of this instance. **/ value map(const std::function& func) const&; - + /** Run a function over the values of this instance. The behavior of this function is different, depending on the * \c kind. For scalar kinds (\c kind::integer, \c kind::null, etc), \a func is called once with the value. If this * is \c kind::array, \c func is called for every value in the array and the output will be an array with each * element transformed by \a func. 
If this is \c kind::object, the result will be an object with each key * transformed by \a func. - * + * * \param func The function to apply to the element or elements of this instance. - * + * * \note * This version of \c map provides only a basic exception-safety guarantee. If an exception is thrown while * transforming a non-scalar \c kind, there is no rollback action, so \c this is left in a usable, but * \e unpredictable state. If you need a strong exception guarantee, use the constant reference version of \c map. **/ value map(const std::function& func) &&; - + /** \} **/ - + private: friend JSONV_PUBLIC value array(); friend JSONV_PUBLIC value object(); - + private: detail::value_storage _data; jsonv::kind _kind; @@ -998,7 +980,7 @@ class JSONV_PUBLIC value JSONV_PUBLIC extern const value null; /** A user-defined literal for parsing JSON. Uses the default (non-strict) \c parse_options. - * + * * \code * R"({ * "taco": "cat", @@ -1096,20 +1078,19 @@ class JSONV_PUBLIC object_node_handle final mutable mapped_type _value; }; -/** \} **/ +/// \} } namespace std { -/** Explicit specialization of \c std::hash for \c jsonv::value types so you can store a \c value in an unordered - * container. Hashing results depend on the \c kind for the provided value -- most kinds directly use the hasher for - * their kind (hashing a \c jsonv::value for integer \c 5 should have the same hash value as directly hashing the same - * integer). For aggregate kinds \c array and \c object, hashing visits every sub-element recursively. This might be - * expensive, but is required when storing multiple values with similar layouts in the a set (which is the most common - * use case). -**/ +/// Explicit specialization of \c std::hash for \c jsonv::value types so you can store a \c value in an unordered +/// container. 
Hashing results depend on the \c kind for the provided value -- most kinds directly use the hasher for +/// their kind (hashing a \c jsonv::value for integer \c 5 should have the same hash value as directly hashing the same +/// integer). For aggregate kinds \c array and \c object, hashing visits every sub-element recursively. This might be +/// expensive, but is required when storing multiple values with similar layouts in a set (which is the most common +/// use case). template <> struct JSONV_PUBLIC hash { diff --git a/src/json-benchmark/jsonv_benchmark.cpp b/src/json-benchmark/jsonv_benchmark.cpp index 5911192..33a442f 100644 --- a/src/json-benchmark/jsonv_benchmark.cpp +++ b/src/json-benchmark/jsonv_benchmark.cpp @@ -1,13 +1,12 @@ -/** \file - * - * Copyright (c) 2015 by Travis Gockel. All rights reserved. - * - * This program is free software: you can redistribute it and/or modify it under the terms of the Apache License - * as published by the Apache Software Foundation, either version 2 of the License, or (at your option) any later - * version. - * - * \author Travis Gockel (travis@gockelhut.com) -**/ +/// \file +/// +/// Copyright (c) 2015-2020 by Travis Gockel. All rights reserved. +/// +/// This program is free software: you can redistribute it and/or modify it under the terms of the Apache License +/// as published by the Apache Software Foundation, either version 2 of the License, or (at your option) any later +/// version.
+/// +/// \author Travis Gockel (travis@gockelhut.com) #include "core.hpp" #include @@ -22,13 +21,28 @@ class jsonv_benchmark_suite : jsonv_benchmark_suite() : typed_benchmark_suite("JSONV") { } - + protected: virtual jsonv::value parse(const std::string& source) const { return jsonv::parse(source); } - + } jsonv_benchmark_suite_instance; +class jsonv_ast_benchmark_suite : + public typed_benchmark_suite +{ +public: + jsonv_ast_benchmark_suite() : + typed_benchmark_suite("JSONV-AST") + { } + +protected: + virtual jsonv::ast_index parse(const std::string& source) const + { + return jsonv::ast_index::make_from(source); + } +} jsonv_ast_benchmark_suite_instance; + } diff --git a/src/jsonv-tests/algorithm_merge_tests.cpp b/src/jsonv-tests/algorithm_merge_tests.cpp index 198036d..2cb2f45 100644 --- a/src/jsonv-tests/algorithm_merge_tests.cpp +++ b/src/jsonv-tests/algorithm_merge_tests.cpp @@ -1,6 +1,6 @@ /** \file * Data-driven tests for testing merges. - * + * * Copyright (c) 2015-2018 by Travis Gockel. All rights reserved. 
* * This program is free software: you can redistribute it and/or modify it under the terms of the Apache License @@ -42,11 +42,11 @@ class json_merge_test : b(std::move(b)), expected(std::move(expected)) { } - + virtual void run_impl() override { bool expect_failure = expected.kind() == jsonv::kind::string && expected.as_string() == "FAILURE"; - + try { TMergeRules rules; @@ -60,7 +60,7 @@ class json_merge_test : throw; } } - + private: jsonv::value a; jsonv::value b; @@ -77,12 +77,12 @@ class json_merge_test_initializer jsonv::value whole = [&p] { std::ifstream in(p.c_str()); return jsonv::parse(in); }(); jsonv::value a = whole.at("a"); jsonv::value b = whole.at("b"); - + checked_add(p, "expected", whole, a, b); checked_add(p, "recursive", whole, a, b); }); } - + private: template void checked_add(const std::string& p, @@ -105,7 +105,7 @@ class json_merge_test_initializer ); } } - + private: std::deque> _tests; } json_merge_test_initializer_instance(test_path("merges")); diff --git a/src/jsonv-tests/ast_tests.cpp b/src/jsonv-tests/ast_tests.cpp new file mode 100644 index 0000000..c31e24a --- /dev/null +++ b/src/jsonv-tests/ast_tests.cpp @@ -0,0 +1,148 @@ +/// \file +/// +/// Copyright (c) 2020 by Travis Gockel. All rights reserved. +/// +/// This program is free software: you can redistribute it and/or modify it under the terms of the Apache License +/// as published by the Apache Software Foundation, either version 2 of the License, or (at your option) any later +/// version. 
+/// +/// \author Travis Gockel (travis@gockelhut.com) +#include "test.hpp" + +#include + +#include + +template +TNode parse_single(jsonv::string_view src, jsonv::string_view expected) +{ + auto ast = jsonv::ast_index::make_from(src); + ensure_eq(to_string(ast), expected); + + auto iter = ast.begin(); + ensure_eq(jsonv::ast_node_type::document_start, (*iter).type()); + + ++iter; + auto result = (*iter).as(); + + return result; +} + +TEST(ast_parse_literal_true) +{ + parse_single("true", "^t$"); +} + +TEST(ast_parse_literal_true_incomplete) +{ + parse_single("tru", "^!"); +} + +TEST(ast_parse_literal_false) +{ + parse_single(" false", "^f$"); +} + +TEST(ast_parse_literal_null) +{ + parse_single("null ", "^n$"); +} + +TEST(ast_parse_integers) +{ + auto node_0 = parse_single("0", "^i$"); + ensure_eq(0, node_0.value()); + + auto node_1 = parse_single(" 1", "^i$"); + ensure_eq(1, node_1.value()); + + auto node_8 = parse_single("12345678 ", "^i$"); + ensure_eq(12345678, node_8.value()); +} + +TEST(ast_parse_integer_only_minus) +{ + parse_single("-", "^!"); +} + +TEST(ast_parse_string_canonical) +{ + auto node = parse_single("\"1234567890\"", "^s$"); + ensure_eq(12U, node.token_size()); + ensure_eq(10U, node.value().size()); +} + +TEST(ast_parse_string_empty) +{ + auto node = parse_single("\"\"", "^s$"); + ensure_eq(2U, node.token_size()); + ensure_eq(0U, node.value().size()); +} + +template +std::size_t sstrlen(const char (&)[N]) +{ + return N-1; +} + +TEST(ast_parse_string_double_reverse_solidus_before_escaped_quote) +{ + static const char tokens[] = R"("\\\" and keep going")"; + + auto node = parse_single(tokens, "^S$"); + ensure_eq(sstrlen(tokens), node.token_size()); +} + +TEST(ast_parse_nothing) +{ + auto ast = jsonv::ast_index::make_from(" "); + ensure_eq(to_string(ast), "^$"); +} + +TEST(ast_parse_comment) +{ + parse_single("null /* <- still null */ ", "^n$"); +} + +TEST(ast_empty_array) +{ + auto ast = jsonv::ast_index::make_from("[ ]"); + 
ensure_eq(to_string(ast), "^[]$"); +} + +TEST(ast_array_elems) +{ + auto ast = jsonv::ast_index::make_from("[ 1, 2,\t 3, \"Bob\\n\"]"); + ensure_eq(to_string(ast), "^[iiiS]$"); + + auto iter = ast.begin(); + ensure_eq(jsonv::ast_node_type::document_start, (*iter).type()); + ++iter; + auto array_node = (*iter).as(); + ensure_eq(4U, array_node.element_count()); +} + +TEST(ast_object_empty) +{ + auto ast = jsonv::ast_index::make_from("\t{}\t"); + ensure_eq(to_string(ast), "^{}$"); + + auto iter = ast.begin(); + ensure_eq(jsonv::ast_node_type::document_start, (*iter).type()); + ++iter; + auto object_node = (*iter).as(); + ensure_eq(0U, object_node.element_count()); +} + +TEST(ast_object) +{ + auto ast = jsonv::ast_index::make_from(R"( { "a": 1.0, "b": "Bob", "c": [], "d\t": {} } )"); + ensure_eq(to_string(ast), "^{kdksk[]K{}}$"); +} + +TEST(ast_parse_object_with_numeric_keys) +{ + auto ast = jsonv::ast_index::make_from("{ 3: \"Bob\", \"a\": \"A\" }"); + ensure(!ast.success()); + ensure_eq(to_string(ast), "^{!"); +} diff --git a/src/jsonv-tests/benchmark_tests.cpp b/src/jsonv-tests/benchmark_tests.cpp index 43413f1..722d7d6 100644 --- a/src/jsonv-tests/benchmark_tests.cpp +++ b/src/jsonv-tests/benchmark_tests.cpp @@ -73,7 +73,8 @@ class benchmark_test_initializer { recursive_directory_for_each(rootpath, ".json", [&, this] (const std::string& path) { - if (path.find("fail") == std::string::npos) + // TODO(#145): Re-enable loading blns.json when non-strict parsing is enabled + if (path.find("fail") == std::string::npos && path.find("blns") == std::string::npos) { _tests.emplace_back(new benchmark_test([] (const std::string& p) { return p; }, "ifstream", diff --git a/src/jsonv-tests/data/json_checker/pass-but-fail-strict-1.json b/src/jsonv-tests/data/json_checker/fail1.json similarity index 100% rename from src/jsonv-tests/data/json_checker/pass-but-fail-strict-1.json rename to src/jsonv-tests/data/json_checker/fail1.json diff --git 
a/src/jsonv-tests/data/json_checker/pass-but-fail-strict-13.json b/src/jsonv-tests/data/json_checker/fail13.json similarity index 100% rename from src/jsonv-tests/data/json_checker/pass-but-fail-strict-13.json rename to src/jsonv-tests/data/json_checker/fail13.json diff --git a/src/jsonv-tests/data/json_checker/pass-but-fail-strict-18.json b/src/jsonv-tests/data/json_checker/fail18.json similarity index 100% rename from src/jsonv-tests/data/json_checker/pass-but-fail-strict-18.json rename to src/jsonv-tests/data/json_checker/fail18.json diff --git a/src/jsonv-tests/data/json_checker/pass-but-fail-strict-25.json b/src/jsonv-tests/data/json_checker/fail25.json similarity index 100% rename from src/jsonv-tests/data/json_checker/pass-but-fail-strict-25.json rename to src/jsonv-tests/data/json_checker/fail25.json diff --git a/src/jsonv-tests/data/json_checker/pass-but-fail-strict-27.json b/src/jsonv-tests/data/json_checker/fail27.json similarity index 100% rename from src/jsonv-tests/data/json_checker/pass-but-fail-strict-27.json rename to src/jsonv-tests/data/json_checker/fail27.json diff --git a/src/jsonv-tests/data/json_checker/fail34.json b/src/jsonv-tests/data/json_checker/fail34.json new file mode 100644 index 0000000..247a268 --- /dev/null +++ b/src/jsonv-tests/data/json_checker/fail34.json @@ -0,0 +1 @@ +{ 4: "number for key" } diff --git a/src/jsonv-tests/data/json_checker/pass-but-fail-strict-4.json b/src/jsonv-tests/data/json_checker/fail4.json similarity index 100% rename from src/jsonv-tests/data/json_checker/pass-but-fail-strict-4.json rename to src/jsonv-tests/data/json_checker/fail4.json diff --git a/src/jsonv-tests/data/json_checker/pass-but-fail-strict-9.json b/src/jsonv-tests/data/json_checker/fail9.json similarity index 100% rename from src/jsonv-tests/data/json_checker/pass-but-fail-strict-9.json rename to src/jsonv-tests/data/json_checker/fail9.json diff --git a/src/jsonv-tests/detail/token_patterns_tests.cpp 
b/src/jsonv-tests/detail/token_patterns_tests.cpp deleted file mode 100644 index ef81e43..0000000 --- a/src/jsonv-tests/detail/token_patterns_tests.cpp +++ /dev/null @@ -1,165 +0,0 @@ -/** \file - * - * Copyright (c) 2014 by Travis Gockel. All rights reserved. - * - * This program is free software: you can redistribute it and/or modify it under the terms of the Apache License - * as published by the Apache Software Foundation, either version 2 of the License, or (at your option) any later - * version. - * - * \author Travis Gockel (travis@gockelhut.com) -**/ -#include - -#include - -namespace jsonv_test -{ - -using namespace jsonv; -using namespace jsonv::detail; - -template -match_result static_attempt_match(const char (& buffer)[N], token_kind& kind, std::size_t& length) -{ - return attempt_match(buffer, buffer + N - 1, kind, length); -} - -TEST(token_attempt_match_literal_true) -{ - token_kind kind; - std::size_t length; - match_result result = static_attempt_match("true", kind, length); - ensure(result == match_result::complete); - ensure_eq(token_kind::boolean, kind); - ensure_eq(4, length); -} - -TEST(token_attempt_match_literal_true_incomplete) -{ - token_kind kind; - std::size_t length; - match_result result = static_attempt_match("tru", kind, length); - ensure(result == match_result::unmatched); - ensure_eq(token_kind::boolean, kind); - ensure_eq(3, length); -} - -TEST(token_attempt_match_number_integer_incomplete) -{ - token_kind kind; - std::size_t length; - match_result result = static_attempt_match("1234567890", kind, length); - ensure(result == match_result::complete); - ensure_eq(token_kind::number, kind); - ensure_eq(10, length); -} - -TEST(token_attempt_match_number_integer_complete) -{ - token_kind kind; - std::size_t length; - match_result result = static_attempt_match("1234567890,", kind, length); - ensure(result == match_result::complete); - ensure_eq(token_kind::number, kind); - ensure_eq(10, length); -} - 
-TEST(token_attempt_match_number_integer_only_minus) -{ - token_kind kind; - std::size_t length; - match_result result = static_attempt_match("-", kind, length); - ensure(result == match_result::unmatched); - ensure_eq(token_kind::number, kind); - ensure_eq(1, length); -} - -TEST(token_attempt_match_string_complete) -{ - token_kind kind; - std::size_t length; - match_result result = static_attempt_match("\"1234567890\"", kind, length); - ensure(result == match_result::complete); - ensure_eq(token_kind::string, kind); - ensure_eq(12, length); -} - -TEST(token_attempt_match_string_empty_complete) -{ - token_kind kind; - std::size_t length; - match_result result = static_attempt_match("\"\"", kind, length); - ensure(result == match_result::complete); - ensure_eq(token_kind::string, kind); - ensure_eq(2, length); -} - -template -std::size_t sstrlen(const char (&)[N]) -{ - return N-1; -} - -TEST(token_attempt_match_string_double_reverse_solidus_before_escaped_quote) -{ - static const char tokens[] = R"("\\\" and keep going")"; - - token_kind kind; - std::size_t length; - match_result result = static_attempt_match(tokens, kind, length); - ensure(result == match_result::complete); - ensure_eq(token_kind::string, kind); - ensure_eq(sstrlen(tokens), length); -} - -TEST(token_attempt_match_comment) -{ - static const char tokens[] = "/**/"; - - token_kind kind; - std::size_t length; - match_result result = static_attempt_match(tokens, kind, length); - - ensure(result == match_result::complete); - ensure_eq(token_kind::comment, kind); - ensure_eq(sstrlen(tokens), length); -} - -TEST(token_attempt_match_comment_eof) -{ - static const char tokens[] = "/* whatever goes here*"; - - token_kind kind; - std::size_t length; - match_result result = static_attempt_match(tokens, kind, length); - - ensure(result == match_result::unmatched); - ensure_eq(token_kind::comment, kind); - ensure_eq(sstrlen(tokens), length); -} - -TEST(token_attempt_match_comment_too_short_eof) -{ - static const 
char tokens[] = "/*/"; - - token_kind kind; - std::size_t length; - match_result result = static_attempt_match(tokens, kind, length); - - ensure(result == match_result::unmatched); - ensure_eq(token_kind::comment, kind); - ensure_eq(sstrlen(tokens), length); -} - -TEST(token_attempt_match_comment_slash_invalid) -{ - static const char tokens[] = "//"; - - token_kind kind; - std::size_t length; - match_result result = static_attempt_match(tokens, kind, length); - - ensure(result == match_result::unmatched); -} - -} diff --git a/src/jsonv-tests/json_checker.cpp b/src/jsonv-tests/json_checker.cpp index 187e72a..07f8cba 100644 --- a/src/jsonv-tests/json_checker.cpp +++ b/src/jsonv-tests/json_checker.cpp @@ -1,20 +1,22 @@ -/** \file - * Data-driven tests for running the samples from http://json.org/JSON_checker/. - * - * Copyright (c) 2014 by Travis Gockel. All rights reserved. - * - * This program is free software: you can redistribute it and/or modify it under the terms of the Apache License - * as published by the Apache Software Foundation, either version 2 of the License, or (at your option) any later - * version. - * - * \author Travis Gockel (travis@gockelhut.com) -**/ +/// \file +/// Data-driven tests for running the samples from http://json.org/JSON_checker/. +/// +/// Copyright (c) 2014-2020 by Travis Gockel. All rights reserved. +/// +/// This program is free software: you can redistribute it and/or modify it under the terms of the Apache License +/// as published by the Apache Software Foundation, either version 2 of the License, or (at your option) any later +/// version. 
+/// +/// \author Travis Gockel (travis@gockelhut.com) #include "test.hpp" +#include #include #include #include +#include +#include #include @@ -35,21 +37,17 @@ class json_checker_test : _expect_failure(expect_failure), _options(options) { } - + private: virtual void run_impl() override { std::ifstream file(_datapath.c_str()); - if (_expect_failure) - { - ensure_throws(jsonv::parse_error, jsonv::parse(file, _options)); - } - else - { - jsonv::parse(file, _options); - } + auto text = std::string(std::istreambuf_iterator(file), std::istreambuf_iterator()); + auto ast = jsonv::ast_index::make_from(text); + if (_expect_failure == ast.success()) + ast.validate(); } - + private: std::string _datapath; bool _expect_failure; @@ -68,23 +66,12 @@ class json_checker_test_initializer "", expect_failure, jsonv::parse_options::create_default() - ) + ) ); _tests.emplace_back(std::move(test)); - - if (filename(p).find("pass-but-fail-strict") == 0) - { - std::unique_ptr strict(new json_checker_test(p, - "+strict", - true, - jsonv::parse_options::create_strict() - ) - ); - _tests.emplace_back(std::move(strict)); - } }); } - + private: std::deque> _tests; } json_checker_test_initializer_instance(test_path("json_checker")); diff --git a/src/jsonv-tests/main.cpp b/src/jsonv-tests/main.cpp index 873e18a..6cea104 100644 --- a/src/jsonv-tests/main.cpp +++ b/src/jsonv-tests/main.cpp @@ -1,5 +1,5 @@ /** \file - * + * * Copyright (c) 2012-2016 by Travis Gockel. All rights reserved. * * This program is free software: you can redistribute it and/or modify it under the terms of the Apache License diff --git a/src/jsonv-tests/object_tests.cpp b/src/jsonv-tests/object_tests.cpp index 2498ae6..a99591f 100644 --- a/src/jsonv-tests/object_tests.cpp +++ b/src/jsonv-tests/object_tests.cpp @@ -1,5 +1,5 @@ /** \file - * + * * Copyright (c) 2012 by Travis Gockel. All rights reserved. 
* * This program is free software: you can redistribute it and/or modify it under the terms of the Apache License @@ -31,11 +31,11 @@ TEST(object) TEST(object_view_iter_assign) { using namespace jsonv; - + value obj = object({ { "foo", 5 }, { "bar", "wat" } }); value found = object({ { "foo", false }, { "bar", false } }); ensure(obj.size() == 2); - + for (auto iter = obj.begin_object(); iter != obj.end_object(); ++iter) { value::object_iterator fiter; @@ -43,7 +43,7 @@ TEST(object_view_iter_assign) ensure(!fiter->second.as_boolean()); fiter->second = true; } - + for (auto iter = found.begin_object(); iter != found.end_object(); ++iter) ensure(iter->second.as_boolean()); } @@ -51,7 +51,7 @@ TEST(object_view_iter_assign) TEST(object_view_reverse_iter) { using namespace jsonv; - + value obj = object({ { "a", 1 }, { "b", 2 }, { "c", 3 } }); auto riter = obj.as_object().rbegin(); ensure_eq(riter->first, "c"); @@ -66,10 +66,10 @@ TEST(object_view_reverse_iter) TEST(object_compare) { using namespace jsonv; - + value obj = object(); value i = 5; - + // really just a test to see if this compiles: ensure(obj != i); } @@ -149,7 +149,7 @@ TEST(object_nested_access) p = &(*p)[name]; ++depth; } - + ensure_eq(v["x"], 0); ensure_eq(v["a"]["x"], 1); ensure_eq(v["a"]["b"]["x"], 2); @@ -168,7 +168,7 @@ TEST(object_wide_nested_access) p = &(*p)[name]; ++depth; } - + ensure_eq(v.at(L"x"), 0); ensure_eq(v[L"a"][L"x"], 1); ensure_eq(v[L"a"][L"b"][L"x"], 2); @@ -198,20 +198,8 @@ TEST(object_wide_keys) TEST(parse_empty_object) { auto obj = jsonv::parse("{}"); - - ensure(obj.size() == 0); -} -TEST(parse_keyless_object) -{ - try - { - jsonv::parse("{a : 3}", jsonv::parse_options().failure_mode(jsonv::parse_options::on_error::collect_all)); - } - catch (const jsonv::parse_error& err) - { - ensure_eq(jsonv::object({ { "a", 3 } }), err.partial_result()); - } + ensure(obj.size() == 0); } TEST(parse_object_wrong_kind_keys) @@ -239,11 +227,12 @@ TEST(parse_object_value_stops) 
ensure_throws(jsonv::parse_error, jsonv::parse(R"({"a": "blah)")); } -TEST(parse_object_duplicate_keys) -{ - std::string source = R"({ "a": 1, "a": 2 })"; - ensure_throws(jsonv::parse_error, jsonv::parse(source)); - ensure_eq(jsonv::object({ { "a", 2 } }), - jsonv::parse(source, jsonv::parse_options().failure_mode(jsonv::parse_options::on_error::ignore)) - ); -} +// TODO(#145): Revisit this when JSON tree extraction is a separate thing. +// TEST(parse_object_duplicate_keys) +// { +// std::string source = R"({ "a": 1, "a": 2 })"; +// ensure_throws(jsonv::parse_error, jsonv::parse(source)); +// ensure_eq(jsonv::object({ { "a", 2 } }), +// jsonv::parse(source, jsonv::parse_options().failure_mode(jsonv::parse_options::on_error::ignore)) +// ); +// } diff --git a/src/jsonv-tests/parse_blns.cpp b/src/jsonv-tests/parse_blns.cpp index 317a57e..f15b153 100644 --- a/src/jsonv-tests/parse_blns.cpp +++ b/src/jsonv-tests/parse_blns.cpp @@ -44,17 +44,18 @@ namespace jsonv_test { -TEST(parse_naughty_strings) -{ - std::ifstream src_file(test_path("blns.json")); - auto val = jsonv::parse(src_file); - - // Unclear exactly how to test this...so we'll just make sure that we parse an array and that all elements of the - // array are strings. - ensure(val.is_array()); - for (const auto& sub : val.as_array()) - { - ensure(sub.is_string()); - } -} +// TODO(#145): When non-strict UTF-8 is supported again, this should be re-enabled +// TEST(parse_naughty_strings) +// { +// std::ifstream src_file(test_path("blns.json")); +// auto val = jsonv::parse(src_file); +// +// // Unclear exactly how to test this...so we'll just make sure that we parse an array and that all elements of the +// // array are strings. 
+// ensure(val.is_array()); +// for (const auto& sub : val.as_array()) +// { +// ensure(sub.is_string()); +// } +// } } diff --git a/src/jsonv-tests/parse_tests.cpp b/src/jsonv-tests/parse_tests.cpp index 076d3f5..a67c4c1 100644 --- a/src/jsonv-tests/parse_tests.cpp +++ b/src/jsonv-tests/parse_tests.cpp @@ -1,19 +1,17 @@ -/** \file - * - * Copyright (c) 2014 by Travis Gockel. All rights reserved. - * - * This program is free software: you can redistribute it and/or modify it under the terms of the Apache License - * as published by the Apache Software Foundation, either version 2 of the License, or (at your option) any later - * version. - * - * \author Travis Gockel (travis@gockelhut.com) -**/ +/// \file +/// +/// Copyright (c) 2014-2020 by Travis Gockel. All rights reserved. +/// +/// This program is free software: you can redistribute it and/or modify it under the terms of the Apache License +/// as published by the Apache Software Foundation, either version 2 of the License, or (at your option) any later +/// version. +/// +/// \author Travis Gockel (travis@gockelhut.com) #include "test.hpp" #include #include #include -#include #include @@ -67,18 +65,18 @@ TEST_PARSE(object_simple_general_havoc) 4,"raz" : { - - - - + + + + },"bar" : [ 2, 3,4, "5" ]} - - + + )"); ensure_eq(simple_obj, result); } @@ -199,14 +197,6 @@ TEST_PARSE(malformed_decimal_in_object) ensure_throws(jsonv::parse_error, parse(R"({"x": 123.456.789 })")); } -TEST_PARSE(malformed_decimal_ignore) -{ - auto options = jsonv::parse_options() - .failure_mode(jsonv::parse_options::on_error::ignore); - // Could potentially check that the result is still a decimal, but the result is undefined. 
- parse("123.456.789", options); -} - TEST_PARSE(malformed_string_unterminated) { ensure_throws(jsonv::parse_error, parse(R"("abc)")); @@ -218,48 +208,35 @@ TEST_PARSE(malformed_boolean) ensure_throws(jsonv::parse_error, parse("try")); } -TEST_PARSE(option_complete_parse_false) -{ - auto options = jsonv::parse_options() - .complete_parse(false); - std::string input = R"({ "x": [4, 3, 5] })"; - jsonv::value expected = parse(input); - std::istringstream istream(input + input + input + input); - jsonv::tokenizer tokens(istream); - for (std::size_t x = 0; x < 4; ++x) - { - jsonv::value entry = jsonv::parse(tokens, options); - ensure_eq(expected, entry); - } -} - -TEST_PARSE(partial_array) -{ - try - { - parse_options options = parse_options() - .failure_mode(parse_options::on_error::collect_all) - .max_failures(1); - parse("[1, 2, bogus]", options); - ensure(false); - } - catch (const jsonv::parse_error& err) - { - // just check that we can... - to_string(err); - to_string(err.problems().at(0)); - value expected = array({ 1, 2, null }); - ensure_eq(expected, err.partial_result()); - } -} - -TEST_PARSE(depth) -{ - std::string src = R"({"a": null, "b": [{}, 3, 4.5, false, [[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]})"; - // this isn't all that useful -- we just want to ensure that the normal src parses - parse(src); - ensure_throws(parse_error, parse(src, parse_options::create_strict())); -} +// TODO(#145): Probably remove entirely +// TEST_PARSE(partial_array) +// { +// try +// { +// parse_options options = parse_options() +// .failure_mode(parse_options::on_error::collect_all) +// .max_failures(1); +// parse("[1, 2, bogus]", options); +// ensure(false); +// } +// catch (const jsonv::parse_error& err) +// { +// // just check that we can... 
+// to_string(err); +// to_string(err.problems().at(0)); +// value expected = array({ 1, 2, null }); +// ensure_eq(expected, err.partial_result()); +// } +// } + +// TODO(#145): Depth checking +// TEST_PARSE(depth) +// { +// std::string src = R"({"a": null, "b": [{}, 3, 4.5, false, [[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]})"; +// // this isn't all that useful -- we just want to ensure that the normal src parses +// parse(src); +// ensure_throws(parse_error, parse(src, parse_options::create_strict())); +// } TEST_PARSE(literal) { diff --git a/src/jsonv-tests/test.cpp b/src/jsonv-tests/test.cpp index 475b447..1ad5159 100644 --- a/src/jsonv-tests/test.cpp +++ b/src/jsonv-tests/test.cpp @@ -1,13 +1,12 @@ -/** \file - * - * Copyright (c) 2012 by Travis Gockel. All rights reserved. - * - * This program is free software: you can redistribute it and/or modify it under the terms of the Apache License - * as published by the Apache Software Foundation, either version 2 of the License, or (at your option) any later - * version. - * - * \author Travis Gockel (travis@gockelhut.com) -**/ +/// \file +/// +/// Copyright (c) 2012-2020 by Travis Gockel. All rights reserved. +/// +/// This program is free software: you can redistribute it and/or modify it under the terms of the Apache License +/// as published by the Apache Software Foundation, either version 2 of the License, or (at your option) any later +/// version. 
+/// +/// \author Travis Gockel (travis@gockelhut.com) #include "test.hpp" #include @@ -24,12 +23,15 @@ unit_test_list_type& get_unit_tests() return instance; } +test_failure::~test_failure() noexcept +{ } + unit_test::unit_test(const std::string& name) : _name(name) { get_unit_tests().push_back(this); } - + bool unit_test::run() { std::cout << "TEST: " << name() << " ..."; @@ -40,11 +42,21 @@ bool unit_test::run() { run_impl(); } + catch (const test_failure& ex) + { + _success = false; + _failstring = ex.message(); + } catch (const std::exception& ex) { _success = false; _failstring = std::string("Threw exception of type ") + jsonv::demangle(typeid(ex).name()) + ": " + ex.what(); } + catch (const char* const& ex) + { + _success = false; + _failstring = std::string("Threw exception of type `const char*`: ") + ex; + } catch (...) { _success = false; diff --git a/src/jsonv-tests/test.hpp b/src/jsonv-tests/test.hpp index 1ec8306..b91f86b 100644 --- a/src/jsonv-tests/test.hpp +++ b/src/jsonv-tests/test.hpp @@ -1,13 +1,12 @@ -/** \file - * - * Copyright (c) 2012-2014 by Travis Gockel. All rights reserved. - * - * This program is free software: you can redistribute it and/or modify it under the terms of the Apache License - * as published by the Apache Software Foundation, either version 2 of the License, or (at your option) any later - * version. - * - * \author Travis Gockel (travis@gockelhut.com) -**/ +/// \file +/// +/// Copyright (c) 2012-2020 by Travis Gockel. All rights reserved. +/// +/// This program is free software: you can redistribute it and/or modify it under the terms of the Apache License +/// as published by the Apache Software Foundation, either version 2 of the License, or (at your option) any later +/// version. 
+/// +/// \author Travis Gockel (travis@gockelhut.com) #ifndef __TEST_JSONV_TEST_HPP_INCLUDED__ #define __TEST_JSONV_TEST_HPP_INCLUDED__ @@ -29,37 +28,48 @@ class unit_test; typedef std::deque unit_test_list_type; unit_test_list_type& get_unit_tests(); +class test_failure final +{ +public: + test_failure(std::string message) noexcept : + _message(std::move(message)) + { } + + virtual ~test_failure() noexcept; + + const std::string& message() const { return _message; } + +private: + std::string _message; +}; + #if ASSERT_ON_TEST_FAILURE # define ensure assert #else -# define ensure(cond_) \ - do \ - { \ - if (!(cond_)) \ - { \ - this->_success = false; \ - this->_failstring = #cond_; \ - return; \ - } \ +# define ensure(cond_) \ + do \ + { \ + if (!(cond_)) \ + { \ + throw ::jsonv_test::test_failure(#cond_); \ + } \ } while (0) #endif #if ASSERT_ON_TEST_FAILURE # define ensure_op(a_, op_, b_) assert((a_) op_ (b_)) #else -# define ensure_op(a_, op_, b_) \ - do \ - { \ - if (!((a_) op_ (b_))) \ - { \ - this->_success = false; \ - std::ostringstream ss; \ - ss << "!(" << #a_ << " {" << (a_) << "}"; \ - ss << " " << #op_ << " "; \ - ss << #b_ << " {" << (b_) << "})"; \ - this->_failstring = ss.str(); \ - return; \ - } \ +# define ensure_op(a_, op_, b_) \ + do \ + { \ + if (!((a_) op_ (b_))) \ + { \ + std::ostringstream ss; \ + ss << "!(" << #a_ << " {" << (a_) << "}"; \ + ss << " " << #op_ << " "; \ + ss << #b_ << " {" << (b_) << "})"; \ + throw ::jsonv_test::test_failure(ss.str()); \ + } \ } while (0) #endif @@ -86,17 +96,17 @@ class unit_test { public: explicit unit_test(const std::string& name); - + bool run(); - + const std::string& name() const { return _name; } - + private: virtual void run_impl() = 0; - + protected: std::string _name; bool _success; diff --git a/src/jsonv-tests/tokenizer_tests.cpp b/src/jsonv-tests/tokenizer_tests.cpp deleted file mode 100644 index a16167a..0000000 --- a/src/jsonv-tests/tokenizer_tests.cpp +++ /dev/null @@ -1,65 +0,0 @@ -/** 
\file - * - * Copyright (c) 2014 by Travis Gockel. All rights reserved. - * - * This program is free software: you can redistribute it and/or modify it under the terms of the Apache License - * as published by the Apache Software Foundation, either version 2 of the License, or (at your option) any later - * version. - * - * \author Travis Gockel (travis@gockelhut.com) -**/ -#include "test.hpp" - -#include - -#include -#include - -namespace jsonv_test -{ - -using namespace jsonv; - -TEST(token_kind_streaming) -{ - ensure_eq(to_string(token_kind::boolean), "boolean"); - ensure_eq(to_string(token_kind::boolean | token_kind::string), "boolean|string"); - ensure_eq(to_string(token_kind::boolean | token_kind::parse_error_indicator), "parse_error(boolean)"); - ensure_eq(to_string(token_kind::number | static_cast(0x4000)), "number|0x4000"); -} - -TEST(token_kind_streaming_random) -{ - // create a bunch of (most likely invalid) token_kind values and to_string them...this sort of checks that we - // never infinitely loop in the output formatter - std::random_device prng; - for (std::size_t x = 0; x < 1000; ++x) - { - token_kind tok = static_cast(prng()); - to_string(tok); - } -} - -TEST(tokenizer_single_boolean) -{ - std::string input = "true"; - std::istringstream istream(input); - tokenizer tokens(istream); - ensure(tokens.next()); - auto found = tokens.current(); - ensure_eq(found.kind, token_kind::boolean); - ensure_eq(found.text, "true"); -} - -TEST(tokenizer_string) -{ - std::string input = "\"true\""; - std::istringstream istream(input); - tokenizer tokens(istream); - ensure(tokens.next()); - auto found = tokens.current(); - ensure_eq(found.kind, token_kind::string); - ensure_eq(found.text, "\"true\""); -} - -} diff --git a/src/jsonv-tests/unicode_tests.cpp b/src/jsonv-tests/unicode_tests.cpp index 22a1aec..91f1ec5 100644 --- a/src/jsonv-tests/unicode_tests.cpp +++ b/src/jsonv-tests/unicode_tests.cpp @@ -1,5 +1,5 @@ /** \file - * + * * Copyright (c) 2012-2014 by 
Travis Gockel. All rights reserved. * * This program is free software: you can redistribute it and/or modify it under the terms of the Apache License @@ -23,7 +23,7 @@ TEST(parse_unicode_single) TEST(parse_unicode_inline) { std::string s = jsonv::parse("\"é\"").as_string(); - ensure(s.size() == 2); + ensure_eq(s.size(), 2U); ensure(s[0] == '\xc3'); ensure(s[1] == '\xa9'); } @@ -51,14 +51,15 @@ TEST(parse_unicode_invalid_surrogates) ensure_throws(jsonv::parse_error, jsonv::parse("\"\\udead\\ubeef\"").as_string()); } -TEST(parse_unicode_invalid_surrogates_cesu8) -{ - std::string s = jsonv::parse("\"\\udead\\ubeef\"", - jsonv::parse_options().string_encoding(jsonv::parse_options::encoding::cesu8) - ).as_string(); - ensure(s.size() == 6); - // The right answer according to Python: u'\udead\ubeef'.encode('utf-8') - const char vals[] = "\xed\xba\xad\xeb\xbb\xaf"; - for (unsigned idx = 0; idx < sizeof vals; ++idx) - ensure(s[idx] == vals[idx]); -} +// TODO(#145): Probably remove because CESU-8 isn't supported +// TEST(parse_unicode_invalid_surrogates_cesu8) +// { +// std::string s = jsonv::parse("\"\\udead\\ubeef\"", +// jsonv::parse_options().string_encoding(jsonv::parse_options::encoding::cesu8) +// ).as_string(); +// ensure(s.size() == 6); +// // The right answer according to Python: u'\udead\ubeef'.encode('utf-8') +// const char vals[] = "\xed\xba\xad\xeb\xbb\xaf"; +// for (unsigned idx = 0; idx < sizeof vals; ++idx) +// ensure(s[idx] == vals[idx]); +// } diff --git a/src/jsonv/array.cpp b/src/jsonv/array.cpp index 2a5ef61..062a50f 100644 --- a/src/jsonv/array.cpp +++ b/src/jsonv/array.cpp @@ -1,5 +1,5 @@ /** \file - * + * * Copyright (c) 2012-2015 by Travis Gockel. All rights reserved. 
* * This program is free software: you can redistribute it and/or modify it under the terms of the Apache License @@ -163,6 +163,12 @@ void value::assign(std::initializer_list items) _data.array->_values.assign(std::move(items)); } +void value::reserve(size_type count JSONV_UNUSED) +{ + check_type(jsonv::kind::array, kind()); + // TODO(#144): Call `.reserve` on the vector +} + void value::resize(size_type count, const value& val) { check_type(jsonv::kind::array, kind()); diff --git a/src/jsonv/ast.cpp b/src/jsonv/ast.cpp new file mode 100644 index 0000000..ba8e486 --- /dev/null +++ b/src/jsonv/ast.cpp @@ -0,0 +1,981 @@ +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "char_convert.hpp" +#include "detail/match/number.hpp" +#include "detail/match/string.hpp" + +namespace jsonv +{ + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Private Helper Functions // +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#ifndef JSONV_AST_PARSE_MAX_DEPTH +# define JSONV_AST_PARSE_MAX_DEPTH 128 +#endif + +static inline char char_repr(ast_node_type src /* UNSAFE */) +{ + static const char reprs[] = "$^{}[]sSkKtfnid!"; + return reprs[static_cast(src)]; +} + +static inline ast_node_type ast_node_type_from_coded(std::uint64_t src) +{ + return static_cast(src & 0xff); +} + +static inline std::uint64_t encode(ast_node_type token, const char* src_location) +{ + auto src_index = reinterpret_cast(src_location); + static_assert(sizeof src_index <= sizeof(std::uint64_t), "!!"); + return std::uint64_t(static_cast(token)) + | (std::uint64_t(src_index) << 8); +} + +static constexpr std::uintptr_t ast_node_prefix_add = + sizeof(std::uintptr_t) == sizeof(std::uint64_t) ? 
std::uintptr_t(1UL << 56) : 0; + +static constexpr std::uintptr_t ast_node_prefix_from_ptr(const char* src_begin) +{ + if constexpr (sizeof(std::uintptr_t) == sizeof(std::uint64_t)) + { + constexpr std::uintptr_t mask = std::uintptr_t(0xffUL << 56); + return reinterpret_cast(src_begin) & mask; + } + else + { + return 0; + } +} + +static inline std::pair decode_ast_node_position(std::uintptr_t prefix, std::uint64_t src) +{ + static_assert(sizeof prefix <= sizeof src); + + auto ptr = std::uintptr_t(src >> 8) | prefix; + return { ast_node_type_from_coded(src), reinterpret_cast(ptr) }; +} + +static inline std::size_t code_size(ast_node_type src /* UNSAFE */) +{ + static const std::size_t jumps[] = + { + 1, // $ + 3, // ^ { end_idx, element_count } + 3, // { { end_idx, element_count } + 1, // } + 3, // [ { end_idx, element_count } + 1, // ] + 2, // s { encoded_size } + 2, // S { encoded_size } + 2, // k { encoded_size } + 2, // K { encoded_size } + 1, // t + 1, // f + 1, // n + 2, // i { run_length } + 2, // d { run_length } + 2, // ! 
{ error_code } + }; + return jumps[static_cast(src)]; +} + +static inline std::invalid_argument make_failed_numeric_extract(const ast_node& src, const char* type) +{ + std::ostringstream ss; + ss << "Failed to extract " << type << " from \"" << src.token_raw() << "\""; + return std::invalid_argument(std::move(ss).str()); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Public Helper Functions // +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +namespace detail +{ + +std::string string_from_token(string_view token, std::true_type is_escaped JSONV_UNUSED) +{ + static const string_decode_fn decoder = get_string_decoder(parse_options::encoding::utf8_strict); + + // chop off the ""s + token.remove_prefix(1); + token.remove_suffix(1); + + try + { + return decoder(token); + } + catch (const decode_error& ex) + { + // TODO(#145): This makes more sense as an extraction error + std::throw_with_nested(parse_error({ { 0, 0, 0, ex.what() } }, null)); + } +} + +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// ast_node_type // +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +std::ostream& operator<<(std::ostream& os, const ast_node_type& type) +{ + auto repr_type = static_cast(type); + auto repr_max = static_cast(ast_node_type::error); + + if (repr_type <= repr_max) + os << char_repr(type) << " (" << +repr_type << ")"; + else + os << "Invalid type value: " << +repr_type; + return os; +} + +std::string to_string(const ast_node_type& type) +{ + std::ostringstream ss; + ss << type; + return std::move(ss).str(); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// ast_node::literal_null // 
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +jsonv::value ast_node::literal_null::value() const +{ + return jsonv::null; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// ast_node::integer // +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +std::int64_t ast_node::integer::value() const +{ + // TODO(#150): This logic should be moved to a dedicated extractor + auto characters = token_raw(); + auto end = const_cast(characters.data() + characters.size()); + + if (characters[0] == '-') + { + auto scan_end = end; + auto val = std::strtoll(characters.data(), &scan_end, 10); + if (end == scan_end) + return val; + else + throw make_failed_numeric_extract(*this, "integer"); + } + else + { + // For non-negative integer types, use lexical_cast of a uint64_t then static_cast to an int64_t. This is done + // to deal with the values 2^63..2^64-1 -- do not consider it an exception, as we can store the bits properly, + // but the onus is on the user to know the particular key was in the overflow range. 
+ auto scan_end = end; + auto val = std::strtoull(characters.data(), &scan_end, 10); + if (end == scan_end) + return val; + else + throw make_failed_numeric_extract(*this, "integer"); + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// ast_node::decimal // +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +double ast_node::decimal::value() const +{ + // TODO(#150): This logic should be moved to a dedicated extractor + auto characters = token_raw(); + auto end = const_cast(characters.data() + characters.size()); + auto scan_end = end; + + auto val = std::strtod(characters.data(), &scan_end); + if (end == scan_end) + return val; + else + throw make_failed_numeric_extract(*this, "decimal"); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// ast_error // +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +std::ostream& operator<<(std::ostream& os, const ast_error& src) +{ + switch (src) + { + case ast_error::none: return os << "none"; + case ast_error::unexpected_token: return os << "unexpected token"; + case ast_error::eof: return os << "input ended unexpectedly"; + case ast_error::expected_eof: return os << "extra characters in input"; + case ast_error::depth_exceeded: return os << "max structural depth exceeded"; + case ast_error::extra_close: return os << "extra closing character"; + case ast_error::mismatched_close: return os << "mismatched closing character"; + case ast_error::close_after_comma: return os << "structure closed after comma"; + case ast_error::unexpected_comma: return os << "unexpected comma"; + case ast_error::expected_string: return os << "expected a string"; + case ast_error::expected_key_delimiter: return os << "expected ':'"; + case 
ast_error::invalid_literal: return os << "invalid literal"; + case ast_error::invalid_number: return os << "invalid number format"; + case ast_error::invalid_string: return os << "invalid string format"; + case ast_error::invalid_comment: return os << "invalid comment block"; + case ast_error::internal: return os << "internal parser error"; + default: return os << "ast_error(" << static_cast(src) << ")"; + } +} + +std::string to_string(const ast_error& src) +{ + std::ostringstream ss; + ss << src; + return std::move(ss).str(); +} + +struct JSONV_LOCAL ast_exception +{ + ast_error code; + std::size_t index; + + explicit constexpr ast_exception(ast_error code, std::size_t index) + : code(code) + , index(index) + { } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// ast_index // +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +struct JSONV_LOCAL ast_index::impl final +{ + const char* src_begin; //!< The source string this AST was built from + std::uint64_t data_size; //!< Number of used elements in \c data. + std::uint64_t data_capacity; //!< Capacity of \c data. + ast_error first_error_code; //!< The code of the first-encountered error + std::uint64_t first_error_index; //!< The index of the first error encountered in parsing. 
+ + static impl* allocate(std::size_t capacity) + { + if (capacity < 16U) + capacity = 16U; + + auto alloc_sz = sizeof(impl) + capacity * sizeof(std::uint64_t); + if (void* p = std::aligned_alloc(alignof(impl), alloc_sz)) + { + auto out = reinterpret_cast(p); + out->src_begin = nullptr; + out->data_size = 0U; + out->data_capacity = capacity; + out->first_error_code = ast_error::none; + out->first_error_index = 0U; + return out; + } + else + { + throw std::bad_alloc(); + } + } + + std::uint64_t& data(std::size_t idx) + { + auto data_ptr = reinterpret_cast(this + 1); + return data_ptr[idx]; + } + + static void destroy(impl* p) + { + std::free(p); + } + + static void grow_data_buffer(impl*& self) + { + // NOTE: Doubling the data capacity will always grow enough to hold a complete code size, as the minimum size + // can hold the largest code size. + auto new_capacity = self->data_capacity * 2; + + // OPTIMIZATION: It's possible that `realloc` would be better here, but there is not an aligned version of that. + auto new_self = allocate(new_capacity); + std::memcpy(new_self, self, sizeof *new_self + self->data_size * sizeof self->data(0)); + new_self->data_capacity = new_capacity; + + destroy(self); + self = new_self; + } + + /// Add the \a token, \a src_location pair to the \a self \c data buffer. + /// + /// \param self Like \c this, but will be updated in the case that \c data_capacity cannot store the inserted token. + /// \returns The index that was inserted into (\c data_size before the insertion was performed). This is used for + /// adding extra data elements when \a token is something like \c ast_node_type::object_begin. 
+ static inline std::size_t push_back(impl*& self, ast_node_type token, const char* src_location) + { + auto added_sz = code_size(token); + if (self->data_size + added_sz > self->data_capacity) + { + JSONV_UNLIKELY; + grow_data_buffer(self); + } + + auto put_idx = self->data_size; + self->data(put_idx) = encode(token, src_location); + self->data_size += added_sz; + return put_idx; + } + + static ast_exception push_error(impl*& self, ast_error error_code, const char* begin, const char* src_location) + { + auto idx = push_back(self, ast_node_type::error, src_location); + self->data(idx + 1) = static_cast(error_code); + + self->first_error_code = error_code; + self->first_error_index = src_location - begin; + return ast_exception(error_code, self->first_error_index); + } + + static void parse(impl*& self, string_view src); + + template + static void parse_literal(impl*& self, + const char* begin, + const char*& iter, + const char* end, + const char (& expected_token)[N], + ast_node_type success_value + ); +}; + +// OPTIMIZATION(SIMD-SSE4.2): Skip over chunks with cmpistri +static void fastforward_whitespace(const char*& iter, const char* end) +{ + while (iter < end) + { + char c = *iter; + if (c == ' ' || c == '\t' || c == '\n' || c == '\r') + ++iter; + else + break; + } +} + +static bool fastforward_comment(const char*& ext_iter, const char* end) +{ + assert(ext_iter[0] == '/'); + if (ext_iter + 1 == end || ext_iter[1] != '*') + return false; + + for (const char* iter = ext_iter + 2; iter < end; ++iter) + { + if (iter[0] == '*') + { + if (iter + 1 == end) + { + return false; + } + else if (iter[1] == '/') + { + ext_iter = iter + 2; + return true; + } + } + } + return false; +} + +// OPTIMIZATION(SIMD): Tokens can be checked as a single `uint32_t`, including "false", as 'f' has already been checked. 
+template +void ast_index::impl::parse_literal(impl*& self, + const char* begin, + const char*& iter, + const char* end, + const char (& expected_token)[NPlusOne], + ast_node_type success_value + ) +{ + static constexpr std::size_t N = NPlusOne - 1; + + if (iter + N <= end) + { + // start at `idx` 1 because `parse` checks the first value + for (std::size_t idx = 1; idx < N; ++idx) + { + if (iter[idx] != expected_token[idx]) + { + JSONV_UNLIKELY + throw push_error(*&self, ast_error::invalid_literal, begin, iter); + } + } + + push_back(self, success_value, iter); + iter += N; + } + else + { + JSONV_UNLIKELY + throw push_error(*&self, ast_error::eof, begin, iter); + } +} + +void ast_index::impl::parse(impl*& self, string_view src) +{ + enum class container_state + { + item_finished = 0, // <- an item just finished parsing (value 0 because it is set frequently) + opened, // <- just encountered [ or { + needs_item, // <- just parsed a `,` or ':', so we need an item (can't close now) + none, // <- not in a container + }; + + struct structure_state + { + std::size_t open_index; + std::size_t item_count; + ast_node_type open_token; + + static structure_state create(std::size_t index, ast_node_type opener) JSONV_ALWAYS_INLINE + { + return { index, 0UL, opener }; + } + }; + + static constexpr std::size_t max_depth = std::size_t(JSONV_AST_PARSE_MAX_DEPTH); + structure_state structure[max_depth]; + std::size_t depth = 0; + ast_node_type container = ast_node_type::error; + container_state state = container_state::none; + + const char* const begin = src.data(); + const char* iter = begin; + const char* const end = begin + src.size(); + + auto push_back_deeper = + [&](ast_node_type token, const char* src_location) JSONV_ALWAYS_INLINE + { + if (depth + 1 > max_depth) + { + JSONV_UNLIKELY + throw push_error(self, ast_error::depth_exceeded, begin, src_location); + } + + structure[depth] = structure_state::create(push_back(self, token, src_location), token); + container = token; + 
++depth; + }; + + auto push_back_out = + [&](ast_node_type token, ast_node_type expected_open_token, const char* src_location) JSONV_ALWAYS_INLINE + { + if (depth <= 0) + { + JSONV_UNLIKELY + throw push_error(self, ast_error::extra_close, begin, src_location); + } + + auto this_idx = push_back(self, token, src_location); + --depth; + + // Since we add to `item_count` on ',' tokens, we will undercount by 1, since the last item did not have a + // comma. Check that we are in `item_finished` state so empty containers remain at 0. + if (state == container_state::item_finished) + ++structure[depth].item_count; + + auto open_token = structure[depth].open_token; + if (open_token != expected_open_token) + { + JSONV_UNLIKELY + throw push_error(self, ast_error::mismatched_close, begin, src_location); + } + + if (depth > 0) + { + + + container = structure[depth - 1].open_token; + + if (depth == 1U) + { + ++src_location; + fastforward_whitespace(*&src_location, end); + if (src_location != end && *src_location) + throw push_error(self, ast_error::expected_eof, begin, src_location); + } + } + else + { + container = ast_node_type::error; + } + + self->data(structure[depth].open_index + 1) = this_idx; + self->data(structure[depth].open_index + 2) = structure[depth].item_count; + }; + + auto get_string = [&](ast_node_type token_ascii, ast_node_type token_escaped) JSONV_ALWAYS_INLINE + { + if (*iter != '\"') + throw push_error(self, ast_error::expected_string, begin, iter); + + auto result = detail::match_string(iter, end); + if (!result) + throw push_error(self, ast_error::eof, begin, iter); + + auto idx = push_back(self, result.needs_conversion ? 
token_escaped : token_ascii, iter); + self->data(idx + 1) = result.length; + + iter += result.length; + }; + + auto get_key = [&]() JSONV_ALWAYS_INLINE + { + fastforward_whitespace(*&iter, end); + if (iter >= end) + throw push_error(self, ast_error::eof, begin, iter); + else if (*iter == '}') + return false; + + get_string(ast_node_type::key_canonical, ast_node_type::key_escaped); + + fastforward_whitespace(*&iter, end); + if (iter >= end) + throw push_error(self, ast_error::eof, begin, iter); + + if (*iter != ':') + throw push_error(self, ast_error::expected_key_delimiter, begin, iter); + ++iter; + return true; + }; + + push_back_deeper(ast_node_type::document_start, iter); + + while (iter < end && *iter) + { + switch (*iter) + { + case ' ': + case '\t': + case '\n': + case '\r': + fastforward_whitespace(*&iter, end); + break; + case 't': + parse_literal(self, begin, *&iter, end, "true", ast_node_type::literal_true); + state = container_state::item_finished; + break; + case 'f': + parse_literal(self, begin, *&iter, end, "false", ast_node_type::literal_false); + state = container_state::item_finished; + break; + case 'n': + parse_literal(self, begin, *&iter, end, "null", ast_node_type::literal_null); + state = container_state::item_finished; + break; + case '[': + push_back_deeper(ast_node_type::array_begin, iter); + state = container_state::opened; + ++iter; + break; + case ']': + if (state == container_state::needs_item) + throw push_error(self, ast_error::close_after_comma, begin, iter); + + push_back_out(ast_node_type::array_end, ast_node_type::array_begin, iter); + state = container_state::item_finished; + ++iter; + break; + case '{': + push_back_deeper(ast_node_type::object_begin, iter); + state = container_state::opened; + ++iter; + get_key(); + break; + case '}': + if (state == container_state::needs_item) + throw push_error(self, ast_error::close_after_comma, begin, iter); + + push_back_out(ast_node_type::object_end, ast_node_type::object_begin, iter); + 
state = container_state::item_finished; + ++iter; + break; + case ',': + if (state != container_state::item_finished) + throw push_error(self, ast_error::unexpected_comma, begin, iter); + + ++iter; + ++structure[depth - 1].item_count; + + if (container == ast_node_type::object_begin) + get_key(); + state = container_state::needs_item; + break; + case '\"': + get_string(ast_node_type::string_canonical, ast_node_type::string_escaped); + state = container_state::item_finished; + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '-': + if (auto result = detail::match_number(iter, end)) + { + auto idx = push_back(self, result.decimal ? ast_node_type::decimal : ast_node_type::integer, iter); + self->data(idx + 1) = result.length; + iter += result.length; + state = container_state::item_finished; + } + else + { + throw push_error(self, ast_error::invalid_number, begin, iter); + } + break; + case '/': + if (fastforward_comment(*&iter, end)) + { + break; + } + else + { + throw push_error(self, ast_error::invalid_comment, begin, iter); + } + default: + throw push_error(self, ast_error::unexpected_token, begin, iter); + break; + } + } + + push_back_out(ast_node_type::document_end, ast_node_type::document_start, iter); +} + +ast_index::ast_index(impl* ptr) noexcept : + _impl(ptr) +{ } + +ast_index::~ast_index() noexcept +{ + reset(); +} + +void ast_index::reset() +{ + if (auto p = std::exchange(_impl, nullptr)) + { + impl::destroy(p); + } +} + +bool ast_index::success() const noexcept +{ + return _impl ? 
_impl->first_error_code == ast_error::none : false; +} + +void ast_index::validate() const +{ + if (!_impl) + throw std::invalid_argument("AST index was not initialized"); + if (success()) + return; + + throw parse_error({ parse_error::problem(0, 0, _impl->first_error_index, std::string(to_string(_impl->first_error_code))) }, value()); +} + +ast_index::iterator ast_index::begin() const +{ + if (_impl) + { + return iterator(ast_node_prefix_from_ptr(_impl->src_begin), &_impl->data(0)); + } + else + { + return iterator(0, reinterpret_cast(0)); + } +} + +ast_index::iterator ast_index::end() const +{ + if (_impl) + { + return iterator(ast_node_prefix_from_ptr(_impl->src_begin), &_impl->data(_impl->data_size)); + } + else + { + return iterator(0, reinterpret_cast(0)); + } +} + +ast_index ast_index::make_from(string_view src, optional initial_buffer_capacity) +{ + // OPTIMIZATION: Better heuristics on initial buffer capacity. + auto p = impl::allocate(initial_buffer_capacity.value_or(src.size() / 16)); + p->src_begin = src.data(); + + try + { + impl::parse(p, src); + return ast_index(p); + } + catch (const ast_exception& ex) + { + return ast_index(p); + } + catch (...) 
+ { + impl::destroy(p); + throw; + } +} + +std::ostream& operator<<(std::ostream& os, const ast_index& self) +{ + if (!self._impl) + return os << char_repr(ast_node_type::document_end); + + const std::uint64_t* data = &self._impl->data(0); + const std::uint64_t* end = data + self._impl->data_size; + while (data < end) + { + auto token = ast_node_type_from_coded(*data); + os << char_repr(token); + + data += code_size(token); + } + + return os; +} + +std::string to_string(const ast_index& self) +{ + std::ostringstream ss; + ss << self; + return std::move(ss).str(); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// ast_index::iterator // +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +ast_index::iterator& ast_index::iterator::operator++() +{ + auto [prev_token, prev_ptr] = decode_ast_node_position(_prefix, *_iter); + + _iter += code_size(prev_token); + auto new_ptr = decode_ast_node_position(_prefix, *_iter).second; + + if (new_ptr < prev_ptr) + _prefix += ast_node_prefix_add; + + return *this; +} + +ast_node ast_index::iterator::operator*() const +{ + auto [token, ptr] = decode_ast_node_position(_prefix, *_iter); + + switch (token) + { + case ast_node_type::document_start: return ast_node::document_start(ptr); + case ast_node_type::document_end: return ast_node::document_end(ptr); + case ast_node_type::object_begin: return ast_node::object_begin(ptr, _iter[2]); + case ast_node_type::object_end: return ast_node::object_end(ptr); + case ast_node_type::array_begin: return ast_node::array_begin(ptr, _iter[2]); + case ast_node_type::array_end: return ast_node::array_end(ptr); + case ast_node_type::string_canonical: return ast_node::string_canonical(ptr, _iter[1]); + case ast_node_type::string_escaped: return ast_node::string_escaped(ptr, _iter[1]); + case ast_node_type::key_canonical: return 
ast_node::key_canonical(ptr, _iter[1]); + case ast_node_type::key_escaped: return ast_node::key_escaped(ptr, _iter[1]); + case ast_node_type::literal_true: return ast_node::literal_true(ptr); + case ast_node_type::literal_false: return ast_node::literal_false(ptr); + case ast_node_type::literal_null: return ast_node::literal_null(ptr); + case ast_node_type::integer: return ast_node::integer(ptr, _iter[1]); + case ast_node_type::decimal: return ast_node::decimal(ptr, _iter[1]); + case ast_node_type::error: return ast_node::error(ptr, 1U, static_cast(_iter[1])); + default: throw std::invalid_argument(std::string("Unknown token: ") + to_string(token)); + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// parse // +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +namespace +{ + +template +struct overload : F... +{ + using F::operator()...; +}; + +template +overload(F...) 
-> overload; + +value extract_single(ast_index::const_iterator& iter, ast_index::const_iterator last); + +value extract_object(ast_index::const_iterator& iter, ast_index::const_iterator last) +{ + auto first_token = *iter; + if (first_token.type() != ast_node_type::object_begin) + throw std::invalid_argument("`extract_object` called on non-object token"); + + value out = object(); + + ++iter; + while (iter != last) + { + auto key_token = *iter; + if (key_token.type() == ast_node_type::object_end) + { + return out; + } + + std::string key = key_token.visit(overload + { + [](const ast_node::key_canonical& x) { return std::string(x.value()); }, + [](const ast_node::key_escaped& x) { return x.value(); }, + [](const auto&) -> std::string + { + // This shouldn't happen if the AST index is valid + JSONV_UNLIKELY + throw std::invalid_argument("Extracting object does not have repeating KEY, VALUE sequence"); + } + }); + + ++iter; + value val = extract_single(*&iter, last); + + out.insert({ std::move(key), std::move(val) }); + ++iter; + } + + throw std::invalid_argument("Did not find end of object"); +} + +value extract_array(ast_index::const_iterator& iter, ast_index::const_iterator last) +{ + auto first_token = *iter; + value out = first_token.visit(overload + { + [](const ast_node::array_begin& arr) -> value + { + auto out = array(); + out.reserve(arr.element_count()); + return out; + }, + [](const auto&) -> value + { + JSONV_UNLIKELY + throw std::invalid_argument("Extracting array does not start with ["); + } + }); + + ++iter; + while (iter != last) + { + auto token = *iter; + if (token.type() == ast_node_type::array_end) + return out; + + out.push_back(extract_single(*&iter, last)); + ++iter; + } + + throw std::invalid_argument("Did not find end of array"); +} + +value extract_single(ast_index::const_iterator& iter, ast_index::const_iterator last) +{ + if (iter == last) + throw std::invalid_argument("Can not extract from empty"); + + auto node = *iter; + return 
node.visit( + overload + { + [&](const ast_node::object_begin&) -> value { return extract_object(*&iter, last); }, + [&](const ast_node::array_begin&) -> value { return extract_array(*&iter, last); }, + [&](const ast_node::integer& x) -> value { return x.value(); }, + [&](const ast_node::decimal& x) -> value { return x.value(); }, + [&](const ast_node::string_canonical& x) -> value { return x.value(); }, + [&](const ast_node::string_escaped& x) -> value { return x.value(); }, + [&](const ast_node::literal_false& x) -> value { return x.value(); }, + [&](const ast_node::literal_true& x) -> value { return x.value(); }, + [&](const ast_node::literal_null& x) -> value { return x.value(); }, + [&](const ast_node::error& x) -> value + { + throw parse_error({ { 0, 0, 0, to_string(x.error_code()) } }, null); + }, + [&](const auto& x) -> value + { + // If the AST if valid, this should not be hit + throw std::invalid_argument(std::string("unexpected token ") + to_string(x.type())); + }, + }); +} + +} + +value ast_index::extract_tree(const parse_options& options /* TODO(#145) */) const +{ + if (!_impl) + throw std::invalid_argument("AST index was not initialized"); + + auto iter = begin(); + auto last = end(); + + if (iter == last) + return null; + + auto first_node = *iter; + if (first_node.type() != ast_node_type::document_start) + throw std::invalid_argument("AST index did not start with a `document_start`"); + + ++iter; + auto out = extract_single(*&iter, last); + + if (iter == last) + throw std::invalid_argument("Extracting value into a JSON tree ended early"); + + ++iter; + if (iter == last) + throw std::invalid_argument("Extracting value into a JSON tree ended early"); + + auto last_node = *iter; + if (last_node.type() != ast_node_type::document_end) + throw std::invalid_argument( + std::string("AST index does not end with `document_end`: ") + to_string(last_node.type())); + + return out; +} + +value ast_index::extract_tree() const +{ + return 
extract_tree(parse_options::create_default()); +} + +value parse(const string_view& input, const parse_options& options) +{ + auto ast = ast_index::make_from(input); + ast.validate(); + return ast.extract_tree(options); +} + +} diff --git a/src/jsonv/char_convert.cpp b/src/jsonv/char_convert.cpp index c2bb519..fe22b22 100644 --- a/src/jsonv/char_convert.cpp +++ b/src/jsonv/char_convert.cpp @@ -23,6 +23,7 @@ #include #include "detail/fixed_map.hpp" +#include "detail/is_print.hpp" #if __cplusplus >= 201703L || defined __has_include # if __has_include() @@ -106,7 +107,7 @@ static const char* find_decoding(char char_after_backslash) static bool needs_unicode_escaping(char c) { return bool(c & '\x80') - || !std::isprint(c); + || !is_print(c); } static constexpr bool char_bitmatch(char c, char pos, char neg) @@ -395,18 +396,18 @@ static void utf8_append_code(std::string& str, char32_t val) utf8_sequence_info(val, &length, &c); char buffer[8]; - char* bufferOut = buffer; - *bufferOut++ = c; + char* buffer_out = buffer; + *buffer_out++ = c; std::size_t shift = (length - 2) * 6; for (std::size_t idx = 1; idx < length; ++idx) { c = char('\x80' | ('\x3f' & (val >> shift))); - *bufferOut++ = c; + *buffer_out++ = c; shift -= 6; } - str.append(buffer, bufferOut); + str.append(buffer, buffer_out); } static bool utf16_combine_surrogates(uint16_t high, uint16_t low, char32_t* out) @@ -440,6 +441,7 @@ std::string string_decode(string_view source) typedef std::string::size_type size_type; std::string output; + output.reserve(source.size()); // OPTIMIZATION: Reserve a more appropriate size const char* last_pushed_src = source.data(); size_type utf8_sequence_start = 0; unsigned remaining_utf8_sequence = 0; @@ -522,8 +524,10 @@ std::string string_decode(string_view source) remaining_utf8_sequence = utf8_length - 1; ++idx; } - else if (require_printable && !std::isprint(current)) + else if (require_printable && !is_print(current)) { + JSONV_UNLIKELY + std::ostringstream os; os << 
"Unprintable character found in input: "; switch (current) @@ -554,6 +558,8 @@ std::string string_decode(string_view source) // not on a UTF8 continuation, even though we should be... else { + JSONV_UNLIKELY + std::ostringstream os; os << "Invalid UTF-8 multi-byte sequence in source: \""; for (size_type pos = utf8_sequence_start; pos <= idx; ++pos) @@ -568,6 +574,8 @@ std::string string_decode(string_view source) if (encoding != parse_options::encoding::cesu8 && remaining_utf8_sequence > 0) { + JSONV_UNLIKELY + std::ostringstream os; os << "unterminated UTF-8 sequence at end of string: \""; os << std::hex; diff --git a/src/jsonv/char_convert.hpp b/src/jsonv/char_convert.hpp index e2c2d81..e835d78 100644 --- a/src/jsonv/char_convert.hpp +++ b/src/jsonv/char_convert.hpp @@ -1,15 +1,13 @@ -/** \file - * - * Copyright (c) 2012 by Travis Gockel. All rights reserved. - * - * This program is free software: you can redistribute it and/or modify it under the terms of the Apache License - * as published by the Apache Software Foundation, either version 2 of the License, or (at your option) any later - * version. - * - * \author Travis Gockel (travis@gockelhut.com) -**/ -#ifndef __JSONV_CHAR_CONVERT_HPP_INCLUDED__ -#define __JSONV_CHAR_CONVERT_HPP_INCLUDED__ +/// \file +/// +/// Copyright (c) 2012-2020 by Travis Gockel. All rights reserved. +/// +/// This program is free software: you can redistribute it and/or modify it under the terms of the Apache License +/// as published by the Apache Software Foundation, either version 2 of the License, or (at your option) any later +/// version. 
+/// +/// \author Travis Gockel (travis@gockelhut.com) +#pragma once #include #include @@ -18,48 +16,44 @@ #include #include -namespace jsonv -{ -namespace detail +namespace jsonv::detail { -class decode_error : +class decode_error final : public std::runtime_error { public: typedef std::string::size_type size_type; public: decode_error(size_type offset, const std::string& message); - + virtual ~decode_error() noexcept; - + inline size_type offset() const { return _offset; } - + private: size_type _offset; }; -/** Encodes C++ string \a source into a fully-escaped JSON string into \a stream ready for sending over the wire. -**/ +/// Encodes C++ string \a source into a fully-escaped JSON string into \a stream ready for sending over the wire. std::ostream& string_encode(std::ostream& stream, string_view source, bool ensure_ascii = true); -/** A function that decodes an over the wire character sequence \c source into a C++ string. **/ -typedef std::string (*string_decode_fn)(string_view source); +/// A function that decodes an over the wire character sequence \c source into a C++ string. +using string_decode_fn = std::string (*)(string_view source); -/** Get a string decoding function for the given output \a encoding. **/ +/// Get a string decoding function for the given output \a encoding. string_decode_fn get_string_decoder(parse_options::encoding encoding); /** Convert the UTF-8 encoded \a source into a UTF-16 encoded \c std::wstring. **/ std::wstring convert_to_wide(string_view source); -/** Convert the UTF-16 encoded \a source into a UTF-8 encoded \c std::string. **/ +/// \{ +/// Convert the UTF-16 encoded \a source into a UTF-8 encoded \c std::string. 
std::string convert_to_narrow(const std::wstring& source); std::string convert_to_narrow(const wchar_t* source); +/// \} } -} - -#endif/*__JSONV_CHAR_CONVERT_HPP_INCLUDED__*/ diff --git a/src/jsonv/detail/is_print.hpp b/src/jsonv/detail/is_print.hpp new file mode 100644 index 0000000..48b63f0 --- /dev/null +++ b/src/jsonv/detail/is_print.hpp @@ -0,0 +1,21 @@ +/// \file +/// +/// Copyright (c) 2020 by Travis Gockel. All rights reserved. +/// +/// This program is free software: you can redistribute it and/or modify it under the terms of the Apache License +/// as published by the Apache Software Foundation, either version 2 of the License, or (at your option) any later +/// version. +/// +/// \author Travis Gockel (travis@gockelhut.com) +#include + +namespace jsonv::detail +{ + +/// Like `std::isprint`, but significantly faster. It assumes \a c is in the ASCII space. +JSONV_ALWAYS_INLINE inline constexpr bool is_print(char c) +{ + return c >= '\x20' && c != '\x7f'; +} + +} diff --git a/src/jsonv/detail/match/number.cpp b/src/jsonv/detail/match/number.cpp new file mode 100644 index 0000000..edb1511 --- /dev/null +++ b/src/jsonv/detail/match/number.cpp @@ -0,0 +1,302 @@ +/// \file +/// Pattern matching for JSON numeric values. +/// +/// Copyright (c) 2014-2020 by Travis Gockel. All rights reserved. +/// +/// This program is free software: you can redistribute it and/or modify it under the terms of the Apache License +/// as published by the Apache Software Foundation, either version 2 of the License, or (at your option) any later +/// version. 
+/// +/// \author Travis Gockel (travis@gockelhut.com) +#include + +#include + +#include "number.hpp" + +namespace jsonv::detail +{ + +namespace +{ + +enum class match_number_state +{ + initial, + leading_minus, + leading_zero, + integer, + decimal, + exponent, + exponent_sign, + complete, +}; + +} + +match_number_result match_number(const char* begin, const char* end) +{ + auto length = std::size_t(0); + auto state = match_number_state::initial; + auto max_length = std::size_t(end - begin); + auto decimal = false; + + auto current = [&] () + { + if (length < max_length) + return begin[length]; + else + return '\0'; + }; + + // Initial: behavior of parse branches from here + switch (current()) + { + case '-': + ++length; + state = match_number_state::leading_minus; + break; + case '0': + ++length; + state = match_number_state::leading_zero; + break; + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + ++length; + state = match_number_state::integer; + break; + default: + return match_number_result::create_unmatched(length); + } + + // Leading '-' + if (state == match_number_state::leading_minus) + { + switch (current()) + { + case '0': + ++length; + state = match_number_state::leading_zero; + break; + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + ++length; + state = match_number_state::integer; + break; + default: + return match_number_result::create_unmatched(length); + } + } + + // Leading '0' or "-0" + if (state == match_number_state::leading_zero) + { + switch (current()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + // NOTE: This behavior is incorrect according to strict JSON parsing. However, this library matches the + // input as a number in case `parse_options::numbers::decimal` is used. 
+ ++length; + state = match_number_state::integer; + break; + case '.': + ++length; + state = match_number_state::decimal; + decimal = true; + break; + case 'e': + case 'E': + ++length; + state = match_number_state::exponent; + decimal = true; + break; + default: + state = match_number_state::complete; + break; + } + } + + // Have only seen integer values + while (state == match_number_state::integer) + { + switch (current()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + ++length; + break; + case '.': + ++length; + state = match_number_state::decimal; + decimal = true; + break; + case 'e': + case 'E': + ++length; + state = match_number_state::exponent; + decimal = true; + break; + default: + state = match_number_state::complete; + break; + } + } + + // Just saw a '.' + if (state == match_number_state::decimal) + { + switch (current()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + ++length; + break; + default: + return match_number_result::create_unmatched(length); + } + + while (state == match_number_state::decimal) + { + switch (current()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + ++length; + break; + case 'e': + case 'E': + ++length; + state = match_number_state::exponent; + decimal = true; + break; + default: + state = match_number_state::complete; + break; + } + } + } + + // Just saw 'e' or 'E' + if (state == match_number_state::exponent) + { + switch (current()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + ++length; + break; + case '+': + case '-': + ++length; + state = match_number_state::exponent_sign; + break; + default: + return match_number_result::create_unmatched(length); + } + } + + // Just saw "e-", "e+", "E-", or "E+" + if (state 
== match_number_state::exponent_sign) + { + switch (current()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + ++length; + state = match_number_state::exponent; + break; + default: + return match_number_result::create_unmatched(length); + } + } + + while (state == match_number_state::exponent) + { + switch (current()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + ++length; + break; + default: + state = match_number_state::complete; + break; + } + } + + assert(state == match_number_state::complete); + return match_number_result::create_complete(decimal, length); +} + +} diff --git a/src/jsonv/detail/match/number.hpp b/src/jsonv/detail/match/number.hpp new file mode 100644 index 0000000..30ab898 --- /dev/null +++ b/src/jsonv/detail/match/number.hpp @@ -0,0 +1,49 @@ +/// \file jsonv/detail/match/number.hpp +/// Pattern matching for JSON numeric values. +/// +/// Copyright (c) 2020 by Travis Gockel. All rights reserved. +/// +/// This program is free software: you can redistribute it and/or modify it under the terms of the Apache License +/// as published by the Apache Software Foundation, either version 2 of the License, or (at your option) any later +/// version. 
+/// +/// \author Travis Gockel (travis@gockelhut.com) +#pragma once + +#include + +#include + +namespace jsonv::detail +{ + +struct JSONV_LOCAL match_number_result +{ + bool success; + bool decimal; + std::size_t length; + + explicit constexpr match_number_result(bool success, bool decimal, std::size_t length) : + success(success), + decimal(decimal), + length(length) + { } + + static inline constexpr match_number_result create_complete(bool decimal, std::size_t length) + { + return match_number_result(true, decimal, length); + } + + static inline constexpr match_number_result create_unmatched(std::size_t length) + { + return match_number_result(false, false, length); + } + + explicit constexpr operator bool() const + { + return success; + } +}; + +match_number_result JSONV_LOCAL match_number(const char* iter, const char* end); +} diff --git a/src/jsonv/detail/match/string.cpp b/src/jsonv/detail/match/string.cpp new file mode 100644 index 0000000..a875341 --- /dev/null +++ b/src/jsonv/detail/match/string.cpp @@ -0,0 +1,195 @@ +/// \file +/// Pattern matching for JSON string values. +/// +/// Copyright (c) 2020 by Travis Gockel. All rights reserved. +/// +/// This program is free software: you can redistribute it and/or modify it under the terms of the Apache License +/// as published by the Apache Software Foundation, either version 2 of the License, or (at your option) any later +/// version. 
+/// +/// \author Travis Gockel (travis@gockelhut.com) +#include + +#include + +#include +#include + +#include "string.hpp" + +namespace jsonv::detail +{ + +static bool is_valid_escape(char c) +{ + switch (c) + { + case 'b': + case 'f': + case 'n': + case 'r': + case 't': + case '\\': + case '/': + case '\"': + return true; + case 'u': // <- note that `u` must be handled in a special case + default: + return false; + } +} + +static constexpr bool char_bitmatch(char c, char pos, char neg) +{ + using u8 = unsigned char; + + // NOTE(tgockel, 2018-06-04): The use of casting should not be needed here. However, GCC 8.1.0 seems to have a + // bug in the optimizer that causes this function to erroneously return `true` in some cases (specifically, with + // `char_bitmatch('\xf0', '\xc0', '\x20')`, but not if you call the function directly). It is possible this issue + // (https://github.com/tgockel/json-voorhees/issues/108) has been misdiagnosed, but the behavior only happens on + // GCC 8.1.0 and only with -O3. This also fixes the problem, even though it logically should not change anything + // (https://stackoverflow.com/questions/50671485/bitwise-operations-on-signed-chars). + return (u8(c) & u8(pos)) == u8(pos) + && !(u8(c) & u8(neg)); +} + +/// Tests if \a c is a valid UTF-8 sequence continuation. +static constexpr bool is_utf8_sequence_continuation(char c) +{ + return char_bitmatch(c, '\x80', '\x40'); +} + +static unsigned utf8_length(char c) +{ + if (char_bitmatch(c, '\xc0', '\x20')) + { + JSONV_LIKELY + return 2U; + } + else if (char_bitmatch(c, '\xe0', '\x10')) + { + return 3U; + } + else if (char_bitmatch(c, '\xf0', '\x08')) + { + return 4U; + } + else if (char_bitmatch(c, '\xf8', '\x04')) + { + return 5U; + } + else if (char_bitmatch(c, '\xfc', '\x02')) + { + return 6U; + } + else + { + JSONV_UNLIKELY + // This is not an acceptable/valid UTF-8 string. A failure here means I can't trust or don't understand the + // source encoding. 
+ return 0U; + } +} + +match_string_result match_string(const char* iter, const char* end) +{ + assert(*iter == '\"'); + + ++iter; + std::size_t length = 1U; + bool escaped = false; + + while (iter < end) + { + if (*iter == '\"') + { + ++length; + return match_string_result::create_complete(escaped, length); + } + else if (*iter == '\\') + { + escaped = true; + if (iter + 1 == end) + { + JSONV_UNLIKELY + return match_string_result::create_unmatched(length); + } + else if (iter[1] == 'u') + { + iter += 2; + length += 2; + + if (iter + 4 >= end) + { + JSONV_UNLIKELY + return match_string_result::create_unmatched(length); + } + + if ( std::isxdigit(iter[0]) + && std::isxdigit(iter[1]) + && std::isxdigit(iter[2]) + && std::isxdigit(iter[3]) + ) + { + JSONV_LIKELY + iter += 4; + length += 4; + } + else + { + return match_string_result::create_unmatched(length); + } + } + else if (is_valid_escape(iter[1])) + { + JSONV_LIKELY + length += 2; + iter += 2; + } + else + { + return match_string_result::create_unmatched(length); + } + } + else if (!(*iter & '\x80')) + { + if (!is_print(*iter)) + { + JSONV_UNLIKELY + return match_string_result::create_unmatched(length); + } + + ++iter; + ++length; + } + else if (auto utf_seq_length = utf8_length(*iter)) + { + if (iter + utf_seq_length > end) + { + return match_string_result::create_unmatched(length); + } + else + { + for (unsigned offset = 1U; offset < utf_seq_length; ++offset) + { + if (!is_utf8_sequence_continuation(iter[offset])) + { + JSONV_UNLIKELY + return match_string_result::create_unmatched(length + offset); + } + } + + iter += utf_seq_length; + length += utf_seq_length; + } + } + else + { + return match_string_result::create_unmatched(length); + } + } + + return match_string_result::create_unmatched(length); +} + +} diff --git a/src/jsonv/detail/match/string.hpp b/src/jsonv/detail/match/string.hpp new file mode 100644 index 0000000..0a128ef --- /dev/null +++ b/src/jsonv/detail/match/string.hpp @@ -0,0 +1,49 @@ +/// 
\file jsonv/detail/match/string.hpp +/// Pattern matching for JSON string values. +/// +/// Copyright (c) 2020 by Travis Gockel. All rights reserved. +/// +/// This program is free software: you can redistribute it and/or modify it under the terms of the Apache License +/// as published by the Apache Software Foundation, either version 2 of the License, or (at your option) any later +/// version. +/// +/// \author Travis Gockel (travis@gockelhut.com) +#pragma once + +#include + +#include + +namespace jsonv::detail +{ + +struct JSONV_LOCAL match_string_result +{ + bool success; + bool needs_conversion; + std::size_t length; + + explicit constexpr match_string_result(bool success, bool needs_conversion, std::size_t length) : + success(success), + needs_conversion(needs_conversion), + length(length) + { } + + static inline constexpr match_string_result create_complete(bool needs_conversion, std::size_t length) + { + return match_string_result(true, needs_conversion, length); + } + + static inline constexpr match_string_result create_unmatched(std::size_t length) + { + return match_string_result(false, false, length); + } + + explicit constexpr operator bool() const + { + return success; + } +}; + +match_string_result JSONV_LOCAL match_string(const char* iter, const char* end); +} diff --git a/src/jsonv/detail/token_patterns.cpp b/src/jsonv/detail/token_patterns.cpp deleted file mode 100644 index b7a7873..0000000 --- a/src/jsonv/detail/token_patterns.cpp +++ /dev/null @@ -1,540 +0,0 @@ -/** \file - * - * Copyright (c) 2014-2018 by Travis Gockel. All rights reserved. - * - * This program is free software: you can redistribute it and/or modify it under the terms of the Apache License - * as published by the Apache Software Foundation, either version 2 of the License, or (at your option) any later - * version. 
- * - * \author Travis Gockel (travis@gockelhut.com) -**/ -#include - -#include -#include -#include - -namespace jsonv -{ -namespace detail -{ - -template -static match_result match_literal(const char* begin, const char* end, const char (& literal)[N], std::size_t& length) -{ - for (length = 0; length < (N-1); ++length) - { - if (begin + length == end || begin[length] != literal[length]) - return match_result::unmatched; - } - return match_result::complete; -} - -static match_result match_true(const char* begin, const char* end, token_kind& kind, std::size_t& length) -{ - kind = token_kind::boolean; - return match_literal(begin, end, "true", length); -} - -static match_result match_false(const char* begin, const char* end, token_kind& kind, std::size_t& length) -{ - kind = token_kind::boolean; - return match_literal(begin, end, "false", length); -} - -static match_result match_null(const char* begin, const char* end, token_kind& kind, std::size_t& length) -{ - kind = token_kind::null; - return match_literal(begin, end, "null", length); -} - -enum class match_number_state -{ - initial, - leading_minus, - leading_zero, - integer, - decimal, - exponent, - exponent_sign, - complete, -}; - -static match_result match_number(const char* begin, const char* end, token_kind& kind, std::size_t& length) -{ - kind = token_kind::number; - length = 0U; - auto state = match_number_state::initial; - auto max_length = std::size_t(end - begin); - - auto current = [&] () - { - if (length < max_length) - return begin[length]; - else - return '\0'; - }; - - // Initial: behavior of parse branches from here - switch (current()) - { - case '-': - ++length; - state = match_number_state::leading_minus; - break; - case '0': - ++length; - state = match_number_state::leading_zero; - break; - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - ++length; - state = match_number_state::integer; - break; - default: - return 
match_result::unmatched; - } - - // Leading '-' - if (state == match_number_state::leading_minus) - { - switch (current()) - { - case '0': - ++length; - state = match_number_state::leading_zero; - break; - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - ++length; - state = match_number_state::integer; - break; - default: - return match_result::unmatched; - } - } - - // Leading '0' or "-0" - if (state == match_number_state::leading_zero) - { - switch (current()) - { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - // NOTE: This behavior is incorrect according to strict JSON parsing. However, this library matches the - // input as a number in case `parse_options::numbers::decimal` is used. - ++length; - state = match_number_state::integer; - break; - case '.': - ++length; - state = match_number_state::decimal; - break; - case 'e': - case 'E': - ++length; - state = match_number_state::exponent; - break; - default: - state = match_number_state::complete; - break; - } - } - - // Have only seen integer values - while (state == match_number_state::integer) - { - switch (current()) - { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - ++length; - break; - case '.': - ++length; - state = match_number_state::decimal; - break; - case 'e': - case 'E': - ++length; - state = match_number_state::exponent; - break; - default: - state = match_number_state::complete; - break; - } - } - - // Just saw a '.' 
- if (state == match_number_state::decimal) - { - switch (current()) - { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - ++length; - break; - default: - return match_result::unmatched; - } - - while (state == match_number_state::decimal) - { - switch (current()) - { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - ++length; - break; - case 'e': - case 'E': - ++length; - state = match_number_state::exponent; - break; - default: - state = match_number_state::complete; - break; - } - } - } - - // Just saw 'e' or 'E' - if (state == match_number_state::exponent) - { - switch (current()) - { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - ++length; - break; - case '+': - case '-': - ++length; - state = match_number_state::exponent_sign; - break; - default: - return match_result::unmatched; - } - } - - // Just saw "e-", "e+", "E-", or "E+" - if (state == match_number_state::exponent_sign) - { - switch (current()) - { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - ++length; - state = match_number_state::exponent; - break; - default: - return match_result::unmatched; - } - } - - while (state == match_number_state::exponent) - { - switch (current()) - { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - ++length; - break; - default: - state = match_number_state::complete; - break; - } - } - - assert(state == match_number_state::complete); - return match_result::complete; -} - -static match_result match_string(const char* begin, const char* end, token_kind& kind, std::size_t& length) -{ - assert(*begin == '\"'); - - kind = token_kind::string; - length = 1; - - while (true) - { - if (begin + length == end) - return 
match_result::unmatched; - - if (begin[length] == '\"') - { - ++length; - return match_result::complete; - } - else if (begin[length] == '\\') - { - if (begin + length + 1 == end) - return match_result::unmatched; - else - length += 2; - } - else - { - ++length; - } - } -} - -static match_result match_simple_string(const char* begin, const char* end, token_kind& kind, std::size_t& length) -{ - kind = token_kind::string; - length = 0U; - auto max_length = std::size_t(end - begin); - - auto current = [&] () - { - if (length < max_length) - return begin[length]; - else - return '\0'; - }; - - // R"(^[a-zA-Z_$][a-zA-Z0-9_$]*)" - char c = current(); - if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || (c == '_') || (c == '$')) - ++length; - else - return match_result::unmatched; - - while (true) - { - c = current(); - if (c == '\0') - return match_result::complete; - else if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || (c == '_') || (c == '$') || ('0' <= c && c <= '9')) - ++length; - else - return match_result::complete; - } -} - -static match_result match_whitespace(const char* begin, const char* end, token_kind& kind, std::size_t& length) -{ - kind = token_kind::whitespace; - for (length = 0; begin != end; ++length, ++begin) - { - switch (*begin) - { - case ' ': - case '\t': - case '\r': - case '\n': - continue; - default: - return match_result::complete; - } - } - return match_result::complete; -} - -static match_result match_comment(const char* begin, const char* end, token_kind& kind, std::size_t& length) -{ - assert(*begin == '/'); - - kind = token_kind::comment; - if (std::distance(begin, end) == 1) - { - length = 1; - return match_result::unmatched; - } - else if (begin[1] == '*') - { - bool saw_asterisk = false; - for (length = 2, begin += 2; begin != end; ++length, ++begin) - { - if (*begin == '*') - { - saw_asterisk = true; - } - else if (saw_asterisk && *begin == '/') - { - ++length; - return match_result::complete; - } - else - { - saw_asterisk 
= false; - } - } - return match_result::unmatched; - } - else - { - length = 1; - return match_result::unmatched; - } - -} - -match_result attempt_match(const char* begin, const char* end, token_kind& kind, std::size_t& length) -{ - auto result = [&] (match_result r, token_kind kind_, std::size_t length_) - { - kind = kind_; - length = length_; - return r; - }; - - if (begin == end) - { - return result(match_result::unmatched, token_kind::unknown, 0); - } - - switch (*begin) - { - case '[': return result(match_result::complete, token_kind::array_begin, 1); - case ']': return result(match_result::complete, token_kind::array_end, 1); - case '{': return result(match_result::complete, token_kind::object_begin, 1); - case '}': return result(match_result::complete, token_kind::object_end, 1); - case ':': return result(match_result::complete, token_kind::object_key_delimiter, 1); - case ',': return result(match_result::complete, token_kind::separator, 1); - case 't': return match_true( begin, end, kind, length); - case 'f': return match_false(begin, end, kind, length); - case 'n': return match_null( begin, end, kind, length); - case '-': - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - return match_number(begin, end, kind, length); - case '\"': - return match_string(begin, end, kind, length); - case ' ': - case '\t': - case '\n': - case '\r': - return match_whitespace(begin, end, kind, length); - case '/': - return match_comment(begin, end, kind, length); - default: - return result(match_result::unmatched, token_kind::unknown, 1); - } -} - -path_match_result path_match(string_view input, string_view& match_contents) -{ - if (input.length() < 2) - return path_match_result::invalid; - - match_result result; - token_kind kind; - std::size_t length; - - switch (input.at(0)) - { - case '.': - result = match_simple_string(input.data() + 1, input.data() + input.size(), kind, length); - if (result == 
match_result::complete) - { - match_contents = input.substr(0, length + 1); - return path_match_result::simple_object; - } - else - { - return path_match_result::invalid; - } - case '[': - result = attempt_match(input.data() + 1, input.data() + input.length(), kind, length); - if (result == match_result::complete) - { - if (input.length() == length + 1 || input.at(1 + length) != ']') - return path_match_result::invalid; - if (kind != token_kind::string && kind != token_kind::number) - return path_match_result::invalid; - - match_contents = input.substr(0, length + 2); - return path_match_result::brace; - } - else - { - return path_match_result::invalid; - } - default: - return path_match_result::invalid; - } -} - -} -} diff --git a/src/jsonv/detail/token_patterns.hpp b/src/jsonv/detail/token_patterns.hpp deleted file mode 100644 index e9fa6f5..0000000 --- a/src/jsonv/detail/token_patterns.hpp +++ /dev/null @@ -1,64 +0,0 @@ -/** \file jsonv/detail/token_patterns.hpp - * Pattern matching for JSON tokens. - * - * Copyright (c) 2014 by Travis Gockel. All rights reserved. - * - * This program is free software: you can redistribute it and/or modify it under the terms of the Apache License - * as published by the Apache Software Foundation, either version 2 of the License, or (at your option) any later - * version. - * - * \author Travis Gockel (travis@gockelhut.com) -**/ -#ifndef __JSONV_DETAIL_TOKEN_PATTERNS_HPP_INCLUDED__ -#define __JSONV_DETAIL_TOKEN_PATTERNS_HPP_INCLUDED__ - -#include -#include -#include - -namespace jsonv -{ -namespace detail -{ - -/** The result of a match. **/ -enum class match_result : bool -{ - complete = true, - unmatched = false, -}; - -/** Attempt to match the given sequence. - * - * \param begin The beginning of the sequence to attempt to match. - * \param end The end of the sequence to attempt to match. - * \param[out] kind The kind of token matched. - * \param[out] length The length of the match, if found. 
- * \returns the result of the match. -**/ -match_result attempt_match(const char* begin, - const char* end, - token_kind& kind, - std::size_t& length - ); - -enum class path_match_result : char -{ - simple_object = '.', - brace = '[', - invalid = '\x00', -}; - -/** Attempt to match a path. - * - * \param input The input to match - * \param[out] match_contents The full contents of a match -**/ -path_match_result path_match(string_view input, - string_view& match_contents - ); - -} -} - -#endif/*__JSONV_DETAIL_TOKEN_PATTERNS_HPP_INCLUDED__*/ diff --git a/src/jsonv/kind.cpp b/src/jsonv/kind.cpp new file mode 100644 index 0000000..277dddb --- /dev/null +++ b/src/jsonv/kind.cpp @@ -0,0 +1,34 @@ +#include + +#include + +namespace jsonv +{ + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// kind // +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +std::ostream& operator<<(std::ostream& os, const kind& k) +{ + switch (k) + { + case jsonv::kind::array: return os << "array"; + case jsonv::kind::boolean: return os << "boolean"; + case jsonv::kind::decimal: return os << "decimal"; + case jsonv::kind::integer: return os << "integer"; + case jsonv::kind::null: return os << "null"; + case jsonv::kind::object: return os << "object"; + case jsonv::kind::string: return os << "string"; + default: return os << "kind(" << +static_cast(k) << ")"; + } +} + +std::string to_string(const kind& k) +{ + std::ostringstream ss; + ss << k; + return ss.str(); +} + +} diff --git a/src/jsonv/parse.cpp b/src/jsonv/parse.cpp index 6425fe7..4d860a9 100644 --- a/src/jsonv/parse.cpp +++ b/src/jsonv/parse.cpp @@ -1,18 +1,16 @@ -/** \file - * - * Copyright (c) 2012-2018 by Travis Gockel. All rights reserved. 
- * - * This program is free software: you can redistribute it and/or modify it under the terms of the Apache License - * as published by the Apache Software Foundation, either version 2 of the License, or (at your option) any later - * version. - * - * \author Travis Gockel (travis@gockelhut.com) -**/ +/// \file +/// +/// Copyright (c) 2012-2020 by Travis Gockel. All rights reserved. +/// +/// This program is free software: you can redistribute it and/or modify it under the terms of the Apache License +/// as published by the Apache Software Foundation, either version 2 of the License, or (at your option) any later +/// version. +/// +/// \author Travis Gockel (travis@gockelhut.com) #include #include #include #include -#include #include "char_convert.hpp" @@ -74,7 +72,7 @@ static std::string parse_error_what(const parse_error::problem_list& problems) first = false; else stream << std::endl; - + stream << p; } return stream.str(); @@ -237,570 +235,14 @@ parse_options& parse_options::comments(bool val) return *this; } -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// parsing internals // -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace detail -{ - -struct JSONV_LOCAL parse_context -{ - using size_type = std::size_t; - - tokenizer& input; - parse_options options; - string_decode_fn string_decode; - - size_type line; - size_type column; - size_type character; - - bool successful; - jsonv::parse_error::problem_list problems; - bool complete; - - explicit parse_context(const parse_options& options, tokenizer& input) : - input(input), - options(options), - string_decode(get_string_decoder(options.string_encoding())), - line(0), - column(1), - character(0), - successful(true), - problems(), - complete(false) - { } - - parse_context(const parse_context&) = delete; - parse_context& operator=(const parse_context&) = 
delete; - - bool next() - { - if (!complete && line != 0) - { - character += current().text.size(); - for (const char c : current().text) - { - if (c == '\n' || c == '\r') - { - ++line; - column = 1; - } - else - { - ++column; - } - } - } - else - { - ++line; - } - - if (input.next()) - { - JSONV_DBG_NEXT("(" << input.current().text << " cxt:" << input.current().kind << ")"); - if (current_kind() == token_kind::whitespace) - { - return next(); - } - else if (current_kind() == token_kind::comment) - { - if (!options.comments()) - parse_error("JSON comment is not allowed"); - return next(); - } - else - { - return true; - } - } - else - { - complete = true; - return false; - } - } - - const tokenizer::token& current() const - { - return input.current(); - } - - const token_kind& current_kind() const - { - return current().kind; - } - - template - void parse_error(T&&... message) - { - std::ostringstream stream; - parse_error_impl(stream, std::forward(message)...); - } - -private: - void parse_error_impl(std::ostringstream& stream) - { - try - { - string_view text = current().text; - stream << ": \"" << text << "\""; - } - catch (const std::logic_error&) - { } - jsonv::parse_error::problem problem(line, column, character, stream.str()); - if (options.failure_mode() == parse_options::on_error::fail_immediately) - { - throw jsonv::parse_error({ problem }, null); - } - else - { - successful = false; - if (problems.size() < options.max_failures()) - problems.emplace_back(std::move(problem)); - } - } - - template - void parse_error_impl(std::ostringstream& stream, T&& current, TRest&&... 
rest) - { - stream << std::forward(current); - parse_error_impl(stream, std::forward(rest)...); - } -}; - -static bool parse_generic(parse_context& context, value& out, bool advance = true); - -static void check_token(parse_context& context, string_view expected_token) -{ - if (context.current().text != expected_token) - context.parse_error("Failed to match \"", expected_token, "\"" - , "\t", context.current().text.length(), " ", expected_token.length(), "\t", - std::equal(expected_token.begin(), expected_token.end(), context.current().text.begin()) - ); -} - -static bool parse_boolean(parse_context& context, value& out) -{ - assert(context.current_kind() == token_kind::boolean); - switch (context.current().text.at(0)) - { - case 't': - out = true; - check_token(context, "true"); - return true; - case 'f': - out = false; - check_token(context, "false"); - return true; - default: - assert(false); - return false; - } -} - -static bool parse_null(parse_context& context, value& out) -{ - assert(context.current_kind() == token_kind::null); - out = null; - check_token(context, "null"); - return true; -} - -static bool parse_number(parse_context& context, value& out) -{ - JSONV_DBG_STRUCT("#"); - string_view characters = context.current().text; - - if ( context.options.number_encoding() == parse_options::numbers::strict - && characters.size() > 1U - && characters.at(0) == '0' - ) - { - context.parse_error("Numbers cannot start with a leading '0'"); - } - - auto end = const_cast(characters.data() + characters.length()); - - // optimization: a numeric token is "decimal-like" if it has . 
in it - if (characters.find_first_of(".Ee") != string_view::npos) - { - auto scan_end = end; - auto val = std::strtod(characters.data(), &scan_end); - if (end == scan_end) - { - out = val; - return true; - } - } - else if (characters[0] == '-') - { - auto scan_end = end; - auto val = std::strtoul(characters.data(), &scan_end, 10); - if (end == scan_end) - { - out = val; - return true; - } - } - else - { - // For non-negative integer types, use lexical_cast of a uint64_t then static_cast to an int64_t. This is done - // to deal with the values 2^63..2^64-1 -- do not consider it an exception, as we can store the bits properly, - // but the onus is on the user to know the particular key was in the overflow range. - auto scan_end = end; - auto val = std::strtoull(characters.data(), &scan_end, 10); - if (end == scan_end) - { - out = val; - return true; - } - } - - // Numbers that do not contain decimals or exponents, but are too large might still be representable as a double - { - auto scan_end = end; - auto val = std::strtod(characters.data(), &scan_end); - if (end == scan_end) - { - out = val; - return true; - } - } - - context.parse_error("Could not extract number from \"", characters, "\""); - return true; -} - -static std::string parse_string(parse_context& context) -{ - assert(context.current_kind() == token_kind::string); - - string_view source = context.current().text; - JSONV_DBG_STRUCT(source); - // chop off the ""s - source.remove_prefix(1); - source.remove_suffix(1); - - try - { - return context.string_decode(source); - } - catch (const detail::decode_error& err) - { - context.parse_error("Error decoding string:", err.what()); - // return it un-decoded - return std::string(source); - } -} - -static bool parse_string(parse_context& context, value& out) -{ - out = parse_string(context); - return true; -} - -static bool parse_array(parse_context& context, value& arr) -{ - JSONV_DBG_STRUCT('['); - arr = array(); - bool trailing_comma = false; - - while (true) - 
{ - if (!context.next()) - break; - - value val; - if (context.current_kind() == token_kind::array_end) - { - if (trailing_comma && context.options.comma_policy() != parse_options::commas::allow_trailing) - context.parse_error("Array contained a trailing comma"); - JSONV_DBG_STRUCT(']'); - return true; - } - else if (parse_generic(context, val, false)) - { - JSONV_DBG_STRUCT(val); - arr.push_back(std::move(val)); - trailing_comma = false; - } - else - { - JSONV_DBG_STRUCT("parse error:" << context.current().text << " kind:" << context.current_kind()); - // a parse error, but parse_generic will have complained about it - } - - if (!context.next()) - { - break; - } - - if (context.current_kind() == token_kind::array_end) - { - JSONV_DBG_STRUCT(']'); - return true; - } - else if (context.current_kind() == token_kind::separator) - { - JSONV_DBG_STRUCT(','); - trailing_comma = true; - } - else - { - context.parse_error("Invalid entry when looking for ',' or ']'"); - } - } - context.parse_error("Unexpected end: unmatched '['"); - return false; -} - -static bool parse_object(parse_context& context, value& out) -{ - out = object(); - bool trailing_comma = false; - - while (context.next()) - { - std::string key; - if (context.current_kind() == token_kind::string) - { - key = parse_string(context); - trailing_comma = false; - } - else if (context.current_kind() == token_kind::object_end) - { - if (trailing_comma && context.options.comma_policy() != parse_options::commas::allow_trailing) - context.parse_error("Trailing comma at end of object."); - return true; - } - else - { - context.parse_error("Expecting a key, but found ", context.current_kind()); - // simulate a new key - key = std::string(context.current().text); - } - - if (!context.next()) - { - context.parse_error("Unexpected end: missing ':' for key '", key, "'"); - return false; - } - - if (context.current_kind() != token_kind::object_key_delimiter) - context.parse_error("Invalid key-value delimiter...expecting ':' 
after key '", key, "'"); - - value val; - if (!parse_generic(context, val)) - { - context.parse_error("Unexpected end: incomplete value for key '", key, "'"); - return false; - } - - auto iter = out.find(key); - if (iter == out.end_object()) - { - out.insert({ std::move(key), std::move(val) }); - } - else - { - context.parse_error("Duplicate entries for key '", key, "'. ", - "Updating old value ", iter->second, " with new value ", val, "." - ); - iter->second = std::move(val); - } - - if (!context.next()) - break; - - if (context.current_kind() == token_kind::object_end) - return true; - else if (context.current_kind() == token_kind::separator) - trailing_comma = true; - else - context.parse_error("Invalid token while searching for next value in object."); - } - - context.parse_error("Unexpected end inside of object."); - return false; -} - -/** This function skips over anything that isn't one of the "separator" characters. It is intended to make parse errors - * a little more reasonable. 
-**/ -static bool forward_to_separator(parse_context& context) -{ - while (context.next()) switch (context.current().kind) - { - case token_kind::boolean: - case token_kind::null: - case token_kind::number: - case token_kind::string: - case token_kind::parse_error_indicator: - case token_kind::unknown: - continue; - case token_kind::array_begin: - case token_kind::object_begin: - case token_kind::comment: - case token_kind::object_key_delimiter: - case token_kind::array_end: - case token_kind::object_end: - case token_kind::separator: - case token_kind::whitespace: - return true; - } - - return false; -} - -static bool parse_generic(parse_context& context, value& out, bool advance) -{ - if (advance && !context.next()) - return false; - - switch (context.current().kind) - { - case token_kind::array_begin: - return parse_array(context, out); - case token_kind::boolean: - return parse_boolean(context, out); - case token_kind::null: - return parse_null(context, out); - case token_kind::number: - return parse_number(context, out); - case token_kind::object_begin: - return parse_object(context, out); - case token_kind::string: - return parse_string(context, out); - case token_kind::comment: - case token_kind::whitespace: - // ignore - return parse_generic(context, out); - case token_kind::unknown: - case token_kind::array_end: - case token_kind::object_end: - case token_kind::object_key_delimiter: - case token_kind::separator: - case token_kind::parse_error_indicator: - default: - context.parse_error("Encountered invalid token ", context.current().kind, ": \"", context.current().text, "\""); - return forward_to_separator(context); - } -} - -class JSONV_LOCAL depth_checker : - private encoder -{ -public: - explicit depth_checker(parse_context& context) : - _context(context), - _current_depth(0) - { } - - void check(const value& val) - { - encode(val); - } - -private: - virtual void write_null() override { } - virtual void write_object_key(string_view) override { } - 
virtual void write_object_delimiter() override { } - virtual void write_array_delimiter() override { } - virtual void write_string(string_view) override { } - virtual void write_integer(std::int64_t) override { } - virtual void write_decimal(double) override { } - virtual void write_boolean(bool) override { } - - virtual void write_object_begin() override { increase_depth(); } - virtual void write_array_begin() override { increase_depth(); } - virtual void write_object_end() override { --_current_depth; } - virtual void write_array_end() override { --_current_depth; } - -private: - void increase_depth() - { - if (++_current_depth == _context.options.max_structure_depth()) - { - _context.parse_error("Structure depth reached maximum of ", _current_depth); - } - } - -private: - parse_context& _context; - parse_options::size_type _current_depth; -}; - -} - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // parse functions // //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -static value post_parse(detail::parse_context& context, value&& out_) -{ - // allow RVO - value out(std::move(out_)); - if (context.successful && context.options.complete_parse()) - { - while (context.next()) - { - if ( context.current_kind() != token_kind::whitespace - && context.current_kind() != token_kind::comment - && context.current_kind() != token_kind::unknown - ) - { - // At the end of input, we might have a few nulls -- this is expected for string literals, so ignore - // them. - string_view current_text = context.current().text; - if (std::any_of(current_text.begin(), current_text.end(), [] (char c) { return c != '\0'; })) - context.parse_error("Found non-trivial data after final token. 
", context.current_kind()); - } - } - } - - if (context.successful && context.options.require_document()) - { - if (out.kind() != kind::array && out.kind() != kind::object) - { - context.parse_error("JSON requires the root of a payload to be an array or object, not ", out.kind()); - } - } - - if (context.options.max_structure_depth() > 0) - { - detail::depth_checker depth_checker(context); - depth_checker.check(out); - } - - if (context.successful || context.options.failure_mode() == parse_options::on_error::ignore) - return out; - else - throw parse_error(context.problems, out); -} - -value parse(tokenizer& input, const parse_options& options) -{ - detail::parse_context context(options, input); - value out; - if (!detail::parse_generic(context, out)) - context.parse_error("No input"); - - return post_parse(context, std::move(out)); -} - value parse(std::istream& input, const parse_options& options) { - tokenizer tokens(input); - return parse(tokens, options); -} - -value parse(const string_view& input, const parse_options& options) -{ - tokenizer tokens(input); - return parse(tokens, options); + auto text = std::string(std::istreambuf_iterator(input), std::istreambuf_iterator()); + return parse(text, options); } value parse(const char* begin, const char* end, const parse_options& options) diff --git a/src/jsonv/path.cpp b/src/jsonv/path.cpp index 8617418..d96b1a8 100644 --- a/src/jsonv/path.cpp +++ b/src/jsonv/path.cpp @@ -11,8 +11,10 @@ #include #include #include +#include +#include #include -#include +#include #include #include @@ -238,6 +240,116 @@ std::string to_string(const path_element& val) return os.str(); } +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// path Parsing Details // +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +namespace detail +{ + +enum class path_match_result : char +{ + simple_object = 
'.', + brace = '[', + invalid = '\x00', +}; + +static optional match_simple_string(const char* begin, const char* end) +{ + auto length = std::size_t(0U); + auto max_length = std::size_t(end - begin); + + auto current = [&] () + { + if (length < max_length) + return begin[length]; + else + return '\0'; + }; + + // R"(^[a-zA-Z_$][a-zA-Z0-9_$]*)" + char c = current(); + if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || (c == '_') || (c == '$')) + ++length; + else + return nullopt; + + while (true) + { + c = current(); + if (c == '\0') + return string_view(begin, length); + else if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || (c == '_') || (c == '$') || ('0' <= c && c <= '9')) + ++length; + else + return string_view(begin, length); + } +} + +/// Attempt to match a path. +/// +/// \param input The input to match +/// \param[out] match_contents The full contents of a match +static path_match_result path_match(string_view input, string_view& match_contents) +{ + if (input.length() < 2U) + return path_match_result::invalid; + + switch (input.at(0)) + { + case '.': + if (auto result = match_simple_string(input.data() + 1, input.data() + input.size())) + { + match_contents = input.substr(0, result->length() + 1); + return path_match_result::simple_object; + } + else + { + return path_match_result::invalid; + } + case '[': + if (input.size() < 2U) + return path_match_result::invalid; + + if (input[1] == '\"') + { + if (auto result = match_string(input.data() + 1, input.data() + input.size())) + { + if (input.length() == result.length + 1U || input.at(1 + result.length) != ']') + return path_match_result::invalid; + match_contents = input.substr(0, result.length + 2U); + return path_match_result::brace; + } + else + { + return path_match_result::invalid; + } + } + else if (input[1] >= '0' && input[1] <= '9') + { + if (auto result = match_number(input.data() + 1, input.data() + input.size()); result && !result.decimal) + { + if (input.length() == result.length + 
1U || input.at(1 + result.length) != ']') + return path_match_result::invalid; + match_contents = input.substr(0, result.length + 2U); + return path_match_result::brace; + } + else + { + return path_match_result::invalid; + } + } + else + { + return path_match_result::invalid; + } + default: + return path_match_result::invalid; + } +} + +} + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // path // //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -289,8 +401,8 @@ path path::create(string_view specification) switch (detail::path_match(remaining, match)) { case detail::path_match_result::simple_object: - out += match.substr(1); - break; + out += match.substr(1); + break; case detail::path_match_result::brace: if (match.at(1) == '\"') out += detail::get_string_decoder(parse_options::encoding::utf8)(match.substr(2, match.size() - 4)); diff --git a/src/jsonv/tokenizer.cpp b/src/jsonv/tokenizer.cpp deleted file mode 100644 index 4d7b1dc..0000000 --- a/src/jsonv/tokenizer.cpp +++ /dev/null @@ -1,189 +0,0 @@ -/** \file - * - * Copyright (c) 2014-2018 by Travis Gockel. All rights reserved. - * - * This program is free software: you can redistribute it and/or modify it under the terms of the Apache License - * as published by the Apache Software Foundation, either version 2 of the License, or (at your option) any later - * version. 
- * - * \author Travis Gockel (travis@gockelhut.com) -**/ -#include -#include - -#include -#include -#include -#include -#include -#include - -namespace jsonv -{ - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// token_kind // -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -std::ostream& operator<<(std::ostream& os, const token_kind& value) -{ - static constexpr token_kind all_valid_tokens = token_kind(0x0fff); - static constexpr token_kind non_error_tokens = token_kind(0xffff); - - switch (value) - { - case token_kind::unknown: return os << "unknown"; - case token_kind::array_begin: return os << '['; - case token_kind::array_end: return os << ']'; - case token_kind::boolean: return os << "boolean"; - case token_kind::null: return os << "null"; - case token_kind::number: return os << "number"; - case token_kind::separator: return os << ','; - case token_kind::string: return os << "string"; - case token_kind::object_begin: return os << '{'; - case token_kind::object_key_delimiter: return os << ':'; - case token_kind::object_end: return os << '}'; - case token_kind::whitespace: return os << "whitespace"; - case token_kind::comment: return os << "comment"; - case token_kind::parse_error_indicator: - default: - // if the value represents a parse error... 
- if ((value & token_kind::parse_error_indicator) == token_kind::parse_error_indicator) - { - return os << "parse_error(" - << (value & all_valid_tokens) - << ')'; - } - // not a parse error - else - { - token_kind post = value & non_error_tokens; - for (token_kind scan_token = static_cast(1); - bool(post); - scan_token = token_kind(static_cast(scan_token) << 1) - ) - { - if (bool(scan_token & post)) - { - if (bool(scan_token & all_valid_tokens)) - os << scan_token; - else - os << std::hex << "0x" << std::setfill('0') << std::setw(4) - << static_cast(scan_token) - << std::dec << std::setfill(' '); - - post = post & ~scan_token; - if (bool(post)) - os << '|'; - } - } - return os; - } - } -} - -std::string to_string(const token_kind& value) -{ - std::ostringstream ss; - ss << value; - return ss.str(); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// tokenizer // -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -static tokenizer::size_type& min_buffer_size_ref() -{ - static tokenizer::size_type instance = 1024 * sizeof(void*); - return instance; -} - -tokenizer::size_type tokenizer::min_buffer_size() -{ - return min_buffer_size_ref(); -} - -void tokenizer::set_min_buffer_size(tokenizer::size_type sz) -{ - min_buffer_size_ref() = std::max(sz, tokenizer::size_type(1)); -} - -tokenizer::tokenizer(string_view input) : - _input(input), - _position(_input.data()) -{ } - -tokenizer::tokenizer(std::shared_ptr input) : - _input(*input), - _position(_input.data()), - _track(std::move(input)) -{ } - -static std::string load_single_string(std::istream& input) -{ - std::string out; - char buffer[16 * 1024]; - - while (input.read(buffer, sizeof buffer)) - { - out.append(buffer, sizeof buffer); - } - out.append(buffer, input.gcount()); - - return out; -} - -tokenizer::tokenizer(std::istream& input) : - 
tokenizer(std::make_shared(load_single_string(input))) -{ } - -tokenizer::~tokenizer() noexcept -{ } - -const string_view& tokenizer::input() const -{ - return _input; -} - -const tokenizer::token& tokenizer::current() const -{ - if (_current.text.data()) - return _current; - else - throw std::logic_error("Cannot get token -- call next() and make sure it returns true."); -} - -bool tokenizer::next() -{ - auto valid = [this] (const string_view& new_current, token_kind new_kind) - { - _current.text = new_current; - _current.kind = new_kind; - return true; - }; - - if (!_current.text.empty()) - _position += _current.text.size(); - - while (_position < _input.end()) - { - token_kind kind; - size_type match_len; - auto result = detail::attempt_match(_position, _input.end(), *&kind, *&match_len); - - if (result == detail::match_result::unmatched) - { - // unmatched entry -- this token is invalid - kind = kind | token_kind::parse_error_indicator; - } - return valid(string_view(_position, match_len), kind); - } - - return false; -} - -void tokenizer::buffer_reserve(size_type) -{ } - -} diff --git a/src/jsonv/value.cpp b/src/jsonv/value.cpp index 8398ded..8177a95 100644 --- a/src/jsonv/value.cpp +++ b/src/jsonv/value.cpp @@ -1,5 +1,5 @@ /** \file - * + * * Copyright (c) 2012-2015 by Travis Gockel. All rights reserved. 
* * This program is free software: you can redistribute it and/or modify it under the terms of the Apache License @@ -28,32 +28,6 @@ namespace jsonv { -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// kind // -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -std::ostream& operator<<(std::ostream& os, const kind& k) -{ - switch (k) - { - case jsonv::kind::array: return os << "array"; - case jsonv::kind::boolean: return os << "boolean"; - case jsonv::kind::decimal: return os << "decimal"; - case jsonv::kind::integer: return os << "integer"; - case jsonv::kind::null: return os << "null"; - case jsonv::kind::object: return os << "object"; - case jsonv::kind::string: return os << "string"; - default: return os << "kind(" << static_cast(k) << ")"; - } -} - -std::string to_string(const kind& k) -{ - std::ostringstream ss; - ss << k; - return ss.str(); -} - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // kind_error // //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -117,7 +91,7 @@ value::value(bool val) : #define JSONV_VALUE_INTEGER_ALTERNATIVE_CTOR_GENERATOR(type_) \ value::value(type_ val) : \ - _kind(jsonv::kind::integer) \ + _kind(jsonv::kind::integer) \ { \ _data.integer = val; \ } @@ -176,13 +150,13 @@ value& value::operator=(value&& source) noexcept if (this != &source) { clear(); - + _data = source._data; _kind = source._kind; source._data.object = 0; source._kind = jsonv::kind::null; } - + return *this; } @@ -230,7 +204,7 @@ TValueRef walk_path(TValueRef&& current, { if (first == last) return current; - + const path_element& elem = *first; switch (elem.kind()) { @@ -333,7 +307,7 @@ value& value::path(size_type path_idx) value::size_type value::count_path(const 
jsonv::path& p) const { - // TODO: Performance of this function sucks! + // TODO(#148): Performance of this function sucks! try { at_path(p); @@ -362,7 +336,7 @@ value::size_type value::count_path(size_type p) const void value::swap(value& other) noexcept { using std::swap; - + // All types of this union a trivially swappable swap(_data, other._data); swap(_kind, other._kind); @@ -388,7 +362,7 @@ void value::clear() // do nothing break; } - + _kind = jsonv::kind::null; _data.object = 0; } @@ -443,7 +417,7 @@ bool value::operator !=(const value& other) const // must be first: an invalid type is not equal to itself if (!kind_valid(kind())) return true; - + if (this == &other) return false; else @@ -453,7 +427,7 @@ bool value::operator !=(const value& other) const int value::compare(const value& other) const { using jsonv::compare; - + return compare(*this, other); } @@ -516,7 +490,7 @@ bool value::empty() const noexcept value::size_type value::size() const { check_type({ jsonv::kind::object, jsonv::kind::array, jsonv::kind::string }, kind()); - + switch (kind()) { case jsonv::kind::object: @@ -565,14 +539,14 @@ static std::size_t hash_range(TForwardIterator first, TForwardIterator last, con std::size_t x = 0; for ( ; first != last; ++first) x = (x << 1) ^ hasher(*first); - + return x; } size_t hash::operator()(const jsonv::value& val) const noexcept { using namespace jsonv; - + switch (val.kind()) { case jsonv::kind::object: