be/src/util/json/simd_json_parser.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // This file is copied from |
18 | | // https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/JSONParsers/SimdJSONParser.h |
19 | | // and modified by Doris |
20 | | |
21 | | #pragma once |
22 | | |
#include <rapidjson/document.h>
#include <simdjson.h>

#include <cassert>
#include <cstddef>
#include <string_view>
#include <utility>

#include "core/types.h"
27 | | |
28 | | namespace doris { |
29 | | |
30 | | /// This class can be used as an argument for the template class FunctionJSON. |
31 | | /// It provides ability to parse JSONs using simdjson library. |
32 | | class SimdJSONParser { |
33 | | public: |
34 | | class Array; |
35 | | class Object; |
36 | | /// References an element in a JSON document, representing a JSON null, boolean, string, number, |
37 | | /// array or object. |
38 | | class Element { |
39 | | public: |
40 | 1.35M | ALWAYS_INLINE Element() {} /// NOLINT |
41 | | ALWAYS_INLINE Element(const simdjson::dom::element& element_) |
42 | 25.0M | : element(element_) {} /// NOLINT |
43 | 19.6M | ALWAYS_INLINE bool isInt64() const { |
44 | 19.6M | return element.type() == simdjson::dom::element_type::INT64; |
45 | 19.6M | } |
46 | 14.5M | ALWAYS_INLINE bool isUInt64() const { |
47 | 14.5M | return element.type() == simdjson::dom::element_type::UINT64; |
48 | 14.5M | } |
49 | 14.5M | ALWAYS_INLINE bool isDouble() const { |
50 | 14.5M | return element.type() == simdjson::dom::element_type::DOUBLE; |
51 | 14.5M | } |
52 | 11.7M | ALWAYS_INLINE bool isString() const { |
53 | 11.7M | return element.type() == simdjson::dom::element_type::STRING; |
54 | 11.7M | } |
55 | 24.8M | ALWAYS_INLINE bool isArray() const { |
56 | 24.8M | return element.type() == simdjson::dom::element_type::ARRAY; |
57 | 24.8M | } |
58 | 25.7M | ALWAYS_INLINE bool isObject() const { |
59 | 25.7M | return element.type() == simdjson::dom::element_type::OBJECT; |
60 | 25.7M | } |
61 | 19.9M | ALWAYS_INLINE bool isBool() const { |
62 | 19.9M | return element.type() == simdjson::dom::element_type::BOOLEAN; |
63 | 19.9M | } |
64 | 110k | ALWAYS_INLINE bool isNull() const { |
65 | 110k | return element.type() == simdjson::dom::element_type::NULL_VALUE; |
66 | 110k | } |
67 | 5.31M | ALWAYS_INLINE Int64 getInt64() const { return element.get_int64().value_unsafe(); } |
68 | 2.89M | ALWAYS_INLINE double getDouble() const { return element.get_double().value_unsafe(); } |
69 | 358k | ALWAYS_INLINE bool getBool() const { return element.get_bool().value_unsafe(); } |
70 | 11.6M | ALWAYS_INLINE std::string_view getString() const { |
71 | 11.6M | return element.get_string().value_unsafe(); |
72 | 11.6M | } |
73 | 29 | ALWAYS_INLINE UInt64 getUInt64() const { return element.get_uint64().value_unsafe(); } |
74 | | ALWAYS_INLINE Array getArray() const; |
75 | | ALWAYS_INLINE Object getObject() const; |
76 | | |
77 | | private: |
78 | | simdjson::dom::element element; |
79 | | }; |
80 | | /// References an array in a JSON document. |
81 | | class Array { |
82 | | public: |
83 | | class Iterator { |
84 | | public: |
85 | | ALWAYS_INLINE Iterator(const simdjson::dom::array::iterator& it_) |
86 | 6.60M | : it(it_) {} /// NOLINT |
87 | 3.56M | ALWAYS_INLINE Element operator*() const { return *it; } |
88 | 3.57M | ALWAYS_INLINE Iterator& operator++() { |
89 | 3.57M | ++it; |
90 | 3.57M | return *this; |
91 | 3.57M | } |
92 | 5.08M | ALWAYS_INLINE friend bool operator!=(const Iterator& left, const Iterator& right) { |
93 | 5.08M | return left.it != right.it; |
94 | 5.08M | } |
95 | | |
96 | | private: |
97 | | simdjson::dom::array::iterator it; |
98 | | }; |
99 | 1.52M | ALWAYS_INLINE Array(const simdjson::dom::array& array_) : array(array_) {} /// NOLINT |
100 | 1.52M | ALWAYS_INLINE Iterator begin() const { return array.begin(); } |
101 | 5.08M | ALWAYS_INLINE Iterator end() const { return array.end(); } |
102 | 635k | ALWAYS_INLINE size_t size() const { return array.size(); } |
103 | 0 | ALWAYS_INLINE Element operator[](size_t index) const { |
104 | 0 | assert(index < size()); |
105 | 0 | return array.at(index).value_unsafe(); |
106 | 0 | } |
107 | | |
108 | | private: |
109 | | simdjson::dom::array array; |
110 | | }; |
111 | | using KeyValuePair = std::pair<std::string_view, Element>; |
112 | | /// References an object in a JSON document. |
113 | | class Object { |
114 | | public: |
115 | | class Iterator { |
116 | | public: |
117 | | ALWAYS_INLINE Iterator(const simdjson::dom::object::iterator& it_) |
118 | 24.5M | : it(it_) {} /// NOLINT |
119 | 20.2M | ALWAYS_INLINE KeyValuePair operator*() const { |
120 | 20.2M | const auto& res = *it; |
121 | 20.2M | return {res.key, res.value}; |
122 | 20.2M | } |
123 | 20.2M | ALWAYS_INLINE Iterator& operator++() { |
124 | 20.2M | ++it; |
125 | 20.2M | return *this; |
126 | 20.2M | } |
127 | 0 | ALWAYS_INLINE Iterator operator++(int) { |
128 | 0 | auto res = *this; |
129 | 0 | ++it; |
130 | 0 | return res; |
131 | 0 | } /// NOLINT |
132 | 22.3M | ALWAYS_INLINE friend bool operator!=(const Iterator& left, const Iterator& right) { |
133 | 22.3M | return left.it != right.it; |
134 | 22.3M | } |
135 | 0 | ALWAYS_INLINE friend bool operator==(const Iterator& left, const Iterator& right) { |
136 | 0 | return !(left != right); |
137 | 0 | } |
138 | | |
139 | | private: |
140 | | simdjson::dom::object::iterator it; |
141 | | }; |
142 | 2.19M | ALWAYS_INLINE Object(const simdjson::dom::object& object_) : object(object_) {} /// NOLINT |
143 | 2.19M | ALWAYS_INLINE Iterator begin() const { return object.begin(); } |
144 | 22.4M | ALWAYS_INLINE Iterator end() const { return object.end(); } |
145 | 4.06M | ALWAYS_INLINE size_t size() const { return object.size(); } |
146 | | /// Optional: Provides access to an object's element by index. |
147 | 0 | KeyValuePair operator[](size_t index) const { |
148 | 0 | assert(index < size()); |
149 | 0 | auto it = object.begin(); |
150 | 0 | while (index--) { |
151 | 0 | ++it; |
152 | 0 | } |
153 | 0 | const auto& res = *it; |
154 | 0 | return {res.key, res.value}; |
155 | 0 | } |
156 | | |
157 | | private: |
158 | | simdjson::dom::object object; |
159 | | }; |
160 | | /// Parses a JSON document, returns the reference to its root element if succeeded. |
161 | 1.35M | bool parse(const char* data, size_t size, Element& result) { |
162 | 1.35M | auto document = parser.parse(data, size); |
163 | 1.35M | if (document.error()) { |
164 | 666 | return false; |
165 | 666 | } |
166 | 1.35M | result = document.value_unsafe(); |
167 | 1.35M | return true; |
168 | 1.35M | } |
169 | | |
170 | | private: |
171 | | simdjson::dom::parser parser; |
172 | | }; |
173 | 1.52M | inline ALWAYS_INLINE SimdJSONParser::Array SimdJSONParser::Element::getArray() const { |
174 | 1.52M | return element.get_array().value_unsafe(); |
175 | 1.52M | } |
176 | 2.19M | inline ALWAYS_INLINE SimdJSONParser::Object SimdJSONParser::Element::getObject() const { |
177 | 2.19M | return element.get_object().value_unsafe(); |
178 | 2.19M | } |
179 | | |
180 | | } // namespace doris |