be/src/util/json/simd_json_parser.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // This file is copied from |
18 | | // https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/JSONParsers/SimdJSONParser.h |
19 | | // and modified by Doris |
20 | | |
21 | | #pragma once |
22 | | |
23 | | #include <simdjson.h> |
24 | | |
25 | | #include <cassert> |
26 | | #include <string> |
27 | | #include <string_view> |
28 | | #include <type_traits> |
29 | | #include <utility> |
30 | | #include <variant> |
31 | | #include <vector> |
32 | | |
33 | | #include "core/types.h" |
34 | | #include "util/string_parser.hpp" |
35 | | |
36 | | namespace doris { |
37 | | |
38 | | /// This class can be used as an argument for the template class FunctionJSON. |
39 | | /// It provides the ability to parse JSON documents using the simdjson library. |
40 | | class SimdJSONParser { |
41 | | struct Node { |
42 | | enum class Type { |
43 | | INT64, |
44 | | UINT64, |
45 | | INT128, |
46 | | DOUBLE, |
47 | | STRING, |
48 | | ARRAY, |
49 | | OBJECT, |
50 | | BOOL, |
51 | | NULL_VALUE, |
52 | | }; |
53 | | |
54 | | Type type = Type::NULL_VALUE; |
55 | | Int64 int64_value = 0; |
56 | | UInt64 uint64_value = 0; |
57 | | Int128 int128_value = 0; |
58 | | double double_value = 0; |
59 | | bool bool_value = false; |
60 | | std::string string_value; |
61 | | std::string raw_number; |
62 | | std::vector<Node> array_values; |
63 | | std::vector<std::string> object_keys; |
64 | | std::vector<Node> object_values; |
65 | | }; |
66 | | |
67 | | public: |
68 | | class Array; |
69 | | class Object; |
70 | | /// References an element in a JSON document, representing a JSON null, boolean, string, number, |
71 | | /// array or object. |
72 | | class Element { |
73 | | public: |
74 | 80.6k | ALWAYS_INLINE Element() {} /// NOLINT |
75 | | ALWAYS_INLINE explicit Element(const simdjson::dom::element& element_) |
76 | 3.15M | : dom_element(element_) {} /// NOLINT |
77 | 34 | ALWAYS_INLINE explicit Element(const Node* node_) : node(node_) {} /// NOLINT |
78 | 1.86M | ALWAYS_INLINE bool isInt64() const { |
79 | 1.86M | return node ? node->type == Node::Type::INT64 |
80 | 1.86M | : dom_element.type() == simdjson::dom::element_type::INT64; |
81 | 1.86M | } |
82 | 1.06M | ALWAYS_INLINE bool isUInt64() const { |
83 | 1.06M | return node ? node->type == Node::Type::UINT64 |
84 | 1.06M | : dom_element.type() == simdjson::dom::element_type::UINT64; |
85 | 1.06M | } |
86 | 1.06M | ALWAYS_INLINE bool isInt128() const { return node && node->type == Node::Type::INT128; } |
87 | 1.06M | ALWAYS_INLINE bool isDouble() const { |
88 | 1.06M | return node ? node->type == Node::Type::DOUBLE |
89 | 1.06M | : dom_element.type() == simdjson::dom::element_type::DOUBLE; |
90 | 1.06M | } |
91 | 944k | ALWAYS_INLINE bool isString() const { |
92 | 944k | return node ? node->type == Node::Type::STRING |
93 | 944k | : dom_element.type() == simdjson::dom::element_type::STRING; |
94 | 944k | } |
95 | 3.14M | ALWAYS_INLINE bool isArray() const { |
96 | 3.14M | return node ? node->type == Node::Type::ARRAY |
97 | 3.14M | : dom_element.type() == simdjson::dom::element_type::ARRAY; |
98 | 3.14M | } |
99 | 3.21M | ALWAYS_INLINE bool isObject() const { |
100 | 3.21M | return node ? node->type == Node::Type::OBJECT |
101 | 3.21M | : dom_element.type() == simdjson::dom::element_type::OBJECT; |
102 | 3.21M | } |
103 | 1.88M | ALWAYS_INLINE bool isBool() const { |
104 | 1.88M | return node ? node->type == Node::Type::BOOL |
105 | 1.88M | : dom_element.type() == simdjson::dom::element_type::BOOLEAN; |
106 | 1.88M | } |
107 | 12.0k | ALWAYS_INLINE bool isNull() const { |
108 | 12.0k | return node ? node->type == Node::Type::NULL_VALUE |
109 | 12.0k | : dom_element.type() == simdjson::dom::element_type::NULL_VALUE; |
110 | 12.0k | } |
111 | 803k | ALWAYS_INLINE Int64 getInt64() const { |
112 | 803k | return node ? node->int64_value : dom_element.get_int64().value_unsafe(); |
113 | 803k | } |
114 | 116k | ALWAYS_INLINE double getDouble() const { |
115 | 116k | return node ? node->double_value : dom_element.get_double().value_unsafe(); |
116 | 116k | } |
117 | 20.7k | ALWAYS_INLINE bool getBool() const { |
118 | 20.7k | return node ? node->bool_value : dom_element.get_bool().value_unsafe(); |
119 | 20.7k | } |
120 | 932k | ALWAYS_INLINE std::string_view getString() const { |
121 | 932k | return node ? std::string_view(node->string_value) |
122 | 932k | : dom_element.get_string().value_unsafe(); |
123 | 932k | } |
124 | 21 | ALWAYS_INLINE UInt64 getUInt64() const { |
125 | 21 | return node ? node->uint64_value : dom_element.get_uint64().value_unsafe(); |
126 | 21 | } |
127 | 1 | ALWAYS_INLINE Int128 getInt128() const { |
128 | 1 | assert(node != nullptr); |
129 | 1 | return node->int128_value; |
130 | 1 | } |
131 | 15 | ALWAYS_INLINE std::string_view getRawNumber() const { |
132 | 15 | assert(node != nullptr); |
133 | 15 | return node->raw_number; |
134 | 15 | } |
135 | | ALWAYS_INLINE Array getArray() const; |
136 | | ALWAYS_INLINE Object getObject() const; |
137 | | |
138 | | private: |
139 | | simdjson::dom::element dom_element; |
140 | | const Node* node = nullptr; |
141 | | }; |
142 | | /// References an array in a JSON document. |
143 | | class Array { |
144 | | public: |
145 | | class Iterator { |
146 | | public: |
147 | | using DomIterator = simdjson::dom::array::iterator; |
148 | | using NodeIterator = std::vector<Node>::const_iterator; |
149 | 1.96M | ALWAYS_INLINE explicit Iterator(const DomIterator& it_) : it(it_) {} /// NOLINT |
150 | 14 | ALWAYS_INLINE explicit Iterator(NodeIterator it_) : it(it_) {} /// NOLINT |
151 | 1.64M | ALWAYS_INLINE Element operator*() const { |
152 | 1.64M | if (const auto* node_it = std::get_if<NodeIterator>(&it)) { |
153 | 6 | return Element(&**node_it); |
154 | 6 | } |
155 | 1.64M | return Element(*std::get<DomIterator>(it)); |
156 | 1.64M | } |
157 | 1.64M | ALWAYS_INLINE Iterator& operator++() { |
158 | 1.64M | if (auto* node_it = std::get_if<NodeIterator>(&it)) { |
159 | 6 | ++(*node_it); |
160 | 1.64M | } else { |
161 | 1.64M | ++std::get<DomIterator>(it); |
162 | 1.64M | } |
163 | 1.64M | return *this; |
164 | 1.64M | } |
165 | 1.80M | ALWAYS_INLINE friend bool operator!=(const Iterator& left, const Iterator& right) { |
166 | 1.80M | if (const auto* left_node_it = std::get_if<NodeIterator>(&left.it)) { |
167 | 10 | return *left_node_it != std::get<NodeIterator>(right.it); |
168 | 10 | } |
169 | 1.80M | return std::get<DomIterator>(left.it) != std::get<DomIterator>(right.it); |
170 | 1.80M | } |
171 | | |
172 | | private: |
173 | | std::variant<DomIterator, NodeIterator> it; |
174 | | }; |
175 | | ALWAYS_INLINE explicit Array(const simdjson::dom::array& array_) |
176 | 160k | : dom_array(array_) {} /// NOLINT |
177 | 4 | ALWAYS_INLINE explicit Array(const std::vector<Node>* array_) : array(array_) {} /// NOLINT |
178 | 160k | ALWAYS_INLINE Iterator begin() const { |
179 | 160k | return array ? Iterator(array->begin()) : Iterator(dom_array.begin()); |
180 | 160k | } |
181 | 1.80M | ALWAYS_INLINE Iterator end() const { |
182 | 1.80M | return array ? Iterator(array->end()) : Iterator(dom_array.end()); |
183 | 1.80M | } |
184 | 55.1k | ALWAYS_INLINE size_t size() const { return array ? array->size() : dom_array.size(); } |
185 | 0 | ALWAYS_INLINE Element operator[](size_t index) const { |
186 | 0 | assert(index < size()); |
187 | 0 | return array ? Element(&(*array)[index]) : Element(dom_array.at(index).value_unsafe()); |
188 | 0 | } |
189 | | |
190 | | private: |
191 | | simdjson::dom::array dom_array; |
192 | | const std::vector<Node>* array = nullptr; |
193 | | }; |
194 | | using KeyValuePair = std::pair<std::string_view, Element>; |
195 | | /// References an object in a JSON document. |
196 | | class Object { |
197 | | public: |
198 | | class Iterator { |
199 | | public: |
200 | | using DomIterator = simdjson::dom::object::iterator; |
201 | | ALWAYS_INLINE explicit Iterator(const std::vector<std::string>* keys_, |
202 | | const std::vector<Node>* values_, size_t index_) |
203 | 26 | : it(index_), keys(keys_), values(values_) {} /// NOLINT |
204 | 1.71M | ALWAYS_INLINE explicit Iterator(const DomIterator& it_) : it(it_) {} /// NOLINT |
205 | 1.42M | ALWAYS_INLINE KeyValuePair operator*() const { |
206 | 1.42M | if (const auto* index = std::get_if<size_t>(&it)) { |
207 | 14 | return {(*keys)[*index], Element(&(*values)[*index])}; |
208 | 14 | } |
209 | 1.42M | const auto& res = *std::get<DomIterator>(it); |
210 | 1.42M | return {res.key, Element(res.value)}; |
211 | 1.42M | } |
212 | 1.42M | ALWAYS_INLINE Iterator& operator++() { |
213 | 1.42M | if (auto* index = std::get_if<size_t>(&it)) { |
214 | 14 | ++(*index); |
215 | 1.42M | } else { |
216 | 1.42M | ++std::get<DomIterator>(it); |
217 | 1.42M | } |
218 | 1.42M | return *this; |
219 | 1.42M | } |
220 | 0 | ALWAYS_INLINE Iterator operator++(int) { |
221 | 0 | auto res = *this; |
222 | 0 | ++(*this); |
223 | 0 | return res; |
224 | 0 | } /// NOLINT |
225 | 1.57M | ALWAYS_INLINE friend bool operator!=(const Iterator& left, const Iterator& right) { |
226 | 1.57M | if (const auto* left_index = std::get_if<size_t>(&left.it)) { |
227 | 20 | return *left_index != std::get<size_t>(right.it); |
228 | 20 | } |
229 | 1.57M | return std::get<DomIterator>(left.it) != std::get<DomIterator>(right.it); |
230 | 1.57M | } |
231 | 0 | ALWAYS_INLINE friend bool operator==(const Iterator& left, const Iterator& right) { |
232 | 0 | return !(left != right); |
233 | 0 | } |
234 | | |
235 | | private: |
236 | | std::variant<DomIterator, size_t> it; |
237 | | const std::vector<std::string>* keys = nullptr; |
238 | | const std::vector<Node>* values = nullptr; |
239 | | }; |
240 | | ALWAYS_INLINE explicit Object(const simdjson::dom::object& object_) |
241 | 146k | : dom_object(object_) {} /// NOLINT |
242 | | ALWAYS_INLINE explicit Object(const std::vector<std::string>* keys_, |
243 | | const std::vector<Node>* values_) |
244 | 6 | : keys(keys_), values(values_) {} /// NOLINT |
245 | 146k | ALWAYS_INLINE Iterator begin() const { |
246 | 146k | return values ? Iterator(keys, values, 0) : Iterator(dom_object.begin()); |
247 | 146k | } |
248 | 1.57M | ALWAYS_INLINE Iterator end() const { |
249 | 1.57M | return values ? Iterator(keys, values, size()) : Iterator(dom_object.end()); |
250 | 1.57M | } |
251 | 289k | ALWAYS_INLINE size_t size() const { return values ? values->size() : dom_object.size(); } |
252 | | /// Optional: Provides access to an object's element by index. |
253 | 0 | KeyValuePair operator[](size_t index) const { |
254 | 0 | assert(index < size()); |
255 | 0 | if (values) { |
256 | 0 | return {(*keys)[index], Element(&(*values)[index])}; |
257 | 0 | } |
258 | 0 | auto it = dom_object.begin(); |
259 | 0 | while (index--) { |
260 | 0 | ++it; |
261 | 0 | } |
262 | 0 | const auto& res = *it; |
263 | 0 | return {res.key, Element(res.value)}; |
264 | 0 | } |
265 | | |
266 | | private: |
267 | | simdjson::dom::object dom_object; |
268 | | const std::vector<std::string>* keys = nullptr; |
269 | | const std::vector<Node>* values = nullptr; |
270 | | }; |
271 | | /// Parses a JSON document, returns the reference to its root element if succeeded. |
272 | 80.6k | bool parse(const char* data, size_t size, Element& result, bool preserve_raw_numbers = false) { |
273 | 80.6k | if (!preserve_raw_numbers) { |
274 | 80.6k | auto document = dom_parser.parse(data, size); |
275 | 80.6k | if (document.error()) { |
276 | 15 | return false; |
277 | 15 | } |
278 | 80.6k | result = Element(document.value_unsafe()); |
279 | 80.6k | return true; |
280 | 80.6k | } |
281 | | |
282 | 14 | return parse_ondemand(data, size, result); |
283 | 80.6k | } |
284 | | |
285 | | private: |
286 | 14 | bool parse_ondemand(const char* data, size_t size, Element& result) { |
287 | 14 | simdjson::padded_string padded_json(data, size); |
288 | 14 | simdjson::ondemand::document document; |
289 | 14 | auto error = ondemand_parser.iterate(padded_json).get(document); |
290 | 14 | if (error) { |
291 | 0 | return false; |
292 | 0 | } |
293 | 14 | root = Node(); |
294 | 14 | if (!build_node(document, &root)) { |
295 | 0 | return false; |
296 | 0 | } |
297 | 14 | result = Element(&root); |
298 | 14 | return true; |
299 | 14 | } |
300 | | |
/// Returns `raw_number` with leading and trailing spaces, tabs, line feeds and
/// carriage returns removed. The result is a sub-view of the argument; an
/// all-whitespace or empty input yields an empty view.
static std::string_view trim_raw_number(std::string_view raw_number) {
    constexpr std::string_view kWhitespace = " \t\n\r";

    const auto first = raw_number.find_first_not_of(kWhitespace);
    if (first == std::string_view::npos) {
        // Nothing but whitespace: empty view positioned at the end of the input.
        return raw_number.substr(raw_number.size());
    }
    const auto last = raw_number.find_last_not_of(kWhitespace);
    return raw_number.substr(first, last - first + 1);
}
311 | | |
312 | | template <typename RawNumber> |
313 | 17 | static bool assign_raw_number(RawNumber&& raw_number, std::string* out) { |
314 | 17 | if constexpr (std::is_same_v<std::decay_t<RawNumber>, std::string_view>) { |
315 | 12 | *out = std::string(trim_raw_number(raw_number)); |
316 | 12 | return true; |
317 | 12 | } else { |
318 | 5 | std::string_view raw_number_view; |
319 | 5 | auto error = std::move(raw_number).get(raw_number_view); |
320 | 5 | if (error) { |
321 | 0 | return false; |
322 | 0 | } |
323 | 5 | *out = std::string(trim_raw_number(raw_number_view)); |
324 | 5 | return true; |
325 | 5 | } |
326 | 17 | } _ZN5doris14SimdJSONParser17assign_raw_numberISt17basic_string_viewIcSt11char_traitsIcEEEEbOT_PNSt7__cxx1112basic_stringIcS4_SaIcEEE Line | Count | Source | 313 | 12 | static bool assign_raw_number(RawNumber&& raw_number, std::string* out) { | 314 | 12 | if constexpr (std::is_same_v<std::decay_t<RawNumber>, std::string_view>) { | 315 | 12 | *out = std::string(trim_raw_number(raw_number)); | 316 | 12 | return true; | 317 | | } else { | 318 | | std::string_view raw_number_view; | 319 | | auto error = std::move(raw_number).get(raw_number_view); | 320 | | if (error) { | 321 | | return false; | 322 | | } | 323 | | *out = std::string(trim_raw_number(raw_number_view)); | 324 | | return true; | 325 | | } | 326 | 12 | } |
_ZN5doris14SimdJSONParser17assign_raw_numberIN8simdjson15simdjson_resultISt17basic_string_viewIcSt11char_traitsIcEEEEEEbOT_PNSt7__cxx1112basic_stringIcS6_SaIcEEE Line | Count | Source | 313 | 5 | static bool assign_raw_number(RawNumber&& raw_number, std::string* out) { | 314 | | if constexpr (std::is_same_v<std::decay_t<RawNumber>, std::string_view>) { | 315 | | *out = std::string(trim_raw_number(raw_number)); | 316 | | return true; | 317 | 5 | } else { | 318 | 5 | std::string_view raw_number_view; | 319 | 5 | auto error = std::move(raw_number).get(raw_number_view); | 320 | 5 | if (error) { | 321 | 0 | return false; | 322 | 0 | } | 323 | 5 | *out = std::string(trim_raw_number(raw_number_view)); | 324 | 5 | return true; | 325 | 5 | } | 326 | 5 | } |
|
327 | | |
328 | | template <typename Value> |
329 | 2 | bool build_array_node(Value& value, Node* out) { |
330 | 2 | simdjson::ondemand::array array; |
331 | 2 | auto error = value.get_array().get(array); |
332 | 2 | if (error) { |
333 | 0 | return false; |
334 | 0 | } |
335 | 2 | out->type = Node::Type::ARRAY; |
336 | 3 | for (auto element_result : array) { |
337 | 3 | simdjson::ondemand::value element; |
338 | 3 | error = std::move(element_result).get(element); |
339 | 3 | if (error) { |
340 | 0 | return false; |
341 | 0 | } |
342 | 3 | Node element_node; |
343 | 3 | if (!build_node(element, &element_node)) { |
344 | 0 | return false; |
345 | 0 | } |
346 | 3 | out->array_values.push_back(std::move(element_node)); |
347 | 3 | } |
348 | 2 | return true; |
349 | 2 | } Unexecuted instantiation: _ZN5doris14SimdJSONParser16build_array_nodeIN8simdjson8fallback8ondemand8documentEEEbRT_PNS0_4NodeE _ZN5doris14SimdJSONParser16build_array_nodeIN8simdjson8fallback8ondemand5valueEEEbRT_PNS0_4NodeE Line | Count | Source | 329 | 2 | bool build_array_node(Value& value, Node* out) { | 330 | 2 | simdjson::ondemand::array array; | 331 | 2 | auto error = value.get_array().get(array); | 332 | 2 | if (error) { | 333 | 0 | return false; | 334 | 0 | } | 335 | 2 | out->type = Node::Type::ARRAY; | 336 | 3 | for (auto element_result : array) { | 337 | 3 | simdjson::ondemand::value element; | 338 | 3 | error = std::move(element_result).get(element); | 339 | 3 | if (error) { | 340 | 0 | return false; | 341 | 0 | } | 342 | 3 | Node element_node; | 343 | 3 | if (!build_node(element, &element_node)) { | 344 | 0 | return false; | 345 | 0 | } | 346 | 3 | out->array_values.push_back(std::move(element_node)); | 347 | 3 | } | 348 | 2 | return true; | 349 | 2 | } |
|
350 | | |
351 | | template <typename Value> |
352 | 6 | bool build_object_node(Value& value, Node* out) { |
353 | 6 | simdjson::ondemand::object object; |
354 | 6 | auto error = value.get_object().get(object); |
355 | 6 | if (error) { |
356 | 0 | return false; |
357 | 0 | } |
358 | 6 | out->type = Node::Type::OBJECT; |
359 | 14 | for (auto field_result : object) { |
360 | 14 | simdjson::ondemand::field field; |
361 | 14 | error = std::move(field_result).get(field); |
362 | 14 | if (error) { |
363 | 0 | return false; |
364 | 0 | } |
365 | 14 | std::string_view key; |
366 | 14 | error = field.unescaped_key().get(key); |
367 | 14 | if (error) { |
368 | 0 | return false; |
369 | 0 | } |
370 | 14 | std::string key_copy(key); |
371 | 14 | simdjson::ondemand::value field_value = field.value(); |
372 | 14 | Node field_node; |
373 | 14 | if (!build_node(field_value, &field_node)) { |
374 | 0 | return false; |
375 | 0 | } |
376 | 14 | out->object_keys.push_back(std::move(key_copy)); |
377 | 14 | out->object_values.push_back(std::move(field_node)); |
378 | 14 | } |
379 | 6 | return true; |
380 | 6 | } _ZN5doris14SimdJSONParser17build_object_nodeIN8simdjson8fallback8ondemand5valueEEEbRT_PNS0_4NodeE Line | Count | Source | 352 | 1 | bool build_object_node(Value& value, Node* out) { | 353 | 1 | simdjson::ondemand::object object; | 354 | 1 | auto error = value.get_object().get(object); | 355 | 1 | if (error) { | 356 | 0 | return false; | 357 | 0 | } | 358 | 1 | out->type = Node::Type::OBJECT; | 359 | 2 | for (auto field_result : object) { | 360 | 2 | simdjson::ondemand::field field; | 361 | 2 | error = std::move(field_result).get(field); | 362 | 2 | if (error) { | 363 | 0 | return false; | 364 | 0 | } | 365 | 2 | std::string_view key; | 366 | 2 | error = field.unescaped_key().get(key); | 367 | 2 | if (error) { | 368 | 0 | return false; | 369 | 0 | } | 370 | 2 | std::string key_copy(key); | 371 | 2 | simdjson::ondemand::value field_value = field.value(); | 372 | 2 | Node field_node; | 373 | 2 | if (!build_node(field_value, &field_node)) { | 374 | 0 | return false; | 375 | 0 | } | 376 | 2 | out->object_keys.push_back(std::move(key_copy)); | 377 | 2 | out->object_values.push_back(std::move(field_node)); | 378 | 2 | } | 379 | 1 | return true; | 380 | 1 | } |
_ZN5doris14SimdJSONParser17build_object_nodeIN8simdjson8fallback8ondemand8documentEEEbRT_PNS0_4NodeE Line | Count | Source | 352 | 5 | bool build_object_node(Value& value, Node* out) { | 353 | 5 | simdjson::ondemand::object object; | 354 | 5 | auto error = value.get_object().get(object); | 355 | 5 | if (error) { | 356 | 0 | return false; | 357 | 0 | } | 358 | 5 | out->type = Node::Type::OBJECT; | 359 | 12 | for (auto field_result : object) { | 360 | 12 | simdjson::ondemand::field field; | 361 | 12 | error = std::move(field_result).get(field); | 362 | 12 | if (error) { | 363 | 0 | return false; | 364 | 0 | } | 365 | 12 | std::string_view key; | 366 | 12 | error = field.unescaped_key().get(key); | 367 | 12 | if (error) { | 368 | 0 | return false; | 369 | 0 | } | 370 | 12 | std::string key_copy(key); | 371 | 12 | simdjson::ondemand::value field_value = field.value(); | 372 | 12 | Node field_node; | 373 | 12 | if (!build_node(field_value, &field_node)) { | 374 | 0 | return false; | 375 | 0 | } | 376 | 12 | out->object_keys.push_back(std::move(key_copy)); | 377 | 12 | out->object_values.push_back(std::move(field_node)); | 378 | 12 | } | 379 | 5 | return true; | 380 | 5 | } |
|
381 | | |
382 | | template <typename Value> |
383 | 19 | bool build_number_node(Value& value, Node* out) { |
384 | 19 | simdjson::ondemand::number_type number_type; |
385 | 19 | auto error = value.get_number_type().get(number_type); |
386 | 19 | if (error) { |
387 | 0 | return false; |
388 | 0 | } |
389 | 19 | switch (number_type) { |
390 | 0 | case simdjson::ondemand::number_type::signed_integer: |
391 | 0 | out->type = Node::Type::INT64; |
392 | 0 | error = value.get_int64().get(out->int64_value); |
393 | 0 | return !error; |
394 | 2 | case simdjson::ondemand::number_type::unsigned_integer: |
395 | 2 | out->type = Node::Type::UINT64; |
396 | 2 | error = value.get_uint64().get(out->uint64_value); |
397 | 2 | return !error; |
398 | 15 | case simdjson::ondemand::number_type::floating_point_number: |
399 | 15 | if (!assign_raw_number(value.raw_json_token(), &out->raw_number)) { |
400 | 0 | return false; |
401 | 0 | } |
402 | 15 | out->type = Node::Type::DOUBLE; |
403 | 15 | error = value.get_double().get(out->double_value); |
404 | 15 | return !error; |
405 | 2 | case simdjson::ondemand::number_type::big_integer: { |
406 | 2 | if (!assign_raw_number(value.raw_json_token(), &out->raw_number)) { |
407 | 0 | return false; |
408 | 0 | } |
409 | 2 | out->type = Node::Type::INT128; |
410 | 2 | StringParser::ParseResult parse_result; |
411 | 2 | out->int128_value = StringParser::string_to_int<Int128>( |
412 | 2 | out->raw_number.data(), out->raw_number.size(), &parse_result); |
413 | 2 | return parse_result == StringParser::PARSE_SUCCESS; |
414 | 2 | } |
415 | 19 | } |
416 | 0 | return false; |
417 | 19 | } _ZN5doris14SimdJSONParser17build_number_nodeIN8simdjson8fallback8ondemand5valueEEEbRT_PNS0_4NodeE Line | Count | Source | 383 | 13 | bool build_number_node(Value& value, Node* out) { | 384 | 13 | simdjson::ondemand::number_type number_type; | 385 | 13 | auto error = value.get_number_type().get(number_type); | 386 | 13 | if (error) { | 387 | 0 | return false; | 388 | 0 | } | 389 | 13 | switch (number_type) { | 390 | 0 | case simdjson::ondemand::number_type::signed_integer: | 391 | 0 | out->type = Node::Type::INT64; | 392 | 0 | error = value.get_int64().get(out->int64_value); | 393 | 0 | return !error; | 394 | 1 | case simdjson::ondemand::number_type::unsigned_integer: | 395 | 1 | out->type = Node::Type::UINT64; | 396 | 1 | error = value.get_uint64().get(out->uint64_value); | 397 | 1 | return !error; | 398 | 11 | case simdjson::ondemand::number_type::floating_point_number: | 399 | 11 | if (!assign_raw_number(value.raw_json_token(), &out->raw_number)) { | 400 | 0 | return false; | 401 | 0 | } | 402 | 11 | out->type = Node::Type::DOUBLE; | 403 | 11 | error = value.get_double().get(out->double_value); | 404 | 11 | return !error; | 405 | 1 | case simdjson::ondemand::number_type::big_integer: { | 406 | 1 | if (!assign_raw_number(value.raw_json_token(), &out->raw_number)) { | 407 | 0 | return false; | 408 | 0 | } | 409 | 1 | out->type = Node::Type::INT128; | 410 | 1 | StringParser::ParseResult parse_result; | 411 | 1 | out->int128_value = StringParser::string_to_int<Int128>( | 412 | 1 | out->raw_number.data(), out->raw_number.size(), &parse_result); | 413 | 1 | return parse_result == StringParser::PARSE_SUCCESS; | 414 | 1 | } | 415 | 13 | } | 416 | 0 | return false; | 417 | 13 | } |
_ZN5doris14SimdJSONParser17build_number_nodeIN8simdjson8fallback8ondemand8documentEEEbRT_PNS0_4NodeE Line | Count | Source | 383 | 6 | bool build_number_node(Value& value, Node* out) { | 384 | 6 | simdjson::ondemand::number_type number_type; | 385 | 6 | auto error = value.get_number_type().get(number_type); | 386 | 6 | if (error) { | 387 | 0 | return false; | 388 | 0 | } | 389 | 6 | switch (number_type) { | 390 | 0 | case simdjson::ondemand::number_type::signed_integer: | 391 | 0 | out->type = Node::Type::INT64; | 392 | 0 | error = value.get_int64().get(out->int64_value); | 393 | 0 | return !error; | 394 | 1 | case simdjson::ondemand::number_type::unsigned_integer: | 395 | 1 | out->type = Node::Type::UINT64; | 396 | 1 | error = value.get_uint64().get(out->uint64_value); | 397 | 1 | return !error; | 398 | 4 | case simdjson::ondemand::number_type::floating_point_number: | 399 | 4 | if (!assign_raw_number(value.raw_json_token(), &out->raw_number)) { | 400 | 0 | return false; | 401 | 0 | } | 402 | 4 | out->type = Node::Type::DOUBLE; | 403 | 4 | error = value.get_double().get(out->double_value); | 404 | 4 | return !error; | 405 | 1 | case simdjson::ondemand::number_type::big_integer: { | 406 | 1 | if (!assign_raw_number(value.raw_json_token(), &out->raw_number)) { | 407 | 0 | return false; | 408 | 0 | } | 409 | 1 | out->type = Node::Type::INT128; | 410 | 1 | StringParser::ParseResult parse_result; | 411 | 1 | out->int128_value = StringParser::string_to_int<Int128>( | 412 | 1 | out->raw_number.data(), out->raw_number.size(), &parse_result); | 413 | 1 | return parse_result == StringParser::PARSE_SUCCESS; | 414 | 1 | } | 415 | 6 | } | 416 | 0 | return false; | 417 | 6 | } |
|
418 | | |
419 | | template <typename Value> |
420 | 2 | bool build_string_node(Value& value, Node* out) { |
421 | 2 | std::string_view str; |
422 | 2 | auto error = value.get_string().get(str); |
423 | 2 | if (error) { |
424 | 0 | return false; |
425 | 0 | } |
426 | 2 | out->type = Node::Type::STRING; |
427 | 2 | out->string_value = std::string(str); |
428 | 2 | return true; |
429 | 2 | } _ZN5doris14SimdJSONParser17build_string_nodeIN8simdjson8fallback8ondemand5valueEEEbRT_PNS0_4NodeE Line | Count | Source | 420 | 1 | bool build_string_node(Value& value, Node* out) { | 421 | 1 | std::string_view str; | 422 | 1 | auto error = value.get_string().get(str); | 423 | 1 | if (error) { | 424 | 0 | return false; | 425 | 0 | } | 426 | 1 | out->type = Node::Type::STRING; | 427 | 1 | out->string_value = std::string(str); | 428 | 1 | return true; | 429 | 1 | } |
_ZN5doris14SimdJSONParser17build_string_nodeIN8simdjson8fallback8ondemand8documentEEEbRT_PNS0_4NodeE Line | Count | Source | 420 | 1 | bool build_string_node(Value& value, Node* out) { | 421 | 1 | std::string_view str; | 422 | 1 | auto error = value.get_string().get(str); | 423 | 1 | if (error) { | 424 | 0 | return false; | 425 | 0 | } | 426 | 1 | out->type = Node::Type::STRING; | 427 | 1 | out->string_value = std::string(str); | 428 | 1 | return true; | 429 | 1 | } |
|
430 | | |
431 | | template <typename Value> |
432 | 31 | bool build_node(Value& value, Node* out) { |
433 | 31 | simdjson::ondemand::json_type type; |
434 | 31 | auto error = value.type().get(type); |
435 | 31 | if (error) { |
436 | 0 | return false; |
437 | 0 | } |
438 | 31 | switch (type) { |
439 | 2 | case simdjson::ondemand::json_type::array: |
440 | 2 | return build_array_node(value, out); |
441 | 6 | case simdjson::ondemand::json_type::object: |
442 | 6 | return build_object_node(value, out); |
443 | 19 | case simdjson::ondemand::json_type::number: |
444 | 19 | return build_number_node(value, out); |
445 | 2 | case simdjson::ondemand::json_type::string: { |
446 | 2 | return build_string_node(value, out); |
447 | 0 | } |
448 | 1 | case simdjson::ondemand::json_type::boolean: |
449 | 1 | out->type = Node::Type::BOOL; |
450 | 1 | error = value.get_bool().get(out->bool_value); |
451 | 1 | return !error; |
452 | 1 | case simdjson::ondemand::json_type::null: |
453 | 1 | out->type = Node::Type::NULL_VALUE; |
454 | 1 | return true; |
455 | 31 | } |
456 | 0 | return false; |
457 | 31 | } _ZN5doris14SimdJSONParser10build_nodeIN8simdjson8fallback8ondemand8documentEEEbRT_PNS0_4NodeE Line | Count | Source | 432 | 14 | bool build_node(Value& value, Node* out) { | 433 | 14 | simdjson::ondemand::json_type type; | 434 | 14 | auto error = value.type().get(type); | 435 | 14 | if (error) { | 436 | 0 | return false; | 437 | 0 | } | 438 | 14 | switch (type) { | 439 | 0 | case simdjson::ondemand::json_type::array: | 440 | 0 | return build_array_node(value, out); | 441 | 5 | case simdjson::ondemand::json_type::object: | 442 | 5 | return build_object_node(value, out); | 443 | 6 | case simdjson::ondemand::json_type::number: | 444 | 6 | return build_number_node(value, out); | 445 | 1 | case simdjson::ondemand::json_type::string: { | 446 | 1 | return build_string_node(value, out); | 447 | 0 | } | 448 | 1 | case simdjson::ondemand::json_type::boolean: | 449 | 1 | out->type = Node::Type::BOOL; | 450 | 1 | error = value.get_bool().get(out->bool_value); | 451 | 1 | return !error; | 452 | 1 | case simdjson::ondemand::json_type::null: | 453 | 1 | out->type = Node::Type::NULL_VALUE; | 454 | 1 | return true; | 455 | 14 | } | 456 | 0 | return false; | 457 | 14 | } |
_ZN5doris14SimdJSONParser10build_nodeIN8simdjson8fallback8ondemand5valueEEEbRT_PNS0_4NodeE Line | Count | Source | 432 | 17 | bool build_node(Value& value, Node* out) { | 433 | 17 | simdjson::ondemand::json_type type; | 434 | 17 | auto error = value.type().get(type); | 435 | 17 | if (error) { | 436 | 0 | return false; | 437 | 0 | } | 438 | 17 | switch (type) { | 439 | 2 | case simdjson::ondemand::json_type::array: | 440 | 2 | return build_array_node(value, out); | 441 | 1 | case simdjson::ondemand::json_type::object: | 442 | 1 | return build_object_node(value, out); | 443 | 13 | case simdjson::ondemand::json_type::number: | 444 | 13 | return build_number_node(value, out); | 445 | 1 | case simdjson::ondemand::json_type::string: { | 446 | 1 | return build_string_node(value, out); | 447 | 0 | } | 448 | 0 | case simdjson::ondemand::json_type::boolean: | 449 | 0 | out->type = Node::Type::BOOL; | 450 | 0 | error = value.get_bool().get(out->bool_value); | 451 | 0 | return !error; | 452 | 0 | case simdjson::ondemand::json_type::null: | 453 | 0 | out->type = Node::Type::NULL_VALUE; | 454 | 0 | return true; | 455 | 17 | } | 456 | 0 | return false; | 457 | 17 | } |
|
458 | | |
459 | | simdjson::dom::parser dom_parser; |
460 | | simdjson::ondemand::parser ondemand_parser; |
461 | | Node root; |
462 | | }; |
463 | 160k | inline ALWAYS_INLINE SimdJSONParser::Array SimdJSONParser::Element::getArray() const { |
464 | 160k | return node ? Array(&node->array_values) : Array(dom_element.get_array().value_unsafe()); |
465 | 160k | } |
466 | 146k | inline ALWAYS_INLINE SimdJSONParser::Object SimdJSONParser::Element::getObject() const { |
467 | 146k | return node ? Object(&node->object_keys, &node->object_values) |
468 | 146k | : Object(dom_element.get_object().value_unsafe()); |
469 | 146k | } |
470 | | |
471 | | } // namespace doris |