be/src/util/json/simd_json_parser.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // This file is copied from |
18 | | // https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/JSONParsers/SimdJSONParser.h |
19 | | // and modified by Doris |
20 | | |
21 | | #pragma once |
22 | | |
23 | | #include <simdjson.h> |
24 | | |
25 | | #include <cassert> |
26 | | #include <string> |
27 | | #include <string_view> |
28 | | #include <type_traits> |
29 | | #include <utility> |
30 | | #include <variant> |
31 | | #include <vector> |
32 | | |
33 | | #include "core/types.h" |
34 | | #include "util/string_parser.hpp" |
35 | | |
36 | | namespace doris { |
37 | | |
38 | | /// This class can be used as an argument for the template class FunctionJSON. |
39 | | /// It provides ability to parse JSONs using simdjson library. |
40 | | class SimdJSONParser { |
41 | | struct Node { |
42 | | enum class Type { |
43 | | INT64, |
44 | | UINT64, |
45 | | INT128, |
46 | | BIG_INTEGER, |
47 | | DOUBLE, |
48 | | STRING, |
49 | | ARRAY, |
50 | | OBJECT, |
51 | | BOOL, |
52 | | NULL_VALUE, |
53 | | }; |
54 | | |
55 | | Type type = Type::NULL_VALUE; |
56 | | Int64 int64_value = 0; |
57 | | UInt64 uint64_value = 0; |
58 | | Int128 int128_value = 0; |
59 | | double double_value = 0; |
60 | | bool bool_value = false; |
61 | | std::string string_value; |
62 | | std::string raw_number; |
63 | | std::vector<Node> array_values; |
64 | | std::vector<std::string> object_keys; |
65 | | std::vector<Node> object_values; |
66 | | }; |
67 | | |
68 | | public: |
69 | | class Array; |
70 | | class Object; |
71 | | /// References an element in a JSON document, representing a JSON null, boolean, string, number, |
72 | | /// array or object. |
73 | | class Element { |
74 | | public: |
75 | 1.34M | ALWAYS_INLINE Element() {} /// NOLINT |
76 | | ALWAYS_INLINE explicit Element(const simdjson::dom::element& element_) |
77 | 22.1M | : dom_element(element_) {} /// NOLINT |
78 | 2.78M | ALWAYS_INLINE explicit Element(const Node* node_) : node(node_) {} /// NOLINT |
79 | 19.7M | ALWAYS_INLINE bool isInt64() const { |
80 | 19.7M | return node ? node->type == Node::Type::INT64 |
81 | 19.7M | : dom_element.type() == simdjson::dom::element_type::INT64; |
82 | 19.7M | } |
83 | 14.5M | ALWAYS_INLINE bool isUInt64() const { |
84 | 14.5M | return node ? node->type == Node::Type::UINT64 |
85 | 14.5M | : dom_element.type() == simdjson::dom::element_type::UINT64; |
86 | 14.5M | } |
87 | 14.5M | ALWAYS_INLINE bool isInt128() const { return node && node->type == Node::Type::INT128; } |
88 | 14.5M | ALWAYS_INLINE bool isBigInteger() const { |
89 | 14.5M | return node && node->type == Node::Type::BIG_INTEGER; |
90 | 14.5M | } |
91 | 14.5M | ALWAYS_INLINE bool isDouble() const { |
92 | 14.5M | return node ? node->type == Node::Type::DOUBLE |
93 | 14.5M | : dom_element.type() == simdjson::dom::element_type::DOUBLE; |
94 | 14.5M | } |
95 | 11.8M | ALWAYS_INLINE bool isString() const { |
96 | 11.8M | return node ? node->type == Node::Type::STRING |
97 | 11.8M | : dom_element.type() == simdjson::dom::element_type::STRING; |
98 | 11.8M | } |
99 | 24.7M | ALWAYS_INLINE bool isArray() const { |
100 | 24.7M | return node ? node->type == Node::Type::ARRAY |
101 | 24.7M | : dom_element.type() == simdjson::dom::element_type::ARRAY; |
102 | 24.7M | } |
103 | 25.5M | ALWAYS_INLINE bool isObject() const { |
104 | 25.5M | return node ? node->type == Node::Type::OBJECT |
105 | 25.5M | : dom_element.type() == simdjson::dom::element_type::OBJECT; |
106 | 25.5M | } |
107 | 20.0M | ALWAYS_INLINE bool isBool() const { |
108 | 20.0M | return node ? node->type == Node::Type::BOOL |
109 | 20.0M | : dom_element.type() == simdjson::dom::element_type::BOOLEAN; |
110 | 20.0M | } |
111 | 110k | ALWAYS_INLINE bool isNull() const { |
112 | 110k | return node ? node->type == Node::Type::NULL_VALUE |
113 | 110k | : dom_element.type() == simdjson::dom::element_type::NULL_VALUE; |
114 | 110k | } |
115 | 5.30M | ALWAYS_INLINE Int64 getInt64() const { |
116 | 5.30M | return node ? node->int64_value : dom_element.get_int64().value_unsafe(); |
117 | 5.30M | } |
118 | 2.89M | ALWAYS_INLINE double getDouble() const { |
119 | 2.89M | return node ? node->double_value : dom_element.get_double().value_unsafe(); |
120 | 2.89M | } |
121 | 358k | ALWAYS_INLINE bool getBool() const { |
122 | 358k | return node ? node->bool_value : dom_element.get_bool().value_unsafe(); |
123 | 358k | } |
124 | 11.6M | ALWAYS_INLINE std::string_view getString() const { |
125 | 11.6M | return node ? std::string_view(node->string_value) |
126 | 11.6M | : dom_element.get_string().value_unsafe(); |
127 | 11.6M | } |
128 | 30 | ALWAYS_INLINE UInt64 getUInt64() const { |
129 | 30 | return node ? node->uint64_value : dom_element.get_uint64().value_unsafe(); |
130 | 30 | } |
131 | 8 | ALWAYS_INLINE Int128 getInt128() const { |
132 | 8 | assert(node != nullptr); |
133 | 8 | return node->int128_value; |
134 | 8 | } |
135 | 2.49k | ALWAYS_INLINE std::string_view getRawNumber() const { |
136 | 2.49k | assert(node != nullptr); |
137 | 2.49k | return node->raw_number; |
138 | 2.49k | } |
139 | | ALWAYS_INLINE Array getArray() const; |
140 | | ALWAYS_INLINE Object getObject() const; |
141 | | |
142 | | private: |
143 | | simdjson::dom::element dom_element; |
144 | | const Node* node = nullptr; |
145 | | }; |
146 | | /// References an array in a JSON document. |
147 | | class Array { |
148 | | public: |
149 | | class Iterator { |
150 | | public: |
151 | | using DomIterator = simdjson::dom::array::iterator; |
152 | | using NodeIterator = std::vector<Node>::const_iterator; |
153 | 4.98M | ALWAYS_INLINE explicit Iterator(const DomIterator& it_) : it(it_) {} /// NOLINT |
154 | 1.44M | ALWAYS_INLINE explicit Iterator(NodeIterator it_) : it(it_) {} /// NOLINT |
155 | 3.51M | ALWAYS_INLINE Element operator*() const { |
156 | 3.51M | if (const auto* node_it = std::get_if<NodeIterator>(&it)) { |
157 | 722k | return Element(&**node_it); |
158 | 722k | } |
159 | 2.79M | return Element(*std::get<DomIterator>(it)); |
160 | 3.51M | } |
161 | 3.51M | ALWAYS_INLINE Iterator& operator++() { |
162 | 3.51M | if (auto* node_it = std::get_if<NodeIterator>(&it)) { |
163 | 722k | ++(*node_it); |
164 | 2.79M | } else { |
165 | 2.79M | ++std::get<DomIterator>(it); |
166 | 2.79M | } |
167 | 3.51M | return *this; |
168 | 3.51M | } |
169 | 4.97M | ALWAYS_INLINE friend bool operator!=(const Iterator& left, const Iterator& right) { |
170 | 4.97M | if (const auto* left_node_it = std::get_if<NodeIterator>(&left.it)) { |
171 | 1.08M | return *left_node_it != std::get<NodeIterator>(right.it); |
172 | 1.08M | } |
173 | 3.88M | return std::get<DomIterator>(left.it) != std::get<DomIterator>(right.it); |
174 | 4.97M | } |
175 | | |
176 | | private: |
177 | | std::variant<DomIterator, NodeIterator> it; |
178 | | }; |
179 | | ALWAYS_INLINE explicit Array(const simdjson::dom::array& array_) |
180 | 1.10M | : dom_array(array_) {} /// NOLINT |
181 | 360k | ALWAYS_INLINE explicit Array(const std::vector<Node>* array_) : array(array_) {} /// NOLINT |
182 | 1.47M | ALWAYS_INLINE Iterator begin() const { |
183 | 1.47M | return array ? Iterator(array->begin()) : Iterator(dom_array.begin()); |
184 | 1.47M | } |
185 | 4.97M | ALWAYS_INLINE Iterator end() const { |
186 | 4.97M | return array ? Iterator(array->end()) : Iterator(dom_array.end()); |
187 | 4.97M | } |
188 | 613k | ALWAYS_INLINE size_t size() const { return array ? array->size() : dom_array.size(); } |
189 | 0 | ALWAYS_INLINE Element operator[](size_t index) const { |
190 | 0 | assert(index < size()); |
191 | 0 | return array ? Element(&(*array)[index]) : Element(dom_array.at(index).value_unsafe()); |
192 | 0 | } |
193 | | |
194 | | private: |
195 | | simdjson::dom::array dom_array; |
196 | | const std::vector<Node>* array = nullptr; |
197 | | }; |
198 | | using KeyValuePair = std::pair<std::string_view, Element>; |
199 | | /// References an object in a JSON document. |
200 | | class Object { |
201 | | public: |
202 | | class Iterator { |
203 | | public: |
204 | | using DomIterator = simdjson::dom::object::iterator; |
205 | | ALWAYS_INLINE explicit Iterator(const std::vector<std::string>* keys_, |
206 | | const std::vector<Node>* values_, size_t index_) |
207 | 2.60M | : it(index_), keys(keys_), values(values_) {} /// NOLINT |
208 | 21.8M | ALWAYS_INLINE explicit Iterator(const DomIterator& it_) : it(it_) {} /// NOLINT |
209 | 20.1M | ALWAYS_INLINE KeyValuePair operator*() const { |
210 | 20.1M | if (const auto* index = std::get_if<size_t>(&it)) { |
211 | 1.97M | return {(*keys)[*index], Element(&(*values)[*index])}; |
212 | 1.97M | } |
213 | 18.1M | const auto& res = *std::get<DomIterator>(it); |
214 | 18.1M | return {res.key, Element(res.value)}; |
215 | 20.1M | } |
216 | 20.1M | ALWAYS_INLINE Iterator& operator++() { |
217 | 20.1M | if (auto* index = std::get_if<size_t>(&it)) { |
218 | 1.97M | ++(*index); |
219 | 18.1M | } else { |
220 | 18.1M | ++std::get<DomIterator>(it); |
221 | 18.1M | } |
222 | 20.1M | return *this; |
223 | 20.1M | } |
224 | 0 | ALWAYS_INLINE Iterator operator++(int) { |
225 | 0 | auto res = *this; |
226 | 0 | ++(*this); |
227 | 0 | return res; |
228 | 0 | } /// NOLINT |
229 | 22.3M | ALWAYS_INLINE friend bool operator!=(const Iterator& left, const Iterator& right) { |
230 | 22.3M | if (const auto* left_index = std::get_if<size_t>(&left.it)) { |
231 | 2.28M | return *left_index != std::get<size_t>(right.it); |
232 | 2.28M | } |
233 | 20.0M | return std::get<DomIterator>(left.it) != std::get<DomIterator>(right.it); |
234 | 22.3M | } |
235 | 0 | ALWAYS_INLINE friend bool operator==(const Iterator& left, const Iterator& right) { |
236 | 0 | return !(left != right); |
237 | 0 | } |
238 | | |
239 | | private: |
240 | | std::variant<DomIterator, size_t> it; |
241 | | const std::vector<std::string>* keys = nullptr; |
242 | | const std::vector<Node>* values = nullptr; |
243 | | }; |
244 | | ALWAYS_INLINE explicit Object(const simdjson::dom::object& object_) |
245 | 1.86M | : dom_object(object_) {} /// NOLINT |
246 | | ALWAYS_INLINE explicit Object(const std::vector<std::string>* keys_, |
247 | | const std::vector<Node>* values_) |
248 | 315k | : keys(keys_), values(values_) {} /// NOLINT |
249 | 2.18M | ALWAYS_INLINE Iterator begin() const { |
250 | 2.18M | return values ? Iterator(keys, values, 0) : Iterator(dom_object.begin()); |
251 | 2.18M | } |
252 | 22.3M | ALWAYS_INLINE Iterator end() const { |
253 | 22.3M | return values ? Iterator(keys, values, size()) : Iterator(dom_object.end()); |
254 | 22.3M | } |
255 | 6.32M | ALWAYS_INLINE size_t size() const { return values ? values->size() : dom_object.size(); } |
256 | | /// Optional: Provides access to an object's element by index. |
257 | 0 | KeyValuePair operator[](size_t index) const { |
258 | 0 | assert(index < size()); |
259 | 0 | if (values) { |
260 | 0 | return {(*keys)[index], Element(&(*values)[index])}; |
261 | 0 | } |
262 | 0 | auto it = dom_object.begin(); |
263 | 0 | while (index--) { |
264 | 0 | ++it; |
265 | 0 | } |
266 | 0 | const auto& res = *it; |
267 | 0 | return {res.key, Element(res.value)}; |
268 | 0 | } |
269 | | |
270 | | private: |
271 | | simdjson::dom::object dom_object; |
272 | | const std::vector<std::string>* keys = nullptr; |
273 | | const std::vector<Node>* values = nullptr; |
274 | | }; |
275 | | /// Parses a JSON document, returns the reference to its root element if succeeded. |
276 | 1.34M | bool parse(const char* data, size_t size, Element& result, bool preserve_raw_numbers = false) { |
277 | 1.34M | if (!preserve_raw_numbers) { |
278 | 1.25M | auto document = dom_parser.parse(data, size); |
279 | 1.25M | if (document.error()) { |
280 | 668 | return false; |
281 | 668 | } |
282 | 1.25M | result = Element(document.value_unsafe()); |
283 | 1.25M | return true; |
284 | 1.25M | } |
285 | | |
286 | 90.3k | return parse_ondemand(data, size, result); |
287 | 1.34M | } |
288 | | |
289 | | private: |
290 | 90.6k | bool parse_ondemand(const char* data, size_t size, Element& result) { |
291 | 90.6k | simdjson::padded_string padded_json(data, size); |
292 | 90.6k | simdjson::ondemand::document document; |
293 | 90.6k | auto error = ondemand_parser.iterate(padded_json).get(document); |
294 | 90.6k | if (error) { |
295 | 0 | return false; |
296 | 0 | } |
297 | 90.6k | root = Node(); |
298 | 90.6k | if (!build_node(document, &root)) { |
299 | 0 | return false; |
300 | 0 | } |
301 | 90.6k | result = Element(&root); |
302 | 90.6k | return true; |
303 | 90.6k | } |
304 | | |
/// Returns `raw_number` without leading and trailing space, tab, line feed and carriage
/// return characters. An input made only of those characters yields an empty view.
static std::string_view trim_raw_number(std::string_view raw_number) {
    constexpr std::string_view kWhitespace = " \t\n\r";
    const size_t first = raw_number.find_first_not_of(kWhitespace);
    if (first == std::string_view::npos) {
        // Nothing but whitespace (or empty): hand back the empty tail of the view.
        return raw_number.substr(raw_number.size());
    }
    const size_t last = raw_number.find_last_not_of(kWhitespace);
    return raw_number.substr(first, last - first + 1);
}
315 | | |
316 | | template <typename RawNumber> |
317 | 4.10k | static bool assign_raw_number(RawNumber&& raw_number, std::string* out) { |
318 | 4.10k | if constexpr (std::is_same_v<std::decay_t<RawNumber>, std::string_view>) { |
319 | 4.10k | *out = std::string(trim_raw_number(raw_number)); |
320 | 4.10k | return true; |
321 | 4.10k | } else { |
322 | 6 | std::string_view raw_number_view; |
323 | 6 | auto error = std::move(raw_number).get(raw_number_view); |
324 | 6 | if (error) { |
325 | 0 | return false; |
326 | 0 | } |
327 | 6 | *out = std::string(trim_raw_number(raw_number_view)); |
328 | 6 | return true; |
329 | 6 | } |
330 | 4.10k | } _ZN5doris14SimdJSONParser17assign_raw_numberISt17basic_string_viewIcSt11char_traitsIcEEEEbOT_PNSt7__cxx1112basic_stringIcS4_SaIcEEE Line | Count | Source | 317 | 4.10k | static bool assign_raw_number(RawNumber&& raw_number, std::string* out) { | 318 | 4.10k | if constexpr (std::is_same_v<std::decay_t<RawNumber>, std::string_view>) { | 319 | 4.10k | *out = std::string(trim_raw_number(raw_number)); | 320 | 4.10k | return true; | 321 | | } else { | 322 | | std::string_view raw_number_view; | 323 | | auto error = std::move(raw_number).get(raw_number_view); | 324 | | if (error) { | 325 | | return false; | 326 | | } | 327 | | *out = std::string(trim_raw_number(raw_number_view)); | 328 | | return true; | 329 | | } | 330 | 4.10k | } |
_ZN5doris14SimdJSONParser17assign_raw_numberIN8simdjson15simdjson_resultISt17basic_string_viewIcSt11char_traitsIcEEEEEEbOT_PNSt7__cxx1112basic_stringIcS6_SaIcEEE Line | Count | Source | 317 | 6 | static bool assign_raw_number(RawNumber&& raw_number, std::string* out) { | 318 | | if constexpr (std::is_same_v<std::decay_t<RawNumber>, std::string_view>) { | 319 | | *out = std::string(trim_raw_number(raw_number)); | 320 | | return true; | 321 | 6 | } else { | 322 | 6 | std::string_view raw_number_view; | 323 | 6 | auto error = std::move(raw_number).get(raw_number_view); | 324 | 6 | if (error) { | 325 | 0 | return false; | 326 | 0 | } | 327 | 6 | *out = std::string(trim_raw_number(raw_number_view)); | 328 | 6 | return true; | 329 | 6 | } | 330 | 6 | } |
|
331 | | |
332 | | template <typename Value> |
333 | 180k | bool build_array_node(Value& value, Node* out) { |
334 | 180k | simdjson::ondemand::array array; |
335 | 180k | auto error = value.get_array().get(array); |
336 | 180k | if (error) { |
337 | 0 | return false; |
338 | 0 | } |
339 | 180k | out->type = Node::Type::ARRAY; |
340 | 361k | for (auto element_result : array) { |
341 | 361k | simdjson::ondemand::value element; |
342 | 361k | error = std::move(element_result).get(element); |
343 | 361k | if (error) { |
344 | 0 | return false; |
345 | 0 | } |
346 | 361k | Node element_node; |
347 | 361k | if (!build_node(element, &element_node)) { |
348 | 0 | return false; |
349 | 0 | } |
350 | 361k | out->array_values.push_back(std::move(element_node)); |
351 | 361k | } |
352 | 180k | return true; |
353 | 180k | } Unexecuted instantiation: _ZN5doris14SimdJSONParser16build_array_nodeIN8simdjson8fallback8ondemand8documentEEEbRT_PNS0_4NodeE _ZN5doris14SimdJSONParser16build_array_nodeIN8simdjson8fallback8ondemand5valueEEEbRT_PNS0_4NodeE Line | Count | Source | 333 | 180k | bool build_array_node(Value& value, Node* out) { | 334 | 180k | simdjson::ondemand::array array; | 335 | 180k | auto error = value.get_array().get(array); | 336 | 180k | if (error) { | 337 | 0 | return false; | 338 | 0 | } | 339 | 180k | out->type = Node::Type::ARRAY; | 340 | 361k | for (auto element_result : array) { | 341 | 361k | simdjson::ondemand::value element; | 342 | 361k | error = std::move(element_result).get(element); | 343 | 361k | if (error) { | 344 | 0 | return false; | 345 | 0 | } | 346 | 361k | Node element_node; | 347 | 361k | if (!build_node(element, &element_node)) { | 348 | 0 | return false; | 349 | 0 | } | 350 | 361k | out->array_values.push_back(std::move(element_node)); | 351 | 361k | } | 352 | 180k | return true; | 353 | 180k | } |
|
354 | | |
355 | | template <typename Value> |
356 | 315k | bool build_object_node(Value& value, Node* out) { |
357 | 315k | simdjson::ondemand::object object; |
358 | 315k | auto error = value.get_object().get(object); |
359 | 315k | if (error) { |
360 | 0 | return false; |
361 | 0 | } |
362 | 315k | out->type = Node::Type::OBJECT; |
363 | 1.97M | for (auto field_result : object) { |
364 | 1.97M | simdjson::ondemand::field field; |
365 | 1.97M | error = std::move(field_result).get(field); |
366 | 1.97M | if (error) { |
367 | 0 | return false; |
368 | 0 | } |
369 | 1.97M | std::string_view key; |
370 | 1.97M | error = field.unescaped_key().get(key); |
371 | 1.97M | if (error) { |
372 | 0 | return false; |
373 | 0 | } |
374 | 1.97M | std::string key_copy(key); |
375 | 1.97M | simdjson::ondemand::value field_value = field.value(); |
376 | 1.97M | Node field_node; |
377 | 1.97M | if (!build_node(field_value, &field_node)) { |
378 | 0 | return false; |
379 | 0 | } |
380 | 1.97M | out->object_keys.push_back(std::move(key_copy)); |
381 | 1.97M | out->object_values.push_back(std::move(field_node)); |
382 | 1.97M | } |
383 | 315k | return true; |
384 | 315k | } _ZN5doris14SimdJSONParser17build_object_nodeIN8simdjson8fallback8ondemand5valueEEEbRT_PNS0_4NodeE Line | Count | Source | 356 | 225k | bool build_object_node(Value& value, Node* out) { | 357 | 225k | simdjson::ondemand::object object; | 358 | 225k | auto error = value.get_object().get(object); | 359 | 225k | if (error) { | 360 | 0 | return false; | 361 | 0 | } | 362 | 225k | out->type = Node::Type::OBJECT; | 363 | 648k | for (auto field_result : object) { | 364 | 648k | simdjson::ondemand::field field; | 365 | 648k | error = std::move(field_result).get(field); | 366 | 648k | if (error) { | 367 | 0 | return false; | 368 | 0 | } | 369 | 648k | std::string_view key; | 370 | 648k | error = field.unescaped_key().get(key); | 371 | 648k | if (error) { | 372 | 0 | return false; | 373 | 0 | } | 374 | 648k | std::string key_copy(key); | 375 | 648k | simdjson::ondemand::value field_value = field.value(); | 376 | 648k | Node field_node; | 377 | 648k | if (!build_node(field_value, &field_node)) { | 378 | 0 | return false; | 379 | 0 | } | 380 | 648k | out->object_keys.push_back(std::move(key_copy)); | 381 | 648k | out->object_values.push_back(std::move(field_node)); | 382 | 648k | } | 383 | 225k | return true; | 384 | 225k | } |
_ZN5doris14SimdJSONParser17build_object_nodeIN8simdjson8fallback8ondemand8documentEEEbRT_PNS0_4NodeE Line | Count | Source | 356 | 90.5k | bool build_object_node(Value& value, Node* out) { | 357 | 90.5k | simdjson::ondemand::object object; | 358 | 90.5k | auto error = value.get_object().get(object); | 359 | 90.5k | if (error) { | 360 | 0 | return false; | 361 | 0 | } | 362 | 90.5k | out->type = Node::Type::OBJECT; | 363 | 1.32M | for (auto field_result : object) { | 364 | 1.32M | simdjson::ondemand::field field; | 365 | 1.32M | error = std::move(field_result).get(field); | 366 | 1.32M | if (error) { | 367 | 0 | return false; | 368 | 0 | } | 369 | 1.32M | std::string_view key; | 370 | 1.32M | error = field.unescaped_key().get(key); | 371 | 1.32M | if (error) { | 372 | 0 | return false; | 373 | 0 | } | 374 | 1.32M | std::string key_copy(key); | 375 | 1.32M | simdjson::ondemand::value field_value = field.value(); | 376 | 1.32M | Node field_node; | 377 | 1.32M | if (!build_node(field_value, &field_node)) { | 378 | 0 | return false; | 379 | 0 | } | 380 | 1.32M | out->object_keys.push_back(std::move(key_copy)); | 381 | 1.32M | out->object_values.push_back(std::move(field_node)); | 382 | 1.32M | } | 383 | 90.5k | return true; | 384 | 90.5k | } |
|
385 | | |
386 | | template <typename Value> |
387 | 261k | bool build_number_node(Value& value, Node* out) { |
388 | 261k | simdjson::ondemand::number_type number_type; |
389 | 261k | auto error = value.get_number_type().get(number_type); |
390 | 261k | if (error) { |
391 | 0 | return false; |
392 | 0 | } |
393 | 261k | switch (number_type) { |
394 | 257k | case simdjson::ondemand::number_type::signed_integer: |
395 | 257k | out->type = Node::Type::INT64; |
396 | 257k | error = value.get_int64().get(out->int64_value); |
397 | 257k | return !error; |
398 | 2 | case simdjson::ondemand::number_type::unsigned_integer: |
399 | 2 | out->type = Node::Type::UINT64; |
400 | 2 | error = value.get_uint64().get(out->uint64_value); |
401 | 2 | return !error; |
402 | 4.09k | case simdjson::ondemand::number_type::floating_point_number: |
403 | 4.09k | if (!assign_raw_number(value.raw_json_token(), &out->raw_number)) { |
404 | 0 | return false; |
405 | 0 | } |
406 | 4.09k | out->type = Node::Type::DOUBLE; |
407 | 4.09k | error = value.get_double().get(out->double_value); |
408 | 4.09k | return !error; |
409 | 17 | case simdjson::ondemand::number_type::big_integer: { |
410 | 17 | if (!assign_raw_number(value.raw_json_token(), &out->raw_number)) { |
411 | 0 | return false; |
412 | 0 | } |
413 | 17 | StringParser::ParseResult parse_result; |
414 | 17 | out->int128_value = StringParser::string_to_int<Int128>( |
415 | 17 | out->raw_number.data(), out->raw_number.size(), &parse_result); |
416 | 17 | out->type = parse_result == StringParser::PARSE_SUCCESS ? Node::Type::INT128 |
417 | 17 | : Node::Type::BIG_INTEGER; |
418 | 17 | return true; |
419 | 17 | } |
420 | 261k | } |
421 | 0 | return false; |
422 | 261k | } _ZN5doris14SimdJSONParser17build_number_nodeIN8simdjson8fallback8ondemand5valueEEEbRT_PNS0_4NodeE Line | Count | Source | 387 | 261k | bool build_number_node(Value& value, Node* out) { | 388 | 261k | simdjson::ondemand::number_type number_type; | 389 | 261k | auto error = value.get_number_type().get(number_type); | 390 | 261k | if (error) { | 391 | 0 | return false; | 392 | 0 | } | 393 | 261k | switch (number_type) { | 394 | 257k | case simdjson::ondemand::number_type::signed_integer: | 395 | 257k | out->type = Node::Type::INT64; | 396 | 257k | error = value.get_int64().get(out->int64_value); | 397 | 257k | return !error; | 398 | 1 | case simdjson::ondemand::number_type::unsigned_integer: | 399 | 1 | out->type = Node::Type::UINT64; | 400 | 1 | error = value.get_uint64().get(out->uint64_value); | 401 | 1 | return !error; | 402 | 4.08k | case simdjson::ondemand::number_type::floating_point_number: | 403 | 4.08k | if (!assign_raw_number(value.raw_json_token(), &out->raw_number)) { | 404 | 0 | return false; | 405 | 0 | } | 406 | 4.08k | out->type = Node::Type::DOUBLE; | 407 | 4.08k | error = value.get_double().get(out->double_value); | 408 | 4.08k | return !error; | 409 | 15 | case simdjson::ondemand::number_type::big_integer: { | 410 | 15 | if (!assign_raw_number(value.raw_json_token(), &out->raw_number)) { | 411 | 0 | return false; | 412 | 0 | } | 413 | 15 | StringParser::ParseResult parse_result; | 414 | 15 | out->int128_value = StringParser::string_to_int<Int128>( | 415 | 15 | out->raw_number.data(), out->raw_number.size(), &parse_result); | 416 | 15 | out->type = parse_result == StringParser::PARSE_SUCCESS ? Node::Type::INT128 | 417 | 15 | : Node::Type::BIG_INTEGER; | 418 | 15 | return true; | 419 | 15 | } | 420 | 261k | } | 421 | 0 | return false; | 422 | 261k | } |
_ZN5doris14SimdJSONParser17build_number_nodeIN8simdjson8fallback8ondemand8documentEEEbRT_PNS0_4NodeE Line | Count | Source | 387 | 7 | bool build_number_node(Value& value, Node* out) { | 388 | 7 | simdjson::ondemand::number_type number_type; | 389 | 7 | auto error = value.get_number_type().get(number_type); | 390 | 7 | if (error) { | 391 | 0 | return false; | 392 | 0 | } | 393 | 7 | switch (number_type) { | 394 | 0 | case simdjson::ondemand::number_type::signed_integer: | 395 | 0 | out->type = Node::Type::INT64; | 396 | 0 | error = value.get_int64().get(out->int64_value); | 397 | 0 | return !error; | 398 | 1 | case simdjson::ondemand::number_type::unsigned_integer: | 399 | 1 | out->type = Node::Type::UINT64; | 400 | 1 | error = value.get_uint64().get(out->uint64_value); | 401 | 1 | return !error; | 402 | 4 | case simdjson::ondemand::number_type::floating_point_number: | 403 | 4 | if (!assign_raw_number(value.raw_json_token(), &out->raw_number)) { | 404 | 0 | return false; | 405 | 0 | } | 406 | 4 | out->type = Node::Type::DOUBLE; | 407 | 4 | error = value.get_double().get(out->double_value); | 408 | 4 | return !error; | 409 | 2 | case simdjson::ondemand::number_type::big_integer: { | 410 | 2 | if (!assign_raw_number(value.raw_json_token(), &out->raw_number)) { | 411 | 0 | return false; | 412 | 0 | } | 413 | 2 | StringParser::ParseResult parse_result; | 414 | 2 | out->int128_value = StringParser::string_to_int<Int128>( | 415 | 2 | out->raw_number.data(), out->raw_number.size(), &parse_result); | 416 | 2 | out->type = parse_result == StringParser::PARSE_SUCCESS ? Node::Type::INT128 | 417 | 2 | : Node::Type::BIG_INTEGER; | 418 | 2 | return true; | 419 | 2 | } | 420 | 7 | } | 421 | 0 | return false; | 422 | 7 | } |
|
423 | | |
424 | | template <typename Value> |
425 | 1.64M | bool build_string_node(Value& value, Node* out) { |
426 | 1.64M | std::string_view str; |
427 | 1.64M | auto error = value.get_string().get(str); |
428 | 1.64M | if (error) { |
429 | 0 | return false; |
430 | 0 | } |
431 | 1.64M | out->type = Node::Type::STRING; |
432 | 1.64M | out->string_value = std::string(str); |
433 | 1.64M | return true; |
434 | 1.64M | } _ZN5doris14SimdJSONParser17build_string_nodeIN8simdjson8fallback8ondemand5valueEEEbRT_PNS0_4NodeE Line | Count | Source | 425 | 1.64M | bool build_string_node(Value& value, Node* out) { | 426 | 1.64M | std::string_view str; | 427 | 1.64M | auto error = value.get_string().get(str); | 428 | 1.64M | if (error) { | 429 | 0 | return false; | 430 | 0 | } | 431 | 1.64M | out->type = Node::Type::STRING; | 432 | 1.64M | out->string_value = std::string(str); | 433 | 1.64M | return true; | 434 | 1.64M | } |
_ZN5doris14SimdJSONParser17build_string_nodeIN8simdjson8fallback8ondemand8documentEEEbRT_PNS0_4NodeE Line | Count | Source | 425 | 1 | bool build_string_node(Value& value, Node* out) { | 426 | 1 | std::string_view str; | 427 | 1 | auto error = value.get_string().get(str); | 428 | 1 | if (error) { | 429 | 0 | return false; | 430 | 0 | } | 431 | 1 | out->type = Node::Type::STRING; | 432 | 1 | out->string_value = std::string(str); | 433 | 1 | return true; | 434 | 1 | } |
|
435 | | |
436 | | template <typename Value> |
437 | 2.42M | bool build_node(Value& value, Node* out) { |
438 | 2.42M | simdjson::ondemand::json_type type; |
439 | 2.42M | auto error = value.type().get(type); |
440 | 2.42M | if (error) { |
441 | 0 | return false; |
442 | 0 | } |
443 | 2.42M | switch (type) { |
444 | 180k | case simdjson::ondemand::json_type::array: |
445 | 180k | return build_array_node(value, out); |
446 | 315k | case simdjson::ondemand::json_type::object: |
447 | 315k | return build_object_node(value, out); |
448 | 261k | case simdjson::ondemand::json_type::number: |
449 | 261k | return build_number_node(value, out); |
450 | 1.64M | case simdjson::ondemand::json_type::string: { |
451 | 1.64M | return build_string_node(value, out); |
452 | 0 | } |
453 | 17.9k | case simdjson::ondemand::json_type::boolean: |
454 | 17.9k | out->type = Node::Type::BOOL; |
455 | 17.9k | error = value.get_bool().get(out->bool_value); |
456 | 17.9k | return !error; |
457 | 12 | case simdjson::ondemand::json_type::null: |
458 | 12 | out->type = Node::Type::NULL_VALUE; |
459 | 12 | return true; |
460 | 2.42M | } |
461 | 0 | return false; |
462 | 2.42M | } _ZN5doris14SimdJSONParser10build_nodeIN8simdjson8fallback8ondemand8documentEEEbRT_PNS0_4NodeE Line | Count | Source | 437 | 90.6k | bool build_node(Value& value, Node* out) { | 438 | 90.6k | simdjson::ondemand::json_type type; | 439 | 90.6k | auto error = value.type().get(type); | 440 | 90.6k | if (error) { | 441 | 0 | return false; | 442 | 0 | } | 443 | 90.6k | switch (type) { | 444 | 0 | case simdjson::ondemand::json_type::array: | 445 | 0 | return build_array_node(value, out); | 446 | 90.5k | case simdjson::ondemand::json_type::object: | 447 | 90.5k | return build_object_node(value, out); | 448 | 7 | case simdjson::ondemand::json_type::number: | 449 | 7 | return build_number_node(value, out); | 450 | 1 | case simdjson::ondemand::json_type::string: { | 451 | 1 | return build_string_node(value, out); | 452 | 0 | } | 453 | 1 | case simdjson::ondemand::json_type::boolean: | 454 | 1 | out->type = Node::Type::BOOL; | 455 | 1 | error = value.get_bool().get(out->bool_value); | 456 | 1 | return !error; | 457 | 1 | case simdjson::ondemand::json_type::null: | 458 | 1 | out->type = Node::Type::NULL_VALUE; | 459 | 1 | return true; | 460 | 90.6k | } | 461 | 0 | return false; | 462 | 90.6k | } |
_ZN5doris14SimdJSONParser10build_nodeIN8simdjson8fallback8ondemand5valueEEEbRT_PNS0_4NodeE Line | Count | Source | 437 | 2.33M | bool build_node(Value& value, Node* out) { | 438 | 2.33M | simdjson::ondemand::json_type type; | 439 | 2.33M | auto error = value.type().get(type); | 440 | 2.33M | if (error) { | 441 | 0 | return false; | 442 | 0 | } | 443 | 2.33M | switch (type) { | 444 | 180k | case simdjson::ondemand::json_type::array: | 445 | 180k | return build_array_node(value, out); | 446 | 225k | case simdjson::ondemand::json_type::object: | 447 | 225k | return build_object_node(value, out); | 448 | 261k | case simdjson::ondemand::json_type::number: | 449 | 261k | return build_number_node(value, out); | 450 | 1.64M | case simdjson::ondemand::json_type::string: { | 451 | 1.64M | return build_string_node(value, out); | 452 | 0 | } | 453 | 17.9k | case simdjson::ondemand::json_type::boolean: | 454 | 17.9k | out->type = Node::Type::BOOL; | 455 | 17.9k | error = value.get_bool().get(out->bool_value); | 456 | 17.9k | return !error; | 457 | 11 | case simdjson::ondemand::json_type::null: | 458 | 11 | out->type = Node::Type::NULL_VALUE; | 459 | 11 | return true; | 460 | 2.33M | } | 461 | 0 | return false; | 462 | 2.33M | } |
|
463 | | |
464 | | simdjson::dom::parser dom_parser; |
465 | | simdjson::ondemand::parser ondemand_parser; |
466 | | Node root; |
467 | | }; |
468 | 1.46M | inline ALWAYS_INLINE SimdJSONParser::Array SimdJSONParser::Element::getArray() const { |
469 | 1.46M | return node ? Array(&node->array_values) : Array(dom_element.get_array().value_unsafe()); |
470 | 1.46M | } |
471 | 2.17M | inline ALWAYS_INLINE SimdJSONParser::Object SimdJSONParser::Element::getObject() const { |
472 | 2.17M | return node ? Object(&node->object_keys, &node->object_values) |
473 | 2.17M | : Object(dom_element.get_object().value_unsafe()); |
474 | 2.17M | } |
475 | | |
476 | | } // namespace doris |