Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // This file is copied from |
18 | | // https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/JSONParsers/SimdJSONParser.h |
19 | | // and modified by Doris |
20 | | |
21 | | #pragma once |
22 | | |
23 | | #include <simdjson.h> |
24 | | |
25 | | #include <cassert> |
26 | | #include <string> |
27 | | #include <string_view> |
28 | | #include <type_traits> |
29 | | #include <utility> |
30 | | #include <vector> |
31 | | |
32 | | #include "core/types.h" |
33 | | |
34 | | namespace doris { |
35 | | |
36 | | /// This class can be used as an argument for the template class FunctionJSON. |
37 | | /// It provides ability to parse JSONs using simdjson library. |
38 | | class SimdJSONParser { |
39 | | struct Node { |
40 | | enum class Type { |
41 | | INT64, |
42 | | UINT64, |
43 | | BIG_INTEGER, |
44 | | DOUBLE, |
45 | | STRING, |
46 | | ARRAY, |
47 | | OBJECT, |
48 | | BOOL, |
49 | | NULL_VALUE, |
50 | | }; |
51 | | |
52 | | Type type = Type::NULL_VALUE; |
53 | | Int64 int64_value = 0; |
54 | | UInt64 uint64_value = 0; |
55 | | double double_value = 0; |
56 | | bool bool_value = false; |
57 | | std::string string_value; |
58 | | std::string raw_number; |
59 | | std::vector<Node> array_values; |
60 | | std::vector<std::string> object_keys; |
61 | | std::vector<Node> object_values; |
62 | | }; |
63 | | |
64 | | public: |
65 | | class Array; |
66 | | class Object; |
67 | | /// References an element in a JSON document, representing a JSON null, boolean, string, number, |
68 | | /// array or object. |
69 | | class Element { |
70 | | public: |
71 | 80.6k | ALWAYS_INLINE Element() {} /// NOLINT |
72 | 3.29M | ALWAYS_INLINE explicit Element(const Node* node_) : node(node_) {} /// NOLINT |
73 | 1.86M | ALWAYS_INLINE bool isInt64() const { |
74 | 1.86M | assert(node != nullptr); |
75 | 1.86M | return node->type == Node::Type::INT64; |
76 | 1.86M | } |
77 | 1.06M | ALWAYS_INLINE bool isUInt64() const { |
78 | 1.06M | assert(node != nullptr); |
79 | 1.06M | return node->type == Node::Type::UINT64; |
80 | 1.06M | } |
81 | 3.08M | ALWAYS_INLINE bool isBigInteger() const { |
82 | 3.08M | assert(node != nullptr); |
83 | 3.08M | return node->type == Node::Type::BIG_INTEGER; |
84 | 3.08M | } |
85 | 14 | ALWAYS_INLINE bool isNumber() const { |
86 | 14 | assert(node != nullptr); |
87 | 14 | return node->type == Node::Type::INT64 || node->type == Node::Type::UINT64 || |
88 | 14 | node->type == Node::Type::BIG_INTEGER || node->type == Node::Type::DOUBLE; |
89 | 14 | } |
90 | 1.06M | ALWAYS_INLINE bool isDouble() const { |
91 | 1.06M | assert(node != nullptr); |
92 | 1.06M | return node->type == Node::Type::DOUBLE; |
93 | 1.06M | } |
94 | 944k | ALWAYS_INLINE bool isString() const { |
95 | 944k | assert(node != nullptr); |
96 | 944k | return node->type == Node::Type::STRING; |
97 | 944k | } |
98 | 3.28M | ALWAYS_INLINE bool isArray() const { |
99 | 3.28M | assert(node != nullptr); |
100 | 3.28M | return node->type == Node::Type::ARRAY; |
101 | 3.28M | } |
102 | 3.35M | ALWAYS_INLINE bool isObject() const { |
103 | 3.35M | assert(node != nullptr); |
104 | 3.35M | return node->type == Node::Type::OBJECT; |
105 | 3.35M | } |
106 | 1.88M | ALWAYS_INLINE bool isBool() const { |
107 | 1.88M | assert(node != nullptr); |
108 | 1.88M | return node->type == Node::Type::BOOL; |
109 | 1.88M | } |
110 | 12.0k | ALWAYS_INLINE bool isNull() const { |
111 | 12.0k | assert(node != nullptr); |
112 | 12.0k | return node->type == Node::Type::NULL_VALUE; |
113 | 12.0k | } |
114 | 803k | ALWAYS_INLINE Int64 getInt64() const { |
115 | 803k | assert(node != nullptr); |
116 | 803k | return node->int64_value; |
117 | 803k | } |
118 | 116k | ALWAYS_INLINE double getDouble() const { |
119 | 116k | assert(node != nullptr); |
120 | 116k | return node->double_value; |
121 | 116k | } |
122 | 20.7k | ALWAYS_INLINE bool getBool() const { |
123 | 20.7k | assert(node != nullptr); |
124 | 20.7k | return node->bool_value; |
125 | 20.7k | } |
126 | 932k | ALWAYS_INLINE std::string_view getString() const { |
127 | 932k | assert(node != nullptr); |
128 | 932k | return node->string_value; |
129 | 932k | } |
130 | 20 | ALWAYS_INLINE UInt64 getUInt64() const { |
131 | 20 | assert(node != nullptr); |
132 | 20 | return node->uint64_value; |
133 | 20 | } |
134 | 11 | ALWAYS_INLINE std::string_view getRawNumber() const { |
135 | 11 | assert(node != nullptr); |
136 | 11 | return node->raw_number; |
137 | 11 | } |
138 | | ALWAYS_INLINE Array getArray() const; |
139 | | ALWAYS_INLINE Object getObject() const; |
140 | | |
141 | | private: |
142 | | const Node* node = nullptr; |
143 | | }; |
144 | | /// References an array in a JSON document. |
145 | | class Array { |
146 | | public: |
147 | | class Iterator { |
148 | | public: |
149 | | using NodeIterator = std::vector<Node>::const_iterator; |
150 | 1.98M | ALWAYS_INLINE explicit Iterator(NodeIterator it_) : it(it_) {} /// NOLINT |
151 | 1.66M | ALWAYS_INLINE Element operator*() const { return Element(&*it); } |
152 | 1.66M | ALWAYS_INLINE Iterator& operator++() { |
153 | 1.66M | ++it; |
154 | 1.66M | return *this; |
155 | 1.66M | } |
156 | 1.82M | ALWAYS_INLINE friend bool operator!=(const Iterator& left, const Iterator& right) { |
157 | 1.82M | return left.it != right.it; |
158 | 1.82M | } |
159 | | |
160 | | private: |
161 | | NodeIterator it; |
162 | | }; |
163 | 161k | ALWAYS_INLINE explicit Array(const std::vector<Node>* array_) : array(array_) {} /// NOLINT |
164 | 161k | ALWAYS_INLINE Iterator begin() const { return Iterator(array->begin()); } |
165 | 1.82M | ALWAYS_INLINE Iterator end() const { return Iterator(array->end()); } |
166 | 55.1k | ALWAYS_INLINE size_t size() const { return array->size(); } |
167 | 0 | ALWAYS_INLINE Element operator[](size_t index) const { |
168 | 0 | assert(index < size()); |
169 | 0 | return Element(&(*array)[index]); |
170 | 0 | } |
171 | | |
172 | | private: |
173 | | const std::vector<Node>* array; |
174 | | }; |
175 | | using KeyValuePair = std::pair<std::string_view, Element>; |
176 | | /// References an object in a JSON document. |
177 | | class Object { |
178 | | public: |
179 | | class Iterator { |
180 | | public: |
181 | | ALWAYS_INLINE explicit Iterator(const std::vector<std::string>* keys_, |
182 | | const std::vector<Node>* values_, size_t index_) |
183 | 1.92M | : index(index_), keys(keys_), values(values_) {} /// NOLINT |
184 | 1.55M | ALWAYS_INLINE KeyValuePair operator*() const { |
185 | 1.55M | return {(*keys)[index], Element(&(*values)[index])}; |
186 | 1.55M | } |
187 | 1.54M | ALWAYS_INLINE Iterator& operator++() { |
188 | 1.54M | ++index; |
189 | 1.54M | return *this; |
190 | 1.54M | } |
191 | 0 | ALWAYS_INLINE Iterator operator++(int) { |
192 | 0 | auto res = *this; |
193 | 0 | ++(*this); |
194 | 0 | return res; |
195 | 0 | } /// NOLINT |
196 | 1.73M | ALWAYS_INLINE friend bool operator!=(const Iterator& left, const Iterator& right) { |
197 | 1.73M | return left.index != right.index; |
198 | 1.73M | } |
199 | 0 | ALWAYS_INLINE friend bool operator==(const Iterator& left, const Iterator& right) { |
200 | 0 | return !(left != right); |
201 | 0 | } |
202 | | |
203 | | private: |
204 | | size_t index; |
205 | | const std::vector<std::string>* keys; |
206 | | const std::vector<Node>* values; |
207 | | }; |
208 | | ALWAYS_INLINE explicit Object(const std::vector<std::string>* keys_, |
209 | | const std::vector<Node>* values_) |
210 | 190k | : keys(keys_), values(values_) {} /// NOLINT |
211 | 190k | ALWAYS_INLINE Iterator begin() const { return Iterator(keys, values, 0); } |
212 | 1.73M | ALWAYS_INLINE Iterator end() const { return Iterator(keys, values, size()); } |
213 | 2.03M | ALWAYS_INLINE size_t size() const { return values->size(); } |
214 | | /// Optional: Provides access to an object's element by index. |
215 | 0 | KeyValuePair operator[](size_t index) const { |
216 | 0 | assert(index < size()); |
217 | 0 | return {(*keys)[index], Element(&(*values)[index])}; |
218 | 0 | } |
219 | | |
220 | | private: |
221 | | const std::vector<std::string>* keys; |
222 | | const std::vector<Node>* values; |
223 | | }; |
224 | | /// Parses a JSON document, returns the reference to its root element if succeeded. |
225 | 80.6k | bool parse(const char* data, size_t size, Element& result) { |
226 | 80.6k | return parse_ondemand(data, size, result); |
227 | 80.6k | } |
228 | | |
229 | | private: |
230 | 80.6k | bool parse_ondemand(const char* data, size_t size, Element& result) { |
231 | 80.6k | simdjson::padded_string padded_json(data, size); |
232 | 80.6k | simdjson::ondemand::document document; |
233 | 80.6k | auto error = ondemand_parser.iterate(padded_json).get(document); |
234 | 80.6k | if (error) { |
235 | 1 | return false; |
236 | 1 | } |
237 | 80.6k | root = Node(); |
238 | 80.6k | if (!build_node(document, &root)) { |
239 | 6 | return false; |
240 | 6 | } |
241 | 80.6k | result = Element(&root); |
242 | 80.6k | return true; |
243 | 80.6k | } |
244 | | |
/// Strips leading and trailing JSON whitespace (space, tab, LF, CR) from a
/// raw number token. The result is a sub-view of the argument; it is empty
/// when the argument is empty or consists of whitespace only.
static std::string_view trim_raw_number(std::string_view raw_number) {
    constexpr std::string_view kJsonWhitespace = " \t\n\r";

    const auto first = raw_number.find_first_not_of(kJsonWhitespace);
    if (first == std::string_view::npos) {
        // Nothing but whitespace: return the empty tail of the input.
        return raw_number.substr(raw_number.size());
    }
    const auto last = raw_number.find_last_not_of(kJsonWhitespace);
    return raw_number.substr(first, last - first + 1);
}
255 | | |
256 | | template <typename RawNumber> |
257 | 945k | static bool assign_raw_number(RawNumber&& raw_number, std::string* out) { |
258 | 945k | if constexpr (std::is_same_v<std::decay_t<RawNumber>, std::string_view>) { |
259 | 945k | *out = std::string(trim_raw_number(raw_number)); |
260 | 945k | return true; |
261 | 945k | } else { |
262 | 125 | std::string_view raw_number_view; |
263 | 125 | auto error = std::move(raw_number).get(raw_number_view); |
264 | 125 | if (error) { |
265 | 0 | return false; |
266 | 0 | } |
267 | 125 | *out = std::string(trim_raw_number(raw_number_view)); |
268 | 125 | return true; |
269 | 125 | } |
270 | 945k | } _ZN5doris14SimdJSONParser17assign_raw_numberISt17basic_string_viewIcSt11char_traitsIcEEEEbOT_PNSt7__cxx1112basic_stringIcS4_SaIcEEE Line | Count | Source | 257 | 945k | static bool assign_raw_number(RawNumber&& raw_number, std::string* out) { | 258 | 945k | if constexpr (std::is_same_v<std::decay_t<RawNumber>, std::string_view>) { | 259 | 945k | *out = std::string(trim_raw_number(raw_number)); | 260 | 945k | return true; | 261 | | } else { | 262 | | std::string_view raw_number_view; | 263 | | auto error = std::move(raw_number).get(raw_number_view); | 264 | | if (error) { | 265 | | return false; | 266 | | } | 267 | | *out = std::string(trim_raw_number(raw_number_view)); | 268 | | return true; | 269 | | } | 270 | 945k | } |
_ZN5doris14SimdJSONParser17assign_raw_numberIN8simdjson15simdjson_resultISt17basic_string_viewIcSt11char_traitsIcEEEEEEbOT_PNSt7__cxx1112basic_stringIcS6_SaIcEEE Line | Count | Source | 257 | 125 | static bool assign_raw_number(RawNumber&& raw_number, std::string* out) { | 258 | | if constexpr (std::is_same_v<std::decay_t<RawNumber>, std::string_view>) { | 259 | | *out = std::string(trim_raw_number(raw_number)); | 260 | | return true; | 261 | 125 | } else { | 262 | 125 | std::string_view raw_number_view; | 263 | 125 | auto error = std::move(raw_number).get(raw_number_view); | 264 | 125 | if (error) { | 265 | 0 | return false; | 266 | 0 | } | 267 | 125 | *out = std::string(trim_raw_number(raw_number_view)); | 268 | 125 | return true; | 269 | 125 | } | 270 | 125 | } |
|
271 | | |
272 | | template <typename Value> |
273 | 55.9k | bool build_array_node(Value& value, Node* out) { |
274 | 55.9k | simdjson::ondemand::array array; |
275 | 55.9k | auto error = value.get_array().get(array); |
276 | 55.9k | if (error) { |
277 | 0 | return false; |
278 | 0 | } |
279 | 55.9k | out->type = Node::Type::ARRAY; |
280 | 618k | for (auto element_result : array) { |
281 | 618k | simdjson::ondemand::value element; |
282 | 618k | error = std::move(element_result).get(element); |
283 | 618k | if (error) { |
284 | 0 | return false; |
285 | 0 | } |
286 | 618k | Node element_node; |
287 | 618k | if (!build_node(element, &element_node)) { |
288 | 0 | return false; |
289 | 0 | } |
290 | 618k | out->array_values.push_back(std::move(element_node)); |
291 | 618k | } |
292 | 55.9k | return true; |
293 | 55.9k | } _ZN5doris14SimdJSONParser16build_array_nodeIN8simdjson8fallback8ondemand8documentEEEbRT_PNS0_4NodeE Line | Count | Source | 273 | 1.41k | bool build_array_node(Value& value, Node* out) { | 274 | 1.41k | simdjson::ondemand::array array; | 275 | 1.41k | auto error = value.get_array().get(array); | 276 | 1.41k | if (error) { | 277 | 0 | return false; | 278 | 0 | } | 279 | 1.41k | out->type = Node::Type::ARRAY; | 280 | 96.3k | for (auto element_result : array) { | 281 | 96.3k | simdjson::ondemand::value element; | 282 | 96.3k | error = std::move(element_result).get(element); | 283 | 96.3k | if (error) { | 284 | 0 | return false; | 285 | 0 | } | 286 | 96.3k | Node element_node; | 287 | 96.3k | if (!build_node(element, &element_node)) { | 288 | 0 | return false; | 289 | 0 | } | 290 | 96.3k | out->array_values.push_back(std::move(element_node)); | 291 | 96.3k | } | 292 | 1.41k | return true; | 293 | 1.41k | } |
_ZN5doris14SimdJSONParser16build_array_nodeIN8simdjson8fallback8ondemand5valueEEEbRT_PNS0_4NodeE Line | Count | Source | 273 | 54.5k | bool build_array_node(Value& value, Node* out) { | 274 | 54.5k | simdjson::ondemand::array array; | 275 | 54.5k | auto error = value.get_array().get(array); | 276 | 54.5k | if (error) { | 277 | 0 | return false; | 278 | 0 | } | 279 | 54.5k | out->type = Node::Type::ARRAY; | 280 | 522k | for (auto element_result : array) { | 281 | 522k | simdjson::ondemand::value element; | 282 | 522k | error = std::move(element_result).get(element); | 283 | 522k | if (error) { | 284 | 0 | return false; | 285 | 0 | } | 286 | 522k | Node element_node; | 287 | 522k | if (!build_node(element, &element_node)) { | 288 | 0 | return false; | 289 | 0 | } | 290 | 522k | out->array_values.push_back(std::move(element_node)); | 291 | 522k | } | 292 | 54.5k | return true; | 293 | 54.5k | } |
|
294 | | |
295 | | template <typename Value> |
296 | 189k | bool build_object_node(Value& value, Node* out) { |
297 | 189k | simdjson::ondemand::object object; |
298 | 189k | auto error = value.get_object().get(object); |
299 | 189k | if (error) { |
300 | 0 | return false; |
301 | 0 | } |
302 | 189k | out->type = Node::Type::OBJECT; |
303 | 1.55M | for (auto field_result : object) { |
304 | 1.55M | simdjson::ondemand::field field; |
305 | 1.55M | error = std::move(field_result).get(field); |
306 | 1.55M | if (error) { |
307 | 6 | return false; |
308 | 6 | } |
309 | 1.55M | std::string_view key; |
310 | 1.55M | error = field.unescaped_key().get(key); |
311 | 1.55M | if (error) { |
312 | 0 | return false; |
313 | 0 | } |
314 | 1.55M | std::string key_copy(key); |
315 | 1.55M | simdjson::ondemand::value field_value = field.value(); |
316 | 1.55M | Node field_node; |
317 | 1.55M | if (!build_node(field_value, &field_node)) { |
318 | 0 | return false; |
319 | 0 | } |
320 | 1.55M | out->object_keys.push_back(std::move(key_copy)); |
321 | 1.55M | out->object_values.push_back(std::move(field_node)); |
322 | 1.55M | } |
323 | 189k | return true; |
324 | 189k | } _ZN5doris14SimdJSONParser17build_object_nodeIN8simdjson8fallback8ondemand5valueEEEbRT_PNS0_4NodeE Line | Count | Source | 296 | 111k | bool build_object_node(Value& value, Node* out) { | 297 | 111k | simdjson::ondemand::object object; | 298 | 111k | auto error = value.get_object().get(object); | 299 | 111k | if (error) { | 300 | 0 | return false; | 301 | 0 | } | 302 | 111k | out->type = Node::Type::OBJECT; | 303 | 205k | for (auto field_result : object) { | 304 | 205k | simdjson::ondemand::field field; | 305 | 205k | error = std::move(field_result).get(field); | 306 | 205k | if (error) { | 307 | 0 | return false; | 308 | 0 | } | 309 | 205k | std::string_view key; | 310 | 205k | error = field.unescaped_key().get(key); | 311 | 205k | if (error) { | 312 | 0 | return false; | 313 | 0 | } | 314 | 205k | std::string key_copy(key); | 315 | 205k | simdjson::ondemand::value field_value = field.value(); | 316 | 205k | Node field_node; | 317 | 205k | if (!build_node(field_value, &field_node)) { | 318 | 0 | return false; | 319 | 0 | } | 320 | 205k | out->object_keys.push_back(std::move(key_copy)); | 321 | 205k | out->object_values.push_back(std::move(field_node)); | 322 | 205k | } | 323 | 111k | return true; | 324 | 111k | } |
_ZN5doris14SimdJSONParser17build_object_nodeIN8simdjson8fallback8ondemand8documentEEEbRT_PNS0_4NodeE Line | Count | Source | 296 | 78.8k | bool build_object_node(Value& value, Node* out) { | 297 | 78.8k | simdjson::ondemand::object object; | 298 | 78.8k | auto error = value.get_object().get(object); | 299 | 78.8k | if (error) { | 300 | 0 | return false; | 301 | 0 | } | 302 | 78.8k | out->type = Node::Type::OBJECT; | 303 | 1.34M | for (auto field_result : object) { | 304 | 1.34M | simdjson::ondemand::field field; | 305 | 1.34M | error = std::move(field_result).get(field); | 306 | 1.34M | if (error) { | 307 | 6 | return false; | 308 | 6 | } | 309 | 1.34M | std::string_view key; | 310 | 1.34M | error = field.unescaped_key().get(key); | 311 | 1.34M | if (error) { | 312 | 0 | return false; | 313 | 0 | } | 314 | 1.34M | std::string key_copy(key); | 315 | 1.34M | simdjson::ondemand::value field_value = field.value(); | 316 | 1.34M | Node field_node; | 317 | 1.34M | if (!build_node(field_value, &field_node)) { | 318 | 0 | return false; | 319 | 0 | } | 320 | 1.34M | out->object_keys.push_back(std::move(key_copy)); | 321 | 1.34M | out->object_values.push_back(std::move(field_node)); | 322 | 1.34M | } | 323 | 78.8k | return true; | 324 | 78.8k | } |
|
325 | | |
326 | | template <typename Value> |
327 | 945k | bool build_number_node(Value& value, Node* out) { |
328 | 945k | simdjson::ondemand::number_type number_type; |
329 | 945k | auto error = value.get_number_type().get(number_type); |
330 | 945k | if (error) { |
331 | 0 | return false; |
332 | 0 | } |
333 | 945k | switch (number_type) { |
334 | 816k | case simdjson::ondemand::number_type::signed_integer: |
335 | 816k | if (!assign_raw_number(value.raw_json_token(), &out->raw_number)) { |
336 | 0 | return false; |
337 | 0 | } |
338 | 816k | out->type = Node::Type::INT64; |
339 | 816k | error = value.get_int64().get(out->int64_value); |
340 | 816k | return !error; |
341 | 21 | case simdjson::ondemand::number_type::unsigned_integer: |
342 | 21 | if (!assign_raw_number(value.raw_json_token(), &out->raw_number)) { |
343 | 0 | return false; |
344 | 0 | } |
345 | 21 | out->type = Node::Type::UINT64; |
346 | 21 | error = value.get_uint64().get(out->uint64_value); |
347 | 21 | return !error; |
348 | 129k | case simdjson::ondemand::number_type::floating_point_number: |
349 | 129k | if (!assign_raw_number(value.raw_json_token(), &out->raw_number)) { |
350 | 0 | return false; |
351 | 0 | } |
352 | 129k | out->type = Node::Type::DOUBLE; |
353 | 129k | error = value.get_double().get(out->double_value); |
354 | 129k | return !error; |
355 | 11 | case simdjson::ondemand::number_type::big_integer: { |
356 | 11 | if (!assign_raw_number(value.raw_json_token(), &out->raw_number)) { |
357 | 0 | return false; |
358 | 0 | } |
359 | 11 | out->type = Node::Type::BIG_INTEGER; |
360 | 11 | return true; |
361 | 11 | } |
362 | 945k | } |
363 | 0 | return false; |
364 | 945k | } _ZN5doris14SimdJSONParser17build_number_nodeIN8simdjson8fallback8ondemand5valueEEEbRT_PNS0_4NodeE Line | Count | Source | 327 | 945k | bool build_number_node(Value& value, Node* out) { | 328 | 945k | simdjson::ondemand::number_type number_type; | 329 | 945k | auto error = value.get_number_type().get(number_type); | 330 | 945k | if (error) { | 331 | 0 | return false; | 332 | 0 | } | 333 | 945k | switch (number_type) { | 334 | 816k | case simdjson::ondemand::number_type::signed_integer: | 335 | 816k | if (!assign_raw_number(value.raw_json_token(), &out->raw_number)) { | 336 | 0 | return false; | 337 | 0 | } | 338 | 816k | out->type = Node::Type::INT64; | 339 | 816k | error = value.get_int64().get(out->int64_value); | 340 | 816k | return !error; | 341 | 8 | case simdjson::ondemand::number_type::unsigned_integer: | 342 | 8 | if (!assign_raw_number(value.raw_json_token(), &out->raw_number)) { | 343 | 0 | return false; | 344 | 0 | } | 345 | 8 | out->type = Node::Type::UINT64; | 346 | 8 | error = value.get_uint64().get(out->uint64_value); | 347 | 8 | return !error; | 348 | 129k | case simdjson::ondemand::number_type::floating_point_number: | 349 | 129k | if (!assign_raw_number(value.raw_json_token(), &out->raw_number)) { | 350 | 0 | return false; | 351 | 0 | } | 352 | 129k | out->type = Node::Type::DOUBLE; | 353 | 129k | error = value.get_double().get(out->double_value); | 354 | 129k | return !error; | 355 | 8 | case simdjson::ondemand::number_type::big_integer: { | 356 | 8 | if (!assign_raw_number(value.raw_json_token(), &out->raw_number)) { | 357 | 0 | return false; | 358 | 0 | } | 359 | 8 | out->type = Node::Type::BIG_INTEGER; | 360 | 8 | return true; | 361 | 8 | } | 362 | 945k | } | 363 | 0 | return false; | 364 | 945k | } |
_ZN5doris14SimdJSONParser17build_number_nodeIN8simdjson8fallback8ondemand8documentEEEbRT_PNS0_4NodeE Line | Count | Source | 327 | 125 | bool build_number_node(Value& value, Node* out) { | 328 | 125 | simdjson::ondemand::number_type number_type; | 329 | 125 | auto error = value.get_number_type().get(number_type); | 330 | 125 | if (error) { | 331 | 0 | return false; | 332 | 0 | } | 333 | 125 | switch (number_type) { | 334 | 58 | case simdjson::ondemand::number_type::signed_integer: | 335 | 58 | if (!assign_raw_number(value.raw_json_token(), &out->raw_number)) { | 336 | 0 | return false; | 337 | 0 | } | 338 | 58 | out->type = Node::Type::INT64; | 339 | 58 | error = value.get_int64().get(out->int64_value); | 340 | 58 | return !error; | 341 | 13 | case simdjson::ondemand::number_type::unsigned_integer: | 342 | 13 | if (!assign_raw_number(value.raw_json_token(), &out->raw_number)) { | 343 | 0 | return false; | 344 | 0 | } | 345 | 13 | out->type = Node::Type::UINT64; | 346 | 13 | error = value.get_uint64().get(out->uint64_value); | 347 | 13 | return !error; | 348 | 51 | case simdjson::ondemand::number_type::floating_point_number: | 349 | 51 | if (!assign_raw_number(value.raw_json_token(), &out->raw_number)) { | 350 | 0 | return false; | 351 | 0 | } | 352 | 51 | out->type = Node::Type::DOUBLE; | 353 | 51 | error = value.get_double().get(out->double_value); | 354 | 51 | return !error; | 355 | 3 | case simdjson::ondemand::number_type::big_integer: { | 356 | 3 | if (!assign_raw_number(value.raw_json_token(), &out->raw_number)) { | 357 | 0 | return false; | 358 | 0 | } | 359 | 3 | out->type = Node::Type::BIG_INTEGER; | 360 | 3 | return true; | 361 | 3 | } | 362 | 125 | } | 363 | 0 | return false; | 364 | 125 | } |
|
365 | | |
366 | | template <typename Value> |
367 | 1.02M | bool build_string_node(Value& value, Node* out) { |
368 | 1.02M | std::string_view str; |
369 | 1.02M | auto error = value.get_string().get(str); |
370 | 1.02M | if (error) { |
371 | 0 | return false; |
372 | 0 | } |
373 | 1.02M | out->type = Node::Type::STRING; |
374 | 1.02M | out->string_value = std::string(str); |
375 | 1.02M | return true; |
376 | 1.02M | } _ZN5doris14SimdJSONParser17build_string_nodeIN8simdjson8fallback8ondemand5valueEEEbRT_PNS0_4NodeE Line | Count | Source | 367 | 1.02M | bool build_string_node(Value& value, Node* out) { | 368 | 1.02M | std::string_view str; | 369 | 1.02M | auto error = value.get_string().get(str); | 370 | 1.02M | if (error) { | 371 | 0 | return false; | 372 | 0 | } | 373 | 1.02M | out->type = Node::Type::STRING; | 374 | 1.02M | out->string_value = std::string(str); | 375 | 1.02M | return true; | 376 | 1.02M | } |
_ZN5doris14SimdJSONParser17build_string_nodeIN8simdjson8fallback8ondemand8documentEEEbRT_PNS0_4NodeE Line | Count | Source | 367 | 256 | bool build_string_node(Value& value, Node* out) { | 368 | 256 | std::string_view str; | 369 | 256 | auto error = value.get_string().get(str); | 370 | 256 | if (error) { | 371 | 0 | return false; | 372 | 0 | } | 373 | 256 | out->type = Node::Type::STRING; | 374 | 256 | out->string_value = std::string(str); | 375 | 256 | return true; | 376 | 256 | } |
|
377 | | |
378 | | template <typename Value> |
379 | 2.25M | bool build_node(Value& value, Node* out) { |
380 | 2.25M | simdjson::ondemand::json_type type; |
381 | 2.25M | auto error = value.type().get(type); |
382 | 2.25M | if (error) { |
383 | 0 | return false; |
384 | 0 | } |
385 | 2.25M | switch (type) { |
386 | 55.9k | case simdjson::ondemand::json_type::array: |
387 | 55.9k | return build_array_node(value, out); |
388 | 189k | case simdjson::ondemand::json_type::object: |
389 | 189k | return build_object_node(value, out); |
390 | 945k | case simdjson::ondemand::json_type::number: |
391 | 945k | return build_number_node(value, out); |
392 | 1.02M | case simdjson::ondemand::json_type::string: { |
393 | 1.02M | return build_string_node(value, out); |
394 | 0 | } |
395 | 24.5k | case simdjson::ondemand::json_type::boolean: |
396 | 24.5k | out->type = Node::Type::BOOL; |
397 | 24.5k | error = value.get_bool().get(out->bool_value); |
398 | 24.5k | return !error; |
399 | 15.3k | case simdjson::ondemand::json_type::null: |
400 | 15.3k | out->type = Node::Type::NULL_VALUE; |
401 | 15.3k | return true; |
402 | 2.25M | } |
403 | 0 | return false; |
404 | 2.25M | } _ZN5doris14SimdJSONParser10build_nodeIN8simdjson8fallback8ondemand8documentEEEbRT_PNS0_4NodeE Line | Count | Source | 379 | 80.6k | bool build_node(Value& value, Node* out) { | 380 | 80.6k | simdjson::ondemand::json_type type; | 381 | 80.6k | auto error = value.type().get(type); | 382 | 80.6k | if (error) { | 383 | 0 | return false; | 384 | 0 | } | 385 | 80.6k | switch (type) { | 386 | 1.41k | case simdjson::ondemand::json_type::array: | 387 | 1.41k | return build_array_node(value, out); | 388 | 78.8k | case simdjson::ondemand::json_type::object: | 389 | 78.8k | return build_object_node(value, out); | 390 | 125 | case simdjson::ondemand::json_type::number: | 391 | 125 | return build_number_node(value, out); | 392 | 256 | case simdjson::ondemand::json_type::string: { | 393 | 256 | return build_string_node(value, out); | 394 | 0 | } | 395 | 18 | case simdjson::ondemand::json_type::boolean: | 396 | 18 | out->type = Node::Type::BOOL; | 397 | 18 | error = value.get_bool().get(out->bool_value); | 398 | 18 | return !error; | 399 | 17 | case simdjson::ondemand::json_type::null: | 400 | 17 | out->type = Node::Type::NULL_VALUE; | 401 | 17 | return true; | 402 | 80.6k | } | 403 | 0 | return false; | 404 | 80.6k | } |
_ZN5doris14SimdJSONParser10build_nodeIN8simdjson8fallback8ondemand5valueEEEbRT_PNS0_4NodeE Line | Count | Source | 379 | 2.17M | bool build_node(Value& value, Node* out) { | 380 | 2.17M | simdjson::ondemand::json_type type; | 381 | 2.17M | auto error = value.type().get(type); | 382 | 2.17M | if (error) { | 383 | 0 | return false; | 384 | 0 | } | 385 | 2.17M | switch (type) { | 386 | 54.5k | case simdjson::ondemand::json_type::array: | 387 | 54.5k | return build_array_node(value, out); | 388 | 111k | case simdjson::ondemand::json_type::object: | 389 | 111k | return build_object_node(value, out); | 390 | 945k | case simdjson::ondemand::json_type::number: | 391 | 945k | return build_number_node(value, out); | 392 | 1.02M | case simdjson::ondemand::json_type::string: { | 393 | 1.02M | return build_string_node(value, out); | 394 | 0 | } | 395 | 24.4k | case simdjson::ondemand::json_type::boolean: | 396 | 24.4k | out->type = Node::Type::BOOL; | 397 | 24.4k | error = value.get_bool().get(out->bool_value); | 398 | 24.4k | return !error; | 399 | 15.3k | case simdjson::ondemand::json_type::null: | 400 | 15.3k | out->type = Node::Type::NULL_VALUE; | 401 | 15.3k | return true; | 402 | 2.17M | } | 403 | 0 | return false; | 404 | 2.17M | } |
|
405 | | |
406 | | simdjson::ondemand::parser ondemand_parser; |
407 | | Node root; |
408 | | }; |
409 | 161k | inline ALWAYS_INLINE SimdJSONParser::Array SimdJSONParser::Element::getArray() const { |
410 | 161k | assert(node != nullptr); |
411 | 161k | return Array(&node->array_values); |
412 | 161k | } |
413 | 190k | inline ALWAYS_INLINE SimdJSONParser::Object SimdJSONParser::Element::getObject() const { |
414 | 190k | assert(node != nullptr); |
415 | 190k | return Object(&node->object_keys, &node->object_values); |
416 | 190k | } |
417 | | |
418 | | } // namespace doris |