/root/doris/be/src/util/jsonb_document.h
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2014, Facebook, Inc. |
3 | | * All rights reserved. |
4 | | * |
5 | | * This source code is licensed under the BSD-style license found in the |
6 | | * LICENSE file in the root directory of this source tree. An additional grant |
7 | | * of patent rights can be found in the PATENTS file in the same directory. |
8 | | * |
9 | | */ |
10 | | |
11 | | /* |
12 | | * This header defines JsonbDocument, JsonbKeyValue, and various value classes |
13 | | * which are derived from JsonbValue, and a forward iterator for container |
14 | | * values - essentially everything that is related to JSONB binary data |
15 | | * structures. |
16 | | * |
17 | | * Implementation notes: |
18 | | * |
19 | | * None of the classes in this header file can be instantiated directly (i.e. |
20 | | * you cannot create a JsonbKeyValue or JsonbValue object - all constructors |
21 | | * are declared non-public). We use the classes as wrappers on the packed JSONB |
22 | | * bytes (serialized), and cast the classes (types) to the underlying packed |
23 | | * byte array. |
24 | | * |
25 | | * For the same reason, we cannot define any JSONB value class to be virtual, |
26 | | * since we never call constructors, and will not instantiate vtbl and vptrs. |
27 | | * |
28 | | * Therefore, the classes are defined as packed structures (i.e. no data |
29 | | * alignment and padding), and the private member variables of the classes are |
30 | | * defined precisely in the same order as the JSONB spec. This ensures we |
31 | | * access the packed JSONB bytes correctly. |
32 | | * |
33 | | * The packed structures are highly optimized for in-place operations with low |
34 | | * overhead. The reads (and in-place writes) are performed directly on packed |
35 | | * bytes. There is no memory allocation at all at runtime. |
36 | | * |
37 | | * For updates/writes of values that will expand the original JSONB size, the |
38 | | * write will fail, and the caller needs to handle buffer increase. |
39 | | * |
40 | | * ** Iterator ** |
41 | | * Both ObjectVal class and ArrayVal class have iterator type that you can use |
42 | | * to declare an iterator on a container object to go through the key-value |
43 | | * pairs or value list. The iterator has both non-const and const types. |
44 | | * |
45 | | * Note: iterators are forward direction only. |
46 | | * |
47 | | * ** Query ** |
48 | | * Querying into containers is through the member functions find (for key/value |
49 | | * pairs) and get (for array elements), and is in streaming style. We don't |
50 | | * need to read/scan the whole JSONB packed bytes in order to return results. |
51 | | * Once the key/index is found, we will stop search. You can use text to query |
52 | | * both objects and array (for array, text will be converted to integer index), |
53 | | * and use index to retrieve from array. Array index is 0-based. |
54 | | * |
55 | | * ** External dictionary ** |
56 | | * During query processing, you can also pass a call-back function, so the |
57 | | * search will first try to check if the key string exists in the dictionary. |
58 | | * If so, search will be based on the id instead of the key string. |
59 | | * @author Tian Xia <tianx@fb.com> |
60 | | * |
61 | | * this file is copied from |
62 | | * https://github.com/facebook/mysql-5.6/blob/fb-mysql-5.6.35/fbson/FbsonDocument.h |
63 | | * and modified by Doris |
64 | | */ |
65 | | |
66 | | #ifndef JSONB_JSONBDOCUMENT_H |
67 | | #define JSONB_JSONBDOCUMENT_H |
68 | | |
#include <algorithm>
#include <cassert>
#include <cctype>
#include <charconv>
#include <concepts>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <memory>
#include <string>
#include <string_view>
#include <type_traits>
#include <utility>
#include <vector>
77 | | |
78 | | #include "common/compiler_util.h" // IWYU pragma: keep |
79 | | #include "common/status.h" |
80 | | #include "runtime/define_primitive_type.h" |
81 | | #include "util/string_util.h" |
82 | | #include "vec/core/types.h" |
83 | | |
84 | | // #include "util/string_parser.hpp" |
85 | | |
86 | | // Concept to check for supported decimal types |
87 | | template <typename T> |
88 | | concept JsonbDecimalType = std::same_as<T, doris::vectorized::Decimal256> || |
89 | | std::same_as<T, doris::vectorized::Decimal64> || |
90 | | std::same_as<T, doris::vectorized::Decimal128V3> || |
91 | | std::same_as<T, doris::vectorized::Decimal32>; |
92 | | |
93 | | namespace doris { |
94 | | |
95 | | template <typename T> |
96 | | constexpr bool is_pod_v = std::is_trivial_v<T> && std::is_standard_layout_v<T>; |
97 | | |
98 | | struct JsonbStringVal; |
99 | | struct ObjectVal; |
100 | | struct ArrayVal; |
101 | | struct JsonbBinaryVal; |
102 | | struct ContainerVal; |
103 | | |
104 | | template <JsonbDecimalType T> |
105 | | struct JsonbDecimalVal; |
106 | | |
107 | | using JsonbDecimal256 = JsonbDecimalVal<vectorized::Decimal256>; |
108 | | using JsonbDecimal128 = JsonbDecimalVal<vectorized::Decimal128V3>; |
109 | | using JsonbDecimal64 = JsonbDecimalVal<vectorized::Decimal64>; |
110 | | using JsonbDecimal32 = JsonbDecimalVal<vectorized::Decimal32>; |
111 | | |
112 | | template <typename T> |
113 | | requires std::is_integral_v<T> || std::is_floating_point_v<T> |
114 | | struct NumberValT; |
115 | | |
116 | | using JsonbInt8Val = NumberValT<int8_t>; |
117 | | using JsonbInt16Val = NumberValT<int16_t>; |
118 | | using JsonbInt32Val = NumberValT<int32_t>; |
119 | | using JsonbInt64Val = NumberValT<int64_t>; |
120 | | using JsonbInt128Val = NumberValT<int128_t>; |
121 | | using JsonbDoubleVal = NumberValT<double>; |
122 | | using JsonbFloatVal = NumberValT<float>; |
123 | | |
124 | | template <typename T> |
125 | | concept JsonbPodType = (std::same_as<T, JsonbStringVal> || std::same_as<T, ObjectVal> || |
126 | | std::same_as<T, ContainerVal> || std::same_as<T, ArrayVal> || |
127 | | std::same_as<T, JsonbBinaryVal> || std::same_as<T, JsonbDecimal32> || |
128 | | std::same_as<T, JsonbDecimal64> || std::same_as<T, JsonbDecimal128> || |
129 | | std::same_as<T, JsonbDecimal256> || std::same_as<T, JsonbDecimal32> || |
130 | | std::same_as<T, JsonbInt8Val> || std::same_as<T, JsonbInt16Val> || |
131 | | std::same_as<T, JsonbInt32Val> || std::same_as<T, JsonbInt64Val> || |
132 | | std::same_as<T, JsonbInt128Val> || std::same_as<T, JsonbFloatVal> || |
133 | | std::same_as<T, JsonbFloatVal> || std::same_as<T, JsonbDoubleVal>); |
134 | | |
135 | 208k | #define JSONB_VER 1 |
136 | | |
137 | | using int128_t = __int128; |
138 | | |
139 | | // forward declaration |
140 | | struct JsonbValue; |
141 | | |
142 | | class JsonbOutStream; |
143 | | |
144 | | template <class OS_TYPE> |
145 | | class JsonbWriterT; |
146 | | |
147 | | using JsonbWriter = JsonbWriterT<JsonbOutStream>; |
148 | | |
149 | | const int MaxNestingLevel = 100; |
150 | | |
151 | | /* |
152 | | * JsonbType defines the primitive types and 2 container types. The original 10 |
153 | | * primitives and the containers are described below; see the enum for the rest. |
154 | | * NOTE: Do NOT modify the existing values or their order in this enum. |
155 | | * You may only append new entries at the end before `NUM_TYPES`. |
156 | | * This enum will be used in serialized data and/or persisted data. |
157 | | * Changing existing values may break backward compatibility |
158 | | * with previously stored or transmitted data. |
159 | | * |
160 | | * primitive_value ::= |
161 | | * 0x00 //null value (0 byte) |
162 | | * | 0x01 //boolean true (0 byte) |
163 | | * | 0x02 //boolean false (0 byte) |
164 | | * | 0x03 int8 //char/int8 (1 byte) |
165 | | * | 0x04 int16 //int16 (2 bytes) |
166 | | * | 0x05 int32 //int32 (4 bytes) |
167 | | * | 0x06 int64 //int64 (8 bytes) |
168 | | * | 0x07 double //floating point (8 bytes) |
169 | | * | 0x08 string //variable length string |
170 | | * | 0x09 binary //variable length binary |
171 | | * |
172 | | * container ::= |
173 | | * 0x0A int32 key_value_list //object, int32 is the total bytes of the object |
174 | | * | 0x0B int32 value_list //array, int32 is the total bytes of the array |
175 | | */ |
176 | | enum class JsonbType : char { |
177 | | T_Null = 0x00, |
178 | | T_True = 0x01, |
179 | | T_False = 0x02, |
180 | | T_Int8 = 0x03, |
181 | | T_Int16 = 0x04, |
182 | | T_Int32 = 0x05, |
183 | | T_Int64 = 0x06, |
184 | | T_Double = 0x07, |
185 | | T_String = 0x08, |
186 | | T_Binary = 0x09, |
187 | | T_Object = 0x0A, |
188 | | T_Array = 0x0B, |
189 | | T_Int128 = 0x0C, |
190 | | T_Float = 0x0D, |
191 | | T_Decimal32 = 0x0E, // DecimalV3 only |
192 | | T_Decimal64 = 0x0F, // DecimalV3 only |
193 | | T_Decimal128 = 0x10, // DecimalV3 only |
194 | | T_Decimal256 = 0x11, // DecimalV3 only |
195 | | NUM_TYPES, |
196 | | }; |
197 | | |
198 | 11 | inline PrimitiveType get_primitive_type_from_json_type(JsonbType json_type) { |
199 | 11 | switch (json_type) { |
200 | 1 | case JsonbType::T_Null: |
201 | 1 | return TYPE_NULL; |
202 | 1 | case JsonbType::T_True: |
203 | 2 | case JsonbType::T_False: |
204 | 2 | return TYPE_BOOLEAN; |
205 | 0 | case JsonbType::T_Int8: |
206 | 0 | return TYPE_TINYINT; |
207 | 0 | case JsonbType::T_Int16: |
208 | 0 | return TYPE_SMALLINT; |
209 | 0 | case JsonbType::T_Int32: |
210 | 0 | return TYPE_INT; |
211 | 0 | case JsonbType::T_Int64: |
212 | 0 | return TYPE_BIGINT; |
213 | 0 | case JsonbType::T_Double: |
214 | 0 | return TYPE_DOUBLE; |
215 | 1 | case JsonbType::T_String: |
216 | 1 | return TYPE_STRING; |
217 | 0 | case JsonbType::T_Binary: |
218 | 0 | return TYPE_BINARY; |
219 | 0 | case JsonbType::T_Object: |
220 | 0 | return TYPE_STRUCT; |
221 | 1 | case JsonbType::T_Array: |
222 | 1 | return TYPE_ARRAY; |
223 | 1 | case JsonbType::T_Int128: |
224 | 1 | return TYPE_LARGEINT; |
225 | 1 | case JsonbType::T_Float: |
226 | 1 | return TYPE_FLOAT; |
227 | 1 | case JsonbType::T_Decimal32: |
228 | 1 | return TYPE_DECIMAL32; |
229 | 1 | case JsonbType::T_Decimal64: |
230 | 1 | return TYPE_DECIMAL64; |
231 | 1 | case JsonbType::T_Decimal128: |
232 | 1 | return TYPE_DECIMAL128I; |
233 | 1 | case JsonbType::T_Decimal256: |
234 | 1 | return TYPE_DECIMAL256; |
235 | 0 | default: |
236 | 0 | throw Exception(ErrorCode::INTERNAL_ERROR, "Unsupported JsonbType: {}", |
237 | 0 | static_cast<int>(json_type)); |
238 | 11 | } |
239 | 11 | } |
240 | | |
241 | | //for parse json path |
242 | | constexpr char SCOPE = '$'; |
243 | | constexpr char BEGIN_MEMBER = '.'; |
244 | | constexpr char BEGIN_ARRAY = '['; |
245 | | constexpr char END_ARRAY = ']'; |
246 | | constexpr char DOUBLE_QUOTE = '"'; |
247 | | constexpr char WILDCARD = '*'; |
248 | | constexpr char MINUS = '-'; |
249 | | constexpr char LAST[] = "last"; |
250 | | constexpr char ESCAPE = '\\'; |
251 | | constexpr unsigned int MEMBER_CODE = 0; |
252 | | constexpr unsigned int ARRAY_CODE = 1; |
253 | | |
/// A simple input stream class for the JSON path parser.
///
/// Besides the read cursor over a caller-owned character range, the stream
/// carries scratch state describing the path leg currently being parsed:
/// a pointer to the leg, its length, and whether it contained escapes.
/// The stream never owns or frees any of the memory it points at.
class Stream {
public:
    /// Creates an input stream reading from a character string.
    /// @param string the input string
    /// @param length the length of the input string
    Stream(const char* string, size_t length) : m_position(string), m_end(string + length) {}

    /// Returns a pointer to the current position in the stream.
    const char* position() const { return m_position; }

    /// Returns a pointer to the position just after the end of the stream.
    const char* end() const { return m_end; }

    /// Returns the number of bytes remaining in the stream.
    size_t remaining() const {
        assert(m_position <= m_end);
        return m_end - m_position;
    }

    /// Tells if the stream has been exhausted.
    bool exhausted() const { return remaining() == 0; }

    /// Reads the next byte from the stream and moves the position forward.
    /// The stream must not be exhausted.
    char read() {
        assert(!exhausted());
        return *m_position++;
    }

    /// Reads the next byte from the stream without moving the position forward.
    /// The stream must not be exhausted.
    char peek() const {
        assert(!exhausted());
        return *m_position;
    }

    /// Moves the position to the next non-whitespace character (or to the end).
    void skip_whitespace() {
        // std::isspace is only defined for values representable as unsigned char
        // (or EOF); a plain char may be negative for non-ASCII bytes.
        m_position = std::find_if_not(m_position, m_end, [](char c) {
            return std::isspace(static_cast<unsigned char>(c)) != 0;
        });
    }

    /// Moves the position n bytes forward, then skips any whitespace that follows.
    /// At least n bytes must remain.
    void skip(size_t n) {
        assert(remaining() >= n);
        m_position += n;
        skip_whitespace();
    }

    /// Moves the position exactly one byte forward (no bounds check, no
    /// whitespace skipping).
    void advance() { m_position++; }

    /// Forgets the current path leg pointer.
    void clear_leg_ptr() { leg_ptr = nullptr; }

    /// Sets the start of the current path leg.
    void set_leg_ptr(char* ptr) {
        clear_leg_ptr();
        leg_ptr = ptr;
    }

    /// Returns the start of the current path leg (may be nullptr).
    char* get_leg_ptr() { return leg_ptr; }

    /// Resets the current path leg length to zero.
    void clear_leg_len() { leg_len = 0; }

    /// Grows the current path leg by one byte.
    void add_leg_len() { leg_len++; }

    /// Returns the length of the current path leg in bytes.
    unsigned int get_leg_len() const { return leg_len; }

    /// Removes every backslash from the current leg in place and shrinks the
    /// leg length accordingly.
    ///
    /// A terminating '\0' is written right after the compacted text only when
    /// at least one byte was dropped, so the write always stays inside the
    /// original leg. Does nothing when no leg pointer is set.
    void remove_escapes() {
        if (leg_ptr == nullptr) {
            return;
        }
        unsigned int kept = 0;
        for (unsigned int i = 0; i < leg_len; ++i) {
            if (leg_ptr[i] != '\\') {
                leg_ptr[kept++] = leg_ptr[i];
            }
        }
        if (kept < leg_len) {
            leg_ptr[kept] = '\0';
        }
        leg_len = kept;
    }

    /// Records whether the current leg contains escape characters.
    void set_has_escapes(bool has) { has_escapes = has; }

    /// Tells whether the current leg was flagged as containing escapes.
    bool get_has_escapes() const { return has_escapes; }

private:
    /// The current position in the stream.
    const char* m_position = nullptr;

    /// The end of the stream.
    const char* const m_end;

    /// Start of the path leg being parsed.
    char* leg_ptr = nullptr;

    /// Length of the path leg being parsed; starts at zero so it is always
    /// safe to read.
    unsigned int leg_len = 0;

    /// Whether the path leg contains escape characters.
    bool has_escapes = false;
};
349 | | |
350 | | struct leg_info { |
351 | | ///path leg ptr |
352 | | char* leg_ptr = nullptr; |
353 | | |
354 | | ///path leg len |
355 | | unsigned int leg_len; |
356 | | |
357 | | ///array_index |
358 | | int array_index; |
359 | | |
360 | | ///type: 0 is member 1 is array |
361 | | unsigned int type; |
362 | | |
363 | 0 | bool to_string(std::string* str) const { |
364 | 0 | if (type == MEMBER_CODE) { |
365 | 0 | str->push_back(BEGIN_MEMBER); |
366 | 0 | bool contains_space = false; |
367 | 0 | std::string tmp; |
368 | 0 | for (auto* it = leg_ptr; it != (leg_ptr + leg_len); ++it) { |
369 | 0 | if (std::isspace(*it)) { |
370 | 0 | contains_space = true; |
371 | 0 | } else if (*it == '"' || *it == ESCAPE || *it == '\r' || *it == '\n' || |
372 | 0 | *it == '\b' || *it == '\t') { |
373 | 0 | tmp.push_back(ESCAPE); |
374 | 0 | } |
375 | 0 | tmp.push_back(*it); |
376 | 0 | } |
377 | 0 | if (contains_space) { |
378 | 0 | str->push_back(DOUBLE_QUOTE); |
379 | 0 | } |
380 | 0 | str->append(tmp); |
381 | 0 | if (contains_space) { |
382 | 0 | str->push_back(DOUBLE_QUOTE); |
383 | 0 | } |
384 | 0 | return true; |
385 | 0 | } else if (type == ARRAY_CODE) { |
386 | 0 | str->push_back(BEGIN_ARRAY); |
387 | 0 | std::string int_str = std::to_string(array_index); |
388 | 0 | str->append(int_str); |
389 | 0 | str->push_back(END_ARRAY); |
390 | 0 | return true; |
391 | 0 | } else { |
392 | 0 | return false; |
393 | 0 | } |
394 | 0 | } |
395 | | }; |
396 | | |
397 | | class JsonbPath { |
398 | | public: |
399 | | // parse json path |
400 | | static bool parsePath(Stream* stream, JsonbPath* path); |
401 | | |
402 | | static bool parse_array(Stream* stream, JsonbPath* path); |
403 | | static bool parse_member(Stream* stream, JsonbPath* path); |
404 | | |
405 | | //return true if json path valid else return false |
406 | | bool seek(const char* string, size_t length); |
407 | | |
408 | 90 | void add_leg_to_leg_vector(std::unique_ptr<leg_info> leg) { |
409 | 90 | leg_vector.emplace_back(leg.release()); |
410 | 90 | } |
411 | | |
412 | 0 | void pop_leg_from_leg_vector() { leg_vector.pop_back(); } |
413 | | |
414 | 0 | bool to_string(std::string* res) const { |
415 | 0 | res->push_back(SCOPE); |
416 | 0 | for (const auto& leg : leg_vector) { |
417 | 0 | auto valid = leg->to_string(res); |
418 | 0 | if (!valid) { |
419 | 0 | return false; |
420 | 0 | } |
421 | 0 | } |
422 | 0 | return true; |
423 | 0 | } |
424 | | |
425 | 185 | size_t get_leg_vector_size() const { return leg_vector.size(); } |
426 | | |
427 | 295 | leg_info* get_leg_from_leg_vector(size_t i) const { return leg_vector[i].get(); } |
428 | | |
429 | 0 | bool is_wildcard() const { return _is_wildcard; } |
430 | 95 | bool is_supper_wildcard() const { return _is_supper_wildcard; } |
431 | | |
432 | 6 | void clean() { leg_vector.clear(); } |
433 | | |
434 | | private: |
435 | | std::vector<std::unique_ptr<leg_info>> leg_vector; |
436 | | bool _is_wildcard = false; // whether the path is a wildcard path |
437 | | bool _is_supper_wildcard = false; // supper wildcard likes '$**.a' or '$**[1]' |
438 | | }; |
439 | | |
440 | | /* |
441 | | * JsonbFwdIteratorT implements JSONB's iterator template. |
442 | | * |
443 | | * Note: it is a FORWARD-only iterator due to the design of the JSONB format. |
444 | | */ |
/// Forward iterator over variable-sized packed JSONB elements.
///
/// @tparam Iter_Type pointer type to the element (const or non-const); the
///         element must provide numPackedBytes(), which gives the distance in
///         bytes to the next element.
/// @tparam Cont_Type container the iterator belongs to; only used to keep
///         iterators of different containers from converting into each other.
template <class Iter_Type, class Cont_Type>
class JsonbFwdIteratorT {
public:
    using iterator = Iter_Type;
    using pointer = typename std::iterator_traits<Iter_Type>::pointer;
    using reference = typename std::iterator_traits<Iter_Type>::reference;

    explicit JsonbFwdIteratorT() : current_(nullptr) {}
    explicit JsonbFwdIteratorT(const iterator& i) : current_(i) {}

    // allow non-const to const iterator conversion (same container type)
    template <class Iter_Ty>
    JsonbFwdIteratorT(const JsonbFwdIteratorT<Iter_Ty, Cont_Type>& rhs) : current_(rhs.base()) {}

    bool operator==(const JsonbFwdIteratorT& rhs) const { return current_ == rhs.current_; }

    bool operator!=(const JsonbFwdIteratorT& rhs) const { return !(*this == rhs); }

    bool operator<(const JsonbFwdIteratorT& rhs) const { return current_ < rhs.current_; }

    // Strictly greater: expressed through '<' with swapped operands so that
    // equal iterators compare false (negating '<' would yield '>=').
    bool operator>(const JsonbFwdIteratorT& rhs) const { return rhs.current_ < current_; }

    /// Advances to the next packed element.
    JsonbFwdIteratorT& operator++() {
        current_ = next();
        return *this;
    }

    /// Advances to the next packed element and returns the previous position.
    JsonbFwdIteratorT operator++(int) {
        auto previous = *this;
        ++*this;
        return previous;
    }

    explicit operator pointer() const { return current_; }

    reference operator*() const { return *current_; }

    pointer operator->() const { return current_; }

    iterator base() const { return current_; }

private:
    // Byte pointer with the same constness as the element, so stepping through
    // the packed bytes never casts const away.
    using byte_pointer = std::conditional_t<std::is_const_v<std::remove_pointer_t<iterator>>,
                                            const char*, char*>;

    // Address of the element that follows *current_ in the packed buffer.
    iterator next() const {
        return reinterpret_cast<iterator>(reinterpret_cast<byte_pointer>(current_) +
                                          current_->numPackedBytes());
    }

    iterator current_;
};
489 | | |
490 | | using hDictInsert = int (*)(const char*, unsigned int); |
491 | | using hDictFind = int (*)(const char*, unsigned int); |
492 | | |
493 | | using JsonbTypeUnder = std::underlying_type_t<JsonbType>; |
494 | | |
495 | | #if defined(__clang__) |
496 | | #pragma clang diagnostic push |
497 | | #pragma clang diagnostic ignored "-Wzero-length-array" |
498 | | #endif |
499 | | #pragma pack(push, 1) |
500 | | |
501 | | /* |
502 | | * JsonbDocument is the main object that accesses and queries JSONB packed |
503 | | * bytes. NOTE: JsonbDocument only allows object container as the top level |
504 | | * JSONB value. However, you can use the static method "createValue" to get any |
505 | | * JsonbValue object from the packed bytes. |
506 | | * |
507 | | * JsonbDocument object also dereferences to an object container value |
508 | | * (ObjectVal) once JSONB is loaded. |
509 | | * |
510 | | * ** Load ** |
511 | | * JsonbDocument is usable after loading packed bytes (memory location) into |
512 | | * the object. We only need the header and first few bytes of the payload after |
513 | | * header to verify the JSONB. |
514 | | * |
515 | | * Note: creating a JsonbDocument (through checkAndCreateDocument) does not allocate |
516 | | * any memory. The document object is an efficient wrapper on the packed bytes |
517 | | * which is accessed directly. |
518 | | * |
519 | | * ** Query ** |
520 | | * Query is through dereferencing into ObjectVal. |
521 | | */ |
522 | | class JsonbDocument { |
523 | | public: |
524 | | // create an JsonbDocument object from JSONB packed bytes |
525 | | [[nodiscard]] static Status checkAndCreateDocument(const char* pb, size_t size, |
526 | | JsonbDocument** doc); |
527 | | |
528 | | // create an JsonbValue from JSONB packed bytes |
529 | | static JsonbValue* createValue(const char* pb, size_t size); |
530 | | |
531 | 0 | uint8_t version() const { return header_.ver_; } |
532 | | |
533 | 26.6k | JsonbValue* getValue() { return ((JsonbValue*)payload_); } |
534 | | |
535 | | void setValue(const JsonbValue* value); |
536 | | |
537 | | unsigned int numPackedBytes() const; |
538 | | |
539 | | // ObjectVal* operator->(); |
540 | | |
541 | | const ObjectVal* operator->() const; |
542 | | |
543 | | private: |
544 | | /* |
545 | | * JsonbHeader class defines JSONB header (internal to JsonbDocument). |
546 | | * |
547 | | * Currently it only contains version information (1-byte). We may expand the |
548 | | * header to include checksum of the JSONB binary for more security. |
549 | | */ |
550 | | struct JsonbHeader { |
551 | | uint8_t ver_; |
552 | | } header_; |
553 | | |
554 | | char payload_[0]; |
555 | | }; |
556 | | |
557 | | /* |
558 | | * JsonbKeyValue class defines JSONB key type, as described below. |
559 | | * |
560 | | * key ::= |
561 | | * 0x00 int8 //1-byte dictionary id |
562 | | * | int8 (byte*) //int8 (>0) is the size of the key string |
563 | | * |
564 | | * value ::= primitive_value | container |
565 | | * |
566 | | * JsonbKeyValue can be either an id mapping to the key string in an external |
567 | | * dictionary, or it is the original key string. Whether to read an id or a |
568 | | * string is decided by the first byte (size). |
569 | | * |
570 | | * Note: a key object must be followed by a value object. Therefore, a key |
571 | | * object implicitly refers to a key-value pair, and you can get the value |
572 | | * object right after the key object. The function numPackedBytes hence |
573 | | * indicates the total size of the key-value pair, so that we are able to go |
574 | | * to the next pair from the key. |
575 | | * |
576 | | * ** Dictionary size ** |
577 | | * By default, the dictionary size is 255 (1-byte). Users can define |
578 | | * "USE_LARGE_DICT" to increase the dictionary size to 65535 (2-byte). |
579 | | */ |
580 | | class JsonbKeyValue { |
581 | | public: |
582 | | // now we use sMaxKeyId to represent an empty key |
583 | | static const int sMaxKeyId = 65535; |
584 | | using keyid_type = uint16_t; |
585 | | |
586 | | static const uint8_t sMaxKeyLen = 64; |
587 | | |
588 | | // size of the key. 0 indicates it is stored as id |
589 | 1.30k | uint8_t klen() const { return size; } |
590 | | |
591 | | // get the key string. Note the string may not be null terminated. |
592 | 663 | const char* getKeyStr() const { return key.str_; } |
593 | | |
594 | 9.22k | keyid_type getKeyId() const { return key.id_; } |
595 | | |
596 | 34.9k | unsigned int keyPackedBytes() const { |
597 | 34.9k | return size ? (sizeof(size) + size) : (sizeof(size) + sizeof(keyid_type)); |
598 | 34.9k | } |
599 | | |
600 | 17.4k | JsonbValue* value() const { return (JsonbValue*)(((char*)this) + keyPackedBytes()); } |
601 | | |
602 | | // size of the total packed bytes (key+value) |
603 | | unsigned int numPackedBytes() const; |
604 | | |
605 | | uint8_t size; |
606 | | |
607 | | union key_ { |
608 | | keyid_type id_; |
609 | | char str_[1]; |
610 | | } key; |
611 | | }; |
612 | | |
613 | | struct JsonbFindResult { |
614 | | const JsonbValue* value = nullptr; // found value |
615 | | std::unique_ptr<JsonbWriter> writer; // writer to write the value |
616 | | bool is_wildcard = false; // whether the path is a wildcard path |
617 | | }; |
618 | | |
619 | | /* |
620 | | * JsonbValue is the base class of all JSONB types. It contains only one member |
621 | | * variable - type info, which can be retrieved by member functions is[Type]() |
622 | | * or type(). |
623 | | */ |
624 | | struct JsonbValue { |
625 | | static const uint32_t sMaxValueLen = 1 << 24; // 16M |
626 | | |
627 | 4.22k | bool isNull() const { return (type == JsonbType::T_Null); } |
628 | 21 | bool isTrue() const { return (type == JsonbType::T_True); } |
629 | 1 | bool isFalse() const { return (type == JsonbType::T_False); } |
630 | 4 | bool isInt() const { return isInt8() || isInt16() || isInt32() || isInt64() || isInt128(); } |
631 | 4 | bool isInt8() const { return (type == JsonbType::T_Int8); } |
632 | 2 | bool isInt16() const { return (type == JsonbType::T_Int16); } |
633 | 0 | bool isInt32() const { return (type == JsonbType::T_Int32); } |
634 | 3 | bool isInt64() const { return (type == JsonbType::T_Int64); } |
635 | 1 | bool isDouble() const { return (type == JsonbType::T_Double); } |
636 | 1 | bool isFloat() const { return (type == JsonbType::T_Float); } |
637 | 33 | bool isString() const { return (type == JsonbType::T_String); } |
638 | 1.07k | bool isBinary() const { return (type == JsonbType::T_Binary); } |
639 | 6 | bool isObject() const { return (type == JsonbType::T_Object); } |
640 | 7 | bool isArray() const { return (type == JsonbType::T_Array); } |
641 | 3 | bool isInt128() const { return (type == JsonbType::T_Int128); } |
642 | 4 | bool isDecimal() const { |
643 | 4 | return (type == JsonbType::T_Decimal32 || type == JsonbType::T_Decimal64 || |
644 | 4 | type == JsonbType::T_Decimal128 || type == JsonbType::T_Decimal256); |
645 | 4 | } |
646 | 1 | bool isDecimal32() const { return (type == JsonbType::T_Decimal32); } |
647 | 1 | bool isDecimal64() const { return (type == JsonbType::T_Decimal64); } |
648 | 1 | bool isDecimal128() const { return (type == JsonbType::T_Decimal128); } |
649 | 1 | bool isDecimal256() const { return (type == JsonbType::T_Decimal256); } |
650 | | |
651 | 11 | PrimitiveType get_primitive_type() const { return get_primitive_type_from_json_type(type); } |
652 | | |
653 | 0 | const char* typeName() const { |
654 | 0 | switch (type) { |
655 | 0 | case JsonbType::T_Null: |
656 | 0 | return "null"; |
657 | 0 | case JsonbType::T_True: |
658 | 0 | case JsonbType::T_False: |
659 | 0 | return "bool"; |
660 | 0 | case JsonbType::T_Int8: |
661 | 0 | case JsonbType::T_Int16: |
662 | 0 | case JsonbType::T_Int32: |
663 | 0 | return "int"; |
664 | 0 | case JsonbType::T_Int64: |
665 | 0 | return "bigint"; |
666 | 0 | case JsonbType::T_Int128: |
667 | 0 | return "largeint"; |
668 | 0 | case JsonbType::T_Double: |
669 | 0 | return "double"; |
670 | 0 | case JsonbType::T_Float: |
671 | 0 | return "float"; |
672 | 0 | case JsonbType::T_String: |
673 | 0 | return "string"; |
674 | 0 | case JsonbType::T_Binary: |
675 | 0 | return "binary"; |
676 | 0 | case JsonbType::T_Object: |
677 | 0 | return "object"; |
678 | 0 | case JsonbType::T_Array: |
679 | 0 | return "array"; |
680 | 0 | case JsonbType::T_Decimal32: |
681 | 0 | return "Decimal32"; |
682 | 0 | case JsonbType::T_Decimal64: |
683 | 0 | return "Decimal64"; |
684 | 0 | case JsonbType::T_Decimal128: |
685 | 0 | return "Decimal128"; |
686 | 0 | case JsonbType::T_Decimal256: |
687 | 0 | return "Decimal256"; |
688 | 0 | default: |
689 | 0 | return "unknown"; |
690 | 0 | } |
691 | 0 | } |
692 | | |
693 | | // size of the total packed bytes |
694 | | unsigned int numPackedBytes() const; |
695 | | |
696 | | // size of the value in bytes |
697 | | unsigned int size() const; |
698 | | |
699 | | //Get the number of jsonbvalue elements |
700 | | int numElements() const; |
701 | | |
702 | | //Whether to include the jsonbvalue rhs |
703 | | bool contains(JsonbValue* rhs) const; |
704 | | |
705 | | // find the JSONB value by JsonbPath |
706 | | JsonbFindResult findValue(JsonbPath& path) const; |
707 | | friend class JsonbDocument; |
708 | | |
709 | | JsonbType type; // type info |
710 | | |
711 | | char payload[0]; // payload, which is the packed bytes of the value |
712 | | |
713 | | /** |
714 | | * @brief Unpacks the underlying Jsonb binary content as a pointer to type `T`. |
715 | | * |
716 | | * @tparam T A POD (Plain Old Data) type that must satisfy the `JsonbPodType` concept. |
717 | | * This ensures that `T` is trivially copyable, standard-layout, and safe to |
718 | | * reinterpret from raw bytes without invoking undefined behavior. |
719 | | * |
720 | | * @return A pointer to a `const T` object, interpreted from the internal buffer. |
721 | | * |
722 | | * @note The caller must ensure that the current JsonbValue actually contains data |
723 | | * compatible with type `T`, otherwise the result is undefined. |
724 | | */ |
725 | | template <JsonbPodType T> |
726 | 52.0k | const T* unpack() const { |
727 | 52.0k | static_assert(is_pod_v<T>, "T must be a POD type"); |
728 | 52.0k | return reinterpret_cast<const T*>(payload); |
729 | 52.0k | } _ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_9ObjectValEEEPKT_v Line | Count | Source | 726 | 19.5k | const T* unpack() const { | 727 | 19.5k | static_assert(is_pod_v<T>, "T must be a POD type"); | 728 | 19.5k | return reinterpret_cast<const T*>(payload); | 729 | 19.5k | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIaEEEEPKT_v Line | Count | Source | 726 | 720 | const T* unpack() const { | 727 | 720 | static_assert(is_pod_v<T>, "T must be a POD type"); | 728 | 720 | return reinterpret_cast<const T*>(payload); | 729 | 720 | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIsEEEEPKT_v Line | Count | Source | 726 | 103 | const T* unpack() const { | 727 | 103 | static_assert(is_pod_v<T>, "T must be a POD type"); | 728 | 103 | return reinterpret_cast<const T*>(payload); | 729 | 103 | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIiEEEEPKT_v Line | Count | Source | 726 | 3.47k | const T* unpack() const { | 727 | 3.47k | static_assert(is_pod_v<T>, "T must be a POD type"); | 728 | 3.47k | return reinterpret_cast<const T*>(payload); | 729 | 3.47k | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIlEEEEPKT_v Line | Count | Source | 726 | 1.87k | const T* unpack() const { | 727 | 1.87k | static_assert(is_pod_v<T>, "T must be a POD type"); | 728 | 1.87k | return reinterpret_cast<const T*>(payload); | 729 | 1.87k | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTInEEEEPKT_v Line | Count | Source | 726 | 4.16k | const T* unpack() const { | 727 | 4.16k | static_assert(is_pod_v<T>, "T must be a POD type"); | 728 | 4.16k | return reinterpret_cast<const T*>(payload); | 729 | 4.16k | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_14JsonbBinaryValEEEPKT_v Line | Count | Source | 726 | 19.6k | const T* unpack() const { | 727 | 19.6k | static_assert(is_pod_v<T>, "T must be a POD type"); | 728 | 19.6k | return reinterpret_cast<const T*>(payload); | 729 | 19.6k | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_12ContainerValEEEPKT_v Line | Count | Source | 726 | 1.84k | const T* unpack() const { | 727 | 1.84k | static_assert(is_pod_v<T>, "T must be a POD type"); | 728 | 1.84k | return reinterpret_cast<const T*>(payload); | 729 | 1.84k | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_8ArrayValEEEPKT_v Line | Count | Source | 726 | 101 | const T* unpack() const { | 727 | 101 | static_assert(is_pod_v<T>, "T must be a POD type"); | 728 | 101 | return reinterpret_cast<const T*>(payload); | 729 | 101 | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIdEEEEPKT_v Line | Count | Source | 726 | 169 | const T* unpack() const { | 727 | 169 | static_assert(is_pod_v<T>, "T must be a POD type"); | 728 | 169 | return reinterpret_cast<const T*>(payload); | 729 | 169 | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIfEEEEPKT_v Line | Count | Source | 726 | 25 | const T* unpack() const { | 727 | 25 | static_assert(is_pod_v<T>, "T must be a POD type"); | 728 | 25 | return reinterpret_cast<const T*>(payload); | 729 | 25 | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_14JsonbStringValEEEPKT_v Line | Count | Source | 726 | 265 | const T* unpack() const { | 727 | 265 | static_assert(is_pod_v<T>, "T must be a POD type"); | 728 | 265 | return reinterpret_cast<const T*>(payload); | 729 | 265 | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_15JsonbDecimalValINS_10vectorized7DecimalIiEEEEEEPKT_v Line | Count | Source | 726 | 13 | const T* unpack() const { | 727 | 13 | static_assert(is_pod_v<T>, "T must be a POD type"); | 728 | 13 | return reinterpret_cast<const T*>(payload); | 729 | 13 | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_15JsonbDecimalValINS_10vectorized7DecimalIlEEEEEEPKT_v Line | Count | Source | 726 | 13 | const T* unpack() const { | 727 | 13 | static_assert(is_pod_v<T>, "T must be a POD type"); | 728 | 13 | return reinterpret_cast<const T*>(payload); | 729 | 13 | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_15JsonbDecimalValINS_10vectorized12Decimal128V3EEEEEPKT_v Line | Count | Source | 726 | 17 | const T* unpack() const { | 727 | 17 | static_assert(is_pod_v<T>, "T must be a POD type"); | 728 | 17 | return reinterpret_cast<const T*>(payload); | 729 | 17 | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_15JsonbDecimalValINS_10vectorized7DecimalIN4wide7integerILm256EiEEEEEEEEPKT_v Line | Count | Source | 726 | 13 | const T* unpack() const { | 727 | 13 | static_assert(is_pod_v<T>, "T must be a POD type"); | 728 | 13 | return reinterpret_cast<const T*>(payload); | 729 | 13 | } |
|
730 | | |
731 | | // /** |
732 | | // * @brief Unpacks the underlying Jsonb binary content as a pointer to type `T`. |
733 | | // * |
734 | | // * @tparam T A POD (Plain Old Data) type that must satisfy the `JsonbPodType` concept. |
735 | | // * This ensures that `T` is trivially copyable, standard-layout, and safe to |
736 | | // * reinterpret from raw bytes without invoking undefined behavior. |
737 | | // * |
738 | | // * @return A pointer to a `T` object, interpreted from the internal buffer. |
739 | | // * |
740 | | // * @note The caller must ensure that the current JsonbValue actually contains data |
741 | | // * compatible with type `T`, otherwise the result is undefined. |
742 | | // */ |
743 | | // template <JsonbPodType T> |
744 | | // T* unpack() { |
745 | | // static_assert(is_pod_v<T>, "T must be a POD type"); |
746 | | // return reinterpret_cast<T*>(payload); |
747 | | // } |
748 | | |
749 | | int128_t int_val() const; |
750 | | }; |
751 | | |
752 | | // inline ObjectVal* JsonbDocument::operator->() { |
753 | | // return (((JsonbValue*)payload_)->unpack<ObjectVal>()); |
754 | | // } |
755 | | |
756 | 19.0k | inline const ObjectVal* JsonbDocument::operator->() const { |
757 | 19.0k | return (((JsonbValue*)payload_)->unpack<ObjectVal>()); |
758 | 19.0k | } |
759 | | |
/*
 * NumberValT is the template struct for all number types (integers, float and
 * double). It describes the bytes that follow a JsonbValue's type tag and is
 * obtained through JsonbValue::unpack().
 */
764 | | template <typename T> |
765 | | requires std::is_integral_v<T> || std::is_floating_point_v<T> |
766 | | struct NumberValT { |
767 | | public: |
768 | 10.5k | T val() const { return num; } _ZNK5doris10NumberValTIaE3valEv Line | Count | Source | 768 | 720 | T val() const { return num; } |
_ZNK5doris10NumberValTIsE3valEv Line | Count | Source | 768 | 103 | T val() const { return num; } |
_ZNK5doris10NumberValTIiE3valEv Line | Count | Source | 768 | 3.47k | T val() const { return num; } |
_ZNK5doris10NumberValTIlE3valEv Line | Count | Source | 768 | 1.87k | T val() const { return num; } |
_ZNK5doris10NumberValTInE3valEv Line | Count | Source | 768 | 4.16k | T val() const { return num; } |
_ZNK5doris10NumberValTIdE3valEv Line | Count | Source | 768 | 169 | T val() const { return num; } |
_ZNK5doris10NumberValTIfE3valEv Line | Count | Source | 768 | 25 | T val() const { return num; } |
|
769 | | |
770 | | static unsigned int numPackedBytes() { return sizeof(JsonbValue) + sizeof(T); } |
771 | | |
772 | | T num; |
773 | | }; |
774 | | |
775 | 9 | inline int128_t JsonbValue::int_val() const { |
776 | 9 | switch (type) { |
777 | 3 | case JsonbType::T_Int8: |
778 | 3 | return unpack<JsonbInt8Val>()->val(); |
779 | 1 | case JsonbType::T_Int16: |
780 | 1 | return unpack<JsonbInt16Val>()->val(); |
781 | 0 | case JsonbType::T_Int32: |
782 | 0 | return unpack<JsonbInt32Val>()->val(); |
783 | 3 | case JsonbType::T_Int64: |
784 | 3 | return unpack<JsonbInt64Val>()->val(); |
785 | 2 | case JsonbType::T_Int128: |
786 | 2 | return unpack<JsonbInt128Val>()->val(); |
787 | 0 | default: |
788 | 0 | throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB value type: {}", |
789 | 0 | static_cast<int32_t>(type)); |
790 | 9 | } |
791 | 9 | } |
792 | | |
793 | | template <JsonbDecimalType T> |
794 | | struct JsonbDecimalVal { |
795 | | public: |
796 | | using NativeType = typename T::NativeType; |
797 | | |
798 | | // get the decimal value |
799 | 26 | NativeType val() const { |
800 | | // to avoid memory alignment issues, we use memcpy to copy the value |
801 | 26 | NativeType tmp; |
802 | 26 | memcpy(&tmp, &value, sizeof(NativeType)); |
803 | 26 | return tmp; |
804 | 26 | } _ZNK5doris15JsonbDecimalValINS_10vectorized7DecimalIiEEE3valEv Line | Count | Source | 799 | 6 | NativeType val() const { | 800 | | // to avoid memory alignment issues, we use memcpy to copy the value | 801 | 6 | NativeType tmp; | 802 | 6 | memcpy(&tmp, &value, sizeof(NativeType)); | 803 | 6 | return tmp; | 804 | 6 | } |
_ZNK5doris15JsonbDecimalValINS_10vectorized7DecimalIlEEE3valEv Line | Count | Source | 799 | 6 | NativeType val() const { | 800 | | // to avoid memory alignment issues, we use memcpy to copy the value | 801 | 6 | NativeType tmp; | 802 | 6 | memcpy(&tmp, &value, sizeof(NativeType)); | 803 | 6 | return tmp; | 804 | 6 | } |
_ZNK5doris15JsonbDecimalValINS_10vectorized12Decimal128V3EE3valEv Line | Count | Source | 799 | 8 | NativeType val() const { | 800 | | // to avoid memory alignment issues, we use memcpy to copy the value | 801 | 8 | NativeType tmp; | 802 | 8 | memcpy(&tmp, &value, sizeof(NativeType)); | 803 | 8 | return tmp; | 804 | 8 | } |
_ZNK5doris15JsonbDecimalValINS_10vectorized7DecimalIN4wide7integerILm256EiEEEEE3valEv Line | Count | Source | 799 | 6 | NativeType val() const { | 800 | | // to avoid memory alignment issues, we use memcpy to copy the value | 801 | 6 | NativeType tmp; | 802 | 6 | memcpy(&tmp, &value, sizeof(NativeType)); | 803 | 6 | return tmp; | 804 | 6 | } |
|
805 | | |
806 | 29 | static constexpr int numPackedBytes() { |
807 | 29 | return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value); |
808 | 29 | } _ZN5doris15JsonbDecimalValINS_10vectorized7DecimalIiEEE14numPackedBytesEv Line | Count | Source | 806 | 7 | static constexpr int numPackedBytes() { | 807 | 7 | return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value); | 808 | 7 | } |
_ZN5doris15JsonbDecimalValINS_10vectorized7DecimalIlEEE14numPackedBytesEv Line | Count | Source | 806 | 7 | static constexpr int numPackedBytes() { | 807 | 7 | return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value); | 808 | 7 | } |
_ZN5doris15JsonbDecimalValINS_10vectorized12Decimal128V3EE14numPackedBytesEv Line | Count | Source | 806 | 9 | static constexpr int numPackedBytes() { | 807 | 9 | return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value); | 808 | 9 | } |
_ZN5doris15JsonbDecimalValINS_10vectorized7DecimalIN4wide7integerILm256EiEEEEE14numPackedBytesEv Line | Count | Source | 806 | 6 | static constexpr int numPackedBytes() { | 807 | 6 | return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value); | 808 | 6 | } |
|
809 | | |
810 | | uint32_t precision; |
811 | | uint32_t scale; |
812 | | NativeType value; |
813 | | }; |
814 | | |
/*
 * JsonbBinaryVal is the common layout for string and binary values: a 32-bit
 * size followed by the payload. The size is the number of payload bytes only.
 */
819 | | struct JsonbBinaryVal { |
820 | | public: |
821 | | // size of the blob payload only |
822 | 2.11k | unsigned int getBlobLen() const { return size; } |
823 | | |
824 | | // return the blob as byte array |
825 | 4.51k | const char* getBlob() const { return payload; } |
826 | | |
827 | | // size of the total packed bytes |
828 | 15.3k | unsigned int numPackedBytes() const { return sizeof(JsonbValue) + sizeof(size) + size; } |
829 | | friend class JsonbDocument; |
830 | | |
831 | | uint32_t size; |
832 | | char payload[0]; |
833 | | }; |
834 | | |
835 | | /* |
836 | | * String type |
837 | | * Note: JSONB string may not be a c-string (NULL-terminated) |
838 | | */ |
839 | | struct JsonbStringVal : public JsonbBinaryVal { |
840 | | public: |
841 | | /* |
842 | | This function return the actual size of a string. Since for |
843 | | a string, it can be null-terminated with null paddings or it |
844 | | can take all the space in the payload without null in the end. |
845 | | So we need to check it to get the true actual length of a string. |
846 | | */ |
847 | 136 | size_t length() const { |
848 | | // It's an empty string |
849 | 136 | if (0 == size) { |
850 | 0 | return size; |
851 | 0 | } |
852 | | // The string stored takes all the spaces in payload |
853 | 136 | if (payload[size - 1] != 0) { |
854 | 136 | return size; |
855 | 136 | } |
856 | | // It's shorter than the size of payload |
857 | 0 | return strnlen(payload, size); |
858 | 136 | } |
859 | | // convert the string (case insensitive) to a boolean value |
860 | | // "false": 0 |
861 | | // "true": 1 |
862 | | // all other strings: -1 |
863 | 0 | int getBoolVal() { |
864 | 0 | if (size == 4 && tolower(payload[0]) == 't' && tolower(payload[1]) == 'r' && |
865 | 0 | tolower(payload[2]) == 'u' && tolower(payload[3]) == 'e') { |
866 | 0 | return 1; |
867 | 0 | } else if (size == 5 && tolower(payload[0]) == 'f' && tolower(payload[1]) == 'a' && |
868 | 0 | tolower(payload[2]) == 'l' && tolower(payload[3]) == 's' && |
869 | 0 | tolower(payload[4]) == 'e') { |
870 | 0 | return 0; |
871 | 0 | } else { |
872 | 0 | return -1; |
873 | 0 | } |
874 | 0 | } |
875 | | }; |
876 | | |
877 | | /* |
878 | | * ContainerVal is the base class (derived from JsonbValue) for object and |
879 | | * array types. The size indicates the total bytes of the payload. |
880 | | */ |
881 | | struct ContainerVal { |
882 | | // size of the container payload only |
883 | 0 | unsigned int getContainerSize() const { return size; } |
884 | | |
885 | | // return the container payload as byte array |
886 | 0 | const char* getPayload() const { return payload; } |
887 | | |
888 | | // size of the total packed bytes |
889 | 1.84k | unsigned int numPackedBytes() const { return sizeof(JsonbValue) + sizeof(size) + size; } |
890 | | friend class JsonbDocument; |
891 | | |
892 | | uint32_t size; |
893 | | char payload[0]; |
894 | | }; |
895 | | |
896 | | /* |
897 | | * Object type |
898 | | */ |
899 | | struct ObjectVal : public ContainerVal { |
900 | | using value_type = JsonbKeyValue; |
901 | | using pointer = value_type*; |
902 | | using const_pointer = const value_type*; |
903 | | using iterator = JsonbFwdIteratorT<pointer, ObjectVal>; |
904 | | using const_iterator = JsonbFwdIteratorT<const_pointer, ObjectVal>; |
905 | | |
906 | 0 | const_iterator search(const char* key, hDictFind handler = nullptr) const { |
907 | 0 | return const_cast<ObjectVal*>(this)->search(key, handler); |
908 | 0 | } |
909 | | |
910 | 0 | const_iterator search(const char* key, unsigned int klen, hDictFind handler = nullptr) const { |
911 | 0 | return const_cast<ObjectVal*>(this)->search(key, klen, handler); |
912 | 0 | } |
913 | | |
914 | 0 | const_iterator search(int key_id) const { return const_cast<ObjectVal*>(this)->search(key_id); } |
915 | 0 | iterator search(const char* key, hDictFind handler = nullptr) { |
916 | 0 | if (!key) { |
917 | 0 | return end(); |
918 | 0 | } |
919 | 0 | return search(key, (unsigned int)strlen(key), handler); |
920 | 0 | } |
921 | | |
922 | 25 | iterator search(const char* key, unsigned int klen, hDictFind handler = nullptr) { |
923 | 25 | if (!key || !klen) { |
924 | 0 | return end(); |
925 | 0 | } |
926 | | |
927 | 25 | int key_id = -1; |
928 | 25 | if (handler && (key_id = handler(key, klen)) >= 0) { |
929 | 0 | return search(key_id); |
930 | 0 | } |
931 | 25 | return internalSearch(key, klen); |
932 | 25 | } |
933 | | |
934 | 0 | iterator search(int key_id) { |
935 | 0 | if (key_id < 0 || key_id > JsonbKeyValue::sMaxKeyId) { |
936 | 0 | return end(); |
937 | 0 | } |
938 | | |
939 | 0 | const char* pch = payload; |
940 | 0 | const char* fence = payload + size; |
941 | |
|
942 | 0 | while (pch < fence) { |
943 | 0 | auto* pkey = (JsonbKeyValue*)(pch); |
944 | 0 | if (!pkey->klen() && key_id == pkey->getKeyId()) { |
945 | 0 | return iterator(pkey); |
946 | 0 | } |
947 | 0 | pch += pkey->numPackedBytes(); |
948 | 0 | } |
949 | | |
950 | 0 | assert(pch == fence); |
951 | 0 | return end(); |
952 | 0 | } |
953 | | |
954 | | // Get number of elements in object |
955 | 6 | int numElem() const { |
956 | 6 | const char* pch = payload; |
957 | 6 | const char* fence = payload + size; |
958 | | |
959 | 6 | unsigned int num = 0; |
960 | 28 | while (pch < fence) { |
961 | 22 | auto* pkey = (JsonbKeyValue*)(pch); |
962 | 22 | ++num; |
963 | 22 | pch += pkey->numPackedBytes(); |
964 | 22 | } |
965 | | |
966 | 6 | assert(pch == fence); |
967 | | |
968 | 6 | return num; |
969 | 6 | } |
970 | | |
971 | 0 | JsonbKeyValue* getJsonbKeyValue(unsigned int i) const { |
972 | 0 | const char* pch = payload; |
973 | 0 | const char* fence = payload + size; |
974 | |
|
975 | 0 | unsigned int num = 0; |
976 | 0 | while (pch < fence) { |
977 | 0 | auto* pkey = (JsonbKeyValue*)(pch); |
978 | 0 | if (num == i) { |
979 | 0 | return pkey; |
980 | 0 | } |
981 | 0 | ++num; |
982 | 0 | pch += pkey->numPackedBytes(); |
983 | 0 | } |
984 | | |
985 | 0 | assert(pch == fence); |
986 | | |
987 | 0 | return nullptr; |
988 | 0 | } |
989 | | |
990 | 0 | JsonbValue* find(const char* key, hDictFind handler = nullptr) const { |
991 | 0 | return const_cast<ObjectVal*>(this)->find(key, handler); |
992 | 0 | } |
993 | | |
994 | 25 | JsonbValue* find(const char* key, unsigned int klen, hDictFind handler = nullptr) const { |
995 | 25 | return const_cast<ObjectVal*>(this)->find(key, klen, handler); |
996 | 25 | } |
997 | 0 | JsonbValue* find(int key_id) const { return const_cast<ObjectVal*>(this)->find(key_id); } |
998 | | |
999 | | // find the JSONB value by a key string (null terminated) |
1000 | 0 | JsonbValue* find(const char* key, hDictFind handler = nullptr) { |
1001 | 0 | if (!key) { |
1002 | 0 | return nullptr; |
1003 | 0 | } |
1004 | 0 | return find(key, (unsigned int)strlen(key), handler); |
1005 | 0 | } |
1006 | | |
1007 | | // find the JSONB value by a key string (with length) |
1008 | 25 | JsonbValue* find(const char* key, unsigned int klen, hDictFind handler = nullptr) { |
1009 | 25 | iterator kv = search(key, klen, handler); |
1010 | 25 | if (end() == kv) { |
1011 | 2 | return nullptr; |
1012 | 2 | } |
1013 | 23 | return kv->value(); |
1014 | 25 | } |
1015 | | |
1016 | | // find the JSONB value by a key dictionary ID |
1017 | 0 | JsonbValue* find(int key_id) { |
1018 | 0 | iterator kv = search(key_id); |
1019 | 0 | if (end() == kv) { |
1020 | 0 | return nullptr; |
1021 | 0 | } |
1022 | 0 | return kv->value(); |
1023 | 0 | } |
1024 | | |
1025 | 0 | iterator begin() { return iterator((pointer)payload); } |
1026 | | |
1027 | 1.64k | const_iterator begin() const { return const_iterator((pointer)payload); } |
1028 | | |
1029 | 27 | iterator end() { return iterator((pointer)(payload + size)); } |
1030 | | |
1031 | 18.4k | const_iterator end() const { return const_iterator((pointer)(payload + size)); } |
1032 | | |
1033 | | private: |
1034 | 25 | iterator internalSearch(const char* key, unsigned int klen) { |
1035 | 25 | const char* pch = payload; |
1036 | 25 | const char* fence = payload + size; |
1037 | | |
1038 | 35 | while (pch < fence) { |
1039 | 33 | auto* pkey = (JsonbKeyValue*)(pch); |
1040 | 33 | if (klen == pkey->klen() && strncmp(key, pkey->getKeyStr(), klen) == 0) { |
1041 | 23 | return iterator(pkey); |
1042 | 23 | } |
1043 | 10 | pch += pkey->numPackedBytes(); |
1044 | 10 | } |
1045 | | |
1046 | 25 | assert(pch == fence); |
1047 | | |
1048 | 2 | return end(); |
1049 | 2 | } |
1050 | | }; |
1051 | | |
1052 | | /* |
1053 | | * Array type |
1054 | | */ |
1055 | | struct ArrayVal : public ContainerVal { |
1056 | | using value_type = JsonbValue; |
1057 | | using pointer = value_type*; |
1058 | | using const_pointer = const value_type*; |
1059 | | using iterator = JsonbFwdIteratorT<pointer, ArrayVal>; |
1060 | | using const_iterator = JsonbFwdIteratorT<const_pointer, ArrayVal>; |
1061 | | |
1062 | | // get the JSONB value at index |
1063 | 31 | JsonbValue* get(int idx) const { |
1064 | 31 | if (idx < 0) { |
1065 | 0 | return nullptr; |
1066 | 0 | } |
1067 | | |
1068 | 31 | const char* pch = payload; |
1069 | 31 | const char* fence = payload + size; |
1070 | | |
1071 | 72 | while (pch < fence && idx-- > 0) { |
1072 | 41 | pch += ((JsonbValue*)pch)->numPackedBytes(); |
1073 | 41 | } |
1074 | 31 | if (idx > 0 || pch == fence) { |
1075 | 7 | return nullptr; |
1076 | 7 | } |
1077 | | |
1078 | 24 | return (JsonbValue*)pch; |
1079 | 31 | } |
1080 | | |
1081 | | // Get number of elements in array |
1082 | 8 | int numElem() const { |
1083 | 8 | const char* pch = payload; |
1084 | 8 | const char* fence = payload + size; |
1085 | | |
1086 | 8 | unsigned int num = 0; |
1087 | 29 | while (pch < fence) { |
1088 | 21 | ++num; |
1089 | 21 | pch += ((JsonbValue*)pch)->numPackedBytes(); |
1090 | 21 | } |
1091 | | |
1092 | 8 | assert(pch == fence); |
1093 | | |
1094 | 8 | return num; |
1095 | 8 | } |
1096 | | |
1097 | 0 | iterator begin() { return iterator((pointer)payload); } |
1098 | | |
1099 | 68 | const_iterator begin() const { return const_iterator((pointer)payload); } |
1100 | | |
1101 | 0 | iterator end() { return iterator((pointer)(payload + size)); } |
1102 | | |
1103 | 67 | const_iterator end() const { return const_iterator((pointer)(payload + size)); } |
1104 | | }; |
1105 | | |
1106 | | inline Status JsonbDocument::checkAndCreateDocument(const char* pb, size_t size, |
1107 | 27.6k | JsonbDocument** doc) { |
1108 | 27.6k | *doc = nullptr; |
1109 | 27.6k | if (!pb || size < sizeof(JsonbHeader) + sizeof(JsonbValue)) { |
1110 | 0 | return Status::InvalidArgument("Invalid JSONB document: too small size({}) or null pointer", |
1111 | 0 | size); |
1112 | 0 | } |
1113 | | |
1114 | 27.6k | auto* doc_ptr = (JsonbDocument*)pb; |
1115 | 27.6k | if (doc_ptr->header_.ver_ != JSONB_VER) { |
1116 | 2 | return Status::InvalidArgument("Invalid JSONB document: invalid version({})", |
1117 | 2 | doc_ptr->header_.ver_); |
1118 | 2 | } |
1119 | | |
1120 | 27.6k | auto* val = (JsonbValue*)doc_ptr->payload_; |
1121 | 27.6k | if (val->type < JsonbType::T_Null || val->type >= JsonbType::NUM_TYPES || |
1122 | 27.6k | size != sizeof(JsonbHeader) + val->numPackedBytes()) { |
1123 | 0 | return Status::InvalidArgument("Invalid JSONB document: invalid type({}) or size({})", |
1124 | 0 | static_cast<JsonbTypeUnder>(val->type), size); |
1125 | 0 | } |
1126 | | |
1127 | 27.6k | *doc = doc_ptr; |
1128 | 27.6k | return Status::OK(); |
1129 | 27.6k | } |
1130 | 0 | inline void JsonbDocument::setValue(const JsonbValue* value) { |
1131 | 0 | memcpy(payload_, value, value->numPackedBytes()); |
1132 | 0 | } |
1133 | | |
1134 | 12 | inline JsonbValue* JsonbDocument::createValue(const char* pb, size_t size) { |
1135 | 12 | if (!pb || size < sizeof(JsonbHeader) + sizeof(JsonbValue)) { |
1136 | 0 | return nullptr; |
1137 | 0 | } |
1138 | | |
1139 | 12 | auto* doc = (JsonbDocument*)pb; |
1140 | 12 | if (doc->header_.ver_ != JSONB_VER) { |
1141 | 0 | return nullptr; |
1142 | 0 | } |
1143 | | |
1144 | 12 | auto* val = (JsonbValue*)doc->payload_; |
1145 | 12 | if (size != sizeof(JsonbHeader) + val->numPackedBytes()) { |
1146 | 0 | return nullptr; |
1147 | 0 | } |
1148 | | |
1149 | 12 | return val; |
1150 | 12 | } |
1151 | | |
1152 | 0 | inline unsigned int JsonbDocument::numPackedBytes() const { |
1153 | 0 | return ((const JsonbValue*)payload_)->numPackedBytes() + sizeof(header_); |
1154 | 0 | } |
1155 | | |
1156 | 17.4k | inline unsigned int JsonbKeyValue::numPackedBytes() const { |
1157 | 17.4k | unsigned int ks = keyPackedBytes(); |
1158 | 17.4k | auto* val = (JsonbValue*)(((char*)this) + ks); |
1159 | 17.4k | return ks + val->numPackedBytes(); |
1160 | 17.4k | } |
1161 | | |
1162 | | // Poor man's "virtual" function JsonbValue::numPackedBytes |
1163 | 71.8k | inline unsigned int JsonbValue::numPackedBytes() const { |
1164 | 71.8k | switch (type) { |
1165 | 2.74k | case JsonbType::T_Null: |
1166 | 2.82k | case JsonbType::T_True: |
1167 | 2.86k | case JsonbType::T_False: { |
1168 | 2.86k | return sizeof(type); |
1169 | 2.82k | } |
1170 | | |
1171 | 1.66k | case JsonbType::T_Int8: { |
1172 | 1.66k | return sizeof(type) + sizeof(int8_t); |
1173 | 2.82k | } |
1174 | 117 | case JsonbType::T_Int16: { |
1175 | 117 | return sizeof(type) + sizeof(int16_t); |
1176 | 2.82k | } |
1177 | 3.49k | case JsonbType::T_Int32: { |
1178 | 3.49k | return sizeof(type) + sizeof(int32_t); |
1179 | 2.82k | } |
1180 | 21.4k | case JsonbType::T_Int64: { |
1181 | 21.4k | return sizeof(type) + sizeof(int64_t); |
1182 | 2.82k | } |
1183 | 10.6k | case JsonbType::T_Double: { |
1184 | 10.6k | return sizeof(type) + sizeof(double); |
1185 | 2.82k | } |
1186 | 26 | case JsonbType::T_Float: { |
1187 | 26 | return sizeof(type) + sizeof(float); |
1188 | 2.82k | } |
1189 | 14.4k | case JsonbType::T_Int128: { |
1190 | 14.4k | return sizeof(type) + sizeof(int128_t); |
1191 | 2.82k | } |
1192 | 10.9k | case JsonbType::T_String: |
1193 | 15.3k | case JsonbType::T_Binary: { |
1194 | 15.3k | return unpack<JsonbBinaryVal>()->numPackedBytes(); |
1195 | 10.9k | } |
1196 | | |
1197 | 1.69k | case JsonbType::T_Object: |
1198 | 1.84k | case JsonbType::T_Array: { |
1199 | 1.84k | return unpack<ContainerVal>()->numPackedBytes(); |
1200 | 1.69k | } |
1201 | 7 | case JsonbType::T_Decimal32: { |
1202 | 7 | return JsonbDecimal32::numPackedBytes(); |
1203 | 1.69k | } |
1204 | 7 | case JsonbType::T_Decimal64: { |
1205 | 7 | return JsonbDecimal64::numPackedBytes(); |
1206 | 1.69k | } |
1207 | 9 | case JsonbType::T_Decimal128: { |
1208 | 9 | return JsonbDecimal128::numPackedBytes(); |
1209 | 1.69k | } |
1210 | 6 | case JsonbType::T_Decimal256: { |
1211 | 6 | return JsonbDecimal256::numPackedBytes(); |
1212 | 1.69k | } |
1213 | 0 | case JsonbType::NUM_TYPES: |
1214 | 0 | break; |
1215 | 71.8k | } |
1216 | | |
1217 | 0 | throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB value type: {}", |
1218 | 0 | static_cast<int32_t>(type)); |
1219 | 71.8k | } |
1220 | | |
1221 | 6 | inline int JsonbValue::numElements() const { |
1222 | 6 | switch (type) { |
1223 | 0 | case JsonbType::T_Int8: |
1224 | 0 | case JsonbType::T_Int16: |
1225 | 0 | case JsonbType::T_Int32: |
1226 | 0 | case JsonbType::T_Int64: |
1227 | 0 | case JsonbType::T_Double: |
1228 | 0 | case JsonbType::T_Float: |
1229 | 0 | case JsonbType::T_Int128: |
1230 | 1 | case JsonbType::T_String: |
1231 | 1 | case JsonbType::T_Binary: |
1232 | 2 | case JsonbType::T_Null: |
1233 | 2 | case JsonbType::T_True: |
1234 | 2 | case JsonbType::T_False: |
1235 | 2 | case JsonbType::T_Decimal32: |
1236 | 2 | case JsonbType::T_Decimal64: |
1237 | 2 | case JsonbType::T_Decimal128: |
1238 | 2 | case JsonbType::T_Decimal256: { |
1239 | 2 | return 1; |
1240 | 2 | } |
1241 | 0 | case JsonbType::T_Object: { |
1242 | 0 | return unpack<ObjectVal>()->numElem(); |
1243 | 2 | } |
1244 | 4 | case JsonbType::T_Array: { |
1245 | 4 | return unpack<ArrayVal>()->numElem(); |
1246 | 2 | } |
1247 | 0 | case JsonbType::NUM_TYPES: |
1248 | 0 | break; |
1249 | 6 | } |
1250 | 0 | throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB value type: {}", |
1251 | 0 | static_cast<int32_t>(type)); |
1252 | 6 | } |
1253 | | |
1254 | 3 | inline bool JsonbValue::contains(JsonbValue* rhs) const { |
1255 | 3 | switch (type) { |
1256 | 1 | case JsonbType::T_Int8: |
1257 | 1 | case JsonbType::T_Int16: |
1258 | 1 | case JsonbType::T_Int32: |
1259 | 1 | case JsonbType::T_Int64: |
1260 | 1 | case JsonbType::T_Int128: { |
1261 | 1 | return rhs->isInt() && this->int_val() == rhs->int_val(); |
1262 | 1 | } |
1263 | 0 | case JsonbType::T_Double: |
1264 | 0 | case JsonbType::T_Float: { |
1265 | 0 | if (!rhs->isDouble() && !rhs->isFloat()) { |
1266 | 0 | return false; |
1267 | 0 | } |
1268 | 0 | double left = isDouble() ? unpack<JsonbDoubleVal>()->val() : unpack<JsonbFloatVal>()->val(); |
1269 | 0 | double right = rhs->isDouble() ? rhs->unpack<JsonbDoubleVal>()->val() |
1270 | 0 | : rhs->unpack<JsonbFloatVal>()->val(); |
1271 | 0 | return left == right; |
1272 | 0 | } |
1273 | 1 | case JsonbType::T_String: |
1274 | 1 | case JsonbType::T_Binary: { |
1275 | 1 | if (rhs->isString() || rhs->isBinary()) { |
1276 | 1 | const auto* str_value1 = unpack<JsonbStringVal>(); |
1277 | 1 | const auto* str_value2 = rhs->unpack<JsonbStringVal>(); |
1278 | 1 | return str_value1->length() == str_value2->length() && |
1279 | 1 | std::memcmp(str_value1->getBlob(), str_value2->getBlob(), |
1280 | 1 | str_value1->length()) == 0; |
1281 | 1 | } |
1282 | 0 | return false; |
1283 | 1 | } |
1284 | 1 | case JsonbType::T_Array: { |
1285 | 1 | int lhs_num = unpack<ArrayVal>()->numElem(); |
1286 | 1 | if (rhs->isArray()) { |
1287 | 0 | int rhs_num = rhs->unpack<ArrayVal>()->numElem(); |
1288 | 0 | if (rhs_num > lhs_num) { |
1289 | 0 | return false; |
1290 | 0 | } |
1291 | 0 | int contains_num = 0; |
1292 | 0 | for (int i = 0; i < lhs_num; ++i) { |
1293 | 0 | for (int j = 0; j < rhs_num; ++j) { |
1294 | 0 | if (unpack<ArrayVal>()->get(i)->contains(rhs->unpack<ArrayVal>()->get(j))) { |
1295 | 0 | contains_num++; |
1296 | 0 | break; |
1297 | 0 | } |
1298 | 0 | } |
1299 | 0 | } |
1300 | 0 | return contains_num == rhs_num; |
1301 | 0 | } |
1302 | 1 | for (int i = 0; i < lhs_num; ++i) { |
1303 | 1 | if (unpack<ArrayVal>()->get(i)->contains(rhs)) { |
1304 | 1 | return true; |
1305 | 1 | } |
1306 | 1 | } |
1307 | 0 | return false; |
1308 | 1 | } |
1309 | 0 | case JsonbType::T_Object: { |
1310 | 0 | if (rhs->isObject()) { |
1311 | 0 | const auto* obj_value1 = unpack<ObjectVal>(); |
1312 | 0 | const auto* obj_value2 = rhs->unpack<ObjectVal>(); |
1313 | 0 | for (int i = 0; i < obj_value2->numElem(); ++i) { |
1314 | 0 | JsonbKeyValue* key = obj_value2->getJsonbKeyValue(i); |
1315 | 0 | JsonbValue* value = obj_value1->find(key->getKeyStr(), key->klen()); |
1316 | 0 | if (value == nullptr || !value->contains(key->value())) { |
1317 | 0 | return false; |
1318 | 0 | } |
1319 | 0 | } |
1320 | 0 | return true; |
1321 | 0 | } |
1322 | 0 | return false; |
1323 | 0 | } |
1324 | 0 | case JsonbType::T_Null: { |
1325 | 0 | return rhs->isNull(); |
1326 | 0 | } |
1327 | 0 | case JsonbType::T_True: { |
1328 | 0 | return rhs->isTrue(); |
1329 | 0 | } |
1330 | 0 | case JsonbType::T_False: { |
1331 | 0 | return rhs->isFalse(); |
1332 | 0 | } |
1333 | 0 | case JsonbType::T_Decimal32: { |
1334 | 0 | if (rhs->isDecimal32()) { |
1335 | 0 | return unpack<JsonbDecimal32>()->val() == rhs->unpack<JsonbDecimal32>()->val() && |
1336 | 0 | unpack<JsonbDecimal32>()->precision == |
1337 | 0 | rhs->unpack<JsonbDecimal32>()->precision && |
1338 | 0 | unpack<JsonbDecimal32>()->scale == rhs->unpack<JsonbDecimal32>()->scale; |
1339 | 0 | } |
1340 | 0 | return false; |
1341 | 0 | } |
1342 | 0 | case JsonbType::T_Decimal64: { |
1343 | 0 | if (rhs->isDecimal64()) { |
1344 | 0 | return unpack<JsonbDecimal64>()->val() == rhs->unpack<JsonbDecimal64>()->val() && |
1345 | 0 | unpack<JsonbDecimal64>()->precision == |
1346 | 0 | rhs->unpack<JsonbDecimal64>()->precision && |
1347 | 0 | unpack<JsonbDecimal64>()->scale == rhs->unpack<JsonbDecimal64>()->scale; |
1348 | 0 | } |
1349 | 0 | return false; |
1350 | 0 | } |
1351 | 0 | case JsonbType::T_Decimal128: { |
1352 | 0 | if (rhs->isDecimal128()) { |
1353 | 0 | return unpack<JsonbDecimal128>()->val() == rhs->unpack<JsonbDecimal128>()->val() && |
1354 | 0 | unpack<JsonbDecimal128>()->precision == |
1355 | 0 | rhs->unpack<JsonbDecimal128>()->precision && |
1356 | 0 | unpack<JsonbDecimal128>()->scale == rhs->unpack<JsonbDecimal128>()->scale; |
1357 | 0 | } |
1358 | 0 | return false; |
1359 | 0 | } |
1360 | 0 | case JsonbType::T_Decimal256: { |
1361 | 0 | if (rhs->isDecimal256()) { |
1362 | 0 | return unpack<JsonbDecimal256>()->val() == rhs->unpack<JsonbDecimal256>()->val() && |
1363 | 0 | unpack<JsonbDecimal256>()->precision == |
1364 | 0 | rhs->unpack<JsonbDecimal256>()->precision && |
1365 | 0 | unpack<JsonbDecimal256>()->scale == rhs->unpack<JsonbDecimal256>()->scale; |
1366 | 0 | } |
1367 | 0 | return false; |
1368 | 0 | } |
1369 | 0 | case JsonbType::NUM_TYPES: |
1370 | 0 | break; |
1371 | 3 | } |
1372 | | |
1373 | 0 | throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB value type: {}", |
1374 | 0 | static_cast<int32_t>(type)); |
1375 | 3 | } |
1376 | | |
1377 | 96 | inline bool JsonbPath::seek(const char* key_path, size_t kp_len) { |
1378 | 96 | while (kp_len > 0 && std::isspace(key_path[kp_len - 1])) { |
1379 | 0 | --kp_len; |
1380 | 0 | } |
1381 | | |
1382 | | //path invalid |
1383 | 96 | if (!key_path || kp_len == 0) { |
1384 | 0 | return false; |
1385 | 0 | } |
1386 | 96 | Stream stream(key_path, kp_len); |
1387 | 96 | stream.skip_whitespace(); |
1388 | 96 | if (stream.exhausted() || stream.read() != SCOPE) { |
1389 | | //path invalid |
1390 | 0 | return false; |
1391 | 0 | } |
1392 | | |
1393 | 186 | while (!stream.exhausted()) { |
1394 | 90 | stream.skip_whitespace(); |
1395 | 90 | stream.clear_leg_ptr(); |
1396 | 90 | stream.clear_leg_len(); |
1397 | | |
1398 | 90 | if (!JsonbPath::parsePath(&stream, this)) { |
1399 | | //path invalid |
1400 | 0 | return false; |
1401 | 0 | } |
1402 | 90 | } |
1403 | 96 | return true; |
1404 | 96 | } |
1405 | | |
1406 | 90 | inline bool JsonbPath::parsePath(Stream* stream, JsonbPath* path) { |
1407 | | // $[0] |
1408 | 90 | if (stream->peek() == BEGIN_ARRAY) { |
1409 | 58 | return parse_array(stream, path); |
1410 | 58 | } |
1411 | | // $.a or $.[0] |
1412 | 32 | else if (stream->peek() == BEGIN_MEMBER) { |
1413 | | // advance past the . |
1414 | 32 | stream->skip(1); |
1415 | | |
1416 | 32 | if (stream->exhausted()) { |
1417 | 0 | return false; |
1418 | 0 | } |
1419 | | |
1420 | | // $.[0] |
1421 | 32 | if (stream->peek() == BEGIN_ARRAY) { |
1422 | 0 | return parse_array(stream, path); |
1423 | 0 | } |
1424 | | // $.a |
1425 | 32 | else { |
1426 | 32 | return parse_member(stream, path); |
1427 | 32 | } |
1428 | 32 | } else if (stream->peek() == WILDCARD) { |
1429 | 0 | stream->skip(1); |
1430 | 0 | if (stream->exhausted()) { |
1431 | 0 | return false; |
1432 | 0 | } |
1433 | | |
1434 | | // $** |
1435 | 0 | if (stream->peek() == WILDCARD) { |
1436 | 0 | path->_is_supper_wildcard = true; |
1437 | 0 | } |
1438 | |
|
1439 | 0 | stream->skip(1); |
1440 | 0 | if (stream->exhausted()) { |
1441 | 0 | return false; |
1442 | 0 | } |
1443 | | |
1444 | 0 | if (stream->peek() == BEGIN_ARRAY) { |
1445 | 0 | return parse_array(stream, path); |
1446 | 0 | } else if (stream->peek() == BEGIN_MEMBER) { |
1447 | | // advance past the . |
1448 | 0 | stream->skip(1); |
1449 | |
|
1450 | 0 | if (stream->exhausted()) { |
1451 | 0 | return false; |
1452 | 0 | } |
1453 | | |
1454 | | // $.[0] |
1455 | 0 | if (stream->peek() == BEGIN_ARRAY) { |
1456 | 0 | return parse_array(stream, path); |
1457 | 0 | } |
1458 | | // $.a |
1459 | 0 | else { |
1460 | 0 | return parse_member(stream, path); |
1461 | 0 | } |
1462 | 0 | } |
1463 | 0 | return false; |
1464 | 0 | } else { |
1465 | 0 | return false; //invalid json path |
1466 | 0 | } |
1467 | 90 | } |
1468 | | |
1469 | 58 | inline bool JsonbPath::parse_array(Stream* stream, JsonbPath* path) { |
1470 | 58 | assert(stream->peek() == BEGIN_ARRAY); |
1471 | 58 | stream->skip(1); |
1472 | 58 | if (stream->exhausted()) { |
1473 | 0 | return false; |
1474 | 0 | } |
1475 | | |
1476 | 58 | if (stream->peek() == WILDCARD) { |
1477 | 0 | stream->set_leg_ptr(const_cast<char*>(stream->position())); |
1478 | 0 | stream->add_leg_len(); |
1479 | 0 | stream->skip(1); |
1480 | 0 | if (stream->exhausted()) { |
1481 | 0 | return false; |
1482 | 0 | } |
1483 | | |
1484 | 0 | if (stream->peek() == END_ARRAY) { |
1485 | 0 | std::unique_ptr<leg_info> leg( |
1486 | 0 | new leg_info(stream->get_leg_ptr(), stream->get_leg_len(), 0, ARRAY_CODE)); |
1487 | 0 | path->add_leg_to_leg_vector(std::move(leg)); |
1488 | 0 | stream->skip(1); |
1489 | 0 | path->_is_wildcard = true; |
1490 | 0 | return true; |
1491 | 0 | } else { |
1492 | 0 | return false; |
1493 | 0 | } |
1494 | 0 | } |
1495 | | |
1496 | 58 | stream->set_leg_ptr(const_cast<char*>(stream->position())); |
1497 | | |
1498 | 116 | for (; !stream->exhausted() && stream->peek() != END_ARRAY; stream->advance()) { |
1499 | 58 | stream->add_leg_len(); |
1500 | 58 | } |
1501 | | |
1502 | 58 | if (stream->exhausted() || stream->peek() != END_ARRAY) { |
1503 | 0 | return false; |
1504 | 58 | } else { |
1505 | 58 | stream->skip(1); |
1506 | 58 | } |
1507 | | |
1508 | | //parse array index to int |
1509 | | |
1510 | 58 | std::string_view idx_string(stream->get_leg_ptr(), stream->get_leg_len()); |
1511 | 58 | int index = 0; |
1512 | | |
1513 | 58 | if (stream->get_leg_len() >= 4 && |
1514 | 58 | std::equal(LAST, LAST + 4, stream->get_leg_ptr(), |
1515 | 0 | [](char c1, char c2) { return std::tolower(c1) == std::tolower(c2); })) { |
1516 | 0 | auto pos = idx_string.find(MINUS); |
1517 | |
|
1518 | 0 | if (pos != std::string::npos) { |
1519 | 0 | for (size_t i = 4; i < pos; ++i) { |
1520 | 0 | if (std::isspace(idx_string[i])) { |
1521 | 0 | continue; |
1522 | 0 | } else { |
1523 | | // leading zeroes are not allowed |
1524 | 0 | LOG(WARNING) << "Non-space char in idx_string: '" << idx_string << "'"; |
1525 | 0 | return false; |
1526 | 0 | } |
1527 | 0 | } |
1528 | 0 | idx_string = idx_string.substr(pos + 1); |
1529 | 0 | idx_string = trim(idx_string); |
1530 | |
|
1531 | 0 | auto result = std::from_chars(idx_string.data(), idx_string.data() + idx_string.size(), |
1532 | 0 | index); |
1533 | 0 | if (result.ec != std::errc()) { |
1534 | 0 | LOG(WARNING) << "Invalid index in JSON path: '" << idx_string << "'"; |
1535 | 0 | return false; |
1536 | 0 | } |
1537 | |
|
1538 | 0 | } else if (stream->get_leg_len() > 4) { |
1539 | 0 | return false; |
1540 | 0 | } |
1541 | | |
1542 | 0 | std::unique_ptr<leg_info> leg(new leg_info(nullptr, 0, -index - 1, ARRAY_CODE)); |
1543 | 0 | path->add_leg_to_leg_vector(std::move(leg)); |
1544 | |
|
1545 | 0 | return true; |
1546 | 0 | } |
1547 | | |
1548 | 58 | auto result = std::from_chars(idx_string.data(), idx_string.data() + idx_string.size(), index); |
1549 | | |
1550 | 58 | if (result.ec != std::errc()) { |
1551 | 0 | return false; |
1552 | 0 | } |
1553 | | |
1554 | 58 | std::unique_ptr<leg_info> leg(new leg_info(nullptr, 0, index, ARRAY_CODE)); |
1555 | 58 | path->add_leg_to_leg_vector(std::move(leg)); |
1556 | | |
1557 | 58 | return true; |
1558 | 58 | } |
1559 | | |
1560 | 32 | inline bool JsonbPath::parse_member(Stream* stream, JsonbPath* path) { |
1561 | 32 | if (stream->exhausted()) { |
1562 | 0 | return false; |
1563 | 0 | } |
1564 | | |
1565 | 32 | if (stream->peek() == WILDCARD) { |
1566 | 0 | stream->set_leg_ptr(const_cast<char*>(stream->position())); |
1567 | 0 | stream->add_leg_len(); |
1568 | 0 | stream->skip(1); |
1569 | 0 | std::unique_ptr<leg_info> leg( |
1570 | 0 | new leg_info(stream->get_leg_ptr(), stream->get_leg_len(), 0, MEMBER_CODE)); |
1571 | 0 | path->add_leg_to_leg_vector(std::move(leg)); |
1572 | 0 | path->_is_wildcard = true; |
1573 | 0 | return true; |
1574 | 0 | } |
1575 | | |
1576 | 32 | stream->set_leg_ptr(const_cast<char*>(stream->position())); |
1577 | | |
1578 | 32 | const char* left_quotation_marks = nullptr; |
1579 | 32 | const char* right_quotation_marks = nullptr; |
1580 | | |
1581 | 96 | for (; !stream->exhausted(); stream->advance()) { |
1582 | | // Only accept space characters quoted by double quotes. |
1583 | 64 | if (std::isspace(stream->peek()) && left_quotation_marks == nullptr) { |
1584 | 0 | return false; |
1585 | 64 | } else if (stream->peek() == ESCAPE) { |
1586 | 0 | stream->add_leg_len(); |
1587 | 0 | stream->skip(1); |
1588 | 0 | stream->add_leg_len(); |
1589 | 0 | stream->set_has_escapes(true); |
1590 | 0 | if (stream->exhausted()) { |
1591 | 0 | return false; |
1592 | 0 | } |
1593 | 0 | continue; |
1594 | 64 | } else if (stream->peek() == DOUBLE_QUOTE) { |
1595 | 0 | if (left_quotation_marks == nullptr) { |
1596 | 0 | left_quotation_marks = stream->position(); |
1597 | 0 | stream->set_leg_ptr(const_cast<char*>(++left_quotation_marks)); |
1598 | 0 | continue; |
1599 | 0 | } else { |
1600 | 0 | right_quotation_marks = stream->position(); |
1601 | 0 | stream->skip(1); |
1602 | 0 | break; |
1603 | 0 | } |
1604 | 64 | } else if (stream->peek() == BEGIN_MEMBER || stream->peek() == BEGIN_ARRAY) { |
1605 | 0 | if (left_quotation_marks == nullptr) { |
1606 | 0 | break; |
1607 | 0 | } |
1608 | 0 | } |
1609 | | |
1610 | 64 | stream->add_leg_len(); |
1611 | 64 | } |
1612 | | |
1613 | 32 | if ((left_quotation_marks != nullptr && right_quotation_marks == nullptr) || |
1614 | 32 | stream->get_leg_ptr() == nullptr || stream->get_leg_len() == 0) { |
1615 | 0 | return false; //invalid json path |
1616 | 0 | } |
1617 | | |
1618 | 32 | if (stream->get_has_escapes()) { |
1619 | 0 | stream->remove_escapes(); |
1620 | 0 | } |
1621 | | |
1622 | 32 | std::unique_ptr<leg_info> leg( |
1623 | 32 | new leg_info(stream->get_leg_ptr(), stream->get_leg_len(), 0, MEMBER_CODE)); |
1624 | 32 | path->add_leg_to_leg_vector(std::move(leg)); |
1625 | | |
1626 | 32 | return true; |
1627 | 32 | } |
1628 | | |
1629 | | static_assert(is_pod_v<JsonbDocument>, "JsonbDocument must be standard layout and trivial"); |
1630 | | static_assert(is_pod_v<JsonbValue>, "JsonbValue must be standard layout and trivial"); |
1631 | | static_assert(is_pod_v<JsonbDecimal32>, "JsonbDecimal32 must be standard layout and trivial"); |
1632 | | static_assert(is_pod_v<JsonbDecimal64>, "JsonbDecimal64 must be standard layout and trivial"); |
1633 | | static_assert(is_pod_v<JsonbDecimal128>, "JsonbDecimal128 must be standard layout and trivial"); |
1634 | | static_assert(is_pod_v<JsonbDecimal256>, "JsonbDecimal256 must be standard layout and trivial"); |
1635 | | static_assert(is_pod_v<JsonbInt8Val>, "JsonbInt8Val must be standard layout and trivial"); |
1636 | | static_assert(is_pod_v<JsonbInt32Val>, "JsonbInt32Val must be standard layout and trivial"); |
1637 | | static_assert(is_pod_v<JsonbInt64Val>, "JsonbInt64Val must be standard layout and trivial"); |
1638 | | static_assert(is_pod_v<JsonbInt128Val>, "JsonbInt128Val must be standard layout and trivial"); |
1639 | | static_assert(is_pod_v<JsonbDoubleVal>, "JsonbDoubleVal must be standard layout and trivial"); |
1640 | | static_assert(is_pod_v<JsonbFloatVal>, "JsonbFloatVal must be standard layout and trivial"); |
1641 | | static_assert(is_pod_v<JsonbBinaryVal>, "JsonbBinaryVal must be standard layout and trivial"); |
1642 | | static_assert(is_pod_v<ContainerVal>, "ContainerVal must be standard layout and trivial"); |
1643 | | |
1644 | | #define ASSERT_DECIMAL_LAYOUT(type) \ |
1645 | | static_assert(offsetof(type, precision) == 0); \ |
1646 | | static_assert(offsetof(type, scale) == 4); \ |
1647 | | static_assert(offsetof(type, value) == 8); |
1648 | | |
1649 | | ASSERT_DECIMAL_LAYOUT(JsonbDecimal32) |
1650 | | ASSERT_DECIMAL_LAYOUT(JsonbDecimal64) |
1651 | | ASSERT_DECIMAL_LAYOUT(JsonbDecimal128) |
1652 | | ASSERT_DECIMAL_LAYOUT(JsonbDecimal256) |
1653 | | |
1654 | | #define ASSERT_NUMERIC_LAYOUT(type) static_assert(offsetof(type, num) == 0); |
1655 | | |
1656 | | ASSERT_NUMERIC_LAYOUT(JsonbInt8Val) |
1657 | | ASSERT_NUMERIC_LAYOUT(JsonbInt32Val) |
1658 | | ASSERT_NUMERIC_LAYOUT(JsonbInt64Val) |
1659 | | ASSERT_NUMERIC_LAYOUT(JsonbInt128Val) |
1660 | | ASSERT_NUMERIC_LAYOUT(JsonbDoubleVal) |
1661 | | |
1662 | | static_assert(offsetof(JsonbBinaryVal, size) == 0); |
1663 | | static_assert(offsetof(JsonbBinaryVal, payload) == 4); |
1664 | | |
1665 | | static_assert(offsetof(ContainerVal, size) == 0); |
1666 | | static_assert(offsetof(ContainerVal, payload) == 4); |
1667 | | |
1668 | | #pragma pack(pop) |
1669 | | #if defined(__clang__) |
1670 | | #pragma clang diagnostic pop |
1671 | | #endif |
1672 | | } // namespace doris |
1673 | | |
1674 | | #endif // JSONB_JSONBDOCUMENT_H |