/root/doris/be/src/util/jsonb_document.h
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2014, Facebook, Inc. |
3 | | * All rights reserved. |
4 | | * |
5 | | * This source code is licensed under the BSD-style license found in the |
6 | | * LICENSE file in the root directory of this source tree. An additional grant |
7 | | * of patent rights can be found in the PATENTS file in the same directory. |
8 | | * |
9 | | */ |
10 | | |
11 | | /* |
12 | | * This header defines JsonbDocument, JsonbKeyValue, and various value classes |
13 | | * which are derived from JsonbValue, and a forward iterator for container |
14 | | * values - essentially everything that is related to JSONB binary data |
15 | | * structures. |
16 | | * |
17 | | * Implementation notes: |
18 | | * |
19 | | * None of the classes in this header file can be instantiated directly (i.e. |
20 | | * you cannot create a JsonbKeyValue or JsonbValue object - all constructors |
21 | | * are declared non-public). We use the classes as wrappers on the packed JSONB |
22 | | * bytes (serialized), and cast the classes (types) to the underlying packed |
23 | | * byte array. |
24 | | * |
25 | | * For the same reason, we cannot define any JSONB value class to be virtual, |
26 | | * since we never call constructors, and will not instantiate vtbl and vptrs. |
27 | | * |
28 | | * Therefore, the classes are defined as packed structures (i.e. no data |
29 | | * alignment and padding), and the private member variables of the classes are |
30 | | * defined precisely in the same order as the JSONB spec. This ensures we |
31 | | * access the packed JSONB bytes correctly. |
32 | | * |
33 | | * The packed structures are highly optimized for in-place operations with low |
34 | | * overhead. The reads (and in-place writes) are performed directly on packed |
35 | | * bytes. There is no memory allocation at all at runtime. |
36 | | * |
37 | | * For updates/writes of values that will expand the original JSONB size, the |
38 | | * write will fail, and the caller needs to handle buffer increase. |
39 | | * |
40 | | * ** Iterator ** |
41 | | * Both ObjectVal class and ArrayVal class have iterator type that you can use |
42 | | * to declare an iterator on a container object to go through the key-value |
43 | | * pairs or value list. The iterator has both non-const and const types. |
44 | | * |
45 | | * Note: iterators are forward direction only. |
46 | | * |
47 | | * ** Query ** |
48 | | * Querying into containers is through the member functions find (for key/value |
49 | | * pairs) and get (for array elements), and is in streaming style. We don't |
50 | | * need to read/scan the whole JSONB packed bytes in order to return results. |
51 | | * Once the key/index is found, we will stop search. You can use text to query |
52 | | * both objects and array (for array, text will be converted to integer index), |
53 | | * and use index to retrieve from array. Array index is 0-based. |
54 | | * |
55 | | * ** External dictionary ** |
56 | | * During query processing, you can also pass a call-back function, so the |
57 | | * search will first try to check if the key string exists in the dictionary. |
58 | | * If so, search will be based on the id instead of the key string. |
59 | | * @author Tian Xia <tianx@fb.com> |
60 | | * |
61 | | * this file is copied from |
62 | | * https://github.com/facebook/mysql-5.6/blob/fb-mysql-5.6.35/fbson/FbsonDocument.h |
63 | | * and modified by Doris |
64 | | */ |
65 | | |
66 | | #ifndef JSONB_JSONBDOCUMENT_H |
67 | | #define JSONB_JSONBDOCUMENT_H |
68 | | |
#include <algorithm>
#include <cassert>
#include <cctype>
#include <charconv>
#include <concepts>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <memory>
#include <string>
#include <string_view>
#include <type_traits>
#include <utility>
#include <vector>

#include "common/compiler_util.h" // IWYU pragma: keep
#include "common/status.h"
#include "runtime/define_primitive_type.h"
#include "util/string_util.h"
#include "vec/core/types.h"
83 | | |
84 | | // #include "util/string_parser.hpp" |
85 | | |
86 | | // Concept to check for supported decimal types |
87 | | template <typename T> |
88 | | concept JsonbDecimalType = std::same_as<T, doris::vectorized::Decimal256> || |
89 | | std::same_as<T, doris::vectorized::Decimal64> || |
90 | | std::same_as<T, doris::vectorized::Decimal128V3> || |
91 | | std::same_as<T, doris::vectorized::Decimal32>; |
92 | | |
93 | | namespace doris { |
94 | | |
// True when T is both a trivial type and a standard-layout type.
template <typename T>
constexpr bool is_pod_v = std::conjunction_v<std::is_trivial<T>, std::is_standard_layout<T>>;
97 | | |
// Forward declarations of the value wrapper types, so that the aliases and the
// JsonbPodType concept can name them before their definitions.
struct JsonbStringVal;
struct ObjectVal;
struct ArrayVal;
struct JsonbBinaryVal;
struct ContainerVal;

// Decimal value wrapper, parameterized by one of the types accepted by
// JsonbDecimalType.
template <JsonbDecimalType T>
struct JsonbDecimalVal;

// One alias per supported decimal type.
using JsonbDecimal256 = JsonbDecimalVal<vectorized::Decimal256>;
using JsonbDecimal128 = JsonbDecimalVal<vectorized::Decimal128V3>;
using JsonbDecimal64 = JsonbDecimalVal<vectorized::Decimal64>;
using JsonbDecimal32 = JsonbDecimalVal<vectorized::Decimal32>;

// Numeric value wrapper; only integral and floating-point T are accepted.
template <typename T>
    requires std::is_integral_v<T> || std::is_floating_point_v<T>
struct NumberValT;

// One alias per numeric type used with NumberValT.
// NOTE(review): int128_t is used here before the `using int128_t = __int128;`
// further down in this header, so it is presumably also provided by an
// included header - confirm.
using JsonbInt8Val = NumberValT<int8_t>;
using JsonbInt16Val = NumberValT<int16_t>;
using JsonbInt32Val = NumberValT<int32_t>;
using JsonbInt64Val = NumberValT<int64_t>;
using JsonbInt128Val = NumberValT<int128_t>;
using JsonbDoubleVal = NumberValT<double>;
using JsonbFloatVal = NumberValT<float>;
123 | | |
124 | | template <typename T> |
125 | | concept JsonbPodType = (std::same_as<T, JsonbStringVal> || std::same_as<T, ObjectVal> || |
126 | | std::same_as<T, ContainerVal> || std::same_as<T, ArrayVal> || |
127 | | std::same_as<T, JsonbBinaryVal> || std::same_as<T, JsonbDecimal32> || |
128 | | std::same_as<T, JsonbDecimal64> || std::same_as<T, JsonbDecimal128> || |
129 | | std::same_as<T, JsonbDecimal256> || std::same_as<T, JsonbDecimal32> || |
130 | | std::same_as<T, JsonbInt8Val> || std::same_as<T, JsonbInt16Val> || |
131 | | std::same_as<T, JsonbInt32Val> || std::same_as<T, JsonbInt64Val> || |
132 | | std::same_as<T, JsonbInt128Val> || std::same_as<T, JsonbFloatVal> || |
133 | | std::same_as<T, JsonbFloatVal> || std::same_as<T, JsonbDoubleVal>); |
134 | | |
// The structures below end in zero-length arrays (e.g. `char payload_[0]`);
// silence clang's warning about that extension.
#if defined(__clang__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wzero-length-array"
#endif
// Pack the following declarations on 1-byte boundaries (no padding), as the
// file header describes for the packed JSONB structures.
#pragma pack(push, 1)

// JSONB version constant (the document header carries a 1-byte version field).
#define JSONB_VER 1

using int128_t = __int128;

// forward declaration
struct JsonbValue;

class JsonbOutStream;

template <class OS_TYPE>
class JsonbWriterT;

using JsonbWriter = JsonbWriterT<JsonbOutStream>;

// NOTE(review): presumably the maximum container nesting depth; the code that
// enforces it is not visible in this part of the file - confirm.
const int MaxNestingLevel = 100;
156 | | |
/*
 * JsonbType enumerates the type codes of JSONB values. The grammar below
 * describes the original 10 primitive types and 2 container types
 * (0x00 - 0x0B). The codes 0x0C - 0x11 (int128, float and the four decimal
 * types) were appended afterwards and are not covered by the grammar.
 *
 * NOTE: Do NOT modify the existing values or their order in this enum.
 * You may only append new entries at the end before `NUM_TYPES`.
 * This enum will be used in serialized data and/or persisted data.
 * Changing existing values may break backward compatibility
 * with previously stored or transmitted data.
 *
 * primitive_value ::=
 *   0x00        //null value (0 byte)
 * | 0x01        //boolean true (0 byte)
 * | 0x02        //boolean false (0 byte)
 * | 0x03 int8   //char/int8 (1 byte)
 * | 0x04 int16  //int16 (2 bytes)
 * | 0x05 int32  //int32 (4 bytes)
 * | 0x06 int64  //int64 (8 bytes)
 * | 0x07 double //floating point (8 bytes)
 * | 0x08 string //variable length string
 * | 0x09 binary //variable length binary
 *
 * container ::=
 *   0x0A int32 key_value_list //object, int32 is the total bytes of the object
 * | 0x0B int32 value_list     //array, int32 is the total bytes of the array
 */
enum class JsonbType : char {
    T_Null = 0x00,
    T_True = 0x01,
    T_False = 0x02,
    T_Int8 = 0x03,
    T_Int16 = 0x04,
    T_Int32 = 0x05,
    T_Int64 = 0x06,
    T_Double = 0x07,
    T_String = 0x08,
    T_Binary = 0x09,
    T_Object = 0x0A,
    T_Array = 0x0B,
    T_Int128 = 0x0C,
    T_Float = 0x0D,
    T_Decimal32 = 0x0E,  // DecimalV3 only
    T_Decimal64 = 0x0F,  // DecimalV3 only
    T_Decimal128 = 0x10, // DecimalV3 only
    T_Decimal256 = 0x11, // DecimalV3 only
    NUM_TYPES,           // sentinel: one past the last valid type code, not a value type
};
203 | | |
204 | 11 | inline PrimitiveType get_primitive_type_from_json_type(JsonbType json_type) { |
205 | 11 | switch (json_type) { |
206 | 1 | case JsonbType::T_Null: |
207 | 1 | return TYPE_NULL; |
208 | 1 | case JsonbType::T_True: |
209 | 2 | case JsonbType::T_False: |
210 | 2 | return TYPE_BOOLEAN; |
211 | 0 | case JsonbType::T_Int8: |
212 | 0 | return TYPE_TINYINT; |
213 | 0 | case JsonbType::T_Int16: |
214 | 0 | return TYPE_SMALLINT; |
215 | 0 | case JsonbType::T_Int32: |
216 | 0 | return TYPE_INT; |
217 | 0 | case JsonbType::T_Int64: |
218 | 0 | return TYPE_BIGINT; |
219 | 0 | case JsonbType::T_Double: |
220 | 0 | return TYPE_DOUBLE; |
221 | 1 | case JsonbType::T_String: |
222 | 1 | return TYPE_STRING; |
223 | 0 | case JsonbType::T_Binary: |
224 | 0 | return TYPE_BINARY; |
225 | 0 | case JsonbType::T_Object: |
226 | 0 | return TYPE_STRUCT; |
227 | 1 | case JsonbType::T_Array: |
228 | 1 | return TYPE_ARRAY; |
229 | 1 | case JsonbType::T_Int128: |
230 | 1 | return TYPE_LARGEINT; |
231 | 1 | case JsonbType::T_Float: |
232 | 1 | return TYPE_FLOAT; |
233 | 1 | case JsonbType::T_Decimal32: |
234 | 1 | return TYPE_DECIMAL32; |
235 | 1 | case JsonbType::T_Decimal64: |
236 | 1 | return TYPE_DECIMAL64; |
237 | 1 | case JsonbType::T_Decimal128: |
238 | 1 | return TYPE_DECIMAL128I; |
239 | 1 | case JsonbType::T_Decimal256: |
240 | 1 | return TYPE_DECIMAL256; |
241 | 0 | default: |
242 | 0 | throw Exception(ErrorCode::INTERNAL_ERROR, "Unsupported JsonbType: {}", |
243 | 0 | static_cast<int>(json_type)); |
244 | 11 | } |
245 | 11 | } |
246 | | |
// Characters and keywords used when parsing a JSON path.
constexpr char SCOPE = '$';        // emitted first by JsonbPath::to_string
constexpr char BEGIN_MEMBER = '.'; // emitted before a member leg
constexpr char BEGIN_ARRAY = '[';  // emitted before an array index
constexpr char END_ARRAY = ']';    // emitted after an array index
constexpr char DOUBLE_QUOTE = '"';
constexpr char WILDCARD = '*';
constexpr char MINUS = '-';
constexpr char LAST[] = "last";
constexpr char ESCAPE = '\\';
// Values of leg_info::type: 0 is a member leg, 1 is an array leg.
constexpr unsigned int MEMBER_CODE = 0;
constexpr unsigned int ARRAY_CODE = 1;
259 | | |
260 | | /* |
261 | | * JsonbDocument is the main object that accesses and queries JSONB packed |
262 | | * bytes. NOTE: JsonbDocument only allows object container as the top level |
263 | | * JSONB value. However, you can use the static method "createValue" to get any |
264 | | * JsonbValue object from the packed bytes. |
265 | | * |
266 | | * JsonbDocument object also dereferences to an object container value |
267 | | * (ObjectVal) once JSONB is loaded. |
268 | | * |
269 | | * ** Load ** |
270 | | * JsonbDocument is usable after loading packed bytes (memory location) into |
271 | | * the object. We only need the header and first few bytes of the payload after |
272 | | * header to verify the JSONB. |
273 | | * |
274 | | * Note: creating a JsonbDocument (through checkAndCreateDocument) does not allocate |
275 | | * any memory. The document object is an efficient wrapper on the packed bytes |
276 | | * which is accessed directly. |
277 | | * |
278 | | * ** Query ** |
279 | | * Query is through dereferencing into ObjectVal. |
280 | | */ |
281 | | class JsonbDocument { |
282 | | public: |
283 | | // create an JsonbDocument object from JSONB packed bytes |
284 | | [[nodiscard]] static Status checkAndCreateDocument(const char* pb, size_t size, |
285 | | JsonbDocument** doc); |
286 | | |
287 | | // create an JsonbValue from JSONB packed bytes |
288 | | static JsonbValue* createValue(const char* pb, size_t size); |
289 | | |
290 | 0 | uint8_t version() const { return header_.ver_; } |
291 | | |
292 | 661 | JsonbValue* getValue() { return ((JsonbValue*)payload_); } |
293 | | |
294 | | void setValue(const JsonbValue* value); |
295 | | |
296 | | unsigned int numPackedBytes() const; |
297 | | |
298 | | // ObjectVal* operator->(); |
299 | | |
300 | | const ObjectVal* operator->() const; |
301 | | |
302 | 0 | bool operator==(const JsonbDocument& other) const { |
303 | 0 | assert(false); |
304 | 0 | return false; |
305 | 0 | } |
306 | | |
307 | 0 | bool operator!=(const JsonbDocument& other) const { |
308 | 0 | assert(false); |
309 | 0 | return false; |
310 | 0 | } |
311 | | |
312 | 0 | bool operator<=(const JsonbDocument& other) const { |
313 | 0 | assert(false); |
314 | 0 | return false; |
315 | 0 | } |
316 | | |
317 | 0 | bool operator>=(const JsonbDocument& other) const { |
318 | 0 | assert(false); |
319 | 0 | return false; |
320 | 0 | } |
321 | | |
322 | 0 | bool operator<(const JsonbDocument& other) const { |
323 | 0 | assert(false); |
324 | 0 | return false; |
325 | 0 | } |
326 | | |
327 | 0 | bool operator>(const JsonbDocument& other) const { |
328 | 0 | assert(false); |
329 | 0 | return false; |
330 | 0 | } |
331 | | |
332 | | private: |
333 | | /* |
334 | | * JsonbHeader class defines JSONB header (internal to JsonbDocument). |
335 | | * |
336 | | * Currently it only contains version information (1-byte). We may expand the |
337 | | * header to include checksum of the JSONB binary for more security. |
338 | | */ |
339 | | struct JsonbHeader { |
340 | | uint8_t ver_; |
341 | | } header_; |
342 | | |
343 | | char payload_[0]; |
344 | | }; |
345 | | |
/// A simple input stream class for the JSON path parser.
///
/// Besides the read cursor over [position(), end()), the stream carries
/// scratch state about one path leg: a pointer to it, its length, and a flag
/// telling whether it contains escape characters.
class Stream {
public:
    /// Creates an input stream reading from a character string.
    /// @param string the input string
    /// @param length the length of the input string
    Stream(const char* string, size_t length) : m_position(string), m_end(string + length) {}

    /// Returns a pointer to the current position in the stream.
    const char* position() const { return m_position; }

    /// Returns a pointer to the position just after the end of the stream.
    const char* end() const { return m_end; }

    /// Returns the number of bytes remaining in the stream.
    size_t remaining() const {
        assert(m_position <= m_end);
        return m_end - m_position;
    }

    /// Tells if the stream has been exhausted.
    bool exhausted() const { return remaining() == 0; }

    /// Reads the next byte from the stream and moves the position forward.
    /// The stream must not be exhausted.
    char read() {
        assert(!exhausted());
        return *m_position++;
    }

    /// Reads the next byte from the stream without moving the position forward.
    /// The stream must not be exhausted.
    char peek() const {
        assert(!exhausted());
        return *m_position;
    }

    /// Moves the position to the next non-whitespace character.
    void skip_whitespace() {
        // std::isspace has undefined behaviour for negative arguments, so the
        // (possibly signed) char is converted to unsigned char first.
        m_position = std::find_if_not(m_position, m_end, [](char c) {
            return std::isspace(static_cast<unsigned char>(c)) != 0;
        });
    }

    /// Moves the position n bytes forward, then skips any whitespace.
    void skip(size_t n) {
        assert(remaining() >= n);
        m_position += n;
        skip_whitespace();
    }

    /// Moves the position one byte forward without any bounds check.
    void advance() { m_position++; }

    void clear_leg_ptr() { leg_ptr = nullptr; }

    void set_leg_ptr(char* ptr) {
        clear_leg_ptr();
        leg_ptr = ptr;
    }

    char* get_leg_ptr() { return leg_ptr; }

    void clear_leg_len() { leg_len = 0; }

    void add_leg_len() { leg_len++; }

    unsigned int get_leg_len() const { return leg_len; }

    /// Removes every backslash from the current leg in place, writes a
    /// terminating '\0' after the compacted text and updates the leg length.
    /// Does nothing when no leg pointer is set.
    /// NOTE(review): when the leg holds no backslash the terminator is written
    /// at leg_ptr[leg_len]; the caller must make sure that byte is writable.
    void remove_escapes() {
        if (leg_ptr == nullptr) {
            return;
        }
        unsigned int new_len = 0;
        for (unsigned int i = 0; i < leg_len; i++) {
            if (leg_ptr[i] != '\\') {
                leg_ptr[new_len++] = leg_ptr[i];
            }
        }
        leg_ptr[new_len] = '\0';
        leg_len = new_len;
    }

    void set_has_escapes(bool has) { has_escapes = has; }

    bool get_has_escapes() const { return has_escapes; }

private:
    /// The current position in the stream.
    const char* m_position = nullptr;

    /// The end of the stream.
    const char* const m_end;

    ///path leg ptr
    char* leg_ptr = nullptr;

    ///path leg len (zero-initialized so it is never read uninitialized)
    unsigned int leg_len = 0;

    ///Whether to contain escape characters
    bool has_escapes = false;
};
441 | | |
442 | | struct leg_info { |
443 | | ///path leg ptr |
444 | | char* leg_ptr = nullptr; |
445 | | |
446 | | ///path leg len |
447 | | unsigned int leg_len; |
448 | | |
449 | | ///array_index |
450 | | int array_index; |
451 | | |
452 | | ///type: 0 is member 1 is array |
453 | | unsigned int type; |
454 | | |
455 | 0 | bool to_string(std::string* str) const { |
456 | 0 | if (type == MEMBER_CODE) { |
457 | 0 | str->push_back(BEGIN_MEMBER); |
458 | 0 | str->append(leg_ptr, leg_len); |
459 | 0 | return true; |
460 | 0 | } else if (type == ARRAY_CODE) { |
461 | 0 | str->push_back(BEGIN_ARRAY); |
462 | 0 | std::string int_str = std::to_string(array_index); |
463 | 0 | str->append(int_str); |
464 | 0 | str->push_back(END_ARRAY); |
465 | 0 | return true; |
466 | 0 | } else { |
467 | 0 | return false; |
468 | 0 | } |
469 | 0 | } |
470 | | }; |
471 | | |
472 | | class JsonbPath { |
473 | | public: |
474 | | // parse json path |
475 | | static bool parsePath(Stream* stream, JsonbPath* path); |
476 | | |
477 | | static bool parse_array(Stream* stream, JsonbPath* path); |
478 | | static bool parse_member(Stream* stream, JsonbPath* path); |
479 | | |
480 | | //return true if json path valid else return false |
481 | | bool seek(const char* string, size_t length); |
482 | | |
483 | 90 | void add_leg_to_leg_vector(std::unique_ptr<leg_info> leg) { |
484 | 90 | leg_vector.emplace_back(leg.release()); |
485 | 90 | } |
486 | | |
487 | 0 | void pop_leg_from_leg_vector() { leg_vector.pop_back(); } |
488 | | |
489 | 0 | bool to_string(std::string* res) const { |
490 | 0 | res->push_back(SCOPE); |
491 | 0 | for (const auto& leg : leg_vector) { |
492 | 0 | auto valid = leg->to_string(res); |
493 | 0 | if (!valid) { |
494 | 0 | return false; |
495 | 0 | } |
496 | 0 | } |
497 | 0 | return true; |
498 | 0 | } |
499 | | |
500 | 185 | size_t get_leg_vector_size() const { return leg_vector.size(); } |
501 | | |
502 | 295 | leg_info* get_leg_from_leg_vector(size_t i) const { return leg_vector[i].get(); } |
503 | | |
504 | 6 | void clean() { leg_vector.clear(); } |
505 | | |
506 | | private: |
507 | | std::vector<std::unique_ptr<leg_info>> leg_vector; |
508 | | }; |
509 | | |
/*
 * JsonbFwdIteratorT implements JSONB's iterator template.
 *
 * Iter_Type is a pointer to the element type; the element type must provide
 * numPackedBytes(), which is the distance in bytes to the next element.
 *
 * Note: it is a FORWARD iterator only due to the design of JSONB format.
 */
template <class Iter_Type, class Cont_Type>
class JsonbFwdIteratorT {
public:
    using iterator = Iter_Type;
    using pointer = typename std::iterator_traits<Iter_Type>::pointer;
    using reference = typename std::iterator_traits<Iter_Type>::reference;

    explicit JsonbFwdIteratorT() : current_(nullptr) {}
    explicit JsonbFwdIteratorT(const iterator& i) : current_(i) {}

    // allow non-const to const iterator conversion (same container type)
    template <class Iter_Ty>
    JsonbFwdIteratorT(const JsonbFwdIteratorT<Iter_Ty, Cont_Type>& rhs) : current_(rhs.base()) {}

    bool operator==(const JsonbFwdIteratorT& rhs) const { return (current_ == rhs.current_); }

    bool operator!=(const JsonbFwdIteratorT& rhs) const { return !operator==(rhs); }

    bool operator<(const JsonbFwdIteratorT& rhs) const { return (current_ < rhs.current_); }

    // Strictly greater-than. This used to be `!operator<(rhs)`, which is
    // greater-or-equal and made `it > it` evaluate to true.
    bool operator>(const JsonbFwdIteratorT& rhs) const { return (rhs.current_ < current_); }

    // Advances to the next element, located numPackedBytes() bytes further.
    // The C-style casts are kept on purpose: they work for both const and
    // non-const element pointers.
    JsonbFwdIteratorT& operator++() {
        current_ = (iterator)(((char*)current_) + current_->numPackedBytes());
        return *this;
    }

    // Post-increment: advances and returns the previous position.
    JsonbFwdIteratorT operator++(int) {
        auto tmp = *this;
        ++(*this);
        return tmp;
    }

    explicit operator pointer() { return current_; }

    reference operator*() const { return *current_; }

    pointer operator->() const { return current_; }

    // The underlying element pointer.
    iterator base() const { return current_; }

private:
    iterator current_;
};
559 | | |
// Function-pointer types for the external dictionary callbacks: each takes a
// character pointer plus a length and returns an int.
// NOTE(review): presumably the key text and the key's dictionary id - confirm
// against the callers.
using hDictInsert = int (*)(const char*, unsigned int);
using hDictFind = int (*)(const char*, unsigned int);

// Integer type underlying the JsonbType enum.
using JsonbTypeUnder = std::underlying_type_t<JsonbType>;
564 | | |
565 | | /* |
566 | | * JsonbKeyValue class defines JSONB key type, as described below. |
567 | | * |
568 | | * key ::= |
569 | | * 0x00 int8 //1-byte dictionary id |
570 | | * | int8 (byte*) //int8 (>0) is the size of the key string |
571 | | * |
572 | | * value ::= primitive_value | container |
573 | | * |
574 | | * JsonbKeyValue can be either an id mapping to the key string in an external |
575 | | * dictionary, or it is the original key string. Whether to read an id or a |
576 | | * string is decided by the first byte (size). |
577 | | * |
578 | | * Note: a key object must be followed by a value object. Therefore, a key |
579 | | * object implicitly refers to a key-value pair, and you can get the value |
580 | | * object right after the key object. The function numPackedBytes hence |
581 | | * indicates the total size of the key-value pair, so that we will be able go |
582 | | * to next pair from the key. |
583 | | * |
584 | | * ** Dictionary size ** |
585 | | * By default, the dictionary size is 255 (1-byte). Users can define |
586 | | * "USE_LARGE_DICT" to increase the dictionary size to 65535 (2-byte). |
587 | | */ |
588 | | class JsonbKeyValue { |
589 | | public: |
590 | | // now we use sMaxKeyId to represent an empty key |
591 | | static const int sMaxKeyId = 65535; |
592 | | using keyid_type = uint16_t; |
593 | | |
594 | | static const uint8_t sMaxKeyLen = 64; |
595 | | |
596 | | // size of the key. 0 indicates it is stored as id |
597 | 1.29k | uint8_t klen() const { return size; } |
598 | | |
599 | | // get the key string. Note the string may not be null terminated. |
600 | 681 | const char* getKeyStr() const { return key.str_; } |
601 | | |
602 | 9.22k | keyid_type getKeyId() const { return key.id_; } |
603 | | |
604 | 35.0k | unsigned int keyPackedBytes() const { |
605 | 35.0k | return size ? (sizeof(size) + size) : (sizeof(size) + sizeof(keyid_type)); |
606 | 35.0k | } |
607 | | |
608 | 17.5k | JsonbValue* value() const { return (JsonbValue*)(((char*)this) + keyPackedBytes()); } |
609 | | |
610 | | // size of the total packed bytes (key+value) |
611 | | unsigned int numPackedBytes() const; |
612 | | |
613 | | uint8_t size; |
614 | | |
615 | | union key_ { |
616 | | keyid_type id_; |
617 | | char str_[1]; |
618 | | } key; |
619 | | }; |
620 | | |
621 | | struct JsonbFindResult { |
622 | | const JsonbValue* value = nullptr; // found value |
623 | | std::unique_ptr<JsonbWriter> writer; // writer to write the value |
624 | | bool is_wildcard = false; // whether the path is a wildcard path |
625 | | }; |
626 | | |
627 | | /* |
628 | | * JsonbValue is the base class of all JSONB types. It contains only one member |
629 | | * variable - type info, which can be retrieved by member functions is[Type]() |
630 | | * or type(). |
631 | | */ |
632 | | struct JsonbValue { |
633 | | static const uint32_t sMaxValueLen = 1 << 24; // 16M |
634 | | |
635 | 4.09k | bool isNull() const { return (type == JsonbType::T_Null); } |
636 | 70 | bool isTrue() const { return (type == JsonbType::T_True); } |
637 | 64 | bool isFalse() const { return (type == JsonbType::T_False); } |
638 | 89 | bool isInt() const { return isInt8() || isInt16() || isInt32() || isInt64() || isInt128(); } |
639 | 89 | bool isInt8() const { return (type == JsonbType::T_Int8); } |
640 | 81 | bool isInt16() const { return (type == JsonbType::T_Int16); } |
641 | 73 | bool isInt32() const { return (type == JsonbType::T_Int32); } |
642 | 70 | bool isInt64() const { return (type == JsonbType::T_Int64); } |
643 | 69 | bool isDouble() const { return (type == JsonbType::T_Double); } |
644 | 15 | bool isFloat() const { return (type == JsonbType::T_Float); } |
645 | 102 | bool isString() const { return (type == JsonbType::T_String); } |
646 | 1.07k | bool isBinary() const { return (type == JsonbType::T_Binary); } |
647 | 16 | bool isObject() const { return (type == JsonbType::T_Object); } |
648 | 5 | bool isArray() const { return (type == JsonbType::T_Array); } |
649 | 64 | bool isInt128() const { return (type == JsonbType::T_Int128); } |
650 | 4 | bool isDecimal() const { |
651 | 4 | return (type == JsonbType::T_Decimal32 || type == JsonbType::T_Decimal64 || |
652 | 4 | type == JsonbType::T_Decimal128 || type == JsonbType::T_Decimal256); |
653 | 4 | } |
654 | 1 | bool isDecimal32() const { return (type == JsonbType::T_Decimal32); } |
655 | 1 | bool isDecimal64() const { return (type == JsonbType::T_Decimal64); } |
656 | 1 | bool isDecimal128() const { return (type == JsonbType::T_Decimal128); } |
657 | 1 | bool isDecimal256() const { return (type == JsonbType::T_Decimal256); } |
658 | | |
659 | 11 | PrimitiveType get_primitive_type() const { return get_primitive_type_from_json_type(type); } |
660 | | |
661 | 0 | const char* typeName() const { |
662 | 0 | switch (type) { |
663 | 0 | case JsonbType::T_Null: |
664 | 0 | return "null"; |
665 | 0 | case JsonbType::T_True: |
666 | 0 | case JsonbType::T_False: |
667 | 0 | return "bool"; |
668 | 0 | case JsonbType::T_Int8: |
669 | 0 | case JsonbType::T_Int16: |
670 | 0 | case JsonbType::T_Int32: |
671 | 0 | return "int"; |
672 | 0 | case JsonbType::T_Int64: |
673 | 0 | return "bigint"; |
674 | 0 | case JsonbType::T_Int128: |
675 | 0 | return "largeint"; |
676 | 0 | case JsonbType::T_Double: |
677 | 0 | return "double"; |
678 | 0 | case JsonbType::T_Float: |
679 | 0 | return "float"; |
680 | 0 | case JsonbType::T_String: |
681 | 0 | return "string"; |
682 | 0 | case JsonbType::T_Binary: |
683 | 0 | return "binary"; |
684 | 0 | case JsonbType::T_Object: |
685 | 0 | return "object"; |
686 | 0 | case JsonbType::T_Array: |
687 | 0 | return "array"; |
688 | 0 | case JsonbType::T_Decimal32: |
689 | 0 | return "Decimal32"; |
690 | 0 | case JsonbType::T_Decimal64: |
691 | 0 | return "Decimal64"; |
692 | 0 | case JsonbType::T_Decimal128: |
693 | 0 | return "Decimal128"; |
694 | 0 | case JsonbType::T_Decimal256: |
695 | 0 | return "Decimal256"; |
696 | 0 | default: |
697 | 0 | return "unknown"; |
698 | 0 | } |
699 | 0 | } |
700 | | |
701 | | // size of the total packed bytes |
702 | | unsigned int numPackedBytes() const; |
703 | | |
704 | | // size of the value in bytes |
705 | | unsigned int size() const; |
706 | | |
707 | | //Get the number of jsonbvalue elements |
708 | | int numElements() const; |
709 | | |
710 | | //Whether to include the jsonbvalue rhs |
711 | | bool contains(JsonbValue* rhs) const; |
712 | | |
713 | | // find the JSONB value by JsonbPath |
714 | | JsonbFindResult findValue(JsonbPath& path) const; |
715 | | friend class JsonbDocument; |
716 | | |
717 | | JsonbType type; // type info |
718 | | |
719 | | char payload[0]; // payload, which is the packed bytes of the value |
720 | | |
721 | | /** |
722 | | * @brief Unpacks the underlying Jsonb binary content as a pointer to type `T`. |
723 | | * |
724 | | * @tparam T A POD (Plain Old Data) type that must satisfy the `JsonbPodType` concept. |
725 | | * This ensures that `T` is trivially copyable, standard-layout, and safe to |
726 | | * reinterpret from raw bytes without invoking undefined behavior. |
727 | | * |
728 | | * @return A pointer to a `const T` object, interpreted from the internal buffer. |
729 | | * |
730 | | * @note The caller must ensure that the current JsonbValue actually contains data |
731 | | * compatible with type `T`, otherwise the result is undefined. |
732 | | */ |
733 | | template <JsonbPodType T> |
734 | 41.3k | const T* unpack() const { |
735 | 41.3k | static_assert(is_pod_v<T>, "T must be a POD type"); |
736 | 41.3k | return reinterpret_cast<const T*>(payload); |
737 | 41.3k | } _ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_9ObjectValEEEPKT_v Line | Count | Source | 734 | 19.6k | const T* unpack() const { | 735 | 19.6k | static_assert(is_pod_v<T>, "T must be a POD type"); | 736 | 19.6k | return reinterpret_cast<const T*>(payload); | 737 | 19.6k | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIaEEEEPKT_v Line | Count | Source | 734 | 757 | const T* unpack() const { | 735 | 757 | static_assert(is_pod_v<T>, "T must be a POD type"); | 736 | 757 | return reinterpret_cast<const T*>(payload); | 737 | 757 | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIsEEEEPKT_v Line | Count | Source | 734 | 99 | const T* unpack() const { | 735 | 99 | static_assert(is_pod_v<T>, "T must be a POD type"); | 736 | 99 | return reinterpret_cast<const T*>(payload); | 737 | 99 | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIiEEEEPKT_v Line | Count | Source | 734 | 3.46k | const T* unpack() const { | 735 | 3.46k | static_assert(is_pod_v<T>, "T must be a POD type"); | 736 | 3.46k | return reinterpret_cast<const T*>(payload); | 737 | 3.46k | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIlEEEEPKT_v Line | Count | Source | 734 | 1.87k | const T* unpack() const { | 735 | 1.87k | static_assert(is_pod_v<T>, "T must be a POD type"); | 736 | 1.87k | return reinterpret_cast<const T*>(payload); | 737 | 1.87k | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTInEEEEPKT_v Line | Count | Source | 734 | 4.16k | const T* unpack() const { | 735 | 4.16k | static_assert(is_pod_v<T>, "T must be a POD type"); | 736 | 4.16k | return reinterpret_cast<const T*>(payload); | 737 | 4.16k | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_14JsonbBinaryValEEEPKT_v Line | Count | Source | 734 | 8.92k | const T* unpack() const { | 735 | 8.92k | static_assert(is_pod_v<T>, "T must be a POD type"); | 736 | 8.92k | return reinterpret_cast<const T*>(payload); | 737 | 8.92k | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_12ContainerValEEEPKT_v Line | Count | Source | 734 | 1.87k | const T* unpack() const { | 735 | 1.87k | static_assert(is_pod_v<T>, "T must be a POD type"); | 736 | 1.87k | return reinterpret_cast<const T*>(payload); | 737 | 1.87k | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_8ArrayValEEEPKT_v Line | Count | Source | 734 | 99 | const T* unpack() const { | 735 | 99 | static_assert(is_pod_v<T>, "T must be a POD type"); | 736 | 99 | return reinterpret_cast<const T*>(payload); | 737 | 99 | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIdEEEEPKT_v Line | Count | Source | 734 | 165 | const T* unpack() const { | 735 | 165 | static_assert(is_pod_v<T>, "T must be a POD type"); | 736 | 165 | return reinterpret_cast<const T*>(payload); | 737 | 165 | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIfEEEEPKT_v Line | Count | Source | 734 | 20 | const T* unpack() const { | 735 | 20 | static_assert(is_pod_v<T>, "T must be a POD type"); | 736 | 20 | return reinterpret_cast<const T*>(payload); | 737 | 20 | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_14JsonbStringValEEEPKT_v Line | Count | Source | 734 | 277 | const T* unpack() const { | 735 | 277 | static_assert(is_pod_v<T>, "T must be a POD type"); | 736 | 277 | return reinterpret_cast<const T*>(payload); | 737 | 277 | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_15JsonbDecimalValINS_10vectorized7DecimalIiEEEEEEPKT_v Line | Count | Source | 734 | 2 | const T* unpack() const { | 735 | 2 | static_assert(is_pod_v<T>, "T must be a POD type"); | 736 | 2 | return reinterpret_cast<const T*>(payload); | 737 | 2 | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_15JsonbDecimalValINS_10vectorized7DecimalIlEEEEEEPKT_v Line | Count | Source | 734 | 2 | const T* unpack() const { | 735 | 2 | static_assert(is_pod_v<T>, "T must be a POD type"); | 736 | 2 | return reinterpret_cast<const T*>(payload); | 737 | 2 | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_15JsonbDecimalValINS_10vectorized12Decimal128V3EEEEEPKT_v Line | Count | Source | 734 | 3 | const T* unpack() const { | 735 | 3 | static_assert(is_pod_v<T>, "T must be a POD type"); | 736 | 3 | return reinterpret_cast<const T*>(payload); | 737 | 3 | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_15JsonbDecimalValINS_10vectorized7DecimalIN4wide7integerILm256EiEEEEEEEEPKT_v Line | Count | Source | 734 | 2 | const T* unpack() const { | 735 | 2 | static_assert(is_pod_v<T>, "T must be a POD type"); | 736 | 2 | return reinterpret_cast<const T*>(payload); | 737 | 2 | } |
|
738 | | |
739 | | // /** |
740 | | // * @brief Unpacks the underlying Jsonb binary content as a pointer to type `T`. |
741 | | // * |
742 | | // * @tparam T A POD (Plain Old Data) type that must satisfy the `JsonbPodType` concept. |
743 | | // * This ensures that `T` is trivially copyable, standard-layout, and safe to |
744 | | // * reinterpret from raw bytes without invoking undefined behavior. |
745 | | // * |
746 | | // * @return A pointer to a `T` object, interpreted from the internal buffer. |
747 | | // * |
748 | | // * @note The caller must ensure that the current JsonbValue actually contains data |
749 | | // * compatible with type `T`, otherwise the result is undefined. |
750 | | // */ |
751 | | // template <JsonbPodType T> |
752 | | // T* unpack() { |
753 | | // static_assert(is_pod_v<T>, "T must be a POD type"); |
754 | | // return reinterpret_cast<T*>(payload); |
755 | | // } |
756 | | |
757 | | int128_t int_val() const; |
758 | | }; |
759 | | |
760 | | // inline ObjectVal* JsonbDocument::operator->() { |
761 | | // return (((JsonbValue*)payload_)->unpack<ObjectVal>()); |
762 | | // } |
763 | | |
764 | 19.0k | inline const ObjectVal* JsonbDocument::operator->() const { |
765 | 19.0k | return (((JsonbValue*)payload_)->unpack<ObjectVal>()); |
766 | 19.0k | } |
767 | | |
768 | | /* |
769 | | * NumerValT is the template class (derived from JsonbValue) of all number |
770 | | * types (integers and double). |
771 | | */ |
772 | | template <typename T> |
773 | | requires std::is_integral_v<T> || std::is_floating_point_v<T> |
774 | | struct NumberValT { |
775 | | public: |
776 | 10.5k | T val() const { return num; } _ZNK5doris10NumberValTIaE3valEv Line | Count | Source | 776 | 757 | T val() const { return num; } |
_ZNK5doris10NumberValTIsE3valEv Line | Count | Source | 776 | 99 | T val() const { return num; } |
_ZNK5doris10NumberValTIiE3valEv Line | Count | Source | 776 | 3.46k | T val() const { return num; } |
_ZNK5doris10NumberValTIlE3valEv Line | Count | Source | 776 | 1.87k | T val() const { return num; } |
_ZNK5doris10NumberValTInE3valEv Line | Count | Source | 776 | 4.16k | T val() const { return num; } |
_ZNK5doris10NumberValTIdE3valEv Line | Count | Source | 776 | 165 | T val() const { return num; } |
_ZNK5doris10NumberValTIfE3valEv Line | Count | Source | 776 | 20 | T val() const { return num; } |
|
777 | | |
778 | | static unsigned int numPackedBytes() { return sizeof(JsonbValue) + sizeof(T); } |
779 | | |
780 | | T num; |
781 | | }; |
782 | | |
783 | 33 | inline int128_t JsonbValue::int_val() const { |
784 | 33 | switch (type) { |
785 | 9 | case JsonbType::T_Int8: |
786 | 9 | return unpack<JsonbInt8Val>()->val(); |
787 | 7 | case JsonbType::T_Int16: |
788 | 7 | return unpack<JsonbInt16Val>()->val(); |
789 | 6 | case JsonbType::T_Int32: |
790 | 6 | return unpack<JsonbInt32Val>()->val(); |
791 | 9 | case JsonbType::T_Int64: |
792 | 9 | return unpack<JsonbInt64Val>()->val(); |
793 | 2 | case JsonbType::T_Int128: |
794 | 2 | return unpack<JsonbInt128Val>()->val(); |
795 | 0 | default: |
796 | 0 | throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB value type: {}", |
797 | 0 | static_cast<int32_t>(type)); |
798 | 33 | } |
799 | 33 | } |
800 | | |
801 | | template <JsonbDecimalType T> |
802 | | struct JsonbDecimalVal { |
803 | | public: |
804 | | using NativeType = typename T::NativeType; |
805 | | |
806 | | // get the decimal value |
807 | 9 | NativeType val() const { |
808 | | // to avoid memory alignment issues, we use memcpy to copy the value |
809 | 9 | NativeType tmp; |
810 | 9 | memcpy(&tmp, &value, sizeof(NativeType)); |
811 | 9 | return tmp; |
812 | 9 | } _ZNK5doris15JsonbDecimalValINS_10vectorized7DecimalIiEEE3valEv Line | Count | Source | 807 | 2 | NativeType val() const { | 808 | | // to avoid memory alignment issues, we use memcpy to copy the value | 809 | 2 | NativeType tmp; | 810 | 2 | memcpy(&tmp, &value, sizeof(NativeType)); | 811 | 2 | return tmp; | 812 | 2 | } |
_ZNK5doris15JsonbDecimalValINS_10vectorized7DecimalIlEEE3valEv Line | Count | Source | 807 | 2 | NativeType val() const { | 808 | | // to avoid memory alignment issues, we use memcpy to copy the value | 809 | 2 | NativeType tmp; | 810 | 2 | memcpy(&tmp, &value, sizeof(NativeType)); | 811 | 2 | return tmp; | 812 | 2 | } |
_ZNK5doris15JsonbDecimalValINS_10vectorized12Decimal128V3EE3valEv Line | Count | Source | 807 | 3 | NativeType val() const { | 808 | | // to avoid memory alignment issues, we use memcpy to copy the value | 809 | 3 | NativeType tmp; | 810 | 3 | memcpy(&tmp, &value, sizeof(NativeType)); | 811 | 3 | return tmp; | 812 | 3 | } |
_ZNK5doris15JsonbDecimalValINS_10vectorized7DecimalIN4wide7integerILm256EiEEEEE3valEv Line | Count | Source | 807 | 2 | NativeType val() const { | 808 | | // to avoid memory alignment issues, we use memcpy to copy the value | 809 | 2 | NativeType tmp; | 810 | 2 | memcpy(&tmp, &value, sizeof(NativeType)); | 811 | 2 | return tmp; | 812 | 2 | } |
|
813 | | |
814 | 12 | static constexpr int numPackedBytes() { |
815 | 12 | return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value); |
816 | 12 | } _ZN5doris15JsonbDecimalValINS_10vectorized7DecimalIiEEE14numPackedBytesEv Line | Count | Source | 814 | 3 | static constexpr int numPackedBytes() { | 815 | 3 | return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value); | 816 | 3 | } |
_ZN5doris15JsonbDecimalValINS_10vectorized7DecimalIlEEE14numPackedBytesEv Line | Count | Source | 814 | 3 | static constexpr int numPackedBytes() { | 815 | 3 | return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value); | 816 | 3 | } |
_ZN5doris15JsonbDecimalValINS_10vectorized12Decimal128V3EE14numPackedBytesEv Line | Count | Source | 814 | 4 | static constexpr int numPackedBytes() { | 815 | 4 | return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value); | 816 | 4 | } |
_ZN5doris15JsonbDecimalValINS_10vectorized7DecimalIN4wide7integerILm256EiEEEEE14numPackedBytesEv Line | Count | Source | 814 | 2 | static constexpr int numPackedBytes() { | 815 | 2 | return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value); | 816 | 2 | } |
|
817 | | |
818 | | uint32_t precision; |
819 | | uint32_t scale; |
820 | | NativeType value; |
821 | | }; |
822 | | |
823 | | /* |
824 | | * BlobVal is the base class (derived from JsonbValue) for string and binary |
825 | | * types. The size indicates the total bytes of the payload. |
826 | | */ |
827 | | struct JsonbBinaryVal { |
828 | | public: |
829 | | // size of the blob payload only |
830 | 2.12k | unsigned int getBlobLen() const { return size; } |
831 | | |
832 | | // return the blob as byte array |
833 | 4.51k | const char* getBlob() const { return payload; } |
834 | | |
835 | | // size of the total packed bytes |
836 | 4.54k | unsigned int numPackedBytes() const { return sizeof(JsonbValue) + sizeof(size) + size; } |
837 | | friend class JsonbDocument; |
838 | | |
839 | | uint32_t size; |
840 | | char payload[0]; |
841 | | }; |
842 | | |
843 | | /* |
844 | | * String type |
845 | | * Note: JSONB string may not be a c-string (NULL-terminated) |
846 | | */ |
847 | | struct JsonbStringVal : public JsonbBinaryVal { |
848 | | public: |
849 | | /* |
850 | | This function return the actual size of a string. Since for |
851 | | a string, it can be null-terminated with null paddings or it |
852 | | can take all the space in the payload without null in the end. |
853 | | So we need to check it to get the true actual length of a string. |
854 | | */ |
855 | 133 | size_t length() const { |
856 | | // It's an empty string |
857 | 133 | if (0 == size) { |
858 | 0 | return size; |
859 | 0 | } |
860 | | // The string stored takes all the spaces in payload |
861 | 133 | if (payload[size - 1] != 0) { |
862 | 133 | return size; |
863 | 133 | } |
864 | | // It's shorter than the size of payload |
865 | 0 | return strnlen(payload, size); |
866 | 133 | } |
867 | | // convert the string (case insensitive) to a boolean value |
868 | | // "false": 0 |
869 | | // "true": 1 |
870 | | // all other strings: -1 |
871 | 0 | int getBoolVal() { |
872 | 0 | if (size == 4 && tolower(payload[0]) == 't' && tolower(payload[1]) == 'r' && |
873 | 0 | tolower(payload[2]) == 'u' && tolower(payload[3]) == 'e') { |
874 | 0 | return 1; |
875 | 0 | } else if (size == 5 && tolower(payload[0]) == 'f' && tolower(payload[1]) == 'a' && |
876 | 0 | tolower(payload[2]) == 'l' && tolower(payload[3]) == 's' && |
877 | 0 | tolower(payload[4]) == 'e') { |
878 | 0 | return 0; |
879 | 0 | } else { |
880 | 0 | return -1; |
881 | 0 | } |
882 | 0 | } |
883 | | }; |
884 | | |
885 | | /* |
886 | | * ContainerVal is the base class (derived from JsonbValue) for object and |
887 | | * array types. The size indicates the total bytes of the payload. |
888 | | */ |
889 | | struct ContainerVal { |
890 | | // size of the container payload only |
891 | 0 | unsigned int getContainerSize() const { return size; } |
892 | | |
893 | | // return the container payload as byte array |
894 | 0 | const char* getPayload() const { return payload; } |
895 | | |
896 | | // size of the total packed bytes |
897 | 1.87k | unsigned int numPackedBytes() const { return sizeof(JsonbValue) + sizeof(size) + size; } |
898 | | friend class JsonbDocument; |
899 | | |
900 | | uint32_t size; |
901 | | char payload[0]; |
902 | | }; |
903 | | |
904 | | /* |
905 | | * Object type |
906 | | */ |
907 | | struct ObjectVal : public ContainerVal { |
908 | | using value_type = JsonbKeyValue; |
909 | | using pointer = value_type*; |
910 | | using const_pointer = const value_type*; |
911 | | using iterator = JsonbFwdIteratorT<pointer, ObjectVal>; |
912 | | using const_iterator = JsonbFwdIteratorT<const_pointer, ObjectVal>; |
913 | | |
914 | 0 | const_iterator search(const char* key, hDictFind handler = nullptr) const { |
915 | 0 | return const_cast<ObjectVal*>(this)->search(key, handler); |
916 | 0 | } |
917 | | |
918 | 0 | const_iterator search(const char* key, unsigned int klen, hDictFind handler = nullptr) const { |
919 | 0 | return const_cast<ObjectVal*>(this)->search(key, klen, handler); |
920 | 0 | } |
921 | | |
922 | 0 | const_iterator search(int key_id) const { return const_cast<ObjectVal*>(this)->search(key_id); } |
923 | 0 | iterator search(const char* key, hDictFind handler = nullptr) { |
924 | 0 | if (!key) { |
925 | 0 | return end(); |
926 | 0 | } |
927 | 0 | return search(key, (unsigned int)strlen(key), handler); |
928 | 0 | } |
929 | | |
930 | 5 | iterator search(const char* key, unsigned int klen, hDictFind handler = nullptr) { |
931 | 5 | if (!key || !klen) { |
932 | 0 | return end(); |
933 | 0 | } |
934 | | |
935 | 5 | int key_id = -1; |
936 | 5 | if (handler && (key_id = handler(key, klen)) >= 0) { |
937 | 0 | return search(key_id); |
938 | 0 | } |
939 | 5 | return internalSearch(key, klen); |
940 | 5 | } |
941 | | |
942 | 0 | iterator search(int key_id) { |
943 | 0 | if (key_id < 0 || key_id > JsonbKeyValue::sMaxKeyId) { |
944 | 0 | return end(); |
945 | 0 | } |
946 | | |
947 | 0 | const char* pch = payload; |
948 | 0 | const char* fence = payload + size; |
949 | |
|
950 | 0 | while (pch < fence) { |
951 | 0 | auto* pkey = (JsonbKeyValue*)(pch); |
952 | 0 | if (!pkey->klen() && key_id == pkey->getKeyId()) { |
953 | 0 | return iterator(pkey); |
954 | 0 | } |
955 | 0 | pch += pkey->numPackedBytes(); |
956 | 0 | } |
957 | | |
958 | 0 | assert(pch == fence); |
959 | 0 | return end(); |
960 | 0 | } |
961 | | |
962 | | // Get number of elements in object |
963 | 1 | int numElem() const { |
964 | 1 | const char* pch = payload; |
965 | 1 | const char* fence = payload + size; |
966 | | |
967 | 1 | unsigned int num = 0; |
968 | 13 | while (pch < fence) { |
969 | 12 | auto* pkey = (JsonbKeyValue*)(pch); |
970 | 12 | ++num; |
971 | 12 | pch += pkey->numPackedBytes(); |
972 | 12 | } |
973 | | |
974 | 1 | assert(pch == fence); |
975 | | |
976 | 1 | return num; |
977 | 1 | } |
978 | | |
979 | 0 | JsonbKeyValue* getJsonbKeyValue(unsigned int i) const { |
980 | 0 | const char* pch = payload; |
981 | 0 | const char* fence = payload + size; |
982 | |
|
983 | 0 | unsigned int num = 0; |
984 | 0 | while (pch < fence) { |
985 | 0 | auto* pkey = (JsonbKeyValue*)(pch); |
986 | 0 | if (num == i) { |
987 | 0 | return pkey; |
988 | 0 | } |
989 | 0 | ++num; |
990 | 0 | pch += pkey->numPackedBytes(); |
991 | 0 | } |
992 | | |
993 | 0 | assert(pch == fence); |
994 | | |
995 | 0 | return nullptr; |
996 | 0 | } |
997 | | |
998 | 0 | JsonbValue* find(const char* key, hDictFind handler = nullptr) const { |
999 | 0 | return const_cast<ObjectVal*>(this)->find(key, handler); |
1000 | 0 | } |
1001 | | |
1002 | 5 | JsonbValue* find(const char* key, unsigned int klen, hDictFind handler = nullptr) const { |
1003 | 5 | return const_cast<ObjectVal*>(this)->find(key, klen, handler); |
1004 | 5 | } |
1005 | 0 | JsonbValue* find(int key_id) const { return const_cast<ObjectVal*>(this)->find(key_id); } |
1006 | | |
1007 | | // find the JSONB value by a key string (null terminated) |
1008 | 0 | JsonbValue* find(const char* key, hDictFind handler = nullptr) { |
1009 | 0 | if (!key) { |
1010 | 0 | return nullptr; |
1011 | 0 | } |
1012 | 0 | return find(key, (unsigned int)strlen(key), handler); |
1013 | 0 | } |
1014 | | |
1015 | | // find the JSONB value by a key string (with length) |
1016 | 5 | JsonbValue* find(const char* key, unsigned int klen, hDictFind handler = nullptr) { |
1017 | 5 | iterator kv = search(key, klen, handler); |
1018 | 5 | if (end() == kv) { |
1019 | 2 | return nullptr; |
1020 | 2 | } |
1021 | 3 | return kv->value(); |
1022 | 5 | } |
1023 | | |
1024 | | // find the JSONB value by a key dictionary ID |
1025 | 0 | JsonbValue* find(int key_id) { |
1026 | 0 | iterator kv = search(key_id); |
1027 | 0 | if (end() == kv) { |
1028 | 0 | return nullptr; |
1029 | 0 | } |
1030 | 0 | return kv->value(); |
1031 | 0 | } |
1032 | | |
1033 | 0 | iterator begin() { return iterator((pointer)payload); } |
1034 | | |
1035 | 1.68k | const_iterator begin() const { return const_iterator((pointer)payload); } |
1036 | | |
1037 | 7 | iterator end() { return iterator((pointer)(payload + size)); } |
1038 | | |
1039 | 18.5k | const_iterator end() const { return const_iterator((pointer)(payload + size)); } |
1040 | | |
1041 | | private: |
1042 | 5 | iterator internalSearch(const char* key, unsigned int klen) { |
1043 | 5 | const char* pch = payload; |
1044 | 5 | const char* fence = payload + size; |
1045 | | |
1046 | 5 | while (pch < fence) { |
1047 | 3 | auto* pkey = (JsonbKeyValue*)(pch); |
1048 | 3 | if (klen == pkey->klen() && strncmp(key, pkey->getKeyStr(), klen) == 0) { |
1049 | 3 | return iterator(pkey); |
1050 | 3 | } |
1051 | 0 | pch += pkey->numPackedBytes(); |
1052 | 0 | } |
1053 | | |
1054 | 5 | assert(pch == fence); |
1055 | | |
1056 | 2 | return end(); |
1057 | 2 | } |
1058 | | }; |
1059 | | |
1060 | | /* |
1061 | | * Array type |
1062 | | */ |
1063 | | struct ArrayVal : public ContainerVal { |
1064 | | using value_type = JsonbValue; |
1065 | | using pointer = value_type*; |
1066 | | using const_pointer = const value_type*; |
1067 | | using iterator = JsonbFwdIteratorT<pointer, ArrayVal>; |
1068 | | using const_iterator = JsonbFwdIteratorT<const_pointer, ArrayVal>; |
1069 | | |
1070 | | // get the JSONB value at index |
1071 | 26 | JsonbValue* get(int idx) const { |
1072 | 26 | if (idx < 0) { |
1073 | 0 | return nullptr; |
1074 | 0 | } |
1075 | | |
1076 | 26 | const char* pch = payload; |
1077 | 26 | const char* fence = payload + size; |
1078 | | |
1079 | 63 | while (pch < fence && idx-- > 0) { |
1080 | 37 | pch += ((JsonbValue*)pch)->numPackedBytes(); |
1081 | 37 | } |
1082 | 26 | if (idx > 0 || pch == fence) { |
1083 | 7 | return nullptr; |
1084 | 7 | } |
1085 | | |
1086 | 19 | return (JsonbValue*)pch; |
1087 | 26 | } |
1088 | | |
1089 | | // Get number of elements in array |
1090 | 13 | int numElem() const { |
1091 | 13 | const char* pch = payload; |
1092 | 13 | const char* fence = payload + size; |
1093 | | |
1094 | 13 | unsigned int num = 0; |
1095 | 143 | while (pch < fence) { |
1096 | 130 | ++num; |
1097 | 130 | pch += ((JsonbValue*)pch)->numPackedBytes(); |
1098 | 130 | } |
1099 | | |
1100 | 13 | assert(pch == fence); |
1101 | | |
1102 | 13 | return num; |
1103 | 13 | } |
1104 | | |
1105 | 0 | iterator begin() { return iterator((pointer)payload); } |
1106 | | |
1107 | 67 | const_iterator begin() const { return const_iterator((pointer)payload); } |
1108 | | |
1109 | 0 | iterator end() { return iterator((pointer)(payload + size)); } |
1110 | | |
1111 | 123 | const_iterator end() const { return const_iterator((pointer)(payload + size)); } |
1112 | | }; |
1113 | | |
1114 | | inline Status JsonbDocument::checkAndCreateDocument(const char* pb, size_t size, |
1115 | 1.59k | JsonbDocument** doc) { |
1116 | 1.59k | *doc = nullptr; |
1117 | 1.59k | if (!pb || size < sizeof(JsonbHeader) + sizeof(JsonbValue)) { |
1118 | 0 | return Status::InvalidArgument("Invalid JSONB document: too small size({}) or null pointer", |
1119 | 0 | size); |
1120 | 0 | } |
1121 | | |
1122 | 1.59k | auto* doc_ptr = (JsonbDocument*)pb; |
1123 | 1.59k | if (doc_ptr->header_.ver_ != JSONB_VER) { |
1124 | 1 | return Status::InvalidArgument("Invalid JSONB document: invalid version({})", |
1125 | 1 | doc_ptr->header_.ver_); |
1126 | 1 | } |
1127 | | |
1128 | 1.59k | auto* val = (JsonbValue*)doc_ptr->payload_; |
1129 | 1.59k | if (val->type < JsonbType::T_Null || val->type >= JsonbType::NUM_TYPES || |
1130 | 1.59k | size != sizeof(JsonbHeader) + val->numPackedBytes()) { |
1131 | 0 | return Status::InvalidArgument("Invalid JSONB document: invalid type({}) or size({})", |
1132 | 0 | static_cast<JsonbTypeUnder>(val->type), size); |
1133 | 0 | } |
1134 | | |
1135 | 1.59k | *doc = doc_ptr; |
1136 | 1.59k | return Status::OK(); |
1137 | 1.59k | } |
1138 | 0 | inline void JsonbDocument::setValue(const JsonbValue* value) { |
1139 | 0 | memcpy(payload_, value, value->numPackedBytes()); |
1140 | 0 | } |
1141 | | |
1142 | 15 | inline JsonbValue* JsonbDocument::createValue(const char* pb, size_t size) { |
1143 | 15 | if (!pb || size < sizeof(JsonbHeader) + sizeof(JsonbValue)) { |
1144 | 0 | return nullptr; |
1145 | 0 | } |
1146 | | |
1147 | 15 | auto* doc = (JsonbDocument*)pb; |
1148 | 15 | if (doc->header_.ver_ != JSONB_VER) { |
1149 | 0 | return nullptr; |
1150 | 0 | } |
1151 | | |
1152 | 15 | auto* val = (JsonbValue*)doc->payload_; |
1153 | 15 | if (size != sizeof(JsonbHeader) + val->numPackedBytes()) { |
1154 | 0 | return nullptr; |
1155 | 0 | } |
1156 | | |
1157 | 15 | return val; |
1158 | 15 | } |
1159 | | |
1160 | 0 | inline unsigned int JsonbDocument::numPackedBytes() const { |
1161 | 0 | return ((const JsonbValue*)payload_)->numPackedBytes() + sizeof(header_); |
1162 | 0 | } |
1163 | | |
1164 | 17.4k | inline unsigned int JsonbKeyValue::numPackedBytes() const { |
1165 | 17.4k | unsigned int ks = keyPackedBytes(); |
1166 | 17.4k | auto* val = (JsonbValue*)(((char*)this) + ks); |
1167 | 17.4k | return ks + val->numPackedBytes(); |
1168 | 17.4k | } |
1169 | | |
1170 | | // Poor man's "virtual" function JsonbValue::numPackedBytes |
1171 | 20.0k | inline unsigned int JsonbValue::numPackedBytes() const { |
1172 | 20.0k | switch (type) { |
1173 | 2.74k | case JsonbType::T_Null: |
1174 | 2.82k | case JsonbType::T_True: |
1175 | 2.86k | case JsonbType::T_False: { |
1176 | 2.86k | return sizeof(type); |
1177 | 2.82k | } |
1178 | | |
1179 | 893 | case JsonbType::T_Int8: { |
1180 | 893 | return sizeof(type) + sizeof(int8_t); |
1181 | 2.82k | } |
1182 | 113 | case JsonbType::T_Int16: { |
1183 | 113 | return sizeof(type) + sizeof(int16_t); |
1184 | 2.82k | } |
1185 | 3.47k | case JsonbType::T_Int32: { |
1186 | 3.47k | return sizeof(type) + sizeof(int32_t); |
1187 | 2.82k | } |
1188 | 1.88k | case JsonbType::T_Int64: { |
1189 | 1.88k | return sizeof(type) + sizeof(int64_t); |
1190 | 2.82k | } |
1191 | 184 | case JsonbType::T_Double: { |
1192 | 184 | return sizeof(type) + sizeof(double); |
1193 | 2.82k | } |
1194 | 21 | case JsonbType::T_Float: { |
1195 | 21 | return sizeof(type) + sizeof(float); |
1196 | 2.82k | } |
1197 | 4.16k | case JsonbType::T_Int128: { |
1198 | 4.16k | return sizeof(type) + sizeof(int128_t); |
1199 | 2.82k | } |
1200 | 180 | case JsonbType::T_String: |
1201 | 4.54k | case JsonbType::T_Binary: { |
1202 | 4.54k | return unpack<JsonbBinaryVal>()->numPackedBytes(); |
1203 | 180 | } |
1204 | | |
1205 | 1.73k | case JsonbType::T_Object: |
1206 | 1.87k | case JsonbType::T_Array: { |
1207 | 1.87k | return unpack<ContainerVal>()->numPackedBytes(); |
1208 | 1.73k | } |
1209 | 3 | case JsonbType::T_Decimal32: { |
1210 | 3 | return JsonbDecimal32::numPackedBytes(); |
1211 | 1.73k | } |
1212 | 3 | case JsonbType::T_Decimal64: { |
1213 | 3 | return JsonbDecimal64::numPackedBytes(); |
1214 | 1.73k | } |
1215 | 4 | case JsonbType::T_Decimal128: { |
1216 | 4 | return JsonbDecimal128::numPackedBytes(); |
1217 | 1.73k | } |
1218 | 2 | case JsonbType::T_Decimal256: { |
1219 | 2 | return JsonbDecimal256::numPackedBytes(); |
1220 | 1.73k | } |
1221 | 0 | case JsonbType::NUM_TYPES: |
1222 | 0 | break; |
1223 | 20.0k | } |
1224 | | |
1225 | 0 | throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB value type: {}", |
1226 | 0 | static_cast<int32_t>(type)); |
1227 | 20.0k | } |
1228 | | |
1229 | 6 | inline int JsonbValue::numElements() const { |
1230 | 6 | switch (type) { |
1231 | 0 | case JsonbType::T_Int8: |
1232 | 0 | case JsonbType::T_Int16: |
1233 | 0 | case JsonbType::T_Int32: |
1234 | 0 | case JsonbType::T_Int64: |
1235 | 0 | case JsonbType::T_Double: |
1236 | 0 | case JsonbType::T_Float: |
1237 | 0 | case JsonbType::T_Int128: |
1238 | 1 | case JsonbType::T_String: |
1239 | 1 | case JsonbType::T_Binary: |
1240 | 2 | case JsonbType::T_Null: |
1241 | 2 | case JsonbType::T_True: |
1242 | 2 | case JsonbType::T_False: |
1243 | 2 | case JsonbType::T_Decimal32: |
1244 | 2 | case JsonbType::T_Decimal64: |
1245 | 2 | case JsonbType::T_Decimal128: |
1246 | 2 | case JsonbType::T_Decimal256: { |
1247 | 2 | return 1; |
1248 | 2 | } |
1249 | 0 | case JsonbType::T_Object: { |
1250 | 0 | return unpack<ObjectVal>()->numElem(); |
1251 | 2 | } |
1252 | 4 | case JsonbType::T_Array: { |
1253 | 4 | return unpack<ArrayVal>()->numElem(); |
1254 | 2 | } |
1255 | 0 | case JsonbType::NUM_TYPES: |
1256 | 0 | break; |
1257 | 6 | } |
1258 | 0 | throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB value type: {}", |
1259 | 0 | static_cast<int32_t>(type)); |
1260 | 6 | } |
1261 | | |
1262 | 3 | inline bool JsonbValue::contains(JsonbValue* rhs) const { |
1263 | 3 | switch (type) { |
1264 | 1 | case JsonbType::T_Int8: |
1265 | 1 | case JsonbType::T_Int16: |
1266 | 1 | case JsonbType::T_Int32: |
1267 | 1 | case JsonbType::T_Int64: |
1268 | 1 | case JsonbType::T_Int128: { |
1269 | 1 | return rhs->isInt() && this->int_val() == rhs->int_val(); |
1270 | 1 | } |
1271 | 0 | case JsonbType::T_Double: |
1272 | 0 | case JsonbType::T_Float: { |
1273 | 0 | if (!rhs->isDouble() && !rhs->isFloat()) { |
1274 | 0 | return false; |
1275 | 0 | } |
1276 | 0 | double left = isDouble() ? unpack<JsonbDoubleVal>()->val() : unpack<JsonbFloatVal>()->val(); |
1277 | 0 | double right = rhs->isDouble() ? rhs->unpack<JsonbDoubleVal>()->val() |
1278 | 0 | : rhs->unpack<JsonbFloatVal>()->val(); |
1279 | 0 | return left == right; |
1280 | 0 | } |
1281 | 1 | case JsonbType::T_String: |
1282 | 1 | case JsonbType::T_Binary: { |
1283 | 1 | if (rhs->isString() || rhs->isBinary()) { |
1284 | 1 | const auto* str_value1 = unpack<JsonbStringVal>(); |
1285 | 1 | const auto* str_value2 = rhs->unpack<JsonbStringVal>(); |
1286 | 1 | return str_value1->length() == str_value2->length() && |
1287 | 1 | std::memcmp(str_value1->getBlob(), str_value2->getBlob(), |
1288 | 1 | str_value1->length()) == 0; |
1289 | 1 | } |
1290 | 0 | return false; |
1291 | 1 | } |
1292 | 1 | case JsonbType::T_Array: { |
1293 | 1 | int lhs_num = unpack<ArrayVal>()->numElem(); |
1294 | 1 | if (rhs->isArray()) { |
1295 | 0 | int rhs_num = rhs->unpack<ArrayVal>()->numElem(); |
1296 | 0 | if (rhs_num > lhs_num) { |
1297 | 0 | return false; |
1298 | 0 | } |
1299 | 0 | int contains_num = 0; |
1300 | 0 | for (int i = 0; i < lhs_num; ++i) { |
1301 | 0 | for (int j = 0; j < rhs_num; ++j) { |
1302 | 0 | if (unpack<ArrayVal>()->get(i)->contains(rhs->unpack<ArrayVal>()->get(j))) { |
1303 | 0 | contains_num++; |
1304 | 0 | break; |
1305 | 0 | } |
1306 | 0 | } |
1307 | 0 | } |
1308 | 0 | return contains_num == rhs_num; |
1309 | 0 | } |
1310 | 1 | for (int i = 0; i < lhs_num; ++i) { |
1311 | 1 | if (unpack<ArrayVal>()->get(i)->contains(rhs)) { |
1312 | 1 | return true; |
1313 | 1 | } |
1314 | 1 | } |
1315 | 0 | return false; |
1316 | 1 | } |
1317 | 0 | case JsonbType::T_Object: { |
1318 | 0 | if (rhs->isObject()) { |
1319 | 0 | const auto* obj_value1 = unpack<ObjectVal>(); |
1320 | 0 | const auto* obj_value2 = rhs->unpack<ObjectVal>(); |
1321 | 0 | for (int i = 0; i < obj_value2->numElem(); ++i) { |
1322 | 0 | JsonbKeyValue* key = obj_value2->getJsonbKeyValue(i); |
1323 | 0 | JsonbValue* value = obj_value1->find(key->getKeyStr(), key->klen()); |
1324 | 0 | if (value == nullptr || !value->contains(key->value())) { |
1325 | 0 | return false; |
1326 | 0 | } |
1327 | 0 | } |
1328 | 0 | return true; |
1329 | 0 | } |
1330 | 0 | return false; |
1331 | 0 | } |
1332 | 0 | case JsonbType::T_Null: { |
1333 | 0 | return rhs->isNull(); |
1334 | 0 | } |
1335 | 0 | case JsonbType::T_True: { |
1336 | 0 | return rhs->isTrue(); |
1337 | 0 | } |
1338 | 0 | case JsonbType::T_False: { |
1339 | 0 | return rhs->isFalse(); |
1340 | 0 | } |
1341 | 0 | case JsonbType::T_Decimal32: { |
1342 | 0 | if (rhs->isDecimal32()) { |
1343 | 0 | return unpack<JsonbDecimal32>()->val() == rhs->unpack<JsonbDecimal32>()->val() && |
1344 | 0 | unpack<JsonbDecimal32>()->precision == |
1345 | 0 | rhs->unpack<JsonbDecimal32>()->precision && |
1346 | 0 | unpack<JsonbDecimal32>()->scale == rhs->unpack<JsonbDecimal32>()->scale; |
1347 | 0 | } |
1348 | 0 | return false; |
1349 | 0 | } |
1350 | 0 | case JsonbType::T_Decimal64: { |
1351 | 0 | if (rhs->isDecimal64()) { |
1352 | 0 | return unpack<JsonbDecimal64>()->val() == rhs->unpack<JsonbDecimal64>()->val() && |
1353 | 0 | unpack<JsonbDecimal64>()->precision == |
1354 | 0 | rhs->unpack<JsonbDecimal64>()->precision && |
1355 | 0 | unpack<JsonbDecimal64>()->scale == rhs->unpack<JsonbDecimal64>()->scale; |
1356 | 0 | } |
1357 | 0 | return false; |
1358 | 0 | } |
1359 | 0 | case JsonbType::T_Decimal128: { |
1360 | 0 | if (rhs->isDecimal128()) { |
1361 | 0 | return unpack<JsonbDecimal128>()->val() == rhs->unpack<JsonbDecimal128>()->val() && |
1362 | 0 | unpack<JsonbDecimal128>()->precision == |
1363 | 0 | rhs->unpack<JsonbDecimal128>()->precision && |
1364 | 0 | unpack<JsonbDecimal128>()->scale == rhs->unpack<JsonbDecimal128>()->scale; |
1365 | 0 | } |
1366 | 0 | return false; |
1367 | 0 | } |
1368 | 0 | case JsonbType::T_Decimal256: { |
1369 | 0 | if (rhs->isDecimal256()) { |
1370 | 0 | return unpack<JsonbDecimal256>()->val() == rhs->unpack<JsonbDecimal256>()->val() && |
1371 | 0 | unpack<JsonbDecimal256>()->precision == |
1372 | 0 | rhs->unpack<JsonbDecimal256>()->precision && |
1373 | 0 | unpack<JsonbDecimal256>()->scale == rhs->unpack<JsonbDecimal256>()->scale; |
1374 | 0 | } |
1375 | 0 | return false; |
1376 | 0 | } |
1377 | 0 | case JsonbType::NUM_TYPES: |
1378 | 0 | break; |
1379 | 3 | } |
1380 | | |
1381 | 0 | throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB value type: {}", |
1382 | 0 | static_cast<int32_t>(type)); |
1383 | 3 | } |
1384 | | |
1385 | 96 | inline bool JsonbPath::seek(const char* key_path, size_t kp_len) { |
1386 | | //path invalid |
1387 | 96 | if (!key_path || kp_len == 0) { |
1388 | 0 | return false; |
1389 | 0 | } |
1390 | 96 | Stream stream(key_path, kp_len); |
1391 | 96 | stream.skip_whitespace(); |
1392 | 96 | if (stream.exhausted() || stream.read() != SCOPE) { |
1393 | | //path invalid |
1394 | 0 | return false; |
1395 | 0 | } |
1396 | | |
1397 | 186 | while (!stream.exhausted()) { |
1398 | 90 | stream.skip_whitespace(); |
1399 | 90 | stream.clear_leg_ptr(); |
1400 | 90 | stream.clear_leg_len(); |
1401 | | |
1402 | 90 | if (!JsonbPath::parsePath(&stream, this)) { |
1403 | | //path invalid |
1404 | 0 | return false; |
1405 | 0 | } |
1406 | 90 | } |
1407 | 96 | return true; |
1408 | 96 | } |
1409 | | |
1410 | 90 | inline bool JsonbPath::parsePath(Stream* stream, JsonbPath* path) { |
1411 | | // $[0] |
1412 | 90 | if (stream->peek() == BEGIN_ARRAY) { |
1413 | 58 | return parse_array(stream, path); |
1414 | 58 | } |
1415 | | // $.a or $.[0] |
1416 | 32 | else if (stream->peek() == BEGIN_MEMBER) { |
1417 | | // advance past the . |
1418 | 32 | stream->skip(1); |
1419 | | |
1420 | 32 | if (stream->exhausted()) { |
1421 | 0 | return false; |
1422 | 0 | } |
1423 | | |
1424 | | // $.[0] |
1425 | 32 | if (stream->peek() == BEGIN_ARRAY) { |
1426 | 0 | return parse_array(stream, path); |
1427 | 0 | } |
1428 | | // $.a |
1429 | 32 | else { |
1430 | 32 | return parse_member(stream, path); |
1431 | 32 | } |
1432 | 32 | } else { |
1433 | 0 | return false; //invalid json path |
1434 | 0 | } |
1435 | 90 | } |
1436 | | |
1437 | 58 | inline bool JsonbPath::parse_array(Stream* stream, JsonbPath* path) { |
1438 | 58 | assert(stream->peek() == BEGIN_ARRAY); |
1439 | 58 | stream->skip(1); |
1440 | 58 | if (stream->exhausted()) { |
1441 | 0 | return false; |
1442 | 0 | } |
1443 | | |
1444 | 58 | if (stream->peek() == WILDCARD) { |
1445 | 0 | stream->set_leg_ptr(const_cast<char*>(stream->position())); |
1446 | 0 | stream->add_leg_len(); |
1447 | 0 | stream->skip(1); |
1448 | 0 | if (stream->exhausted()) { |
1449 | 0 | return false; |
1450 | 0 | } |
1451 | | |
1452 | 0 | if (stream->peek() == END_ARRAY) { |
1453 | 0 | std::unique_ptr<leg_info> leg( |
1454 | 0 | new leg_info(stream->get_leg_ptr(), stream->get_leg_len(), 0, ARRAY_CODE)); |
1455 | 0 | path->add_leg_to_leg_vector(std::move(leg)); |
1456 | 0 | stream->skip(1); |
1457 | 0 | return true; |
1458 | 0 | } else { |
1459 | 0 | return false; |
1460 | 0 | } |
1461 | 0 | } |
1462 | | |
1463 | 58 | stream->set_leg_ptr(const_cast<char*>(stream->position())); |
1464 | | |
1465 | 116 | for (; !stream->exhausted() && stream->peek() != END_ARRAY; stream->advance()) { |
1466 | 58 | stream->add_leg_len(); |
1467 | 58 | } |
1468 | | |
1469 | 58 | if (stream->exhausted() || stream->peek() != END_ARRAY) { |
1470 | 0 | return false; |
1471 | 58 | } else { |
1472 | 58 | stream->skip(1); |
1473 | 58 | } |
1474 | | |
1475 | | //parse array index to int |
1476 | | |
1477 | 58 | std::string_view idx_string(stream->get_leg_ptr(), stream->get_leg_len()); |
1478 | 58 | int index = 0; |
1479 | | |
1480 | 58 | if (stream->get_leg_len() >= 4 && |
1481 | 58 | std::equal(LAST, LAST + 4, stream->get_leg_ptr(), |
1482 | 0 | [](char c1, char c2) { return std::tolower(c1) == std::tolower(c2); })) { |
1483 | 0 | auto pos = idx_string.find(MINUS); |
1484 | |
|
1485 | 0 | if (pos != std::string::npos) { |
1486 | 0 | for (size_t i = 4; i < pos; ++i) { |
1487 | 0 | if (std::isspace(idx_string[i])) { |
1488 | 0 | continue; |
1489 | 0 | } else { |
1490 | | // leading zeroes are not allowed |
1491 | 0 | LOG(WARNING) << "Non-space char in idx_string: '" << idx_string << "'"; |
1492 | 0 | return false; |
1493 | 0 | } |
1494 | 0 | } |
1495 | 0 | idx_string = idx_string.substr(pos + 1); |
1496 | 0 | idx_string = trim(idx_string); |
1497 | |
|
1498 | 0 | auto result = std::from_chars(idx_string.data(), idx_string.data() + idx_string.size(), |
1499 | 0 | index); |
1500 | 0 | if (result.ec != std::errc()) { |
1501 | 0 | LOG(WARNING) << "Invalid index in JSON path: '" << idx_string << "'"; |
1502 | 0 | return false; |
1503 | 0 | } |
1504 | |
|
1505 | 0 | } else if (stream->get_leg_len() > 4) { |
1506 | 0 | return false; |
1507 | 0 | } |
1508 | | |
1509 | 0 | std::unique_ptr<leg_info> leg(new leg_info(nullptr, 0, -index - 1, ARRAY_CODE)); |
1510 | 0 | path->add_leg_to_leg_vector(std::move(leg)); |
1511 | |
|
1512 | 0 | return true; |
1513 | 0 | } |
1514 | | |
1515 | 58 | auto result = std::from_chars(idx_string.data(), idx_string.data() + idx_string.size(), index); |
1516 | | |
1517 | 58 | if (result.ec != std::errc()) { |
1518 | 0 | return false; |
1519 | 0 | } |
1520 | | |
1521 | 58 | std::unique_ptr<leg_info> leg(new leg_info(nullptr, 0, index, ARRAY_CODE)); |
1522 | 58 | path->add_leg_to_leg_vector(std::move(leg)); |
1523 | | |
1524 | 58 | return true; |
1525 | 58 | } |
1526 | | |
1527 | 32 | inline bool JsonbPath::parse_member(Stream* stream, JsonbPath* path) { |
1528 | 32 | if (stream->exhausted()) { |
1529 | 0 | return false; |
1530 | 0 | } |
1531 | | |
1532 | 32 | if (stream->peek() == WILDCARD) { |
1533 | 0 | stream->set_leg_ptr(const_cast<char*>(stream->position())); |
1534 | 0 | stream->add_leg_len(); |
1535 | 0 | stream->skip(1); |
1536 | 0 | std::unique_ptr<leg_info> leg( |
1537 | 0 | new leg_info(stream->get_leg_ptr(), stream->get_leg_len(), 0, MEMBER_CODE)); |
1538 | 0 | path->add_leg_to_leg_vector(std::move(leg)); |
1539 | 0 | return true; |
1540 | 0 | } |
1541 | | |
1542 | 32 | stream->set_leg_ptr(const_cast<char*>(stream->position())); |
1543 | | |
1544 | 32 | const char* left_quotation_marks = nullptr; |
1545 | 32 | const char* right_quotation_marks = nullptr; |
1546 | | |
1547 | 96 | for (; !stream->exhausted(); stream->skip(1)) { |
1548 | 64 | if (stream->peek() == ESCAPE) { |
1549 | 0 | stream->add_leg_len(); |
1550 | 0 | stream->skip(1); |
1551 | 0 | stream->add_leg_len(); |
1552 | 0 | stream->set_has_escapes(true); |
1553 | 0 | if (stream->exhausted()) { |
1554 | 0 | return false; |
1555 | 0 | } |
1556 | 0 | continue; |
1557 | 64 | } else if (stream->peek() == DOUBLE_QUOTE) { |
1558 | 0 | if (left_quotation_marks == nullptr) { |
1559 | 0 | left_quotation_marks = stream->position(); |
1560 | 0 | stream->set_leg_ptr(const_cast<char*>(++left_quotation_marks)); |
1561 | 0 | continue; |
1562 | 0 | } else { |
1563 | 0 | right_quotation_marks = stream->position(); |
1564 | 0 | stream->skip(1); |
1565 | 0 | break; |
1566 | 0 | } |
1567 | 64 | } else if (stream->peek() == BEGIN_MEMBER || stream->peek() == BEGIN_ARRAY) { |
1568 | 0 | if (left_quotation_marks == nullptr) { |
1569 | 0 | break; |
1570 | 0 | } |
1571 | 0 | } |
1572 | | |
1573 | 64 | stream->add_leg_len(); |
1574 | 64 | } |
1575 | | |
1576 | 32 | if ((left_quotation_marks != nullptr && right_quotation_marks == nullptr) || |
1577 | 32 | stream->get_leg_ptr() == nullptr || stream->get_leg_len() == 0) { |
1578 | 0 | return false; //invalid json path |
1579 | 0 | } |
1580 | | |
1581 | 32 | if (stream->get_has_escapes()) { |
1582 | 0 | stream->remove_escapes(); |
1583 | 0 | } |
1584 | | |
1585 | 32 | std::unique_ptr<leg_info> leg( |
1586 | 32 | new leg_info(stream->get_leg_ptr(), stream->get_leg_len(), 0, MEMBER_CODE)); |
1587 | 32 | path->add_leg_to_leg_vector(std::move(leg)); |
1588 | | |
1589 | 32 | return true; |
1590 | 32 | } |
1591 | | |
1592 | | static_assert(is_pod_v<JsonbDocument>, "JsonbDocument must be standard layout and trivial"); |
1593 | | static_assert(is_pod_v<JsonbValue>, "JsonbValue must be standard layout and trivial"); |
1594 | | static_assert(is_pod_v<JsonbDecimal32>, "JsonbDecimal32 must be standard layout and trivial"); |
1595 | | static_assert(is_pod_v<JsonbDecimal64>, "JsonbDecimal64 must be standard layout and trivial"); |
1596 | | static_assert(is_pod_v<JsonbDecimal128>, "JsonbDecimal128 must be standard layout and trivial"); |
1597 | | static_assert(is_pod_v<JsonbDecimal256>, "JsonbDecimal256 must be standard layout and trivial"); |
1598 | | static_assert(is_pod_v<JsonbInt8Val>, "JsonbInt8Val must be standard layout and trivial"); |
1599 | | static_assert(is_pod_v<JsonbInt32Val>, "JsonbInt32Val must be standard layout and trivial"); |
1600 | | static_assert(is_pod_v<JsonbInt64Val>, "JsonbInt64Val must be standard layout and trivial"); |
1601 | | static_assert(is_pod_v<JsonbInt128Val>, "JsonbInt128Val must be standard layout and trivial"); |
1602 | | static_assert(is_pod_v<JsonbDoubleVal>, "JsonbDoubleVal must be standard layout and trivial"); |
1603 | | static_assert(is_pod_v<JsonbFloatVal>, "JsonbFloatVal must be standard layout and trivial"); |
1604 | | static_assert(is_pod_v<JsonbBinaryVal>, "JsonbBinaryVal must be standard layout and trivial"); |
1605 | | static_assert(is_pod_v<ContainerVal>, "ContainerVal must be standard layout and trivial"); |
1606 | | |
1607 | | #define ASSERT_DECIMAL_LAYOUT(type) \ |
1608 | | static_assert(offsetof(type, precision) == 0); \ |
1609 | | static_assert(offsetof(type, scale) == 4); \ |
1610 | | static_assert(offsetof(type, value) == 8); |
1611 | | |
1612 | | ASSERT_DECIMAL_LAYOUT(JsonbDecimal32) |
1613 | | ASSERT_DECIMAL_LAYOUT(JsonbDecimal64) |
1614 | | ASSERT_DECIMAL_LAYOUT(JsonbDecimal128) |
1615 | | ASSERT_DECIMAL_LAYOUT(JsonbDecimal256) |
1616 | | |
1617 | | #define ASSERT_NUMERIC_LAYOUT(type) static_assert(offsetof(type, num) == 0); |
1618 | | |
1619 | | ASSERT_NUMERIC_LAYOUT(JsonbInt8Val) |
1620 | | ASSERT_NUMERIC_LAYOUT(JsonbInt32Val) |
1621 | | ASSERT_NUMERIC_LAYOUT(JsonbInt64Val) |
1622 | | ASSERT_NUMERIC_LAYOUT(JsonbInt128Val) |
1623 | | ASSERT_NUMERIC_LAYOUT(JsonbDoubleVal) |
1624 | | |
1625 | | static_assert(offsetof(JsonbBinaryVal, size) == 0); |
1626 | | static_assert(offsetof(JsonbBinaryVal, payload) == 4); |
1627 | | |
1628 | | static_assert(offsetof(ContainerVal, size) == 0); |
1629 | | static_assert(offsetof(ContainerVal, payload) == 4); |
1630 | | |
1631 | | #pragma pack(pop) |
1632 | | #if defined(__clang__) |
1633 | | #pragma clang diagnostic pop |
1634 | | #endif |
1635 | | } // namespace doris |
1636 | | |
1637 | | #endif // JSONB_JSONBDOCUMENT_H |