/root/doris/be/src/util/jsonb_document.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2014, Facebook, Inc. |
3 | | * All rights reserved. |
4 | | * |
5 | | * This source code is licensed under the BSD-style license found in the |
6 | | * LICENSE file in the root directory of this source tree. An additional grant |
7 | | * of patent rights can be found in the PATENTS file in the same directory. |
8 | | * |
9 | | */ |
10 | | |
11 | | /* |
12 | | * This header defines JsonbDocument, JsonbKeyValue, and various value classes |
13 | | * which are derived from JsonbValue, and a forward iterator for container |
14 | | * values - essentially everything that is related to JSONB binary data |
15 | | * structures. |
16 | | * |
17 | | * Implementation notes: |
18 | | * |
19 | | * None of the classes in this header file can be instantiated directly (i.e. |
20 | | * you cannot create a JsonbKeyValue or JsonbValue object - all constructors |
21 | | * are declared non-public). We use the classes as wrappers on the packed JSONB |
22 | | * bytes (serialized), and cast the classes (types) to the underlying packed |
23 | | * byte array. |
24 | | * |
25 | | * For the same reason, we cannot define any JSONB value class to be virtual, |
26 | | * since we never call constructors, and will not instantiate vtbl and vptrs. |
27 | | * |
28 | | * Therefore, the classes are defined as packed structures (i.e. no data |
29 | | * alignment and padding), and the private member variables of the classes are |
30 | | * defined precisely in the same order as the JSONB spec. This ensures we |
31 | | * access the packed JSONB bytes correctly. |
32 | | * |
33 | | * The packed structures are highly optimized for in-place operations with low |
34 | | * overhead. The reads (and in-place writes) are performed directly on packed |
35 | | * bytes. There is no memory allocation at all at runtime. |
36 | | * |
37 | | * For updates/writes of values that will expand the original JSONB size, the |
38 | | * write will fail, and the caller needs to handle buffer increase. |
39 | | * |
40 | | * ** Iterator ** |
41 | | * Both ObjectVal class and ArrayVal class have iterator type that you can use |
42 | | * to declare an iterator on a container object to go through the key-value |
43 | | * pairs or value list. The iterator has both non-const and const types. |
44 | | * |
45 | | * Note: iterators are forward direction only. |
46 | | * |
47 | | * ** Query ** |
48 | | * Querying into containers is through the member functions find (for key/value |
49 | | * pairs) and get (for array elements), and is in streaming style. We don't |
50 | | * need to read/scan the whole JSONB packed bytes in order to return results. |
51 | | * Once the key/index is found, we will stop search. You can use text to query |
52 | | * both objects and array (for array, text will be converted to integer index), |
53 | | * and use index to retrieve from array. Array index is 0-based. |
54 | | * |
55 | | * ** External dictionary ** |
56 | | * During query processing, you can also pass a call-back function, so the |
57 | | * search will first try to check if the key string exists in the dictionary. |
58 | | * If so, search will be based on the id instead of the key string. |
59 | | * @author Tian Xia <tianx@fb.com> |
60 | | * |
61 | | * this file is copied from |
62 | | * https://github.com/facebook/mysql-5.6/blob/fb-mysql-5.6.35/fbson/FbsonDocument.h |
63 | | * and modified by Doris |
64 | | */ |
65 | | |
66 | | #ifndef JSONB_JSONBDOCUMENT_H |
67 | | #define JSONB_JSONBDOCUMENT_H |
68 | | |
69 | | #include <algorithm> |
70 | | #include <cctype> |
71 | | #include <charconv> |
72 | | #include <cstddef> |
73 | | #include <cstdint> |
74 | | #include <string> |
75 | | #include <type_traits> |
76 | | |
77 | | #include "common/compiler_util.h" // IWYU pragma: keep |
78 | | #include "common/exception.h" |
79 | | #include "common/status.h" |
80 | | #include "vec/core/types.h" |
81 | | |
82 | | // #include "util/string_parser.hpp" |
83 | | |
84 | | // Concept to check for supported decimal types |
85 | | template <typename T> |
86 | | concept JsonbDecimalType = std::same_as<T, doris::vectorized::Decimal256> || |
87 | | std::same_as<T, doris::vectorized::Decimal64> || |
88 | | std::same_as<T, doris::vectorized::Decimal128V3> || |
89 | | std::same_as<T, doris::vectorized::Decimal32>; |
90 | | |
91 | | namespace doris { |
92 | | |
/// True when T is both a trivial type and a standard-layout type.
template <typename T>
constexpr bool is_pod_v = std::conjunction_v<std::is_trivial<T>, std::is_standard_layout<T>>;
95 | | |
96 | | struct JsonbStringVal; |
97 | | struct ObjectVal; |
98 | | struct ArrayVal; |
99 | | struct JsonbBinaryVal; |
100 | | struct ContainerVal; |
101 | | |
102 | | template <JsonbDecimalType T> |
103 | | struct JsonbDecimalVal; |
104 | | |
105 | | using JsonbDecimal256 = JsonbDecimalVal<vectorized::Decimal256>; |
106 | | using JsonbDecimal128 = JsonbDecimalVal<vectorized::Decimal128V3>; |
107 | | using JsonbDecimal64 = JsonbDecimalVal<vectorized::Decimal64>; |
108 | | using JsonbDecimal32 = JsonbDecimalVal<vectorized::Decimal32>; |
109 | | |
110 | | template <typename T> |
111 | | requires std::is_integral_v<T> || std::is_floating_point_v<T> |
112 | | struct NumberValT; |
113 | | |
114 | | using JsonbInt8Val = NumberValT<int8_t>; |
115 | | using JsonbInt16Val = NumberValT<int16_t>; |
116 | | using JsonbInt32Val = NumberValT<int32_t>; |
117 | | using JsonbInt64Val = NumberValT<int64_t>; |
118 | | using JsonbInt128Val = NumberValT<int128_t>; |
119 | | using JsonbDoubleVal = NumberValT<double>; |
120 | | using JsonbFloatVal = NumberValT<float>; |
121 | | |
122 | | template <typename T> |
123 | | concept JsonbPodType = (std::same_as<T, JsonbStringVal> || std::same_as<T, ObjectVal> || |
124 | | std::same_as<T, ContainerVal> || std::same_as<T, ArrayVal> || |
125 | | std::same_as<T, JsonbBinaryVal> || std::same_as<T, JsonbDecimal32> || |
126 | | std::same_as<T, JsonbDecimal64> || std::same_as<T, JsonbDecimal128> || |
127 | | std::same_as<T, JsonbDecimal256> || std::same_as<T, JsonbDecimal32> || |
128 | | std::same_as<T, JsonbInt8Val> || std::same_as<T, JsonbInt16Val> || |
129 | | std::same_as<T, JsonbInt32Val> || std::same_as<T, JsonbInt64Val> || |
130 | | std::same_as<T, JsonbInt128Val> || std::same_as<T, JsonbFloatVal> || |
131 | | std::same_as<T, JsonbFloatVal> || std::same_as<T, JsonbDoubleVal>); |
132 | | |
133 | | #if defined(__clang__) |
134 | | #pragma clang diagnostic push |
135 | | #pragma clang diagnostic ignored "-Wzero-length-array" |
136 | | #endif |
137 | | #pragma pack(push, 1) |
138 | | |
139 | 6.96k | #define JSONB_VER 1 |
140 | | |
141 | | using int128_t = __int128; |
142 | | |
143 | | // forward declaration |
144 | | struct JsonbValue; |
145 | | |
146 | | const int MaxNestingLevel = 100; |
147 | | |
148 | | /* |
149 | | * JsonbType defines the original 10 primitive types and 2 container types |
150 | | * described below, plus later additions (int128, float, DecimalV3 types). |
151 | | * NOTE: Do NOT modify the existing values or their order in this enum. |
152 | | * You may only append new entries at the end before `NUM_TYPES`. |
153 | | * This enum will be used in serialized data and/or persisted data. |
154 | | * Changing existing values may break backward compatibility |
155 | | * with previously stored or transmitted data. |
156 | | * |
157 | | * primitive_value ::= |
158 | | * 0x00 //null value (0 byte) |
159 | | * | 0x01 //boolean true (0 byte) |
160 | | * | 0x02 //boolean false (0 byte) |
161 | | * | 0x03 int8 //char/int8 (1 byte) |
162 | | * | 0x04 int16 //int16 (2 bytes) |
163 | | * | 0x05 int32 //int32 (4 bytes) |
164 | | * | 0x06 int64 //int64 (8 bytes) |
165 | | * | 0x07 double //floating point (8 bytes) |
166 | | * | 0x08 string //variable length string |
167 | | * | 0x09 binary //variable length binary |
168 | | * |
169 | | * container ::= |
170 | | * 0x0A int32 key_value_list //object, int32 is the total bytes of the object |
171 | | * | 0x0B int32 value_list //array, int32 is the total bytes of the array |
172 | | */ |
173 | | enum class JsonbType : char { |
174 | | T_Null = 0x00, |
175 | | T_True = 0x01, |
176 | | T_False = 0x02, |
177 | | T_Int8 = 0x03, |
178 | | T_Int16 = 0x04, |
179 | | T_Int32 = 0x05, |
180 | | T_Int64 = 0x06, |
181 | | T_Double = 0x07, |
182 | | T_String = 0x08, |
183 | | T_Binary = 0x09, |
184 | | T_Object = 0x0A, |
185 | | T_Array = 0x0B, |
186 | | T_Int128 = 0x0C, |
187 | | T_Float = 0x0D, |
188 | | T_Decimal32 = 0x0E, // DecimalV3 only |
189 | | T_Decimal64 = 0x0F, // DecimalV3 only |
190 | | T_Decimal128 = 0x10, // DecimalV3 only |
191 | | T_Decimal256 = 0x11, // DecimalV3 only |
192 | | NUM_TYPES, |
193 | | }; |
194 | | |
// Tokens and leg-type codes used when parsing a JSON path.
// Declared `inline` so that every translation unit including this header
// shares a single definition of each constant.
inline constexpr char SCOPE = '$';
inline constexpr char BEGIN_MEMBER = '.';
inline constexpr char BEGIN_ARRAY = '[';
inline constexpr char END_ARRAY = ']';
inline constexpr char DOUBLE_QUOTE = '"';
inline constexpr char WILDCARD = '*';
inline constexpr char MINUS = '-';
inline constexpr char LAST[] = "last";
inline constexpr char ESCAPE = '\\';
// leg_info::type values: 0 is a member leg, 1 is an array leg
inline constexpr unsigned int MEMBER_CODE = 0;
inline constexpr unsigned int ARRAY_CODE = 1;
207 | | |
208 | | /* |
209 | | * JsonbDocument is the main object that accesses and queries JSONB packed |
210 | | * bytes. NOTE: JsonbDocument only allows object container as the top level |
211 | | * JSONB value. However, you can use the static method "createValue" to get any |
212 | | * JsonbValue object from the packed bytes. |
213 | | * |
214 | | * JsonbDocument object also dereferences to an object container value |
215 | | * (ObjectVal) once JSONB is loaded. |
216 | | * |
217 | | * ** Load ** |
218 | | * JsonbDocument is usable after loading packed bytes (memory location) into |
219 | | * the object. We only need the header and first few bytes of the payload after |
220 | | * header to verify the JSONB. |
221 | | * |
222 | | * Note: creating an JsonbDocument (through createDocument) does not allocate |
223 | | * any memory. The document object is an efficient wrapper on the packed bytes |
224 | | * which is accessed directly. |
225 | | * |
226 | | * ** Query ** |
227 | | * Query is through dereferencing into ObjectVal. |
228 | | */ |
229 | | class JsonbDocument { |
230 | | public: |
231 | | // create an JsonbDocument object from JSONB packed bytes |
232 | | [[nodiscard]] static Status checkAndCreateDocument(const char* pb, size_t size, |
233 | | JsonbDocument** doc); |
234 | | |
235 | | // create an JsonbValue from JSONB packed bytes |
236 | | static JsonbValue* createValue(const char* pb, size_t size); |
237 | | |
238 | 0 | uint8_t version() const { return header_.ver_; } |
239 | | |
240 | 2.03k | JsonbValue* getValue() { return ((JsonbValue*)payload_); } |
241 | | |
242 | | void setValue(const JsonbValue* value); |
243 | | |
244 | | unsigned int numPackedBytes() const; |
245 | | |
246 | | // ObjectVal* operator->(); |
247 | | |
248 | | const ObjectVal* operator->() const; |
249 | | |
250 | 0 | bool operator==(const JsonbDocument& other) const { |
251 | 0 | assert(false); |
252 | 0 | return false; |
253 | 0 | } |
254 | | |
255 | 0 | bool operator!=(const JsonbDocument& other) const { |
256 | 0 | assert(false); |
257 | 0 | return false; |
258 | 0 | } |
259 | | |
260 | 0 | bool operator<=(const JsonbDocument& other) const { |
261 | 0 | assert(false); |
262 | 0 | return false; |
263 | 0 | } |
264 | | |
265 | 0 | bool operator>=(const JsonbDocument& other) const { |
266 | 0 | assert(false); |
267 | 0 | return false; |
268 | 0 | } |
269 | | |
270 | 0 | bool operator<(const JsonbDocument& other) const { |
271 | 0 | assert(false); |
272 | 0 | return false; |
273 | 0 | } |
274 | | |
275 | 0 | bool operator>(const JsonbDocument& other) const { |
276 | 0 | assert(false); |
277 | 0 | return false; |
278 | 0 | } |
279 | | |
280 | | private: |
281 | | /* |
282 | | * JsonbHeader class defines JSONB header (internal to JsonbDocument). |
283 | | * |
284 | | * Currently it only contains version information (1-byte). We may expand the |
285 | | * header to include checksum of the JSONB binary for more security. |
286 | | */ |
287 | | struct JsonbHeader { |
288 | | uint8_t ver_; |
289 | | } header_; |
290 | | |
291 | | char payload_[0]; |
292 | | }; |
293 | | |
/// A simple input stream class for the JSON path parser.
///
/// Besides the read cursor over [position, end), the stream carries scratch
/// state for the path leg currently being parsed: a pointer to the leg, its
/// length, and whether it contains escape characters.
class Stream {
public:
    /// Creates an input stream reading from a character string.
    /// @param string the input string
    /// @param length the length of the input string
    Stream(const char* string, size_t length) : m_position(string), m_end(string + length) {}

    /// Returns a pointer to the current position in the stream.
    const char* position() const { return m_position; }

    /// Returns a pointer to the position just after the end of the stream.
    const char* end() const { return m_end; }

    /// Returns the number of bytes remaining in the stream.
    size_t remaining() const {
        assert(m_position <= m_end);
        return static_cast<size_t>(m_end - m_position);
    }

    /// Tells if the stream has been exhausted.
    bool exhausted() const { return remaining() == 0; }

    /// Reads the next byte from the stream and moves the position forward.
    /// The stream must not be exhausted.
    char read() {
        assert(!exhausted());
        return *m_position++;
    }

    /// Reads the next byte from the stream without moving the position forward.
    /// The stream must not be exhausted.
    char peek() const {
        assert(!exhausted());
        return *m_position;
    }

    /// Moves the position to the next non-whitespace character.
    void skip_whitespace() {
        // std::isspace requires a value representable as unsigned char; passing
        // a negative plain char (any byte >= 0x80 where char is signed) is
        // undefined behavior, so convert first.
        m_position = std::find_if_not(m_position, m_end, [](char c) {
            return std::isspace(static_cast<unsigned char>(c)) != 0;
        });
    }

    /// Moves the position n bytes forward, then skips any whitespace that follows.
    void skip(size_t n) {
        assert(remaining() >= n);
        m_position += n;
        skip_whitespace();
    }

    void clear_leg_ptr() { leg_ptr = nullptr; }

    void set_leg_ptr(char* ptr) { leg_ptr = ptr; }

    char* get_leg_ptr() { return leg_ptr; }

    void clear_leg_len() { leg_len = 0; }

    void add_leg_len() { leg_len++; }

    unsigned int get_leg_len() const { return leg_len; }

    /// Removes every backslash from the current leg in place, writes a
    /// terminating '\0' after the compacted text and updates the leg length.
    void remove_escapes() {
        unsigned int new_len = 0;
        for (unsigned int i = 0; i < leg_len; ++i) {
            if (leg_ptr[i] != '\\') {
                leg_ptr[new_len++] = leg_ptr[i];
            }
        }
        leg_ptr[new_len] = '\0';
        leg_len = new_len;
    }

    void set_has_escapes(bool has) { has_escapes = has; }

    bool get_has_escapes() const { return has_escapes; }

private:
    /// The current position in the stream.
    const char* m_position = nullptr;

    /// The end of the stream.
    const char* const m_end;

    ///path leg ptr
    char* leg_ptr = nullptr;

    ///path leg len (zero until a leg has been measured)
    unsigned int leg_len = 0;

    ///Whether to contain escape characters
    bool has_escapes = false;
};
387 | | |
388 | | struct leg_info { |
389 | | ///path leg ptr |
390 | | char* leg_ptr = nullptr; |
391 | | |
392 | | ///path leg len |
393 | | unsigned int leg_len; |
394 | | |
395 | | ///array_index |
396 | | int array_index; |
397 | | |
398 | | ///type: 0 is member 1 is array |
399 | | unsigned int type; |
400 | | |
401 | 0 | bool to_string(std::string* str) const { |
402 | 0 | if (type == MEMBER_CODE) { |
403 | 0 | str->push_back(BEGIN_MEMBER); |
404 | 0 | str->append(leg_ptr, leg_len); |
405 | 0 | return true; |
406 | 0 | } else if (type == ARRAY_CODE) { |
407 | 0 | str->push_back(BEGIN_ARRAY); |
408 | 0 | std::string int_str = std::to_string(array_index); |
409 | 0 | str->append(int_str); |
410 | 0 | str->push_back(END_ARRAY); |
411 | 0 | return true; |
412 | 0 | } else { |
413 | 0 | return false; |
414 | 0 | } |
415 | 0 | } |
416 | | }; |
417 | | |
418 | | class JsonbPath { |
419 | | public: |
420 | | // parse json path |
421 | | static bool parsePath(Stream* stream, JsonbPath* path); |
422 | | |
423 | | static bool parse_array(Stream* stream, JsonbPath* path); |
424 | | static bool parse_member(Stream* stream, JsonbPath* path); |
425 | | |
426 | | //return true if json path valid else return false |
427 | | bool seek(const char* string, size_t length); |
428 | | |
429 | 850 | void add_leg_to_leg_vector(std::unique_ptr<leg_info> leg) { |
430 | 850 | leg_vector.emplace_back(leg.release()); |
431 | 850 | } |
432 | | |
433 | 0 | void pop_leg_from_leg_vector() { leg_vector.pop_back(); } |
434 | | |
435 | 0 | bool to_string(std::string* res) const { |
436 | 0 | res->push_back(SCOPE); |
437 | 0 | for (const auto& leg : leg_vector) { |
438 | 0 | auto valid = leg->to_string(res); |
439 | 0 | if (!valid) { |
440 | 0 | return false; |
441 | 0 | } |
442 | 0 | } |
443 | 0 | return true; |
444 | 0 | } |
445 | | |
446 | 1.05k | size_t get_leg_vector_size() { return leg_vector.size(); } |
447 | | |
448 | 2.39k | leg_info* get_leg_from_leg_vector(size_t i) { return leg_vector[i].get(); } |
449 | | |
450 | 0 | void clean() { leg_vector.clear(); } |
451 | | |
452 | | private: |
453 | | std::vector<std::unique_ptr<leg_info>> leg_vector; |
454 | | }; |
455 | | |
/*
 * JsonbFwdIteratorT implements JSONB's iterator template.
 *
 * Iter_Type is a raw pointer to the element type; the element type must
 * provide numPackedBytes(), which is the byte distance to the next element.
 *
 * Note: it is a FORWARD iterator only due to the design of JSONB format.
 */
template <class Iter_Type, class Cont_Type>
class JsonbFwdIteratorT {
public:
    using iterator = Iter_Type;
    using pointer = typename std::iterator_traits<Iter_Type>::pointer;
    using reference = typename std::iterator_traits<Iter_Type>::reference;

    explicit JsonbFwdIteratorT() : current_(nullptr) {}
    explicit JsonbFwdIteratorT(const iterator& i) : current_(i) {}

    // allow non-const to const iterator conversion (same container type)
    template <class Iter_Ty>
    JsonbFwdIteratorT(const JsonbFwdIteratorT<Iter_Ty, Cont_Type>& rhs) : current_(rhs.base()) {}

    bool operator==(const JsonbFwdIteratorT& rhs) const { return current_ == rhs.current_; }

    bool operator!=(const JsonbFwdIteratorT& rhs) const { return !(*this == rhs); }

    bool operator<(const JsonbFwdIteratorT& rhs) const { return current_ < rhs.current_; }

    // Strictly greater. Negating operator< would also report true for equal
    // iterators, so compare with the operands swapped instead.
    bool operator>(const JsonbFwdIteratorT& rhs) const { return rhs.current_ < current_; }

    JsonbFwdIteratorT& operator++() {
        step();
        return *this;
    }

    JsonbFwdIteratorT operator++(int) {
        auto previous = *this;
        step();
        return previous;
    }

    explicit operator pointer() const { return current_; }

    reference operator*() const { return *current_; }

    pointer operator->() const { return current_; }

    iterator base() const { return current_; }

private:
    // Moves past the current element by its packed size. The byte pointer keeps
    // the constness of the element type so no qualifier is cast away.
    void step() {
        using element_type = std::remove_pointer_t<iterator>;
        using byte_pointer =
                std::conditional_t<std::is_const_v<element_type>, const char*, char*>;
        current_ = reinterpret_cast<iterator>(reinterpret_cast<byte_pointer>(current_) +
                                              current_->numPackedBytes());
    }

    iterator current_;
};
505 | | |
506 | | using hDictInsert = int (*)(const char*, unsigned int); |
507 | | using hDictFind = int (*)(const char*, unsigned int); |
508 | | |
509 | | using JsonbTypeUnder = std::underlying_type_t<JsonbType>; |
510 | | |
511 | | /* |
512 | | * JsonbKeyValue class defines JSONB key type, as described below. |
513 | | * |
514 | | * key ::= |
515 | | * 0x00 int8 //1-byte dictionary id |
516 | | * | int8 (byte*) //int8 (>0) is the size of the key string |
517 | | * |
518 | | * value ::= primitive_value | container |
519 | | * |
520 | | * JsonbKeyValue can be either an id mapping to the key string in an external |
521 | | * dictionary, or it is the original key string. Whether to read an id or a |
522 | | * string is decided by the first byte (size). |
523 | | * |
524 | | * Note: a key object must be followed by a value object. Therefore, a key |
525 | | * object implicitly refers to a key-value pair, and you can get the value |
526 | | * object right after the key object. The function numPackedBytes hence |
527 | | * indicates the total size of the key-value pair, so that we will be able go |
528 | | * to next pair from the key. |
529 | | * |
530 | | * ** Dictionary size ** |
531 | | * By default, the dictionary size is 255 (1-byte). Users can define |
532 | | * "USE_LARGE_DICT" to increase the dictionary size to 65535 (2-byte). |
533 | | */ |
534 | | class JsonbKeyValue { |
535 | | public: |
536 | | // now we use sMaxKeyId to represent an empty key |
537 | | static const int sMaxKeyId = 65535; |
538 | | using keyid_type = uint16_t; |
539 | | |
540 | | JsonbKeyValue() = delete; |
541 | | |
542 | | static const uint8_t sMaxKeyLen = 64; |
543 | | |
544 | | // size of the key. 0 indicates it is stored as id |
545 | 1.34k | uint8_t klen() const { return size; } |
546 | | |
547 | | // get the key string. Note the string may not be null terminated. |
548 | 723 | const char* getKeyStr() const { return key.str_; } |
549 | | |
550 | 9.22k | keyid_type getKeyId() const { return key.id_; } |
551 | | |
552 | 35.0k | unsigned int keyPackedBytes() const { |
553 | 35.0k | return size ? (sizeof(size) + size) : (sizeof(size) + sizeof(keyid_type)); |
554 | 35.0k | } |
555 | | |
556 | 17.5k | JsonbValue* value() const { return (JsonbValue*)(((char*)this) + keyPackedBytes()); } |
557 | | |
558 | | // size of the total packed bytes (key+value) |
559 | | unsigned int numPackedBytes() const; |
560 | | |
561 | | uint8_t size; |
562 | | |
563 | | union key_ { |
564 | | keyid_type id_; |
565 | | char str_[1]; |
566 | | } key; |
567 | | }; |
568 | | |
569 | | /* |
570 | | * JsonbValue is the base class of all JSONB types. It contains only one member |
571 | | * variable - type info, which can be retrieved by member functions is[Type]() |
572 | | * or type(). |
573 | | */ |
574 | | struct JsonbValue { |
575 | | static const uint32_t sMaxValueLen = 1 << 24; // 16M |
576 | | |
577 | 4.15k | bool isNull() const { return (type == JsonbType::T_Null); } |
578 | 126 | bool isTrue() const { return (type == JsonbType::T_True); } |
579 | 116 | bool isFalse() const { return (type == JsonbType::T_False); } |
580 | 94 | bool isInt() const { return isInt8() || isInt16() || isInt32() || isInt64() || isInt128(); } |
581 | 346 | bool isInt8() const { return (type == JsonbType::T_Int8); } |
582 | 307 | bool isInt16() const { return (type == JsonbType::T_Int16); } |
583 | 277 | bool isInt32() const { return (type == JsonbType::T_Int32); } |
584 | 207 | bool isInt64() const { return (type == JsonbType::T_Int64); } |
585 | 146 | bool isDouble() const { return (type == JsonbType::T_Double); } |
586 | 16 | bool isFloat() const { return (type == JsonbType::T_Float); } |
587 | 72 | bool isString() const { return (type == JsonbType::T_String); } |
588 | 1.07k | bool isBinary() const { return (type == JsonbType::T_Binary); } |
589 | 15 | bool isObject() const { return (type == JsonbType::T_Object); } |
590 | 1 | bool isArray() const { return (type == JsonbType::T_Array); } |
591 | 68 | bool isInt128() const { return (type == JsonbType::T_Int128); } |
592 | 4 | bool isDecimal() const { |
593 | 4 | return (type == JsonbType::T_Decimal32 || type == JsonbType::T_Decimal64 || |
594 | 4 | type == JsonbType::T_Decimal128 || type == JsonbType::T_Decimal256); |
595 | 4 | } |
596 | 1 | bool isDecimal32() const { return (type == JsonbType::T_Decimal32); } |
597 | 1 | bool isDecimal64() const { return (type == JsonbType::T_Decimal64); } |
598 | 1 | bool isDecimal128() const { return (type == JsonbType::T_Decimal128); } |
599 | 1 | bool isDecimal256() const { return (type == JsonbType::T_Decimal256); } |
600 | | |
601 | 0 | const char* typeName() const { |
602 | 0 | switch (type) { |
603 | 0 | case JsonbType::T_Null: |
604 | 0 | return "null"; |
605 | 0 | case JsonbType::T_True: |
606 | 0 | case JsonbType::T_False: |
607 | 0 | return "bool"; |
608 | 0 | case JsonbType::T_Int8: |
609 | 0 | case JsonbType::T_Int16: |
610 | 0 | case JsonbType::T_Int32: |
611 | 0 | return "int"; |
612 | 0 | case JsonbType::T_Int64: |
613 | 0 | return "bigint"; |
614 | 0 | case JsonbType::T_Int128: |
615 | 0 | return "largeint"; |
616 | 0 | case JsonbType::T_Double: |
617 | 0 | return "double"; |
618 | 0 | case JsonbType::T_Float: |
619 | 0 | return "float"; |
620 | 0 | case JsonbType::T_String: |
621 | 0 | return "string"; |
622 | 0 | case JsonbType::T_Binary: |
623 | 0 | return "binary"; |
624 | 0 | case JsonbType::T_Object: |
625 | 0 | return "object"; |
626 | 0 | case JsonbType::T_Array: |
627 | 0 | return "array"; |
628 | 0 | case JsonbType::T_Decimal32: |
629 | 0 | return "Decimal32"; |
630 | 0 | case JsonbType::T_Decimal64: |
631 | 0 | return "Decimal64"; |
632 | 0 | case JsonbType::T_Decimal128: |
633 | 0 | return "Decimal128"; |
634 | 0 | case JsonbType::T_Decimal256: |
635 | 0 | return "Decimal256"; |
636 | 0 | default: |
637 | 0 | return "unknown"; |
638 | 0 | } |
639 | 0 | } |
640 | | |
641 | | // size of the total packed bytes |
642 | | unsigned int numPackedBytes() const; |
643 | | |
644 | | // size of the value in bytes |
645 | | unsigned int size() const; |
646 | | |
647 | | //Get the number of jsonbvalue elements |
648 | | int numElements() const; |
649 | | |
650 | | //Whether to include the jsonbvalue rhs |
651 | | bool contains(JsonbValue* rhs) const; |
652 | | |
653 | | // find the JSONB value by JsonbPath |
654 | | JsonbValue* findValue(JsonbPath& path, hDictFind handler); |
655 | | friend class JsonbDocument; |
656 | | |
657 | | JsonbType type; // type info |
658 | | |
659 | | char payload[0]; // payload, which is the packed bytes of the value |
660 | | |
661 | | /** |
662 | | * @brief Unpacks the underlying Jsonb binary content as a pointer to type `T`. |
663 | | * |
664 | | * @tparam T A POD (Plain Old Data) type that must satisfy the `JsonbPodType` concept. |
665 | | * This ensures that `T` is trivially copyable, standard-layout, and safe to |
666 | | * reinterpret from raw bytes without invoking undefined behavior. |
667 | | * |
668 | | * @return A pointer to a `const T` object, interpreted from the internal buffer. |
669 | | * |
670 | | * @note The caller must ensure that the current JsonbValue actually contains data |
671 | | * compatible with type `T`, otherwise the result is undefined. |
672 | | */ |
673 | | template <JsonbPodType T> |
674 | 42.1k | const T* unpack() const { |
675 | 42.1k | static_assert(is_pod_v<T>, "T must be a POD type"); |
676 | 42.1k | return reinterpret_cast<const T*>(payload); |
677 | 42.1k | } _ZNK5doris10JsonbValue6unpackINS_9ObjectValEEEPKT_v Line | Count | Source | 674 | 19.6k | const T* unpack() const { | 675 | 19.6k | static_assert(is_pod_v<T>, "T must be a POD type"); | 676 | 19.6k | return reinterpret_cast<const T*>(payload); | 677 | 19.6k | } |
_ZNK5doris10JsonbValue6unpackINS_10NumberValTIaEEEEPKT_v Line | Count | Source | 674 | 793 | const T* unpack() const { | 675 | 793 | static_assert(is_pod_v<T>, "T must be a POD type"); | 676 | 793 | return reinterpret_cast<const T*>(payload); | 677 | 793 | } |
_ZNK5doris10JsonbValue6unpackINS_10NumberValTIsEEEEPKT_v Line | Count | Source | 674 | 131 | const T* unpack() const { | 675 | 131 | static_assert(is_pod_v<T>, "T must be a POD type"); | 676 | 131 | return reinterpret_cast<const T*>(payload); | 677 | 131 | } |
_ZNK5doris10JsonbValue6unpackINS_10NumberValTIiEEEEPKT_v Line | Count | Source | 674 | 3.46k | const T* unpack() const { | 675 | 3.46k | static_assert(is_pod_v<T>, "T must be a POD type"); | 676 | 3.46k | return reinterpret_cast<const T*>(payload); | 677 | 3.46k | } |
_ZNK5doris10JsonbValue6unpackINS_10NumberValTIlEEEEPKT_v Line | Count | Source | 674 | 1.87k | const T* unpack() const { | 675 | 1.87k | static_assert(is_pod_v<T>, "T must be a POD type"); | 676 | 1.87k | return reinterpret_cast<const T*>(payload); | 677 | 1.87k | } |
_ZNK5doris10JsonbValue6unpackINS_10NumberValTInEEEEPKT_v Line | Count | Source | 674 | 4.16k | const T* unpack() const { | 675 | 4.16k | static_assert(is_pod_v<T>, "T must be a POD type"); | 676 | 4.16k | return reinterpret_cast<const T*>(payload); | 677 | 4.16k | } |
_ZNK5doris10JsonbValue6unpackINS_14JsonbBinaryValEEEPKT_v Line | Count | Source | 674 | 9.00k | const T* unpack() const { | 675 | 9.00k | static_assert(is_pod_v<T>, "T must be a POD type"); | 676 | 9.00k | return reinterpret_cast<const T*>(payload); | 677 | 9.00k | } |
_ZNK5doris10JsonbValue6unpackINS_12ContainerValEEEPKT_v Line | Count | Source | 674 | 2.26k | const T* unpack() const { | 675 | 2.26k | static_assert(is_pod_v<T>, "T must be a POD type"); | 676 | 2.26k | return reinterpret_cast<const T*>(payload); | 677 | 2.26k | } |
_ZNK5doris10JsonbValue6unpackINS_8ArrayValEEEPKT_v Line | Count | Source | 674 | 307 | const T* unpack() const { | 675 | 307 | static_assert(is_pod_v<T>, "T must be a POD type"); | 676 | 307 | return reinterpret_cast<const T*>(payload); | 677 | 307 | } |
_ZNK5doris10JsonbValue6unpackINS_10NumberValTIdEEEEPKT_v Line | Count | Source | 674 | 177 | const T* unpack() const { | 675 | 177 | static_assert(is_pod_v<T>, "T must be a POD type"); | 676 | 177 | return reinterpret_cast<const T*>(payload); | 677 | 177 | } |
_ZNK5doris10JsonbValue6unpackINS_10NumberValTIfEEEEPKT_v Line | Count | Source | 674 | 16 | const T* unpack() const { | 675 | 16 | static_assert(is_pod_v<T>, "T must be a POD type"); | 676 | 16 | return reinterpret_cast<const T*>(payload); | 677 | 16 | } |
_ZNK5doris10JsonbValue6unpackINS_14JsonbStringValEEEPKT_v Line | Count | Source | 674 | 310 | const T* unpack() const { | 675 | 310 | static_assert(is_pod_v<T>, "T must be a POD type"); | 676 | 310 | return reinterpret_cast<const T*>(payload); | 677 | 310 | } |
_ZNK5doris10JsonbValue6unpackINS_15JsonbDecimalValINS_10vectorized7DecimalIiEEEEEEPKT_v Line | Count | Source | 674 | 2 | const T* unpack() const { | 675 | 2 | static_assert(is_pod_v<T>, "T must be a POD type"); | 676 | 2 | return reinterpret_cast<const T*>(payload); | 677 | 2 | } |
_ZNK5doris10JsonbValue6unpackINS_15JsonbDecimalValINS_10vectorized7DecimalIlEEEEEEPKT_v Line | Count | Source | 674 | 2 | const T* unpack() const { | 675 | 2 | static_assert(is_pod_v<T>, "T must be a POD type"); | 676 | 2 | return reinterpret_cast<const T*>(payload); | 677 | 2 | } |
_ZNK5doris10JsonbValue6unpackINS_15JsonbDecimalValINS_10vectorized12Decimal128V3EEEEEPKT_v Line | Count | Source | 674 | 2 | const T* unpack() const { | 675 | 2 | static_assert(is_pod_v<T>, "T must be a POD type"); | 676 | 2 | return reinterpret_cast<const T*>(payload); | 677 | 2 | } |
_ZNK5doris10JsonbValue6unpackINS_15JsonbDecimalValINS_10vectorized7DecimalIN4wide7integerILm256EiEEEEEEEEPKT_v Line | Count | Source | 674 | 2 | const T* unpack() const { | 675 | 2 | static_assert(is_pod_v<T>, "T must be a POD type"); | 676 | 2 | return reinterpret_cast<const T*>(payload); | 677 | 2 | } |
|
678 | | |
679 | | // /** |
680 | | // * @brief Unpacks the underlying Jsonb binary content as a pointer to type `T`. |
681 | | // * |
682 | | // * @tparam T A POD (Plain Old Data) type that must satisfy the `JsonbPodType` concept. |
683 | | // * This ensures that `T` is trivially copyable, standard-layout, and safe to |
684 | | // * reinterpret from raw bytes without invoking undefined behavior. |
685 | | // * |
686 | | // * @return A pointer to a `T` object, interpreted from the internal buffer. |
687 | | // * |
688 | | // * @note The caller must ensure that the current JsonbValue actually contains data |
689 | | // * compatible with type `T`, otherwise the result is undefined. |
690 | | // */ |
691 | | // template <JsonbPodType T> |
692 | | // T* unpack() { |
693 | | // static_assert(is_pod_v<T>, "T must be a POD type"); |
694 | | // return reinterpret_cast<T*>(payload); |
695 | | // } |
696 | | |
697 | | int128_t int_val() const; |
698 | | |
699 | | JsonbValue() = delete; |
700 | | }; |
701 | | |
702 | | // inline ObjectVal* JsonbDocument::operator->() { |
703 | | // return (((JsonbValue*)payload_)->unpack<ObjectVal>()); |
704 | | // } |
705 | | |
706 | 19.0k | inline const ObjectVal* JsonbDocument::operator->() const { |
707 | 19.0k | return (((JsonbValue*)payload_)->unpack<ObjectVal>()); |
708 | 19.0k | } |
709 | | |
710 | | /* |
711 | | * NumerValT is the template class (derived from JsonbValue) of all number |
712 | | * types (integers and double). |
713 | | */ |
714 | | template <typename T> |
715 | | requires std::is_integral_v<T> || std::is_floating_point_v<T> |
716 | | struct NumberValT { |
717 | | public: |
718 | | NumberValT() = delete; |
719 | 10.6k | T val() const { return num; } _ZNK5doris10NumberValTIaE3valEv Line | Count | Source | 719 | 793 | T val() const { return num; } |
_ZNK5doris10NumberValTIsE3valEv Line | Count | Source | 719 | 131 | T val() const { return num; } |
_ZNK5doris10NumberValTIiE3valEv Line | Count | Source | 719 | 3.46k | T val() const { return num; } |
_ZNK5doris10NumberValTIlE3valEv Line | Count | Source | 719 | 1.87k | T val() const { return num; } |
_ZNK5doris10NumberValTInE3valEv Line | Count | Source | 719 | 4.16k | T val() const { return num; } |
_ZNK5doris10NumberValTIdE3valEv Line | Count | Source | 719 | 177 | T val() const { return num; } |
_ZNK5doris10NumberValTIfE3valEv Line | Count | Source | 719 | 16 | T val() const { return num; } |
|
720 | | |
721 | | static unsigned int numPackedBytes() { return sizeof(JsonbValue) + sizeof(T); } |
722 | | |
723 | | T num; |
724 | | }; |
725 | | |
726 | 78 | inline int128_t JsonbValue::int_val() const { |
727 | 78 | switch (type) { |
728 | 31 | case JsonbType::T_Int8: |
729 | 31 | return unpack<JsonbInt8Val>()->val(); |
730 | 25 | case JsonbType::T_Int16: |
731 | 25 | return unpack<JsonbInt16Val>()->val(); |
732 | 12 | case JsonbType::T_Int32: |
733 | 12 | return unpack<JsonbInt32Val>()->val(); |
734 | 10 | case JsonbType::T_Int64: |
735 | 10 | return unpack<JsonbInt64Val>()->val(); |
736 | 0 | case JsonbType::T_Int128: |
737 | 0 | return unpack<JsonbInt128Val>()->val(); |
738 | 0 | default: |
739 | 0 | throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB value type: {}", |
740 | 0 | static_cast<int32_t>(type)); |
741 | 78 | } |
742 | 78 | } |
743 | | |
744 | | template <JsonbDecimalType T> |
745 | | struct JsonbDecimalVal { |
746 | | public: |
747 | | using NativeType = typename T::NativeType; |
748 | | JsonbDecimalVal() = delete; |
749 | | |
750 | | // get the decimal value |
751 | 8 | T val() const { return T(value); } _ZNK5doris15JsonbDecimalValINS_10vectorized7DecimalIiEEE3valEv Line | Count | Source | 751 | 2 | T val() const { return T(value); } |
_ZNK5doris15JsonbDecimalValINS_10vectorized7DecimalIlEEE3valEv Line | Count | Source | 751 | 2 | T val() const { return T(value); } |
_ZNK5doris15JsonbDecimalValINS_10vectorized12Decimal128V3EE3valEv Line | Count | Source | 751 | 2 | T val() const { return T(value); } |
_ZNK5doris15JsonbDecimalValINS_10vectorized7DecimalIN4wide7integerILm256EiEEEEE3valEv Line | Count | Source | 751 | 2 | T val() const { return T(value); } |
|
752 | | |
753 | 11 | static constexpr int numPackedBytes() { |
754 | 11 | return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value); |
755 | 11 | } _ZN5doris15JsonbDecimalValINS_10vectorized7DecimalIiEEE14numPackedBytesEv Line | Count | Source | 753 | 3 | static constexpr int numPackedBytes() { | 754 | 3 | return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value); | 755 | 3 | } |
_ZN5doris15JsonbDecimalValINS_10vectorized7DecimalIlEEE14numPackedBytesEv Line | Count | Source | 753 | 3 | static constexpr int numPackedBytes() { | 754 | 3 | return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value); | 755 | 3 | } |
_ZN5doris15JsonbDecimalValINS_10vectorized12Decimal128V3EE14numPackedBytesEv Line | Count | Source | 753 | 3 | static constexpr int numPackedBytes() { | 754 | 3 | return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value); | 755 | 3 | } |
_ZN5doris15JsonbDecimalValINS_10vectorized7DecimalIN4wide7integerILm256EiEEEEE14numPackedBytesEv Line | Count | Source | 753 | 2 | static constexpr int numPackedBytes() { | 754 | 2 | return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value); | 755 | 2 | } |
|
756 | | |
757 | | uint32_t precision; |
758 | | uint32_t scale; |
759 | | NativeType value; |
760 | | }; |
761 | | |
762 | | /* |
763 | | * BlobVal is the base class (derived from JsonbValue) for string and binary |
764 | | * types. The size indicates the total bytes of the payload. |
765 | | */ |
766 | | struct JsonbBinaryVal { |
767 | | public: |
768 | | // size of the blob payload only |
769 | 2.11k | unsigned int getBlobLen() const { return size; } |
770 | | |
771 | | // return the blob as byte array |
772 | 4.53k | const char* getBlob() const { return payload; } |
773 | | |
774 | | // size of the total packed bytes |
775 | 4.63k | unsigned int numPackedBytes() const { return sizeof(JsonbValue) + sizeof(size) + size; } |
776 | | friend class JsonbDocument; |
777 | | |
778 | | uint32_t size; |
779 | | char payload[0]; |
780 | | |
781 | | JsonbBinaryVal() = delete; |
782 | | }; |
783 | | |
784 | | /* |
785 | | * String type |
786 | | * Note: JSONB string may not be a c-string (NULL-terminated) |
787 | | */ |
788 | | struct JsonbStringVal : public JsonbBinaryVal { |
789 | | public: |
790 | | JsonbStringVal() = delete; |
791 | | /* |
792 | | This function return the actual size of a string. Since for |
793 | | a string, it can be null-terminated with null paddings or it |
794 | | can take all the space in the payload without null in the end. |
795 | | So we need to check it to get the true actual length of a string. |
796 | | */ |
797 | 157 | size_t length() const { |
798 | | // It's an empty string |
799 | 157 | if (0 == size) { |
800 | 0 | return size; |
801 | 0 | } |
802 | | // The string stored takes all the spaces in payload |
803 | 157 | if (payload[size - 1] != 0) { |
804 | 157 | return size; |
805 | 157 | } |
806 | | // It's shorter than the size of payload |
807 | 0 | return strnlen(payload, size); |
808 | 157 | } |
809 | | // convert the string (case insensitive) to a boolean value |
810 | | // "false": 0 |
811 | | // "true": 1 |
812 | | // all other strings: -1 |
813 | 0 | int getBoolVal() { |
814 | 0 | if (size == 4 && tolower(payload[0]) == 't' && tolower(payload[1]) == 'r' && |
815 | 0 | tolower(payload[2]) == 'u' && tolower(payload[3]) == 'e') { |
816 | 0 | return 1; |
817 | 0 | } else if (size == 5 && tolower(payload[0]) == 'f' && tolower(payload[1]) == 'a' && |
818 | 0 | tolower(payload[2]) == 'l' && tolower(payload[3]) == 's' && |
819 | 0 | tolower(payload[4]) == 'e') { |
820 | 0 | return 0; |
821 | 0 | } else { |
822 | 0 | return -1; |
823 | 0 | } |
824 | 0 | } |
825 | | }; |
826 | | |
827 | | /* |
828 | | * ContainerVal is the base class (derived from JsonbValue) for object and |
829 | | * array types. The size indicates the total bytes of the payload. |
830 | | */ |
831 | | struct ContainerVal { |
832 | | // size of the container payload only |
833 | 0 | unsigned int getContainerSize() const { return size; } |
834 | | |
835 | | // return the container payload as byte array |
836 | 0 | const char* getPayload() const { return payload; } |
837 | | |
838 | | // size of the total packed bytes |
839 | 2.26k | unsigned int numPackedBytes() const { return sizeof(JsonbValue) + sizeof(size) + size; } |
840 | | friend class JsonbDocument; |
841 | | |
842 | | uint32_t size; |
843 | | char payload[0]; |
844 | | |
845 | | ContainerVal() = delete; |
846 | | }; |
847 | | |
848 | | /* |
849 | | * Object type |
850 | | */ |
851 | | struct ObjectVal : public ContainerVal { |
852 | | using value_type = JsonbKeyValue; |
853 | | using pointer = value_type*; |
854 | | using const_pointer = const value_type*; |
855 | | using iterator = JsonbFwdIteratorT<pointer, ObjectVal>; |
856 | | using const_iterator = JsonbFwdIteratorT<const_pointer, ObjectVal>; |
857 | | |
858 | | ObjectVal() = delete; |
859 | | |
860 | 0 | const_iterator search(const char* key, hDictFind handler = nullptr) const { |
861 | 0 | return const_cast<ObjectVal*>(this)->search(key, handler); |
862 | 0 | } |
863 | | |
864 | 0 | const_iterator search(const char* key, unsigned int klen, hDictFind handler = nullptr) const { |
865 | 0 | return const_cast<ObjectVal*>(this)->search(key, klen, handler); |
866 | 0 | } |
867 | | |
868 | 0 | const_iterator search(int key_id) const { return const_cast<ObjectVal*>(this)->search(key_id); } |
869 | 0 | iterator search(const char* key, hDictFind handler = nullptr) { |
870 | 0 | if (!key) { |
871 | 0 | return end(); |
872 | 0 | } |
873 | 0 | return search(key, (unsigned int)strlen(key), handler); |
874 | 0 | } |
875 | | |
876 | 51 | iterator search(const char* key, unsigned int klen, hDictFind handler = nullptr) { |
877 | 51 | if (!key || !klen) { |
878 | 0 | return end(); |
879 | 0 | } |
880 | | |
881 | 51 | int key_id = -1; |
882 | 51 | if (handler && (key_id = handler(key, klen)) >= 0) { |
883 | 0 | return search(key_id); |
884 | 0 | } |
885 | 51 | return internalSearch(key, klen); |
886 | 51 | } |
887 | | |
888 | 0 | iterator search(int key_id) { |
889 | 0 | if (key_id < 0 || key_id > JsonbKeyValue::sMaxKeyId) { |
890 | 0 | return end(); |
891 | 0 | } |
892 | | |
893 | 0 | const char* pch = payload; |
894 | 0 | const char* fence = payload + size; |
895 | |
|
896 | 0 | while (pch < fence) { |
897 | 0 | auto* pkey = (JsonbKeyValue*)(pch); |
898 | 0 | if (!pkey->klen() && key_id == pkey->getKeyId()) { |
899 | 0 | return iterator(pkey); |
900 | 0 | } |
901 | 0 | pch += pkey->numPackedBytes(); |
902 | 0 | } |
903 | | |
904 | 0 | assert(pch == fence); |
905 | 0 | return end(); |
906 | 0 | } |
907 | | |
908 | | // Get number of elements in object |
909 | 1 | int numElem() const { |
910 | 1 | const char* pch = payload; |
911 | 1 | const char* fence = payload + size; |
912 | | |
913 | 1 | unsigned int num = 0; |
914 | 13 | while (pch < fence) { |
915 | 12 | auto* pkey = (JsonbKeyValue*)(pch); |
916 | 12 | ++num; |
917 | 12 | pch += pkey->numPackedBytes(); |
918 | 12 | } |
919 | | |
920 | 1 | assert(pch == fence); |
921 | | |
922 | 0 | return num; |
923 | 1 | } |
924 | | |
925 | 0 | JsonbKeyValue* getJsonbKeyValue(unsigned int i) const { |
926 | 0 | const char* pch = payload; |
927 | 0 | const char* fence = payload + size; |
928 | |
|
929 | 0 | unsigned int num = 0; |
930 | 0 | while (pch < fence) { |
931 | 0 | auto* pkey = (JsonbKeyValue*)(pch); |
932 | 0 | if (num == i) { |
933 | 0 | return pkey; |
934 | 0 | } |
935 | 0 | ++num; |
936 | 0 | pch += pkey->numPackedBytes(); |
937 | 0 | } |
938 | | |
939 | 0 | assert(pch == fence); |
940 | | |
941 | 0 | return nullptr; |
942 | 0 | } |
943 | | |
944 | 0 | JsonbValue* find(const char* key, hDictFind handler = nullptr) const { |
945 | 0 | return const_cast<ObjectVal*>(this)->find(key, handler); |
946 | 0 | } |
947 | | |
948 | 51 | JsonbValue* find(const char* key, unsigned int klen, hDictFind handler = nullptr) const { |
949 | 51 | return const_cast<ObjectVal*>(this)->find(key, klen, handler); |
950 | 51 | } |
951 | 0 | JsonbValue* find(int key_id) const { return const_cast<ObjectVal*>(this)->find(key_id); } |
952 | | |
953 | | // find the JSONB value by a key string (null terminated) |
954 | 0 | JsonbValue* find(const char* key, hDictFind handler = nullptr) { |
955 | 0 | if (!key) { |
956 | 0 | return nullptr; |
957 | 0 | } |
958 | 0 | return find(key, (unsigned int)strlen(key), handler); |
959 | 0 | } |
960 | | |
961 | | // find the JSONB value by a key string (with length) |
962 | 51 | JsonbValue* find(const char* key, unsigned int klen, hDictFind handler = nullptr) { |
963 | 51 | iterator kv = search(key, klen, handler); |
964 | 51 | if (end() == kv) { |
965 | 18 | return nullptr; |
966 | 18 | } |
967 | 33 | return kv->value(); |
968 | 51 | } |
969 | | |
970 | | // find the JSONB value by a key dictionary ID |
971 | 0 | JsonbValue* find(int key_id) { |
972 | 0 | iterator kv = search(key_id); |
973 | 0 | if (end() == kv) { |
974 | 0 | return nullptr; |
975 | 0 | } |
976 | 0 | return kv->value(); |
977 | 0 | } |
978 | | |
979 | 0 | iterator begin() { return iterator((pointer)payload); } |
980 | | |
981 | 1.68k | const_iterator begin() const { return const_iterator((pointer)payload); } |
982 | | |
983 | 69 | iterator end() { return iterator((pointer)(payload + size)); } |
984 | | |
985 | 18.5k | const_iterator end() const { return const_iterator((pointer)(payload + size)); } |
986 | | |
987 | | private: |
988 | 51 | iterator internalSearch(const char* key, unsigned int klen) { |
989 | 51 | const char* pch = payload; |
990 | 51 | const char* fence = payload + size; |
991 | | |
992 | 57 | while (pch < fence) { |
993 | 39 | auto* pkey = (JsonbKeyValue*)(pch); |
994 | 39 | if (klen == pkey->klen() && strncmp(key, pkey->getKeyStr(), klen) == 0) { |
995 | 33 | return iterator(pkey); |
996 | 33 | } |
997 | 6 | pch += pkey->numPackedBytes(); |
998 | 6 | } |
999 | | |
1000 | 18 | assert(pch == fence); |
1001 | | |
1002 | 0 | return end(); |
1003 | 51 | } |
1004 | | }; |
1005 | | |
1006 | | /* |
1007 | | * Array type |
1008 | | */ |
1009 | | struct ArrayVal : public ContainerVal { |
1010 | | using value_type = JsonbValue; |
1011 | | using pointer = value_type*; |
1012 | | using const_pointer = const value_type*; |
1013 | | using iterator = JsonbFwdIteratorT<pointer, ArrayVal>; |
1014 | | using const_iterator = JsonbFwdIteratorT<const_pointer, ArrayVal>; |
1015 | | |
1016 | | ArrayVal() = delete; |
1017 | | // get the JSONB value at index |
1018 | 231 | JsonbValue* get(int idx) const { |
1019 | 231 | if (idx < 0) { |
1020 | 0 | return nullptr; |
1021 | 0 | } |
1022 | | |
1023 | 231 | const char* pch = payload; |
1024 | 231 | const char* fence = payload + size; |
1025 | | |
1026 | 564 | while (pch < fence && idx-- > 0) { |
1027 | 333 | pch += ((JsonbValue*)pch)->numPackedBytes(); |
1028 | 333 | } |
1029 | 231 | if (idx > 0 || pch == fence) { |
1030 | 63 | return nullptr; |
1031 | 63 | } |
1032 | | |
1033 | 168 | return (JsonbValue*)pch; |
1034 | 231 | } |
1035 | | |
1036 | | // Get number of elements in array |
1037 | 8 | int numElem() const { |
1038 | 8 | const char* pch = payload; |
1039 | 8 | const char* fence = payload + size; |
1040 | | |
1041 | 8 | unsigned int num = 0; |
1042 | 124 | while (pch < fence) { |
1043 | 116 | ++num; |
1044 | 116 | pch += ((JsonbValue*)pch)->numPackedBytes(); |
1045 | 116 | } |
1046 | | |
1047 | 8 | assert(pch == fence); |
1048 | | |
1049 | 0 | return num; |
1050 | 8 | } |
1051 | | |
1052 | 0 | iterator begin() { return iterator((pointer)payload); } |
1053 | | |
1054 | 75 | const_iterator begin() const { return const_iterator((pointer)payload); } |
1055 | | |
1056 | 0 | iterator end() { return iterator((pointer)(payload + size)); } |
1057 | | |
1058 | 131 | const_iterator end() const { return const_iterator((pointer)(payload + size)); } |
1059 | | }; |
1060 | | |
1061 | | inline Status JsonbDocument::checkAndCreateDocument(const char* pb, size_t size, |
1062 | 2.29k | JsonbDocument** doc) { |
1063 | 2.29k | *doc = nullptr; |
1064 | 2.29k | if (!pb || size < sizeof(JsonbHeader) + sizeof(JsonbValue)) { |
1065 | 30 | return Status::InvalidArgument("Invalid JSONB document: too small size({}) or null pointer", |
1066 | 30 | size); |
1067 | 30 | } |
1068 | | |
1069 | 2.26k | auto* doc_ptr = (JsonbDocument*)pb; |
1070 | 2.26k | if (doc_ptr->header_.ver_ != JSONB_VER) { |
1071 | 1 | return Status::InvalidArgument("Invalid JSONB document: invalid version({})", |
1072 | 1 | doc_ptr->header_.ver_); |
1073 | 1 | } |
1074 | | |
1075 | 2.26k | auto* val = (JsonbValue*)doc_ptr->payload_; |
1076 | 2.26k | if (val->type < JsonbType::T_Null || val->type >= JsonbType::NUM_TYPES || |
1077 | 2.26k | size != sizeof(JsonbHeader) + val->numPackedBytes()) { |
1078 | 0 | return Status::InvalidArgument("Invalid JSONB document: invalid type({}) or size({})", |
1079 | 0 | static_cast<JsonbTypeUnder>(val->type), size); |
1080 | 0 | } |
1081 | | |
1082 | 2.26k | *doc = doc_ptr; |
1083 | 2.26k | return Status::OK(); |
1084 | 2.26k | } |
1085 | 0 | inline void JsonbDocument::setValue(const JsonbValue* value) { |
1086 | 0 | memcpy(payload_, value, value->numPackedBytes()); |
1087 | 0 | } |
1088 | | |
1089 | 15 | inline JsonbValue* JsonbDocument::createValue(const char* pb, size_t size) { |
1090 | 15 | if (!pb || size < sizeof(JsonbHeader) + sizeof(JsonbValue)) { |
1091 | 0 | return nullptr; |
1092 | 0 | } |
1093 | | |
1094 | 15 | auto* doc = (JsonbDocument*)pb; |
1095 | 15 | if (doc->header_.ver_ != JSONB_VER) { |
1096 | 0 | return nullptr; |
1097 | 0 | } |
1098 | | |
1099 | 15 | auto* val = (JsonbValue*)doc->payload_; |
1100 | 15 | if (size != sizeof(JsonbHeader) + val->numPackedBytes()) { |
1101 | 0 | return nullptr; |
1102 | 0 | } |
1103 | | |
1104 | 15 | return val; |
1105 | 15 | } |
1106 | | |
1107 | 0 | inline unsigned int JsonbDocument::numPackedBytes() const { |
1108 | 0 | return ((const JsonbValue*)payload_)->numPackedBytes() + sizeof(header_); |
1109 | 0 | } |
1110 | | |
1111 | 17.5k | inline unsigned int JsonbKeyValue::numPackedBytes() const { |
1112 | 17.5k | unsigned int ks = keyPackedBytes(); |
1113 | 17.5k | auto* val = (JsonbValue*)(((char*)this) + ks); |
1114 | 17.5k | return ks + val->numPackedBytes(); |
1115 | 17.5k | } |
1116 | | |
1117 | | // Poor man's "virtual" function JsonbValue::numPackedBytes |
1118 | 20.9k | inline unsigned int JsonbValue::numPackedBytes() const { |
1119 | 20.9k | switch (type) { |
1120 | 2.82k | case JsonbType::T_Null: |
1121 | 2.98k | case JsonbType::T_True: |
1122 | 3.09k | case JsonbType::T_False: { |
1123 | 3.09k | return sizeof(type); |
1124 | 2.98k | } |
1125 | | |
1126 | 991 | case JsonbType::T_Int8: { |
1127 | 991 | return sizeof(type) + sizeof(int8_t); |
1128 | 2.98k | } |
1129 | 166 | case JsonbType::T_Int16: { |
1130 | 166 | return sizeof(type) + sizeof(int16_t); |
1131 | 2.98k | } |
1132 | 3.49k | case JsonbType::T_Int32: { |
1133 | 3.49k | return sizeof(type) + sizeof(int32_t); |
1134 | 2.98k | } |
1135 | 1.91k | case JsonbType::T_Int64: { |
1136 | 1.91k | return sizeof(type) + sizeof(int64_t); |
1137 | 2.98k | } |
1138 | 240 | case JsonbType::T_Double: { |
1139 | 240 | return sizeof(type) + sizeof(double); |
1140 | 2.98k | } |
1141 | 17 | case JsonbType::T_Float: { |
1142 | 17 | return sizeof(type) + sizeof(float); |
1143 | 2.98k | } |
1144 | 4.16k | case JsonbType::T_Int128: { |
1145 | 4.16k | return sizeof(type) + sizeof(int128_t); |
1146 | 2.98k | } |
1147 | 270 | case JsonbType::T_String: |
1148 | 4.63k | case JsonbType::T_Binary: { |
1149 | 4.63k | return unpack<JsonbBinaryVal>()->numPackedBytes(); |
1150 | 270 | } |
1151 | | |
1152 | 1.83k | case JsonbType::T_Object: |
1153 | 2.26k | case JsonbType::T_Array: { |
1154 | 2.26k | return unpack<ContainerVal>()->numPackedBytes(); |
1155 | 1.83k | } |
1156 | 3 | case JsonbType::T_Decimal32: { |
1157 | 3 | return JsonbDecimal32::numPackedBytes(); |
1158 | 1.83k | } |
1159 | 3 | case JsonbType::T_Decimal64: { |
1160 | 3 | return JsonbDecimal64::numPackedBytes(); |
1161 | 1.83k | } |
1162 | 3 | case JsonbType::T_Decimal128: { |
1163 | 3 | return JsonbDecimal128::numPackedBytes(); |
1164 | 1.83k | } |
1165 | 2 | case JsonbType::T_Decimal256: { |
1166 | 2 | return JsonbDecimal256::numPackedBytes(); |
1167 | 1.83k | } |
1168 | 0 | case JsonbType::NUM_TYPES: |
1169 | 0 | break; |
1170 | 20.9k | } |
1171 | | |
1172 | 0 | throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB value type: {}", |
1173 | 0 | static_cast<int32_t>(type)); |
1174 | 20.9k | } |
1175 | | |
1176 | 0 | inline int JsonbValue::numElements() const { |
1177 | 0 | switch (type) { |
1178 | 0 | case JsonbType::T_Int8: |
1179 | 0 | case JsonbType::T_Int16: |
1180 | 0 | case JsonbType::T_Int32: |
1181 | 0 | case JsonbType::T_Int64: |
1182 | 0 | case JsonbType::T_Double: |
1183 | 0 | case JsonbType::T_Float: |
1184 | 0 | case JsonbType::T_Int128: |
1185 | 0 | case JsonbType::T_String: |
1186 | 0 | case JsonbType::T_Binary: |
1187 | 0 | case JsonbType::T_Null: |
1188 | 0 | case JsonbType::T_True: |
1189 | 0 | case JsonbType::T_False: |
1190 | 0 | case JsonbType::T_Decimal32: |
1191 | 0 | case JsonbType::T_Decimal64: |
1192 | 0 | case JsonbType::T_Decimal128: |
1193 | 0 | case JsonbType::T_Decimal256: { |
1194 | 0 | return 1; |
1195 | 0 | } |
1196 | 0 | case JsonbType::T_Object: { |
1197 | 0 | return unpack<ObjectVal>()->numElem(); |
1198 | 0 | } |
1199 | 0 | case JsonbType::T_Array: { |
1200 | 0 | return unpack<ArrayVal>()->numElem(); |
1201 | 0 | } |
1202 | 0 | case JsonbType::NUM_TYPES: |
1203 | 0 | break; |
1204 | 0 | } |
1205 | 0 | throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB value type: {}", |
1206 | 0 | static_cast<int32_t>(type)); |
1207 | 0 | } |
1208 | | |
1209 | 0 | inline bool JsonbValue::contains(JsonbValue* rhs) const { |
1210 | 0 | switch (type) { |
1211 | 0 | case JsonbType::T_Int8: |
1212 | 0 | case JsonbType::T_Int16: |
1213 | 0 | case JsonbType::T_Int32: |
1214 | 0 | case JsonbType::T_Int64: |
1215 | 0 | case JsonbType::T_Int128: { |
1216 | 0 | return rhs->isInt() && this->int_val() == rhs->int_val(); |
1217 | 0 | } |
1218 | 0 | case JsonbType::T_Double: |
1219 | 0 | case JsonbType::T_Float: { |
1220 | 0 | if (!rhs->isDouble() && !rhs->isFloat()) { |
1221 | 0 | return false; |
1222 | 0 | } |
1223 | 0 | double left = isDouble() ? unpack<JsonbDoubleVal>()->val() : unpack<JsonbFloatVal>()->val(); |
1224 | 0 | double right = rhs->isDouble() ? rhs->unpack<JsonbDoubleVal>()->val() |
1225 | 0 | : rhs->unpack<JsonbFloatVal>()->val(); |
1226 | 0 | return left == right; |
1227 | 0 | } |
1228 | 0 | case JsonbType::T_String: |
1229 | 0 | case JsonbType::T_Binary: { |
1230 | 0 | if (rhs->isString() || rhs->isBinary()) { |
1231 | 0 | const auto* str_value1 = unpack<JsonbStringVal>(); |
1232 | 0 | const auto* str_value2 = rhs->unpack<JsonbStringVal>(); |
1233 | 0 | return str_value1->length() == str_value2->length() && |
1234 | 0 | std::memcmp(str_value1->getBlob(), str_value2->getBlob(), |
1235 | 0 | str_value1->length()) == 0; |
1236 | 0 | } |
1237 | 0 | return false; |
1238 | 0 | } |
1239 | 0 | case JsonbType::T_Array: { |
1240 | 0 | int lhs_num = unpack<ArrayVal>()->numElem(); |
1241 | 0 | if (rhs->isArray()) { |
1242 | 0 | int rhs_num = rhs->unpack<ArrayVal>()->numElem(); |
1243 | 0 | if (rhs_num > lhs_num) { |
1244 | 0 | return false; |
1245 | 0 | } |
1246 | 0 | int contains_num = 0; |
1247 | 0 | for (int i = 0; i < lhs_num; ++i) { |
1248 | 0 | for (int j = 0; j < rhs_num; ++j) { |
1249 | 0 | if (unpack<ArrayVal>()->get(i)->contains(rhs->unpack<ArrayVal>()->get(j))) { |
1250 | 0 | contains_num++; |
1251 | 0 | break; |
1252 | 0 | } |
1253 | 0 | } |
1254 | 0 | } |
1255 | 0 | return contains_num == rhs_num; |
1256 | 0 | } |
1257 | 0 | for (int i = 0; i < lhs_num; ++i) { |
1258 | 0 | if (unpack<ArrayVal>()->get(i)->contains(rhs)) { |
1259 | 0 | return true; |
1260 | 0 | } |
1261 | 0 | } |
1262 | 0 | return false; |
1263 | 0 | } |
1264 | 0 | case JsonbType::T_Object: { |
1265 | 0 | if (rhs->isObject()) { |
1266 | 0 | const auto* str_value1 = unpack<ObjectVal>(); |
1267 | 0 | auto* str_value2 = rhs->unpack<ObjectVal>(); |
1268 | 0 | for (int i = 0; i < str_value2->numElem(); ++i) { |
1269 | 0 | JsonbKeyValue* key = str_value2->getJsonbKeyValue(i); |
1270 | 0 | JsonbValue* value = str_value1->find(key->getKeyStr(), key->klen()); |
1271 | 0 | if (key != nullptr && value != nullptr && !value->contains(key->value())) { |
1272 | 0 | return false; |
1273 | 0 | } |
1274 | 0 | } |
1275 | 0 | return true; |
1276 | 0 | } |
1277 | 0 | return false; |
1278 | 0 | } |
1279 | 0 | case JsonbType::T_Null: { |
1280 | 0 | return rhs->isNull(); |
1281 | 0 | } |
1282 | 0 | case JsonbType::T_True: { |
1283 | 0 | return rhs->isTrue(); |
1284 | 0 | } |
1285 | 0 | case JsonbType::T_False: { |
1286 | 0 | return rhs->isFalse(); |
1287 | 0 | } |
1288 | 0 | case JsonbType::T_Decimal32: { |
1289 | 0 | if (rhs->isDecimal32()) { |
1290 | 0 | return unpack<JsonbDecimal32>()->val() == rhs->unpack<JsonbDecimal32>()->val() && |
1291 | 0 | unpack<JsonbDecimal32>()->precision == |
1292 | 0 | rhs->unpack<JsonbDecimal32>()->precision && |
1293 | 0 | unpack<JsonbDecimal32>()->scale == rhs->unpack<JsonbDecimal32>()->scale; |
1294 | 0 | } |
1295 | 0 | return false; |
1296 | 0 | } |
1297 | 0 | case JsonbType::T_Decimal64: { |
1298 | 0 | if (rhs->isDecimal64()) { |
1299 | 0 | return unpack<JsonbDecimal64>()->val() == rhs->unpack<JsonbDecimal64>()->val() && |
1300 | 0 | unpack<JsonbDecimal64>()->precision == |
1301 | 0 | rhs->unpack<JsonbDecimal64>()->precision && |
1302 | 0 | unpack<JsonbDecimal64>()->scale == rhs->unpack<JsonbDecimal64>()->scale; |
1303 | 0 | } |
1304 | 0 | return false; |
1305 | 0 | } |
1306 | 0 | case JsonbType::T_Decimal128: { |
1307 | 0 | if (rhs->isDecimal128()) { |
1308 | 0 | return unpack<JsonbDecimal128>()->val() == rhs->unpack<JsonbDecimal128>()->val() && |
1309 | 0 | unpack<JsonbDecimal128>()->precision == |
1310 | 0 | rhs->unpack<JsonbDecimal128>()->precision && |
1311 | 0 | unpack<JsonbDecimal128>()->scale == rhs->unpack<JsonbDecimal128>()->scale; |
1312 | 0 | } |
1313 | 0 | return false; |
1314 | 0 | } |
1315 | 0 | case JsonbType::T_Decimal256: { |
1316 | 0 | if (rhs->isDecimal256()) { |
1317 | 0 | return unpack<JsonbDecimal256>()->val() == rhs->unpack<JsonbDecimal256>()->val() && |
1318 | 0 | unpack<JsonbDecimal256>()->precision == |
1319 | 0 | rhs->unpack<JsonbDecimal256>()->precision && |
1320 | 0 | unpack<JsonbDecimal256>()->scale == rhs->unpack<JsonbDecimal256>()->scale; |
1321 | 0 | } |
1322 | 0 | return false; |
1323 | 0 | } |
1324 | 0 | case JsonbType::NUM_TYPES: |
1325 | 0 | break; |
1326 | 0 | } |
1327 | | |
1328 | 0 | throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB value type: {}", |
1329 | 0 | static_cast<int32_t>(type)); |
1330 | 0 | } |
1331 | | |
1332 | 846 | inline bool JsonbPath::seek(const char* key_path, size_t kp_len) { |
1333 | | //path invalid |
1334 | 846 | if (!key_path || kp_len == 0) { |
1335 | 0 | return false; |
1336 | 0 | } |
1337 | 846 | Stream stream(key_path, kp_len); |
1338 | 846 | stream.skip_whitespace(); |
1339 | 846 | if (stream.exhausted() || stream.read() != SCOPE) { |
1340 | | //path invalid |
1341 | 0 | return false; |
1342 | 0 | } |
1343 | | |
1344 | 1.69k | while (!stream.exhausted()) { |
1345 | 850 | stream.skip_whitespace(); |
1346 | 850 | stream.clear_leg_ptr(); |
1347 | 850 | stream.clear_leg_len(); |
1348 | | |
1349 | 850 | if (!JsonbPath::parsePath(&stream, this)) { |
1350 | | //path invalid |
1351 | 0 | return false; |
1352 | 0 | } |
1353 | 850 | } |
1354 | 846 | return true; |
1355 | 846 | } |
1356 | | |
1357 | 816 | inline JsonbValue* JsonbValue::findValue(JsonbPath& path, hDictFind handler) { |
1358 | 816 | JsonbValue* pval = this; |
1359 | 1.05k | for (size_t i = 0; i < path.get_leg_vector_size(); ++i) { |
1360 | 732 | switch (path.get_leg_from_leg_vector(i)->type) { |
1361 | 204 | case MEMBER_CODE: { |
1362 | 204 | if (LIKELY(pval->type == JsonbType::T_Object)) { |
1363 | 51 | if (path.get_leg_from_leg_vector(i)->leg_len == 1 && |
1364 | 51 | *path.get_leg_from_leg_vector(i)->leg_ptr == WILDCARD) { |
1365 | 0 | continue; |
1366 | 0 | } |
1367 | | |
1368 | 51 | pval = pval->unpack<ObjectVal>()->find(path.get_leg_from_leg_vector(i)->leg_ptr, |
1369 | 51 | path.get_leg_from_leg_vector(i)->leg_len, |
1370 | 51 | handler); |
1371 | | |
1372 | 51 | if (!pval) { |
1373 | 18 | return nullptr; |
1374 | 18 | } |
1375 | 33 | continue; |
1376 | 153 | } else { |
1377 | 153 | return nullptr; |
1378 | 153 | } |
1379 | 204 | } |
1380 | 528 | case ARRAY_CODE: { |
1381 | 528 | if (path.get_leg_from_leg_vector(i)->leg_len == 1 && |
1382 | 528 | *path.get_leg_from_leg_vector(i)->leg_ptr == WILDCARD) { |
1383 | 0 | if (LIKELY(pval->type == JsonbType::T_Array)) { |
1384 | 0 | continue; |
1385 | 0 | } else { |
1386 | 0 | return nullptr; |
1387 | 0 | } |
1388 | 0 | } |
1389 | | |
1390 | 528 | if (pval->type == JsonbType::T_Object && |
1391 | 528 | path.get_leg_from_leg_vector(i)->array_index == 0) { |
1392 | 36 | continue; |
1393 | 36 | } |
1394 | | |
1395 | 492 | if (pval->type != JsonbType::T_Array || |
1396 | 492 | path.get_leg_from_leg_vector(i)->leg_ptr != nullptr || |
1397 | 492 | path.get_leg_from_leg_vector(i)->leg_len != 0) { |
1398 | 261 | return nullptr; |
1399 | 261 | } |
1400 | | |
1401 | 231 | if (path.get_leg_from_leg_vector(i)->array_index >= 0) { |
1402 | 231 | pval = pval->unpack<ArrayVal>()->get(path.get_leg_from_leg_vector(i)->array_index); |
1403 | 231 | } else { |
1404 | 0 | pval = pval->unpack<ArrayVal>()->get(pval->unpack<ArrayVal>()->numElem() + |
1405 | 0 | path.get_leg_from_leg_vector(i)->array_index); |
1406 | 0 | } |
1407 | | |
1408 | 231 | if (!pval) { |
1409 | 63 | return nullptr; |
1410 | 63 | } |
1411 | 168 | continue; |
1412 | 231 | } |
1413 | 732 | } |
1414 | 732 | } |
1415 | 321 | return pval; |
1416 | 816 | } |
1417 | | |
1418 | 850 | inline bool JsonbPath::parsePath(Stream* stream, JsonbPath* path) { |
1419 | | // $[0] |
1420 | 850 | if (stream->peek() == BEGIN_ARRAY) { |
1421 | 542 | return parse_array(stream, path); |
1422 | 542 | } |
1423 | | // $.a or $.[0] |
1424 | 308 | else if (stream->peek() == BEGIN_MEMBER) { |
1425 | | // advance past the . |
1426 | 308 | stream->skip(1); |
1427 | | |
1428 | 308 | if (stream->exhausted()) { |
1429 | 0 | return false; |
1430 | 0 | } |
1431 | | |
1432 | | // $.[0] |
1433 | 308 | if (stream->peek() == BEGIN_ARRAY) { |
1434 | 0 | return parse_array(stream, path); |
1435 | 0 | } |
1436 | | // $.a |
1437 | 308 | else { |
1438 | 308 | return parse_member(stream, path); |
1439 | 308 | } |
1440 | 308 | } else { |
1441 | 0 | return false; //invalid json path |
1442 | 0 | } |
1443 | 850 | } |
1444 | | |
1445 | 542 | inline bool JsonbPath::parse_array(Stream* stream, JsonbPath* path) { |
1446 | 542 | assert(stream->peek() == BEGIN_ARRAY); |
1447 | 0 | stream->skip(1); |
1448 | 542 | if (stream->exhausted()) { |
1449 | 0 | return false; |
1450 | 0 | } |
1451 | | |
1452 | 542 | if (stream->peek() == WILDCARD) { |
1453 | 0 | stream->set_leg_ptr(const_cast<char*>(stream->position())); |
1454 | 0 | stream->add_leg_len(); |
1455 | 0 | stream->skip(1); |
1456 | 0 | if (stream->exhausted()) { |
1457 | 0 | return false; |
1458 | 0 | } |
1459 | | |
1460 | 0 | if (stream->peek() == END_ARRAY) { |
1461 | 0 | std::unique_ptr<leg_info> leg( |
1462 | 0 | new leg_info(stream->get_leg_ptr(), stream->get_leg_len(), 0, ARRAY_CODE)); |
1463 | 0 | path->add_leg_to_leg_vector(std::move(leg)); |
1464 | 0 | stream->skip(1); |
1465 | 0 | return true; |
1466 | 0 | } else { |
1467 | 0 | return false; |
1468 | 0 | } |
1469 | 0 | } |
1470 | | |
1471 | 542 | stream->set_leg_ptr(const_cast<char*>(stream->position())); |
1472 | | |
1473 | 1.08k | for (; !stream->exhausted() && stream->peek() != END_ARRAY; stream->skip(1)) { |
1474 | 542 | stream->add_leg_len(); |
1475 | 542 | } |
1476 | | |
1477 | 542 | if (stream->exhausted() || stream->peek() != END_ARRAY) { |
1478 | 0 | return false; |
1479 | 542 | } else { |
1480 | 542 | stream->skip(1); |
1481 | 542 | } |
1482 | | |
1483 | | //parse array index to int |
1484 | | |
1485 | 542 | std::string_view idx_string(stream->get_leg_ptr(), stream->get_leg_len()); |
1486 | 542 | int index = 0; |
1487 | | |
1488 | 542 | if (stream->get_leg_len() >= 4 && |
1489 | 542 | std::equal(LAST, LAST + 4, stream->get_leg_ptr(), |
1490 | 0 | [](char c1, char c2) { return std::tolower(c1) == std::tolower(c2); })) { |
1491 | 0 | auto pos = idx_string.find(MINUS); |
1492 | |
|
1493 | 0 | if (pos != std::string::npos) { |
1494 | 0 | idx_string = idx_string.substr(pos + 1); |
1495 | |
|
1496 | 0 | auto result = std::from_chars(idx_string.data(), idx_string.data() + idx_string.size(), |
1497 | 0 | index); |
1498 | 0 | if (result.ec != std::errc()) { |
1499 | 0 | return false; |
1500 | 0 | } |
1501 | |
|
1502 | 0 | } else if (stream->get_leg_len() > 4) { |
1503 | 0 | return false; |
1504 | 0 | } |
1505 | | |
1506 | 0 | std::unique_ptr<leg_info> leg(new leg_info(nullptr, 0, -index - 1, ARRAY_CODE)); |
1507 | 0 | path->add_leg_to_leg_vector(std::move(leg)); |
1508 | |
|
1509 | 0 | return true; |
1510 | 0 | } |
1511 | | |
1512 | 542 | auto result = std::from_chars(idx_string.data(), idx_string.data() + idx_string.size(), index); |
1513 | | |
1514 | 542 | if (result.ec != std::errc()) { |
1515 | 0 | return false; |
1516 | 0 | } |
1517 | | |
1518 | 542 | std::unique_ptr<leg_info> leg(new leg_info(nullptr, 0, index, ARRAY_CODE)); |
1519 | 542 | path->add_leg_to_leg_vector(std::move(leg)); |
1520 | | |
1521 | 542 | return true; |
1522 | 542 | } |
1523 | | |
1524 | 308 | inline bool JsonbPath::parse_member(Stream* stream, JsonbPath* path) { |
1525 | 308 | if (stream->exhausted()) { |
1526 | 0 | return false; |
1527 | 0 | } |
1528 | | |
1529 | 308 | if (stream->peek() == WILDCARD) { |
1530 | 0 | stream->set_leg_ptr(const_cast<char*>(stream->position())); |
1531 | 0 | stream->add_leg_len(); |
1532 | 0 | stream->skip(1); |
1533 | 0 | std::unique_ptr<leg_info> leg( |
1534 | 0 | new leg_info(stream->get_leg_ptr(), stream->get_leg_len(), 0, MEMBER_CODE)); |
1535 | 0 | path->add_leg_to_leg_vector(std::move(leg)); |
1536 | 0 | return true; |
1537 | 0 | } |
1538 | | |
1539 | 308 | stream->set_leg_ptr(const_cast<char*>(stream->position())); |
1540 | | |
1541 | 308 | const char* left_quotation_marks = nullptr; |
1542 | 308 | const char* right_quotation_marks = nullptr; |
1543 | | |
1544 | 924 | for (; !stream->exhausted(); stream->skip(1)) { |
1545 | 616 | if (stream->peek() == ESCAPE) { |
1546 | 0 | stream->add_leg_len(); |
1547 | 0 | stream->skip(1); |
1548 | 0 | stream->add_leg_len(); |
1549 | 0 | stream->set_has_escapes(true); |
1550 | 0 | if (stream->exhausted()) { |
1551 | 0 | return false; |
1552 | 0 | } |
1553 | 0 | continue; |
1554 | 616 | } else if (stream->peek() == DOUBLE_QUOTE) { |
1555 | 0 | if (left_quotation_marks == nullptr) { |
1556 | 0 | left_quotation_marks = stream->position(); |
1557 | 0 | stream->set_leg_ptr(const_cast<char*>(++left_quotation_marks)); |
1558 | 0 | continue; |
1559 | 0 | } else { |
1560 | 0 | right_quotation_marks = stream->position(); |
1561 | 0 | stream->skip(1); |
1562 | 0 | break; |
1563 | 0 | } |
1564 | 616 | } else if (stream->peek() == BEGIN_MEMBER || stream->peek() == BEGIN_ARRAY) { |
1565 | 0 | if (left_quotation_marks == nullptr) { |
1566 | 0 | break; |
1567 | 0 | } |
1568 | 0 | } |
1569 | | |
1570 | 616 | stream->add_leg_len(); |
1571 | 616 | } |
1572 | | |
1573 | 308 | if ((left_quotation_marks != nullptr && right_quotation_marks == nullptr) || |
1574 | 308 | stream->get_leg_ptr() == nullptr || stream->get_leg_len() == 0) { |
1575 | 0 | return false; //invalid json path |
1576 | 0 | } |
1577 | | |
1578 | 308 | if (stream->get_has_escapes()) { |
1579 | 0 | stream->remove_escapes(); |
1580 | 0 | } |
1581 | | |
1582 | 308 | std::unique_ptr<leg_info> leg( |
1583 | 308 | new leg_info(stream->get_leg_ptr(), stream->get_leg_len(), 0, MEMBER_CODE)); |
1584 | 308 | path->add_leg_to_leg_vector(std::move(leg)); |
1585 | | |
1586 | 308 | return true; |
1587 | 308 | } |
1588 | | |
1589 | | static_assert(is_pod_v<JsonbDocument>, "JsonbDocument must be standard layout and trivial"); |
1590 | | static_assert(is_pod_v<JsonbValue>, "JsonbValue must be standard layout and trivial"); |
1591 | | static_assert(is_pod_v<JsonbDecimal32>, "JsonbDecimal32 must be standard layout and trivial"); |
1592 | | static_assert(is_pod_v<JsonbDecimal64>, "JsonbDecimal64 must be standard layout and trivial"); |
1593 | | static_assert(is_pod_v<JsonbDecimal128>, "JsonbDecimal128 must be standard layout and trivial"); |
1594 | | static_assert(is_pod_v<JsonbDecimal256>, "JsonbDecimal256 must be standard layout and trivial"); |
1595 | | static_assert(is_pod_v<JsonbInt8Val>, "JsonbInt8Val must be standard layout and trivial"); |
1596 | | static_assert(is_pod_v<JsonbInt32Val>, "JsonbInt32Val must be standard layout and trivial"); |
1597 | | static_assert(is_pod_v<JsonbInt64Val>, "JsonbInt64Val must be standard layout and trivial"); |
1598 | | static_assert(is_pod_v<JsonbInt128Val>, "JsonbInt128Val must be standard layout and trivial"); |
1599 | | static_assert(is_pod_v<JsonbDoubleVal>, "JsonbDoubleVal must be standard layout and trivial"); |
1600 | | static_assert(is_pod_v<JsonbFloatVal>, "JsonbFloatVal must be standard layout and trivial"); |
1601 | | static_assert(is_pod_v<JsonbBinaryVal>, "JsonbBinaryVal must be standard layout and trivial"); |
1602 | | static_assert(is_pod_v<ContainerVal>, "ContainerVal must be standard layout and trivial"); |
1603 | | |
1604 | | #define ASSERT_DECIMAL_LAYOUT(type) \ |
1605 | | static_assert(offsetof(type, precision) == 0); \ |
1606 | | static_assert(offsetof(type, scale) == 4); \ |
1607 | | static_assert(offsetof(type, value) == 8); |
1608 | | |
1609 | | ASSERT_DECIMAL_LAYOUT(JsonbDecimal32) |
1610 | | ASSERT_DECIMAL_LAYOUT(JsonbDecimal64) |
1611 | | ASSERT_DECIMAL_LAYOUT(JsonbDecimal128) |
1612 | | ASSERT_DECIMAL_LAYOUT(JsonbDecimal256) |
1613 | | |
1614 | | #define ASSERT_NUMERIC_LAYOUT(type) static_assert(offsetof(type, num) == 0); |
1615 | | |
1616 | | ASSERT_NUMERIC_LAYOUT(JsonbInt8Val) |
1617 | | ASSERT_NUMERIC_LAYOUT(JsonbInt32Val) |
1618 | | ASSERT_NUMERIC_LAYOUT(JsonbInt64Val) |
1619 | | ASSERT_NUMERIC_LAYOUT(JsonbInt128Val) |
1620 | | ASSERT_NUMERIC_LAYOUT(JsonbDoubleVal) |
1621 | | |
1622 | | static_assert(offsetof(JsonbBinaryVal, size) == 0); |
1623 | | static_assert(offsetof(JsonbBinaryVal, payload) == 4); |
1624 | | |
1625 | | static_assert(offsetof(ContainerVal, size) == 0); |
1626 | | static_assert(offsetof(ContainerVal, payload) == 4); |
1627 | | |
1628 | | #pragma pack(pop) |
1629 | | #if defined(__clang__) |
1630 | | #pragma clang diagnostic pop |
1631 | | #endif |
1632 | | } // namespace doris |
1633 | | |
1634 | | #endif // JSONB_JSONBDOCUMENT_H |