be/src/util/jsonb_document.h
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2014, Facebook, Inc. |
3 | | * All rights reserved. |
4 | | * |
5 | | * This source code is licensed under the BSD-style license found in the |
6 | | * LICENSE file in the root directory of this source tree. An additional grant |
7 | | * of patent rights can be found in the PATENTS file in the same directory. |
8 | | * |
9 | | */ |
10 | | |
11 | | /* |
12 | | * This header defines JsonbDocument, JsonbKeyValue, and various value classes |
13 | | * which are derived from JsonbValue, and a forward iterator for container |
14 | | * values - essentially everything that is related to JSONB binary data |
15 | | * structures. |
16 | | * |
17 | | * Implementation notes: |
18 | | * |
19 | | * None of the classes in this header file can be instantiated directly (i.e. |
20 | | * you cannot create a JsonbKeyValue or JsonbValue object - all constructors |
21 | | * are declared non-public). We use the classes as wrappers on the packed JSONB |
22 | | * bytes (serialized), and cast the classes (types) to the underlying packed |
23 | | * byte array. |
24 | | * |
25 | | * For the same reason, we cannot define any JSONB value class to be virtual, |
26 | | * since we never call constructors, and will not instantiate vtbl and vptrs. |
27 | | * |
28 | | * Therefore, the classes are defined as packed structures (i.e. no data |
29 | | * alignment and padding), and the private member variables of the classes are |
30 | | * defined precisely in the same order as the JSONB spec. This ensures we |
31 | | * access the packed JSONB bytes correctly. |
32 | | * |
33 | | * The packed structures are highly optimized for in-place operations with low |
34 | | * overhead. The reads (and in-place writes) are performed directly on packed |
35 | | * bytes. There is no memory allocation at all at runtime. |
36 | | * |
37 | | * For updates/writes of values that will expand the original JSONB size, the |
38 | | * write will fail, and the caller needs to handle buffer increase. |
39 | | * |
40 | | * ** Iterator ** |
41 | | * Both ObjectVal class and ArrayVal class have iterator type that you can use |
42 | | * to declare an iterator on a container object to go through the key-value |
43 | | * pairs or value list. The iterator has both non-const and const types. |
44 | | * |
45 | | * Note: iterators are forward direction only. |
46 | | * |
47 | | * ** Query ** |
48 | | * Querying into containers is through the member functions find (for key/value |
49 | | * pairs) and get (for array elements), and is in streaming style. We don't |
50 | | * need to read/scan the whole JSONB packed bytes in order to return results. |
51 | | * Once the key/index is found, we will stop search. You can use text to query |
52 | | * both objects and array (for array, text will be converted to integer index), |
53 | | * and use index to retrieve from array. Array index is 0-based. |
54 | | * |
55 | | * ** External dictionary ** |
56 | | * During query processing, you can also pass a call-back function, so the |
57 | | * search will first try to check if the key string exists in the dictionary. |
58 | | * If so, search will be based on the id instead of the key string. |
59 | | * @author Tian Xia <tianx@fb.com> |
60 | | * |
61 | | * this file is copied from |
62 | | * https://github.com/facebook/mysql-5.6/blob/fb-mysql-5.6.35/fbson/FbsonDocument.h |
63 | | * and modified by Doris |
64 | | */ |
65 | | |
66 | | #ifndef JSONB_JSONBDOCUMENT_H |
67 | | #define JSONB_JSONBDOCUMENT_H |
68 | | |
#include <algorithm>
#include <array>
#include <cassert>
#include <cctype>
#include <charconv>
#include <cmath>
#include <concepts>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <limits>
#include <memory>
#include <string>
#include <string_view>
#include <type_traits>
#include <utility>
#include <vector>
80 | | |
81 | | #include "common/compiler_util.h" // IWYU pragma: keep |
82 | | #include "common/status.h" |
83 | | #include "core/data_type/define_primitive_type.h" |
84 | | #include "core/string_ref.h" |
85 | | #include "core/types.h" |
86 | | #include "util/string_util.h" |
87 | | |
88 | | // #include "util/string_parser.hpp" |
89 | | |
90 | | // Concept to check for supported decimal types |
91 | | template <typename T> |
92 | | concept JsonbDecimalType = |
93 | | std::same_as<T, doris::Decimal256> || std::same_as<T, doris::Decimal64> || |
94 | | std::same_as<T, doris::Decimal128V3> || std::same_as<T, doris::Decimal32>; |
95 | | |
96 | | namespace doris { |
97 | | |
// True when T is both trivial and standard-layout, i.e. what used to be called
// a POD type before std::is_pod was deprecated.
template <typename T>
constexpr bool is_pod_v = std::conjunction_v<std::is_trivial<T>, std::is_standard_layout<T>>;
100 | | |
101 | | struct JsonbStringVal; |
102 | | struct ObjectVal; |
103 | | struct ArrayVal; |
104 | | struct JsonbBinaryVal; |
105 | | struct ContainerVal; |
106 | | |
107 | | template <JsonbDecimalType T> |
108 | | struct JsonbDecimalVal; |
109 | | |
110 | | using JsonbDecimal256 = JsonbDecimalVal<Decimal256>; |
111 | | using JsonbDecimal128 = JsonbDecimalVal<Decimal128V3>; |
112 | | using JsonbDecimal64 = JsonbDecimalVal<Decimal64>; |
113 | | using JsonbDecimal32 = JsonbDecimalVal<Decimal32>; |
114 | | |
115 | | template <typename T> |
116 | | requires std::is_integral_v<T> || std::is_floating_point_v<T> |
117 | | struct NumberValT; |
118 | | |
119 | | using JsonbInt8Val = NumberValT<int8_t>; |
120 | | using JsonbInt16Val = NumberValT<int16_t>; |
121 | | using JsonbInt32Val = NumberValT<int32_t>; |
122 | | using JsonbInt64Val = NumberValT<int64_t>; |
123 | | using JsonbInt128Val = NumberValT<int128_t>; |
124 | | using JsonbDoubleVal = NumberValT<double>; |
125 | | using JsonbFloatVal = NumberValT<float>; |
126 | | |
127 | | template <typename T> |
128 | | concept JsonbPodType = (std::same_as<T, JsonbStringVal> || std::same_as<T, ObjectVal> || |
129 | | std::same_as<T, ContainerVal> || std::same_as<T, ArrayVal> || |
130 | | std::same_as<T, JsonbBinaryVal> || std::same_as<T, JsonbDecimal32> || |
131 | | std::same_as<T, JsonbDecimal64> || std::same_as<T, JsonbDecimal128> || |
132 | | std::same_as<T, JsonbDecimal256> || std::same_as<T, JsonbDecimal32> || |
133 | | std::same_as<T, JsonbInt8Val> || std::same_as<T, JsonbInt16Val> || |
134 | | std::same_as<T, JsonbInt32Val> || std::same_as<T, JsonbInt64Val> || |
135 | | std::same_as<T, JsonbInt128Val> || std::same_as<T, JsonbFloatVal> || |
136 | | std::same_as<T, JsonbFloatVal> || std::same_as<T, JsonbDoubleVal>); |
137 | | |
138 | 1.21M | #define JSONB_VER 1 |
139 | | |
140 | | using int128_t = __int128; |
141 | | |
142 | | // forward declaration |
143 | | struct JsonbValue; |
144 | | |
145 | | class JsonbOutStream; |
146 | | |
147 | | template <class OS_TYPE> |
148 | | class JsonbWriterT; |
149 | | |
150 | | using JsonbWriter = JsonbWriterT<JsonbOutStream>; |
151 | | |
152 | | const int MaxNestingLevel = 100; |
153 | | |
154 | | /* |
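155 | | * JsonbType defines the primitive types and 2 container types described below. |
156 | | * The enum additionally has 0x0C-0x11 (int128, float, decimal32/64/128/256), which the grammar below does not list. |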
157 | | * NOTE: Do NOT modify the existing values or their order in this enum. |
158 | | * You may only append new entries at the end before `NUM_TYPES`. |
159 | | * This enum will be used in serialized data and/or persisted data. |
160 | | * Changing existing values may break backward compatibility |
161 | | * with previously stored or transmitted data. |
162 | | * |
163 | | * primitive_value ::= |
164 | | * 0x00 //null value (0 byte) |
165 | | * | 0x01 //boolean true (0 byte) |
166 | | * | 0x02 //boolean false (0 byte) |
167 | | * | 0x03 int8 //char/int8 (1 byte) |
168 | | * | 0x04 int16 //int16 (2 bytes) |
169 | | * | 0x05 int32 //int32 (4 bytes) |
170 | | * | 0x06 int64 //int64 (8 bytes) |
171 | | * | 0x07 double //floating point (8 bytes) |
172 | | * | 0x08 string //variable length string |
173 | | * | 0x09 binary //variable length binary |
174 | | * |
175 | | * container ::= |
176 | | * 0x0A int32 key_value_list //object, int32 is the total bytes of the object |
177 | | * | 0x0B int32 value_list //array, int32 is the total bytes of the array |
178 | | */ |
// One-byte type tag stored in front of every packed JSONB value.
// The numeric values are part of the serialized format: never renumber or
// reorder them, only append new tags immediately before NUM_TYPES.
enum class JsonbType : char {
    T_Null = 0x00,       // null
    T_True = 0x01,       // boolean true
    T_False = 0x02,      // boolean false
    T_Int8 = 0x03,       // 8-bit integer
    T_Int16 = 0x04,      // 16-bit integer
    T_Int32 = 0x05,      // 32-bit integer
    T_Int64 = 0x06,      // 64-bit integer
    T_Double = 0x07,     // double
    T_String = 0x08,     // variable length string
    T_Binary = 0x09,     // variable length binary
    T_Object = 0x0A,     // object container
    T_Array = 0x0B,      // array container
    T_Int128 = 0x0C,     // 128-bit integer
    T_Float = 0x0D,      // float
    T_Decimal32 = 0x0E,  // DecimalV3 only
    T_Decimal64 = 0x0F,  // DecimalV3 only
    T_Decimal128 = 0x10, // DecimalV3 only
    T_Decimal256 = 0x11, // DecimalV3 only
    NUM_TYPES,           // number of tags above; not a value type itself
};
200 | | |
201 | 11 | inline PrimitiveType get_primitive_type_from_json_type(JsonbType json_type) { |
202 | 11 | switch (json_type) { |
203 | 1 | case JsonbType::T_Null: |
204 | 1 | return TYPE_NULL; |
205 | 1 | case JsonbType::T_True: |
206 | 2 | case JsonbType::T_False: |
207 | 2 | return TYPE_BOOLEAN; |
208 | 0 | case JsonbType::T_Int8: |
209 | 0 | return TYPE_TINYINT; |
210 | 0 | case JsonbType::T_Int16: |
211 | 0 | return TYPE_SMALLINT; |
212 | 0 | case JsonbType::T_Int32: |
213 | 0 | return TYPE_INT; |
214 | 0 | case JsonbType::T_Int64: |
215 | 0 | return TYPE_BIGINT; |
216 | 0 | case JsonbType::T_Double: |
217 | 0 | return TYPE_DOUBLE; |
218 | 1 | case JsonbType::T_String: |
219 | 1 | return TYPE_STRING; |
220 | 0 | case JsonbType::T_Binary: |
221 | 0 | return TYPE_BINARY; |
222 | 0 | case JsonbType::T_Object: |
223 | 0 | return TYPE_STRUCT; |
224 | 1 | case JsonbType::T_Array: |
225 | 1 | return TYPE_ARRAY; |
226 | 1 | case JsonbType::T_Int128: |
227 | 1 | return TYPE_LARGEINT; |
228 | 1 | case JsonbType::T_Float: |
229 | 1 | return TYPE_FLOAT; |
230 | 1 | case JsonbType::T_Decimal32: |
231 | 1 | return TYPE_DECIMAL32; |
232 | 1 | case JsonbType::T_Decimal64: |
233 | 1 | return TYPE_DECIMAL64; |
234 | 1 | case JsonbType::T_Decimal128: |
235 | 1 | return TYPE_DECIMAL128I; |
236 | 1 | case JsonbType::T_Decimal256: |
237 | 1 | return TYPE_DECIMAL256; |
238 | 0 | default: |
239 | 0 | throw Exception(ErrorCode::INTERNAL_ERROR, "Unsupported JsonbType: {}", |
240 | 0 | static_cast<int>(json_type)); |
241 | 11 | } |
242 | 11 | } |
243 | | |
// Tokens and leg-type codes used by the JSON path parser.
// Declared `inline constexpr` so that every translation unit including this
// header shares a single definition instead of getting its own private copy.
inline constexpr char SCOPE = '$';        // root of every path
inline constexpr char BEGIN_MEMBER = '.'; // starts an object member leg
inline constexpr char BEGIN_ARRAY = '[';  // starts an array index leg
inline constexpr char END_ARRAY = ']';    // ends an array index leg
inline constexpr char DOUBLE_QUOTE = '"'; // quotes a member name
inline constexpr char WILDCARD = '*';
inline constexpr char MINUS = '-';
inline constexpr char LAST[] = "last";
inline constexpr char ESCAPE = '\\';
inline constexpr unsigned int MEMBER_CODE = 0; // leg_info::type of a member leg
inline constexpr unsigned int ARRAY_CODE = 1;  // leg_info::type of an array leg
256 | | |
257 | | /// A simple input stream class for the JSON path parser. |
258 | | class Stream { |
259 | | public: |
260 | | /// Creates an input stream reading from a character string. |
261 | | /// @param string the input string |
262 | | /// @param length the length of the input string |
263 | 99 | Stream(const char* string, size_t length) : m_position(string), m_end(string + length) {} |
264 | | |
265 | | /// Returns a pointer to the current position in the stream. |
266 | 93 | const char* position() const { return m_position; } |
267 | | |
268 | | /// Returns a pointer to the position just after the end of the stream. |
269 | 0 | const char* end() const { return m_end; } |
270 | | |
271 | | /// Returns the number of bytes remaining in the stream. |
272 | 1.78k | size_t remaining() const { |
273 | 1.78k | assert(m_position <= m_end); |
274 | 1.78k | return m_end - m_position; |
275 | 1.78k | } |
276 | | |
277 | | /// Tells if the stream has been exhausted. |
278 | 1.62k | bool exhausted() const { return remaining() == 0; } |
279 | | |
280 | | /// Reads the next byte from the stream and moves the position forward. |
281 | 99 | char read() { |
282 | 99 | assert(!exhausted()); |
283 | 99 | return *m_position++; |
284 | 99 | } |
285 | | |
286 | | /// Reads the next byte from the stream without moving the position forward. |
287 | 831 | char peek() const { |
288 | 831 | assert(!exhausted()); |
289 | 831 | return *m_position; |
290 | 831 | } |
291 | | |
292 | | /// Moves the position to the next non-whitespace character. |
293 | 347 | void skip_whitespace() { |
294 | 347 | m_position = std::find_if_not(m_position, m_end, [](char c) { return std::isspace(c); }); |
295 | 347 | } |
296 | | |
297 | | /// Moves the position n bytes forward. |
298 | 155 | void skip(size_t n) { |
299 | 155 | assert(remaining() >= n); |
300 | 155 | m_position += n; |
301 | 155 | skip_whitespace(); |
302 | 155 | } |
303 | | |
304 | 125 | void advance() { m_position++; } |
305 | | |
306 | 186 | void clear_leg_ptr() { leg_ptr = nullptr; } |
307 | | |
308 | 93 | void set_leg_ptr(char* ptr) { |
309 | 93 | clear_leg_ptr(); |
310 | 93 | leg_ptr = ptr; |
311 | 93 | } |
312 | | |
313 | 128 | char* get_leg_ptr() { return leg_ptr; } |
314 | | |
315 | 93 | void clear_leg_len() { leg_len = 0; } |
316 | | |
317 | 125 | void add_leg_len() { leg_len++; } |
318 | | |
319 | 186 | unsigned int get_leg_len() const { return leg_len; } |
320 | | |
321 | 0 | void remove_escapes() { |
322 | 0 | int new_len = 0; |
323 | 0 | for (int i = 0; i < leg_len; i++) { |
324 | 0 | if (leg_ptr[i] != '\\') { |
325 | 0 | leg_ptr[new_len++] = leg_ptr[i]; |
326 | 0 | } |
327 | 0 | } |
328 | 0 | leg_ptr[new_len] = '\0'; |
329 | 0 | leg_len = new_len; |
330 | 0 | } |
331 | | |
332 | 0 | void set_has_escapes(bool has) { has_escapes = has; } |
333 | | |
334 | 35 | bool get_has_escapes() const { return has_escapes; } |
335 | | |
336 | | private: |
337 | | /// The current position in the stream. |
338 | | const char* m_position = nullptr; |
339 | | |
340 | | /// The end of the stream. |
341 | | const char* const m_end; |
342 | | |
343 | | ///path leg ptr |
344 | | char* leg_ptr = nullptr; |
345 | | |
346 | | ///path leg len |
347 | | unsigned int leg_len; |
348 | | |
349 | | ///Whether to contain escape characters |
350 | | bool has_escapes = false; |
351 | | }; |
352 | | |
353 | | struct leg_info { |
354 | | ///path leg ptr |
355 | | char* leg_ptr = nullptr; |
356 | | |
357 | | ///path leg len |
358 | | unsigned int leg_len; |
359 | | |
360 | | ///array_index |
361 | | int array_index; |
362 | | |
363 | | ///type: 0 is member 1 is array |
364 | | unsigned int type; |
365 | | |
366 | 0 | bool to_string(std::string* str) const { |
367 | 0 | if (type == MEMBER_CODE) { |
368 | 0 | str->push_back(BEGIN_MEMBER); |
369 | 0 | bool contains_space = false; |
370 | 0 | std::string tmp; |
371 | 0 | for (auto* it = leg_ptr; it != (leg_ptr + leg_len); ++it) { |
372 | 0 | if (std::isspace(*it)) { |
373 | 0 | contains_space = true; |
374 | 0 | } else if (*it == '"' || *it == ESCAPE || *it == '\r' || *it == '\n' || |
375 | 0 | *it == '\b' || *it == '\t') { |
376 | 0 | tmp.push_back(ESCAPE); |
377 | 0 | } |
378 | 0 | tmp.push_back(*it); |
379 | 0 | } |
380 | 0 | if (contains_space) { |
381 | 0 | str->push_back(DOUBLE_QUOTE); |
382 | 0 | } |
383 | 0 | str->append(tmp); |
384 | 0 | if (contains_space) { |
385 | 0 | str->push_back(DOUBLE_QUOTE); |
386 | 0 | } |
387 | 0 | return true; |
388 | 0 | } else if (type == ARRAY_CODE) { |
389 | 0 | str->push_back(BEGIN_ARRAY); |
390 | 0 | std::string int_str = std::to_string(array_index); |
391 | 0 | str->append(int_str); |
392 | 0 | str->push_back(END_ARRAY); |
393 | 0 | return true; |
394 | 0 | } else { |
395 | 0 | return false; |
396 | 0 | } |
397 | 0 | } |
398 | | }; |
399 | | |
400 | | class JsonbPath { |
401 | | public: |
402 | | // parse json path |
403 | | static bool parsePath(Stream* stream, JsonbPath* path); |
404 | | |
405 | | static bool parse_array(Stream* stream, JsonbPath* path); |
406 | | static bool parse_member(Stream* stream, JsonbPath* path); |
407 | | |
408 | | //return true if json path valid else return false |
409 | | bool seek(const char* string, size_t length); |
410 | | |
411 | 93 | void add_leg_to_leg_vector(std::unique_ptr<leg_info> leg) { |
412 | 93 | leg_vector.emplace_back(leg.release()); |
413 | 93 | } |
414 | | |
415 | 0 | void pop_leg_from_leg_vector() { leg_vector.pop_back(); } |
416 | | |
417 | 0 | bool to_string(std::string* res) const { |
418 | 0 | res->push_back(SCOPE); |
419 | 0 | for (const auto& leg : leg_vector) { |
420 | 0 | auto valid = leg->to_string(res); |
421 | 0 | if (!valid) { |
422 | 0 | return false; |
423 | 0 | } |
424 | 0 | } |
425 | 0 | return true; |
426 | 0 | } |
427 | | |
428 | 187 | size_t get_leg_vector_size() const { return leg_vector.size(); } |
429 | | |
430 | 300 | leg_info* get_leg_from_leg_vector(size_t i) const { return leg_vector[i].get(); } |
431 | | |
432 | 3 | bool is_wildcard() const { return _is_wildcard; } |
433 | 99 | bool is_supper_wildcard() const { return _is_supper_wildcard; } |
434 | | |
435 | 6 | void clean() { leg_vector.clear(); } |
436 | | |
437 | | private: |
438 | | std::vector<std::unique_ptr<leg_info>> leg_vector; |
439 | | bool _is_wildcard = false; // whether the path is a wildcard path |
440 | | bool _is_supper_wildcard = false; // supper wildcard likes '$**.a' or '$**[1]' |
441 | | }; |
442 | | |
443 | | /* |
444 | | * JsonbFwdIteratorT implements JSONB's iterator template. |
445 | | * |
446 | | * Note: it is a FORWARD iterator only due to the design of JSONB format. |
447 | | */ |
// Forward iterator over variable-length packed JSONB elements.
//
// Iter_Type is a raw pointer to the element type; the element must provide
// numPackedBytes(), which tells how many bytes to step over to reach the next
// element. Cont_Type only tags the iterator with its container type so that
// iterators of different containers are distinct types.
template <class Iter_Type, class Cont_Type>
class JsonbFwdIteratorT {
public:
    using iterator = Iter_Type;
    using pointer = typename std::iterator_traits<Iter_Type>::pointer;
    using reference = typename std::iterator_traits<Iter_Type>::reference;

    explicit JsonbFwdIteratorT() : current_(nullptr) {}
    explicit JsonbFwdIteratorT(const iterator& i) : current_(i) {}

    // allow non-const to const iterator conversion (same container type)
    template <class Iter_Ty>
    JsonbFwdIteratorT(const JsonbFwdIteratorT<Iter_Ty, Cont_Type>& rhs) : current_(rhs.base()) {}

    bool operator==(const JsonbFwdIteratorT& rhs) const { return (current_ == rhs.current_); }

    bool operator!=(const JsonbFwdIteratorT& rhs) const { return !operator==(rhs); }

    bool operator<(const JsonbFwdIteratorT& rhs) const { return (current_ < rhs.current_); }

    // Strictly greater: false for equal iterators. (It used to be implemented
    // as !operator<, which is ">=" and reported a > a as true.)
    bool operator>(const JsonbFwdIteratorT& rhs) const { return (rhs.current_ < current_); }

    // Pre-increment: steps over the current element's packed bytes.
    JsonbFwdIteratorT& operator++() {
        current_ = next();
        return *this;
    }

    // Post-increment: advances and returns the position before the advance.
    JsonbFwdIteratorT operator++(int) {
        auto tmp = *this;
        current_ = next();
        return tmp;
    }

    explicit operator pointer() { return current_; }

    reference operator*() const { return *current_; }

    pointer operator->() const { return current_; }

    iterator base() const { return current_; }

private:
    // Byte pointer with the same constness as the pointed-to element, so the
    // address arithmetic below never casts const away.
    using byte_pointer =
            std::conditional_t<std::is_const_v<std::remove_pointer_t<iterator>>, const char*,
                               char*>;

    // Address of the element that follows the current one.
    iterator next() const {
        return reinterpret_cast<iterator>(reinterpret_cast<byte_pointer>(current_) +
                                          current_->numPackedBytes());
    }

    iterator current_;
};
492 | | using JsonbTypeUnder = std::underlying_type_t<JsonbType>; |
493 | | |
494 | | #if defined(__clang__) |
495 | | #pragma clang diagnostic push |
496 | | #pragma clang diagnostic ignored "-Wzero-length-array" |
497 | | #endif |
498 | | #pragma pack(push, 1) |
499 | | |
500 | | /* |
501 | | * JsonbDocument is the main object that accesses and queries JSONB packed |
502 | | * bytes. NOTE: JsonbDocument only allows object container as the top level |
503 | | * JSONB value. However, you can use the static method "createValue" to get any |
504 | | * JsonbValue object from the packed bytes. |
505 | | * |
506 | | * JsonbDocument object also dereferences to an object container value |
507 | | * (ObjectVal) once JSONB is loaded. |
508 | | * |
509 | | * ** Load ** |
510 | | * JsonbDocument is usable after loading packed bytes (memory location) into |
511 | | * the object. We only need the header and first few bytes of the payload after |
512 | | * header to verify the JSONB. |
513 | | * |
514 | | * Note: creating an JsonbDocument (through createDocument) does not allocate |
515 | | * any memory. The document object is an efficient wrapper on the packed bytes |
516 | | * which is accessed directly. |
517 | | * |
518 | | * ** Query ** |
519 | | * Query is through dereferencing into ObjectVal. |
520 | | */ |
521 | | class JsonbDocument { |
522 | | public: |
523 | | // create an JsonbDocument object from JSONB packed bytes |
524 | | [[nodiscard]] static Status checkAndCreateDocument(const char* pb, size_t size, |
525 | | const JsonbDocument** doc); |
526 | | |
527 | | // create an JsonbValue from JSONB packed bytes |
528 | | static const JsonbValue* createValue(const char* pb, size_t size); |
529 | | |
530 | 0 | uint8_t version() const { return header_.ver_; } |
531 | | |
532 | 26.4k | const JsonbValue* getValue() const { return ((const JsonbValue*)payload_); } |
533 | | |
534 | | unsigned int numPackedBytes() const; |
535 | | |
536 | | const ObjectVal* operator->() const; |
537 | | |
538 | | private: |
539 | | /* |
540 | | * JsonbHeader class defines JSONB header (internal to JsonbDocument). |
541 | | * |
542 | | * Currently it only contains version information (1-byte). We may expand the |
543 | | * header to include checksum of the JSONB binary for more security. |
544 | | */ |
545 | | struct JsonbHeader { |
546 | | uint8_t ver_; |
547 | | } header_; |
548 | | |
549 | | char payload_[0]; |
550 | | }; |
551 | | |
552 | | /* |
553 | | * JsonbKeyValue class defines JSONB key type, as described below. |
554 | | * |
555 | | * key ::= |
556 | | * 0x00 int8 //1-byte dictionary id |
557 | | * | int8 (byte*) //int8 (>0) is the size of the key string |
558 | | * |
559 | | * value ::= primitive_value | container |
560 | | * |
561 | | * JsonbKeyValue can be either an id mapping to the key string in an external |
562 | | * dictionary, or it is the original key string. Whether to read an id or a |
563 | | * string is decided by the first byte (size). |
564 | | * |
565 | | * Note: a key object must be followed by a value object. Therefore, a key |
566 | | * object implicitly refers to a key-value pair, and you can get the value |
567 | | * object right after the key object. The function numPackedBytes hence |
568 | | * indicates the total size of the key-value pair, so that we will be able go |
569 | | * to next pair from the key. |
570 | | * |
571 | | * ** Dictionary size ** |
572 | | * By default, the dictionary size is 255 (1-byte). Users can define |
573 | | * "USE_LARGE_DICT" to increase the dictionary size to 65535 (2-byte). |
574 | | */ |
575 | | class JsonbKeyValue { |
576 | | public: |
577 | | // now we use sMaxKeyId to represent an empty key |
578 | | static const int sMaxKeyId = 65535; |
579 | | using keyid_type = uint16_t; |
580 | | |
581 | | static const uint8_t sMaxKeyLen = 64; |
582 | | |
583 | | // size of the key. 0 indicates it is stored as id |
584 | 1.42k | uint8_t klen() const { return size; } |
585 | | |
586 | | // get the key string. Note the string may not be null terminated. |
587 | 767 | const char* getKeyStr() const { return key.str_; } |
588 | | |
589 | 9.33k | keyid_type getKeyId() const { return key.id_; } |
590 | | |
591 | 35.4k | unsigned int keyPackedBytes() const { |
592 | 35.4k | return size ? (sizeof(size) + size) : (sizeof(size) + sizeof(keyid_type)); |
593 | 35.4k | } |
594 | | |
595 | 17.7k | const JsonbValue* value() const { |
596 | 17.7k | return (const JsonbValue*)(((char*)this) + keyPackedBytes()); |
597 | 17.7k | } |
598 | | |
599 | | // size of the total packed bytes (key+value) |
600 | | unsigned int numPackedBytes() const; |
601 | | |
602 | | uint8_t size; |
603 | | |
604 | | union key_ { |
605 | | keyid_type id_; |
606 | | char str_[1]; |
607 | | } key; |
608 | | }; |
609 | | |
610 | | struct JsonbFindResult { |
611 | | const JsonbValue* value = nullptr; // found value |
612 | | std::unique_ptr<JsonbWriter> writer; // writer to write the value |
613 | | bool is_wildcard = false; // whether the path is a wildcard path |
614 | | }; |
615 | | |
616 | | /* |
617 | | * JsonbValue is the base class of all JSONB types. It contains only one member |
618 | | * variable - type info, which can be retrieved by member functions is[Type]() |
619 | | * or type(). |
620 | | */ |
621 | | struct JsonbValue { |
622 | | static const uint32_t sMaxValueLen = 1 << 24; // 16M |
623 | | |
624 | 4.27k | bool isNull() const { return (type == JsonbType::T_Null); } |
625 | 21 | bool isTrue() const { return (type == JsonbType::T_True); } |
626 | 1 | bool isFalse() const { return (type == JsonbType::T_False); } |
627 | 54 | bool isInt() const { return isInt8() || isInt16() || isInt32() || isInt64() || isInt128(); } |
628 | 54 | bool isInt8() const { return (type == JsonbType::T_Int8); } |
629 | 30 | bool isInt16() const { return (type == JsonbType::T_Int16); } |
630 | 28 | bool isInt32() const { return (type == JsonbType::T_Int32); } |
631 | 31 | bool isInt64() const { return (type == JsonbType::T_Int64); } |
632 | 190 | bool isDouble() const { return (type == JsonbType::T_Double); } |
633 | 153 | bool isFloat() const { return (type == JsonbType::T_Float); } |
634 | 45 | bool isString() const { return (type == JsonbType::T_String); } |
635 | 1.09k | bool isBinary() const { return (type == JsonbType::T_Binary); } |
636 | 97 | bool isObject() const { return (type == JsonbType::T_Object); } |
637 | 15 | bool isArray() const { return (type == JsonbType::T_Array); } |
638 | 31 | bool isInt128() const { return (type == JsonbType::T_Int128); } |
639 | 74 | bool isDecimal() const { |
640 | 74 | return (type == JsonbType::T_Decimal32 || type == JsonbType::T_Decimal64 || |
641 | 74 | type == JsonbType::T_Decimal128 || type == JsonbType::T_Decimal256); |
642 | 74 | } |
643 | 1 | bool isDecimal32() const { return (type == JsonbType::T_Decimal32); } |
644 | 1 | bool isDecimal64() const { return (type == JsonbType::T_Decimal64); } |
645 | 1 | bool isDecimal128() const { return (type == JsonbType::T_Decimal128); } |
646 | 1 | bool isDecimal256() const { return (type == JsonbType::T_Decimal256); } |
647 | | |
648 | 11 | PrimitiveType get_primitive_type() const { return get_primitive_type_from_json_type(type); } |
649 | | |
650 | 0 | const char* typeName() const { |
651 | 0 | switch (type) { |
652 | 0 | case JsonbType::T_Null: |
653 | 0 | return "null"; |
654 | 0 | case JsonbType::T_True: |
655 | 0 | case JsonbType::T_False: |
656 | 0 | return "bool"; |
657 | 0 | case JsonbType::T_Int8: |
658 | 0 | case JsonbType::T_Int16: |
659 | 0 | case JsonbType::T_Int32: |
660 | 0 | return "int"; |
661 | 0 | case JsonbType::T_Int64: |
662 | 0 | return "bigint"; |
663 | 0 | case JsonbType::T_Int128: |
664 | 0 | return "largeint"; |
665 | 0 | case JsonbType::T_Double: |
666 | 0 | return "double"; |
667 | 0 | case JsonbType::T_Float: |
668 | 0 | return "float"; |
669 | 0 | case JsonbType::T_String: |
670 | 0 | return "string"; |
671 | 0 | case JsonbType::T_Binary: |
672 | 0 | return "binary"; |
673 | 0 | case JsonbType::T_Object: |
674 | 0 | return "object"; |
675 | 0 | case JsonbType::T_Array: |
676 | 0 | return "array"; |
677 | 0 | case JsonbType::T_Decimal32: |
678 | 0 | return "Decimal32"; |
679 | 0 | case JsonbType::T_Decimal64: |
680 | 0 | return "Decimal64"; |
681 | 0 | case JsonbType::T_Decimal128: |
682 | 0 | return "Decimal128"; |
683 | 0 | case JsonbType::T_Decimal256: |
684 | 0 | return "Decimal256"; |
685 | 0 | default: |
686 | 0 | return "unknown"; |
687 | 0 | } |
688 | 0 | } |
689 | | |
690 | | // size of the total packed bytes |
691 | | unsigned int numPackedBytes() const; |
692 | | |
693 | | // size of the value in bytes |
694 | | unsigned int size() const; |
695 | | |
696 | | //Get the number of jsonbvalue elements |
697 | | int numElements() const; |
698 | | |
699 | | //Whether to include the jsonbvalue rhs |
700 | | bool contains(const JsonbValue* rhs) const; |
701 | | |
702 | | // find the JSONB value by JsonbPath |
703 | | JsonbFindResult findValue(JsonbPath& path) const; |
704 | | friend class JsonbDocument; |
705 | | |
706 | | JsonbType type; // type info |
707 | | |
708 | | char payload[0]; // payload, which is the packed bytes of the value |
709 | | |
710 | | /** |
711 | | * @brief Unpacks the underlying Jsonb binary content as a pointer to type `T`. |
712 | | * |
713 | | * @tparam T A POD (Plain Old Data) type that must satisfy the `JsonbPodType` concept. |
714 | | * This ensures that `T` is trivially copyable, standard-layout, and safe to |
715 | | * reinterpret from raw bytes without invoking undefined behavior. |
716 | | * |
717 | | * @return A pointer to a `const T` object, interpreted from the internal buffer. |
718 | | * |
719 | | * @note The caller must ensure that the current JsonbValue actually contains data |
720 | | * compatible with type `T`, otherwise the result is undefined. |
721 | | */ |
722 | | template <JsonbPodType T> |
723 | 52.6k | const T* unpack() const { |
724 | 52.6k | static_assert(is_pod_v<T>, "T must be a POD type"); |
725 | 52.6k | return reinterpret_cast<const T*>(payload); |
726 | 52.6k | } _ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_9ObjectValEEEPKT_v Line | Count | Source | 723 | 19.8k | const T* unpack() const { | 724 | 19.8k | static_assert(is_pod_v<T>, "T must be a POD type"); | 725 | 19.8k | return reinterpret_cast<const T*>(payload); | 726 | 19.8k | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIaEEEEPKT_v Line | Count | Source | 723 | 740 | const T* unpack() const { | 724 | 740 | static_assert(is_pod_v<T>, "T must be a POD type"); | 725 | 740 | return reinterpret_cast<const T*>(payload); | 726 | 740 | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIsEEEEPKT_v Line | Count | Source | 723 | 108 | const T* unpack() const { | 724 | 108 | static_assert(is_pod_v<T>, "T must be a POD type"); | 725 | 108 | return reinterpret_cast<const T*>(payload); | 726 | 108 | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIiEEEEPKT_v Line | Count | Source | 723 | 3.48k | const T* unpack() const { | 724 | 3.48k | static_assert(is_pod_v<T>, "T must be a POD type"); | 725 | 3.48k | return reinterpret_cast<const T*>(payload); | 726 | 3.48k | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIlEEEEPKT_v Line | Count | Source | 723 | 1.91k | const T* unpack() const { | 724 | 1.91k | static_assert(is_pod_v<T>, "T must be a POD type"); | 725 | 1.91k | return reinterpret_cast<const T*>(payload); | 726 | 1.91k | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTInEEEEPKT_v Line | Count | Source | 723 | 4.17k | const T* unpack() const { | 724 | 4.17k | static_assert(is_pod_v<T>, "T must be a POD type"); | 725 | 4.17k | return reinterpret_cast<const T*>(payload); | 726 | 4.17k | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIdEEEEPKT_v Line | Count | Source | 723 | 184 | const T* unpack() const { | 724 | 184 | static_assert(is_pod_v<T>, "T must be a POD type"); | 725 | 184 | return reinterpret_cast<const T*>(payload); | 726 | 184 | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIfEEEEPKT_v Line | Count | Source | 723 | 28 | const T* unpack() const { | 724 | 28 | static_assert(is_pod_v<T>, "T must be a POD type"); | 725 | 28 | return reinterpret_cast<const T*>(payload); | 726 | 28 | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_15JsonbDecimalValINS_7DecimalIiEEEEEEPKT_v Line | Count | Source | 723 | 25 | const T* unpack() const { | 724 | 25 | static_assert(is_pod_v<T>, "T must be a POD type"); | 725 | 25 | return reinterpret_cast<const T*>(payload); | 726 | 25 | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_15JsonbDecimalValINS_7DecimalIlEEEEEEPKT_v Line | Count | Source | 723 | 15 | const T* unpack() const { | 724 | 15 | static_assert(is_pod_v<T>, "T must be a POD type"); | 725 | 15 | return reinterpret_cast<const T*>(payload); | 726 | 15 | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_15JsonbDecimalValINS_12Decimal128V3EEEEEPKT_v Line | Count | Source | 723 | 23 | const T* unpack() const { | 724 | 23 | static_assert(is_pod_v<T>, "T must be a POD type"); | 725 | 23 | return reinterpret_cast<const T*>(payload); | 726 | 23 | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_15JsonbDecimalValINS_7DecimalIN4wide7integerILm256EiEEEEEEEEPKT_v Line | Count | Source | 723 | 13 | const T* unpack() const { | 724 | 13 | static_assert(is_pod_v<T>, "T must be a POD type"); | 725 | 13 | return reinterpret_cast<const T*>(payload); | 726 | 13 | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_14JsonbBinaryValEEEPKT_v Line | Count | Source | 723 | 19.6k | const T* unpack() const { | 724 | 19.6k | static_assert(is_pod_v<T>, "T must be a POD type"); | 725 | 19.6k | return reinterpret_cast<const T*>(payload); | 726 | 19.6k | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_12ContainerValEEEPKT_v Line | Count | Source | 723 | 1.96k | const T* unpack() const { | 724 | 1.96k | static_assert(is_pod_v<T>, "T must be a POD type"); | 725 | 1.96k | return reinterpret_cast<const T*>(payload); | 726 | 1.96k | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_8ArrayValEEEPKT_v Line | Count | Source | 723 | 121 | const T* unpack() const { | 724 | 121 | static_assert(is_pod_v<T>, "T must be a POD type"); | 725 | 121 | return reinterpret_cast<const T*>(payload); | 726 | 121 | } |
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_14JsonbStringValEEEPKT_v Line | Count | Source | 723 | 339 | const T* unpack() const { | 724 | 339 | static_assert(is_pod_v<T>, "T must be a POD type"); | 725 | 339 | return reinterpret_cast<const T*>(payload); | 726 | 339 | } |
|
727 | | |
728 | | // /** |
729 | | // * @brief Unpacks the underlying Jsonb binary content as a pointer to type `T`. |
730 | | // * |
731 | | // * @tparam T A POD (Plain Old Data) type that must satisfy the `JsonbPodType` concept. |
732 | | // * This ensures that `T` is trivially copyable, standard-layout, and safe to |
733 | | // * reinterpret from raw bytes without invoking undefined behavior. |
734 | | // * |
735 | | // * @return A pointer to a `T` object, interpreted from the internal buffer. |
736 | | // * |
737 | | // * @note The caller must ensure that the current JsonbValue actually contains data |
738 | | // * compatible with type `T`, otherwise the result is undefined. |
739 | | // */ |
740 | | // template <JsonbPodType T> |
741 | | // T* unpack() { |
742 | | // static_assert(is_pod_v<T>, "T must be a POD type"); |
743 | | // return reinterpret_cast<T*>(payload); |
744 | | // } |
745 | | |
746 | | int128_t int_val() const; |
747 | | }; |
748 | | |
749 | | // inline ObjectVal* JsonbDocument::operator->() { |
750 | | // return (((JsonbValue*)payload_)->unpack<ObjectVal>()); |
751 | | // } |
752 | | |
753 | 19.2k | inline const ObjectVal* JsonbDocument::operator->() const { |
754 | 19.2k | return (((const JsonbValue*)payload_)->unpack<ObjectVal>()); |
755 | 19.2k | } |
756 | | |
757 | | /* |
758 | | * NumerValT is the template class (derived from JsonbValue) of all number |
759 | | * types (integers and double). |
760 | | */ |
761 | | template <typename T> |
762 | | requires std::is_integral_v<T> || std::is_floating_point_v<T> |
763 | | struct NumberValT { |
764 | | public: |
765 | 10.6k | T val() const { return num; }_ZNK5doris10NumberValTIaE3valEv Line | Count | Source | 765 | 740 | T val() const { return num; } |
_ZNK5doris10NumberValTIsE3valEv Line | Count | Source | 765 | 108 | T val() const { return num; } |
_ZNK5doris10NumberValTIiE3valEv Line | Count | Source | 765 | 3.48k | T val() const { return num; } |
_ZNK5doris10NumberValTIlE3valEv Line | Count | Source | 765 | 1.91k | T val() const { return num; } |
_ZNK5doris10NumberValTInE3valEv Line | Count | Source | 765 | 4.17k | T val() const { return num; } |
_ZNK5doris10NumberValTIdE3valEv Line | Count | Source | 765 | 184 | T val() const { return num; } |
_ZNK5doris10NumberValTIfE3valEv Line | Count | Source | 765 | 28 | T val() const { return num; } |
|
766 | | |
767 | | static unsigned int numPackedBytes() { return sizeof(JsonbValue) + sizeof(T); } |
768 | | |
769 | | T num; |
770 | | }; |
771 | | |
772 | 49 | inline int128_t JsonbValue::int_val() const { |
773 | 49 | switch (type) { |
774 | 42 | case JsonbType::T_Int8: |
775 | 42 | return unpack<JsonbInt8Val>()->val(); |
776 | 1 | case JsonbType::T_Int16: |
777 | 1 | return unpack<JsonbInt16Val>()->val(); |
778 | 0 | case JsonbType::T_Int32: |
779 | 0 | return unpack<JsonbInt32Val>()->val(); |
780 | 3 | case JsonbType::T_Int64: |
781 | 3 | return unpack<JsonbInt64Val>()->val(); |
782 | 3 | case JsonbType::T_Int128: |
783 | 3 | return unpack<JsonbInt128Val>()->val(); |
784 | 0 | default: |
785 | 0 | throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB value type: {}", |
786 | 0 | static_cast<int32_t>(type)); |
787 | 49 | } |
788 | 49 | } |
789 | | |
790 | | template <JsonbDecimalType T> |
791 | | struct JsonbDecimalVal { |
792 | | public: |
793 | | using NativeType = typename T::NativeType; |
794 | | |
795 | | // get the decimal value |
796 | 44 | NativeType val() const { |
797 | | // to avoid memory alignment issues, we use memcpy to copy the value |
798 | 44 | NativeType tmp; |
799 | 44 | memcpy(&tmp, &value, sizeof(NativeType)); |
800 | 44 | return tmp; |
801 | 44 | } _ZNK5doris15JsonbDecimalValINS_7DecimalIiEEE3valEv Line | Count | Source | 796 | 16 | NativeType val() const { | 797 | | // to avoid memory alignment issues, we use memcpy to copy the value | 798 | 16 | NativeType tmp; | 799 | 16 | memcpy(&tmp, &value, sizeof(NativeType)); | 800 | 16 | return tmp; | 801 | 16 | } |
_ZNK5doris15JsonbDecimalValINS_7DecimalIlEEE3valEv Line | Count | Source | 796 | 8 | NativeType val() const { | 797 | | // to avoid memory alignment issues, we use memcpy to copy the value | 798 | 8 | NativeType tmp; | 799 | 8 | memcpy(&tmp, &value, sizeof(NativeType)); | 800 | 8 | return tmp; | 801 | 8 | } |
_ZNK5doris15JsonbDecimalValINS_12Decimal128V3EE3valEv Line | Count | Source | 796 | 14 | NativeType val() const { | 797 | | // to avoid memory alignment issues, we use memcpy to copy the value | 798 | 14 | NativeType tmp; | 799 | 14 | memcpy(&tmp, &value, sizeof(NativeType)); | 800 | 14 | return tmp; | 801 | 14 | } |
_ZNK5doris15JsonbDecimalValINS_7DecimalIN4wide7integerILm256EiEEEEE3valEv Line | Count | Source | 796 | 6 | NativeType val() const { | 797 | | // to avoid memory alignment issues, we use memcpy to copy the value | 798 | 6 | NativeType tmp; | 799 | 6 | memcpy(&tmp, &value, sizeof(NativeType)); | 800 | 6 | return tmp; | 801 | 6 | } |
|
802 | | |
803 | 49 | static constexpr int numPackedBytes() { |
804 | 49 | return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value); |
805 | 49 | } _ZN5doris15JsonbDecimalValINS_7DecimalIiEEE14numPackedBytesEv Line | Count | Source | 803 | 19 | static constexpr int numPackedBytes() { | 804 | 19 | return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value); | 805 | 19 | } |
_ZN5doris15JsonbDecimalValINS_7DecimalIlEEE14numPackedBytesEv Line | Count | Source | 803 | 9 | static constexpr int numPackedBytes() { | 804 | 9 | return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value); | 805 | 9 | } |
_ZN5doris15JsonbDecimalValINS_12Decimal128V3EE14numPackedBytesEv Line | Count | Source | 803 | 15 | static constexpr int numPackedBytes() { | 804 | 15 | return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value); | 805 | 15 | } |
_ZN5doris15JsonbDecimalValINS_7DecimalIN4wide7integerILm256EiEEEEE14numPackedBytesEv Line | Count | Source | 803 | 6 | static constexpr int numPackedBytes() { | 804 | 6 | return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value); | 805 | 6 | } |
|
806 | | |
807 | | uint32_t precision; |
808 | | uint32_t scale; |
809 | | NativeType value; |
810 | | }; |
811 | | |
812 | | /* |
813 | | * BlobVal is the base class (derived from JsonbValue) for string and binary |
814 | | * types. The size indicates the total bytes of the payload. |
815 | | */ |
816 | | struct JsonbBinaryVal { |
817 | | public: |
818 | | // size of the blob payload only |
819 | 2.14k | unsigned int getBlobLen() const { return size; } |
820 | | |
821 | | // return the blob as byte array |
822 | 4.59k | const char* getBlob() const { return payload; } |
823 | | |
824 | | // size of the total packed bytes |
825 | 15.2k | unsigned int numPackedBytes() const { return sizeof(JsonbValue) + sizeof(size) + size; } |
826 | | friend class JsonbDocument; |
827 | | |
828 | | uint32_t size; |
829 | | char payload[0]; |
830 | | }; |
831 | | |
832 | | /* |
833 | | * String type |
834 | | * Note: JSONB string may not be a c-string (NULL-terminated) |
835 | | */ |
836 | | struct JsonbStringVal : public JsonbBinaryVal { |
837 | | public: |
838 | | /* |
839 | | This function return the actual size of a string. Since for |
840 | | a string, it can be null-terminated with null paddings or it |
841 | | can take all the space in the payload without null in the end. |
842 | | So we need to check it to get the true actual length of a string. |
843 | | */ |
844 | 170 | size_t length() const { |
845 | | // It's an empty string |
846 | 170 | if (0 == size) { |
847 | 0 | return size; |
848 | 0 | } |
849 | | // The string stored takes all the spaces in payload |
850 | 170 | if (payload[size - 1] != 0) { |
851 | 170 | return size; |
852 | 170 | } |
853 | | // It's shorter than the size of payload |
854 | 0 | return strnlen(payload, size); |
855 | 170 | } |
856 | | }; |
857 | | |
858 | | /* |
859 | | * ContainerVal is the base class (derived from JsonbValue) for object and |
860 | | * array types. The size indicates the total bytes of the payload. |
861 | | */ |
862 | | struct ContainerVal { |
863 | | // size of the container payload only |
864 | 0 | unsigned int getContainerSize() const { return size; } |
865 | | |
866 | | // return the container payload as byte array |
867 | 0 | const char* getPayload() const { return payload; } |
868 | | |
869 | | // size of the total packed bytes |
870 | 1.96k | unsigned int numPackedBytes() const { return sizeof(JsonbValue) + sizeof(size) + size; } |
871 | | friend class JsonbDocument; |
872 | | |
873 | | uint32_t size; |
874 | | char payload[0]; |
875 | | }; |
876 | | |
877 | | /* |
878 | | * Object type |
879 | | */ |
880 | | struct ObjectVal : public ContainerVal { |
881 | | using value_type = JsonbKeyValue; |
882 | | using pointer = value_type*; |
883 | | using const_pointer = const value_type*; |
884 | | using const_iterator = JsonbFwdIteratorT<const_pointer, ObjectVal>; |
885 | | |
886 | 1 | const_iterator search(const char* key) const { |
887 | 1 | if (!key) { |
888 | 0 | return end(); |
889 | 0 | } |
890 | 1 | return search(key, (unsigned int)strlen(key)); |
891 | 1 | } |
892 | | |
893 | 32 | const_iterator search(const char* key, unsigned int klen) const { |
894 | 32 | if (!key || !klen) { |
895 | 0 | return end(); |
896 | 0 | } |
897 | 32 | return internalSearch(key, klen); |
898 | 32 | } |
899 | | |
900 | | // Get number of elements in object |
901 | 57 | int numElem() const { |
902 | 57 | const char* pch = payload; |
903 | 57 | const char* fence = payload + size; |
904 | | |
905 | 57 | unsigned int num = 0; |
906 | 149 | while (pch < fence) { |
907 | 92 | auto* pkey = (JsonbKeyValue*)(pch); |
908 | 92 | ++num; |
909 | 92 | pch += pkey->numPackedBytes(); |
910 | 92 | } |
911 | | |
912 | 57 | assert(pch == fence); |
913 | | |
914 | 57 | return num; |
915 | 57 | } |
916 | | |
917 | | // find the JSONB value by a key string (null terminated) |
918 | 1 | const JsonbValue* find(const char* key) const { |
919 | 1 | if (!key) { |
920 | 0 | return nullptr; |
921 | 0 | } |
922 | 1 | return find(key, (unsigned int)strlen(key)); |
923 | 1 | } |
924 | | |
925 | | // find the JSONB value by a key string (with length) |
926 | 30 | const JsonbValue* find(const char* key, unsigned int klen) const { |
927 | 30 | const_iterator kv = search(key, klen); |
928 | 30 | if (end() == kv) { |
929 | 2 | return nullptr; |
930 | 2 | } |
931 | 28 | return kv->value(); |
932 | 30 | } |
933 | | |
934 | 1.69k | const_iterator begin() const { return const_iterator((pointer)payload); } |
935 | | |
936 | 18.7k | const_iterator end() const { return const_iterator((pointer)(payload + size)); } |
937 | | |
938 | | std::vector<std::pair<StringRef, const JsonbValue*>> get_ordered_key_value_pairs() const; |
939 | | |
940 | | private: |
941 | 32 | const_iterator internalSearch(const char* key, unsigned int klen) const { |
942 | 32 | const char* pch = payload; |
943 | 32 | const char* fence = payload + size; |
944 | | |
945 | 42 | while (pch < fence) { |
946 | 40 | const auto* pkey = (const JsonbKeyValue*)(pch); |
947 | 40 | if (klen == pkey->klen() && strncmp(key, pkey->getKeyStr(), klen) == 0) { |
948 | 30 | return const_iterator(pkey); |
949 | 30 | } |
950 | 10 | pch += pkey->numPackedBytes(); |
951 | 10 | } |
952 | | |
953 | 32 | assert(pch == fence); |
954 | | |
955 | 2 | return end(); |
956 | 2 | } |
957 | | }; |
958 | | |
959 | | /* |
960 | | * Array type |
961 | | */ |
962 | | struct ArrayVal : public ContainerVal { |
963 | | using value_type = JsonbValue; |
964 | | using pointer = value_type*; |
965 | | using const_pointer = const value_type*; |
966 | | using const_iterator = JsonbFwdIteratorT<const_pointer, ArrayVal>; |
967 | | |
968 | | // get the JSONB value at index |
969 | 64 | const JsonbValue* get(int idx) const { |
970 | 64 | if (idx < 0) { |
971 | 0 | return nullptr; |
972 | 0 | } |
973 | | |
974 | 64 | const char* pch = payload; |
975 | 64 | const char* fence = payload + size; |
976 | | |
977 | 119 | while (pch < fence && idx-- > 0) { |
978 | 55 | pch += ((const JsonbValue*)pch)->numPackedBytes(); |
979 | 55 | } |
980 | 64 | if (idx > 0 || pch == fence) { |
981 | 7 | return nullptr; |
982 | 7 | } |
983 | | |
984 | 57 | return (const JsonbValue*)pch; |
985 | 64 | } |
986 | | |
987 | | // Get number of elements in array |
988 | 28 | int numElem() const { |
989 | 28 | const char* pch = payload; |
990 | 28 | const char* fence = payload + size; |
991 | | |
992 | 28 | unsigned int num = 0; |
993 | 91 | while (pch < fence) { |
994 | 63 | ++num; |
995 | 63 | pch += ((const JsonbValue*)pch)->numPackedBytes(); |
996 | 63 | } |
997 | | |
998 | 28 | assert(pch == fence); |
999 | | |
1000 | 28 | return num; |
1001 | 28 | } |
1002 | | |
1003 | 74 | const_iterator begin() const { return const_iterator((pointer)payload); } |
1004 | | |
1005 | 73 | const_iterator end() const { return const_iterator((pointer)(payload + size)); } |
1006 | | }; |
1007 | | |
1008 | | namespace jsonb_detail { |
1009 | | |
// A decimal widened to 256 bits together with its scale, so that decimals of
// different storage widths can be compared with one another.
struct JsonbScaledDecimal {
    // Unscaled decimal value.
    wide::Int256 value;
    // Decimal scale that accompanies `value`.
    uint32_t scale;
};
1014 | | |
1015 | 26 | inline void validate_decimal_scale(uint32_t scale) { |
1016 | 26 | if (scale > static_cast<uint32_t>(BeConsts::MAX_DECIMALV3_SCALE)) { |
1017 | 2 | throw Exception(ErrorCode::INTERNAL_ERROR, |
1018 | 2 | "Invalid JSONB decimal scale: {}, max allowed scale: {}", scale, |
1019 | 2 | BeConsts::MAX_DECIMALV3_SCALE); |
1020 | 2 | } |
1021 | 26 | } |
1022 | | |
1023 | 39 | inline bool is_numeric(const JsonbValue* value) { |
1024 | 39 | return value->isInt() || value->isDouble() || value->isFloat() || value->isDecimal(); |
1025 | 39 | } |
1026 | | |
1027 | 10 | inline double floating_value(const JsonbValue* value) { |
1028 | 10 | if (value->isDouble()) { |
1029 | 10 | return value->unpack<JsonbDoubleVal>()->val(); |
1030 | 10 | } |
1031 | 0 | return value->unpack<JsonbFloatVal>()->val(); |
1032 | 10 | } |
1033 | | |
1034 | 20 | inline JsonbScaledDecimal get_scaled_decimal(const JsonbValue* value) { |
1035 | 20 | switch (value->type) { |
1036 | 12 | case JsonbType::T_Decimal32: { |
1037 | 12 | const auto* decimal = value->unpack<JsonbDecimal32>(); |
1038 | 12 | validate_decimal_scale(decimal->scale); |
1039 | 12 | return {wide::Int256(decimal->val()), decimal->scale}; |
1040 | 0 | } |
1041 | 2 | case JsonbType::T_Decimal64: { |
1042 | 2 | const auto* decimal = value->unpack<JsonbDecimal64>(); |
1043 | 2 | validate_decimal_scale(decimal->scale); |
1044 | 2 | return {wide::Int256(decimal->val()), decimal->scale}; |
1045 | 0 | } |
1046 | 6 | case JsonbType::T_Decimal128: { |
1047 | 6 | const auto* decimal = value->unpack<JsonbDecimal128>(); |
1048 | 6 | validate_decimal_scale(decimal->scale); |
1049 | 6 | return {wide::Int256(decimal->val()), decimal->scale}; |
1050 | 0 | } |
1051 | 0 | case JsonbType::T_Decimal256: { |
1052 | 0 | const auto* decimal = value->unpack<JsonbDecimal256>(); |
1053 | 0 | validate_decimal_scale(decimal->scale); |
1054 | 0 | return {decimal->val(), decimal->scale}; |
1055 | 0 | } |
1056 | 0 | default: |
1057 | 0 | throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB decimal value type: {}", |
1058 | 0 | static_cast<int32_t>(value->type)); |
1059 | 20 | } |
1060 | 20 | } |
1061 | | |
1062 | | inline bool scaled_decimal_equal_decimal(const JsonbScaledDecimal& lhs, |
1063 | 4 | const JsonbScaledDecimal& rhs) { |
1064 | 4 | if (lhs.scale == rhs.scale) { |
1065 | 0 | return lhs.value == rhs.value; |
1066 | 0 | } |
1067 | | |
1068 | 4 | if (lhs.scale < rhs.scale) { |
1069 | 2 | const auto scale_multiplier = decimal_scale_multiplier<wide::Int256>(rhs.scale - lhs.scale); |
1070 | 2 | return rhs.value % scale_multiplier == 0 && lhs.value == rhs.value / scale_multiplier; |
1071 | 2 | } |
1072 | | |
1073 | 2 | const auto scale_multiplier = decimal_scale_multiplier<wide::Int256>(lhs.scale - rhs.scale); |
1074 | 2 | return lhs.value % scale_multiplier == 0 && lhs.value / scale_multiplier == rhs.value; |
1075 | 4 | } |
1076 | | |
1077 | 4 | inline bool scaled_decimal_equal_integer(const JsonbScaledDecimal& decimal, int128_t integer) { |
1078 | 4 | const auto integer_value = wide::Int256(integer); |
1079 | 4 | if (decimal.scale == 0) { |
1080 | 0 | return decimal.value == integer_value; |
1081 | 0 | } |
1082 | | |
1083 | 4 | const auto scale_multiplier = decimal_scale_multiplier<wide::Int256>(decimal.scale); |
1084 | 4 | return decimal.value % scale_multiplier == 0 && |
1085 | 4 | decimal.value / scale_multiplier == integer_value; |
1086 | 4 | } |
1087 | | |
1088 | | inline constexpr auto kPowersOfFive = [] { |
1089 | | std::array<wide::Int256, BeConsts::MAX_DECIMALV3_SCALE + 1> powers {}; |
1090 | | powers[0] = 1; |
1091 | | for (size_t i = 1; i < powers.size(); ++i) { |
1092 | | powers[i] = powers[i - 1] * 5; |
1093 | | } |
1094 | | return powers; |
1095 | | }(); |
1096 | | |
1097 | 6 | inline wide::Int256 power_of_five(uint32_t exponent) { |
1098 | 6 | validate_decimal_scale(exponent); |
1099 | 6 | return kPowersOfFive[exponent]; |
1100 | 6 | } |
1101 | | |
1102 | 6 | inline bool scaled_binary_equal(wide::Int256 value, int exponent, wide::Int256 significand) { |
1103 | 6 | if (exponent < 0) { |
1104 | 4 | const int divisor_exponent = -exponent; |
1105 | 4 | if (divisor_exponent >= std::numeric_limits<int64_t>::digits) { |
1106 | 0 | return false; |
1107 | 0 | } |
1108 | 4 | const auto divisor = wide::Int256(1) << divisor_exponent; |
1109 | 4 | return significand % divisor == 0 && value == significand / divisor; |
1110 | 4 | } |
1111 | 2 | constexpr int max_positive_int256_shift = std::numeric_limits<wide::Int256>::digits; |
1112 | | // wide::Int256 is signed, so shifting 1 by 255 reaches the sign bit. |
1113 | 2 | if (exponent >= max_positive_int256_shift) { |
1114 | 0 | return false; |
1115 | 0 | } |
1116 | 2 | const auto multiplier = wide::Int256(1) << exponent; |
1117 | 2 | return value % multiplier == 0 && value / multiplier == significand; |
1118 | 2 | } |
1119 | | |
1120 | 4 | inline bool floating_equal_integer(const JsonbValue* floating, int128_t integer) { |
1121 | 4 | const double value = floating_value(floating); |
1122 | 4 | int exponent = 0; |
1123 | 4 | std::frexp(value, &exponent); |
1124 | 4 | if (!std::isfinite(value) || std::trunc(value) != value) { |
1125 | 1 | return false; |
1126 | 1 | } |
1127 | 3 | if (exponent >= 128) { |
1128 | 0 | return value == -std::ldexp(1.0, 127) && integer == std::numeric_limits<int128_t>::min(); |
1129 | 0 | } |
1130 | 3 | if (exponent <= -1) { |
1131 | 0 | return false; |
1132 | 0 | } |
1133 | 3 | return static_cast<int128_t>(value) == integer; |
1134 | 3 | } |
1135 | | |
1136 | 6 | inline bool floating_equal_decimal(const JsonbValue* floating, const JsonbScaledDecimal& decimal) { |
1137 | 6 | const double value = floating_value(floating); |
1138 | 6 | if (!std::isfinite(value)) { |
1139 | 0 | return false; |
1140 | 0 | } |
1141 | 6 | if (value == 0) { |
1142 | 0 | return decimal.value == 0; |
1143 | 0 | } |
1144 | | |
1145 | 6 | int exponent = 0; |
1146 | 6 | const double significand_fraction = std::frexp(value, &exponent); |
1147 | 6 | const double significand_double = |
1148 | 6 | std::ldexp(significand_fraction, std::numeric_limits<double>::digits); |
1149 | 6 | auto significand = wide::Int256(static_cast<int64_t>(significand_double)); |
1150 | 6 | exponent -= std::numeric_limits<double>::digits; |
1151 | | |
1152 | 6 | const auto five_multiplier = power_of_five(decimal.scale); |
1153 | 6 | if (decimal.value % five_multiplier != 0) { |
1154 | 0 | return false; |
1155 | 0 | } |
1156 | 6 | const auto binary_scaled_decimal = decimal.value / five_multiplier; |
1157 | 6 | return scaled_binary_equal(binary_scaled_decimal, exponent + decimal.scale, significand); |
1158 | 6 | } |
1159 | | |
1160 | 39 | inline bool numeric_equal(const JsonbValue* lhs, const JsonbValue* rhs) { |
1161 | 39 | if (!is_numeric(rhs)) { |
1162 | 2 | return false; |
1163 | 2 | } |
1164 | | |
1165 | 37 | if ((lhs->isDouble() || lhs->isFloat()) && rhs->isInt()) { |
1166 | 1 | return floating_equal_integer(lhs, rhs->int_val()); |
1167 | 1 | } |
1168 | | |
1169 | 36 | if ((rhs->isDouble() || rhs->isFloat()) && lhs->isInt()) { |
1170 | 3 | return floating_equal_integer(rhs, lhs->int_val()); |
1171 | 3 | } |
1172 | | |
1173 | 33 | if ((lhs->isDouble() || lhs->isFloat()) && rhs->isDecimal()) { |
1174 | 4 | return floating_equal_decimal(lhs, get_scaled_decimal(rhs)); |
1175 | 4 | } |
1176 | | |
1177 | 29 | if ((rhs->isDouble() || rhs->isFloat()) && lhs->isDecimal()) { |
1178 | 4 | return floating_equal_decimal(rhs, get_scaled_decimal(lhs)); |
1179 | 4 | } |
1180 | | |
1181 | 25 | if (lhs->isDouble() || lhs->isFloat()) { |
1182 | 0 | return (rhs->isDouble() || rhs->isFloat()) && floating_value(lhs) == floating_value(rhs); |
1183 | 0 | } |
1184 | | |
1185 | 25 | if (lhs->isDecimal()) { |
1186 | 6 | const auto lhs_decimal = get_scaled_decimal(lhs); |
1187 | 6 | if (rhs->isDecimal()) { |
1188 | 4 | return scaled_decimal_equal_decimal(lhs_decimal, get_scaled_decimal(rhs)); |
1189 | 4 | } |
1190 | 2 | return scaled_decimal_equal_integer(lhs_decimal, rhs->int_val()); |
1191 | 6 | } |
1192 | | |
1193 | 19 | if (rhs->isDecimal()) { |
1194 | 2 | return scaled_decimal_equal_integer(get_scaled_decimal(rhs), lhs->int_val()); |
1195 | 2 | } |
1196 | | |
1197 | 17 | return lhs->int_val() == rhs->int_val(); |
1198 | 19 | } |
1199 | | |
1200 | 14 | inline bool array_contains_value(const ArrayVal* target_array, const JsonbValue* candidate) { |
1201 | 14 | const int target_num = target_array->numElem(); |
1202 | 25 | for (int i = 0; i < target_num; ++i) { |
1203 | 22 | if (target_array->get(i)->contains(candidate)) { |
1204 | 11 | return true; |
1205 | 11 | } |
1206 | 22 | } |
1207 | 3 | return false; |
1208 | 14 | } |
1209 | | |
1210 | 7 | inline bool array_contains_array(const ArrayVal* target_array, const ArrayVal* candidate_array) { |
1211 | 7 | const int candidate_num = candidate_array->numElem(); |
1212 | 17 | for (int i = 0; i < candidate_num; ++i) { |
1213 | 12 | if (!array_contains_value(target_array, candidate_array->get(i))) { |
1214 | 2 | return false; |
1215 | 2 | } |
1216 | 12 | } |
1217 | 5 | return true; |
1218 | 7 | } |
1219 | | |
1220 | | } // namespace jsonb_detail |
1221 | | |
1222 | 12 | inline const JsonbValue* JsonbDocument::createValue(const char* pb, size_t size) { |
1223 | 12 | if (!pb || size < sizeof(JsonbHeader) + sizeof(JsonbValue)) { |
1224 | 0 | return nullptr; |
1225 | 0 | } |
1226 | | |
1227 | 12 | auto* doc = (JsonbDocument*)pb; |
1228 | 12 | if (doc->header_.ver_ != JSONB_VER) { |
1229 | 0 | return nullptr; |
1230 | 0 | } |
1231 | | |
1232 | 12 | const auto* val = (const JsonbValue*)doc->payload_; |
1233 | | // Same as checkAndCreateDocument(), this is intentionally a lightweight structural check for |
1234 | | // hot paths. Do not recursively validate container bodies here unless the caller is a clearly |
1235 | | // untrusted raw binary boundary and accepts the O(document size) cost. |
1236 | 12 | if (size != sizeof(JsonbHeader) + val->numPackedBytes()) { |
1237 | 0 | return nullptr; |
1238 | 0 | } |
1239 | | |
1240 | 12 | return val; |
1241 | 12 | } |
1242 | | |
1243 | 0 | inline unsigned int JsonbDocument::numPackedBytes() const { |
1244 | 0 | return ((const JsonbValue*)payload_)->numPackedBytes() + sizeof(header_); |
1245 | 0 | } |
1246 | | |
1247 | 17.7k | inline unsigned int JsonbKeyValue::numPackedBytes() const { |
1248 | 17.7k | unsigned int ks = keyPackedBytes(); |
1249 | 17.7k | const auto* val = (const JsonbValue*)(((char*)this) + ks); |
1250 | 17.7k | return ks + val->numPackedBytes(); |
1251 | 17.7k | } |
1252 | | |
1253 | | // Poor man's "virtual" function JsonbValue::numPackedBytes |
1254 | 71.3k | inline unsigned int JsonbValue::numPackedBytes() const { |
1255 | 71.3k | switch (type) { |
1256 | 2.77k | case JsonbType::T_Null: |
1257 | 12.3k | case JsonbType::T_True: |
1258 | 12.9k | case JsonbType::T_False: { |
1259 | 12.9k | return sizeof(type); |
1260 | 12.3k | } |
1261 | | |
1262 | 854 | case JsonbType::T_Int8: { |
1263 | 854 | return sizeof(type) + sizeof(int8_t); |
1264 | 12.3k | } |
1265 | 125 | case JsonbType::T_Int16: { |
1266 | 125 | return sizeof(type) + sizeof(int16_t); |
1267 | 12.3k | } |
1268 | 3.50k | case JsonbType::T_Int32: { |
1269 | 3.50k | return sizeof(type) + sizeof(int32_t); |
1270 | 12.3k | } |
1271 | 12.1k | case JsonbType::T_Int64: { |
1272 | 12.1k | return sizeof(type) + sizeof(int64_t); |
1273 | 12.3k | } |
1274 | 10.6k | case JsonbType::T_Double: { |
1275 | 10.6k | return sizeof(type) + sizeof(double); |
1276 | 12.3k | } |
1277 | 29 | case JsonbType::T_Float: { |
1278 | 29 | return sizeof(type) + sizeof(float); |
1279 | 12.3k | } |
1280 | 13.8k | case JsonbType::T_Int128: { |
1281 | 13.8k | return sizeof(type) + sizeof(int128_t); |
1282 | 12.3k | } |
1283 | 10.8k | case JsonbType::T_String: |
1284 | 15.2k | case JsonbType::T_Binary: { |
1285 | 15.2k | return unpack<JsonbBinaryVal>()->numPackedBytes(); |
1286 | 10.8k | } |
1287 | | |
1288 | 1.77k | case JsonbType::T_Object: |
1289 | 1.96k | case JsonbType::T_Array: { |
1290 | 1.96k | return unpack<ContainerVal>()->numPackedBytes(); |
1291 | 1.77k | } |
1292 | 19 | case JsonbType::T_Decimal32: { |
1293 | 19 | return JsonbDecimal32::numPackedBytes(); |
1294 | 1.77k | } |
1295 | 9 | case JsonbType::T_Decimal64: { |
1296 | 9 | return JsonbDecimal64::numPackedBytes(); |
1297 | 1.77k | } |
1298 | 15 | case JsonbType::T_Decimal128: { |
1299 | 15 | return JsonbDecimal128::numPackedBytes(); |
1300 | 1.77k | } |
1301 | 6 | case JsonbType::T_Decimal256: { |
1302 | 6 | return JsonbDecimal256::numPackedBytes(); |
1303 | 1.77k | } |
1304 | 0 | case JsonbType::NUM_TYPES: |
1305 | 0 | break; |
1306 | 71.3k | } |
1307 | | |
1308 | 0 | throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB value type: {}", |
1309 | 0 | static_cast<int32_t>(type)); |
1310 | 71.3k | } |
1311 | | |
1312 | 6 | inline int JsonbValue::numElements() const { |
1313 | 6 | switch (type) { |
1314 | 0 | case JsonbType::T_Int8: |
1315 | 0 | case JsonbType::T_Int16: |
1316 | 0 | case JsonbType::T_Int32: |
1317 | 0 | case JsonbType::T_Int64: |
1318 | 0 | case JsonbType::T_Double: |
1319 | 0 | case JsonbType::T_Float: |
1320 | 0 | case JsonbType::T_Int128: |
1321 | 1 | case JsonbType::T_String: |
1322 | 1 | case JsonbType::T_Binary: |
1323 | 2 | case JsonbType::T_Null: |
1324 | 2 | case JsonbType::T_True: |
1325 | 2 | case JsonbType::T_False: |
1326 | 2 | case JsonbType::T_Decimal32: |
1327 | 2 | case JsonbType::T_Decimal64: |
1328 | 2 | case JsonbType::T_Decimal128: |
1329 | 2 | case JsonbType::T_Decimal256: { |
1330 | 2 | return 1; |
1331 | 2 | } |
1332 | 0 | case JsonbType::T_Object: { |
1333 | 0 | return unpack<ObjectVal>()->numElem(); |
1334 | 2 | } |
1335 | 4 | case JsonbType::T_Array: { |
1336 | 4 | return unpack<ArrayVal>()->numElem(); |
1337 | 2 | } |
1338 | 0 | case JsonbType::NUM_TYPES: |
1339 | 0 | break; |
1340 | 6 | } |
1341 | 0 | throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB value type: {}", |
1342 | 0 | static_cast<int32_t>(type)); |
1343 | 6 | } |
1344 | | |
1345 | 52 | inline bool JsonbValue::contains(const JsonbValue* rhs) const { |
1346 | 52 | switch (type) { |
1347 | 23 | case JsonbType::T_Int8: |
1348 | 23 | case JsonbType::T_Int16: |
1349 | 23 | case JsonbType::T_Int32: |
1350 | 23 | case JsonbType::T_Int64: |
1351 | 24 | case JsonbType::T_Int128: |
1352 | 29 | case JsonbType::T_Double: |
1353 | 29 | case JsonbType::T_Float: |
1354 | 35 | case JsonbType::T_Decimal32: |
1355 | 36 | case JsonbType::T_Decimal64: |
1356 | 39 | case JsonbType::T_Decimal128: |
1357 | 39 | case JsonbType::T_Decimal256: { |
1358 | 39 | return jsonb_detail::numeric_equal(this, rhs); |
1359 | 39 | } |
1360 | 1 | case JsonbType::T_String: |
1361 | 1 | case JsonbType::T_Binary: { |
1362 | 1 | if (rhs->isString() || rhs->isBinary()) { |
1363 | 1 | const auto* str_value1 = unpack<JsonbStringVal>(); |
1364 | 1 | const auto* str_value2 = rhs->unpack<JsonbStringVal>(); |
1365 | 1 | return str_value1->length() == str_value2->length() && |
1366 | 1 | std::memcmp(str_value1->getBlob(), str_value2->getBlob(), |
1367 | 1 | str_value1->length()) == 0; |
1368 | 1 | } |
1369 | 0 | return false; |
1370 | 1 | } |
1371 | 9 | case JsonbType::T_Array: { |
1372 | 9 | const auto* lhs_array = unpack<ArrayVal>(); |
1373 | 9 | if (rhs->isArray()) { |
1374 | 7 | return jsonb_detail::array_contains_array(lhs_array, rhs->unpack<ArrayVal>()); |
1375 | 7 | } |
1376 | 2 | return jsonb_detail::array_contains_value(lhs_array, rhs); |
1377 | 9 | } |
1378 | 3 | case JsonbType::T_Object: { |
1379 | 3 | if (rhs->isObject()) { |
1380 | 2 | const auto* obj_value1 = unpack<ObjectVal>(); |
1381 | 2 | const auto* obj_value2 = rhs->unpack<ObjectVal>(); |
1382 | 3 | for (auto it = obj_value2->begin(); it != obj_value2->end(); ++it) { |
1383 | 2 | const JsonbValue* value = obj_value1->find(it->getKeyStr(), it->klen()); |
1384 | 2 | if (value == nullptr || !value->contains(it->value())) { |
1385 | 1 | return false; |
1386 | 1 | } |
1387 | 2 | } |
1388 | 1 | return true; |
1389 | 2 | } |
1390 | 1 | return false; |
1391 | 3 | } |
1392 | 0 | case JsonbType::T_Null: { |
1393 | 0 | return rhs->isNull(); |
1394 | 3 | } |
1395 | 0 | case JsonbType::T_True: { |
1396 | 0 | return rhs->isTrue(); |
1397 | 3 | } |
1398 | 0 | case JsonbType::T_False: { |
1399 | 0 | return rhs->isFalse(); |
1400 | 3 | } |
1401 | 0 | case JsonbType::NUM_TYPES: |
1402 | 0 | break; |
1403 | 52 | } |
1404 | | |
1405 | 0 | throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB value type: {}", |
1406 | 0 | static_cast<int32_t>(type)); |
1407 | 52 | } |
1408 | | |
1409 | 99 | inline bool JsonbPath::seek(const char* key_path, size_t kp_len) { |
1410 | 99 | while (kp_len > 0 && std::isspace(key_path[kp_len - 1])) { |
1411 | 0 | --kp_len; |
1412 | 0 | } |
1413 | | |
1414 | | //path invalid |
1415 | 99 | if (!key_path || kp_len == 0) { |
1416 | 0 | return false; |
1417 | 0 | } |
1418 | 99 | Stream stream(key_path, kp_len); |
1419 | 99 | stream.skip_whitespace(); |
1420 | 99 | if (stream.exhausted() || stream.read() != SCOPE) { |
1421 | | //path invalid |
1422 | 0 | return false; |
1423 | 0 | } |
1424 | | |
1425 | 192 | while (!stream.exhausted()) { |
1426 | 93 | stream.skip_whitespace(); |
1427 | 93 | stream.clear_leg_ptr(); |
1428 | 93 | stream.clear_leg_len(); |
1429 | | |
1430 | 93 | if (!JsonbPath::parsePath(&stream, this)) { |
1431 | | //path invalid |
1432 | 0 | return false; |
1433 | 0 | } |
1434 | 93 | } |
1435 | 99 | return true; |
1436 | 99 | } |
1437 | | |
1438 | 93 | inline bool JsonbPath::parsePath(Stream* stream, JsonbPath* path) { |
1439 | | // $[0] |
1440 | 93 | if (stream->peek() == BEGIN_ARRAY) { |
1441 | 58 | return parse_array(stream, path); |
1442 | 58 | } |
1443 | | // $.a or $.[0] |
1444 | | // Keep $.[0] for backward compatibility: although the dot before an array |
1445 | | // leg is non-standard, existing JSONB users may rely on it. |
1446 | 35 | else if (stream->peek() == BEGIN_MEMBER) { |
1447 | | // advance past the . |
1448 | 33 | stream->skip(1); |
1449 | | |
1450 | 33 | if (stream->exhausted()) { |
1451 | 0 | return false; |
1452 | 0 | } |
1453 | | |
1454 | | // $.[0] |
1455 | 33 | if (stream->peek() == BEGIN_ARRAY) { |
1456 | 0 | return parse_array(stream, path); |
1457 | 0 | } |
1458 | | // $.a |
1459 | 33 | else { |
1460 | 33 | return parse_member(stream, path); |
1461 | 33 | } |
1462 | 33 | } else if (stream->peek() == WILDCARD) { |
1463 | 2 | stream->skip(1); |
1464 | 2 | if (stream->exhausted()) { |
1465 | 0 | return false; |
1466 | 0 | } |
1467 | | |
1468 | | // $** |
1469 | 2 | if (stream->peek() == WILDCARD) { |
1470 | 2 | path->_is_supper_wildcard = true; |
1471 | 2 | } |
1472 | | |
1473 | 2 | stream->skip(1); |
1474 | 2 | if (stream->exhausted()) { |
1475 | 0 | return false; |
1476 | 0 | } |
1477 | | |
1478 | 2 | if (stream->peek() == BEGIN_ARRAY) { |
1479 | 0 | return parse_array(stream, path); |
1480 | 2 | } else if (stream->peek() == BEGIN_MEMBER) { |
1481 | | // advance past the . |
1482 | 2 | stream->skip(1); |
1483 | | |
1484 | 2 | if (stream->exhausted()) { |
1485 | 0 | return false; |
1486 | 0 | } |
1487 | | |
1488 | | // $**.[0] |
1489 | | // Keep the dot-array form compatible with the root path behavior. |
1490 | 2 | if (stream->peek() == BEGIN_ARRAY) { |
1491 | 0 | return parse_array(stream, path); |
1492 | 0 | } |
1493 | | // $.a |
1494 | 2 | else { |
1495 | 2 | return parse_member(stream, path); |
1496 | 2 | } |
1497 | 2 | } |
1498 | 0 | return false; |
1499 | 2 | } else { |
1500 | 0 | return false; //invalid json path |
1501 | 0 | } |
1502 | 93 | } |
1503 | | |
1504 | 58 | inline bool JsonbPath::parse_array(Stream* stream, JsonbPath* path) { |
1505 | 58 | assert(stream->peek() == BEGIN_ARRAY); |
1506 | 58 | stream->skip(1); |
1507 | 58 | if (stream->exhausted()) { |
1508 | 0 | return false; |
1509 | 0 | } |
1510 | | |
1511 | 58 | if (stream->peek() == WILDCARD) { |
1512 | | // Called by function_jsonb.cpp, the variables passed in originate from a mutable block; |
1513 | | // using const_cast is acceptable. |
1514 | 0 | stream->set_leg_ptr(const_cast<char*>(stream->position())); |
1515 | 0 | stream->add_leg_len(); |
1516 | 0 | stream->skip(1); |
1517 | 0 | if (stream->exhausted()) { |
1518 | 0 | return false; |
1519 | 0 | } |
1520 | | |
1521 | 0 | if (stream->peek() == END_ARRAY) { |
1522 | 0 | std::unique_ptr<leg_info> leg( |
1523 | 0 | new leg_info(stream->get_leg_ptr(), stream->get_leg_len(), 0, ARRAY_CODE)); |
1524 | 0 | path->add_leg_to_leg_vector(std::move(leg)); |
1525 | 0 | stream->skip(1); |
1526 | 0 | path->_is_wildcard = true; |
1527 | 0 | return true; |
1528 | 0 | } else { |
1529 | 0 | return false; |
1530 | 0 | } |
1531 | 0 | } |
1532 | | |
1533 | | // Called by function_jsonb.cpp, the variables passed in originate from a mutable block; |
1534 | | // using const_cast is acceptable. |
1535 | 58 | stream->set_leg_ptr(const_cast<char*>(stream->position())); |
1536 | | |
1537 | 116 | for (; !stream->exhausted() && stream->peek() != END_ARRAY; stream->advance()) { |
1538 | 58 | stream->add_leg_len(); |
1539 | 58 | } |
1540 | | |
1541 | 58 | if (stream->exhausted() || stream->peek() != END_ARRAY) { |
1542 | 0 | return false; |
1543 | 58 | } else { |
1544 | 58 | stream->skip(1); |
1545 | 58 | } |
1546 | | |
1547 | | //parse array index to int |
1548 | | |
1549 | 58 | std::string_view idx_string(stream->get_leg_ptr(), stream->get_leg_len()); |
1550 | 58 | int index = 0; |
1551 | | |
1552 | | // Match "last" case-insensitively for compatibility with existing JSONB |
1553 | | // paths such as [Last] and [LAST]. |
1554 | 58 | if (stream->get_leg_len() >= 4 && |
1555 | 58 | std::equal(LAST, LAST + 4, stream->get_leg_ptr(), |
1556 | 0 | [](char c1, char c2) { return std::tolower(c1) == std::tolower(c2); })) { |
1557 | 0 | auto pos = idx_string.find(MINUS); |
1558 | |
|
1559 | 0 | if (pos != std::string::npos) { |
1560 | 0 | for (size_t i = 4; i < pos; ++i) { |
1561 | 0 | if (std::isspace(idx_string[i])) { |
1562 | 0 | continue; |
1563 | 0 | } else { |
1564 | | // leading zeroes are not allowed |
1565 | 0 | LOG(WARNING) << "Non-space char in idx_string: '" << idx_string << "'"; |
1566 | 0 | return false; |
1567 | 0 | } |
1568 | 0 | } |
1569 | 0 | idx_string = idx_string.substr(pos + 1); |
1570 | 0 | idx_string = trim(idx_string); |
1571 | | |
1572 | | // Keep numeric-prefix parsing for last-N offsets as existing JSONB |
1573 | | // path behavior. |
1574 | 0 | auto result = std::from_chars(idx_string.data(), idx_string.data() + idx_string.size(), |
1575 | 0 | index); |
1576 | 0 | if (result.ec != std::errc()) { |
1577 | 0 | LOG(WARNING) << "Invalid index in JSON path: '" << idx_string << "'"; |
1578 | 0 | return false; |
1579 | 0 | } |
1580 | |
|
1581 | 0 | } else if (stream->get_leg_len() > 4) { |
1582 | 0 | return false; |
1583 | 0 | } |
1584 | | |
1585 | 0 | std::unique_ptr<leg_info> leg(new leg_info(nullptr, 0, -index - 1, ARRAY_CODE)); |
1586 | 0 | path->add_leg_to_leg_vector(std::move(leg)); |
1587 | |
|
1588 | 0 | return true; |
1589 | 0 | } |
1590 | | |
1591 | | // Preserve legacy numeric-prefix parsing for array indexes. std::from_chars |
1592 | | // may stop before the end (for example [1.5] is parsed as index 1), and |
1593 | | // current JSONB path semantics treat that as supported behavior. |
1594 | 58 | auto result = std::from_chars(idx_string.data(), idx_string.data() + idx_string.size(), index); |
1595 | | |
1596 | 58 | if (result.ec != std::errc()) { |
1597 | 0 | return false; |
1598 | 0 | } |
1599 | | |
1600 | 58 | std::unique_ptr<leg_info> leg(new leg_info(nullptr, 0, index, ARRAY_CODE)); |
1601 | 58 | path->add_leg_to_leg_vector(std::move(leg)); |
1602 | | |
1603 | 58 | return true; |
1604 | 58 | } |
1605 | | |
1606 | 35 | inline bool JsonbPath::parse_member(Stream* stream, JsonbPath* path) { |
1607 | 35 | if (stream->exhausted()) { |
1608 | 0 | return false; |
1609 | 0 | } |
1610 | | |
1611 | 35 | if (stream->peek() == WILDCARD) { |
1612 | | // Called by function_jsonb.cpp, the variables passed in originate from a mutable block; |
1613 | | // using const_cast is acceptable. |
1614 | 0 | stream->set_leg_ptr(const_cast<char*>(stream->position())); |
1615 | 0 | stream->add_leg_len(); |
1616 | 0 | stream->skip(1); |
1617 | 0 | std::unique_ptr<leg_info> leg( |
1618 | 0 | new leg_info(stream->get_leg_ptr(), stream->get_leg_len(), 0, MEMBER_CODE)); |
1619 | 0 | path->add_leg_to_leg_vector(std::move(leg)); |
1620 | 0 | path->_is_wildcard = true; |
1621 | 0 | return true; |
1622 | 0 | } |
1623 | | |
1624 | | // Called by function_jsonb.cpp, the variables passed in originate from a mutable block; |
1625 | | // using const_cast is acceptable. |
1626 | 35 | stream->set_leg_ptr(const_cast<char*>(stream->position())); |
1627 | | |
1628 | 35 | const char* left_quotation_marks = nullptr; |
1629 | 35 | const char* right_quotation_marks = nullptr; |
1630 | | |
1631 | 102 | for (; !stream->exhausted(); stream->advance()) { |
1632 | | // Only accept space characters quoted by double quotes. |
1633 | 67 | if (std::isspace(stream->peek()) && left_quotation_marks == nullptr) { |
1634 | 0 | return false; |
1635 | 67 | } else if (stream->peek() == ESCAPE) { |
1636 | 0 | stream->add_leg_len(); |
1637 | 0 | stream->skip(1); |
1638 | 0 | stream->add_leg_len(); |
1639 | 0 | stream->set_has_escapes(true); |
1640 | 0 | if (stream->exhausted()) { |
1641 | 0 | return false; |
1642 | 0 | } |
1643 | 0 | continue; |
1644 | 67 | } else if (stream->peek() == DOUBLE_QUOTE) { |
1645 | 0 | if (left_quotation_marks == nullptr) { |
1646 | 0 | left_quotation_marks = stream->position(); |
1647 | | // Called by function_jsonb.cpp, the variables passed in originate from a mutable block; |
1648 | | // using const_cast is acceptable. |
1649 | 0 | stream->set_leg_ptr(const_cast<char*>(++left_quotation_marks)); |
1650 | 0 | continue; |
1651 | 0 | } else { |
1652 | 0 | right_quotation_marks = stream->position(); |
1653 | 0 | stream->skip(1); |
1654 | 0 | break; |
1655 | 0 | } |
1656 | 67 | } else if (stream->peek() == BEGIN_MEMBER || stream->peek() == BEGIN_ARRAY) { |
1657 | 0 | if (left_quotation_marks == nullptr) { |
1658 | 0 | break; |
1659 | 0 | } |
1660 | 0 | } |
1661 | | |
1662 | 67 | stream->add_leg_len(); |
1663 | 67 | } |
1664 | | |
1665 | 35 | if ((left_quotation_marks != nullptr && right_quotation_marks == nullptr) || |
1666 | 35 | stream->get_leg_ptr() == nullptr || stream->get_leg_len() == 0) { |
1667 | 0 | return false; //invalid json path |
1668 | 0 | } |
1669 | | |
1670 | 35 | if (stream->get_has_escapes()) { |
1671 | 0 | stream->remove_escapes(); |
1672 | 0 | } |
1673 | | |
1674 | 35 | std::unique_ptr<leg_info> leg( |
1675 | 35 | new leg_info(stream->get_leg_ptr(), stream->get_leg_len(), 0, MEMBER_CODE)); |
1676 | 35 | path->add_leg_to_leg_vector(std::move(leg)); |
1677 | | |
1678 | 35 | return true; |
1679 | 35 | } |
1680 | | |
1681 | | static_assert(is_pod_v<JsonbDocument>, "JsonbDocument must be standard layout and trivial"); |
1682 | | static_assert(is_pod_v<JsonbValue>, "JsonbValue must be standard layout and trivial"); |
1683 | | static_assert(is_pod_v<JsonbDecimal32>, "JsonbDecimal32 must be standard layout and trivial"); |
1684 | | static_assert(is_pod_v<JsonbDecimal64>, "JsonbDecimal64 must be standard layout and trivial"); |
1685 | | static_assert(is_pod_v<JsonbDecimal128>, "JsonbDecimal128 must be standard layout and trivial"); |
1686 | | static_assert(is_pod_v<JsonbDecimal256>, "JsonbDecimal256 must be standard layout and trivial"); |
1687 | | static_assert(is_pod_v<JsonbInt8Val>, "JsonbInt8Val must be standard layout and trivial"); |
1688 | | static_assert(is_pod_v<JsonbInt32Val>, "JsonbInt32Val must be standard layout and trivial"); |
1689 | | static_assert(is_pod_v<JsonbInt64Val>, "JsonbInt64Val must be standard layout and trivial"); |
1690 | | static_assert(is_pod_v<JsonbInt128Val>, "JsonbInt128Val must be standard layout and trivial"); |
1691 | | static_assert(is_pod_v<JsonbDoubleVal>, "JsonbDoubleVal must be standard layout and trivial"); |
1692 | | static_assert(is_pod_v<JsonbFloatVal>, "JsonbFloatVal must be standard layout and trivial"); |
1693 | | static_assert(is_pod_v<JsonbBinaryVal>, "JsonbBinaryVal must be standard layout and trivial"); |
1694 | | static_assert(is_pod_v<ContainerVal>, "ContainerVal must be standard layout and trivial"); |
1695 | | |
1696 | | #define ASSERT_DECIMAL_LAYOUT(type) \ |
1697 | | static_assert(offsetof(type, precision) == 0); \ |
1698 | | static_assert(offsetof(type, scale) == 4); \ |
1699 | | static_assert(offsetof(type, value) == 8); |
1700 | | |
1701 | | ASSERT_DECIMAL_LAYOUT(JsonbDecimal32) |
1702 | | ASSERT_DECIMAL_LAYOUT(JsonbDecimal64) |
1703 | | ASSERT_DECIMAL_LAYOUT(JsonbDecimal128) |
1704 | | ASSERT_DECIMAL_LAYOUT(JsonbDecimal256) |
1705 | | |
1706 | | #define ASSERT_NUMERIC_LAYOUT(type) static_assert(offsetof(type, num) == 0); |
1707 | | |
1708 | | ASSERT_NUMERIC_LAYOUT(JsonbInt8Val) |
1709 | | ASSERT_NUMERIC_LAYOUT(JsonbInt32Val) |
1710 | | ASSERT_NUMERIC_LAYOUT(JsonbInt64Val) |
1711 | | ASSERT_NUMERIC_LAYOUT(JsonbInt128Val) |
1712 | | ASSERT_NUMERIC_LAYOUT(JsonbDoubleVal) |
1713 | | |
1714 | | static_assert(offsetof(JsonbBinaryVal, size) == 0); |
1715 | | static_assert(offsetof(JsonbBinaryVal, payload) == 4); |
1716 | | |
1717 | | static_assert(offsetof(ContainerVal, size) == 0); |
1718 | | static_assert(offsetof(ContainerVal, payload) == 4); |
1719 | | |
1720 | | #pragma pack(pop) |
1721 | | #if defined(__clang__) |
1722 | | #pragma clang diagnostic pop |
1723 | | #endif |
1724 | | } // namespace doris |
1725 | | |
1726 | | #endif // JSONB_JSONBDOCUMENT_H |