Coverage Report

Created: 2025-10-23 12:18

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/util/jsonb_document.h
Line
Count
Source
1
/*
2
 *  Copyright (c) 2014, Facebook, Inc.
3
 *  All rights reserved.
4
 *
5
 *  This source code is licensed under the BSD-style license found in the
6
 *  LICENSE file in the root directory of this source tree. An additional grant
7
 *  of patent rights can be found in the PATENTS file in the same directory.
8
 *
9
 */
10
11
/*
12
 * This header defines JsonbDocument, JsonbKeyValue, and various value classes
13
 * which are derived from JsonbValue, and a forward iterator for container
14
 * values - essentially everything that is related to JSONB binary data
15
 * structures.
16
 *
17
 * Implementation notes:
18
 *
19
 * None of the classes in this header file can be instantiated directly (i.e.
20
 * you cannot create a JsonbKeyValue or JsonbValue object - all constructors
21
 * are declared non-public). We use the classes as wrappers on the packed JSONB
22
 * bytes (serialized), and cast the classes (types) to the underlying packed
23
 * byte array.
24
 *
25
 * For the same reason, we cannot define any JSONB value class to be virtual,
26
 * since we never call constructors, and will not instantiate vtbl and vptrs.
27
 *
28
 * Therefore, the classes are defined as packed structures (i.e. no data
29
 * alignment and padding), and the private member variables of the classes are
30
 * defined precisely in the same order as the JSONB spec. This ensures we
31
 * access the packed JSONB bytes correctly.
32
 *
33
 * The packed structures are highly optimized for in-place operations with low
34
 * overhead. The reads (and in-place writes) are performed directly on packed
35
 * bytes. There is no memory allocation at all at runtime.
36
 *
37
 * For updates/writes of values that will expand the original JSONB size, the
38
 * write will fail, and the caller needs to handle buffer increase.
39
 *
40
 * ** Iterator **
41
 * Both ObjectVal class and ArrayVal class have iterator type that you can use
42
 * to declare an iterator on a container object to go through the key-value
43
 * pairs or value list. The iterator has both non-const and const types.
44
 *
45
 * Note: iterators are forward direction only.
46
 *
47
 * ** Query **
48
 * Querying into containers is through the member functions find (for key/value
49
 * pairs) and get (for array elements), and is in streaming style. We don't
50
 * need to read/scan the whole JSONB packed bytes in order to return results.
51
 * Once the key/index is found, we will stop search.  You can use text to query
52
 * both objects and array (for array, text will be converted to integer index),
53
 * and use index to retrieve from array. Array index is 0-based.
54
 *
55
 * ** External dictionary **
56
 * During query processing, you can also pass a call-back function, so the
57
 * search will first try to check if the key string exists in the dictionary.
58
 * If so, search will be based on the id instead of the key string.
59
 * @author Tian Xia <tianx@fb.com>
60
 * 
61
 * this file is copied from 
62
 * https://github.com/facebook/mysql-5.6/blob/fb-mysql-5.6.35/fbson/FbsonDocument.h
63
 * and modified by Doris
64
 */
65
66
#ifndef JSONB_JSONBDOCUMENT_H
67
#define JSONB_JSONBDOCUMENT_H
68
69
#include <algorithm>
#include <cassert>
#include <cctype>
#include <charconv>
#include <concepts>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <memory>
#include <string>
#include <string_view>
#include <type_traits>
#include <utility>
#include <vector>
77
78
#include "common/compiler_util.h" // IWYU pragma: keep
79
#include "common/status.h"
80
#include "runtime/define_primitive_type.h"
81
#include "util/string_util.h"
82
#include "vec/common/string_ref.h"
83
#include "vec/core/types.h"
84
85
// #include "util/string_parser.hpp"
86
87
// Concept to check for supported decimal types
88
template <typename T>
89
concept JsonbDecimalType = std::same_as<T, doris::vectorized::Decimal256> ||
90
                           std::same_as<T, doris::vectorized::Decimal64> ||
91
                           std::same_as<T, doris::vectorized::Decimal128V3> ||
92
                           std::same_as<T, doris::vectorized::Decimal32>;
93
94
namespace doris {
95
96
template <typename T>
97
constexpr bool is_pod_v = std::is_trivial_v<T> && std::is_standard_layout_v<T>;
98
99
struct JsonbStringVal;
100
struct ObjectVal;
101
struct ArrayVal;
102
struct JsonbBinaryVal;
103
struct ContainerVal;
104
105
template <JsonbDecimalType T>
106
struct JsonbDecimalVal;
107
108
using JsonbDecimal256 = JsonbDecimalVal<vectorized::Decimal256>;
109
using JsonbDecimal128 = JsonbDecimalVal<vectorized::Decimal128V3>;
110
using JsonbDecimal64 = JsonbDecimalVal<vectorized::Decimal64>;
111
using JsonbDecimal32 = JsonbDecimalVal<vectorized::Decimal32>;
112
113
template <typename T>
114
    requires std::is_integral_v<T> || std::is_floating_point_v<T>
115
struct NumberValT;
116
117
using JsonbInt8Val = NumberValT<int8_t>;
118
using JsonbInt16Val = NumberValT<int16_t>;
119
using JsonbInt32Val = NumberValT<int32_t>;
120
using JsonbInt64Val = NumberValT<int64_t>;
121
using JsonbInt128Val = NumberValT<int128_t>;
122
using JsonbDoubleVal = NumberValT<double>;
123
using JsonbFloatVal = NumberValT<float>;
124
125
template <typename T>
126
concept JsonbPodType = (std::same_as<T, JsonbStringVal> || std::same_as<T, ObjectVal> ||
127
                        std::same_as<T, ContainerVal> || std::same_as<T, ArrayVal> ||
128
                        std::same_as<T, JsonbBinaryVal> || std::same_as<T, JsonbDecimal32> ||
129
                        std::same_as<T, JsonbDecimal64> || std::same_as<T, JsonbDecimal128> ||
130
                        std::same_as<T, JsonbDecimal256> || std::same_as<T, JsonbDecimal32> ||
131
                        std::same_as<T, JsonbInt8Val> || std::same_as<T, JsonbInt16Val> ||
132
                        std::same_as<T, JsonbInt32Val> || std::same_as<T, JsonbInt64Val> ||
133
                        std::same_as<T, JsonbInt128Val> || std::same_as<T, JsonbFloatVal> ||
134
                        std::same_as<T, JsonbFloatVal> || std::same_as<T, JsonbDoubleVal>);
135
136
207k
#define JSONB_VER 1
137
138
using int128_t = __int128;
139
140
// forward declaration
141
struct JsonbValue;
142
143
class JsonbOutStream;
144
145
template <class OS_TYPE>
146
class JsonbWriterT;
147
148
using JsonbWriter = JsonbWriterT<JsonbOutStream>;
149
150
const int MaxNestingLevel = 100;
151
152
/*
153
 * JsonbType defines 10 primitive types and 2 container types, as described
154
 * below. Types appended later (T_Int128, T_Float, T_Decimal32/64/128/256) are listed only in the enum.
155
 * NOTE: Do NOT modify the existing values or their order in this enum.
156
 *      You may only append new entries at the end before `NUM_TYPES`.
157
 *      This enum will be used in serialized data and/or persisted data.
158
 *      Changing existing values may break backward compatibility
159
 *      with previously stored or transmitted data.
160
 *
161
 * primitive_value ::=
162
 *   0x00        //null value (0 byte)
163
 * | 0x01        //boolean true (0 byte)
164
 * | 0x02        //boolean false (0 byte)
165
 * | 0x03 int8   //char/int8 (1 byte)
166
 * | 0x04 int16  //int16 (2 bytes)
167
 * | 0x05 int32  //int32 (4 bytes)
168
 * | 0x06 int64  //int64 (8 bytes)
169
 * | 0x07 double //floating point (8 bytes)
170
 * | 0x08 string //variable length string
171
 * | 0x09 binary //variable length binary
172
 *
173
 * container ::=
174
 *   0x0A int32 key_value_list //object, int32 is the total bytes of the object
175
 * | 0x0B int32 value_list     //array, int32 is the total bytes of the array
176
 */
177
enum class JsonbType : char {
178
    T_Null = 0x00,
179
    T_True = 0x01,
180
    T_False = 0x02,
181
    T_Int8 = 0x03,
182
    T_Int16 = 0x04,
183
    T_Int32 = 0x05,
184
    T_Int64 = 0x06,
185
    T_Double = 0x07,
186
    T_String = 0x08,
187
    T_Binary = 0x09,
188
    T_Object = 0x0A,
189
    T_Array = 0x0B,
190
    T_Int128 = 0x0C,
191
    T_Float = 0x0D,
192
    T_Decimal32 = 0x0E,  // DecimalV3 only
193
    T_Decimal64 = 0x0F,  // DecimalV3 only
194
    T_Decimal128 = 0x10, // DecimalV3 only
195
    T_Decimal256 = 0x11, // DecimalV3 only
196
    NUM_TYPES,
197
};
198
199
11
// Translates a JSONB type tag into the matching PrimitiveType.
// Both boolean tags (T_True / T_False) map to TYPE_BOOLEAN.
// Any tag without a mapping (including NUM_TYPES) raises an INTERNAL_ERROR Exception.
inline PrimitiveType get_primitive_type_from_json_type(JsonbType json_type) {
    switch (json_type) {
    // null / boolean
    case JsonbType::T_Null:       return TYPE_NULL;
    case JsonbType::T_True:
    case JsonbType::T_False:      return TYPE_BOOLEAN;
    // integers
    case JsonbType::T_Int8:       return TYPE_TINYINT;
    case JsonbType::T_Int16:      return TYPE_SMALLINT;
    case JsonbType::T_Int32:      return TYPE_INT;
    case JsonbType::T_Int64:      return TYPE_BIGINT;
    case JsonbType::T_Int128:     return TYPE_LARGEINT;
    // floating point
    case JsonbType::T_Float:      return TYPE_FLOAT;
    case JsonbType::T_Double:     return TYPE_DOUBLE;
    // variable length
    case JsonbType::T_String:     return TYPE_STRING;
    case JsonbType::T_Binary:     return TYPE_BINARY;
    // containers
    case JsonbType::T_Object:     return TYPE_STRUCT;
    case JsonbType::T_Array:      return TYPE_ARRAY;
    // decimals
    case JsonbType::T_Decimal32:  return TYPE_DECIMAL32;
    case JsonbType::T_Decimal64:  return TYPE_DECIMAL64;
    case JsonbType::T_Decimal128: return TYPE_DECIMAL128I;
    case JsonbType::T_Decimal256: return TYPE_DECIMAL256;
    default:
        break;
    }
    throw Exception(ErrorCode::INTERNAL_ERROR, "Unsupported JsonbType: {}",
                    static_cast<int>(json_type));
}
241
242
// Tokens recognised by the JSON path parser.
// Declared `inline constexpr` so that every translation unit including this
// header shares a single definition instead of getting its own private copy.
inline constexpr char SCOPE = '$';
inline constexpr char BEGIN_MEMBER = '.';
inline constexpr char BEGIN_ARRAY = '[';
inline constexpr char END_ARRAY = ']';
inline constexpr char DOUBLE_QUOTE = '"';
inline constexpr char WILDCARD = '*';
inline constexpr char MINUS = '-';
inline constexpr char LAST[] = "last";
inline constexpr char ESCAPE = '\\';

// Path leg kinds stored in leg_info::type: 0 is a member leg, 1 is an array leg.
inline constexpr unsigned int MEMBER_CODE = 0;
inline constexpr unsigned int ARRAY_CODE = 1;
254
255
/// A simple input stream class for the JSON path parser.
///
/// The stream does not own the characters it reads; it only keeps a cursor
/// (`m_position`) and an end pointer (`m_end`) into the caller's buffer.
/// It also carries the pointer/length of the path leg that is currently being
/// collected, plus a flag telling whether that leg contains escape characters.
class Stream {
public:
    /// Creates an input stream reading from a character string.
    /// @param string  the input string
    /// @param length  the length of the input string
    Stream(const char* string, size_t length) : m_position(string), m_end(string + length) {}

    /// Returns a pointer to the current position in the stream.
    const char* position() const { return m_position; }

    /// Returns a pointer to the position just after the end of the stream.
    const char* end() const { return m_end; }

    /// Returns the number of bytes remaining in the stream.
    size_t remaining() const {
        assert(m_position <= m_end);
        return m_end - m_position;
    }

    /// Tells if the stream has been exhausted.
    bool exhausted() const { return remaining() == 0; }

    /// Reads the next byte from the stream and moves the position forward.
    char read() {
        assert(!exhausted());
        return *m_position++;
    }

    /// Reads the next byte from the stream without moving the position forward.
    char peek() const {
        assert(!exhausted());
        return *m_position;
    }

    /// Moves the position to the next non-whitespace character.
    void skip_whitespace() {
        // std::isspace requires a value representable as unsigned char (or EOF);
        // passing a negative plain char (bytes >= 0x80) is undefined behaviour.
        m_position = std::find_if_not(m_position, m_end, [](char c) {
            return std::isspace(static_cast<unsigned char>(c)) != 0;
        });
    }

    /// Moves the position n bytes forward, then skips any whitespace that follows.
    void skip(size_t n) {
        assert(remaining() >= n);
        m_position += n;
        skip_whitespace();
    }

    /// Moves the position one byte forward without any bounds check.
    void advance() { m_position++; }

    /// Forgets the current path leg pointer.
    void clear_leg_ptr() { leg_ptr = nullptr; }

    /// Sets the start of the current path leg.
    void set_leg_ptr(char* ptr) { leg_ptr = ptr; }

    /// Returns the start of the current path leg (nullptr when none is set).
    char* get_leg_ptr() { return leg_ptr; }

    /// Resets the length of the current path leg to zero.
    void clear_leg_len() { leg_len = 0; }

    /// Grows the current path leg by one byte.
    void add_leg_len() { leg_len++; }

    /// Returns the length of the current path leg.
    unsigned int get_leg_len() const { return leg_len; }

    /// Removes every backslash from the current path leg in place, writes a
    /// terminating '\0' after the compacted bytes and shrinks the leg length.
    /// Does nothing when no leg pointer is set.
    void remove_escapes() {
        if (leg_ptr == nullptr) {
            return;
        }
        unsigned int new_len = 0;
        for (unsigned int i = 0; i < leg_len; i++) {
            if (leg_ptr[i] != '\\') {
                leg_ptr[new_len++] = leg_ptr[i];
            }
        }
        leg_ptr[new_len] = '\0';
        leg_len = new_len;
    }

    /// Records whether the current path leg contains escape characters.
    void set_has_escapes(bool has) { has_escapes = has; }

    /// Tells whether the current path leg was marked as containing escapes.
    bool get_has_escapes() const { return has_escapes; }

private:
    /// The current position in the stream.
    const char* m_position = nullptr;

    /// The end of the stream.
    const char* const m_end;

    /// path leg ptr
    char* leg_ptr = nullptr;

    /// path leg len (zero until a leg is being collected)
    unsigned int leg_len = 0;

    /// Whether to contain escape characters
    bool has_escapes = false;
};
350
351
struct leg_info {
352
    ///path leg ptr
353
    char* leg_ptr = nullptr;
354
355
    ///path leg len
356
    unsigned int leg_len;
357
358
    ///array_index
359
    int array_index;
360
361
    ///type: 0 is member 1 is array
362
    unsigned int type;
363
364
0
    bool to_string(std::string* str) const {
365
0
        if (type == MEMBER_CODE) {
366
0
            str->push_back(BEGIN_MEMBER);
367
0
            bool contains_space = false;
368
0
            std::string tmp;
369
0
            for (auto* it = leg_ptr; it != (leg_ptr + leg_len); ++it) {
370
0
                if (std::isspace(*it)) {
371
0
                    contains_space = true;
372
0
                } else if (*it == '"' || *it == ESCAPE || *it == '\r' || *it == '\n' ||
373
0
                           *it == '\b' || *it == '\t') {
374
0
                    tmp.push_back(ESCAPE);
375
0
                }
376
0
                tmp.push_back(*it);
377
0
            }
378
0
            if (contains_space) {
379
0
                str->push_back(DOUBLE_QUOTE);
380
0
            }
381
0
            str->append(tmp);
382
0
            if (contains_space) {
383
0
                str->push_back(DOUBLE_QUOTE);
384
0
            }
385
0
            return true;
386
0
        } else if (type == ARRAY_CODE) {
387
0
            str->push_back(BEGIN_ARRAY);
388
0
            std::string int_str = std::to_string(array_index);
389
0
            str->append(int_str);
390
0
            str->push_back(END_ARRAY);
391
0
            return true;
392
0
        } else {
393
0
            return false;
394
0
        }
395
0
    }
396
};
397
398
class JsonbPath {
399
public:
400
    // parse json path
401
    static bool parsePath(Stream* stream, JsonbPath* path);
402
403
    static bool parse_array(Stream* stream, JsonbPath* path);
404
    static bool parse_member(Stream* stream, JsonbPath* path);
405
406
    //return true if json path valid else return false
407
    bool seek(const char* string, size_t length);
408
409
90
    void add_leg_to_leg_vector(std::unique_ptr<leg_info> leg) {
410
90
        leg_vector.emplace_back(leg.release());
411
90
    }
412
413
0
    void pop_leg_from_leg_vector() { leg_vector.pop_back(); }
414
415
0
    bool to_string(std::string* res) const {
416
0
        res->push_back(SCOPE);
417
0
        for (const auto& leg : leg_vector) {
418
0
            auto valid = leg->to_string(res);
419
0
            if (!valid) {
420
0
                return false;
421
0
            }
422
0
        }
423
0
        return true;
424
0
    }
425
426
185
    size_t get_leg_vector_size() const { return leg_vector.size(); }
427
428
295
    leg_info* get_leg_from_leg_vector(size_t i) const { return leg_vector[i].get(); }
429
430
0
    bool is_wildcard() const { return _is_wildcard; }
431
95
    bool is_supper_wildcard() const { return _is_supper_wildcard; }
432
433
6
    void clean() { leg_vector.clear(); }
434
435
private:
436
    std::vector<std::unique_ptr<leg_info>> leg_vector;
437
    bool _is_wildcard = false;        // whether the path is a wildcard path
438
    bool _is_supper_wildcard = false; // supper wildcard likes '$**.a' or '$**[1]'
439
};
440
441
/*
 * JsonbFwdIteratorT implements JSONB's iterator template.
 *
 * The iterator wraps a pointer into packed bytes; advancing it moves the
 * pointer forward by the pointee's numPackedBytes().
 *
 * Note: it is a FORWARD-only iterator due to the design of the JSONB format.
 */
template <class Iter_Type, class Cont_Type>
class JsonbFwdIteratorT {
public:
    using iterator = Iter_Type;
    using pointer = typename std::iterator_traits<Iter_Type>::pointer;
    using reference = typename std::iterator_traits<Iter_Type>::reference;

    explicit JsonbFwdIteratorT() : current_(nullptr) {}
    explicit JsonbFwdIteratorT(const iterator& i) : current_(i) {}

    // allow non-const to const iterator conversion (same container type)
    template <class Iter_Ty>
    JsonbFwdIteratorT(const JsonbFwdIteratorT<Iter_Ty, Cont_Type>& rhs) : current_(rhs.base()) {}

    bool operator==(const JsonbFwdIteratorT& rhs) const { return (current_ == rhs.current_); }

    bool operator!=(const JsonbFwdIteratorT& rhs) const { return !operator==(rhs); }

    bool operator<(const JsonbFwdIteratorT& rhs) const { return (current_ < rhs.current_); }

    // Strictly greater-than. (Negating operator< would yield ">=" and make an
    // iterator compare greater than itself.)
    bool operator>(const JsonbFwdIteratorT& rhs) const { return rhs.operator<(*this); }

    // Pre-increment: step over the current element's packed bytes.
    JsonbFwdIteratorT& operator++() {
        // C-style casts are intentional: the same expression has to work for
        // both const and non-const pointee iterators.
        current_ = (iterator)(((char*)current_) + current_->numPackedBytes());
        return *this;
    }

    // Post-increment: returns the position before the step.
    JsonbFwdIteratorT operator++(int) {
        auto tmp = *this;
        ++(*this);
        return tmp;
    }

    explicit operator pointer() { return current_; }

    reference operator*() const { return *current_; }

    pointer operator->() const { return current_; }

    // The wrapped raw pointer.
    iterator base() const { return current_; }

private:
    iterator current_;
};
490
using JsonbTypeUnder = std::underlying_type_t<JsonbType>;
491
492
#if defined(__clang__)
493
#pragma clang diagnostic push
494
#pragma clang diagnostic ignored "-Wzero-length-array"
495
#endif
496
#pragma pack(push, 1)
497
498
/*
499
 * JsonbDocument is the main object that accesses and queries JSONB packed
500
 * bytes. NOTE: JsonbDocument only allows object container as the top level
501
 * JSONB value. However, you can use the static method "createValue" to get any
502
 * JsonbValue object from the packed bytes.
503
 *
504
 * JsonbDocument object also dereferences to an object container value
505
 * (ObjectVal) once JSONB is loaded.
506
 *
507
 * ** Load **
508
 * JsonbDocument is usable after loading packed bytes (memory location) into
509
 * the object. We only need the header and first few bytes of the payload after
510
 * header to verify the JSONB.
511
 *
512
 * Note: creating a JsonbDocument (through checkAndCreateDocument) does not allocate
513
 * any memory. The document object is an efficient wrapper on the packed bytes
514
 * which is accessed directly.
515
 *
516
 * ** Query **
517
 * Query is through dereferencing into ObjectVal.
518
 */
519
class JsonbDocument {
520
public:
521
    // create an JsonbDocument object from JSONB packed bytes
522
    [[nodiscard]] static Status checkAndCreateDocument(const char* pb, size_t size,
523
                                                       JsonbDocument** doc);
524
525
    // create an JsonbValue from JSONB packed bytes
526
    static JsonbValue* createValue(const char* pb, size_t size);
527
528
0
    uint8_t version() const { return header_.ver_; }
529
530
26.4k
    JsonbValue* getValue() { return ((JsonbValue*)payload_); }
531
532
    void setValue(const JsonbValue* value);
533
534
    unsigned int numPackedBytes() const;
535
536
    // ObjectVal* operator->();
537
538
    const ObjectVal* operator->() const;
539
540
private:
541
    /*
542
   * JsonbHeader class defines JSONB header (internal to JsonbDocument).
543
   *
544
   * Currently it only contains version information (1-byte). We may expand the
545
   * header to include checksum of the JSONB binary for more security.
546
   */
547
    struct JsonbHeader {
548
        uint8_t ver_;
549
    } header_;
550
551
    char payload_[0];
552
};
553
554
/*
555
 * JsonbKeyValue class defines JSONB key type, as described below.
556
 *
557
 * key ::=
558
 *   0x00 int8    //1-byte dictionary id
559
 * | int8 (byte*) //int8 (>0) is the size of the key string
560
 *
561
 * value ::= primitive_value | container
562
 *
563
 * JsonbKeyValue can be either an id mapping to the key string in an external
564
 * dictionary, or it is the original key string. Whether to read an id or a
565
 * string is decided by the first byte (size).
566
 *
567
 * Note: a key object must be followed by a value object. Therefore, a key
568
 * object implicitly refers to a key-value pair, and you can get the value
569
 * object right after the key object. The function numPackedBytes hence
570
 * indicates the total size of the key-value pair, so that we will be able to go
571
 * to next pair from the key.
572
 *
573
 * ** Dictionary size **
574
 * By default, the dictionary size is 255 (1-byte). Users can define
575
 * "USE_LARGE_DICT" to increase the dictionary size to 65535 (2-byte).
576
 */
577
class JsonbKeyValue {
578
public:
579
    // now we use sMaxKeyId to represent an empty key
580
    static const int sMaxKeyId = 65535;
581
    using keyid_type = uint16_t;
582
583
    static const uint8_t sMaxKeyLen = 64;
584
585
    // size of the key. 0 indicates it is stored as id
586
1.33k
    uint8_t klen() const { return size; }
587
588
    // get the key string. Note the string may not be null terminated.
589
682
    const char* getKeyStr() const { return key.str_; }
590
591
9.22k
    keyid_type getKeyId() const { return key.id_; }
592
593
35.0k
    unsigned int keyPackedBytes() const {
594
35.0k
        return size ? (sizeof(size) + size) : (sizeof(size) + sizeof(keyid_type));
595
35.0k
    }
596
597
17.5k
    JsonbValue* value() const { return (JsonbValue*)(((char*)this) + keyPackedBytes()); }
598
599
    // size of the total packed bytes (key+value)
600
    unsigned int numPackedBytes() const;
601
602
    uint8_t size;
603
604
    union key_ {
605
        keyid_type id_;
606
        char str_[1];
607
    } key;
608
};
609
610
struct JsonbFindResult {
611
    const JsonbValue* value = nullptr;   // found value
612
    std::unique_ptr<JsonbWriter> writer; // writer to write the value
613
    bool is_wildcard = false;            // whether the path is a wildcard path
614
};
615
616
/*
617
 * JsonbValue is the base class of all JSONB types. It contains only one member
618
 * variable - type info, which can be retrieved by member functions is[Type]()
619
 * or type().
620
 */
621
struct JsonbValue {
622
    static const uint32_t sMaxValueLen = 1 << 24; // 16M
623
624
4.22k
    bool isNull() const { return (type == JsonbType::T_Null); }
625
21
    bool isTrue() const { return (type == JsonbType::T_True); }
626
1
    bool isFalse() const { return (type == JsonbType::T_False); }
627
4
    bool isInt() const { return isInt8() || isInt16() || isInt32() || isInt64() || isInt128(); }
628
4
    bool isInt8() const { return (type == JsonbType::T_Int8); }
629
2
    bool isInt16() const { return (type == JsonbType::T_Int16); }
630
0
    bool isInt32() const { return (type == JsonbType::T_Int32); }
631
3
    bool isInt64() const { return (type == JsonbType::T_Int64); }
632
1
    bool isDouble() const { return (type == JsonbType::T_Double); }
633
1
    bool isFloat() const { return (type == JsonbType::T_Float); }
634
33
    bool isString() const { return (type == JsonbType::T_String); }
635
1.07k
    bool isBinary() const { return (type == JsonbType::T_Binary); }
636
6
    bool isObject() const { return (type == JsonbType::T_Object); }
637
7
    bool isArray() const { return (type == JsonbType::T_Array); }
638
3
    bool isInt128() const { return (type == JsonbType::T_Int128); }
639
4
    bool isDecimal() const {
640
4
        return (type == JsonbType::T_Decimal32 || type == JsonbType::T_Decimal64 ||
641
4
                type == JsonbType::T_Decimal128 || type == JsonbType::T_Decimal256);
642
4
    }
643
1
    bool isDecimal32() const { return (type == JsonbType::T_Decimal32); }
644
1
    bool isDecimal64() const { return (type == JsonbType::T_Decimal64); }
645
1
    bool isDecimal128() const { return (type == JsonbType::T_Decimal128); }
646
1
    bool isDecimal256() const { return (type == JsonbType::T_Decimal256); }
647
648
11
    PrimitiveType get_primitive_type() const { return get_primitive_type_from_json_type(type); }
649
650
0
    const char* typeName() const {
651
0
        switch (type) {
652
0
        case JsonbType::T_Null:
653
0
            return "null";
654
0
        case JsonbType::T_True:
655
0
        case JsonbType::T_False:
656
0
            return "bool";
657
0
        case JsonbType::T_Int8:
658
0
        case JsonbType::T_Int16:
659
0
        case JsonbType::T_Int32:
660
0
            return "int";
661
0
        case JsonbType::T_Int64:
662
0
            return "bigint";
663
0
        case JsonbType::T_Int128:
664
0
            return "largeint";
665
0
        case JsonbType::T_Double:
666
0
            return "double";
667
0
        case JsonbType::T_Float:
668
0
            return "float";
669
0
        case JsonbType::T_String:
670
0
            return "string";
671
0
        case JsonbType::T_Binary:
672
0
            return "binary";
673
0
        case JsonbType::T_Object:
674
0
            return "object";
675
0
        case JsonbType::T_Array:
676
0
            return "array";
677
0
        case JsonbType::T_Decimal32:
678
0
            return "Decimal32";
679
0
        case JsonbType::T_Decimal64:
680
0
            return "Decimal64";
681
0
        case JsonbType::T_Decimal128:
682
0
            return "Decimal128";
683
0
        case JsonbType::T_Decimal256:
684
0
            return "Decimal256";
685
0
        default:
686
0
            return "unknown";
687
0
        }
688
0
    }
689
690
    // size of the total packed bytes
691
    unsigned int numPackedBytes() const;
692
693
    // size of the value in bytes
694
    unsigned int size() const;
695
696
    //Get the number of jsonbvalue elements
697
    int numElements() const;
698
699
    //Whether to include the jsonbvalue rhs
700
    bool contains(JsonbValue* rhs) const;
701
702
    // find the JSONB value by JsonbPath
703
    JsonbFindResult findValue(JsonbPath& path) const;
704
    friend class JsonbDocument;
705
706
    JsonbType type; // type info
707
708
    char payload[0]; // payload, which is the packed bytes of the value
709
710
    /**
711
    * @brief Unpacks the underlying Jsonb binary content as a pointer to type `T`.
712
    *
713
    * @tparam T A POD (Plain Old Data) type that must satisfy the `JsonbPodType` concept.
714
    *           This ensures that `T` is trivially copyable, standard-layout, and safe to
715
    *           reinterpret from raw bytes without invoking undefined behavior.
716
    *
717
    * @return A pointer to a `const T` object, interpreted from the internal buffer.
718
    *
719
    * @note The caller must ensure that the current JsonbValue actually contains data
720
    *       compatible with type `T`, otherwise the result is undefined.
721
    */
722
    template <JsonbPodType T>
723
51.9k
    const T* unpack() const {
724
51.9k
        static_assert(is_pod_v<T>, "T must be a POD type");
725
51.9k
        return reinterpret_cast<const T*>(payload);
726
51.9k
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_9ObjectValEEEPKT_v
Line
Count
Source
723
19.5k
    const T* unpack() const {
724
19.5k
        static_assert(is_pod_v<T>, "T must be a POD type");
725
19.5k
        return reinterpret_cast<const T*>(payload);
726
19.5k
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIaEEEEPKT_v
Line
Count
Source
723
722
    const T* unpack() const {
724
722
        static_assert(is_pod_v<T>, "T must be a POD type");
725
722
        return reinterpret_cast<const T*>(payload);
726
722
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIsEEEEPKT_v
Line
Count
Source
723
105
    const T* unpack() const {
724
105
        static_assert(is_pod_v<T>, "T must be a POD type");
725
105
        return reinterpret_cast<const T*>(payload);
726
105
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIiEEEEPKT_v
Line
Count
Source
723
3.47k
    const T* unpack() const {
724
3.47k
        static_assert(is_pod_v<T>, "T must be a POD type");
725
3.47k
        return reinterpret_cast<const T*>(payload);
726
3.47k
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIlEEEEPKT_v
Line
Count
Source
723
1.87k
    const T* unpack() const {
724
1.87k
        static_assert(is_pod_v<T>, "T must be a POD type");
725
1.87k
        return reinterpret_cast<const T*>(payload);
726
1.87k
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTInEEEEPKT_v
Line
Count
Source
723
4.16k
    const T* unpack() const {
724
4.16k
        static_assert(is_pod_v<T>, "T must be a POD type");
725
4.16k
        return reinterpret_cast<const T*>(payload);
726
4.16k
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_14JsonbBinaryValEEEPKT_v
Line
Count
Source
723
19.5k
    const T* unpack() const {
724
19.5k
        static_assert(is_pod_v<T>, "T must be a POD type");
725
19.5k
        return reinterpret_cast<const T*>(payload);
726
19.5k
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_12ContainerValEEEPKT_v
Line
Count
Source
723
1.85k
    const T* unpack() const {
724
1.85k
        static_assert(is_pod_v<T>, "T must be a POD type");
725
1.85k
        return reinterpret_cast<const T*>(payload);
726
1.85k
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_8ArrayValEEEPKT_v
Line
Count
Source
723
103
    const T* unpack() const {
724
103
        static_assert(is_pod_v<T>, "T must be a POD type");
725
103
        return reinterpret_cast<const T*>(payload);
726
103
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIdEEEEPKT_v
Line
Count
Source
723
169
    const T* unpack() const {
724
169
        static_assert(is_pod_v<T>, "T must be a POD type");
725
169
        return reinterpret_cast<const T*>(payload);
726
169
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIfEEEEPKT_v
Line
Count
Source
723
25
    const T* unpack() const {
724
25
        static_assert(is_pod_v<T>, "T must be a POD type");
725
25
        return reinterpret_cast<const T*>(payload);
726
25
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_14JsonbStringValEEEPKT_v
Line
Count
Source
723
269
    const T* unpack() const {
724
269
        static_assert(is_pod_v<T>, "T must be a POD type");
725
269
        return reinterpret_cast<const T*>(payload);
726
269
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_15JsonbDecimalValINS_10vectorized7DecimalIiEEEEEEPKT_v
Line
Count
Source
723
13
    const T* unpack() const {
724
13
        static_assert(is_pod_v<T>, "T must be a POD type");
725
13
        return reinterpret_cast<const T*>(payload);
726
13
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_15JsonbDecimalValINS_10vectorized7DecimalIlEEEEEEPKT_v
Line
Count
Source
723
13
    const T* unpack() const {
724
13
        static_assert(is_pod_v<T>, "T must be a POD type");
725
13
        return reinterpret_cast<const T*>(payload);
726
13
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_15JsonbDecimalValINS_10vectorized12Decimal128V3EEEEEPKT_v
Line
Count
Source
723
17
    const T* unpack() const {
724
17
        static_assert(is_pod_v<T>, "T must be a POD type");
725
17
        return reinterpret_cast<const T*>(payload);
726
17
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_15JsonbDecimalValINS_10vectorized7DecimalIN4wide7integerILm256EiEEEEEEEEPKT_v
Line
Count
Source
723
13
    const T* unpack() const {
724
13
        static_assert(is_pod_v<T>, "T must be a POD type");
725
13
        return reinterpret_cast<const T*>(payload);
726
13
    }
727
728
    // /**
729
    // * @brief Unpacks the underlying Jsonb binary content as a pointer to type `T`.
730
    // *
731
    // * @tparam T A POD (Plain Old Data) type that must satisfy the `JsonbPodType` concept.
732
    // *           This ensures that `T` is trivially copyable, standard-layout, and safe to
733
    // *           reinterpret from raw bytes without invoking undefined behavior.
734
    // *
735
    // * @return A pointer to a `T` object, interpreted from the internal buffer.
736
    // *
737
    // * @note The caller must ensure that the current JsonbValue actually contains data
738
    // *       compatible with type `T`, otherwise the result is undefined.
739
    // */
740
    // template <JsonbPodType T>
741
    // T* unpack() {
742
    //     static_assert(is_pod_v<T>, "T must be a POD type");
743
    //     return reinterpret_cast<T*>(payload);
744
    // }
745
746
    int128_t int_val() const;
747
};
748
749
// inline ObjectVal* JsonbDocument::operator->() {
750
//     return (((JsonbValue*)payload_)->unpack<ObjectVal>());
751
// }
752
753
19.0k
// Views the bytes at payload_ as a JsonbValue and unpacks it as an ObjectVal.
// No check of the value's type is performed here.
inline const ObjectVal* JsonbDocument::operator->() const {
    const auto* root = reinterpret_cast<const JsonbValue*>(payload_);
    return root->unpack<ObjectVal>();
}
756
757
/*
758
 * NumberValT is the template struct (unpacked from a JsonbValue payload) of all number
759
 * types (integers and floating-point numbers).
760
 */
761
template <typename T>
762
    requires std::is_integral_v<T> || std::is_floating_point_v<T>
763
struct NumberValT {
764
public:
765
10.5k
    T val() const { return num; }
_ZNK5doris10NumberValTIaE3valEv
Line
Count
Source
765
722
    T val() const { return num; }
_ZNK5doris10NumberValTIsE3valEv
Line
Count
Source
765
105
    T val() const { return num; }
_ZNK5doris10NumberValTIiE3valEv
Line
Count
Source
765
3.47k
    T val() const { return num; }
_ZNK5doris10NumberValTIlE3valEv
Line
Count
Source
765
1.87k
    T val() const { return num; }
_ZNK5doris10NumberValTInE3valEv
Line
Count
Source
765
4.16k
    T val() const { return num; }
_ZNK5doris10NumberValTIdE3valEv
Line
Count
Source
765
169
    T val() const { return num; }
_ZNK5doris10NumberValTIfE3valEv
Line
Count
Source
765
25
    T val() const { return num; }
766
767
    static unsigned int numPackedBytes() { return sizeof(JsonbValue) + sizeof(T); }
768
769
    T num;
770
};
771
772
9
// Returns the integer stored in this value widened to int128_t.
// Handles T_Int8, T_Int16, T_Int32, T_Int64 and T_Int128; any other type
// throws Exception(ErrorCode::INTERNAL_ERROR).
inline int128_t JsonbValue::int_val() const {
    if (type == JsonbType::T_Int8) {
        return unpack<JsonbInt8Val>()->val();
    }
    if (type == JsonbType::T_Int16) {
        return unpack<JsonbInt16Val>()->val();
    }
    if (type == JsonbType::T_Int32) {
        return unpack<JsonbInt32Val>()->val();
    }
    if (type == JsonbType::T_Int64) {
        return unpack<JsonbInt64Val>()->val();
    }
    if (type == JsonbType::T_Int128) {
        return unpack<JsonbInt128Val>()->val();
    }
    throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB value type: {}",
                    static_cast<int32_t>(type));
}
789
790
template <JsonbDecimalType T>
791
struct JsonbDecimalVal {
792
public:
793
    using NativeType = typename T::NativeType;
794
795
    // get the decimal value
796
26
    NativeType val() const {
797
        // to avoid memory alignment issues, we use memcpy to copy the value
798
26
        NativeType tmp;
799
26
        memcpy(&tmp, &value, sizeof(NativeType));
800
26
        return tmp;
801
26
    }
_ZNK5doris15JsonbDecimalValINS_10vectorized7DecimalIiEEE3valEv
Line
Count
Source
796
6
    NativeType val() const {
797
        // to avoid memory alignment issues, we use memcpy to copy the value
798
6
        NativeType tmp;
799
6
        memcpy(&tmp, &value, sizeof(NativeType));
800
6
        return tmp;
801
6
    }
_ZNK5doris15JsonbDecimalValINS_10vectorized7DecimalIlEEE3valEv
Line
Count
Source
796
6
    NativeType val() const {
797
        // to avoid memory alignment issues, we use memcpy to copy the value
798
6
        NativeType tmp;
799
6
        memcpy(&tmp, &value, sizeof(NativeType));
800
6
        return tmp;
801
6
    }
_ZNK5doris15JsonbDecimalValINS_10vectorized12Decimal128V3EE3valEv
Line
Count
Source
796
8
    NativeType val() const {
797
        // to avoid memory alignment issues, we use memcpy to copy the value
798
8
        NativeType tmp;
799
8
        memcpy(&tmp, &value, sizeof(NativeType));
800
8
        return tmp;
801
8
    }
_ZNK5doris15JsonbDecimalValINS_10vectorized7DecimalIN4wide7integerILm256EiEEEEE3valEv
Line
Count
Source
796
6
    NativeType val() const {
797
        // to avoid memory alignment issues, we use memcpy to copy the value
798
6
        NativeType tmp;
799
6
        memcpy(&tmp, &value, sizeof(NativeType));
800
6
        return tmp;
801
6
    }
802
803
29
    static constexpr int numPackedBytes() {
804
29
        return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value);
805
29
    }
_ZN5doris15JsonbDecimalValINS_10vectorized7DecimalIiEEE14numPackedBytesEv
Line
Count
Source
803
7
    static constexpr int numPackedBytes() {
804
7
        return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value);
805
7
    }
_ZN5doris15JsonbDecimalValINS_10vectorized7DecimalIlEEE14numPackedBytesEv
Line
Count
Source
803
7
    static constexpr int numPackedBytes() {
804
7
        return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value);
805
7
    }
_ZN5doris15JsonbDecimalValINS_10vectorized12Decimal128V3EE14numPackedBytesEv
Line
Count
Source
803
9
    static constexpr int numPackedBytes() {
804
9
        return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value);
805
9
    }
_ZN5doris15JsonbDecimalValINS_10vectorized7DecimalIN4wide7integerILm256EiEEEEE14numPackedBytesEv
Line
Count
Source
803
6
    static constexpr int numPackedBytes() {
804
6
        return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value);
805
6
    }
806
807
    uint32_t precision;
808
    uint32_t scale;
809
    NativeType value;
810
};
811
812
/*
813
 * JsonbBinaryVal is the base struct (unpacked from a JsonbValue payload) for string and binary
814
 * types. The size indicates the total bytes of the payload.
815
 */
816
struct JsonbBinaryVal {
817
public:
818
    // size of the blob payload only
819
2.11k
    unsigned int getBlobLen() const { return size; }
820
821
    // return the blob as byte array
822
4.51k
    const char* getBlob() const { return payload; }
823
824
    // size of the total packed bytes
825
15.1k
    unsigned int numPackedBytes() const { return sizeof(JsonbValue) + sizeof(size) + size; }
826
    friend class JsonbDocument;
827
828
    uint32_t size;
829
    char payload[0];
830
};
831
832
/*
833
 * String type
834
 * Note: JSONB string may not be a c-string (NULL-terminated)
835
 */
836
struct JsonbStringVal : public JsonbBinaryVal {
837
public:
838
    /*
839
    This function return the actual size of a string. Since for
840
    a string, it can be null-terminated with null paddings or it
841
    can take all the space in the payload without null in the end.
842
    So we need to check it to get the true actual length of a string.
843
  */
844
138
    size_t length() const {
845
        // It's an empty string
846
138
        if (0 == size) {
847
0
            return size;
848
0
        }
849
        // The string stored takes all the spaces in payload
850
138
        if (payload[size - 1] != 0) {
851
138
            return size;
852
138
        }
853
        // It's shorter than the size of payload
854
0
        return strnlen(payload, size);
855
138
    }
856
};
857
858
/*
859
 * ContainerVal is the base struct (unpacked from a JsonbValue payload) for object and
860
 * array types. The size indicates the total bytes of the payload.
861
 */
862
struct ContainerVal {
863
    // size of the container payload only
864
0
    unsigned int getContainerSize() const { return size; }
865
866
    // return the container payload as byte array
867
0
    const char* getPayload() const { return payload; }
868
869
    // size of the total packed bytes
870
1.85k
    unsigned int numPackedBytes() const { return sizeof(JsonbValue) + sizeof(size) + size; }
871
    friend class JsonbDocument;
872
873
    uint32_t size;
874
    char payload[0];
875
};
876
877
/*
878
 * Object type
879
 */
880
struct ObjectVal : public ContainerVal {
881
    using value_type = JsonbKeyValue;
882
    using pointer = value_type*;
883
    using const_pointer = const value_type*;
884
    using iterator = JsonbFwdIteratorT<pointer, ObjectVal>;
885
    using const_iterator = JsonbFwdIteratorT<const_pointer, ObjectVal>;
886
887
1
    const_iterator search(const char* key) const {
888
1
        return const_cast<ObjectVal*>(this)->search(key);
889
1
    }
890
891
1
    const_iterator search(const char* key, unsigned int klen) const {
892
1
        return const_cast<ObjectVal*>(this)->search(key, klen);
893
1
    }
894
895
1
    iterator search(const char* key) {
896
1
        if (!key) {
897
0
            return end();
898
0
        }
899
1
        return search(key, (unsigned int)strlen(key));
900
1
    }
901
902
29
    iterator search(const char* key, unsigned int klen) {
903
29
        if (!key || !klen) {
904
0
            return end();
905
0
        }
906
29
        return internalSearch(key, klen);
907
29
    }
908
909
    // Get number of elements in object
910
8
    int numElem() const {
911
8
        const char* pch = payload;
912
8
        const char* fence = payload + size;
913
914
8
        unsigned int num = 0;
915
40
        while (pch < fence) {
916
32
            auto* pkey = (JsonbKeyValue*)(pch);
917
32
            ++num;
918
32
            pch += pkey->numPackedBytes();
919
32
        }
920
921
8
        assert(pch == fence);
922
923
8
        return num;
924
8
    }
925
926
1
    JsonbValue* find(const char* key) const { return const_cast<ObjectVal*>(this)->find(key); }
927
928
26
    JsonbValue* find(const char* key, unsigned int klen) const {
929
26
        return const_cast<ObjectVal*>(this)->find(key, klen);
930
26
    }
931
932
    // find the JSONB value by a key string (null terminated)
933
1
    JsonbValue* find(const char* key) {
934
1
        if (!key) {
935
0
            return nullptr;
936
0
        }
937
1
        return find(key, (unsigned int)strlen(key));
938
1
    }
939
940
    // find the JSONB value by a key string (with length)
941
27
    JsonbValue* find(const char* key, unsigned int klen) {
942
27
        iterator kv = search(key, klen);
943
27
        if (end() == kv) {
944
2
            return nullptr;
945
2
        }
946
25
        return kv->value();
947
27
    }
948
949
0
    iterator begin() { return iterator((pointer)payload); }
950
951
1.64k
    const_iterator begin() const { return const_iterator((pointer)payload); }
952
953
29
    iterator end() { return iterator((pointer)(payload + size)); }
954
955
18.4k
    const_iterator end() const { return const_iterator((pointer)(payload + size)); }
956
957
    std::vector<std::pair<StringRef, const JsonbValue*>> get_ordered_key_value_pairs() const;
958
959
private:
960
29
    iterator internalSearch(const char* key, unsigned int klen) {
961
29
        const char* pch = payload;
962
29
        const char* fence = payload + size;
963
964
39
        while (pch < fence) {
965
37
            auto* pkey = (JsonbKeyValue*)(pch);
966
37
            if (klen == pkey->klen() && strncmp(key, pkey->getKeyStr(), klen) == 0) {
967
27
                return iterator(pkey);
968
27
            }
969
10
            pch += pkey->numPackedBytes();
970
10
        }
971
972
29
        assert(pch == fence);
973
974
2
        return end();
975
2
    }
976
};
977
978
/*
979
 * Array type
980
 */
981
struct ArrayVal : public ContainerVal {
982
    using value_type = JsonbValue;
983
    using pointer = value_type*;
984
    using const_pointer = const value_type*;
985
    using iterator = JsonbFwdIteratorT<pointer, ArrayVal>;
986
    using const_iterator = JsonbFwdIteratorT<const_pointer, ArrayVal>;
987
988
    // get the JSONB value at index
989
31
    JsonbValue* get(int idx) const {
990
31
        if (idx < 0) {
991
0
            return nullptr;
992
0
        }
993
994
31
        const char* pch = payload;
995
31
        const char* fence = payload + size;
996
997
72
        while (pch < fence && idx-- > 0) {
998
41
            pch += ((JsonbValue*)pch)->numPackedBytes();
999
41
        }
1000
31
        if (idx > 0 || pch == fence) {
1001
7
            return nullptr;
1002
7
        }
1003
1004
24
        return (JsonbValue*)pch;
1005
31
    }
1006
1007
    // Get number of elements in array
1008
8
    int numElem() const {
1009
8
        const char* pch = payload;
1010
8
        const char* fence = payload + size;
1011
1012
8
        unsigned int num = 0;
1013
29
        while (pch < fence) {
1014
21
            ++num;
1015
21
            pch += ((JsonbValue*)pch)->numPackedBytes();
1016
21
        }
1017
1018
8
        assert(pch == fence);
1019
1020
8
        return num;
1021
8
    }
1022
1023
0
    iterator begin() { return iterator((pointer)payload); }
1024
1025
70
    const_iterator begin() const { return const_iterator((pointer)payload); }
1026
1027
0
    iterator end() { return iterator((pointer)(payload + size)); }
1028
1029
69
    const_iterator end() const { return const_iterator((pointer)(payload + size)); }
1030
};
1031
1032
// Validates `size` bytes at `pb` as a JSONB document without copying them.
// On success *doc points into `pb` and Status::OK() is returned; on any
// failure *doc is nullptr and an InvalidArgument status is returned.
inline Status JsonbDocument::checkAndCreateDocument(const char* pb, size_t size,
                                                    JsonbDocument** doc) {
    *doc = nullptr;

    // The buffer must hold at least a header and one value.
    const bool too_small = size < sizeof(JsonbHeader) + sizeof(JsonbValue);
    if (pb == nullptr || too_small) {
        return Status::InvalidArgument("Invalid JSONB document: too small size({}) or null pointer",
                                       size);
    }

    auto* candidate = (JsonbDocument*)pb;
    if (candidate->header_.ver_ != JSONB_VER) {
        return Status::InvalidArgument("Invalid JSONB document: invalid version({})",
                                       candidate->header_.ver_);
    }

    auto* root = (JsonbValue*)candidate->payload_;
    // The type range is checked first so numPackedBytes() is only called for
    // a type inside the enum range.
    const bool type_in_range =
            root->type >= JsonbType::T_Null && root->type < JsonbType::NUM_TYPES;
    if (!type_in_range || size != sizeof(JsonbHeader) + root->numPackedBytes()) {
        return Status::InvalidArgument("Invalid JSONB document: invalid type({}) or size({})",
                                       static_cast<JsonbTypeUnder>(root->type), size);
    }

    *doc = candidate;
    return Status::OK();
}
1056
0
inline void JsonbDocument::setValue(const JsonbValue* value) {
1057
0
    memcpy(payload_, value, value->numPackedBytes());
1058
0
}
1059
1060
12
// Interprets `size` bytes at `pb` as a JSONB document and returns its root
// value, or nullptr when the pointer is null, the buffer is too small, the
// version differs from JSONB_VER, or the packed size does not match `size`.
inline JsonbValue* JsonbDocument::createValue(const char* pb, size_t size) {
    const bool unusable = !pb || size < sizeof(JsonbHeader) + sizeof(JsonbValue);
    if (unusable) {
        return nullptr;
    }

    auto* document = (JsonbDocument*)pb;
    if (document->header_.ver_ == JSONB_VER) {
        auto* root = (JsonbValue*)document->payload_;
        // Header plus root value must account for every byte of the buffer.
        if (size == sizeof(JsonbHeader) + root->numPackedBytes()) {
            return root;
        }
    }

    return nullptr;
}
1077
1078
0
inline unsigned int JsonbDocument::numPackedBytes() const {
1079
0
    return ((const JsonbValue*)payload_)->numPackedBytes() + sizeof(header_);
1080
0
}
1081
1082
17.5k
inline unsigned int JsonbKeyValue::numPackedBytes() const {
1083
17.5k
    unsigned int ks = keyPackedBytes();
1084
17.5k
    auto* val = (JsonbValue*)(((char*)this) + ks);
1085
17.5k
    return ks + val->numPackedBytes();
1086
17.5k
}
1087
1088
// Poor man's "virtual" function JsonbValue::numPackedBytes
1089
71.3k
inline unsigned int JsonbValue::numPackedBytes() const {
1090
71.3k
    switch (type) {
1091
2.75k
    case JsonbType::T_Null:
1092
2.83k
    case JsonbType::T_True:
1093
2.88k
    case JsonbType::T_False: {
1094
2.88k
        return sizeof(type);
1095
2.83k
    }
1096
1097
1.68k
    case JsonbType::T_Int8: {
1098
1.68k
        return sizeof(type) + sizeof(int8_t);
1099
2.83k
    }
1100
122
    case JsonbType::T_Int16: {
1101
122
        return sizeof(type) + sizeof(int16_t);
1102
2.83k
    }
1103
3.49k
    case JsonbType::T_Int32: {
1104
3.49k
        return sizeof(type) + sizeof(int32_t);
1105
2.83k
    }
1106
21.5k
    case JsonbType::T_Int64: {
1107
21.5k
        return sizeof(type) + sizeof(int64_t);
1108
2.83k
    }
1109
10.5k
    case JsonbType::T_Double: {
1110
10.5k
        return sizeof(type) + sizeof(double);
1111
2.83k
    }
1112
26
    case JsonbType::T_Float: {
1113
26
        return sizeof(type) + sizeof(float);
1114
2.83k
    }
1115
14.0k
    case JsonbType::T_Int128: {
1116
14.0k
        return sizeof(type) + sizeof(int128_t);
1117
2.83k
    }
1118
10.7k
    case JsonbType::T_String:
1119
15.1k
    case JsonbType::T_Binary: {
1120
15.1k
        return unpack<JsonbBinaryVal>()->numPackedBytes();
1121
10.7k
    }
1122
1123
1.69k
    case JsonbType::T_Object:
1124
1.85k
    case JsonbType::T_Array: {
1125
1.85k
        return unpack<ContainerVal>()->numPackedBytes();
1126
1.69k
    }
1127
7
    case JsonbType::T_Decimal32: {
1128
7
        return JsonbDecimal32::numPackedBytes();
1129
1.69k
    }
1130
7
    case JsonbType::T_Decimal64: {
1131
7
        return JsonbDecimal64::numPackedBytes();
1132
1.69k
    }
1133
9
    case JsonbType::T_Decimal128: {
1134
9
        return JsonbDecimal128::numPackedBytes();
1135
1.69k
    }
1136
6
    case JsonbType::T_Decimal256: {
1137
6
        return JsonbDecimal256::numPackedBytes();
1138
1.69k
    }
1139
0
    case JsonbType::NUM_TYPES:
1140
0
        break;
1141
71.3k
    }
1142
1143
0
    throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB value type: {}",
1144
0
                    static_cast<int32_t>(type));
1145
71.3k
}
1146
1147
6
inline int JsonbValue::numElements() const {
1148
6
    switch (type) {
1149
0
    case JsonbType::T_Int8:
1150
0
    case JsonbType::T_Int16:
1151
0
    case JsonbType::T_Int32:
1152
0
    case JsonbType::T_Int64:
1153
0
    case JsonbType::T_Double:
1154
0
    case JsonbType::T_Float:
1155
0
    case JsonbType::T_Int128:
1156
1
    case JsonbType::T_String:
1157
1
    case JsonbType::T_Binary:
1158
2
    case JsonbType::T_Null:
1159
2
    case JsonbType::T_True:
1160
2
    case JsonbType::T_False:
1161
2
    case JsonbType::T_Decimal32:
1162
2
    case JsonbType::T_Decimal64:
1163
2
    case JsonbType::T_Decimal128:
1164
2
    case JsonbType::T_Decimal256: {
1165
2
        return 1;
1166
2
    }
1167
0
    case JsonbType::T_Object: {
1168
0
        return unpack<ObjectVal>()->numElem();
1169
2
    }
1170
4
    case JsonbType::T_Array: {
1171
4
        return unpack<ArrayVal>()->numElem();
1172
2
    }
1173
0
    case JsonbType::NUM_TYPES:
1174
0
        break;
1175
6
    }
1176
0
    throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB value type: {}",
1177
0
                    static_cast<int32_t>(type));
1178
6
}
1179
1180
3
/*
 * Returns true when this value contains `rhs`. `rhs` must not be null.
 *
 *  - integer types: rhs->isInt() holds and both int_val() results are equal
 *  - double/float:  rhs is double or float and both compare equal as double
 *  - string/binary: rhs is string or binary with the same length() and bytes
 *  - array:         if rhs is an array, every element of rhs is contained in
 *                   some element of this array; otherwise some element of
 *                   this array contains rhs
 *  - object:        rhs is an object and each of its keys is found in this
 *                   object with a value that contains the rhs value
 *  - null/true/false: rhs is the same kind
 *  - decimal types: rhs has the same decimal type, value, precision and scale
 *
 * Throws Exception(ErrorCode::INTERNAL_ERROR) for a type not listed above.
 */
inline bool JsonbValue::contains(JsonbValue* rhs) const {
    // Two decimals of the same type match when value, precision and scale agree.
    const auto same_decimal = [](const auto* lhs_dec, const auto* rhs_dec) {
        return lhs_dec->val() == rhs_dec->val() && lhs_dec->precision == rhs_dec->precision &&
               lhs_dec->scale == rhs_dec->scale;
    };

    switch (type) {
    case JsonbType::T_Int8:
    case JsonbType::T_Int16:
    case JsonbType::T_Int32:
    case JsonbType::T_Int64:
    case JsonbType::T_Int128: {
        return rhs->isInt() && this->int_val() == rhs->int_val();
    }
    case JsonbType::T_Double:
    case JsonbType::T_Float: {
        if (!rhs->isDouble() && !rhs->isFloat()) {
            return false;
        }
        // Widen both sides to double before comparing.
        double left = isDouble() ? unpack<JsonbDoubleVal>()->val() : unpack<JsonbFloatVal>()->val();
        double right = rhs->isDouble() ? rhs->unpack<JsonbDoubleVal>()->val()
                                       : rhs->unpack<JsonbFloatVal>()->val();
        return left == right;
    }
    case JsonbType::T_String:
    case JsonbType::T_Binary: {
        if (rhs->isString() || rhs->isBinary()) {
            const auto* str_value1 = unpack<JsonbStringVal>();
            const auto* str_value2 = rhs->unpack<JsonbStringVal>();
            return str_value1->length() == str_value2->length() &&
                   std::memcmp(str_value1->getBlob(), str_value2->getBlob(),
                               str_value1->length()) == 0;
        }
        return false;
    }
    case JsonbType::T_Array: {
        const auto* lhs_array = unpack<ArrayVal>();
        const int lhs_num = lhs_array->numElem();
        if (rhs->isArray()) {
            // A candidate array is contained when EVERY one of its elements is
            // contained in SOME element of this array. Iterate over the
            // candidate's elements (not this array's), so duplicates on either
            // side cannot distort the result.
            const auto* rhs_array = rhs->unpack<ArrayVal>();
            const int rhs_num = rhs_array->numElem();
            for (int j = 0; j < rhs_num; ++j) {
                JsonbValue* candidate = rhs_array->get(j);
                bool found = false;
                for (int i = 0; i < lhs_num && !found; ++i) {
                    found = lhs_array->get(i)->contains(candidate);
                }
                if (!found) {
                    return false;
                }
            }
            return true;
        }
        // Non-array candidate: it is enough that one element contains it.
        for (int i = 0; i < lhs_num; ++i) {
            if (lhs_array->get(i)->contains(rhs)) {
                return true;
            }
        }
        return false;
    }
    case JsonbType::T_Object: {
        if (rhs->isObject()) {
            const auto* obj_value1 = unpack<ObjectVal>();
            const auto* obj_value2 = rhs->unpack<ObjectVal>();
            // Every key of the candidate must exist here with a containing value.
            for (auto it = obj_value2->begin(); it != obj_value2->end(); ++it) {
                JsonbValue* value = obj_value1->find(it->getKeyStr(), it->klen());
                if (value == nullptr || !value->contains(it->value())) {
                    return false;
                }
            }
            return true;
        }
        return false;
    }
    case JsonbType::T_Null: {
        return rhs->isNull();
    }
    case JsonbType::T_True: {
        return rhs->isTrue();
    }
    case JsonbType::T_False: {
        return rhs->isFalse();
    }
    case JsonbType::T_Decimal32: {
        if (rhs->isDecimal32()) {
            return same_decimal(unpack<JsonbDecimal32>(), rhs->unpack<JsonbDecimal32>());
        }
        return false;
    }
    case JsonbType::T_Decimal64: {
        if (rhs->isDecimal64()) {
            return same_decimal(unpack<JsonbDecimal64>(), rhs->unpack<JsonbDecimal64>());
        }
        return false;
    }
    case JsonbType::T_Decimal128: {
        if (rhs->isDecimal128()) {
            return same_decimal(unpack<JsonbDecimal128>(), rhs->unpack<JsonbDecimal128>());
        }
        return false;
    }
    case JsonbType::T_Decimal256: {
        if (rhs->isDecimal256()) {
            return same_decimal(unpack<JsonbDecimal256>(), rhs->unpack<JsonbDecimal256>());
        }
        return false;
    }
    case JsonbType::NUM_TYPES:
        break;
    }

    throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB value type: {}",
                    static_cast<int32_t>(type));
}
1301
1302
96
inline bool JsonbPath::seek(const char* key_path, size_t kp_len) {
1303
96
    while (kp_len > 0 && std::isspace(key_path[kp_len - 1])) {
1304
0
        --kp_len;
1305
0
    }
1306
1307
    //path invalid
1308
96
    if (!key_path || kp_len == 0) {
1309
0
        return false;
1310
0
    }
1311
96
    Stream stream(key_path, kp_len);
1312
96
    stream.skip_whitespace();
1313
96
    if (stream.exhausted() || stream.read() != SCOPE) {
1314
        //path invalid
1315
0
        return false;
1316
0
    }
1317
1318
186
    while (!stream.exhausted()) {
1319
90
        stream.skip_whitespace();
1320
90
        stream.clear_leg_ptr();
1321
90
        stream.clear_leg_len();
1322
1323
90
        if (!JsonbPath::parsePath(&stream, this)) {
1324
            //path invalid
1325
0
            return false;
1326
0
        }
1327
90
    }
1328
96
    return true;
1329
96
}
1330
1331
90
// Parses one leg at the current stream position and records it in `path`.
// Accepted forms: "[...]", ".name", ".[...]", and the same three forms
// preceded by a wildcard pair. A doubled wildcard sets
// path->_is_supper_wildcard. Returns false on any other input or when the
// stream ends in the middle of a leg.
inline bool JsonbPath::parsePath(Stream* stream, JsonbPath* path) {
    // Optional wildcard prefix: consume two characters, flagging "**".
    if (stream->peek() == WILDCARD) {
        stream->skip(1);
        if (stream->exhausted()) {
            return false;
        }

        // $**
        if (stream->peek() == WILDCARD) {
            path->_is_supper_wildcard = true;
        }

        stream->skip(1);
        if (stream->exhausted()) {
            return false;
        }
    }

    // $[0]
    if (stream->peek() == BEGIN_ARRAY) {
        return parse_array(stream, path);
    }

    // Anything other than a member sign is an invalid json path here.
    if (stream->peek() != BEGIN_MEMBER) {
        return false;
    }

    // advance past the .
    stream->skip(1);
    if (stream->exhausted()) {
        return false;
    }

    // $.[0] is parsed as an array leg, $.a as a member leg.
    return stream->peek() == BEGIN_ARRAY ? parse_array(stream, path)
                                         : parse_member(stream, path);
}
1393
1394
58
// Parses one array leg of a JSON path and appends it to `path`.
//
// Forms handled by the code below:
//   [*]         wildcard: the leg records the '*' text with index 0, and
//               path->_is_wildcard is set
//   [N]         decimal index parsed with std::from_chars
//   [last]      stored as array index -1
//   [last - N]  stored as array index -N - 1; "last" is matched
//               case-insensitively and only whitespace may sit between it
//               and the minus sign
//
// stream: cursor over the path text; must currently be at BEGIN_ARRAY.
// path:   receives the new leg through add_leg_to_leg_vector().
// Returns true when a leg was appended, false when the text is truncated or
// malformed.
inline bool JsonbPath::parse_array(Stream* stream, JsonbPath* path) {
    assert(stream->peek() == BEGIN_ARRAY);
    stream->skip(1);
    if (stream->exhausted()) {
        return false;
    }

    if (stream->peek() == WILDCARD) {
        // The wildcard leg is exactly one character long.
        stream->set_leg_ptr(const_cast<char*>(stream->position()));
        stream->add_leg_len();
        stream->skip(1);
        if (stream->exhausted()) {
            return false;
        }

        // Nothing but the closing bracket may follow the '*'.
        if (stream->peek() != END_ARRAY) {
            return false;
        }

        std::unique_ptr<leg_info> leg(
                new leg_info(stream->get_leg_ptr(), stream->get_leg_len(), 0, ARRAY_CODE));
        path->add_leg_to_leg_vector(std::move(leg));
        stream->skip(1);
        path->_is_wildcard = true;
        return true;
    }

    // Collect the raw text between the brackets; it is interpreted below.
    stream->set_leg_ptr(const_cast<char*>(stream->position()));

    for (; !stream->exhausted() && stream->peek() != END_ARRAY; stream->advance()) {
        stream->add_leg_len();
    }

    // Running out of input here means the closing bracket is missing.
    if (stream->exhausted() || stream->peek() != END_ARRAY) {
        return false;
    }
    stream->skip(1);

    //parse array index to int

    std::string_view idx_string(stream->get_leg_ptr(), stream->get_leg_len());
    int index = 0;

    // <cctype> functions are only defined for values representable as
    // unsigned char (or EOF), so every char is converted before the call.
    const auto same_ignoring_case = [](char c1, char c2) {
        return std::tolower(static_cast<unsigned char>(c1)) ==
               std::tolower(static_cast<unsigned char>(c2));
    };

    if (stream->get_leg_len() >= 4 &&
        std::equal(LAST, LAST + 4, stream->get_leg_ptr(), same_ignoring_case)) {
        auto pos = idx_string.find(MINUS);

        if (pos != std::string::npos) {
            // Only whitespace is allowed between "last" and the minus sign.
            for (size_t i = 4; i < pos; ++i) {
                if (!std::isspace(static_cast<unsigned char>(idx_string[i]))) {
                    LOG(WARNING) << "Non-space char in idx_string: '" << idx_string << "'";
                    return false;
                }
            }
            idx_string = idx_string.substr(pos + 1);
            idx_string = trim(idx_string);

            auto result = std::from_chars(idx_string.data(), idx_string.data() + idx_string.size(),
                                          index);
            if (result.ec != std::errc()) {
                LOG(WARNING) << "Invalid index in JSON path: '" << idx_string << "'";
                return false;
            }

        } else if (stream->get_leg_len() > 4) {
            // "last" followed by anything other than "- N" is not a valid index.
            return false;
        }

        // "last" maps to -1 and "last - N" to -N - 1. The arithmetic is done in
        // a wider type because negating INT_MIN in int is undefined; the result
        // always fits back into int.
        const long long from_end = -static_cast<long long>(index) - 1;
        std::unique_ptr<leg_info> leg(
                new leg_info(nullptr, 0, static_cast<int>(from_end), ARRAY_CODE));
        path->add_leg_to_leg_vector(std::move(leg));

        return true;
    }

    // NOTE(review): std::from_chars stops at the first character that is not
    // part of the number and only `ec` is checked here, so text following a
    // valid numeric prefix is ignored. Confirm whether such paths should be
    // rejected instead.
    auto result = std::from_chars(idx_string.data(), idx_string.data() + idx_string.size(), index);

    if (result.ec != std::errc()) {
        return false;
    }

    std::unique_ptr<leg_info> leg(new leg_info(nullptr, 0, index, ARRAY_CODE));
    path->add_leg_to_leg_vector(std::move(leg));

    return true;
}
1484
1485
32
// Parses one member (object key) leg of a JSON path and appends it to `path`.
//
// Forms handled by the code below:
//   *        wildcard: the leg records the '*' text and path->_is_wildcard is
//            set
//   name     unquoted key; ends at the next BEGIN_MEMBER or BEGIN_ARRAY, and
//            must not contain whitespace
//   "name"   double-quoted key; may contain whitespace and the delimiter
//            characters, and the quotes are not part of the recorded leg
// An ESCAPE character keeps the character that follows it from being
// interpreted; both are counted in the leg and the stream is flagged so the
// escapes can be removed afterwards.
//
// stream: cursor over the path text, positioned at the first character of the
//         member name.
// path:   receives the new leg through add_leg_to_leg_vector().
// Returns true when a leg was appended, false when the text is truncated or
// malformed.
inline bool JsonbPath::parse_member(Stream* stream, JsonbPath* path) {
    if (stream->exhausted()) {
        return false;
    }

    if (stream->peek() == WILDCARD) {
        // The wildcard leg is exactly one character long.
        stream->set_leg_ptr(const_cast<char*>(stream->position()));
        stream->add_leg_len();
        stream->skip(1);
        std::unique_ptr<leg_info> leg(
                new leg_info(stream->get_leg_ptr(), stream->get_leg_len(), 0, MEMBER_CODE));
        path->add_leg_to_leg_vector(std::move(leg));
        path->_is_wildcard = true;
        return true;
    }

    stream->set_leg_ptr(const_cast<char*>(stream->position()));

    const char* left_quotation_marks = nullptr;
    const char* right_quotation_marks = nullptr;

    for (; !stream->exhausted(); stream->advance()) {
        // Only accept space characters quoted by double quotes.
        // std::isspace is only defined for values representable as unsigned
        // char (or EOF); key names may contain non-ASCII bytes, so convert
        // before the call.
        if (std::isspace(static_cast<unsigned char>(stream->peek())) &&
            left_quotation_marks == nullptr) {
            return false;
        } else if (stream->peek() == ESCAPE) {
            // Count the escape character and the character it protects. The
            // `continue` lets the loop's advance() step past the protected
            // character without interpreting it.
            stream->add_leg_len();
            stream->skip(1);
            stream->add_leg_len();
            stream->set_has_escapes(true);
            if (stream->exhausted()) {
                // An escape character with nothing after it.
                return false;
            }
            continue;
        } else if (stream->peek() == DOUBLE_QUOTE) {
            if (left_quotation_marks == nullptr) {
                // Opening quote: the leg starts at the character after it and
                // the quote itself is not counted.
                left_quotation_marks = stream->position();
                stream->set_leg_ptr(const_cast<char*>(++left_quotation_marks));
                continue;
            } else {
                // Closing quote: consume it without counting it and finish.
                right_quotation_marks = stream->position();
                stream->skip(1);
                break;
            }
        } else if (stream->peek() == BEGIN_MEMBER || stream->peek() == BEGIN_ARRAY) {
            // Outside quotes a delimiter ends this leg; inside quotes it falls
            // through and is counted as part of the name.
            if (left_quotation_marks == nullptr) {
                break;
            }
        }

        stream->add_leg_len();
    }

    // Reject an unterminated quote and an empty member name.
    if ((left_quotation_marks != nullptr && right_quotation_marks == nullptr) ||
        stream->get_leg_ptr() == nullptr || stream->get_leg_len() == 0) {
        return false; //invalid json path
    }

    if (stream->get_has_escapes()) {
        // Presumably strips the escape characters from the recorded leg; see
        // Stream::remove_escapes for the exact behavior.
        stream->remove_escapes();
    }

    std::unique_ptr<leg_info> leg(
            new leg_info(stream->get_leg_ptr(), stream->get_leg_len(), 0, MEMBER_CODE));
    path->add_leg_to_leg_vector(std::move(leg));

    return true;
}
1553
1554
// Compile-time layout guards. The file header explains that these types are
// never constructed and are instead overlaid on packed JSONB bytes, so each
// assertion below stops the build if is_pod_v no longer holds for a type.
static_assert(is_pod_v<JsonbDocument>, "JsonbDocument must be standard layout and trivial");
1555
static_assert(is_pod_v<JsonbValue>, "JsonbValue must be standard layout and trivial");
1556
static_assert(is_pod_v<JsonbDecimal32>, "JsonbDecimal32 must be standard layout and trivial");
1557
static_assert(is_pod_v<JsonbDecimal64>, "JsonbDecimal64 must be standard layout and trivial");
1558
static_assert(is_pod_v<JsonbDecimal128>, "JsonbDecimal128 must be standard layout and trivial");
1559
static_assert(is_pod_v<JsonbDecimal256>, "JsonbDecimal256 must be standard layout and trivial");
1560
static_assert(is_pod_v<JsonbInt8Val>, "JsonbInt8Val must be standard layout and trivial");
1561
static_assert(is_pod_v<JsonbInt32Val>, "JsonbInt32Val must be standard layout and trivial");
1562
static_assert(is_pod_v<JsonbInt64Val>, "JsonbInt64Val must be standard layout and trivial");
1563
static_assert(is_pod_v<JsonbInt128Val>, "JsonbInt128Val must be standard layout and trivial");
1564
static_assert(is_pod_v<JsonbDoubleVal>, "JsonbDoubleVal must be standard layout and trivial");
1565
static_assert(is_pod_v<JsonbFloatVal>, "JsonbFloatVal must be standard layout and trivial");
1566
static_assert(is_pod_v<JsonbBinaryVal>, "JsonbBinaryVal must be standard layout and trivial");
1567
static_assert(is_pod_v<ContainerVal>, "ContainerVal must be standard layout and trivial");
1568
1569
// Pins the field offsets of a decimal value type: precision at byte 0, scale
// at byte 4 and value at byte 8.
#define ASSERT_DECIMAL_LAYOUT(type)                \
1570
    static_assert(offsetof(type, precision) == 0); \
1571
    static_assert(offsetof(type, scale) == 4);     \
1572
    static_assert(offsetof(type, value) == 8);
1573
1574
ASSERT_DECIMAL_LAYOUT(JsonbDecimal32)
1575
ASSERT_DECIMAL_LAYOUT(JsonbDecimal64)
1576
ASSERT_DECIMAL_LAYOUT(JsonbDecimal128)
1577
ASSERT_DECIMAL_LAYOUT(JsonbDecimal256)
1578
1579
// Pins the `num` field of a numeric value type at byte 0.
// NOTE(review): JsonbFloatVal is covered by the is_pod_v assertions but has no
// ASSERT_NUMERIC_LAYOUT check below - confirm whether that is intentional.
#define ASSERT_NUMERIC_LAYOUT(type) static_assert(offsetof(type, num) == 0);
1580
1581
ASSERT_NUMERIC_LAYOUT(JsonbInt8Val)
1582
ASSERT_NUMERIC_LAYOUT(JsonbInt32Val)
1583
ASSERT_NUMERIC_LAYOUT(JsonbInt64Val)
1584
ASSERT_NUMERIC_LAYOUT(JsonbInt128Val)
1585
ASSERT_NUMERIC_LAYOUT(JsonbDoubleVal)
1586
1587
// Binary and container values: `size` at byte 0, `payload` at byte 4.
static_assert(offsetof(JsonbBinaryVal, size) == 0);
1588
static_assert(offsetof(JsonbBinaryVal, payload) == 4);
1589
1590
static_assert(offsetof(ContainerVal, size) == 0);
1591
static_assert(offsetof(ContainerVal, payload) == 4);
1592
1593
#pragma pack(pop)
1594
#if defined(__clang__)
1595
#pragma clang diagnostic pop
1596
#endif
1597
} // namespace doris
1598
1599
#endif // JSONB_JSONBDOCUMENT_H