Coverage Report

Created: 2025-09-21 19:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/util/jsonb_document.h
Line
Count
Source
1
/*
2
 *  Copyright (c) 2014, Facebook, Inc.
3
 *  All rights reserved.
4
 *
5
 *  This source code is licensed under the BSD-style license found in the
6
 *  LICENSE file in the root directory of this source tree. An additional grant
7
 *  of patent rights can be found in the PATENTS file in the same directory.
8
 *
9
 */
10
11
/*
12
 * This header defines JsonbDocument, JsonbKeyValue, and various value classes
13
 * which are derived from JsonbValue, and a forward iterator for container
14
 * values - essentially everything that is related to JSONB binary data
15
 * structures.
16
 *
17
 * Implementation notes:
18
 *
19
 * None of the classes in this header file can be instantiated directly (i.e.
20
 * you cannot create a JsonbKeyValue or JsonbValue object - all constructors
21
 * are declared non-public). We use the classes as wrappers on the packed JSONB
22
 * bytes (serialized), and cast the classes (types) to the underlying packed
23
 * byte array.
24
 *
25
 * For the same reason, we cannot define any JSONB value class to be virtual,
26
 * since we never call constructors, and will not instantiate vtbl and vptrs.
27
 *
28
 * Therefore, the classes are defined as packed structures (i.e. no data
29
 * alignment and padding), and the private member variables of the classes are
30
 * defined precisely in the same order as the JSONB spec. This ensures we
31
 * access the packed JSONB bytes correctly.
32
 *
33
 * The packed structures are highly optimized for in-place operations with low
34
 * overhead. The reads (and in-place writes) are performed directly on packed
35
 * bytes. There is no memory allocation at all at runtime.
36
 *
37
 * For updates/writes of values that will expand the original JSONB size, the
38
 * write will fail, and the caller needs to handle buffer increase.
39
 *
40
 * ** Iterator **
41
 * Both ObjectVal class and ArrayVal class have iterator type that you can use
42
 * to declare an iterator on a container object to go through the key-value
43
 * pairs or value list. The iterator has both non-const and const types.
44
 *
45
 * Note: iterators are forward direction only.
46
 *
47
 * ** Query **
48
 * Querying into containers is through the member functions find (for key/value
49
 * pairs) and get (for array elements), and is in streaming style. We don't
50
 * need to read/scan the whole JSONB packed bytes in order to return results.
51
 * Once the key/index is found, we will stop search.  You can use text to query
52
 * both objects and array (for array, text will be converted to integer index),
53
 * and use index to retrieve from array. Array index is 0-based.
54
 *
55
 * ** External dictionary **
56
 * During query processing, you can also pass a call-back function, so the
57
 * search will first try to check if the key string exists in the dictionary.
58
 * If so, search will be based on the id instead of the key string.
59
 * @author Tian Xia <tianx@fb.com>
60
 * 
61
 * this file is copied from 
62
 * https://github.com/facebook/mysql-5.6/blob/fb-mysql-5.6.35/fbson/FbsonDocument.h
63
 * and modified by Doris
64
 */
65
66
#ifndef JSONB_JSONBDOCUMENT_H
67
#define JSONB_JSONBDOCUMENT_H
68
69
#include <algorithm>
70
#include <cctype>
71
#include <charconv>
72
#include <cstddef>
73
#include <cstdint>
74
#include <string>
75
#include <string_view>
76
#include <type_traits>
77
78
#include "common/compiler_util.h" // IWYU pragma: keep
79
#include "common/status.h"
80
#include "runtime/define_primitive_type.h"
81
#include "util/string_util.h"
82
#include "vec/core/types.h"
83
84
// #include "util/string_parser.hpp"
85
86
// Concept to check for supported decimal types
87
template <typename T>
88
concept JsonbDecimalType = std::same_as<T, doris::vectorized::Decimal256> ||
89
                           std::same_as<T, doris::vectorized::Decimal64> ||
90
                           std::same_as<T, doris::vectorized::Decimal128V3> ||
91
                           std::same_as<T, doris::vectorized::Decimal32>;
92
93
namespace doris {
94
95
template <typename T>
96
constexpr bool is_pod_v = std::is_trivial_v<T> && std::is_standard_layout_v<T>;
97
98
struct JsonbStringVal;
99
struct ObjectVal;
100
struct ArrayVal;
101
struct JsonbBinaryVal;
102
struct ContainerVal;
103
104
template <JsonbDecimalType T>
105
struct JsonbDecimalVal;
106
107
using JsonbDecimal256 = JsonbDecimalVal<vectorized::Decimal256>;
108
using JsonbDecimal128 = JsonbDecimalVal<vectorized::Decimal128V3>;
109
using JsonbDecimal64 = JsonbDecimalVal<vectorized::Decimal64>;
110
using JsonbDecimal32 = JsonbDecimalVal<vectorized::Decimal32>;
111
112
template <typename T>
113
    requires std::is_integral_v<T> || std::is_floating_point_v<T>
114
struct NumberValT;
115
116
using JsonbInt8Val = NumberValT<int8_t>;
117
using JsonbInt16Val = NumberValT<int16_t>;
118
using JsonbInt32Val = NumberValT<int32_t>;
119
using JsonbInt64Val = NumberValT<int64_t>;
120
using JsonbInt128Val = NumberValT<int128_t>;
121
using JsonbDoubleVal = NumberValT<double>;
122
using JsonbFloatVal = NumberValT<float>;
123
124
template <typename T>
125
concept JsonbPodType = (std::same_as<T, JsonbStringVal> || std::same_as<T, ObjectVal> ||
126
                        std::same_as<T, ContainerVal> || std::same_as<T, ArrayVal> ||
127
                        std::same_as<T, JsonbBinaryVal> || std::same_as<T, JsonbDecimal32> ||
128
                        std::same_as<T, JsonbDecimal64> || std::same_as<T, JsonbDecimal128> ||
129
                        std::same_as<T, JsonbDecimal256> || std::same_as<T, JsonbDecimal32> ||
130
                        std::same_as<T, JsonbInt8Val> || std::same_as<T, JsonbInt16Val> ||
131
                        std::same_as<T, JsonbInt32Val> || std::same_as<T, JsonbInt64Val> ||
132
                        std::same_as<T, JsonbInt128Val> || std::same_as<T, JsonbFloatVal> ||
133
                        std::same_as<T, JsonbFloatVal> || std::same_as<T, JsonbDoubleVal>);
134
135
208k
#define JSONB_VER 1
136
137
using int128_t = __int128;
138
139
// forward declaration
140
struct JsonbValue;
141
142
class JsonbOutStream;
143
144
template <class OS_TYPE>
145
class JsonbWriterT;
146
147
using JsonbWriter = JsonbWriterT<JsonbOutStream>;
148
149
const int MaxNestingLevel = 100;
150
151
/*
152
 * JsonbType defines 16 primitive types (the original 10 listed below, plus Int128, Float and Decimal32/64/128/256) and 2 container types, as described
153
 * below.
154
 * NOTE: Do NOT modify the existing values or their order in this enum.
155
 *      You may only append new entries at the end before `NUM_TYPES`.
156
 *      This enum will be used in serialized data and/or persisted data.
157
 *      Changing existing values may break backward compatibility
158
 *      with previously stored or transmitted data.
159
 *
160
 * primitive_value ::=
161
 *   0x00        //null value (0 byte)
162
 * | 0x01        //boolean true (0 byte)
163
 * | 0x02        //boolean false (0 byte)
164
 * | 0x03 int8   //char/int8 (1 byte)
165
 * | 0x04 int16  //int16 (2 bytes)
166
 * | 0x05 int32  //int32 (4 bytes)
167
 * | 0x06 int64  //int64 (8 bytes)
168
 * | 0x07 double //floating point (8 bytes)
169
 * | 0x08 string //variable length string
170
 * | 0x09 binary //variable length binary
171
 *
172
 * container ::=
173
 *   0x0A int32 key_value_list //object, int32 is the total bytes of the object
174
 * | 0x0B int32 value_list     //array, int32 is the total bytes of the array
175
 */
176
enum class JsonbType : char {
177
    T_Null = 0x00,
178
    T_True = 0x01,
179
    T_False = 0x02,
180
    T_Int8 = 0x03,
181
    T_Int16 = 0x04,
182
    T_Int32 = 0x05,
183
    T_Int64 = 0x06,
184
    T_Double = 0x07,
185
    T_String = 0x08,
186
    T_Binary = 0x09,
187
    T_Object = 0x0A,
188
    T_Array = 0x0B,
189
    T_Int128 = 0x0C,
190
    T_Float = 0x0D,
191
    T_Decimal32 = 0x0E,  // DecimalV3 only
192
    T_Decimal64 = 0x0F,  // DecimalV3 only
193
    T_Decimal128 = 0x10, // DecimalV3 only
194
    T_Decimal256 = 0x11, // DecimalV3 only
195
    NUM_TYPES,
196
};
197
198
11
/// Translates a JSONB type tag into the matching PrimitiveType.
///
/// @param json_type  tag read from a JSONB value
/// @return the PrimitiveType paired with that tag (both boolean tags map to
///         TYPE_BOOLEAN)
/// @throws Exception with ErrorCode::INTERNAL_ERROR when the tag is not one of
///         the enumerators declared before JsonbType::NUM_TYPES
inline PrimitiveType get_primitive_type_from_json_type(JsonbType json_type) {
    // Indexed by the numeric value of the tag, so the rows must stay in the
    // same order as the JsonbType enumerators.
    static constexpr PrimitiveType primitive_by_tag[] = {
            TYPE_NULL,       // T_Null       0x00
            TYPE_BOOLEAN,    // T_True       0x01
            TYPE_BOOLEAN,    // T_False      0x02
            TYPE_TINYINT,    // T_Int8       0x03
            TYPE_SMALLINT,   // T_Int16      0x04
            TYPE_INT,        // T_Int32      0x05
            TYPE_BIGINT,     // T_Int64      0x06
            TYPE_DOUBLE,     // T_Double     0x07
            TYPE_STRING,     // T_String     0x08
            TYPE_BINARY,     // T_Binary     0x09
            TYPE_STRUCT,     // T_Object     0x0A
            TYPE_ARRAY,      // T_Array      0x0B
            TYPE_LARGEINT,   // T_Int128     0x0C
            TYPE_FLOAT,      // T_Float      0x0D
            TYPE_DECIMAL32,  // T_Decimal32  0x0E
            TYPE_DECIMAL64,  // T_Decimal64  0x0F
            TYPE_DECIMAL128I, // T_Decimal128 0x10
            TYPE_DECIMAL256, // T_Decimal256 0x11
    };
    constexpr int known_tags = static_cast<int>(JsonbType::NUM_TYPES);
    static_assert(sizeof(primitive_by_tag) / sizeof(primitive_by_tag[0]) == known_tags,
                  "primitive_by_tag must have one row per JsonbType enumerator");

    const int tag = static_cast<int>(json_type);
    if (tag < 0 || tag >= known_tags) {
        throw Exception(ErrorCode::INTERNAL_ERROR, "Unsupported JsonbType: {}",
                        static_cast<int>(json_type));
    }
    return primitive_by_tag[tag];
}
240
241
//for parse json path
242
constexpr char SCOPE = '$';
243
constexpr char BEGIN_MEMBER = '.';
244
constexpr char BEGIN_ARRAY = '[';
245
constexpr char END_ARRAY = ']';
246
constexpr char DOUBLE_QUOTE = '"';
247
constexpr char WILDCARD = '*';
248
constexpr char MINUS = '-';
249
constexpr char LAST[] = "last";
250
constexpr char ESCAPE = '\\';
251
constexpr unsigned int MEMBER_CODE = 0;
252
constexpr unsigned int ARRAY_CODE = 1;
253
254
/// A simple input stream class for the JSON path parser.
///
/// The stream is a cursor over a caller-owned character range; it never copies
/// or frees the characters. Besides the cursor it carries a small amount of
/// scratch state describing one path "leg" (pointer, length, escape flag) that
/// is read and written only through the accessors below.
class Stream {
public:
    /// Creates an input stream reading from a character string.
    /// @param string  the input string
    /// @param length  the length of the input string
    Stream(const char* string, size_t length) : m_position(string), m_end(string + length) {}

    /// Returns a pointer to the current position in the stream.
    const char* position() const { return m_position; }

    /// Returns a pointer to the position just after the end of the stream.
    const char* end() const { return m_end; }

    /// Returns the number of bytes remaining in the stream.
    size_t remaining() const {
        assert(m_position <= m_end);
        return static_cast<size_t>(m_end - m_position);
    }

    /// Tells if the stream has been exhausted.
    bool exhausted() const { return remaining() == 0; }

    /// Reads the next byte from the stream and moves the position forward.
    /// The stream must not be exhausted (checked by assert only).
    char read() {
        assert(!exhausted());
        return *m_position++;
    }

    /// Reads the next byte from the stream without moving the position forward.
    /// The stream must not be exhausted (checked by assert only).
    char peek() const {
        assert(!exhausted());
        return *m_position;
    }

    /// Moves the position to the next non-whitespace character.
    void skip_whitespace() {
        // std::isspace is only defined for values representable as unsigned char
        // (or EOF), so bytes >= 0x80 must not be passed as a possibly-negative char.
        m_position = std::find_if_not(m_position, m_end, [](char c) {
            return std::isspace(static_cast<unsigned char>(c)) != 0;
        });
    }

    /// Moves the position n bytes forward, then past any whitespace that follows.
    /// At least n bytes must remain (checked by assert only).
    void skip(size_t n) {
        assert(remaining() >= n);
        m_position += n;
        skip_whitespace();
    }

    /// Moves the position one byte forward without any bounds check.
    void advance() { m_position++; }

    /// Forgets the recorded leg pointer.
    void clear_leg_ptr() { leg_ptr = nullptr; }

    /// Records where the current leg starts.
    void set_leg_ptr(char* ptr) { leg_ptr = ptr; }

    /// Returns the recorded leg pointer (nullptr when none is recorded).
    char* get_leg_ptr() { return leg_ptr; }

    /// Resets the recorded leg length to zero.
    void clear_leg_len() { leg_len = 0; }

    /// Grows the recorded leg length by one byte.
    void add_leg_len() { leg_len++; }

    /// Returns the recorded leg length in bytes.
    unsigned int get_leg_len() const { return leg_len; }

    /// Strips every backslash from the recorded leg in place, writes a '\0'
    /// after the compacted text and shrinks the recorded length accordingly.
    /// Does nothing when no leg pointer is recorded.
    ///
    /// NOTE: the terminator is written at leg_ptr[new length]; when the leg
    /// contains no backslash that is one byte past the leg, so the buffer must
    /// have room for it.
    void remove_escapes() {
        if (leg_ptr == nullptr) {
            return;
        }
        unsigned int new_len = 0;
        for (unsigned int i = 0; i < leg_len; i++) {
            if (leg_ptr[i] != '\\') {
                leg_ptr[new_len++] = leg_ptr[i];
            }
        }
        leg_ptr[new_len] = '\0';
        leg_len = new_len;
    }

    /// Records whether the current leg contains escape characters.
    void set_has_escapes(bool has) { has_escapes = has; }

    /// Tells whether the current leg was flagged as containing escape characters.
    bool get_has_escapes() const { return has_escapes; }

private:
    /// The current position in the stream.
    const char* m_position = nullptr;

    /// The end of the stream.
    const char* const m_end;

    /// path leg ptr
    char* leg_ptr = nullptr;

    /// path leg len; zero-initialized so a fresh stream reports an empty leg
    unsigned int leg_len = 0;

    /// Whether to contain escape characters
    bool has_escapes = false;
};
349
350
struct leg_info {
351
    ///path leg ptr
352
    char* leg_ptr = nullptr;
353
354
    ///path leg len
355
    unsigned int leg_len;
356
357
    ///array_index
358
    int array_index;
359
360
    ///type: 0 is member 1 is array
361
    unsigned int type;
362
363
0
    bool to_string(std::string* str) const {
364
0
        if (type == MEMBER_CODE) {
365
0
            str->push_back(BEGIN_MEMBER);
366
0
            bool contains_space = false;
367
0
            std::string tmp;
368
0
            for (auto* it = leg_ptr; it != (leg_ptr + leg_len); ++it) {
369
0
                if (std::isspace(*it)) {
370
0
                    contains_space = true;
371
0
                } else if (*it == '"' || *it == ESCAPE || *it == '\r' || *it == '\n' ||
372
0
                           *it == '\b' || *it == '\t') {
373
0
                    tmp.push_back(ESCAPE);
374
0
                }
375
0
                tmp.push_back(*it);
376
0
            }
377
0
            if (contains_space) {
378
0
                str->push_back(DOUBLE_QUOTE);
379
0
            }
380
0
            str->append(tmp);
381
0
            if (contains_space) {
382
0
                str->push_back(DOUBLE_QUOTE);
383
0
            }
384
0
            return true;
385
0
        } else if (type == ARRAY_CODE) {
386
0
            str->push_back(BEGIN_ARRAY);
387
0
            std::string int_str = std::to_string(array_index);
388
0
            str->append(int_str);
389
0
            str->push_back(END_ARRAY);
390
0
            return true;
391
0
        } else {
392
0
            return false;
393
0
        }
394
0
    }
395
};
396
397
class JsonbPath {
398
public:
399
    // parse json path
400
    static bool parsePath(Stream* stream, JsonbPath* path);
401
402
    static bool parse_array(Stream* stream, JsonbPath* path);
403
    static bool parse_member(Stream* stream, JsonbPath* path);
404
405
    //return true if json path valid else return false
406
    bool seek(const char* string, size_t length);
407
408
90
    void add_leg_to_leg_vector(std::unique_ptr<leg_info> leg) {
409
90
        leg_vector.emplace_back(leg.release());
410
90
    }
411
412
0
    void pop_leg_from_leg_vector() { leg_vector.pop_back(); }
413
414
0
    bool to_string(std::string* res) const {
415
0
        res->push_back(SCOPE);
416
0
        for (const auto& leg : leg_vector) {
417
0
            auto valid = leg->to_string(res);
418
0
            if (!valid) {
419
0
                return false;
420
0
            }
421
0
        }
422
0
        return true;
423
0
    }
424
425
185
    size_t get_leg_vector_size() const { return leg_vector.size(); }
426
427
295
    leg_info* get_leg_from_leg_vector(size_t i) const { return leg_vector[i].get(); }
428
429
0
    bool is_wildcard() const { return _is_wildcard; }
430
95
    bool is_supper_wildcard() const { return _is_supper_wildcard; }
431
432
6
    void clean() { leg_vector.clear(); }
433
434
private:
435
    std::vector<std::unique_ptr<leg_info>> leg_vector;
436
    bool _is_wildcard = false;        // whether the path is a wildcard path
437
    bool _is_supper_wildcard = false; // supper wildcard likes '$**.a' or '$**[1]'
438
};
439
440
/*
441
 * JsonbFwdIteratorT implements JSONB's iterator template.
442
 *
443
 * Note: it is a FORWARD iterator only due to the design of JSONB format.
444
 */
445
/// Forward iterator over variable-length packed elements.
///
/// @tparam Iter_Type  pointer type to the element (const or non-const); the
///                    element must provide numPackedBytes(), the byte distance
///                    to the next element
/// @tparam Cont_Type  container tag; only iterators with the same tag convert
///                    into one another
template <class Iter_Type, class Cont_Type>
class JsonbFwdIteratorT {
public:
    using iterator = Iter_Type;
    using pointer = typename std::iterator_traits<Iter_Type>::pointer;
    using reference = typename std::iterator_traits<Iter_Type>::reference;

    explicit JsonbFwdIteratorT() : current_(nullptr) {}
    explicit JsonbFwdIteratorT(const iterator& i) : current_(i) {}

    // allow non-const to const iterator conversion (same container type)
    template <class Iter_Ty>
    JsonbFwdIteratorT(const JsonbFwdIteratorT<Iter_Ty, Cont_Type>& rhs) : current_(rhs.base()) {}

    bool operator==(const JsonbFwdIteratorT& rhs) const { return current_ == rhs.current_; }

    bool operator!=(const JsonbFwdIteratorT& rhs) const { return !(*this == rhs); }

    bool operator<(const JsonbFwdIteratorT& rhs) const { return current_ < rhs.current_; }

    // Strictly greater: negating operator< would yield ">=" and make an
    // iterator compare greater than itself.
    bool operator>(const JsonbFwdIteratorT& rhs) const { return rhs.current_ < current_; }

    /// Pre-increment: moves to the next packed element. The iterator must point
    /// at a valid element.
    JsonbFwdIteratorT& operator++() {
        step();
        return *this;
    }

    /// Post-increment: moves to the next packed element and returns the
    /// previous position.
    JsonbFwdIteratorT operator++(int) {
        auto previous = *this;
        step();
        return previous;
    }

    explicit operator pointer() const { return current_; }

    reference operator*() const { return *current_; }

    pointer operator->() const { return current_; }

    iterator base() const { return current_; }

private:
    // Elements are variable-length, so the next one starts numPackedBytes()
    // bytes after the current one. A C-style cast is used on purpose: it has
    // to work for both const and non-const element pointers.
    void step() { current_ = (iterator)(((char*)current_) + current_->numPackedBytes()); }

    iterator current_;
};
489
490
using hDictInsert = int (*)(const char*, unsigned int);
491
using hDictFind = int (*)(const char*, unsigned int);
492
493
using JsonbTypeUnder = std::underlying_type_t<JsonbType>;
494
495
#if defined(__clang__)
496
#pragma clang diagnostic push
497
#pragma clang diagnostic ignored "-Wzero-length-array"
498
#endif
499
#pragma pack(push, 1)
500
501
/*
502
 * JsonbDocument is the main object that accesses and queries JSONB packed
503
 * bytes. NOTE: JsonbDocument only allows object container as the top level
504
 * JSONB value. However, you can use the static method "createValue" to get any
505
 * JsonbValue object from the packed bytes.
506
 *
507
 * JsonbDocument object also dereferences to an object container value
508
 * (ObjectVal) once JSONB is loaded.
509
 *
510
 * ** Load **
511
 * JsonbDocument is usable after loading packed bytes (memory location) into
512
 * the object. We only need the header and first few bytes of the payload after
513
 * header to verify the JSONB.
514
 *
515
 * Note: creating a JsonbDocument (through createDocument) does not allocate
516
 * any memory. The document object is an efficient wrapper on the packed bytes
517
 * which is accessed directly.
518
 *
519
 * ** Query **
520
 * Query is through dereferencing into ObjectVal.
521
 */
522
class JsonbDocument {
523
public:
524
    // create an JsonbDocument object from JSONB packed bytes
525
    [[nodiscard]] static Status checkAndCreateDocument(const char* pb, size_t size,
526
                                                       JsonbDocument** doc);
527
528
    // create an JsonbValue from JSONB packed bytes
529
    static JsonbValue* createValue(const char* pb, size_t size);
530
531
0
    uint8_t version() const { return header_.ver_; }
532
533
26.6k
    JsonbValue* getValue() { return ((JsonbValue*)payload_); }
534
535
    void setValue(const JsonbValue* value);
536
537
    unsigned int numPackedBytes() const;
538
539
    // ObjectVal* operator->();
540
541
    const ObjectVal* operator->() const;
542
543
private:
544
    /*
545
   * JsonbHeader class defines JSONB header (internal to JsonbDocument).
546
   *
547
   * Currently it only contains version information (1-byte). We may expand the
548
   * header to include checksum of the JSONB binary for more security.
549
   */
550
    struct JsonbHeader {
551
        uint8_t ver_;
552
    } header_;
553
554
    char payload_[0];
555
};
556
557
/*
558
 * JsonbKeyValue class defines JSONB key type, as described below.
559
 *
560
 * key ::=
561
 *   0x00 int8    //1-byte dictionary id
562
 * | int8 (byte*) //int8 (>0) is the size of the key string
563
 *
564
 * value ::= primitive_value | container
565
 *
566
 * JsonbKeyValue can be either an id mapping to the key string in an external
567
 * dictionary, or it is the original key string. Whether to read an id or a
568
 * string is decided by the first byte (size).
569
 *
570
 * Note: a key object must be followed by a value object. Therefore, a key
571
 * object implicitly refers to a key-value pair, and you can get the value
572
 * object right after the key object. The function numPackedBytes hence
573
 * indicates the total size of the key-value pair, so that we will be able to go
574
 * to next pair from the key.
575
 *
576
 * ** Dictionary size **
577
 * By default, the dictionary size is 255 (1-byte). Users can define
578
 * "USE_LARGE_DICT" to increase the dictionary size to 65535 (2-byte).
579
 */
580
class JsonbKeyValue {
581
public:
582
    // now we use sMaxKeyId to represent an empty key
583
    static const int sMaxKeyId = 65535;
584
    using keyid_type = uint16_t;
585
586
    static const uint8_t sMaxKeyLen = 64;
587
588
    // size of the key. 0 indicates it is stored as id
589
1.30k
    uint8_t klen() const { return size; }
590
591
    // get the key string. Note the string may not be null terminated.
592
663
    const char* getKeyStr() const { return key.str_; }
593
594
9.22k
    keyid_type getKeyId() const { return key.id_; }
595
596
34.9k
    unsigned int keyPackedBytes() const {
597
34.9k
        return size ? (sizeof(size) + size) : (sizeof(size) + sizeof(keyid_type));
598
34.9k
    }
599
600
17.4k
    JsonbValue* value() const { return (JsonbValue*)(((char*)this) + keyPackedBytes()); }
601
602
    // size of the total packed bytes (key+value)
603
    unsigned int numPackedBytes() const;
604
605
    uint8_t size;
606
607
    union key_ {
608
        keyid_type id_;
609
        char str_[1];
610
    } key;
611
};
612
613
struct JsonbFindResult {
614
    const JsonbValue* value = nullptr;   // found value
615
    std::unique_ptr<JsonbWriter> writer; // writer to write the value
616
    bool is_wildcard = false;            // whether the path is a wildcard path
617
};
618
619
/*
620
 * JsonbValue is the base class of all JSONB types. It contains only one member
621
 * variable - type info, which can be retrieved by member functions is[Type]()
622
 * or type().
623
 */
624
struct JsonbValue {
625
    static const uint32_t sMaxValueLen = 1 << 24; // 16M
626
627
4.22k
    bool isNull() const { return (type == JsonbType::T_Null); }
628
21
    bool isTrue() const { return (type == JsonbType::T_True); }
629
1
    bool isFalse() const { return (type == JsonbType::T_False); }
630
4
    bool isInt() const { return isInt8() || isInt16() || isInt32() || isInt64() || isInt128(); }
631
4
    bool isInt8() const { return (type == JsonbType::T_Int8); }
632
2
    bool isInt16() const { return (type == JsonbType::T_Int16); }
633
0
    bool isInt32() const { return (type == JsonbType::T_Int32); }
634
3
    bool isInt64() const { return (type == JsonbType::T_Int64); }
635
1
    bool isDouble() const { return (type == JsonbType::T_Double); }
636
1
    bool isFloat() const { return (type == JsonbType::T_Float); }
637
33
    bool isString() const { return (type == JsonbType::T_String); }
638
1.07k
    bool isBinary() const { return (type == JsonbType::T_Binary); }
639
6
    bool isObject() const { return (type == JsonbType::T_Object); }
640
7
    bool isArray() const { return (type == JsonbType::T_Array); }
641
3
    bool isInt128() const { return (type == JsonbType::T_Int128); }
642
4
    bool isDecimal() const {
643
4
        return (type == JsonbType::T_Decimal32 || type == JsonbType::T_Decimal64 ||
644
4
                type == JsonbType::T_Decimal128 || type == JsonbType::T_Decimal256);
645
4
    }
646
1
    bool isDecimal32() const { return (type == JsonbType::T_Decimal32); }
647
1
    bool isDecimal64() const { return (type == JsonbType::T_Decimal64); }
648
1
    bool isDecimal128() const { return (type == JsonbType::T_Decimal128); }
649
1
    bool isDecimal256() const { return (type == JsonbType::T_Decimal256); }
650
651
11
    PrimitiveType get_primitive_type() const { return get_primitive_type_from_json_type(type); }
652
653
0
    const char* typeName() const {
654
0
        switch (type) {
655
0
        case JsonbType::T_Null:
656
0
            return "null";
657
0
        case JsonbType::T_True:
658
0
        case JsonbType::T_False:
659
0
            return "bool";
660
0
        case JsonbType::T_Int8:
661
0
        case JsonbType::T_Int16:
662
0
        case JsonbType::T_Int32:
663
0
            return "int";
664
0
        case JsonbType::T_Int64:
665
0
            return "bigint";
666
0
        case JsonbType::T_Int128:
667
0
            return "largeint";
668
0
        case JsonbType::T_Double:
669
0
            return "double";
670
0
        case JsonbType::T_Float:
671
0
            return "float";
672
0
        case JsonbType::T_String:
673
0
            return "string";
674
0
        case JsonbType::T_Binary:
675
0
            return "binary";
676
0
        case JsonbType::T_Object:
677
0
            return "object";
678
0
        case JsonbType::T_Array:
679
0
            return "array";
680
0
        case JsonbType::T_Decimal32:
681
0
            return "Decimal32";
682
0
        case JsonbType::T_Decimal64:
683
0
            return "Decimal64";
684
0
        case JsonbType::T_Decimal128:
685
0
            return "Decimal128";
686
0
        case JsonbType::T_Decimal256:
687
0
            return "Decimal256";
688
0
        default:
689
0
            return "unknown";
690
0
        }
691
0
    }
692
693
    // size of the total packed bytes
694
    unsigned int numPackedBytes() const;
695
696
    // size of the value in bytes
697
    unsigned int size() const;
698
699
    //Get the number of jsonbvalue elements
700
    int numElements() const;
701
702
    //Whether to include the jsonbvalue rhs
703
    bool contains(JsonbValue* rhs) const;
704
705
    // find the JSONB value by JsonbPath
706
    JsonbFindResult findValue(JsonbPath& path) const;
707
    friend class JsonbDocument;
708
709
    JsonbType type; // type info
710
711
    char payload[0]; // payload, which is the packed bytes of the value
712
713
    /**
714
    * @brief Unpacks the underlying Jsonb binary content as a pointer to type `T`.
715
    *
716
    * @tparam T A POD (Plain Old Data) type that must satisfy the `JsonbPodType` concept.
717
    *           This ensures that `T` is trivially copyable, standard-layout, and safe to
718
    *           reinterpret from raw bytes without invoking undefined behavior.
719
    *
720
    * @return A pointer to a `const T` object, interpreted from the internal buffer.
721
    *
722
    * @note The caller must ensure that the current JsonbValue actually contains data
723
    *       compatible with type `T`, otherwise the result is undefined.
724
    */
725
    template <JsonbPodType T>
726
52.0k
    const T* unpack() const {
727
52.0k
        static_assert(is_pod_v<T>, "T must be a POD type");
728
52.0k
        return reinterpret_cast<const T*>(payload);
729
52.0k
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_9ObjectValEEEPKT_v
Line
Count
Source
726
19.5k
    const T* unpack() const {
727
19.5k
        static_assert(is_pod_v<T>, "T must be a POD type");
728
19.5k
        return reinterpret_cast<const T*>(payload);
729
19.5k
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIaEEEEPKT_v
Line
Count
Source
726
720
    const T* unpack() const {
727
720
        static_assert(is_pod_v<T>, "T must be a POD type");
728
720
        return reinterpret_cast<const T*>(payload);
729
720
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIsEEEEPKT_v
Line
Count
Source
726
103
    const T* unpack() const {
727
103
        static_assert(is_pod_v<T>, "T must be a POD type");
728
103
        return reinterpret_cast<const T*>(payload);
729
103
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIiEEEEPKT_v
Line
Count
Source
726
3.47k
    const T* unpack() const {
727
3.47k
        static_assert(is_pod_v<T>, "T must be a POD type");
728
3.47k
        return reinterpret_cast<const T*>(payload);
729
3.47k
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIlEEEEPKT_v
Line
Count
Source
726
1.87k
    const T* unpack() const {
727
1.87k
        static_assert(is_pod_v<T>, "T must be a POD type");
728
1.87k
        return reinterpret_cast<const T*>(payload);
729
1.87k
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTInEEEEPKT_v
Line
Count
Source
726
4.16k
    const T* unpack() const {
727
4.16k
        static_assert(is_pod_v<T>, "T must be a POD type");
728
4.16k
        return reinterpret_cast<const T*>(payload);
729
4.16k
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_14JsonbBinaryValEEEPKT_v
Line
Count
Source
726
19.6k
    const T* unpack() const {
727
19.6k
        static_assert(is_pod_v<T>, "T must be a POD type");
728
19.6k
        return reinterpret_cast<const T*>(payload);
729
19.6k
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_12ContainerValEEEPKT_v
Line
Count
Source
726
1.84k
    const T* unpack() const {
727
1.84k
        static_assert(is_pod_v<T>, "T must be a POD type");
728
1.84k
        return reinterpret_cast<const T*>(payload);
729
1.84k
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_8ArrayValEEEPKT_v
Line
Count
Source
726
101
    const T* unpack() const {
727
101
        static_assert(is_pod_v<T>, "T must be a POD type");
728
101
        return reinterpret_cast<const T*>(payload);
729
101
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIdEEEEPKT_v
Line
Count
Source
726
169
    const T* unpack() const {
727
169
        static_assert(is_pod_v<T>, "T must be a POD type");
728
169
        return reinterpret_cast<const T*>(payload);
729
169
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIfEEEEPKT_v
Line
Count
Source
726
25
    const T* unpack() const {
727
25
        static_assert(is_pod_v<T>, "T must be a POD type");
728
25
        return reinterpret_cast<const T*>(payload);
729
25
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_14JsonbStringValEEEPKT_v
Line
Count
Source
726
265
    const T* unpack() const {
727
265
        static_assert(is_pod_v<T>, "T must be a POD type");
728
265
        return reinterpret_cast<const T*>(payload);
729
265
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_15JsonbDecimalValINS_10vectorized7DecimalIiEEEEEEPKT_v
Line
Count
Source
726
13
    const T* unpack() const {
727
13
        static_assert(is_pod_v<T>, "T must be a POD type");
728
13
        return reinterpret_cast<const T*>(payload);
729
13
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_15JsonbDecimalValINS_10vectorized7DecimalIlEEEEEEPKT_v
Line
Count
Source
726
13
    const T* unpack() const {
727
13
        static_assert(is_pod_v<T>, "T must be a POD type");
728
13
        return reinterpret_cast<const T*>(payload);
729
13
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_15JsonbDecimalValINS_10vectorized12Decimal128V3EEEEEPKT_v
Line
Count
Source
726
17
    const T* unpack() const {
727
17
        static_assert(is_pod_v<T>, "T must be a POD type");
728
17
        return reinterpret_cast<const T*>(payload);
729
17
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_15JsonbDecimalValINS_10vectorized7DecimalIN4wide7integerILm256EiEEEEEEEEPKT_v
Line
Count
Source
726
13
    const T* unpack() const {
727
13
        static_assert(is_pod_v<T>, "T must be a POD type");
728
13
        return reinterpret_cast<const T*>(payload);
729
13
    }
730
731
    // /**
732
    // * @brief Unpacks the underlying Jsonb binary content as a pointer to type `T`.
733
    // *
734
    // * @tparam T A POD (Plain Old Data) type that must satisfy the `JsonbPodType` concept.
735
    // *           This ensures that `T` is trivially copyable, standard-layout, and safe to
736
    // *           reinterpret from raw bytes without invoking undefined behavior.
737
    // *
738
    // * @return A pointer to a `T` object, interpreted from the internal buffer.
739
    // *
740
    // * @note The caller must ensure that the current JsonbValue actually contains data
741
    // *       compatible with type `T`, otherwise the result is undefined.
742
    // */
743
    // template <JsonbPodType T>
744
    // T* unpack() {
745
    //     static_assert(is_pod_v<T>, "T must be a POD type");
746
    //     return reinterpret_cast<T*>(payload);
747
    // }
748
749
    int128_t int_val() const;
750
};
751
752
// inline ObjectVal* JsonbDocument::operator->() {
753
//     return (((JsonbValue*)payload_)->unpack<ObjectVal>());
754
// }
755
756
19.0k
// Returns the document's root value viewed as an ObjectVal.
//
// No check is made here that the root value actually is an object; the
// bytes after the root type tag are simply reinterpreted.
inline const ObjectVal* JsonbDocument::operator->() const {
    // Keep the pointer const: this is a const member and unpack() is const,
    // so there is no reason to cast constness away from payload_.
    const auto* root = reinterpret_cast<const JsonbValue*>(payload_);
    return root->unpack<ObjectVal>();
}
759
760
/*
761
 * NumberValT is the template struct describing the payload of all number
762
 * types (integers, float and double); it is obtained via JsonbValue::unpack.
763
 */
764
template <typename T>
765
    requires std::is_integral_v<T> || std::is_floating_point_v<T>
766
struct NumberValT {
767
public:
768
10.5k
    T val() const { return num; }
_ZNK5doris10NumberValTIaE3valEv
Line
Count
Source
768
720
    T val() const { return num; }
_ZNK5doris10NumberValTIsE3valEv
Line
Count
Source
768
103
    T val() const { return num; }
_ZNK5doris10NumberValTIiE3valEv
Line
Count
Source
768
3.47k
    T val() const { return num; }
_ZNK5doris10NumberValTIlE3valEv
Line
Count
Source
768
1.87k
    T val() const { return num; }
_ZNK5doris10NumberValTInE3valEv
Line
Count
Source
768
4.16k
    T val() const { return num; }
_ZNK5doris10NumberValTIdE3valEv
Line
Count
Source
768
169
    T val() const { return num; }
_ZNK5doris10NumberValTIfE3valEv
Line
Count
Source
768
25
    T val() const { return num; }
769
770
    static unsigned int numPackedBytes() { return sizeof(JsonbValue) + sizeof(T); }
771
772
    T num;
773
};
774
775
9
// Returns the value of any integer-typed JSONB value widened to int128_t.
// Throws Exception(INTERNAL_ERROR) when the value is not one of the integer
// types (T_Int8, T_Int16, T_Int32, T_Int64, T_Int128).
inline int128_t JsonbValue::int_val() const {
    if (type == JsonbType::T_Int8) {
        return unpack<JsonbInt8Val>()->val();
    }
    if (type == JsonbType::T_Int16) {
        return unpack<JsonbInt16Val>()->val();
    }
    if (type == JsonbType::T_Int32) {
        return unpack<JsonbInt32Val>()->val();
    }
    if (type == JsonbType::T_Int64) {
        return unpack<JsonbInt64Val>()->val();
    }
    if (type == JsonbType::T_Int128) {
        return unpack<JsonbInt128Val>()->val();
    }

    // Anything else is not an integer value.
    throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB value type: {}",
                    static_cast<int32_t>(type));
}
792
793
template <JsonbDecimalType T>
794
struct JsonbDecimalVal {
795
public:
796
    using NativeType = typename T::NativeType;
797
798
    // get the decimal value
799
26
    NativeType val() const {
800
        // to avoid memory alignment issues, we use memcpy to copy the value
801
26
        NativeType tmp;
802
26
        memcpy(&tmp, &value, sizeof(NativeType));
803
26
        return tmp;
804
26
    }
_ZNK5doris15JsonbDecimalValINS_10vectorized7DecimalIiEEE3valEv
Line
Count
Source
799
6
    NativeType val() const {
800
        // to avoid memory alignment issues, we use memcpy to copy the value
801
6
        NativeType tmp;
802
6
        memcpy(&tmp, &value, sizeof(NativeType));
803
6
        return tmp;
804
6
    }
_ZNK5doris15JsonbDecimalValINS_10vectorized7DecimalIlEEE3valEv
Line
Count
Source
799
6
    NativeType val() const {
800
        // to avoid memory alignment issues, we use memcpy to copy the value
801
6
        NativeType tmp;
802
6
        memcpy(&tmp, &value, sizeof(NativeType));
803
6
        return tmp;
804
6
    }
_ZNK5doris15JsonbDecimalValINS_10vectorized12Decimal128V3EE3valEv
Line
Count
Source
799
8
    NativeType val() const {
800
        // to avoid memory alignment issues, we use memcpy to copy the value
801
8
        NativeType tmp;
802
8
        memcpy(&tmp, &value, sizeof(NativeType));
803
8
        return tmp;
804
8
    }
_ZNK5doris15JsonbDecimalValINS_10vectorized7DecimalIN4wide7integerILm256EiEEEEE3valEv
Line
Count
Source
799
6
    NativeType val() const {
800
        // to avoid memory alignment issues, we use memcpy to copy the value
801
6
        NativeType tmp;
802
6
        memcpy(&tmp, &value, sizeof(NativeType));
803
6
        return tmp;
804
6
    }
805
806
29
    static constexpr int numPackedBytes() {
807
29
        return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value);
808
29
    }
_ZN5doris15JsonbDecimalValINS_10vectorized7DecimalIiEEE14numPackedBytesEv
Line
Count
Source
806
7
    static constexpr int numPackedBytes() {
807
7
        return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value);
808
7
    }
_ZN5doris15JsonbDecimalValINS_10vectorized7DecimalIlEEE14numPackedBytesEv
Line
Count
Source
806
7
    static constexpr int numPackedBytes() {
807
7
        return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value);
808
7
    }
_ZN5doris15JsonbDecimalValINS_10vectorized12Decimal128V3EE14numPackedBytesEv
Line
Count
Source
806
9
    static constexpr int numPackedBytes() {
807
9
        return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value);
808
9
    }
_ZN5doris15JsonbDecimalValINS_10vectorized7DecimalIN4wide7integerILm256EiEEEEE14numPackedBytesEv
Line
Count
Source
806
6
    static constexpr int numPackedBytes() {
807
6
        return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value);
808
6
    }
809
810
    uint32_t precision;
811
    uint32_t scale;
812
    NativeType value;
813
};
814
815
/*
816
 * JsonbBinaryVal is the payload layout for string and binary types
817
 * (JsonbStringVal derives from it). The size indicates the total bytes of the payload.
818
 */
819
struct JsonbBinaryVal {
820
public:
821
    // size of the blob payload only
822
2.11k
    unsigned int getBlobLen() const { return size; }
823
824
    // return the blob as byte array
825
4.51k
    const char* getBlob() const { return payload; }
826
827
    // size of the total packed bytes
828
15.3k
    unsigned int numPackedBytes() const { return sizeof(JsonbValue) + sizeof(size) + size; }
829
    friend class JsonbDocument;
830
831
    uint32_t size;
832
    char payload[0];
833
};
834
835
/*
836
 * String type
837
 * Note: JSONB string may not be a c-string (NULL-terminated)
838
 */
839
struct JsonbStringVal : public JsonbBinaryVal {
840
public:
841
    /*
842
    This function return the actual size of a string. Since for
843
    a string, it can be null-terminated with null paddings or it
844
    can take all the space in the payload without null in the end.
845
    So we need to check it to get the true actual length of a string.
846
  */
847
136
    size_t length() const {
848
        // It's an empty string
849
136
        if (0 == size) {
850
0
            return size;
851
0
        }
852
        // The string stored takes all the spaces in payload
853
136
        if (payload[size - 1] != 0) {
854
136
            return size;
855
136
        }
856
        // It's shorter than the size of payload
857
0
        return strnlen(payload, size);
858
136
    }
859
    // convert the string (case insensitive) to a boolean value
860
    // "false": 0
861
    // "true": 1
862
    // all other strings: -1
863
0
    int getBoolVal() {
864
0
        if (size == 4 && tolower(payload[0]) == 't' && tolower(payload[1]) == 'r' &&
865
0
            tolower(payload[2]) == 'u' && tolower(payload[3]) == 'e') {
866
0
            return 1;
867
0
        } else if (size == 5 && tolower(payload[0]) == 'f' && tolower(payload[1]) == 'a' &&
868
0
                   tolower(payload[2]) == 'l' && tolower(payload[3]) == 's' &&
869
0
                   tolower(payload[4]) == 'e') {
870
0
            return 0;
871
0
        } else {
872
0
            return -1;
873
0
        }
874
0
    }
875
};
876
877
/*
878
 * ContainerVal is the common payload layout for object and array types
879
 * (ObjectVal and ArrayVal derive from it). The size indicates the total bytes of the payload.
880
 */
881
struct ContainerVal {
882
    // size of the container payload only
883
0
    unsigned int getContainerSize() const { return size; }
884
885
    // return the container payload as byte array
886
0
    const char* getPayload() const { return payload; }
887
888
    // size of the total packed bytes
889
1.84k
    unsigned int numPackedBytes() const { return sizeof(JsonbValue) + sizeof(size) + size; }
890
    friend class JsonbDocument;
891
892
    uint32_t size;
893
    char payload[0];
894
};
895
896
/*
897
 * Object type
898
 */
899
struct ObjectVal : public ContainerVal {
900
    using value_type = JsonbKeyValue;
901
    using pointer = value_type*;
902
    using const_pointer = const value_type*;
903
    using iterator = JsonbFwdIteratorT<pointer, ObjectVal>;
904
    using const_iterator = JsonbFwdIteratorT<const_pointer, ObjectVal>;
905
906
0
    const_iterator search(const char* key, hDictFind handler = nullptr) const {
907
0
        return const_cast<ObjectVal*>(this)->search(key, handler);
908
0
    }
909
910
0
    const_iterator search(const char* key, unsigned int klen, hDictFind handler = nullptr) const {
911
0
        return const_cast<ObjectVal*>(this)->search(key, klen, handler);
912
0
    }
913
914
0
    const_iterator search(int key_id) const { return const_cast<ObjectVal*>(this)->search(key_id); }
915
0
    iterator search(const char* key, hDictFind handler = nullptr) {
916
0
        if (!key) {
917
0
            return end();
918
0
        }
919
0
        return search(key, (unsigned int)strlen(key), handler);
920
0
    }
921
922
25
    iterator search(const char* key, unsigned int klen, hDictFind handler = nullptr) {
923
25
        if (!key || !klen) {
924
0
            return end();
925
0
        }
926
927
25
        int key_id = -1;
928
25
        if (handler && (key_id = handler(key, klen)) >= 0) {
929
0
            return search(key_id);
930
0
        }
931
25
        return internalSearch(key, klen);
932
25
    }
933
934
0
    iterator search(int key_id) {
935
0
        if (key_id < 0 || key_id > JsonbKeyValue::sMaxKeyId) {
936
0
            return end();
937
0
        }
938
939
0
        const char* pch = payload;
940
0
        const char* fence = payload + size;
941
942
0
        while (pch < fence) {
943
0
            auto* pkey = (JsonbKeyValue*)(pch);
944
0
            if (!pkey->klen() && key_id == pkey->getKeyId()) {
945
0
                return iterator(pkey);
946
0
            }
947
0
            pch += pkey->numPackedBytes();
948
0
        }
949
950
0
        assert(pch == fence);
951
0
        return end();
952
0
    }
953
954
    // Get number of elements in object
955
6
    int numElem() const {
956
6
        const char* pch = payload;
957
6
        const char* fence = payload + size;
958
959
6
        unsigned int num = 0;
960
28
        while (pch < fence) {
961
22
            auto* pkey = (JsonbKeyValue*)(pch);
962
22
            ++num;
963
22
            pch += pkey->numPackedBytes();
964
22
        }
965
966
6
        assert(pch == fence);
967
968
6
        return num;
969
6
    }
970
971
0
    JsonbKeyValue* getJsonbKeyValue(unsigned int i) const {
972
0
        const char* pch = payload;
973
0
        const char* fence = payload + size;
974
975
0
        unsigned int num = 0;
976
0
        while (pch < fence) {
977
0
            auto* pkey = (JsonbKeyValue*)(pch);
978
0
            if (num == i) {
979
0
                return pkey;
980
0
            }
981
0
            ++num;
982
0
            pch += pkey->numPackedBytes();
983
0
        }
984
985
0
        assert(pch == fence);
986
987
0
        return nullptr;
988
0
    }
989
990
0
    JsonbValue* find(const char* key, hDictFind handler = nullptr) const {
991
0
        return const_cast<ObjectVal*>(this)->find(key, handler);
992
0
    }
993
994
25
    JsonbValue* find(const char* key, unsigned int klen, hDictFind handler = nullptr) const {
995
25
        return const_cast<ObjectVal*>(this)->find(key, klen, handler);
996
25
    }
997
0
    JsonbValue* find(int key_id) const { return const_cast<ObjectVal*>(this)->find(key_id); }
998
999
    // find the JSONB value by a key string (null terminated)
1000
0
    JsonbValue* find(const char* key, hDictFind handler = nullptr) {
1001
0
        if (!key) {
1002
0
            return nullptr;
1003
0
        }
1004
0
        return find(key, (unsigned int)strlen(key), handler);
1005
0
    }
1006
1007
    // find the JSONB value by a key string (with length)
1008
25
    JsonbValue* find(const char* key, unsigned int klen, hDictFind handler = nullptr) {
1009
25
        iterator kv = search(key, klen, handler);
1010
25
        if (end() == kv) {
1011
2
            return nullptr;
1012
2
        }
1013
23
        return kv->value();
1014
25
    }
1015
1016
    // find the JSONB value by a key dictionary ID
1017
0
    JsonbValue* find(int key_id) {
1018
0
        iterator kv = search(key_id);
1019
0
        if (end() == kv) {
1020
0
            return nullptr;
1021
0
        }
1022
0
        return kv->value();
1023
0
    }
1024
1025
0
    iterator begin() { return iterator((pointer)payload); }
1026
1027
1.64k
    const_iterator begin() const { return const_iterator((pointer)payload); }
1028
1029
27
    iterator end() { return iterator((pointer)(payload + size)); }
1030
1031
18.4k
    const_iterator end() const { return const_iterator((pointer)(payload + size)); }
1032
1033
private:
1034
25
    iterator internalSearch(const char* key, unsigned int klen) {
1035
25
        const char* pch = payload;
1036
25
        const char* fence = payload + size;
1037
1038
35
        while (pch < fence) {
1039
33
            auto* pkey = (JsonbKeyValue*)(pch);
1040
33
            if (klen == pkey->klen() && strncmp(key, pkey->getKeyStr(), klen) == 0) {
1041
23
                return iterator(pkey);
1042
23
            }
1043
10
            pch += pkey->numPackedBytes();
1044
10
        }
1045
1046
25
        assert(pch == fence);
1047
1048
2
        return end();
1049
2
    }
1050
};
1051
1052
/*
1053
 * Array type
1054
 */
1055
struct ArrayVal : public ContainerVal {
1056
    using value_type = JsonbValue;
1057
    using pointer = value_type*;
1058
    using const_pointer = const value_type*;
1059
    using iterator = JsonbFwdIteratorT<pointer, ArrayVal>;
1060
    using const_iterator = JsonbFwdIteratorT<const_pointer, ArrayVal>;
1061
1062
    // get the JSONB value at index
1063
31
    JsonbValue* get(int idx) const {
1064
31
        if (idx < 0) {
1065
0
            return nullptr;
1066
0
        }
1067
1068
31
        const char* pch = payload;
1069
31
        const char* fence = payload + size;
1070
1071
72
        while (pch < fence && idx-- > 0) {
1072
41
            pch += ((JsonbValue*)pch)->numPackedBytes();
1073
41
        }
1074
31
        if (idx > 0 || pch == fence) {
1075
7
            return nullptr;
1076
7
        }
1077
1078
24
        return (JsonbValue*)pch;
1079
31
    }
1080
1081
    // Get number of elements in array
1082
8
    int numElem() const {
1083
8
        const char* pch = payload;
1084
8
        const char* fence = payload + size;
1085
1086
8
        unsigned int num = 0;
1087
29
        while (pch < fence) {
1088
21
            ++num;
1089
21
            pch += ((JsonbValue*)pch)->numPackedBytes();
1090
21
        }
1091
1092
8
        assert(pch == fence);
1093
1094
8
        return num;
1095
8
    }
1096
1097
0
    iterator begin() { return iterator((pointer)payload); }
1098
1099
68
    const_iterator begin() const { return const_iterator((pointer)payload); }
1100
1101
0
    iterator end() { return iterator((pointer)(payload + size)); }
1102
1103
67
    const_iterator end() const { return const_iterator((pointer)(payload + size)); }
1104
};
1105
1106
// Validates the buffer [pb, pb + size) as a serialized JSONB document and, on
// success, stores a pointer to it (viewed in place, nothing is copied) in *doc.
//
// Checks performed: non-null buffer, minimum size, header version, a known
// root type tag, and that `size` equals the header plus the packed size of
// the root value. Only the root value is examined here.
//
// On any failure *doc is left as nullptr and InvalidArgument is returned.
inline Status JsonbDocument::checkAndCreateDocument(const char* pb, size_t size,
                                                    JsonbDocument** doc) {
    *doc = nullptr;
    if (!pb || size < sizeof(JsonbHeader) + sizeof(JsonbValue)) {
        return Status::InvalidArgument("Invalid JSONB document: too small size({}) or null pointer",
                                       size);
    }

    auto* doc_ptr = reinterpret_cast<JsonbDocument*>(const_cast<char*>(pb));
    if (doc_ptr->header_.ver_ != JSONB_VER) {
        return Status::InvalidArgument("Invalid JSONB document: invalid version({})",
                                       doc_ptr->header_.ver_);
    }

    auto* val = reinterpret_cast<JsonbValue*>(doc_ptr->payload_);
    const JsonbType root_type = val->type;
    bool well_formed = root_type >= JsonbType::T_Null && root_type < JsonbType::NUM_TYPES;

    if (well_formed) {
        // Strings, binaries, objects and arrays start with a uint32_t length
        // prefix that numPackedBytes() reads. Make sure the buffer is long
        // enough to hold that prefix before it is touched. This also rejects
        // prefixes large enough to wrap numPackedBytes()'s unsigned int
        // result, since a wrapped total is always below this minimum.
        const bool length_prefixed =
                root_type == JsonbType::T_String || root_type == JsonbType::T_Binary ||
                root_type == JsonbType::T_Object || root_type == JsonbType::T_Array;
        if (length_prefixed &&
            size < sizeof(JsonbHeader) + sizeof(JsonbValue) + sizeof(uint32_t)) {
            well_formed = false;
        }
    }

    if (!well_formed || size != sizeof(JsonbHeader) + val->numPackedBytes()) {
        return Status::InvalidArgument("Invalid JSONB document: invalid type({}) or size({})",
                                       static_cast<JsonbTypeUnder>(val->type), size);
    }

    *doc = doc_ptr;
    return Status::OK();
}
1130
0
inline void JsonbDocument::setValue(const JsonbValue* value) {
1131
0
    memcpy(payload_, value, value->numPackedBytes());
1132
0
}
1133
1134
12
// Interprets the buffer [pb, pb + size) as a serialized JSONB document and
// returns a pointer to its root value (viewed in place, nothing is copied).
//
// Returns nullptr when the buffer is null, too small, has the wrong header
// version, or when `size` does not equal the header plus the packed size of
// the root value. An unknown root type tag is not handled here: it makes
// JsonbValue::numPackedBytes() throw.
inline JsonbValue* JsonbDocument::createValue(const char* pb, size_t size) {
    if (!pb || size < sizeof(JsonbHeader) + sizeof(JsonbValue)) {
        return nullptr;
    }

    auto* doc = reinterpret_cast<JsonbDocument*>(const_cast<char*>(pb));
    if (doc->header_.ver_ != JSONB_VER) {
        return nullptr;
    }

    auto* val = reinterpret_cast<JsonbValue*>(doc->payload_);

    // Strings, binaries, objects and arrays start with a uint32_t length
    // prefix that numPackedBytes() reads; refuse buffers too short to hold it
    // rather than reading past the end.
    const JsonbType root_type = val->type;
    const bool length_prefixed =
            root_type == JsonbType::T_String || root_type == JsonbType::T_Binary ||
            root_type == JsonbType::T_Object || root_type == JsonbType::T_Array;
    if (length_prefixed && size < sizeof(JsonbHeader) + sizeof(JsonbValue) + sizeof(uint32_t)) {
        return nullptr;
    }

    if (size != sizeof(JsonbHeader) + val->numPackedBytes()) {
        return nullptr;
    }

    return val;
}
1151
1152
0
inline unsigned int JsonbDocument::numPackedBytes() const {
1153
0
    return ((const JsonbValue*)payload_)->numPackedBytes() + sizeof(header_);
1154
0
}
1155
1156
17.4k
inline unsigned int JsonbKeyValue::numPackedBytes() const {
1157
17.4k
    unsigned int ks = keyPackedBytes();
1158
17.4k
    auto* val = (JsonbValue*)(((char*)this) + ks);
1159
17.4k
    return ks + val->numPackedBytes();
1160
17.4k
}
1161
1162
// Poor man's "virtual" function JsonbValue::numPackedBytes
1163
71.8k
inline unsigned int JsonbValue::numPackedBytes() const {
1164
71.8k
    switch (type) {
1165
2.74k
    case JsonbType::T_Null:
1166
2.82k
    case JsonbType::T_True:
1167
2.86k
    case JsonbType::T_False: {
1168
2.86k
        return sizeof(type);
1169
2.82k
    }
1170
1171
1.66k
    case JsonbType::T_Int8: {
1172
1.66k
        return sizeof(type) + sizeof(int8_t);
1173
2.82k
    }
1174
117
    case JsonbType::T_Int16: {
1175
117
        return sizeof(type) + sizeof(int16_t);
1176
2.82k
    }
1177
3.49k
    case JsonbType::T_Int32: {
1178
3.49k
        return sizeof(type) + sizeof(int32_t);
1179
2.82k
    }
1180
21.4k
    case JsonbType::T_Int64: {
1181
21.4k
        return sizeof(type) + sizeof(int64_t);
1182
2.82k
    }
1183
10.6k
    case JsonbType::T_Double: {
1184
10.6k
        return sizeof(type) + sizeof(double);
1185
2.82k
    }
1186
26
    case JsonbType::T_Float: {
1187
26
        return sizeof(type) + sizeof(float);
1188
2.82k
    }
1189
14.4k
    case JsonbType::T_Int128: {
1190
14.4k
        return sizeof(type) + sizeof(int128_t);
1191
2.82k
    }
1192
10.9k
    case JsonbType::T_String:
1193
15.3k
    case JsonbType::T_Binary: {
1194
15.3k
        return unpack<JsonbBinaryVal>()->numPackedBytes();
1195
10.9k
    }
1196
1197
1.69k
    case JsonbType::T_Object:
1198
1.84k
    case JsonbType::T_Array: {
1199
1.84k
        return unpack<ContainerVal>()->numPackedBytes();
1200
1.69k
    }
1201
7
    case JsonbType::T_Decimal32: {
1202
7
        return JsonbDecimal32::numPackedBytes();
1203
1.69k
    }
1204
7
    case JsonbType::T_Decimal64: {
1205
7
        return JsonbDecimal64::numPackedBytes();
1206
1.69k
    }
1207
9
    case JsonbType::T_Decimal128: {
1208
9
        return JsonbDecimal128::numPackedBytes();
1209
1.69k
    }
1210
6
    case JsonbType::T_Decimal256: {
1211
6
        return JsonbDecimal256::numPackedBytes();
1212
1.69k
    }
1213
0
    case JsonbType::NUM_TYPES:
1214
0
        break;
1215
71.8k
    }
1216
1217
0
    throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB value type: {}",
1218
0
                    static_cast<int32_t>(type));
1219
71.8k
}
1220
1221
6
inline int JsonbValue::numElements() const {
1222
6
    switch (type) {
1223
0
    case JsonbType::T_Int8:
1224
0
    case JsonbType::T_Int16:
1225
0
    case JsonbType::T_Int32:
1226
0
    case JsonbType::T_Int64:
1227
0
    case JsonbType::T_Double:
1228
0
    case JsonbType::T_Float:
1229
0
    case JsonbType::T_Int128:
1230
1
    case JsonbType::T_String:
1231
1
    case JsonbType::T_Binary:
1232
2
    case JsonbType::T_Null:
1233
2
    case JsonbType::T_True:
1234
2
    case JsonbType::T_False:
1235
2
    case JsonbType::T_Decimal32:
1236
2
    case JsonbType::T_Decimal64:
1237
2
    case JsonbType::T_Decimal128:
1238
2
    case JsonbType::T_Decimal256: {
1239
2
        return 1;
1240
2
    }
1241
0
    case JsonbType::T_Object: {
1242
0
        return unpack<ObjectVal>()->numElem();
1243
2
    }
1244
4
    case JsonbType::T_Array: {
1245
4
        return unpack<ArrayVal>()->numElem();
1246
2
    }
1247
0
    case JsonbType::NUM_TYPES:
1248
0
        break;
1249
6
    }
1250
0
    throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB value type: {}",
1251
0
                    static_cast<int32_t>(type));
1252
6
}
1253
1254
3
// Returns true when this value (the target) contains `rhs` (the candidate).
//
//  - integers: rhs is an integer with the same numeric value
//  - float/double: rhs is a float or double comparing equal as double
//  - string/binary: rhs is a string or binary with the same bytes
//  - array: if rhs is an array, every element of rhs is contained in some
//    element of this array; otherwise rhs is contained in some element
//  - object: rhs is an object and, for each of its entries, this object has
//    the same key with a value that contains the entry's value
//  - null/true/false: rhs has the same type
//  - decimals: rhs is a decimal of the same width with equal value,
//    precision and scale
//
// Throws Exception(INTERNAL_ERROR) for an unknown type tag.
inline bool JsonbValue::contains(JsonbValue* rhs) const {
    switch (type) {
    case JsonbType::T_Int8:
    case JsonbType::T_Int16:
    case JsonbType::T_Int32:
    case JsonbType::T_Int64:
    case JsonbType::T_Int128: {
        return rhs->isInt() && this->int_val() == rhs->int_val();
    }
    case JsonbType::T_Double:
    case JsonbType::T_Float: {
        if (!rhs->isDouble() && !rhs->isFloat()) {
            return false;
        }
        double left = isDouble() ? unpack<JsonbDoubleVal>()->val() : unpack<JsonbFloatVal>()->val();
        double right = rhs->isDouble() ? rhs->unpack<JsonbDoubleVal>()->val()
                                       : rhs->unpack<JsonbFloatVal>()->val();
        return left == right;
    }
    case JsonbType::T_String:
    case JsonbType::T_Binary: {
        if (!rhs->isString() && !rhs->isBinary()) {
            return false;
        }
        const auto* lhs_str = unpack<JsonbStringVal>();
        const auto* rhs_str = rhs->unpack<JsonbStringVal>();
        const size_t lhs_len = lhs_str->length();
        return lhs_len == rhs_str->length() &&
               std::memcmp(lhs_str->getBlob(), rhs_str->getBlob(), lhs_len) == 0;
    }
    case JsonbType::T_Array: {
        const auto* lhs_array = unpack<ArrayVal>();
        const int lhs_num = lhs_array->numElem();

        // True when at least one element of this array contains `candidate`.
        const auto some_element_contains = [lhs_array, lhs_num](JsonbValue* candidate) {
            for (int i = 0; i < lhs_num; ++i) {
                if (lhs_array->get(i)->contains(candidate)) {
                    return true;
                }
            }
            return false;
        };

        if (!rhs->isArray()) {
            return some_element_contains(rhs);
        }

        // Array in array: each candidate element must be found somewhere in
        // the target. Counting matching *target* elements and comparing that
        // count with the candidate's size gives wrong answers as soon as
        // either side has duplicates (e.g. [1,1,2] vs [1,2]).
        const auto* rhs_array = rhs->unpack<ArrayVal>();
        const int rhs_num = rhs_array->numElem();
        for (int j = 0; j < rhs_num; ++j) {
            if (!some_element_contains(rhs_array->get(j))) {
                return false;
            }
        }
        return true;
    }
    case JsonbType::T_Object: {
        if (!rhs->isObject()) {
            return false;
        }
        const auto* lhs_obj = unpack<ObjectVal>();
        const auto* rhs_obj = rhs->unpack<ObjectVal>();
        // numElem() walks the whole object, so compute it once.
        const int rhs_num = rhs_obj->numElem();
        for (int i = 0; i < rhs_num; ++i) {
            JsonbKeyValue* entry = rhs_obj->getJsonbKeyValue(i);
            JsonbValue* match = lhs_obj->find(entry->getKeyStr(), entry->klen());
            if (match == nullptr || !match->contains(entry->value())) {
                return false;
            }
        }
        return true;
    }
    case JsonbType::T_Null: {
        return rhs->isNull();
    }
    case JsonbType::T_True: {
        return rhs->isTrue();
    }
    case JsonbType::T_False: {
        return rhs->isFalse();
    }
    case JsonbType::T_Decimal32: {
        if (!rhs->isDecimal32()) {
            return false;
        }
        const auto* lhs_dec = unpack<JsonbDecimal32>();
        const auto* rhs_dec = rhs->unpack<JsonbDecimal32>();
        return lhs_dec->val() == rhs_dec->val() && lhs_dec->precision == rhs_dec->precision &&
               lhs_dec->scale == rhs_dec->scale;
    }
    case JsonbType::T_Decimal64: {
        if (!rhs->isDecimal64()) {
            return false;
        }
        const auto* lhs_dec = unpack<JsonbDecimal64>();
        const auto* rhs_dec = rhs->unpack<JsonbDecimal64>();
        return lhs_dec->val() == rhs_dec->val() && lhs_dec->precision == rhs_dec->precision &&
               lhs_dec->scale == rhs_dec->scale;
    }
    case JsonbType::T_Decimal128: {
        if (!rhs->isDecimal128()) {
            return false;
        }
        const auto* lhs_dec = unpack<JsonbDecimal128>();
        const auto* rhs_dec = rhs->unpack<JsonbDecimal128>();
        return lhs_dec->val() == rhs_dec->val() && lhs_dec->precision == rhs_dec->precision &&
               lhs_dec->scale == rhs_dec->scale;
    }
    case JsonbType::T_Decimal256: {
        if (!rhs->isDecimal256()) {
            return false;
        }
        const auto* lhs_dec = unpack<JsonbDecimal256>();
        const auto* rhs_dec = rhs->unpack<JsonbDecimal256>();
        return lhs_dec->val() == rhs_dec->val() && lhs_dec->precision == rhs_dec->precision &&
               lhs_dec->scale == rhs_dec->scale;
    }
    case JsonbType::NUM_TYPES:
        break;
    }

    throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB value type: {}",
                    static_cast<int32_t>(type));
}
1376
1377
96
inline bool JsonbPath::seek(const char* key_path, size_t kp_len) {
1378
96
    while (kp_len > 0 && std::isspace(key_path[kp_len - 1])) {
1379
0
        --kp_len;
1380
0
    }
1381
1382
    //path invalid
1383
96
    if (!key_path || kp_len == 0) {
1384
0
        return false;
1385
0
    }
1386
96
    Stream stream(key_path, kp_len);
1387
96
    stream.skip_whitespace();
1388
96
    if (stream.exhausted() || stream.read() != SCOPE) {
1389
        //path invalid
1390
0
        return false;
1391
0
    }
1392
1393
186
    while (!stream.exhausted()) {
1394
90
        stream.skip_whitespace();
1395
90
        stream.clear_leg_ptr();
1396
90
        stream.clear_leg_len();
1397
1398
90
        if (!JsonbPath::parsePath(&stream, this)) {
1399
            //path invalid
1400
0
            return false;
1401
0
        }
1402
90
    }
1403
96
    return true;
1404
96
}
1405
1406
90
// Parses a single path segment at the stream's current position.
//
// Accepted forms, by leading character:
//   BEGIN_ARRAY                -> array segment, e.g. $[0]
//   BEGIN_MEMBER               -> member segment ($.a) or array segment ($.[0])
//   WILDCARD + one more char   -> must be followed by an array or member
//                                 segment; two WILDCARDs in a row mark the
//                                 path as a super wildcard ($**)
// Anything else is an invalid path and yields false.
inline bool JsonbPath::parsePath(Stream* stream, JsonbPath* path) {
    const auto head = stream->peek();

    // $[0]
    if (head == BEGIN_ARRAY) {
        return parse_array(stream, path);
    }

    if (head == WILDCARD) {
        stream->skip(1);
        if (stream->exhausted()) {
            return false;
        }

        // $**
        if (stream->peek() == WILDCARD) {
            path->_is_supper_wildcard = true;
        }

        // The character after the first wildcard is consumed either way.
        stream->skip(1);
        if (stream->exhausted()) {
            return false;
        }

        if (stream->peek() == BEGIN_ARRAY) {
            return parse_array(stream, path);
        }
        if (stream->peek() != BEGIN_MEMBER) {
            return false;
        }
        // Positioned on BEGIN_MEMBER: continue with the member handling below.
    } else if (head != BEGIN_MEMBER) {
        return false; //invalid json path
    }

    // $.a or $.[0] -- advance past the .
    stream->skip(1);
    if (stream->exhausted()) {
        return false;
    }

    // $.[0]
    if (stream->peek() == BEGIN_ARRAY) {
        return parse_array(stream, path);
    }
    // $.a
    return parse_member(stream, path);
}
1468
1469
58
// Parses an array leg. The stream must be positioned on BEGIN_ARRAY.
//
// Accepted contents between the brackets:
//   *           wildcard; records a leg pointing at the `*` and sets
//               path->_is_wildcard
//   last        (case-insensitive) the final element, stored as index -1
//   last - N    N elements before the final one, stored as index -N - 1;
//               only whitespace may separate `last` from `-`, and N must be
//               a non-negative decimal integer
//   N           a decimal integer, stored as-is
//
// Surrounding whitespace around a numeric index is ignored. The whole index
// text must be a number: trailing characters (`[1abc]`, `[last-1x]`) are
// rejected instead of being silently dropped, and a signed offset after
// `last-` is rejected because negating it would flip the direction of the
// index (and overflow for INT_MIN).
//
// On success the stream is left just past END_ARRAY and one leg with type
// ARRAY_CODE has been appended to `path`. Returns false on malformed input.
inline bool JsonbPath::parse_array(Stream* stream, JsonbPath* path) {
    assert(stream->peek() == BEGIN_ARRAY);
    stream->skip(1);
    if (stream->exhausted()) {
        return false;
    }

    // [*]
    if (stream->peek() == WILDCARD) {
        stream->set_leg_ptr(const_cast<char*>(stream->position()));
        stream->add_leg_len();
        stream->skip(1);
        if (stream->exhausted()) {
            return false;
        }

        if (stream->peek() != END_ARRAY) {
            return false;
        }

        std::unique_ptr<leg_info> leg(
                new leg_info(stream->get_leg_ptr(), stream->get_leg_len(), 0, ARRAY_CODE));
        path->add_leg_to_leg_vector(std::move(leg));
        stream->skip(1);
        path->_is_wildcard = true;
        return true;
    }

    // Collect everything up to (not including) the closing bracket.
    stream->set_leg_ptr(const_cast<char*>(stream->position()));

    for (; !stream->exhausted() && stream->peek() != END_ARRAY; stream->advance()) {
        stream->add_leg_len();
    }

    if (stream->exhausted() || stream->peek() != END_ARRAY) {
        return false;
    }
    stream->skip(1);

    //parse array index to int

    std::string_view idx_string(stream->get_leg_ptr(), stream->get_leg_len());
    int index = 0;

    // Succeeds only when every character of `text` belongs to the number.
    const auto parse_whole_int = [](std::string_view text, int& out) {
        const char* const end = text.data() + text.size();
        const auto result = std::from_chars(text.data(), end, out);
        return result.ec == std::errc() && result.ptr == end;
    };

    // The casts keep <cctype> functions well-defined for bytes >= 0x80.
    const auto same_letter = [](char c1, char c2) {
        return std::tolower(static_cast<unsigned char>(c1)) ==
               std::tolower(static_cast<unsigned char>(c2));
    };

    if (stream->get_leg_len() >= 4 &&
        std::equal(LAST, LAST + 4, stream->get_leg_ptr(), same_letter)) {
        auto pos = idx_string.find(MINUS);

        if (pos != std::string_view::npos) {
            // Only whitespace is allowed between `last` and the minus sign.
            for (size_t i = 4; i < pos; ++i) {
                if (!std::isspace(static_cast<unsigned char>(idx_string[i]))) {
                    LOG(WARNING) << "Non-space char in idx_string: '" << idx_string << "'";
                    return false;
                }
            }
            idx_string = idx_string.substr(pos + 1);
            idx_string = trim(idx_string);

            if (!parse_whole_int(idx_string, index) || index < 0) {
                LOG(WARNING) << "Invalid index in JSON path: '" << idx_string << "'";
                return false;
            }

        } else if (stream->get_leg_len() > 4) {
            // `last` followed by anything other than `- N`.
            return false;
        }

        // `last` maps to -1, `last - 1` to -2, and so on.
        std::unique_ptr<leg_info> leg(new leg_info(nullptr, 0, -index - 1, ARRAY_CODE));
        path->add_leg_to_leg_vector(std::move(leg));

        return true;
    }

    idx_string = trim(idx_string);
    if (!parse_whole_int(idx_string, index)) {
        return false;
    }

    std::unique_ptr<leg_info> leg(new leg_info(nullptr, 0, index, ARRAY_CODE));
    path->add_leg_to_leg_vector(std::move(leg));

    return true;
}
1559
1560
32
// Parses a member leg; the stream must be positioned just past BEGIN_MEMBER.
//
// Accepted forms:
//   *         wildcard; records a one-character leg and sets
//             path->_is_wildcard
//   name      unquoted name, terminated by the next BEGIN_MEMBER or
//             BEGIN_ARRAY (left unconsumed) or by the end of the stream;
//             whitespace is not allowed
//   "name"    double-quoted name; may contain whitespace, BEGIN_MEMBER and
//             BEGIN_ARRAY characters. The recorded leg starts after the
//             opening quote and excludes the closing quote.
//
// An ESCAPE character keeps both itself and the following character in the
// leg and marks the stream as containing escapes; stream->remove_escapes()
// is invoked before the leg is recorded.
//
// Returns false when the stream is exhausted, a quote is left unterminated,
// an ESCAPE is the last character, unquoted whitespace is found, or the name
// is empty. On success one leg with type MEMBER_CODE is appended to `path`.
inline bool JsonbPath::parse_member(Stream* stream, JsonbPath* path) {
    if (stream->exhausted()) {
        return false;
    }

    if (stream->peek() == WILDCARD) {
        stream->set_leg_ptr(const_cast<char*>(stream->position()));
        stream->add_leg_len();
        stream->skip(1);
        std::unique_ptr<leg_info> leg(
                new leg_info(stream->get_leg_ptr(), stream->get_leg_len(), 0, MEMBER_CODE));
        path->add_leg_to_leg_vector(std::move(leg));
        path->_is_wildcard = true;
        return true;
    }

    stream->set_leg_ptr(const_cast<char*>(stream->position()));

    const char* left_quotation_marks = nullptr;
    const char* right_quotation_marks = nullptr;

    for (; !stream->exhausted(); stream->advance()) {
        // Only accept space characters quoted by double quotes.
        // The cast keeps std::isspace well-defined for bytes >= 0x80, which
        // occur in non-ASCII (e.g. UTF-8) member names.
        if (std::isspace(static_cast<unsigned char>(stream->peek())) &&
            left_quotation_marks == nullptr) {
            return false;
        } else if (stream->peek() == ESCAPE) {
            // Count the escape character and the character it escapes; the
            // loop's advance() then steps past the escaped character.
            stream->add_leg_len();
            stream->skip(1);
            stream->add_leg_len();
            stream->set_has_escapes(true);
            if (stream->exhausted()) {
                return false;
            }
            continue;
        } else if (stream->peek() == DOUBLE_QUOTE) {
            if (left_quotation_marks == nullptr) {
                // Opening quote: the leg starts on the following character.
                left_quotation_marks = stream->position();
                stream->set_leg_ptr(const_cast<char*>(++left_quotation_marks));
                continue;
            } else {
                // Closing quote: consume it and finish the leg.
                right_quotation_marks = stream->position();
                stream->skip(1);
                break;
            }
        } else if (stream->peek() == BEGIN_MEMBER || stream->peek() == BEGIN_ARRAY) {
            // Outside quotes these start the next leg; inside quotes they
            // are ordinary name characters.
            if (left_quotation_marks == nullptr) {
                break;
            }
        }

        stream->add_leg_len();
    }

    if ((left_quotation_marks != nullptr && right_quotation_marks == nullptr) ||
        stream->get_leg_ptr() == nullptr || stream->get_leg_len() == 0) {
        return false; //invalid json path
    }

    if (stream->get_has_escapes()) {
        stream->remove_escapes();
    }

    std::unique_ptr<leg_info> leg(
            new leg_info(stream->get_leg_ptr(), stream->get_leg_len(), 0, MEMBER_CODE));
    path->add_leg_to_leg_vector(std::move(leg));

    return true;
}
1628
1629
static_assert(is_pod_v<JsonbDocument>, "JsonbDocument must be standard layout and trivial");
1630
static_assert(is_pod_v<JsonbValue>, "JsonbValue must be standard layout and trivial");
1631
static_assert(is_pod_v<JsonbDecimal32>, "JsonbDecimal32 must be standard layout and trivial");
1632
static_assert(is_pod_v<JsonbDecimal64>, "JsonbDecimal64 must be standard layout and trivial");
1633
static_assert(is_pod_v<JsonbDecimal128>, "JsonbDecimal128 must be standard layout and trivial");
1634
static_assert(is_pod_v<JsonbDecimal256>, "JsonbDecimal256 must be standard layout and trivial");
1635
static_assert(is_pod_v<JsonbInt8Val>, "JsonbInt8Val must be standard layout and trivial");
1636
static_assert(is_pod_v<JsonbInt32Val>, "JsonbInt32Val must be standard layout and trivial");
1637
static_assert(is_pod_v<JsonbInt64Val>, "JsonbInt64Val must be standard layout and trivial");
1638
static_assert(is_pod_v<JsonbInt128Val>, "JsonbInt128Val must be standard layout and trivial");
1639
static_assert(is_pod_v<JsonbDoubleVal>, "JsonbDoubleVal must be standard layout and trivial");
1640
static_assert(is_pod_v<JsonbFloatVal>, "JsonbFloatVal must be standard layout and trivial");
1641
static_assert(is_pod_v<JsonbBinaryVal>, "JsonbBinaryVal must be standard layout and trivial");
1642
static_assert(is_pod_v<ContainerVal>, "ContainerVal must be standard layout and trivial");
1643
1644
#define ASSERT_DECIMAL_LAYOUT(type)                \
1645
    static_assert(offsetof(type, precision) == 0); \
1646
    static_assert(offsetof(type, scale) == 4);     \
1647
    static_assert(offsetof(type, value) == 8);
1648
1649
ASSERT_DECIMAL_LAYOUT(JsonbDecimal32)
1650
ASSERT_DECIMAL_LAYOUT(JsonbDecimal64)
1651
ASSERT_DECIMAL_LAYOUT(JsonbDecimal128)
1652
ASSERT_DECIMAL_LAYOUT(JsonbDecimal256)
1653
1654
#define ASSERT_NUMERIC_LAYOUT(type) static_assert(offsetof(type, num) == 0);
1655
1656
ASSERT_NUMERIC_LAYOUT(JsonbInt8Val)
1657
ASSERT_NUMERIC_LAYOUT(JsonbInt32Val)
1658
ASSERT_NUMERIC_LAYOUT(JsonbInt64Val)
1659
ASSERT_NUMERIC_LAYOUT(JsonbInt128Val)
1660
ASSERT_NUMERIC_LAYOUT(JsonbDoubleVal)
1661
1662
static_assert(offsetof(JsonbBinaryVal, size) == 0);
1663
static_assert(offsetof(JsonbBinaryVal, payload) == 4);
1664
1665
static_assert(offsetof(ContainerVal, size) == 0);
1666
static_assert(offsetof(ContainerVal, payload) == 4);
1667
1668
#pragma pack(pop)
1669
#if defined(__clang__)
1670
#pragma clang diagnostic pop
1671
#endif
1672
} // namespace doris
1673
1674
#endif // JSONB_JSONBDOCUMENT_H