Coverage Report

Created: 2026-05-22 15:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/util/jsonb_document.h
Line
Count
Source
1
/*
2
 *  Copyright (c) 2014, Facebook, Inc.
3
 *  All rights reserved.
4
 *
5
 *  This source code is licensed under the BSD-style license found in the
6
 *  LICENSE file in the root directory of this source tree. An additional grant
7
 *  of patent rights can be found in the PATENTS file in the same directory.
8
 *
9
 */
10
11
/*
12
 * This header defines JsonbDocument, JsonbKeyValue, and various value classes
13
 * which are derived from JsonbValue, and a forward iterator for container
14
 * values - essentially everything that is related to JSONB binary data
15
 * structures.
16
 *
17
 * Implementation notes:
18
 *
19
 * None of the classes in this header file can be instantiated directly (i.e.
20
 * you cannot create a JsonbKeyValue or JsonbValue object - all constructors
21
 * are declared non-public). We use the classes as wrappers on the packed JSONB
22
 * bytes (serialized), and cast the classes (types) to the underlying packed
23
 * byte array.
24
 *
25
 * For the same reason, we cannot define any JSONB value class to be virtual,
26
 * since we never call constructors, and will not instantiate vtbl and vptrs.
27
 *
28
 * Therefore, the classes are defined as packed structures (i.e. no data
29
 * alignment and padding), and the private member variables of the classes are
30
 * defined precisely in the same order as the JSONB spec. This ensures we
31
 * access the packed JSONB bytes correctly.
32
 *
33
 * The packed structures are highly optimized for in-place operations with low
34
 * overhead. The reads (and in-place writes) are performed directly on packed
35
 * bytes. There is no memory allocation at all at runtime.
36
 *
37
 * For updates/writes of values that will expand the original JSONB size, the
38
 * write will fail, and the caller needs to handle buffer increase.
39
 *
40
 * ** Iterator **
41
 * Both ObjectVal class and ArrayVal class have iterator type that you can use
42
 * to declare an iterator on a container object to go through the key-value
43
 * pairs or value list. The iterator has both non-const and const types.
44
 *
45
 * Note: iterators are forward direction only.
46
 *
47
 * ** Query **
48
 * Querying into containers is through the member functions find (for key/value
49
 * pairs) and get (for array elements), and is in streaming style. We don't
50
 * need to read/scan the whole JSONB packed bytes in order to return results.
51
 * Once the key/index is found, we will stop search.  You can use text to query
52
 * both objects and array (for array, text will be converted to integer index),
53
 * and use index to retrieve from array. Array index is 0-based.
54
 *
55
 * ** External dictionary **
56
 * During query processing, you can also pass a call-back function, so the
57
 * search will first try to check if the key string exists in the dictionary.
58
 * If so, search will be based on the id instead of the key string.
59
 * @author Tian Xia <tianx@fb.com>
60
 * 
61
 * this file is copied from 
62
 * https://github.com/facebook/mysql-5.6/blob/fb-mysql-5.6.35/fbson/FbsonDocument.h
63
 * and modified by Doris
64
 */
65
66
#ifndef JSONB_JSONBDOCUMENT_H
67
#define JSONB_JSONBDOCUMENT_H
68
69
#include <algorithm>
#include <array>
#include <cassert>
#include <cctype>
#include <charconv>
#include <cmath>
#include <concepts>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <limits>
#include <memory>
#include <string>
#include <string_view>
#include <type_traits>
#include <utility>
#include <vector>
80
81
#include "common/compiler_util.h" // IWYU pragma: keep
82
#include "common/status.h"
83
#include "core/data_type/define_primitive_type.h"
84
#include "core/string_ref.h"
85
#include "core/types.h"
86
#include "util/string_util.h"
87
88
// #include "util/string_parser.hpp"
89
90
// Concept to check for supported decimal types
91
template <typename T>
92
concept JsonbDecimalType =
93
        std::same_as<T, doris::Decimal256> || std::same_as<T, doris::Decimal64> ||
94
        std::same_as<T, doris::Decimal128V3> || std::same_as<T, doris::Decimal32>;
95
96
namespace doris {
97
98
// True when T is both trivial and standard-layout (the two properties that
// together made up the former "POD" notion).
template <typename T>
constexpr bool is_pod_v = std::conjunction_v<std::is_trivial<T>, std::is_standard_layout<T>>;
100
101
struct JsonbStringVal;
102
struct ObjectVal;
103
struct ArrayVal;
104
struct JsonbBinaryVal;
105
struct ContainerVal;
106
107
template <JsonbDecimalType T>
108
struct JsonbDecimalVal;
109
110
using JsonbDecimal256 = JsonbDecimalVal<Decimal256>;
111
using JsonbDecimal128 = JsonbDecimalVal<Decimal128V3>;
112
using JsonbDecimal64 = JsonbDecimalVal<Decimal64>;
113
using JsonbDecimal32 = JsonbDecimalVal<Decimal32>;
114
115
template <typename T>
116
    requires std::is_integral_v<T> || std::is_floating_point_v<T>
117
struct NumberValT;
118
119
using JsonbInt8Val = NumberValT<int8_t>;
120
using JsonbInt16Val = NumberValT<int16_t>;
121
using JsonbInt32Val = NumberValT<int32_t>;
122
using JsonbInt64Val = NumberValT<int64_t>;
123
using JsonbInt128Val = NumberValT<int128_t>;
124
using JsonbDoubleVal = NumberValT<double>;
125
using JsonbFloatVal = NumberValT<float>;
126
127
template <typename T>
128
concept JsonbPodType = (std::same_as<T, JsonbStringVal> || std::same_as<T, ObjectVal> ||
129
                        std::same_as<T, ContainerVal> || std::same_as<T, ArrayVal> ||
130
                        std::same_as<T, JsonbBinaryVal> || std::same_as<T, JsonbDecimal32> ||
131
                        std::same_as<T, JsonbDecimal64> || std::same_as<T, JsonbDecimal128> ||
132
                        std::same_as<T, JsonbDecimal256> || std::same_as<T, JsonbDecimal32> ||
133
                        std::same_as<T, JsonbInt8Val> || std::same_as<T, JsonbInt16Val> ||
134
                        std::same_as<T, JsonbInt32Val> || std::same_as<T, JsonbInt64Val> ||
135
                        std::same_as<T, JsonbInt128Val> || std::same_as<T, JsonbFloatVal> ||
136
                        std::same_as<T, JsonbFloatVal> || std::same_as<T, JsonbDoubleVal>);
137
138
4.51M
#define JSONB_VER 1
139
140
using int128_t = __int128;
141
142
// forward declaration
143
struct JsonbValue;
144
145
class JsonbOutStream;
146
147
template <class OS_TYPE>
148
class JsonbWriterT;
149
150
using JsonbWriter = JsonbWriterT<JsonbOutStream>;
151
152
const int MaxNestingLevel = 100;
153
154
/*
155
 * JsonbType defines 10 primitive types and 2 container types, as described
156
 * below.
157
 * NOTE: Do NOT modify the existing values or their order in this enum.
158
 *      You may only append new entries at the end before `NUM_TYPES`.
159
 *      This enum will be used in serialized data and/or persisted data.
160
 *      Changing existing values may break backward compatibility
161
 *      with previously stored or transmitted data.
162
 *
163
 * primitive_value ::=
164
 *   0x00        //null value (0 byte)
165
 * | 0x01        //boolean true (0 byte)
166
 * | 0x02        //boolean false (0 byte)
167
 * | 0x03 int8   //char/int8 (1 byte)
168
 * | 0x04 int16  //int16 (2 bytes)
169
 * | 0x05 int32  //int32 (4 bytes)
170
 * | 0x06 int64  //int64 (8 bytes)
171
 * | 0x07 double //floating point (8 bytes)
172
 * | 0x08 string //variable length string
173
 * | 0x09 binary //variable length binary
174
 *
175
 * container ::=
176
 *   0x0A int32 key_value_list //object, int32 is the total bytes of the object
177
 * | 0x0B int32 value_list     //array, int32 is the total bytes of the array
178
 */
179
// Type tag of a JSONB value; JsonbValue stores it as its `type` member.
// The numeric values are part of the serialized format: never renumber or
// reorder existing entries, only append new ones right before NUM_TYPES.
enum class JsonbType : char {
    T_Null = 0x00,       // null value, no payload
    T_True = 0x01,       // boolean true, no payload
    T_False = 0x02,      // boolean false, no payload
    T_Int8 = 0x03,       // 1-byte integer
    T_Int16 = 0x04,      // 2-byte integer
    T_Int32 = 0x05,      // 4-byte integer
    T_Int64 = 0x06,      // 8-byte integer
    T_Double = 0x07,     // 8-byte floating point
    T_String = 0x08,     // variable length string
    T_Binary = 0x09,     // variable length binary
    T_Object = 0x0A,     // container: key/value list
    T_Array = 0x0B,      // container: value list
    T_Int128 = 0x0C,     // payload unpacked as NumberValT<int128_t>
    T_Float = 0x0D,      // payload unpacked as NumberValT<float>
    T_Decimal32 = 0x0E,  // DecimalV3 only
    T_Decimal64 = 0x0F,  // DecimalV3 only
    T_Decimal128 = 0x10, // DecimalV3 only
    T_Decimal256 = 0x11, // DecimalV3 only
    NUM_TYPES,           // number of tags defined above; not a value type itself
};
200
201
// Translates a JSONB type tag into the corresponding PrimitiveType.
// T_True and T_False both map to TYPE_BOOLEAN. Any tag without a mapping
// (including NUM_TYPES) raises an INTERNAL_ERROR Exception carrying the
// numeric tag value.
inline PrimitiveType get_primitive_type_from_json_type(JsonbType json_type) {
    using enum JsonbType;
    switch (json_type) {
    case T_Null:
        return TYPE_NULL;
    case T_True:
    case T_False:
        return TYPE_BOOLEAN;
    case T_Int8:
        return TYPE_TINYINT;
    case T_Int16:
        return TYPE_SMALLINT;
    case T_Int32:
        return TYPE_INT;
    case T_Int64:
        return TYPE_BIGINT;
    case T_Int128:
        return TYPE_LARGEINT;
    case T_Float:
        return TYPE_FLOAT;
    case T_Double:
        return TYPE_DOUBLE;
    case T_String:
        return TYPE_STRING;
    case T_Binary:
        return TYPE_BINARY;
    case T_Object:
        return TYPE_STRUCT;
    case T_Array:
        return TYPE_ARRAY;
    case T_Decimal32:
        return TYPE_DECIMAL32;
    case T_Decimal64:
        return TYPE_DECIMAL64;
    case T_Decimal128:
        return TYPE_DECIMAL128I;
    case T_Decimal256:
        return TYPE_DECIMAL256;
    default:
        break;
    }
    // Reached only for tags that have no PrimitiveType counterpart.
    throw Exception(ErrorCode::INTERNAL_ERROR, "Unsupported JsonbType: {}",
                    static_cast<int>(json_type));
}
243
244
//for parse json path
245
// Root marker; JsonbPath::to_string emits it as the first character of a path.
constexpr char SCOPE = '$';
// Prefix emitted in front of an object-member leg.
constexpr char BEGIN_MEMBER = '.';
// Opening and closing brackets emitted around an array index leg.
constexpr char BEGIN_ARRAY = '[';
constexpr char END_ARRAY = ']';
// Quote emitted around a member key that contains whitespace.
constexpr char DOUBLE_QUOTE = '"';
// NOTE(review): presumably the '*' wildcard leg; its use is not visible in this part of the file.
constexpr char WILDCARD = '*';
// NOTE(review): presumably used together with LAST for "last - N" array indexes; confirm in the parser.
constexpr char MINUS = '-';
constexpr char LAST[] = "last";
// Backslash; leg_info::to_string inserts it in front of characters it escapes.
constexpr char ESCAPE = '\\';
// Values of leg_info::type: member leg vs. array leg.
constexpr unsigned int MEMBER_CODE = 0;
constexpr unsigned int ARRAY_CODE = 1;
256
257
/// A simple input stream class for the JSON path parser.
258
class Stream {
public:
    /// Creates an input stream reading from a character string.
    /// @param string  the input string
    /// @param length  the length of the input string
    Stream(const char* string, size_t length) : m_position(string), m_end(string + length) {}

    /// Returns a pointer to the current position in the stream.
    const char* position() const { return m_position; }

    /// Returns a pointer to the position just after the end of the stream.
    const char* end() const { return m_end; }

    /// Returns the number of bytes remaining in the stream.
    size_t remaining() const {
        assert(m_position <= m_end);
        return static_cast<size_t>(m_end - m_position);
    }

    /// Tells if the stream has been exhausted.
    bool exhausted() const { return remaining() == 0; }

    /// Reads the next byte from the stream and moves the position forward.
    /// The stream must not be exhausted.
    char read() {
        assert(!exhausted());
        return *m_position++;
    }

    /// Reads the next byte from the stream without moving the position forward.
    /// The stream must not be exhausted.
    char peek() const {
        assert(!exhausted());
        return *m_position;
    }

    /// Moves the position to the next non-whitespace character.
    void skip_whitespace() {
        // std::isspace is only defined for values representable as unsigned
        // char (or EOF). A plain char holding a byte >= 0x80 may be negative,
        // so convert before the call.
        m_position = std::find_if_not(m_position, m_end, [](char c) {
            return std::isspace(static_cast<unsigned char>(c)) != 0;
        });
    }

    /// Moves the position n bytes forward, then skips any whitespace that follows.
    void skip(size_t n) {
        assert(remaining() >= n);
        m_position += n;
        skip_whitespace();
    }

    /// Moves the position one byte forward without any bounds check.
    void advance() { m_position++; }

    /// Forgets the current path leg pointer.
    void clear_leg_ptr() { leg_ptr = nullptr; }

    /// Sets the start of the current path leg.
    void set_leg_ptr(char* ptr) { leg_ptr = ptr; }

    /// Returns the start of the current path leg (may be nullptr).
    char* get_leg_ptr() { return leg_ptr; }

    /// Resets the current path leg length to zero.
    void clear_leg_len() { leg_len = 0; }

    /// Grows the current path leg by one byte.
    void add_leg_len() { leg_len++; }

    /// Returns the length of the current path leg.
    unsigned int get_leg_len() const { return leg_len; }

    /// Removes every backslash from the current leg in place, terminates the
    /// compacted leg with '\0' and shrinks the leg length accordingly.
    /// Does nothing when no leg pointer is set.
    void remove_escapes() {
        if (leg_ptr == nullptr) {
            return;
        }
        // Unsigned counters: leg_len is unsigned, so no signed/unsigned comparison.
        unsigned int new_len = 0;
        for (unsigned int i = 0; i < leg_len; i++) {
            if (leg_ptr[i] != '\\') {
                leg_ptr[new_len++] = leg_ptr[i];
            }
        }
        leg_ptr[new_len] = '\0';
        leg_len = new_len;
    }

    /// Records whether the current leg contains escape characters.
    void set_has_escapes(bool has) { has_escapes = has; }

    /// Tells whether the current leg was flagged as containing escape characters.
    bool get_has_escapes() const { return has_escapes; }

private:
    /// The current position in the stream.
    const char* m_position = nullptr;

    /// The end of the stream.
    const char* const m_end;

    ///path leg ptr
    char* leg_ptr = nullptr;

    ///path leg len; starts at zero so it can be read before clear_leg_len() is called
    unsigned int leg_len = 0;

    ///Whether to contain escape characters
    bool has_escapes = false;
};
352
353
struct leg_info {
354
    ///path leg ptr
355
    char* leg_ptr = nullptr;
356
357
    ///path leg len
358
    unsigned int leg_len;
359
360
    ///array_index
361
    int array_index;
362
363
    ///type: 0 is member 1 is array
364
    unsigned int type;
365
366
341
    bool to_string(std::string* str) const {
367
341
        if (type == MEMBER_CODE) {
368
109
            str->push_back(BEGIN_MEMBER);
369
109
            bool contains_space = false;
370
109
            std::string tmp;
371
287
            for (auto* it = leg_ptr; it != (leg_ptr + leg_len); ++it) {
372
178
                if (std::isspace(*it)) {
373
3
                    contains_space = true;
374
175
                } else if (*it == '"' || *it == ESCAPE || *it == '\r' || *it == '\n' ||
375
175
                           *it == '\b' || *it == '\t') {
376
2
                    tmp.push_back(ESCAPE);
377
2
                }
378
178
                tmp.push_back(*it);
379
178
            }
380
109
            if (contains_space) {
381
3
                str->push_back(DOUBLE_QUOTE);
382
3
            }
383
109
            str->append(tmp);
384
109
            if (contains_space) {
385
3
                str->push_back(DOUBLE_QUOTE);
386
3
            }
387
109
            return true;
388
232
        } else if (type == ARRAY_CODE) {
389
232
            str->push_back(BEGIN_ARRAY);
390
232
            std::string int_str = std::to_string(array_index);
391
232
            str->append(int_str);
392
232
            str->push_back(END_ARRAY);
393
232
            return true;
394
232
        } else {
395
0
            return false;
396
0
        }
397
341
    }
398
};
399
400
class JsonbPath {
401
public:
402
    // parse json path
403
    static bool parsePath(Stream* stream, JsonbPath* path);
404
405
    static bool parse_array(Stream* stream, JsonbPath* path);
406
    static bool parse_member(Stream* stream, JsonbPath* path);
407
408
    //return true if json path valid else return false
409
    bool seek(const char* string, size_t length);
410
411
21.8k
    void add_leg_to_leg_vector(std::unique_ptr<leg_info> leg) {
412
21.8k
        leg_vector.emplace_back(leg.release());
413
21.8k
    }
414
415
586
    void pop_leg_from_leg_vector() { leg_vector.pop_back(); }
416
417
218
    bool to_string(std::string* res) const {
418
218
        res->push_back(SCOPE);
419
341
        for (const auto& leg : leg_vector) {
420
341
            auto valid = leg->to_string(res);
421
341
            if (!valid) {
422
0
                return false;
423
0
            }
424
341
        }
425
218
        return true;
426
218
    }
427
428
394k
    size_t get_leg_vector_size() const { return leg_vector.size(); }
429
430
1.25M
    leg_info* get_leg_from_leg_vector(size_t i) const { return leg_vector[i].get(); }
431
432
246
    bool is_wildcard() const { return _is_wildcard; }
433
150k
    bool is_supper_wildcard() const { return _is_supper_wildcard; }
434
435
77
    void clean() { leg_vector.clear(); }
436
437
private:
438
    std::vector<std::unique_ptr<leg_info>> leg_vector;
439
    bool _is_wildcard = false;        // whether the path is a wildcard path
440
    bool _is_supper_wildcard = false; // supper wildcard likes '$**.a' or '$**[1]'
441
};
442
443
/*
444
 * JsonbFwdIteratorT implements JSONB's iterator template.
445
 *
446
 * Note: it is a FORWARD-only iterator due to the design of the JSONB format.
447
 */
448
// Forward iterator over variable-sized packed elements.
//
// Iter_Type is a pointer to the element type; the element must provide
// numPackedBytes(), which tells how many bytes to step over to reach the next
// element. Cont_Type only tags the iterator with its container type so that
// iterators of different containers are distinct types.
template <class Iter_Type, class Cont_Type>
class JsonbFwdIteratorT {
public:
    using iterator = Iter_Type;
    using pointer = typename std::iterator_traits<Iter_Type>::pointer;
    using reference = typename std::iterator_traits<Iter_Type>::reference;

    explicit JsonbFwdIteratorT() : current_(nullptr) {}
    explicit JsonbFwdIteratorT(const iterator& i) : current_(i) {}

    // allow non-const to const iterator conversion (same container type)
    template <class Iter_Ty>
    JsonbFwdIteratorT(const JsonbFwdIteratorT<Iter_Ty, Cont_Type>& rhs) : current_(rhs.base()) {}

    bool operator==(const JsonbFwdIteratorT& rhs) const { return (current_ == rhs.current_); }

    bool operator!=(const JsonbFwdIteratorT& rhs) const { return !operator==(rhs); }

    bool operator<(const JsonbFwdIteratorT& rhs) const { return (current_ < rhs.current_); }

    // Strictly greater: expressed through operator< with swapped operands so
    // that two equal iterators compare as neither less nor greater.
    bool operator>(const JsonbFwdIteratorT& rhs) const { return rhs.operator<(*this); }

    // Pre-increment: steps over the current element.
    JsonbFwdIteratorT& operator++() {
        current_ = next_();
        return *this;
    }

    // Post-increment: steps over the current element and returns the old position.
    JsonbFwdIteratorT operator++(int) {
        auto tmp = *this;
        current_ = next_();
        return tmp;
    }

    explicit operator pointer() { return current_; }

    reference operator*() const { return *current_; }

    pointer operator->() const { return current_; }

    iterator base() const { return current_; }

private:
    // Address of the element that follows the current one. The byte pointer
    // type mirrors the constness of the element so no cast drops `const`.
    iterator next_() const {
        using byte_ptr = std::conditional_t<std::is_const_v<std::remove_pointer_t<iterator>>,
                                            const char*, char*>;
        return reinterpret_cast<iterator>(reinterpret_cast<byte_ptr>(current_) +
                                          current_->numPackedBytes());
    }

    iterator current_;
};
492
using JsonbTypeUnder = std::underlying_type_t<JsonbType>;
493
494
#if defined(__clang__)
495
#pragma clang diagnostic push
496
#pragma clang diagnostic ignored "-Wzero-length-array"
497
#endif
498
#pragma pack(push, 1)
499
500
/*
501
 * JsonbDocument is the main object that accesses and queries JSONB packed
502
 * bytes. NOTE: JsonbDocument only allows object container as the top level
503
 * JSONB value. However, you can use the static method "createValue" to get any
504
 * JsonbValue object from the packed bytes.
505
 *
506
 * JsonbDocument object also dereferences to an object container value
507
 * (ObjectVal) once JSONB is loaded.
508
 *
509
 * ** Load **
510
 * JsonbDocument is usable after loading packed bytes (memory location) into
511
 * the object. We only need the header and first few bytes of the payload after
512
 * header to verify the JSONB.
513
 *
514
 * Note: creating a JsonbDocument (through checkAndCreateDocument) does not allocate
515
 * any memory. The document object is an efficient wrapper on the packed bytes
516
 * which is accessed directly.
517
 *
518
 * ** Query **
519
 * Query is through dereferencing into ObjectVal.
520
 */
521
// Overlay on a packed JSONB buffer: a one-byte header followed directly by the
// packed root value. The members below are laid out to match those bytes, so
// do not add, remove or reorder data members.
class JsonbDocument {
public:
    // Create a JsonbDocument object from JSONB packed bytes.
    // NOTE(review): presumably validates `pb`/`size` and stores the document
    // pointer in *doc on success; the definition is not in this part of the file.
    [[nodiscard]] static Status checkAndCreateDocument(const char* pb, size_t size,
                                                       const JsonbDocument** doc);

    // Create a JsonbValue from JSONB packed bytes.
    static const JsonbValue* createValue(const char* pb, size_t size);

    // Version byte stored in the header.
    uint8_t version() const { return header_.ver_; }

    // The root value, i.e. the bytes that start right after the header.
    const JsonbValue* getValue() const { return ((const JsonbValue*)payload_); }

    unsigned int numPackedBytes() const;

    // Accesses the root value's payload as an ObjectVal.
    const ObjectVal* operator->() const;

private:
    /*
     * JsonbHeader class defines JSONB header (internal to JsonbDocument).
     *
     * Currently it only contains version information (1-byte). We may expand the
     * header to include checksum of the JSONB binary for more security.
     */
    struct JsonbHeader {
        uint8_t ver_;
    } header_;

    // Zero-length array marking where the packed root value begins.
    char payload_[0];
};
551
552
/*
553
 * JsonbKeyValue class defines JSONB key type, as described below.
554
 *
555
 * key ::=
556
 *   0x00 int8    //1-byte dictionary id
557
 * | int8 (byte*) //int8 (>0) is the size of the key string
558
 *
559
 * value ::= primitive_value | container
560
 *
561
 * JsonbKeyValue can be either an id mapping to the key string in an external
562
 * dictionary, or it is the original key string. Whether to read an id or a
563
 * string is decided by the first byte (size).
564
 *
565
 * Note: a key object must be followed by a value object. Therefore, a key
566
 * object implicitly refers to a key-value pair, and you can get the value
567
 * object right after the key object. The function numPackedBytes hence
568
 * indicates the total size of the key-value pair, so that we will be able to go
569
 * to next pair from the key.
570
 *
571
 * ** Dictionary size **
572
 * By default, the dictionary size is 255 (1-byte). Users can define
573
 * "USE_LARGE_DICT" to increase the dictionary size to 65535 (2-byte).
574
 */
575
// Overlay on the packed bytes of one key/value pair. The data members mirror
// the packed layout (size byte, then either a key id or the key characters),
// so their order and types must not change.
class JsonbKeyValue {
public:
    // now we use sMaxKeyId to represent an empty key
    static const int sMaxKeyId = 65535;
    using keyid_type = uint16_t;

    static const uint8_t sMaxKeyLen = 64;

    // size of the key. 0 indicates it is stored as id
    uint8_t klen() const { return size; }

    // get the key string. Note the string may not be null terminated.
    const char* getKeyStr() const { return key.str_; }

    // get the key id; meaningful only when klen() is 0
    keyid_type getKeyId() const { return key.id_; }

    // Bytes occupied by the key part: the size byte plus either the key
    // characters (size > 0) or the key id (size == 0).
    unsigned int keyPackedBytes() const {
        return size ? (sizeof(size) + size) : (sizeof(size) + sizeof(keyid_type));
    }

    // The value of the pair, located right after the key part.
    const JsonbValue* value() const {
        return (const JsonbValue*)(((char*)this) + keyPackedBytes());
    }

    // size of the total packed bytes (key+value)
    unsigned int numPackedBytes() const;

    // Key length in bytes; 0 means the key is stored as an id.
    uint8_t size;

    // Either the key id or the first byte of the key string, selected by `size`.
    union key_ {
        keyid_type id_;
        char str_[1];
    } key;
};
609
610
// Return type of JsonbValue::findValue.
struct JsonbFindResult {
    const JsonbValue* value = nullptr;   // found value; nullptr by default
    // NOTE(review): presumably owns the buffer that `value` points into when
    // the result had to be assembled; confirm against findValue's definition.
    std::unique_ptr<JsonbWriter> writer; // writer to write the value
    bool is_wildcard = false;            // whether the path is a wildcard path
};
615
616
/*
617
 * JsonbValue is the base class of all JSONB types. It contains only one member
618
 * variable - type info, which can be retrieved by member functions is[Type]()
619
 * or type().
620
 */
621
struct JsonbValue {
622
    static const uint32_t sMaxValueLen = 1 << 24; // 16M
623
624
215k
    bool isNull() const { return (type == JsonbType::T_Null); }
625
342
    bool isTrue() const { return (type == JsonbType::T_True); }
626
11
    bool isFalse() const { return (type == JsonbType::T_False); }
627
484
    bool isInt() const { return isInt8() || isInt16() || isInt32() || isInt64() || isInt128(); }
628
484
    bool isInt8() const { return (type == JsonbType::T_Int8); }
629
343
    bool isInt16() const { return (type == JsonbType::T_Int16); }
630
248
    bool isInt32() const { return (type == JsonbType::T_Int32); }
631
251
    bool isInt64() const { return (type == JsonbType::T_Int64); }
632
753
    bool isDouble() const { return (type == JsonbType::T_Double); }
633
672
    bool isFloat() const { return (type == JsonbType::T_Float); }
634
19.3k
    bool isString() const { return (type == JsonbType::T_String); }
635
15.4k
    bool isBinary() const { return (type == JsonbType::T_Binary); }
636
10.1k
    bool isObject() const { return (type == JsonbType::T_Object); }
637
9.94k
    bool isArray() const { return (type == JsonbType::T_Array); }
638
246
    bool isInt128() const { return (type == JsonbType::T_Int128); }
639
404
    bool isDecimal() const {
640
404
        return (type == JsonbType::T_Decimal32 || type == JsonbType::T_Decimal64 ||
641
404
                type == JsonbType::T_Decimal128 || type == JsonbType::T_Decimal256);
642
404
    }
643
    bool isDecimal32() const { return (type == JsonbType::T_Decimal32); }
644
    bool isDecimal64() const { return (type == JsonbType::T_Decimal64); }
645
    bool isDecimal128() const { return (type == JsonbType::T_Decimal128); }
646
    bool isDecimal256() const { return (type == JsonbType::T_Decimal256); }
647
648
    PrimitiveType get_primitive_type() const { return get_primitive_type_from_json_type(type); }
649
650
555
    const char* typeName() const {
651
555
        switch (type) {
652
24
        case JsonbType::T_Null:
653
24
            return "null";
654
20
        case JsonbType::T_True:
655
40
        case JsonbType::T_False:
656
40
            return "bool";
657
44
        case JsonbType::T_Int8:
658
82
        case JsonbType::T_Int16:
659
96
        case JsonbType::T_Int32:
660
96
            return "int";
661
126
        case JsonbType::T_Int64:
662
126
            return "bigint";
663
18
        case JsonbType::T_Int128:
664
18
            return "largeint";
665
43
        case JsonbType::T_Double:
666
43
            return "double";
667
0
        case JsonbType::T_Float:
668
0
            return "float";
669
67
        case JsonbType::T_String:
670
67
            return "string";
671
0
        case JsonbType::T_Binary:
672
0
            return "binary";
673
103
        case JsonbType::T_Object:
674
103
            return "object";
675
38
        case JsonbType::T_Array:
676
38
            return "array";
677
0
        case JsonbType::T_Decimal32:
678
0
            return "Decimal32";
679
0
        case JsonbType::T_Decimal64:
680
0
            return "Decimal64";
681
0
        case JsonbType::T_Decimal128:
682
0
            return "Decimal128";
683
0
        case JsonbType::T_Decimal256:
684
0
            return "Decimal256";
685
0
        default:
686
0
            return "unknown";
687
555
        }
688
555
    }
689
690
    // size of the total packed bytes
691
    unsigned int numPackedBytes() const;
692
693
    // size of the value in bytes
694
    unsigned int size() const;
695
696
    //Get the number of jsonbvalue elements
697
    int numElements() const;
698
699
    //Whether to include the jsonbvalue rhs
700
    bool contains(const JsonbValue* rhs) const;
701
702
    // find the JSONB value by JsonbPath
703
    JsonbFindResult findValue(JsonbPath& path) const;
704
    friend class JsonbDocument;
705
706
    JsonbType type; // type info
707
708
    char payload[0]; // payload, which is the packed bytes of the value
709
710
    /**
711
    * @brief Unpacks the underlying Jsonb binary content as a pointer to type `T`.
712
    *
713
    * @tparam T A POD (Plain Old Data) type that must satisfy the `JsonbPodType` concept.
714
    *           This ensures that `T` is trivially copyable, standard-layout, and safe to
715
    *           reinterpret from raw bytes without invoking undefined behavior.
716
    *
717
    * @return A pointer to a `const T` object, interpreted from the internal buffer.
718
    *
719
    * @note The caller must ensure that the current JsonbValue actually contains data
720
    *       compatible with type `T`, otherwise the result is undefined.
721
    */
722
    template <JsonbPodType T>
723
2.81M
    const T* unpack() const {
724
2.81M
        static_assert(is_pod_v<T>, "T must be a POD type");
725
2.81M
        return reinterpret_cast<const T*>(payload);
726
2.81M
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_9ObjectValEEEPKT_v
Line
Count
Source
723
249k
    const T* unpack() const {
724
249k
        static_assert(is_pod_v<T>, "T must be a POD type");
725
249k
        return reinterpret_cast<const T*>(payload);
726
249k
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIaEEEEPKT_v
Line
Count
Source
723
31.9k
    const T* unpack() const {
724
31.9k
        static_assert(is_pod_v<T>, "T must be a POD type");
725
31.9k
        return reinterpret_cast<const T*>(payload);
726
31.9k
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIsEEEEPKT_v
Line
Count
Source
723
42.4k
    const T* unpack() const {
724
42.4k
        static_assert(is_pod_v<T>, "T must be a POD type");
725
42.4k
        return reinterpret_cast<const T*>(payload);
726
42.4k
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIiEEEEPKT_v
Line
Count
Source
723
16.9k
    const T* unpack() const {
724
16.9k
        static_assert(is_pod_v<T>, "T must be a POD type");
725
16.9k
        return reinterpret_cast<const T*>(payload);
726
16.9k
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIlEEEEPKT_v
Line
Count
Source
723
203k
    const T* unpack() const {
724
203k
        static_assert(is_pod_v<T>, "T must be a POD type");
725
203k
        return reinterpret_cast<const T*>(payload);
726
203k
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTInEEEEPKT_v
Line
Count
Source
723
18.7k
    const T* unpack() const {
724
18.7k
        static_assert(is_pod_v<T>, "T must be a POD type");
725
18.7k
        return reinterpret_cast<const T*>(payload);
726
18.7k
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIdEEEEPKT_v
Line
Count
Source
723
11.9k
    const T* unpack() const {
724
11.9k
        static_assert(is_pod_v<T>, "T must be a POD type");
725
11.9k
        return reinterpret_cast<const T*>(payload);
726
11.9k
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIfEEEEPKT_v
Line
Count
Source
723
4.62k
    const T* unpack() const {
724
4.62k
        static_assert(is_pod_v<T>, "T must be a POD type");
725
4.62k
        return reinterpret_cast<const T*>(payload);
726
4.62k
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_15JsonbDecimalValINS_7DecimalIiEEEEEEPKT_v
Line
Count
Source
723
62
    const T* unpack() const {
724
62
        static_assert(is_pod_v<T>, "T must be a POD type");
725
62
        return reinterpret_cast<const T*>(payload);
726
62
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_15JsonbDecimalValINS_7DecimalIlEEEEEEPKT_v
Line
Count
Source
723
45
    const T* unpack() const {
724
45
        static_assert(is_pod_v<T>, "T must be a POD type");
725
45
        return reinterpret_cast<const T*>(payload);
726
45
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_15JsonbDecimalValINS_12Decimal128V3EEEEEPKT_v
Line
Count
Source
723
47
    const T* unpack() const {
724
47
        static_assert(is_pod_v<T>, "T must be a POD type");
725
47
        return reinterpret_cast<const T*>(payload);
726
47
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_15JsonbDecimalValINS_7DecimalIN4wide7integerILm256EiEEEEEEEEPKT_v
Line
Count
Source
723
13
    const T* unpack() const {
724
13
        static_assert(is_pod_v<T>, "T must be a POD type");
725
13
        return reinterpret_cast<const T*>(payload);
726
13
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_14JsonbBinaryValEEEPKT_v
Line
Count
Source
723
335k
    const T* unpack() const {
724
335k
        static_assert(is_pod_v<T>, "T must be a POD type");
725
335k
        return reinterpret_cast<const T*>(payload);
726
335k
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_12ContainerValEEEPKT_v
Line
Count
Source
723
1.05M
    const T* unpack() const {
724
1.05M
        static_assert(is_pod_v<T>, "T must be a POD type");
725
1.05M
        return reinterpret_cast<const T*>(payload);
726
1.05M
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_8ArrayValEEEPKT_v
Line
Count
Source
723
366k
    const T* unpack() const {
724
366k
        static_assert(is_pod_v<T>, "T must be a POD type");
725
366k
        return reinterpret_cast<const T*>(payload);
726
366k
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_14JsonbStringValEEEPKT_v
Line
Count
Source
723
478k
    const T* unpack() const {
724
478k
        static_assert(is_pod_v<T>, "T must be a POD type");
725
478k
        return reinterpret_cast<const T*>(payload);
726
478k
    }
727
728
    // /**
729
    // * @brief Unpacks the underlying Jsonb binary content as a pointer to type `T`.
730
    // *
731
    // * @tparam T A POD (Plain Old Data) type that must satisfy the `JsonbPodType` concept.
732
    // *           This ensures that `T` is trivially copyable, standard-layout, and safe to
733
    // *           reinterpret from raw bytes without invoking undefined behavior.
734
    // *
735
    // * @return A pointer to a `T` object, interpreted from the internal buffer.
736
    // *
737
    // * @note The caller must ensure that the current JsonbValue actually contains data
738
    // *       compatible with type `T`, otherwise the result is undefined.
739
    // */
740
    // template <JsonbPodType T>
741
    // T* unpack() {
742
    //     static_assert(is_pod_v<T>, "T must be a POD type");
743
    //     return reinterpret_cast<T*>(payload);
744
    // }
745
746
    int128_t int_val() const;
747
};
748
749
// inline ObjectVal* JsonbDocument::operator->() {
750
//     return (((JsonbValue*)payload_)->unpack<ObjectVal>());
751
// }
752
753
131k
// Views the document payload as the root JsonbValue and returns its content
// as an ObjectVal. The root's type is not inspected here.
inline const ObjectVal* JsonbDocument::operator->() const {
    const auto* root = reinterpret_cast<const JsonbValue*>(payload_);
    return root->unpack<ObjectVal>();
}
756
757
/*
758
 * NumerValT is the template class (derived from JsonbValue) of all number
759
 * types (integers and double).
760
 */
761
template <typename T>
762
    requires std::is_integral_v<T> || std::is_floating_point_v<T>
763
struct NumberValT {
764
public:
765
330k
    T val() const { return num; }
_ZNK5doris10NumberValTIaE3valEv
Line
Count
Source
765
31.9k
    T val() const { return num; }
_ZNK5doris10NumberValTIsE3valEv
Line
Count
Source
765
42.4k
    T val() const { return num; }
_ZNK5doris10NumberValTIiE3valEv
Line
Count
Source
765
16.9k
    T val() const { return num; }
_ZNK5doris10NumberValTIlE3valEv
Line
Count
Source
765
203k
    T val() const { return num; }
_ZNK5doris10NumberValTInE3valEv
Line
Count
Source
765
18.7k
    T val() const { return num; }
_ZNK5doris10NumberValTIdE3valEv
Line
Count
Source
765
11.9k
    T val() const { return num; }
_ZNK5doris10NumberValTIfE3valEv
Line
Count
Source
765
4.62k
    T val() const { return num; }
766
767
    static unsigned int numPackedBytes() { return sizeof(JsonbValue) + sizeof(T); }
768
769
    T num;
770
};
771
772
184
// Returns the stored integer widened to int128_t.
// Throws Exception(INTERNAL_ERROR) when the value is not one of the integer
// types (T_Int8 .. T_Int128).
inline int128_t JsonbValue::int_val() const {
    int128_t widened = 0;
    switch (type) {
    case JsonbType::T_Int8:
        widened = unpack<JsonbInt8Val>()->val();
        break;
    case JsonbType::T_Int16:
        widened = unpack<JsonbInt16Val>()->val();
        break;
    case JsonbType::T_Int32:
        widened = unpack<JsonbInt32Val>()->val();
        break;
    case JsonbType::T_Int64:
        widened = unpack<JsonbInt64Val>()->val();
        break;
    case JsonbType::T_Int128:
        widened = unpack<JsonbInt128Val>()->val();
        break;
    default:
        throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB value type: {}",
                        static_cast<int32_t>(type));
    }
    return widened;
}
789
790
template <JsonbDecimalType T>
791
struct JsonbDecimalVal {
792
public:
793
    using NativeType = typename T::NativeType;
794
795
    // get the decimal value
796
129
    NativeType val() const {
797
        // to avoid memory alignment issues, we use memcpy to copy the value
798
129
        NativeType tmp;
799
129
        memcpy(&tmp, &value, sizeof(NativeType));
800
129
        return tmp;
801
129
    }
_ZNK5doris15JsonbDecimalValINS_7DecimalIiEEE3valEv
Line
Count
Source
796
48
    NativeType val() const {
797
        // to avoid memory alignment issues, we use memcpy to copy the value
798
48
        NativeType tmp;
799
48
        memcpy(&tmp, &value, sizeof(NativeType));
800
48
        return tmp;
801
48
    }
_ZNK5doris15JsonbDecimalValINS_7DecimalIlEEE3valEv
Line
Count
Source
796
38
    NativeType val() const {
797
        // to avoid memory alignment issues, we use memcpy to copy the value
798
38
        NativeType tmp;
799
38
        memcpy(&tmp, &value, sizeof(NativeType));
800
38
        return tmp;
801
38
    }
_ZNK5doris15JsonbDecimalValINS_12Decimal128V3EE3valEv
Line
Count
Source
796
37
    NativeType val() const {
797
        // to avoid memory alignment issues, we use memcpy to copy the value
798
37
        NativeType tmp;
799
37
        memcpy(&tmp, &value, sizeof(NativeType));
800
37
        return tmp;
801
37
    }
_ZNK5doris15JsonbDecimalValINS_7DecimalIN4wide7integerILm256EiEEEEE3valEv
Line
Count
Source
796
6
    NativeType val() const {
797
        // to avoid memory alignment issues, we use memcpy to copy the value
798
6
        NativeType tmp;
799
6
        memcpy(&tmp, &value, sizeof(NativeType));
800
6
        return tmp;
801
6
    }
802
803
160
    static constexpr int numPackedBytes() {
804
160
        return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value);
805
160
    }
_ZN5doris15JsonbDecimalValINS_7DecimalIiEEE14numPackedBytesEv
Line
Count
Source
803
51
    static constexpr int numPackedBytes() {
804
51
        return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value);
805
51
    }
_ZN5doris15JsonbDecimalValINS_7DecimalIlEEE14numPackedBytesEv
Line
Count
Source
803
63
    static constexpr int numPackedBytes() {
804
63
        return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value);
805
63
    }
_ZN5doris15JsonbDecimalValINS_12Decimal128V3EE14numPackedBytesEv
Line
Count
Source
803
40
    static constexpr int numPackedBytes() {
804
40
        return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value);
805
40
    }
_ZN5doris15JsonbDecimalValINS_7DecimalIN4wide7integerILm256EiEEEEE14numPackedBytesEv
Line
Count
Source
803
6
    static constexpr int numPackedBytes() {
804
6
        return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value);
805
6
    }
806
807
    uint32_t precision;
808
    uint32_t scale;
809
    NativeType value;
810
};
811
812
/*
813
 * BlobVal is the base class (derived from JsonbValue) for string and binary
814
 * types. The size indicates the total bytes of the payload.
815
 */
816
struct JsonbBinaryVal {
817
public:
818
    // size of the blob payload only
819
17.2k
    unsigned int getBlobLen() const { return size; }
820
821
    // return the blob as byte array
822
264k
    const char* getBlob() const { return payload; }
823
824
    // size of the total packed bytes
825
311k
    unsigned int numPackedBytes() const { return sizeof(JsonbValue) + sizeof(size) + size; }
826
    friend class JsonbDocument;
827
828
    uint32_t size;
829
    char payload[0];
830
};
831
832
/*
833
 * String type
834
 * Note: JSONB string may not be a c-string (NULL-terminated)
835
 */
836
struct JsonbStringVal : public JsonbBinaryVal {
837
public:
838
    /*
839
    This function return the actual size of a string. Since for
840
    a string, it can be null-terminated with null paddings or it
841
    can take all the space in the payload without null in the end.
842
    So we need to check it to get the true actual length of a string.
843
  */
844
239k
    size_t length() const {
845
        // It's an empty string
846
239k
        if (0 == size) {
847
147
            return size;
848
147
        }
849
        // The string stored takes all the spaces in payload
850
239k
        if (payload[size - 1] != 0) {
851
239k
            return size;
852
239k
        }
853
        // It's shorter than the size of payload
854
0
        return strnlen(payload, size);
855
239k
    }
856
};
857
858
/*
859
 * ContainerVal is the base class (derived from JsonbValue) for object and
860
 * array types. The size indicates the total bytes of the payload.
861
 */
862
struct ContainerVal {
863
    // size of the container payload only
864
0
    unsigned int getContainerSize() const { return size; }
865
866
    // return the container payload as byte array
867
0
    const char* getPayload() const { return payload; }
868
869
    // size of the total packed bytes
870
1.05M
    unsigned int numPackedBytes() const { return sizeof(JsonbValue) + sizeof(size) + size; }
871
    friend class JsonbDocument;
872
873
    uint32_t size;
874
    char payload[0];
875
};
876
877
/*
878
 * Object type
879
 */
880
struct ObjectVal : public ContainerVal {
881
    using value_type = JsonbKeyValue;
882
    using pointer = value_type*;
883
    using const_pointer = const value_type*;
884
    using const_iterator = JsonbFwdIteratorT<const_pointer, ObjectVal>;
885
886
    const_iterator search(const char* key) const {
887
        if (!key) {
888
            return end();
889
        }
890
        return search(key, (unsigned int)strlen(key));
891
    }
892
893
7.34k
    const_iterator search(const char* key, unsigned int klen) const {
894
7.34k
        if (!key || !klen) {
895
0
            return end();
896
0
        }
897
7.34k
        return internalSearch(key, klen);
898
7.34k
    }
899
900
    // Get number of elements in object
901
174
    int numElem() const {
902
174
        const char* pch = payload;
903
174
        const char* fence = payload + size;
904
905
174
        unsigned int num = 0;
906
449
        while (pch < fence) {
907
275
            auto* pkey = (JsonbKeyValue*)(pch);
908
275
            ++num;
909
275
            pch += pkey->numPackedBytes();
910
275
        }
911
912
174
        assert(pch == fence);
913
914
174
        return num;
915
174
    }
916
917
    // find the JSONB value by a key string (null terminated)
918
    const JsonbValue* find(const char* key) const {
919
        if (!key) {
920
            return nullptr;
921
        }
922
        return find(key, (unsigned int)strlen(key));
923
    }
924
925
    // find the JSONB value by a key string (with length)
926
7.34k
    const JsonbValue* find(const char* key, unsigned int klen) const {
927
7.34k
        const_iterator kv = search(key, klen);
928
7.34k
        if (end() == kv) {
929
4.45k
            return nullptr;
930
4.45k
        }
931
2.88k
        return kv->value();
932
7.34k
    }
933
934
118k
    const_iterator begin() const { return const_iterator((pointer)payload); }
935
936
248k
    const_iterator end() const { return const_iterator((pointer)(payload + size)); }
937
938
    std::vector<std::pair<StringRef, const JsonbValue*>> get_ordered_key_value_pairs() const;
939
940
private:
941
7.34k
    const_iterator internalSearch(const char* key, unsigned int klen) const {
942
7.34k
        const char* pch = payload;
943
7.34k
        const char* fence = payload + size;
944
945
18.8k
        while (pch < fence) {
946
14.4k
            const auto* pkey = (const JsonbKeyValue*)(pch);
947
14.4k
            if (klen == pkey->klen() && strncmp(key, pkey->getKeyStr(), klen) == 0) {
948
2.89k
                return const_iterator(pkey);
949
2.89k
            }
950
11.5k
            pch += pkey->numPackedBytes();
951
11.5k
        }
952
953
7.34k
        assert(pch == fence);
954
955
4.45k
        return end();
956
4.45k
    }
957
};
958
959
/*
960
 * Array type
961
 */
962
struct ArrayVal : public ContainerVal {
963
    using value_type = JsonbValue;
964
    using pointer = value_type*;
965
    using const_pointer = const value_type*;
966
    using const_iterator = JsonbFwdIteratorT<const_pointer, ArrayVal>;
967
968
    // get the JSONB value at index
969
198k
    const JsonbValue* get(int idx) const {
970
198k
        if (idx < 0) {
971
65
            return nullptr;
972
65
        }
973
974
198k
        const char* pch = payload;
975
198k
        const char* fence = payload + size;
976
977
204k
        while (pch < fence && idx-- > 0) {
978
6.79k
            pch += ((const JsonbValue*)pch)->numPackedBytes();
979
6.79k
        }
980
198k
        if (idx > 0 || pch == fence) {
981
1.65k
            return nullptr;
982
1.65k
        }
983
984
196k
        return (const JsonbValue*)pch;
985
198k
    }
986
987
    // Get number of elements in array
988
1.85k
    int numElem() const {
989
1.85k
        const char* pch = payload;
990
1.85k
        const char* fence = payload + size;
991
992
1.85k
        unsigned int num = 0;
993
108k
        while (pch < fence) {
994
106k
            ++num;
995
106k
            pch += ((const JsonbValue*)pch)->numPackedBytes();
996
106k
        }
997
998
1.85k
        assert(pch == fence);
999
1000
1.85k
        return num;
1001
1.85k
    }
1002
1003
168k
    const_iterator begin() const { return const_iterator((pointer)payload); }
1004
1005
168k
    const_iterator end() const { return const_iterator((pointer)(payload + size)); }
1006
};
1007
1008
namespace jsonb_detail {
1009
1010
struct JsonbScaledDecimal {
1011
    wide::Int256 value;
1012
    uint32_t scale;
1013
};
1014
1015
26
inline void validate_decimal_scale(uint32_t scale) {
1016
26
    if (scale > static_cast<uint32_t>(BeConsts::MAX_DECIMALV3_SCALE)) {
1017
2
        throw Exception(ErrorCode::INTERNAL_ERROR,
1018
2
                        "Invalid JSONB decimal scale: {}, max allowed scale: {}", scale,
1019
2
                        BeConsts::MAX_DECIMALV3_SCALE);
1020
2
    }
1021
26
}
1022
1023
180
inline bool is_numeric(const JsonbValue* value) {
1024
180
    return value->isInt() || value->isDouble() || value->isFloat() || value->isDecimal();
1025
180
}
1026
1027
25
inline double floating_value(const JsonbValue* value) {
1028
25
    if (value->isDouble()) {
1029
25
        return value->unpack<JsonbDoubleVal>()->val();
1030
25
    }
1031
0
    return value->unpack<JsonbFloatVal>()->val();
1032
25
}
1033
1034
20
// Widens any of the four decimal value types to a JsonbScaledDecimal.
// Throws Exception(INTERNAL_ERROR) when the stored scale is out of range or
// when value is not a decimal.
inline JsonbScaledDecimal get_scaled_decimal(const JsonbValue* value) {
    // Shared by all decimal widths: validate the scale first, then widen.
    const auto widen = [](const auto* decimal) -> JsonbScaledDecimal {
        validate_decimal_scale(decimal->scale);
        return {wide::Int256(decimal->val()), decimal->scale};
    };

    switch (value->type) {
    case JsonbType::T_Decimal32:
        return widen(value->unpack<JsonbDecimal32>());
    case JsonbType::T_Decimal64:
        return widen(value->unpack<JsonbDecimal64>());
    case JsonbType::T_Decimal128:
        return widen(value->unpack<JsonbDecimal128>());
    case JsonbType::T_Decimal256:
        return widen(value->unpack<JsonbDecimal256>());
    default:
        throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB decimal value type: {}",
                        static_cast<int32_t>(value->type));
    }
}
1061
1062
inline bool scaled_decimal_equal_decimal(const JsonbScaledDecimal& lhs,
1063
4
                                         const JsonbScaledDecimal& rhs) {
1064
4
    if (lhs.scale == rhs.scale) {
1065
0
        return lhs.value == rhs.value;
1066
0
    }
1067
1068
4
    if (lhs.scale < rhs.scale) {
1069
2
        const auto scale_multiplier = decimal_scale_multiplier<wide::Int256>(rhs.scale - lhs.scale);
1070
2
        return rhs.value % scale_multiplier == 0 && lhs.value == rhs.value / scale_multiplier;
1071
2
    }
1072
1073
2
    const auto scale_multiplier = decimal_scale_multiplier<wide::Int256>(lhs.scale - rhs.scale);
1074
2
    return lhs.value % scale_multiplier == 0 && lhs.value / scale_multiplier == rhs.value;
1075
4
}
1076
1077
4
inline bool scaled_decimal_equal_integer(const JsonbScaledDecimal& decimal, int128_t integer) {
1078
4
    const auto integer_value = wide::Int256(integer);
1079
4
    if (decimal.scale == 0) {
1080
0
        return decimal.value == integer_value;
1081
0
    }
1082
1083
4
    const auto scale_multiplier = decimal_scale_multiplier<wide::Int256>(decimal.scale);
1084
4
    return decimal.value % scale_multiplier == 0 &&
1085
4
           decimal.value / scale_multiplier == integer_value;
1086
4
}
1087
1088
inline constexpr auto kPowersOfFive = [] {
1089
    std::array<wide::Int256, BeConsts::MAX_DECIMALV3_SCALE + 1> powers {};
1090
    powers[0] = 1;
1091
    for (size_t i = 1; i < powers.size(); ++i) {
1092
        powers[i] = powers[i - 1] * 5;
1093
    }
1094
    return powers;
1095
}();
1096
1097
6
// Returns 5^exponent from the precomputed table.
// The exponent is validated as a decimal scale first, which keeps the table
// index in range; an out-of-range exponent throws.
inline wide::Int256 power_of_five(uint32_t exponent) {
    validate_decimal_scale(exponent);
    const wide::Int256 power = kPowersOfFive[exponent];
    return power;
}
1101
1102
6
// Checks value * 2^exponent == significand exactly, using integer arithmetic
// only. Shift amounts that the code cannot represent yield false.
inline bool scaled_binary_equal(wide::Int256 value, int exponent, wide::Int256 significand) {
    if (exponent >= 0) {
        // wide::Int256 is signed, so shifting 1 by 255 reaches the sign bit.
        constexpr int max_positive_int256_shift = std::numeric_limits<wide::Int256>::digits;
        if (exponent >= max_positive_int256_shift) {
            return false;
        }
        const auto multiplier = wide::Int256(1) << exponent;
        return value % multiplier == 0 && value / multiplier == significand;
    }

    // Negative exponent: the significand must be divisible by 2^-exponent.
    const int divisor_exponent = -exponent;
    if (divisor_exponent >= std::numeric_limits<int64_t>::digits) {
        return false;
    }
    const auto divisor = wide::Int256(1) << divisor_exponent;
    return significand % divisor == 0 && value == significand / divisor;
}
1119
1120
15
inline bool floating_equal_integer(const JsonbValue* floating, int128_t integer) {
1121
15
    const double value = floating_value(floating);
1122
15
    int exponent = 0;
1123
15
    std::frexp(value, &exponent);
1124
15
    if (!std::isfinite(value) || std::trunc(value) != value) {
1125
6
        return false;
1126
6
    }
1127
9
    if (exponent >= 128) {
1128
0
        return value == -std::ldexp(1.0, 127) && integer == std::numeric_limits<int128_t>::min();
1129
0
    }
1130
9
    if (exponent <= -1) {
1131
0
        return false;
1132
0
    }
1133
9
    return static_cast<int128_t>(value) == integer;
1134
9
}
1135
1136
6
inline bool floating_equal_decimal(const JsonbValue* floating, const JsonbScaledDecimal& decimal) {
1137
6
    const double value = floating_value(floating);
1138
6
    if (!std::isfinite(value)) {
1139
0
        return false;
1140
0
    }
1141
6
    if (value == 0) {
1142
0
        return decimal.value == 0;
1143
0
    }
1144
1145
6
    int exponent = 0;
1146
6
    const double significand_fraction = std::frexp(value, &exponent);
1147
6
    const double significand_double =
1148
6
            std::ldexp(significand_fraction, std::numeric_limits<double>::digits);
1149
6
    auto significand = wide::Int256(static_cast<int64_t>(significand_double));
1150
6
    exponent -= std::numeric_limits<double>::digits;
1151
1152
6
    const auto five_multiplier = power_of_five(decimal.scale);
1153
6
    if (decimal.value % five_multiplier != 0) {
1154
0
        return false;
1155
0
    }
1156
6
    const auto binary_scaled_decimal = decimal.value / five_multiplier;
1157
6
    return scaled_binary_equal(binary_scaled_decimal, exponent + decimal.scale, significand);
1158
6
}
1159
1160
180
inline bool numeric_equal(const JsonbValue* lhs, const JsonbValue* rhs) {
1161
180
    if (!is_numeric(rhs)) {
1162
68
        return false;
1163
68
    }
1164
1165
112
    if ((lhs->isDouble() || lhs->isFloat()) && rhs->isInt()) {
1166
8
        return floating_equal_integer(lhs, rhs->int_val());
1167
8
    }
1168
1169
104
    if ((rhs->isDouble() || rhs->isFloat()) && lhs->isInt()) {
1170
7
        return floating_equal_integer(rhs, lhs->int_val());
1171
7
    }
1172
1173
97
    if ((lhs->isDouble() || lhs->isFloat()) && rhs->isDecimal()) {
1174
4
        return floating_equal_decimal(lhs, get_scaled_decimal(rhs));
1175
4
    }
1176
1177
93
    if ((rhs->isDouble() || rhs->isFloat()) && lhs->isDecimal()) {
1178
4
        return floating_equal_decimal(rhs, get_scaled_decimal(lhs));
1179
4
    }
1180
1181
89
    if (lhs->isDouble() || lhs->isFloat()) {
1182
2
        return (rhs->isDouble() || rhs->isFloat()) && floating_value(lhs) == floating_value(rhs);
1183
2
    }
1184
1185
87
    if (lhs->isDecimal()) {
1186
6
        const auto lhs_decimal = get_scaled_decimal(lhs);
1187
6
        if (rhs->isDecimal()) {
1188
4
            return scaled_decimal_equal_decimal(lhs_decimal, get_scaled_decimal(rhs));
1189
4
        }
1190
2
        return scaled_decimal_equal_integer(lhs_decimal, rhs->int_val());
1191
6
    }
1192
1193
81
    if (rhs->isDecimal()) {
1194
2
        return scaled_decimal_equal_integer(get_scaled_decimal(rhs), lhs->int_val());
1195
2
    }
1196
1197
79
    return lhs->int_val() == rhs->int_val();
1198
81
}
1199
1200
61
inline bool array_contains_value(const ArrayVal* target_array, const JsonbValue* candidate) {
1201
61
    const int target_num = target_array->numElem();
1202
160
    for (int i = 0; i < target_num; ++i) {
1203
127
        if (target_array->get(i)->contains(candidate)) {
1204
28
            return true;
1205
28
        }
1206
127
    }
1207
33
    return false;
1208
61
}
1209
1210
18
inline bool array_contains_array(const ArrayVal* target_array, const ArrayVal* candidate_array) {
1211
18
    const int candidate_num = candidate_array->numElem();
1212
33
    for (int i = 0; i < candidate_num; ++i) {
1213
25
        if (!array_contains_value(target_array, candidate_array->get(i))) {
1214
10
            return false;
1215
10
        }
1216
25
    }
1217
8
    return true;
1218
18
}
1219
1220
} // namespace jsonb_detail
1221
1222
// Interprets the buffer [pb, pb + size) as a JSONB document and returns its
// root value.
// Returns nullptr when pb is null, the buffer is too short, the header
// version is not JSONB_VER, or size differs from the root's packed size plus
// the header. JsonbValue::numPackedBytes() throws on an unknown type byte, and
// that exception propagates to the caller.
inline const JsonbValue* JsonbDocument::createValue(const char* pb, size_t size) {
    constexpr size_t min_size = sizeof(JsonbHeader) + sizeof(JsonbValue);
    if (!pb || size < min_size) {
        return nullptr;
    }

    const auto* doc = reinterpret_cast<const JsonbDocument*>(pb);
    if (doc->header_.ver_ != JSONB_VER) {
        return nullptr;
    }

    const auto* val = reinterpret_cast<const JsonbValue*>(doc->payload_);

    // String, binary, object and array values start with a 32-bit length that
    // numPackedBytes() reads. Make sure the buffer actually holds that field
    // before it is read.
    const auto root_type = val->type;
    const bool has_length_prefix =
            root_type == JsonbType::T_String || root_type == JsonbType::T_Binary ||
            root_type == JsonbType::T_Object || root_type == JsonbType::T_Array;
    if (has_length_prefix && size < min_size + sizeof(uint32_t)) {
        return nullptr;
    }

    // Same as checkAndCreateDocument(), this is intentionally a lightweight structural check for
    // hot paths. Do not recursively validate container bodies here unless the caller is a clearly
    // untrusted raw binary boundary and accepts the O(document size) cost.
    if (size != sizeof(JsonbHeader) + val->numPackedBytes()) {
        return nullptr;
    }

    return val;
}
1242
1243
124
inline unsigned int JsonbDocument::numPackedBytes() const {
1244
124
    return ((const JsonbValue*)payload_)->numPackedBytes() + sizeof(header_);
1245
124
}
1246
1247
458k
inline unsigned int JsonbKeyValue::numPackedBytes() const {
1248
458k
    unsigned int ks = keyPackedBytes();
1249
458k
    const auto* val = (const JsonbValue*)(((char*)this) + ks);
1250
458k
    return ks + val->numPackedBytes();
1251
458k
}
1252
1253
// Poor man's "virtual" function JsonbValue::numPackedBytes
1254
1.95M
inline unsigned int JsonbValue::numPackedBytes() const {
1255
1.95M
    switch (type) {
1256
12.3k
    case JsonbType::T_Null:
1257
53.0k
    case JsonbType::T_True:
1258
64.2k
    case JsonbType::T_False: {
1259
64.2k
        return sizeof(type);
1260
53.0k
    }
1261
1262
40.9k
    case JsonbType::T_Int8: {
1263
40.9k
        return sizeof(type) + sizeof(int8_t);
1264
53.0k
    }
1265
47.0k
    case JsonbType::T_Int16: {
1266
47.0k
        return sizeof(type) + sizeof(int16_t);
1267
53.0k
    }
1268
20.8k
    case JsonbType::T_Int32: {
1269
20.8k
        return sizeof(type) + sizeof(int32_t);
1270
53.0k
    }
1271
356k
    case JsonbType::T_Int64: {
1272
356k
        return sizeof(type) + sizeof(int64_t);
1273
53.0k
    }
1274
25.5k
    case JsonbType::T_Double: {
1275
25.5k
        return sizeof(type) + sizeof(double);
1276
53.0k
    }
1277
4.77k
    case JsonbType::T_Float: {
1278
4.77k
        return sizeof(type) + sizeof(float);
1279
53.0k
    }
1280
31.5k
    case JsonbType::T_Int128: {
1281
31.5k
        return sizeof(type) + sizeof(int128_t);
1282
53.0k
    }
1283
287k
    case JsonbType::T_String:
1284
311k
    case JsonbType::T_Binary: {
1285
311k
        return unpack<JsonbBinaryVal>()->numPackedBytes();
1286
287k
    }
1287
1288
274k
    case JsonbType::T_Object:
1289
1.05M
    case JsonbType::T_Array: {
1290
1.05M
        return unpack<ContainerVal>()->numPackedBytes();
1291
274k
    }
1292
51
    case JsonbType::T_Decimal32: {
1293
51
        return JsonbDecimal32::numPackedBytes();
1294
274k
    }
1295
63
    case JsonbType::T_Decimal64: {
1296
63
        return JsonbDecimal64::numPackedBytes();
1297
274k
    }
1298
40
    case JsonbType::T_Decimal128: {
1299
40
        return JsonbDecimal128::numPackedBytes();
1300
274k
    }
1301
6
    case JsonbType::T_Decimal256: {
1302
6
        return JsonbDecimal256::numPackedBytes();
1303
274k
    }
1304
0
    case JsonbType::NUM_TYPES:
1305
0
        break;
1306
1.95M
    }
1307
1308
0
    throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB value type: {}",
1309
0
                    static_cast<int32_t>(type));
1310
1.95M
}
1311
1312
772
inline int JsonbValue::numElements() const {
1313
772
    switch (type) {
1314
6
    case JsonbType::T_Int8:
1315
16
    case JsonbType::T_Int16:
1316
18
    case JsonbType::T_Int32:
1317
21
    case JsonbType::T_Int64:
1318
23
    case JsonbType::T_Double:
1319
23
    case JsonbType::T_Float:
1320
25
    case JsonbType::T_Int128:
1321
32
    case JsonbType::T_String:
1322
32
    case JsonbType::T_Binary:
1323
37
    case JsonbType::T_Null:
1324
43
    case JsonbType::T_True:
1325
45
    case JsonbType::T_False:
1326
45
    case JsonbType::T_Decimal32:
1327
45
    case JsonbType::T_Decimal64:
1328
45
    case JsonbType::T_Decimal128:
1329
45
    case JsonbType::T_Decimal256: {
1330
45
        return 1;
1331
45
    }
1332
27
    case JsonbType::T_Object: {
1333
27
        return unpack<ObjectVal>()->numElem();
1334
45
    }
1335
700
    case JsonbType::T_Array: {
1336
700
        return unpack<ArrayVal>()->numElem();
1337
45
    }
1338
0
    case JsonbType::NUM_TYPES:
1339
0
        break;
1340
772
    }
1341
0
    throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB value type: {}",
1342
0
                    static_cast<int32_t>(type));
1343
772
}
1344
1345
395
inline bool JsonbValue::contains(const JsonbValue* rhs) const {
1346
395
    switch (type) {
1347
94
    case JsonbType::T_Int8:
1348
122
    case JsonbType::T_Int16:
1349
128
    case JsonbType::T_Int32:
1350
137
    case JsonbType::T_Int64:
1351
144
    case JsonbType::T_Int128:
1352
170
    case JsonbType::T_Double:
1353
170
    case JsonbType::T_Float:
1354
176
    case JsonbType::T_Decimal32:
1355
177
    case JsonbType::T_Decimal64:
1356
180
    case JsonbType::T_Decimal128:
1357
180
    case JsonbType::T_Decimal256: {
1358
180
        return jsonb_detail::numeric_equal(this, rhs);
1359
180
    }
1360
45
    case JsonbType::T_String:
1361
45
    case JsonbType::T_Binary: {
1362
45
        if (rhs->isString() || rhs->isBinary()) {
1363
17
            const auto* str_value1 = unpack<JsonbStringVal>();
1364
17
            const auto* str_value2 = rhs->unpack<JsonbStringVal>();
1365
17
            return str_value1->length() == str_value2->length() &&
1366
17
                   std::memcmp(str_value1->getBlob(), str_value2->getBlob(),
1367
16
                               str_value1->length()) == 0;
1368
17
        }
1369
28
        return false;
1370
45
    }
1371
54
    case JsonbType::T_Array: {
1372
54
        const auto* lhs_array = unpack<ArrayVal>();
1373
54
        if (rhs->isArray()) {
1374
18
            return jsonb_detail::array_contains_array(lhs_array, rhs->unpack<ArrayVal>());
1375
18
        }
1376
36
        return jsonb_detail::array_contains_value(lhs_array, rhs);
1377
54
    }
1378
82
    case JsonbType::T_Object: {
1379
82
        if (rhs->isObject()) {
1380
49
            const auto* obj_value1 = unpack<ObjectVal>();
1381
49
            const auto* obj_value2 = rhs->unpack<ObjectVal>();
1382
92
            for (auto it = obj_value2->begin(); it != obj_value2->end(); ++it) {
1383
65
                const JsonbValue* value = obj_value1->find(it->getKeyStr(), it->klen());
1384
65
                if (value == nullptr || !value->contains(it->value())) {
1385
22
                    return false;
1386
22
                }
1387
65
            }
1388
27
            return true;
1389
49
        }
1390
33
        return false;
1391
82
    }
1392
12
    case JsonbType::T_Null: {
1393
12
        return rhs->isNull();
1394
82
    }
1395
12
    case JsonbType::T_True: {
1396
12
        return rhs->isTrue();
1397
82
    }
1398
10
    case JsonbType::T_False: {
1399
10
        return rhs->isFalse();
1400
82
    }
1401
0
    case JsonbType::NUM_TYPES:
1402
0
        break;
1403
395
    }
1404
1405
0
    throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB value type: {}",
1406
0
                    static_cast<int32_t>(type));
1407
395
}
1408
1409
15.6k
inline bool JsonbPath::seek(const char* key_path, size_t kp_len) {
1410
15.6k
    while (kp_len > 0 && std::isspace(key_path[kp_len - 1])) {
1411
7
        --kp_len;
1412
7
    }
1413
1414
    //path invalid
1415
15.6k
    if (!key_path || kp_len == 0) {
1416
2
        return false;
1417
2
    }
1418
15.6k
    Stream stream(key_path, kp_len);
1419
15.6k
    stream.skip_whitespace();
1420
15.6k
    if (stream.exhausted() || stream.read() != SCOPE) {
1421
        //path invalid
1422
4
        return false;
1423
4
    }
1424
1425
36.6k
    while (!stream.exhausted()) {
1426
21.0k
        stream.skip_whitespace();
1427
21.0k
        stream.clear_leg_ptr();
1428
21.0k
        stream.clear_leg_len();
1429
1430
21.0k
        if (!JsonbPath::parsePath(&stream, this)) {
1431
            //path invalid
1432
20
            return false;
1433
20
        }
1434
21.0k
    }
1435
15.6k
    return true;
1436
15.6k
}
1437
1438
21.0k
// Parses the next path leg from stream into path. Accepted leg starts:
//   [ ...      array leg                          ($[0])
//   . ...      member leg, or dot + array leg     ($.a, $.[0])
//   * ...      wildcard prefix, then one of the two forms above ($**.a, $**[0])
// Returns false for any other start or when the stream ends mid-leg.
inline bool JsonbPath::parsePath(Stream* stream, JsonbPath* path) {
    // Shared handling of "<dot>leg": step over the dot, then parse either an
    // array leg or a member leg.
    // Keep $.[0] for backward compatibility: although the dot before an array
    // leg is non-standard, existing JSONB users may rely on it.
    const auto parse_after_dot = [&]() -> bool {
        // advance past the .
        stream->skip(1);
        if (stream->exhausted()) {
            return false;
        }
        if (stream->peek() == BEGIN_ARRAY) {
            return parse_array(stream, path);
        }
        return parse_member(stream, path);
    };

    // $[0]
    if (stream->peek() == BEGIN_ARRAY) {
        return parse_array(stream, path);
    }
    // $.a or $.[0]
    if (stream->peek() == BEGIN_MEMBER) {
        return parse_after_dot();
    }
    //invalid json path
    if (stream->peek() != WILDCARD) {
        return false;
    }

    // step over the first '*'
    stream->skip(1);
    if (stream->exhausted()) {
        return false;
    }

    // $**
    if (stream->peek() == WILDCARD) {
        path->_is_supper_wildcard = true;
    }

    // The character after the first '*' is consumed whether or not it is a
    // second '*'.
    stream->skip(1);
    if (stream->exhausted()) {
        return false;
    }

    if (stream->peek() == BEGIN_ARRAY) {
        return parse_array(stream, path);
    }
    // $**.a or $**.[0]
    if (stream->peek() == BEGIN_MEMBER) {
        return parse_after_dot();
    }
    return false;
}
1503
1504
11.2k
/// Parses one array leg of a JSON path and appends it to `path`.
///
/// Forms handled by the code below:
///   [*]         array wildcard; the leg keeps a pointer to the '*' and
///               path->_is_wildcard is set.
///   [N]         index parsed with std::from_chars; stored as array_index N.
///   [last]      "last" matched case-insensitively; stored as array_index -1.
///   [last - N]  only whitespace may sit between "last" and '-'; stored as
///               array_index -N - 1.
///
/// @param stream  cursor over the path text; must be positioned on BEGIN_ARRAY.
/// @param path    receives the parsed leg through add_leg_to_leg_vector().
/// @return true when a leg was appended and the closing bracket was consumed,
///         false when the text is malformed.
inline bool JsonbPath::parse_array(Stream* stream, JsonbPath* path) {
    assert(stream->peek() == BEGIN_ARRAY);
    // Step over the opening bracket.
    stream->skip(1);
    if (stream->exhausted()) {
        return false;
    }

    if (stream->peek() == WILDCARD) {
        // Called by function_jsonb.cpp, the variables passed in originate from a mutable block;
        // using const_cast is acceptable.
        stream->set_leg_ptr(const_cast<char*>(stream->position()));
        stream->add_leg_len();
        stream->skip(1);

        // The wildcard must be followed directly by the closing bracket.
        if (stream->exhausted() || stream->peek() != END_ARRAY) {
            return false;
        }

        path->add_leg_to_leg_vector(std::make_unique<leg_info>(
                stream->get_leg_ptr(), stream->get_leg_len(), 0, ARRAY_CODE));
        stream->skip(1);
        path->_is_wildcard = true;
        return true;
    }

    // Called by function_jsonb.cpp, the variables passed in originate from a mutable block;
    // using const_cast is acceptable.
    stream->set_leg_ptr(const_cast<char*>(stream->position()));

    // Collect everything up to (not including) the closing bracket as the leg text.
    for (; !stream->exhausted() && stream->peek() != END_ARRAY; stream->advance()) {
        stream->add_leg_len();
    }

    if (stream->exhausted() || stream->peek() != END_ARRAY) {
        return false;
    }
    // Consume the closing bracket.
    stream->skip(1);

    // Parse the collected text into an integer index.
    std::string_view idx_string(stream->get_leg_ptr(), stream->get_leg_len());
    int index = 0;

    constexpr size_t LAST_LEN = 4; // number of characters compared against LAST

    // <cctype> functions have undefined behaviour for negative char values, so
    // every byte is routed through unsigned char first.
    const auto same_letter = [](char lhs, char rhs) {
        return std::tolower(static_cast<unsigned char>(lhs)) ==
               std::tolower(static_cast<unsigned char>(rhs));
    };

    // Match "last" case-insensitively for compatibility with existing JSONB
    // paths such as [Last] and [LAST].
    if (idx_string.size() >= LAST_LEN &&
        std::equal(LAST, LAST + LAST_LEN, idx_string.data(), same_letter)) {
        const auto minus_pos = idx_string.find(MINUS);

        if (minus_pos != std::string_view::npos) {
            // Only whitespace may appear between "last" and the minus sign.
            for (size_t i = LAST_LEN; i < minus_pos; ++i) {
                if (!std::isspace(static_cast<unsigned char>(idx_string[i]))) {
                    LOG(WARNING) << "Non-space char in idx_string: '" << idx_string << "'";
                    return false;
                }
            }
            idx_string = idx_string.substr(minus_pos + 1);
            idx_string = trim(idx_string);

            // Keep numeric-prefix parsing for last-N offsets as existing JSONB
            // path behavior.
            const auto result = std::from_chars(idx_string.data(),
                                                idx_string.data() + idx_string.size(), index);
            if (result.ec != std::errc()) {
                LOG(WARNING) << "Invalid index in JSON path: '" << idx_string << "'";
                return false;
            }
        } else if (idx_string.size() > LAST_LEN) {
            // Trailing characters after "last" without a minus sign are rejected.
            return false;
        }

        // Offsets from the end are stored as -N - 1. Negate in a wider type so
        // that a parsed value of INT_MIN cannot overflow.
        const int from_end = static_cast<int>(-static_cast<long long>(index) - 1);
        path->add_leg_to_leg_vector(std::make_unique<leg_info>(nullptr, 0, from_end, ARRAY_CODE));
        return true;
    }

    // Preserve legacy numeric-prefix parsing for array indexes. std::from_chars
    // may stop before the end (for example [1.5] is parsed as index 1), and
    // current JSONB path semantics treat that as supported behavior.
    const auto result =
            std::from_chars(idx_string.data(), idx_string.data() + idx_string.size(), index);
    if (result.ec != std::errc()) {
        return false;
    }

    path->add_leg_to_leg_vector(std::make_unique<leg_info>(nullptr, 0, index, ARRAY_CODE));
    return true;
}
1605
1606
9.79k
/// Parses one member (object key) leg of a JSON path and appends it to `path`.
///
/// Forms handled by the code below:
///   *         member wildcard; also sets path->_is_wildcard.
///   name      unquoted key, terminated by BEGIN_MEMBER, BEGIN_ARRAY or end of
///             input; whitespace is rejected.
///   "name"    double-quoted key; whitespace, BEGIN_MEMBER and BEGIN_ARRAY are
///             allowed inside the quotes, and the quotes are not part of the leg.
/// An ESCAPE character keeps the following character verbatim; escapes are
/// removed from the leg through stream->remove_escapes() before it is stored.
///
/// @param stream  cursor over the path text, positioned on the first character
///                of the member name.
/// @param path    receives the parsed leg through add_leg_to_leg_vector().
/// @return true when a leg was appended, false when the member text is empty
///         or malformed (unterminated quote, dangling escape, bare whitespace).
inline bool JsonbPath::parse_member(Stream* stream, JsonbPath* path) {
    if (stream->exhausted()) {
        return false;
    }

    if (stream->peek() == WILDCARD) {
        // Called by function_jsonb.cpp, the variables passed in originate from a mutable block;
        // using const_cast is acceptable.
        stream->set_leg_ptr(const_cast<char*>(stream->position()));
        stream->add_leg_len();
        stream->skip(1);
        path->add_leg_to_leg_vector(std::make_unique<leg_info>(
                stream->get_leg_ptr(), stream->get_leg_len(), 0, MEMBER_CODE));
        path->_is_wildcard = true;
        return true;
    }

    // Called by function_jsonb.cpp, the variables passed in originate from a mutable block;
    // using const_cast is acceptable.
    stream->set_leg_ptr(const_cast<char*>(stream->position()));

    const char* left_quotation_marks = nullptr;
    const char* right_quotation_marks = nullptr;

    for (; !stream->exhausted(); stream->advance()) {
        const auto current = stream->peek();
        const bool inside_quotes = left_quotation_marks != nullptr;

        // Only accept space characters quoted by double quotes. The cast avoids
        // undefined behaviour in std::isspace for bytes >= 0x80 (e.g. UTF-8 keys).
        if (std::isspace(static_cast<unsigned char>(current)) && !inside_quotes) {
            return false;
        }

        if (current == ESCAPE) {
            // Count both the escape character and the character it protects.
            stream->add_leg_len();
            stream->skip(1);
            stream->add_leg_len();
            stream->set_has_escapes(true);
            if (stream->exhausted()) {
                // An escape character with nothing after it.
                return false;
            }
            continue;
        }

        if (current == DOUBLE_QUOTE) {
            if (!inside_quotes) {
                left_quotation_marks = stream->position();
                // The leg starts right after the opening quote.
                // Called by function_jsonb.cpp, the variables passed in originate from a mutable block;
                // using const_cast is acceptable.
                stream->set_leg_ptr(const_cast<char*>(left_quotation_marks + 1));
                continue;
            }
            // Closing quote: consume it and finish the leg.
            right_quotation_marks = stream->position();
            stream->skip(1);
            break;
        }

        // Outside quotes, the start of the next leg terminates this one.
        if ((current == BEGIN_MEMBER || current == BEGIN_ARRAY) && !inside_quotes) {
            break;
        }

        stream->add_leg_len();
    }

    const bool unterminated_quote =
            left_quotation_marks != nullptr && right_quotation_marks == nullptr;
    if (unterminated_quote || stream->get_leg_ptr() == nullptr || stream->get_leg_len() == 0) {
        return false; //invalid json path
    }

    if (stream->get_has_escapes()) {
        stream->remove_escapes();
    }

    path->add_leg_to_leg_vector(std::make_unique<leg_info>(
            stream->get_leg_ptr(), stream->get_leg_len(), 0, MEMBER_CODE));
    return true;
}
1680
1681
// Compile-time guard for the wrapper types that are overlaid on packed JSONB
// bytes: each must satisfy is_pod_v (per the messages below, standard layout
// and trivial). The file header explains the motivation: these classes are
// never constructed and are instead cast directly onto the serialized buffer.
static_assert(is_pod_v<JsonbDocument>, "JsonbDocument must be standard layout and trivial");
1682
static_assert(is_pod_v<JsonbValue>, "JsonbValue must be standard layout and trivial");
1683
static_assert(is_pod_v<JsonbDecimal32>, "JsonbDecimal32 must be standard layout and trivial");
1684
static_assert(is_pod_v<JsonbDecimal64>, "JsonbDecimal64 must be standard layout and trivial");
1685
static_assert(is_pod_v<JsonbDecimal128>, "JsonbDecimal128 must be standard layout and trivial");
1686
static_assert(is_pod_v<JsonbDecimal256>, "JsonbDecimal256 must be standard layout and trivial");
1687
static_assert(is_pod_v<JsonbInt8Val>, "JsonbInt8Val must be standard layout and trivial");
1688
static_assert(is_pod_v<JsonbInt32Val>, "JsonbInt32Val must be standard layout and trivial");
1689
static_assert(is_pod_v<JsonbInt64Val>, "JsonbInt64Val must be standard layout and trivial");
1690
static_assert(is_pod_v<JsonbInt128Val>, "JsonbInt128Val must be standard layout and trivial");
1691
static_assert(is_pod_v<JsonbDoubleVal>, "JsonbDoubleVal must be standard layout and trivial");
1692
static_assert(is_pod_v<JsonbFloatVal>, "JsonbFloatVal must be standard layout and trivial");
1693
static_assert(is_pod_v<JsonbBinaryVal>, "JsonbBinaryVal must be standard layout and trivial");
1694
static_assert(is_pod_v<ContainerVal>, "ContainerVal must be standard layout and trivial");
1695
1696
// Pins the field offsets of a decimal value type: `precision` at byte 0,
// `scale` at byte 4 and `value` at byte 8. The expansion already ends with a
// semicolon, so the invocations below are written without one.
// NOTE(review): the macro is not #undef'd in the remainder of this header, so
// it stays defined for every translation unit that includes it - confirm that
// this is intended.
#define ASSERT_DECIMAL_LAYOUT(type)                \
1697
    static_assert(offsetof(type, precision) == 0); \
1698
    static_assert(offsetof(type, scale) == 4);     \
1699
    static_assert(offsetof(type, value) == 8);
1700
1701
ASSERT_DECIMAL_LAYOUT(JsonbDecimal32)
1702
ASSERT_DECIMAL_LAYOUT(JsonbDecimal64)
1703
ASSERT_DECIMAL_LAYOUT(JsonbDecimal128)
1704
ASSERT_DECIMAL_LAYOUT(JsonbDecimal256)
1705
1706
// Pins the `num` field of a numeric value type to offset 0. The expansion
// already ends with a semicolon, so the invocations below are written without one.
// NOTE(review): JsonbFloatVal has an is_pod_v check earlier in this file but no
// layout check here - confirm whether the omission is intentional.
#define ASSERT_NUMERIC_LAYOUT(type) static_assert(offsetof(type, num) == 0);
1707
1708
ASSERT_NUMERIC_LAYOUT(JsonbInt8Val)
1709
ASSERT_NUMERIC_LAYOUT(JsonbInt32Val)
1710
ASSERT_NUMERIC_LAYOUT(JsonbInt64Val)
1711
ASSERT_NUMERIC_LAYOUT(JsonbInt128Val)
1712
ASSERT_NUMERIC_LAYOUT(JsonbDoubleVal)
1713
1714
// Binary and container values share the same leading layout: `size` at byte 0
// and `payload` at byte 4.
static_assert(offsetof(JsonbBinaryVal, size) == 0);
1715
static_assert(offsetof(JsonbBinaryVal, payload) == 4);
1716
1717
static_assert(offsetof(ContainerVal, size) == 0);
1718
static_assert(offsetof(ContainerVal, payload) == 4);
1719
1720
#pragma pack(pop)
1721
#if defined(__clang__)
1722
#pragma clang diagnostic pop
1723
#endif
1724
} // namespace doris
1725
1726
#endif // JSONB_JSONBDOCUMENT_H