Coverage Report

Created: 2026-06-10 14:59

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/util/jsonb_document.h
Line
Count
Source
1
/*
2
 *  Copyright (c) 2014, Facebook, Inc.
3
 *  All rights reserved.
4
 *
5
 *  This source code is licensed under the BSD-style license found in the
6
 *  LICENSE file in the root directory of this source tree. An additional grant
7
 *  of patent rights can be found in the PATENTS file in the same directory.
8
 *
9
 */
10
11
/*
12
 * This header defines JsonbDocument, JsonbKeyValue, and various value classes
13
 * which are derived from JsonbValue, and a forward iterator for container
14
 * values - essentially everything that is related to JSONB binary data
15
 * structures.
16
 *
17
 * Implementation notes:
18
 *
19
 * None of the classes in this header file can be instantiated directly (i.e.
20
 * you cannot create a JsonbKeyValue or JsonbValue object - all constructors
21
 * are declared non-public). We use the classes as wrappers on the packed JSONB
22
 * bytes (serialized), and cast the classes (types) to the underlying packed
23
 * byte array.
24
 *
25
 * For the same reason, we cannot define any JSONB value class to be virtual,
26
 * since we never call constructors, and will not instantiate vtbl and vptrs.
27
 *
28
 * Therefore, the classes are defined as packed structures (i.e. no data
29
 * alignment and padding), and the private member variables of the classes are
30
 * defined precisely in the same order as the JSONB spec. This ensures we
31
 * access the packed JSONB bytes correctly.
32
 *
33
 * The packed structures are highly optimized for in-place operations with low
34
 * overhead. The reads (and in-place writes) are performed directly on packed
35
 * bytes. There is no memory allocation at all at runtime.
36
 *
37
 * For updates/writes of values that will expand the original JSONB size, the
38
 * write will fail, and the caller needs to handle buffer increase.
39
 *
40
 * ** Iterator **
41
 * Both ObjectVal class and ArrayVal class have iterator type that you can use
42
 * to declare an iterator on a container object to go through the key-value
43
 * pairs or value list. The iterator has both non-const and const types.
44
 *
45
 * Note: iterators are forward direction only.
46
 *
47
 * ** Query **
48
 * Querying into containers is through the member functions find (for key/value
49
 * pairs) and get (for array elements), and is in streaming style. We don't
50
 * need to read/scan the whole JSONB packed bytes in order to return results.
51
 * Once the key/index is found, we will stop searching.  You can use text to query
52
 * both objects and arrays (for arrays, text will be converted to an integer index),
53
 * and use index to retrieve from array. Array index is 0-based.
54
 *
55
 * ** External dictionary **
56
 * During query processing, you can also pass a call-back function, so the
57
 * search will first try to check if the key string exists in the dictionary.
58
 * If so, search will be based on the id instead of the key string.
59
 * @author Tian Xia <tianx@fb.com>
60
 * 
61
 * this file is copied from 
62
 * https://github.com/facebook/mysql-5.6/blob/fb-mysql-5.6.35/fbson/FbsonDocument.h
63
 * and modified by Doris
64
 */
65
66
#ifndef JSONB_JSONBDOCUMENT_H
67
#define JSONB_JSONBDOCUMENT_H
68
69
#include <algorithm>
70
#include <array>
71
#include <cctype>
72
#include <charconv>
73
#include <cmath>
74
#include <cstddef>
75
#include <cstdint>
76
#include <limits>
77
#include <string>
78
#include <string_view>
79
#include <type_traits>
80
81
#include "common/compiler_util.h" // IWYU pragma: keep
82
#include "common/status.h"
83
#include "core/data_type/define_primitive_type.h"
84
#include "core/string_ref.h"
85
#include "core/types.h"
86
#include "util/string_util.h"
87
88
// #include "util/string_parser.hpp"
89
90
// Concept to check for supported decimal types
91
template <typename T>
92
concept JsonbDecimalType =
93
        std::same_as<T, doris::Decimal256> || std::same_as<T, doris::Decimal64> ||
94
        std::same_as<T, doris::Decimal128V3> || std::same_as<T, doris::Decimal32>;
95
96
namespace doris {
97
98
template <typename T>
99
constexpr bool is_pod_v = std::is_trivial_v<T> && std::is_standard_layout_v<T>;
100
101
struct JsonbStringVal;
102
struct ObjectVal;
103
struct ArrayVal;
104
struct JsonbBinaryVal;
105
struct ContainerVal;
106
107
template <JsonbDecimalType T>
108
struct JsonbDecimalVal;
109
110
using JsonbDecimal256 = JsonbDecimalVal<Decimal256>;
111
using JsonbDecimal128 = JsonbDecimalVal<Decimal128V3>;
112
using JsonbDecimal64 = JsonbDecimalVal<Decimal64>;
113
using JsonbDecimal32 = JsonbDecimalVal<Decimal32>;
114
115
template <typename T>
116
    requires std::is_integral_v<T> || std::is_floating_point_v<T>
117
struct NumberValT;
118
119
using JsonbInt8Val = NumberValT<int8_t>;
120
using JsonbInt16Val = NumberValT<int16_t>;
121
using JsonbInt32Val = NumberValT<int32_t>;
122
using JsonbInt64Val = NumberValT<int64_t>;
123
using JsonbInt128Val = NumberValT<int128_t>;
124
using JsonbDoubleVal = NumberValT<double>;
125
using JsonbFloatVal = NumberValT<float>;
126
127
template <typename T>
128
concept JsonbPodType = (std::same_as<T, JsonbStringVal> || std::same_as<T, ObjectVal> ||
129
                        std::same_as<T, ContainerVal> || std::same_as<T, ArrayVal> ||
130
                        std::same_as<T, JsonbBinaryVal> || std::same_as<T, JsonbDecimal32> ||
131
                        std::same_as<T, JsonbDecimal64> || std::same_as<T, JsonbDecimal128> ||
132
                        std::same_as<T, JsonbDecimal256> || std::same_as<T, JsonbDecimal32> ||
133
                        std::same_as<T, JsonbInt8Val> || std::same_as<T, JsonbInt16Val> ||
134
                        std::same_as<T, JsonbInt32Val> || std::same_as<T, JsonbInt64Val> ||
135
                        std::same_as<T, JsonbInt128Val> || std::same_as<T, JsonbFloatVal> ||
136
                        std::same_as<T, JsonbFloatVal> || std::same_as<T, JsonbDoubleVal>);
137
138
4.90M
#define JSONB_VER 1
139
140
using int128_t = __int128;
141
142
// forward declaration
143
struct JsonbValue;
144
145
class JsonbOutStream;
146
147
template <class OS_TYPE>
148
class JsonbWriterT;
149
150
using JsonbWriter = JsonbWriterT<JsonbOutStream>;
151
152
const int MaxNestingLevel = 100;
153
154
/*
155
 * JsonbType defines 10 primitive types and 2 container types, as described
156
 * below.
157
 * NOTE: Do NOT modify the existing values or their order in this enum.
158
 *      You may only append new entries at the end before `NUM_TYPES`.
159
 *      This enum will be used in serialized data and/or persisted data.
160
 *      Changing existing values may break backward compatibility
161
 *      with previously stored or transmitted data.
162
 *
163
 * primitive_value ::=
164
 *   0x00        //null value (0 byte)
165
 * | 0x01        //boolean true (0 byte)
166
 * | 0x02        //boolean false (0 byte)
167
 * | 0x03 int8   //char/int8 (1 byte)
168
 * | 0x04 int16  //int16 (2 bytes)
169
 * | 0x05 int32  //int32 (4 bytes)
170
 * | 0x06 int64  //int64 (8 bytes)
171
 * | 0x07 double //floating point (8 bytes)
172
 * | 0x08 string //variable length string
173
 * | 0x09 binary //variable length binary
174
 *
175
 * container ::=
176
 *   0x0A int32 key_value_list //object, int32 is the total bytes of the object
177
 * | 0x0B int32 value_list     //array, int32 is the total bytes of the array
178
 */
179
enum class JsonbType : char {
180
    T_Null = 0x00,
181
    T_True = 0x01,
182
    T_False = 0x02,
183
    T_Int8 = 0x03,
184
    T_Int16 = 0x04,
185
    T_Int32 = 0x05,
186
    T_Int64 = 0x06,
187
    T_Double = 0x07,
188
    T_String = 0x08,
189
    T_Binary = 0x09,
190
    T_Object = 0x0A,
191
    T_Array = 0x0B,
192
    T_Int128 = 0x0C,
193
    T_Float = 0x0D,
194
    T_Decimal32 = 0x0E,  // DecimalV3 only
195
    T_Decimal64 = 0x0F,  // DecimalV3 only
196
    T_Decimal128 = 0x10, // DecimalV3 only
197
    T_Decimal256 = 0x11, // DecimalV3 only
198
    NUM_TYPES,
199
};
200
201
inline PrimitiveType get_primitive_type_from_json_type(JsonbType json_type) {
202
    switch (json_type) {
203
    case JsonbType::T_Null:
204
        return TYPE_NULL;
205
    case JsonbType::T_True:
206
    case JsonbType::T_False:
207
        return TYPE_BOOLEAN;
208
    case JsonbType::T_Int8:
209
        return TYPE_TINYINT;
210
    case JsonbType::T_Int16:
211
        return TYPE_SMALLINT;
212
    case JsonbType::T_Int32:
213
        return TYPE_INT;
214
    case JsonbType::T_Int64:
215
        return TYPE_BIGINT;
216
    case JsonbType::T_Double:
217
        return TYPE_DOUBLE;
218
    case JsonbType::T_String:
219
        return TYPE_STRING;
220
    case JsonbType::T_Binary:
221
        return TYPE_BINARY;
222
    case JsonbType::T_Object:
223
        return TYPE_STRUCT;
224
    case JsonbType::T_Array:
225
        return TYPE_ARRAY;
226
    case JsonbType::T_Int128:
227
        return TYPE_LARGEINT;
228
    case JsonbType::T_Float:
229
        return TYPE_FLOAT;
230
    case JsonbType::T_Decimal32:
231
        return TYPE_DECIMAL32;
232
    case JsonbType::T_Decimal64:
233
        return TYPE_DECIMAL64;
234
    case JsonbType::T_Decimal128:
235
        return TYPE_DECIMAL128I;
236
    case JsonbType::T_Decimal256:
237
        return TYPE_DECIMAL256;
238
    default:
239
        throw Exception(ErrorCode::INTERNAL_ERROR, "Unsupported JsonbType: {}",
240
                        static_cast<int>(json_type));
241
    }
242
}
243
244
//for parse json path
245
constexpr char SCOPE = '$';
246
constexpr char BEGIN_MEMBER = '.';
247
constexpr char BEGIN_ARRAY = '[';
248
constexpr char END_ARRAY = ']';
249
constexpr char DOUBLE_QUOTE = '"';
250
constexpr char WILDCARD = '*';
251
constexpr char MINUS = '-';
252
constexpr char LAST[] = "last";
253
constexpr char ESCAPE = '\\';
254
constexpr unsigned int MEMBER_CODE = 0;
255
constexpr unsigned int ARRAY_CODE = 1;
256
257
/// A simple input stream class for the JSON path parser.
258
class Stream {
259
public:
260
    /// Creates an input stream reading from a character string.
261
    /// @param string  the input string
262
    /// @param length  the length of the input string
263
15.7k
    Stream(const char* string, size_t length) : m_position(string), m_end(string + length) {}
264
265
    /// Returns a pointer to the current position in the stream.
266
21.5k
    const char* position() const { return m_position; }
267
268
    /// Returns a pointer to the position just after the end of the stream.
269
0
    const char* end() const { return m_end; }
270
271
    /// Returns the number of bytes remaining in the stream.
272
476k
    size_t remaining() const {
273
476k
        assert(m_position <= m_end);
274
476k
        return m_end - m_position;
275
476k
    }
276
277
    /// Tells if the stream has been exhausted.
278
443k
    bool exhausted() const { return remaining() == 0; }
279
280
    /// Reads the next byte from the stream and moves the position forward.
281
15.7k
    char read() {
282
15.7k
        assert(!exhausted());
283
15.7k
        return *m_position++;
284
15.7k
    }
285
286
    /// Reads the next byte from the stream without moving the position forward.
287
266k
    char peek() const {
288
266k
        assert(!exhausted());
289
266k
        return *m_position;
290
266k
    }
291
292
    /// Moves the position to the next non-whitespace character.
293
69.9k
    void skip_whitespace() {
294
69.9k
        m_position = std::find_if_not(m_position, m_end, [](char c) { return std::isspace(c); });
295
69.9k
    }
296
297
    /// Moves the position n bytes forward.
298
33.0k
    void skip(size_t n) {
299
33.0k
        assert(remaining() >= n);
300
33.0k
        m_position += n;
301
33.0k
        skip_whitespace();
302
33.0k
    }
303
304
44.8k
    void advance() { m_position++; }
305
306
42.5k
    void clear_leg_ptr() { leg_ptr = nullptr; }
307
308
21.3k
    void set_leg_ptr(char* ptr) {
309
21.3k
        clear_leg_ptr();
310
21.3k
        leg_ptr = ptr;
311
21.3k
    }
312
313
32.5k
    char* get_leg_ptr() { return leg_ptr; }
314
315
21.1k
    void clear_leg_len() { leg_len = 0; }
316
317
44.8k
    void add_leg_len() { leg_len++; }
318
319
42.4k
    unsigned int get_leg_len() const { return leg_len; }
320
321
15
    void remove_escapes() {
322
15
        unsigned int new_len = 0;
323
110
        for (unsigned int i = 0; i < leg_len; ++i) {
324
96
            if (leg_ptr[i] != ESCAPE) {
325
63
                leg_ptr[new_len++] = leg_ptr[i];
326
63
                continue;
327
63
            }
328
329
33
            ++i;
330
33
            if (i >= leg_len) {
331
1
                break;
332
1
            }
333
334
32
            switch (leg_ptr[i]) {
335
2
            case 'b':
336
2
                leg_ptr[new_len++] = '\b';
337
2
                break;
338
2
            case 'f':
339
2
                leg_ptr[new_len++] = '\f';
340
2
                break;
341
4
            case 'n':
342
4
                leg_ptr[new_len++] = '\n';
343
4
                break;
344
2
            case 'r':
345
2
                leg_ptr[new_len++] = '\r';
346
2
                break;
347
2
            case 't':
348
2
                leg_ptr[new_len++] = '\t';
349
2
                break;
350
11
            case 'u': {
351
11
                if (i + 4 >= leg_len || leg_ptr[i + 1] != '0' || leg_ptr[i + 2] != '0') {
352
2
                    leg_ptr[new_len++] = leg_ptr[i];
353
2
                    break;
354
2
                }
355
356
18
                auto hex_to_int = [](char c) -> int {
357
18
                    if (c >= '0' && c <= '9') {
358
12
                        return c - '0';
359
12
                    }
360
6
                    if (c >= 'a' && c <= 'f') {
361
2
                        return c - 'a' + 10;
362
2
                    }
363
4
                    if (c >= 'A' && c <= 'F') {
364
1
                        return c - 'A' + 10;
365
1
                    }
366
3
                    return -1;
367
4
                };
368
9
                int high = hex_to_int(leg_ptr[i + 3]);
369
9
                int low = hex_to_int(leg_ptr[i + 4]);
370
9
                if (high < 0 || low < 0) {
371
3
                    leg_ptr[new_len++] = leg_ptr[i];
372
3
                    break;
373
3
                }
374
6
                leg_ptr[new_len++] = static_cast<char>((high << 4) | low);
375
6
                i += 4;
376
6
                break;
377
9
            }
378
9
            default:
379
9
                leg_ptr[new_len++] = leg_ptr[i];
380
9
                break;
381
32
            }
382
32
        }
383
15
        leg_ptr[new_len] = '\0';
384
15
        leg_len = new_len;
385
15
    }
386
387
21.1k
    void set_has_escapes(bool has) { has_escapes = has; }
388
389
9.71k
    bool get_has_escapes() const { return has_escapes; }
390
391
private:
392
    /// The current position in the stream.
393
    const char* m_position = nullptr;
394
395
    /// The end of the stream.
396
    const char* const m_end;
397
398
    ///path leg ptr
399
    char* leg_ptr = nullptr;
400
401
    ///path leg len
402
    unsigned int leg_len;
403
404
    ///Whether to contain escape characters
405
    bool has_escapes = false;
406
};
407
408
struct leg_info {
409
    ///path leg ptr
410
    char* leg_ptr = nullptr;
411
412
    ///path leg len
413
    unsigned int leg_len;
414
415
    ///array_index
416
    int array_index;
417
418
    ///type: 0 is member 1 is array
419
    unsigned int type;
420
421
    // NOLINTNEXTLINE(readability-non-const-parameter): str is an output parameter.
422
371
    bool to_string(std::string* str) const {
423
371
        if (type == MEMBER_CODE) {
424
127
            str->push_back(BEGIN_MEMBER);
425
127
            bool contains_space = false;
426
127
            std::string tmp;
427
373
            for (auto* it = leg_ptr; it != (leg_ptr + leg_len); ++it) {
428
246
                auto c = static_cast<unsigned char>(*it);
429
246
                if (std::isspace(c)) {
430
16
                    contains_space = true;
431
16
                }
432
433
246
                switch (*it) {
434
4
                case '"':
435
4
                    tmp.append("\\\"");
436
4
                    break;
437
2
                case ESCAPE:
438
2
                    tmp.append("\\\\");
439
2
                    break;
440
2
                case '\b':
441
2
                    tmp.append("\\b");
442
2
                    break;
443
2
                case '\f':
444
2
                    tmp.append("\\f");
445
2
                    break;
446
5
                case '\n':
447
5
                    tmp.append("\\n");
448
5
                    break;
449
3
                case '\r':
450
3
                    tmp.append("\\r");
451
3
                    break;
452
3
                case '\t':
453
3
                    tmp.append("\\t");
454
3
                    break;
455
225
                default:
456
225
                    if (c < 0x20) {
457
7
                        constexpr char hex[] = "0123456789abcdef";
458
7
                        tmp.append("\\u00");
459
7
                        tmp.push_back(hex[c >> 4]);
460
7
                        tmp.push_back(hex[c & 0x0F]);
461
218
                    } else {
462
218
                        tmp.push_back(*it);
463
218
                    }
464
225
                    break;
465
246
                }
466
246
            }
467
127
            if (contains_space) {
468
12
                str->push_back(DOUBLE_QUOTE);
469
12
            }
470
127
            str->append(tmp);
471
127
            if (contains_space) {
472
12
                str->push_back(DOUBLE_QUOTE);
473
12
            }
474
127
            return true;
475
244
        } else if (type == ARRAY_CODE) {
476
244
            str->push_back(BEGIN_ARRAY);
477
244
            std::string int_str = std::to_string(array_index);
478
244
            str->append(int_str);
479
244
            str->push_back(END_ARRAY);
480
244
            return true;
481
244
        } else {
482
0
            return false;
483
0
        }
484
371
    }
485
};
486
487
class JsonbPath {
488
public:
489
    // parse json path
490
    static bool parsePath(Stream* stream, JsonbPath* path);
491
492
    static bool parse_array(Stream* stream, JsonbPath* path);
493
    static bool parse_member(Stream* stream, JsonbPath* path);
494
495
    //return true if json path valid else return false
496
    bool seek(const char* string, size_t length);
497
498
22.0k
    void add_leg_to_leg_vector(std::unique_ptr<leg_info> leg) {
499
22.0k
        leg_vector.emplace_back(leg.release());
500
22.0k
    }
501
502
609
    void pop_leg_from_leg_vector() { leg_vector.pop_back(); }
503
504
    // NOLINTNEXTLINE(readability-non-const-parameter): res is an output parameter.
505
236
    bool to_string(std::string* res) const {
506
236
        res->push_back(SCOPE);
507
369
        for (const auto& leg : leg_vector) {
508
369
            auto valid = leg->to_string(res);
509
369
            if (!valid) {
510
0
                return false;
511
0
            }
512
369
        }
513
236
        return true;
514
236
    }
515
516
392k
    size_t get_leg_vector_size() const { return leg_vector.size(); }
517
518
1.24M
    leg_info* get_leg_from_leg_vector(size_t i) const { return leg_vector[i].get(); }
519
520
254
    bool is_wildcard() const { return _is_wildcard; }
521
149k
    bool is_supper_wildcard() const { return _is_supper_wildcard; }
522
523
79
    void clean() { leg_vector.clear(); }
524
525
private:
526
    std::vector<std::unique_ptr<leg_info>> leg_vector;
527
    bool _is_wildcard = false;        // whether the path is a wildcard path
528
    bool _is_supper_wildcard = false; // supper wildcard likes '$**.a' or '$**[1]'
529
};
530
531
/*
532
 * JsonbFwdIteratorT implements JSONB's iterator template.
533
 *
534
 * Note: it is a FORWARD iterator only, due to the design of the JSONB format.
535
 */
536
template <class Iter_Type, class Cont_Type>
537
class JsonbFwdIteratorT {
538
public:
539
    using iterator = Iter_Type;
540
    using pointer = typename std::iterator_traits<Iter_Type>::pointer;
541
    using reference = typename std::iterator_traits<Iter_Type>::reference;
542
543
    explicit JsonbFwdIteratorT() : current_(nullptr) {}
544
706k
    explicit JsonbFwdIteratorT(const iterator& i) : current_(i) {}
_ZN5doris17JsonbFwdIteratorTIPKNS_13JsonbKeyValueENS_9ObjectValEEC2ERKS3_
Line
Count
Source
544
368k
    explicit JsonbFwdIteratorT(const iterator& i) : current_(i) {}
_ZN5doris17JsonbFwdIteratorTIPKNS_10JsonbValueENS_8ArrayValEEC2ERKS3_
Line
Count
Source
544
338k
    explicit JsonbFwdIteratorT(const iterator& i) : current_(i) {}
545
546
    // allow non-const to const iterator conversion (same container type)
547
    template <class Iter_Ty>
548
    JsonbFwdIteratorT(const JsonbFwdIteratorT<Iter_Ty, Cont_Type>& rhs) : current_(rhs.base()) {}
549
550
1.33M
    bool operator==(const JsonbFwdIteratorT& rhs) const { return (current_ == rhs.current_); }
_ZNK5doris17JsonbFwdIteratorTIPKNS_13JsonbKeyValueENS_9ObjectValEEeqERKS5_
Line
Count
Source
550
571k
    bool operator==(const JsonbFwdIteratorT& rhs) const { return (current_ == rhs.current_); }
_ZNK5doris17JsonbFwdIteratorTIPKNS_10JsonbValueENS_8ArrayValEEeqERKS5_
Line
Count
Source
550
766k
    bool operator==(const JsonbFwdIteratorT& rhs) const { return (current_ == rhs.current_); }
551
552
1.05M
    bool operator!=(const JsonbFwdIteratorT& rhs) const { return !operator==(rhs); }
_ZNK5doris17JsonbFwdIteratorTIPKNS_13JsonbKeyValueENS_9ObjectValEEneERKS5_
Line
Count
Source
552
453k
    bool operator!=(const JsonbFwdIteratorT& rhs) const { return !operator==(rhs); }
_ZNK5doris17JsonbFwdIteratorTIPKNS_10JsonbValueENS_8ArrayValEEneERKS5_
Line
Count
Source
552
597k
    bool operator!=(const JsonbFwdIteratorT& rhs) const { return !operator==(rhs); }
553
554
437k
    bool operator<(const JsonbFwdIteratorT& rhs) const { return (current_ < rhs.current_); }
555
556
    bool operator>(const JsonbFwdIteratorT& rhs) const { return !operator<(rhs); }
557
558
660k
    JsonbFwdIteratorT& operator++() {
559
660k
        current_ = (iterator)(((char*)current_) + current_->numPackedBytes());
560
660k
        return *this;
561
660k
    }
_ZN5doris17JsonbFwdIteratorTIPKNS_13JsonbKeyValueENS_9ObjectValEEppEv
Line
Count
Source
558
445k
    JsonbFwdIteratorT& operator++() {
559
445k
        current_ = (iterator)(((char*)current_) + current_->numPackedBytes());
560
445k
        return *this;
561
445k
    }
_ZN5doris17JsonbFwdIteratorTIPKNS_10JsonbValueENS_8ArrayValEEppEv
Line
Count
Source
558
214k
    JsonbFwdIteratorT& operator++() {
559
214k
        current_ = (iterator)(((char*)current_) + current_->numPackedBytes());
560
214k
        return *this;
561
214k
    }
562
563
    JsonbFwdIteratorT operator++(int) {
564
        auto tmp = *this;
565
        current_ = (iterator)(((char*)current_) + current_->numPackedBytes());
566
        return tmp;
567
    }
568
569
214k
    explicit operator pointer() { return current_; }
570
571
1.34k
    reference operator*() const { return *current_; }
_ZNK5doris17JsonbFwdIteratorTIPKNS_10JsonbValueENS_8ArrayValEEdeEv
Line
Count
Source
571
311
    reference operator*() const { return *current_; }
_ZNK5doris17JsonbFwdIteratorTIPKNS_13JsonbKeyValueENS_9ObjectValEEdeEv
Line
Count
Source
571
1.03k
    reference operator*() const { return *current_; }
572
573
1.52M
    pointer operator->() const { return current_; }
574
575
    iterator base() const { return current_; }
576
577
private:
578
    iterator current_;
579
};
580
using JsonbTypeUnder = std::underlying_type_t<JsonbType>;
581
582
#if defined(__clang__)
583
#pragma clang diagnostic push
584
#pragma clang diagnostic ignored "-Wzero-length-array"
585
#endif
586
#pragma pack(push, 1)
587
588
/*
589
 * JsonbDocument is the main object that accesses and queries JSONB packed
590
 * bytes. NOTE: JsonbDocument only allows object container as the top level
591
 * JSONB value. However, you can use the static method "createValue" to get any
592
 * JsonbValue object from the packed bytes.
593
 *
594
 * JsonbDocument object also dereferences to an object container value
595
 * (ObjectVal) once JSONB is loaded.
596
 *
597
 * ** Load **
598
 * JsonbDocument is usable after loading packed bytes (memory location) into
599
 * the object. We only need the header and first few bytes of the payload after
600
 * header to verify the JSONB.
601
 *
602
 * Note: creating a JsonbDocument (through checkAndCreateDocument) does not allocate
603
 * any memory. The document object is an efficient wrapper on the packed bytes
604
 * which is accessed directly.
605
 *
606
 * ** Query **
607
 * Query is through dereferencing into ObjectVal.
608
 */
609
class JsonbDocument {
610
public:
611
    // create a JsonbDocument object from JSONB packed bytes
612
    [[nodiscard]] static Status checkAndCreateDocument(const char* pb, size_t size,
613
                                                       const JsonbDocument** doc);
614
615
    // create a JsonbValue from JSONB packed bytes
616
    static const JsonbValue* createValue(const char* pb, size_t size);
617
618
0
    uint8_t version() const { return header_.ver_; }
619
620
726k
    const JsonbValue* getValue() const { return ((const JsonbValue*)payload_); }
621
622
    unsigned int numPackedBytes() const;
623
624
    const ObjectVal* operator->() const;
625
626
private:
627
    /*
628
   * JsonbHeader class defines JSONB header (internal to JsonbDocument).
629
   *
630
   * Currently it only contains version information (1-byte). We may expand the
631
   * header to include checksum of the JSONB binary for more security.
632
   */
633
    struct JsonbHeader {
634
        uint8_t ver_;
635
    } header_;
636
637
    char payload_[0];
638
};
639
640
/*
641
 * JsonbKeyValue class defines JSONB key type, as described below.
642
 *
643
 * key ::=
644
 *   0x00 int8    //1-byte dictionary id
645
 * | int8 (byte*) //int8 (>0) is the size of the key string
646
 *
647
 * value ::= primitive_value | container
648
 *
649
 * JsonbKeyValue can be either an id mapping to the key string in an external
650
 * dictionary, or it is the original key string. Whether to read an id or a
651
 * string is decided by the first byte (size).
652
 *
653
 * Note: a key object must be followed by a value object. Therefore, a key
654
 * object implicitly refers to a key-value pair, and you can get the value
655
 * object right after the key object. The function numPackedBytes hence
656
 * indicates the total size of the key-value pair, so that we will be able to go
657
 * to the next pair from the key.
658
 *
659
 * ** Dictionary size **
660
 * By default, the dictionary size is 255 (1-byte). Users can define
661
 * "USE_LARGE_DICT" to increase the dictionary size to 65535 (2-byte).
662
 */
663
class JsonbKeyValue {
664
public:
665
    // now we use sMaxKeyId to represent an empty key
666
    static const int sMaxKeyId = 65535;
667
    using keyid_type = uint16_t;
668
669
    static const uint8_t sMaxKeyLen = 64;
670
671
    // size of the key. 0 indicates it is stored as id
672
667k
    uint8_t klen() const { return size; }
673
674
    // get the key string. Note the string may not be null terminated.
675
335k
    const char* getKeyStr() const { return key.str_; }
676
677
111k
    keyid_type getKeyId() const { return key.id_; }
678
679
891k
    unsigned int keyPackedBytes() const {
680
891k
        return size ? (sizeof(size) + size) : (sizeof(size) + sizeof(keyid_type));
681
891k
    }
682
683
434k
    const JsonbValue* value() const {
684
434k
        return (const JsonbValue*)(((char*)this) + keyPackedBytes());
685
434k
    }
686
687
    // size of the total packed bytes (key+value)
688
    unsigned int numPackedBytes() const;
689
690
    uint8_t size;
691
692
    union key_ {
693
        keyid_type id_;
694
        char str_[1];
695
    } key;
696
};
697
698
struct JsonbFindResult {
699
    const JsonbValue* value = nullptr;   // found value
700
    std::unique_ptr<JsonbWriter> writer; // writer to write the value
701
    bool is_wildcard = false;            // whether the path is a wildcard path
702
};
703
704
/*
705
 * JsonbValue is the base class of all JSONB types. It contains only one member
706
 * variable - type info, which can be retrieved by member functions is[Type]()
707
 * or type().
708
 */
709
struct JsonbValue {
710
    static const uint32_t sMaxValueLen = 1 << 24; // 16M
711
712
215k
    bool isNull() const { return (type == JsonbType::T_Null); }
713
342
    bool isTrue() const { return (type == JsonbType::T_True); }
714
11
    bool isFalse() const { return (type == JsonbType::T_False); }
715
487
    bool isInt() const { return isInt8() || isInt16() || isInt32() || isInt64() || isInt128(); }
716
487
    bool isInt8() const { return (type == JsonbType::T_Int8); }
717
345
    bool isInt16() const { return (type == JsonbType::T_Int16); }
718
248
    bool isInt32() const { return (type == JsonbType::T_Int32); }
719
251
    bool isInt64() const { return (type == JsonbType::T_Int64); }
720
763
    bool isDouble() const { return (type == JsonbType::T_Double); }
721
682
    bool isFloat() const { return (type == JsonbType::T_Float); }
722
19.4k
    bool isString() const { return (type == JsonbType::T_String); }
723
15.3k
    bool isBinary() const { return (type == JsonbType::T_Binary); }
724
10.1k
    bool isObject() const { return (type == JsonbType::T_Object); }
725
9.95k
    bool isArray() const { return (type == JsonbType::T_Array); }
726
246
    bool isInt128() const { return (type == JsonbType::T_Int128); }
727
408
    bool isDecimal() const {
728
408
        return (type == JsonbType::T_Decimal32 || type == JsonbType::T_Decimal64 ||
729
408
                type == JsonbType::T_Decimal128 || type == JsonbType::T_Decimal256);
730
408
    }
731
    bool isDecimal32() const { return (type == JsonbType::T_Decimal32); }
732
    bool isDecimal64() const { return (type == JsonbType::T_Decimal64); }
733
    bool isDecimal128() const { return (type == JsonbType::T_Decimal128); }
734
    bool isDecimal256() const { return (type == JsonbType::T_Decimal256); }
735
736
    PrimitiveType get_primitive_type() const { return get_primitive_type_from_json_type(type); }
737
738
555
    const char* typeName() const {
739
555
        switch (type) {
740
24
        case JsonbType::T_Null:
741
24
            return "null";
742
20
        case JsonbType::T_True:
743
40
        case JsonbType::T_False:
744
40
            return "bool";
745
44
        case JsonbType::T_Int8:
746
82
        case JsonbType::T_Int16:
747
96
        case JsonbType::T_Int32:
748
96
            return "int";
749
126
        case JsonbType::T_Int64:
750
126
            return "bigint";
751
18
        case JsonbType::T_Int128:
752
18
            return "largeint";
753
43
        case JsonbType::T_Double:
754
43
            return "double";
755
0
        case JsonbType::T_Float:
756
0
            return "float";
757
67
        case JsonbType::T_String:
758
67
            return "string";
759
0
        case JsonbType::T_Binary:
760
0
            return "binary";
761
103
        case JsonbType::T_Object:
762
103
            return "object";
763
38
        case JsonbType::T_Array:
764
38
            return "array";
765
0
        case JsonbType::T_Decimal32:
766
0
            return "Decimal32";
767
0
        case JsonbType::T_Decimal64:
768
0
            return "Decimal64";
769
0
        case JsonbType::T_Decimal128:
770
0
            return "Decimal128";
771
0
        case JsonbType::T_Decimal256:
772
0
            return "Decimal256";
773
0
        default:
774
0
            return "unknown";
775
555
        }
776
555
    }
777
778
    // size of the total packed bytes
779
    unsigned int numPackedBytes() const;
780
781
    // size of the value in bytes
782
    unsigned int size() const;
783
784
    //Get the number of jsonbvalue elements
785
    int numElements() const;
786
787
    //Whether to include the jsonbvalue rhs
788
    bool contains(const JsonbValue* rhs) const;
789
790
    // find the JSONB value by JsonbPath
791
    JsonbFindResult findValue(JsonbPath& path) const;
792
    friend class JsonbDocument;
793
794
    JsonbType type; // type info
795
796
    char payload[0]; // payload, which is the packed bytes of the value
797
798
    /**
799
    * @brief Unpacks the underlying Jsonb binary content as a pointer to type `T`.
800
    *
801
    * @tparam T A POD (Plain Old Data) type that must satisfy the `JsonbPodType` concept.
802
    *           This ensures that `T` is trivially copyable, standard-layout, and safe to
803
    *           reinterpret from raw bytes without invoking undefined behavior.
804
    *
805
    * @return A pointer to a `const T` object, interpreted from the internal buffer.
806
    *
807
    * @note The caller must ensure that the current JsonbValue actually contains data
808
    *       compatible with type `T`, otherwise the result is undefined.
809
    */
810
    template <JsonbPodType T>
811
2.81M
    const T* unpack() const {
812
2.81M
        static_assert(is_pod_v<T>, "T must be a POD type");
813
2.81M
        return reinterpret_cast<const T*>(payload);
814
2.81M
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_9ObjectValEEEPKT_v
Line
Count
Source
811
249k
    const T* unpack() const {
812
249k
        static_assert(is_pod_v<T>, "T must be a POD type");
813
249k
        return reinterpret_cast<const T*>(payload);
814
249k
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIaEEEEPKT_v
Line
Count
Source
811
32.1k
    const T* unpack() const {
812
32.1k
        static_assert(is_pod_v<T>, "T must be a POD type");
813
32.1k
        return reinterpret_cast<const T*>(payload);
814
32.1k
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIsEEEEPKT_v
Line
Count
Source
811
42.4k
    const T* unpack() const {
812
42.4k
        static_assert(is_pod_v<T>, "T must be a POD type");
813
42.4k
        return reinterpret_cast<const T*>(payload);
814
42.4k
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIiEEEEPKT_v
Line
Count
Source
811
16.8k
    const T* unpack() const {
812
16.8k
        static_assert(is_pod_v<T>, "T must be a POD type");
813
16.8k
        return reinterpret_cast<const T*>(payload);
814
16.8k
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIlEEEEPKT_v
Line
Count
Source
811
204k
    const T* unpack() const {
812
204k
        static_assert(is_pod_v<T>, "T must be a POD type");
813
204k
        return reinterpret_cast<const T*>(payload);
814
204k
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTInEEEEPKT_v
Line
Count
Source
811
18.6k
    const T* unpack() const {
812
18.6k
        static_assert(is_pod_v<T>, "T must be a POD type");
813
18.6k
        return reinterpret_cast<const T*>(payload);
814
18.6k
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIdEEEEPKT_v
Line
Count
Source
811
11.9k
    const T* unpack() const {
812
11.9k
        static_assert(is_pod_v<T>, "T must be a POD type");
813
11.9k
        return reinterpret_cast<const T*>(payload);
814
11.9k
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_10NumberValTIfEEEEPKT_v
Line
Count
Source
811
4.58k
    const T* unpack() const {
812
4.58k
        static_assert(is_pod_v<T>, "T must be a POD type");
813
4.58k
        return reinterpret_cast<const T*>(payload);
814
4.58k
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_15JsonbDecimalValINS_7DecimalIiEEEEEEPKT_v
Line
Count
Source
811
62
    const T* unpack() const {
812
62
        static_assert(is_pod_v<T>, "T must be a POD type");
813
62
        return reinterpret_cast<const T*>(payload);
814
62
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_15JsonbDecimalValINS_7DecimalIlEEEEEEPKT_v
Line
Count
Source
811
45
    const T* unpack() const {
812
45
        static_assert(is_pod_v<T>, "T must be a POD type");
813
45
        return reinterpret_cast<const T*>(payload);
814
45
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_15JsonbDecimalValINS_12Decimal128V3EEEEEPKT_v
Line
Count
Source
811
47
    const T* unpack() const {
812
47
        static_assert(is_pod_v<T>, "T must be a POD type");
813
47
        return reinterpret_cast<const T*>(payload);
814
47
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_15JsonbDecimalValINS_7DecimalIN4wide7integerILm256EiEEEEEEEEPKT_v
Line
Count
Source
811
13
    const T* unpack() const {
812
13
        static_assert(is_pod_v<T>, "T must be a POD type");
813
13
        return reinterpret_cast<const T*>(payload);
814
13
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_14JsonbBinaryValEEEPKT_v
Line
Count
Source
811
335k
    const T* unpack() const {
812
335k
        static_assert(is_pod_v<T>, "T must be a POD type");
813
335k
        return reinterpret_cast<const T*>(payload);
814
335k
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_12ContainerValEEEPKT_v
Line
Count
Source
811
1.04M
    const T* unpack() const {
812
1.04M
        static_assert(is_pod_v<T>, "T must be a POD type");
813
1.04M
        return reinterpret_cast<const T*>(payload);
814
1.04M
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_8ArrayValEEEPKT_v
Line
Count
Source
811
368k
    const T* unpack() const {
812
368k
        static_assert(is_pod_v<T>, "T must be a POD type");
813
368k
        return reinterpret_cast<const T*>(payload);
814
368k
    }
_ZNK5doris10JsonbValue6unpackITkNS_12JsonbPodTypeENS_14JsonbStringValEEEPKT_v
Line
Count
Source
811
478k
    const T* unpack() const {
812
478k
        static_assert(is_pod_v<T>, "T must be a POD type");
813
478k
        return reinterpret_cast<const T*>(payload);
814
478k
    }
815
816
    // /**
817
    // * @brief Unpacks the underlying Jsonb binary content as a pointer to type `T`.
818
    // *
819
    // * @tparam T A POD (Plain Old Data) type that must satisfy the `JsonbPodType` concept.
820
    // *           This ensures that `T` is trivially copyable, standard-layout, and safe to
821
    // *           reinterpret from raw bytes without invoking undefined behavior.
822
    // *
823
    // * @return A pointer to a `T` object, interpreted from the internal buffer.
824
    // *
825
    // * @note The caller must ensure that the current JsonbValue actually contains data
826
    // *       compatible with type `T`, otherwise the result is undefined.
827
    // */
828
    // template <JsonbPodType T>
829
    // T* unpack() {
830
    //     static_assert(is_pod_v<T>, "T must be a POD type");
831
    //     return reinterpret_cast<T*>(payload);
832
    // }
833
834
    int128_t int_val() const;
835
};
836
837
// inline ObjectVal* JsonbDocument::operator->() {
838
//     return (((JsonbValue*)payload_)->unpack<ObjectVal>());
839
// }
840
841
130k
// Treats the document payload as a JsonbValue and exposes that value's
// packed bytes as an ObjectVal.
inline const ObjectVal* JsonbDocument::operator->() const {
    const auto* root_value = reinterpret_cast<const JsonbValue*>(payload_);
    return root_value->unpack<ObjectVal>();
}
844
845
/*
846
 * NumberValT is the template class (derived from JsonbValue) of all number
847
 * types (integers and double).
848
 */
849
template <typename T>
850
    requires std::is_integral_v<T> || std::is_floating_point_v<T>
851
struct NumberValT {
852
public:
853
330k
    T val() const { return num; }
_ZNK5doris10NumberValTIaE3valEv
Line
Count
Source
853
32.1k
    T val() const { return num; }
_ZNK5doris10NumberValTIsE3valEv
Line
Count
Source
853
42.4k
    T val() const { return num; }
_ZNK5doris10NumberValTIiE3valEv
Line
Count
Source
853
16.8k
    T val() const { return num; }
_ZNK5doris10NumberValTIlE3valEv
Line
Count
Source
853
204k
    T val() const { return num; }
_ZNK5doris10NumberValTInE3valEv
Line
Count
Source
853
18.6k
    T val() const { return num; }
_ZNK5doris10NumberValTIdE3valEv
Line
Count
Source
853
11.9k
    T val() const { return num; }
_ZNK5doris10NumberValTIfE3valEv
Line
Count
Source
853
4.58k
    T val() const { return num; }
854
855
    static unsigned int numPackedBytes() { return sizeof(JsonbValue) + sizeof(T); }
856
857
    T num;
858
};
859
860
188
// Returns the integer payload widened to int128_t.
// Throws Exception(INTERNAL_ERROR) when the value is not one of the integer
// types (Int8/Int16/Int32/Int64/Int128).
inline int128_t JsonbValue::int_val() const {
    if (isInt8()) {
        return unpack<JsonbInt8Val>()->val();
    }
    if (isInt16()) {
        return unpack<JsonbInt16Val>()->val();
    }
    if (isInt32()) {
        return unpack<JsonbInt32Val>()->val();
    }
    if (isInt64()) {
        return unpack<JsonbInt64Val>()->val();
    }
    if (isInt128()) {
        return unpack<JsonbInt128Val>()->val();
    }
    throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB value type: {}",
                    static_cast<int32_t>(type));
}
877
878
template <JsonbDecimalType T>
879
struct JsonbDecimalVal {
880
public:
881
    using NativeType = typename T::NativeType;
882
883
    // get the decimal value
884
129
    NativeType val() const {
885
        // to avoid memory alignment issues, we use memcpy to copy the value
886
129
        NativeType tmp;
887
129
        memcpy(&tmp, &value, sizeof(NativeType));
888
129
        return tmp;
889
129
    }
_ZNK5doris15JsonbDecimalValINS_7DecimalIiEEE3valEv
Line
Count
Source
884
48
    NativeType val() const {
885
        // to avoid memory alignment issues, we use memcpy to copy the value
886
48
        NativeType tmp;
887
48
        memcpy(&tmp, &value, sizeof(NativeType));
888
48
        return tmp;
889
48
    }
_ZNK5doris15JsonbDecimalValINS_7DecimalIlEEE3valEv
Line
Count
Source
884
38
    NativeType val() const {
885
        // to avoid memory alignment issues, we use memcpy to copy the value
886
38
        NativeType tmp;
887
38
        memcpy(&tmp, &value, sizeof(NativeType));
888
38
        return tmp;
889
38
    }
_ZNK5doris15JsonbDecimalValINS_12Decimal128V3EE3valEv
Line
Count
Source
884
37
    NativeType val() const {
885
        // to avoid memory alignment issues, we use memcpy to copy the value
886
37
        NativeType tmp;
887
37
        memcpy(&tmp, &value, sizeof(NativeType));
888
37
        return tmp;
889
37
    }
_ZNK5doris15JsonbDecimalValINS_7DecimalIN4wide7integerILm256EiEEEEE3valEv
Line
Count
Source
884
6
    NativeType val() const {
885
        // to avoid memory alignment issues, we use memcpy to copy the value
886
6
        NativeType tmp;
887
6
        memcpy(&tmp, &value, sizeof(NativeType));
888
6
        return tmp;
889
6
    }
890
891
160
    static constexpr int numPackedBytes() {
892
160
        return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value);
893
160
    }
_ZN5doris15JsonbDecimalValINS_7DecimalIiEEE14numPackedBytesEv
Line
Count
Source
891
51
    static constexpr int numPackedBytes() {
892
51
        return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value);
893
51
    }
_ZN5doris15JsonbDecimalValINS_7DecimalIlEEE14numPackedBytesEv
Line
Count
Source
891
63
    static constexpr int numPackedBytes() {
892
63
        return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value);
893
63
    }
_ZN5doris15JsonbDecimalValINS_12Decimal128V3EE14numPackedBytesEv
Line
Count
Source
891
40
    static constexpr int numPackedBytes() {
892
40
        return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value);
893
40
    }
_ZN5doris15JsonbDecimalValINS_7DecimalIN4wide7integerILm256EiEEEEE14numPackedBytesEv
Line
Count
Source
891
6
    static constexpr int numPackedBytes() {
892
6
        return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value);
893
6
    }
894
895
    uint32_t precision;
896
    uint32_t scale;
897
    NativeType value;
898
};
899
900
/*
901
 * JsonbBinaryVal is the base class (derived from JsonbValue) for string and binary
902
 * types. The size indicates the total bytes of the payload.
903
 */
904
struct JsonbBinaryVal {
905
public:
906
    // size of the blob payload only
907
17.1k
    unsigned int getBlobLen() const { return size; }
908
909
    // return the blob as byte array
910
263k
    const char* getBlob() const { return payload; }
911
912
    // size of the total packed bytes
913
310k
    unsigned int numPackedBytes() const { return sizeof(JsonbValue) + sizeof(size) + size; }
914
    friend class JsonbDocument;
915
916
    uint32_t size;
917
    char payload[0];
918
};
919
920
/*
921
 * String type
922
 * Note: JSONB string may not be a c-string (NULL-terminated)
923
 */
924
struct JsonbStringVal : public JsonbBinaryVal {
925
public:
926
    /*
927
    This function return the actual size of a string. Since for
928
    a string, it can be null-terminated with null paddings or it
929
    can take all the space in the payload without null in the end.
930
    So we need to check it to get the true actual length of a string.
931
  */
932
239k
    size_t length() const {
933
        // It's an empty string
934
239k
        if (0 == size) {
935
147
            return size;
936
147
        }
937
        // The string stored takes all the spaces in payload
938
239k
        if (payload[size - 1] != 0) {
939
239k
            return size;
940
239k
        }
941
        // It's shorter than the size of payload
942
0
        return strnlen(payload, size);
943
239k
    }
944
};
945
946
/*
947
 * ContainerVal is the base class (derived from JsonbValue) for object and
948
 * array types. The size indicates the total bytes of the payload.
949
 */
950
struct ContainerVal {
951
    // size of the container payload only
952
0
    unsigned int getContainerSize() const { return size; }
953
954
    // return the container payload as byte array
955
0
    const char* getPayload() const { return payload; }
956
957
    // size of the total packed bytes
958
1.04M
    unsigned int numPackedBytes() const { return sizeof(JsonbValue) + sizeof(size) + size; }
959
    friend class JsonbDocument;
960
961
    uint32_t size;
962
    char payload[0];
963
};
964
965
/*
966
 * Object type
967
 */
968
struct ObjectVal : public ContainerVal {
969
    using value_type = JsonbKeyValue;
970
    using pointer = value_type*;
971
    using const_pointer = const value_type*;
972
    using const_iterator = JsonbFwdIteratorT<const_pointer, ObjectVal>;
973
974
    const_iterator search(const char* key) const {
975
        if (!key) {
976
            return end();
977
        }
978
        return search(key, (unsigned int)strlen(key));
979
    }
980
981
7.20k
    const_iterator search(const char* key, unsigned int klen) const {
982
7.20k
        if (!key || !klen) {
983
0
            return end();
984
0
        }
985
7.20k
        return internalSearch(key, klen);
986
7.20k
    }
987
988
    // Get number of elements in object
989
174
    int numElem() const {
990
174
        const char* pch = payload;
991
174
        const char* fence = payload + size;
992
993
174
        unsigned int num = 0;
994
449
        while (pch < fence) {
995
275
            auto* pkey = (JsonbKeyValue*)(pch);
996
275
            ++num;
997
275
            pch += pkey->numPackedBytes();
998
275
        }
999
1000
174
        assert(pch == fence);
1001
1002
174
        return num;
1003
174
    }
1004
1005
    // find the JSONB value by a key string (null terminated)
1006
    const JsonbValue* find(const char* key) const {
1007
        if (!key) {
1008
            return nullptr;
1009
        }
1010
        return find(key, (unsigned int)strlen(key));
1011
    }
1012
1013
    // find the JSONB value by a key string (with length)
1014
7.20k
    const JsonbValue* find(const char* key, unsigned int klen) const {
1015
7.20k
        const_iterator kv = search(key, klen);
1016
7.20k
        if (end() == kv) {
1017
4.45k
            return nullptr;
1018
4.45k
        }
1019
2.75k
        return kv->value();
1020
7.20k
    }
1021
1022
118k
    const_iterator begin() const { return const_iterator((pointer)payload); }
1023
1024
247k
    const_iterator end() const { return const_iterator((pointer)(payload + size)); }
1025
1026
    std::vector<std::pair<StringRef, const JsonbValue*>> get_ordered_key_value_pairs() const;
1027
1028
private:
1029
7.20k
    const_iterator internalSearch(const char* key, unsigned int klen) const {
1030
7.20k
        const char* pch = payload;
1031
7.20k
        const char* fence = payload + size;
1032
1033
17.7k
        while (pch < fence) {
1034
13.3k
            const auto* pkey = (const JsonbKeyValue*)(pch);
1035
13.3k
            if (klen == pkey->klen() && strncmp(key, pkey->getKeyStr(), klen) == 0) {
1036
2.75k
                return const_iterator(pkey);
1037
2.75k
            }
1038
10.5k
            pch += pkey->numPackedBytes();
1039
10.5k
        }
1040
1041
7.20k
        assert(pch == fence);
1042
1043
4.45k
        return end();
1044
4.45k
    }
1045
};
1046
1047
/*
1048
 * Array type
1049
 */
1050
struct ArrayVal : public ContainerVal {
1051
    using value_type = JsonbValue;
1052
    using pointer = value_type*;
1053
    using const_pointer = const value_type*;
1054
    using const_iterator = JsonbFwdIteratorT<const_pointer, ArrayVal>;
1055
1056
    // get the JSONB value at index
1057
198k
    const JsonbValue* get(int idx) const {
1058
198k
        if (idx < 0) {
1059
65
            return nullptr;
1060
65
        }
1061
1062
198k
        const char* pch = payload;
1063
198k
        const char* fence = payload + size;
1064
1065
205k
        while (pch < fence && idx-- > 0) {
1066
6.81k
            pch += ((const JsonbValue*)pch)->numPackedBytes();
1067
6.81k
        }
1068
198k
        if (idx > 0 || pch == fence) {
1069
1.65k
            return nullptr;
1070
1.65k
        }
1071
1072
196k
        return (const JsonbValue*)pch;
1073
198k
    }
1074
1075
    // Get number of elements in array
1076
1.91k
    int numElem() const {
1077
1.91k
        const char* pch = payload;
1078
1.91k
        const char* fence = payload + size;
1079
1080
1.91k
        unsigned int num = 0;
1081
112k
        while (pch < fence) {
1082
110k
            ++num;
1083
110k
            pch += ((const JsonbValue*)pch)->numPackedBytes();
1084
110k
        }
1085
1086
1.91k
        assert(pch == fence);
1087
1088
1.91k
        return num;
1089
1.91k
    }
1090
1091
169k
    const_iterator begin() const { return const_iterator((pointer)payload); }
1092
1093
169k
    const_iterator end() const { return const_iterator((pointer)(payload + size)); }
1094
};
1095
1096
namespace jsonb_detail {
1097
1098
struct JsonbScaledDecimal {
1099
    wide::Int256 value;
1100
    uint32_t scale;
1101
};
1102
1103
26
inline void validate_decimal_scale(uint32_t scale) {
1104
26
    if (scale > static_cast<uint32_t>(BeConsts::MAX_DECIMALV3_SCALE)) {
1105
2
        throw Exception(ErrorCode::INTERNAL_ERROR,
1106
2
                        "Invalid JSONB decimal scale: {}, max allowed scale: {}", scale,
1107
2
                        BeConsts::MAX_DECIMALV3_SCALE);
1108
2
    }
1109
26
}
1110
1111
182
inline bool is_numeric(const JsonbValue* value) {
1112
182
    return value->isInt() || value->isDouble() || value->isFloat() || value->isDecimal();
1113
182
}
1114
1115
25
inline double floating_value(const JsonbValue* value) {
1116
25
    if (value->isDouble()) {
1117
25
        return value->unpack<JsonbDoubleVal>()->val();
1118
25
    }
1119
0
    return value->unpack<JsonbFloatVal>()->val();
1120
25
}
1121
1122
20
inline JsonbScaledDecimal get_scaled_decimal(const JsonbValue* value) {
1123
20
    switch (value->type) {
1124
12
    case JsonbType::T_Decimal32: {
1125
12
        const auto* decimal = value->unpack<JsonbDecimal32>();
1126
12
        validate_decimal_scale(decimal->scale);
1127
12
        return {wide::Int256(decimal->val()), decimal->scale};
1128
0
    }
1129
2
    case JsonbType::T_Decimal64: {
1130
2
        const auto* decimal = value->unpack<JsonbDecimal64>();
1131
2
        validate_decimal_scale(decimal->scale);
1132
2
        return {wide::Int256(decimal->val()), decimal->scale};
1133
0
    }
1134
6
    case JsonbType::T_Decimal128: {
1135
6
        const auto* decimal = value->unpack<JsonbDecimal128>();
1136
6
        validate_decimal_scale(decimal->scale);
1137
6
        return {wide::Int256(decimal->val()), decimal->scale};
1138
0
    }
1139
0
    case JsonbType::T_Decimal256: {
1140
0
        const auto* decimal = value->unpack<JsonbDecimal256>();
1141
0
        validate_decimal_scale(decimal->scale);
1142
0
        return {decimal->val(), decimal->scale};
1143
0
    }
1144
0
    default:
1145
0
        throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB decimal value type: {}",
1146
0
                        static_cast<int32_t>(value->type));
1147
20
    }
1148
20
}
1149
1150
inline bool scaled_decimal_equal_decimal(const JsonbScaledDecimal& lhs,
1151
4
                                         const JsonbScaledDecimal& rhs) {
1152
4
    if (lhs.scale == rhs.scale) {
1153
0
        return lhs.value == rhs.value;
1154
0
    }
1155
1156
4
    if (lhs.scale < rhs.scale) {
1157
2
        const auto scale_multiplier = decimal_scale_multiplier<wide::Int256>(rhs.scale - lhs.scale);
1158
2
        return rhs.value % scale_multiplier == 0 && lhs.value == rhs.value / scale_multiplier;
1159
2
    }
1160
1161
2
    const auto scale_multiplier = decimal_scale_multiplier<wide::Int256>(lhs.scale - rhs.scale);
1162
2
    return lhs.value % scale_multiplier == 0 && lhs.value / scale_multiplier == rhs.value;
1163
4
}
1164
1165
4
inline bool scaled_decimal_equal_integer(const JsonbScaledDecimal& decimal, int128_t integer) {
1166
4
    const auto integer_value = wide::Int256(integer);
1167
4
    if (decimal.scale == 0) {
1168
0
        return decimal.value == integer_value;
1169
0
    }
1170
1171
4
    const auto scale_multiplier = decimal_scale_multiplier<wide::Int256>(decimal.scale);
1172
4
    return decimal.value % scale_multiplier == 0 &&
1173
4
           decimal.value / scale_multiplier == integer_value;
1174
4
}
1175
1176
inline constexpr auto kPowersOfFive = [] {
1177
    std::array<wide::Int256, BeConsts::MAX_DECIMALV3_SCALE + 1> powers {};
1178
    powers[0] = 1;
1179
    for (size_t i = 1; i < powers.size(); ++i) {
1180
        powers[i] = powers[i - 1] * 5;
1181
    }
1182
    return powers;
1183
}();
1184
1185
6
inline wide::Int256 power_of_five(uint32_t exponent) {
1186
6
    validate_decimal_scale(exponent);
1187
6
    return kPowersOfFive[exponent];
1188
6
}
1189
1190
6
inline bool scaled_binary_equal(wide::Int256 value, int exponent, wide::Int256 significand) {
1191
6
    if (exponent < 0) {
1192
4
        const int divisor_exponent = -exponent;
1193
4
        if (divisor_exponent >= std::numeric_limits<int64_t>::digits) {
1194
0
            return false;
1195
0
        }
1196
4
        const auto divisor = wide::Int256(1) << divisor_exponent;
1197
4
        return significand % divisor == 0 && value == significand / divisor;
1198
4
    }
1199
2
    constexpr int max_positive_int256_shift = std::numeric_limits<wide::Int256>::digits;
1200
    // wide::Int256 is signed, so shifting 1 by 255 reaches the sign bit.
1201
2
    if (exponent >= max_positive_int256_shift) {
1202
0
        return false;
1203
0
    }
1204
2
    const auto multiplier = wide::Int256(1) << exponent;
1205
2
    return value % multiplier == 0 && value / multiplier == significand;
1206
2
}
1207
1208
15
inline bool floating_equal_integer(const JsonbValue* floating, int128_t integer) {
1209
15
    const double value = floating_value(floating);
1210
15
    int exponent = 0;
1211
15
    std::frexp(value, &exponent);
1212
15
    if (!std::isfinite(value) || std::trunc(value) != value) {
1213
6
        return false;
1214
6
    }
1215
9
    if (exponent >= 128) {
1216
0
        return value == -std::ldexp(1.0, 127) && integer == std::numeric_limits<int128_t>::min();
1217
0
    }
1218
9
    if (exponent <= -1) {
1219
0
        return false;
1220
0
    }
1221
9
    return static_cast<int128_t>(value) == integer;
1222
9
}
1223
1224
6
inline bool floating_equal_decimal(const JsonbValue* floating, const JsonbScaledDecimal& decimal) {
1225
6
    const double value = floating_value(floating);
1226
6
    if (!std::isfinite(value)) {
1227
0
        return false;
1228
0
    }
1229
6
    if (value == 0) {
1230
0
        return decimal.value == 0;
1231
0
    }
1232
1233
6
    int exponent = 0;
1234
6
    const double significand_fraction = std::frexp(value, &exponent);
1235
6
    const double significand_double =
1236
6
            std::ldexp(significand_fraction, std::numeric_limits<double>::digits);
1237
6
    auto significand = wide::Int256(static_cast<int64_t>(significand_double));
1238
6
    exponent -= std::numeric_limits<double>::digits;
1239
1240
6
    const auto five_multiplier = power_of_five(decimal.scale);
1241
6
    if (decimal.value % five_multiplier != 0) {
1242
0
        return false;
1243
0
    }
1244
6
    const auto binary_scaled_decimal = decimal.value / five_multiplier;
1245
6
    return scaled_binary_equal(binary_scaled_decimal, exponent + decimal.scale, significand);
1246
6
}
1247
1248
182
inline bool numeric_equal(const JsonbValue* lhs, const JsonbValue* rhs) {
1249
182
    if (!is_numeric(rhs)) {
1250
68
        return false;
1251
68
    }
1252
1253
114
    if ((lhs->isDouble() || lhs->isFloat()) && rhs->isInt()) {
1254
8
        return floating_equal_integer(lhs, rhs->int_val());
1255
8
    }
1256
1257
106
    if ((rhs->isDouble() || rhs->isFloat()) && lhs->isInt()) {
1258
7
        return floating_equal_integer(rhs, lhs->int_val());
1259
7
    }
1260
1261
99
    if ((lhs->isDouble() || lhs->isFloat()) && rhs->isDecimal()) {
1262
4
        return floating_equal_decimal(lhs, get_scaled_decimal(rhs));
1263
4
    }
1264
1265
95
    if ((rhs->isDouble() || rhs->isFloat()) && lhs->isDecimal()) {
1266
4
        return floating_equal_decimal(rhs, get_scaled_decimal(lhs));
1267
4
    }
1268
1269
91
    if (lhs->isDouble() || lhs->isFloat()) {
1270
2
        return (rhs->isDouble() || rhs->isFloat()) && floating_value(lhs) == floating_value(rhs);
1271
2
    }
1272
1273
89
    if (lhs->isDecimal()) {
1274
6
        const auto lhs_decimal = get_scaled_decimal(lhs);
1275
6
        if (rhs->isDecimal()) {
1276
4
            return scaled_decimal_equal_decimal(lhs_decimal, get_scaled_decimal(rhs));
1277
4
        }
1278
2
        return scaled_decimal_equal_integer(lhs_decimal, rhs->int_val());
1279
6
    }
1280
1281
83
    if (rhs->isDecimal()) {
1282
2
        return scaled_decimal_equal_integer(get_scaled_decimal(rhs), lhs->int_val());
1283
2
    }
1284
1285
81
    return lhs->int_val() == rhs->int_val();
1286
83
}
1287
1288
61
inline bool array_contains_value(const ArrayVal* target_array, const JsonbValue* candidate) {
1289
61
    const int target_num = target_array->numElem();
1290
160
    for (int i = 0; i < target_num; ++i) {
1291
127
        if (target_array->get(i)->contains(candidate)) {
1292
28
            return true;
1293
28
        }
1294
127
    }
1295
33
    return false;
1296
61
}
1297
1298
18
inline bool array_contains_array(const ArrayVal* target_array, const ArrayVal* candidate_array) {
1299
18
    const int candidate_num = candidate_array->numElem();
1300
33
    for (int i = 0; i < candidate_num; ++i) {
1301
25
        if (!array_contains_value(target_array, candidate_array->get(i))) {
1302
10
            return false;
1303
10
        }
1304
25
    }
1305
8
    return true;
1306
18
}
1307
1308
} // namespace jsonb_detail
1309
1310
// Interprets `pb` (of `size` bytes) as a packed JSONB document and returns its
// root value, or nullptr when the buffer is null, too small, has an
// unexpected version, or its size does not match the size declared by the
// root value.
inline const JsonbValue* JsonbDocument::createValue(const char* pb, size_t size) {
    if (!pb || size < sizeof(JsonbHeader) + sizeof(JsonbValue)) {
        return nullptr;
    }

    const auto* doc = reinterpret_cast<const JsonbDocument*>(pb);
    if (doc->header_.ver_ != JSONB_VER) {
        return nullptr;
    }

    const auto* val = reinterpret_cast<const JsonbValue*>(doc->payload_);

    // String, binary, object and array values carry a 4-byte length right
    // after the type byte, and numPackedBytes() reads it. Make sure those
    // bytes are inside the buffer before they are read. This is an O(1) check.
    const bool has_length_prefix =
            val->isString() || val->isBinary() || val->isObject() || val->isArray();
    if (has_length_prefix &&
        size < sizeof(JsonbHeader) + sizeof(JsonbValue) + sizeof(uint32_t)) {
        return nullptr;
    }

    // Same as checkAndCreateDocument(), this is intentionally a lightweight structural check for
    // hot paths. Do not recursively validate container bodies here unless the caller is a clearly
    // untrusted raw binary boundary and accepts the O(document size) cost.
    if (size != sizeof(JsonbHeader) + val->numPackedBytes()) {
        return nullptr;
    }

    return val;
}
1330
1331
130
inline unsigned int JsonbDocument::numPackedBytes() const {
1332
130
    return ((const JsonbValue*)payload_)->numPackedBytes() + sizeof(header_);
1333
130
}
1334
1335
456k
inline unsigned int JsonbKeyValue::numPackedBytes() const {
1336
456k
    unsigned int ks = keyPackedBytes();
1337
456k
    const auto* val = (const JsonbValue*)(((char*)this) + ks);
1338
456k
    return ks + val->numPackedBytes();
1339
456k
}
1340
1341
// Poor man's "virtual" function JsonbValue::numPackedBytes
1342
1.94M
inline unsigned int JsonbValue::numPackedBytes() const {
1343
1.94M
    switch (type) {
1344
12.3k
    case JsonbType::T_Null:
1345
53.0k
    case JsonbType::T_True:
1346
64.1k
    case JsonbType::T_False: {
1347
64.1k
        return sizeof(type);
1348
53.0k
    }
1349
1350
41.0k
    case JsonbType::T_Int8: {
1351
41.0k
        return sizeof(type) + sizeof(int8_t);
1352
53.0k
    }
1353
47.0k
    case JsonbType::T_Int16: {
1354
47.0k
        return sizeof(type) + sizeof(int16_t);
1355
53.0k
    }
1356
20.7k
    case JsonbType::T_Int32: {
1357
20.7k
        return sizeof(type) + sizeof(int32_t);
1358
53.0k
    }
1359
355k
    case JsonbType::T_Int64: {
1360
355k
        return sizeof(type) + sizeof(int64_t);
1361
53.0k
    }
1362
25.4k
    case JsonbType::T_Double: {
1363
25.4k
        return sizeof(type) + sizeof(double);
1364
53.0k
    }
1365
4.74k
    case JsonbType::T_Float: {
1366
4.74k
        return sizeof(type) + sizeof(float);
1367
53.0k
    }
1368
31.5k
    case JsonbType::T_Int128: {
1369
31.5k
        return sizeof(type) + sizeof(int128_t);
1370
53.0k
    }
1371
286k
    case JsonbType::T_String:
1372
310k
    case JsonbType::T_Binary: {
1373
310k
        return unpack<JsonbBinaryVal>()->numPackedBytes();
1374
286k
    }
1375
1376
277k
    case JsonbType::T_Object:
1377
1.04M
    case JsonbType::T_Array: {
1378
1.04M
        return unpack<ContainerVal>()->numPackedBytes();
1379
277k
    }
1380
51
    case JsonbType::T_Decimal32: {
1381
51
        return JsonbDecimal32::numPackedBytes();
1382
277k
    }
1383
63
    case JsonbType::T_Decimal64: {
1384
63
        return JsonbDecimal64::numPackedBytes();
1385
277k
    }
1386
40
    case JsonbType::T_Decimal128: {
1387
40
        return JsonbDecimal128::numPackedBytes();
1388
277k
    }
1389
6
    case JsonbType::T_Decimal256: {
1390
6
        return JsonbDecimal256::numPackedBytes();
1391
277k
    }
1392
0
    case JsonbType::NUM_TYPES:
1393
0
        break;
1394
1.94M
    }
1395
1396
0
    throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB value type: {}",
1397
0
                    static_cast<int32_t>(type));
1398
1.94M
}
1399
1400
808
inline int JsonbValue::numElements() const {
1401
808
    switch (type) {
1402
6
    case JsonbType::T_Int8:
1403
16
    case JsonbType::T_Int16:
1404
18
    case JsonbType::T_Int32:
1405
21
    case JsonbType::T_Int64:
1406
23
    case JsonbType::T_Double:
1407
23
    case JsonbType::T_Float:
1408
25
    case JsonbType::T_Int128:
1409
32
    case JsonbType::T_String:
1410
32
    case JsonbType::T_Binary:
1411
37
    case JsonbType::T_Null:
1412
43
    case JsonbType::T_True:
1413
45
    case JsonbType::T_False:
1414
45
    case JsonbType::T_Decimal32:
1415
45
    case JsonbType::T_Decimal64:
1416
45
    case JsonbType::T_Decimal128:
1417
45
    case JsonbType::T_Decimal256: {
1418
45
        return 1;
1419
45
    }
1420
27
    case JsonbType::T_Object: {
1421
27
        return unpack<ObjectVal>()->numElem();
1422
45
    }
1423
736
    case JsonbType::T_Array: {
1424
736
        return unpack<ArrayVal>()->numElem();
1425
45
    }
1426
0
    case JsonbType::NUM_TYPES:
1427
0
        break;
1428
808
    }
1429
0
    throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB value type: {}",
1430
0
                    static_cast<int32_t>(type));
1431
808
}
1432
1433
401
inline bool JsonbValue::contains(const JsonbValue* rhs) const {
1434
401
    switch (type) {
1435
94
    case JsonbType::T_Int8:
1436
124
    case JsonbType::T_Int16:
1437
130
    case JsonbType::T_Int32:
1438
139
    case JsonbType::T_Int64:
1439
146
    case JsonbType::T_Int128:
1440
172
    case JsonbType::T_Double:
1441
172
    case JsonbType::T_Float:
1442
178
    case JsonbType::T_Decimal32:
1443
179
    case JsonbType::T_Decimal64:
1444
182
    case JsonbType::T_Decimal128:
1445
182
    case JsonbType::T_Decimal256: {
1446
182
        return jsonb_detail::numeric_equal(this, rhs);
1447
182
    }
1448
47
    case JsonbType::T_String:
1449
47
    case JsonbType::T_Binary: {
1450
47
        if (rhs->isString() || rhs->isBinary()) {
1451
19
            const auto* str_value1 = unpack<JsonbStringVal>();
1452
19
            const auto* str_value2 = rhs->unpack<JsonbStringVal>();
1453
19
            return str_value1->length() == str_value2->length() &&
1454
19
                   std::memcmp(str_value1->getBlob(), str_value2->getBlob(),
1455
18
                               str_value1->length()) == 0;
1456
19
        }
1457
28
        return false;
1458
47
    }
1459
54
    case JsonbType::T_Array: {
1460
54
        const auto* lhs_array = unpack<ArrayVal>();
1461
54
        if (rhs->isArray()) {
1462
18
            return jsonb_detail::array_contains_array(lhs_array, rhs->unpack<ArrayVal>());
1463
18
        }
1464
36
        return jsonb_detail::array_contains_value(lhs_array, rhs);
1465
54
    }
1466
84
    case JsonbType::T_Object: {
1467
84
        if (rhs->isObject()) {
1468
51
            const auto* obj_value1 = unpack<ObjectVal>();
1469
51
            const auto* obj_value2 = rhs->unpack<ObjectVal>();
1470
98
            for (auto it = obj_value2->begin(); it != obj_value2->end(); ++it) {
1471
69
                const JsonbValue* value = obj_value1->find(it->getKeyStr(), it->klen());
1472
69
                if (value == nullptr || !value->contains(it->value())) {
1473
22
                    return false;
1474
22
                }
1475
69
            }
1476
29
            return true;
1477
51
        }
1478
33
        return false;
1479
84
    }
1480
12
    case JsonbType::T_Null: {
1481
12
        return rhs->isNull();
1482
84
    }
1483
12
    case JsonbType::T_True: {
1484
12
        return rhs->isTrue();
1485
84
    }
1486
10
    case JsonbType::T_False: {
1487
10
        return rhs->isFalse();
1488
84
    }
1489
0
    case JsonbType::NUM_TYPES:
1490
0
        break;
1491
401
    }
1492
1493
0
    throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB value type: {}",
1494
0
                    static_cast<int32_t>(type));
1495
401
}
1496
1497
15.7k
inline bool JsonbPath::seek(const char* key_path, size_t kp_len) {
1498
15.7k
    while (kp_len > 0 && std::isspace(key_path[kp_len - 1])) {
1499
7
        --kp_len;
1500
7
    }
1501
1502
    //path invalid
1503
15.7k
    if (!key_path || kp_len == 0) {
1504
2
        return false;
1505
2
    }
1506
15.7k
    Stream stream(key_path, kp_len);
1507
15.7k
    stream.skip_whitespace();
1508
15.7k
    if (stream.exhausted() || stream.read() != SCOPE) {
1509
        //path invalid
1510
4
        return false;
1511
4
    }
1512
1513
36.8k
    while (!stream.exhausted()) {
1514
21.1k
        stream.skip_whitespace();
1515
21.1k
        stream.clear_leg_ptr();
1516
21.1k
        stream.clear_leg_len();
1517
21.1k
        stream.set_has_escapes(false);
1518
1519
21.1k
        if (!JsonbPath::parsePath(&stream, this)) {
1520
            //path invalid
1521
20
            return false;
1522
20
        }
1523
21.1k
    }
1524
15.6k
    return true;
1525
15.7k
}
1526
1527
21.1k
// Parses one path leg at the current stream position and appends it to `path`.
//
// Accepted forms:
//   [..]      array leg                       ($[0])
//   .name     member leg                      ($.a)
//   .[..]     array leg with a leading dot    ($.[0], kept for compatibility)
//   **[..]    super wildcard + array leg
//   **.name   super wildcard + member leg
//   **.[..]   super wildcard + dot-array leg
//
// Returns false for anything else, including a single '*' that is not
// immediately followed by a second '*'. Previously the character after a lone
// '*' was silently consumed, so malformed paths such as "$*..a" were accepted
// as "$.a".
//
// Precondition: the stream is not exhausted on entry (peek() is called
// unconditionally).
inline bool JsonbPath::parsePath(Stream* stream, JsonbPath* path) {
    // Consumes the '.' at the current position and parses what follows as
    // either a dot-array leg (.[0]) or a member leg (.a).
    // Keep .[0] for backward compatibility: although the dot before an array
    // leg is non-standard, existing JSONB users may rely on it.
    auto parse_after_dot = [stream, path]() -> bool {
        // advance past the .
        stream->skip(1);
        if (stream->exhausted()) {
            return false;
        }
        return stream->peek() == BEGIN_ARRAY ? parse_array(stream, path)
                                             : parse_member(stream, path);
    };

    // $[0]
    if (stream->peek() == BEGIN_ARRAY) {
        return parse_array(stream, path);
    }

    // $.a or $.[0]
    if (stream->peek() == BEGIN_MEMBER) {
        return parse_after_dot();
    }

    if (stream->peek() != WILDCARD) {
        return false; //invalid json path
    }

    // $** : the first '*' must be followed directly by a second one.
    stream->skip(1);
    if (stream->exhausted() || stream->peek() != WILDCARD) {
        return false;
    }
    path->_is_supper_wildcard = true;

    stream->skip(1);
    if (stream->exhausted()) {
        // "**" must be followed by a leg.
        return false;
    }

    // $**[0]
    if (stream->peek() == BEGIN_ARRAY) {
        return parse_array(stream, path);
    }
    // $**.a or $**.[0]
    if (stream->peek() == BEGIN_MEMBER) {
        return parse_after_dot();
    }
    return false;
}
1592
1593
11.3k
// Parses an array leg starting at BEGIN_ARRAY and appends it to `path`.
//
// Accepted bodies between the brackets:
//   *          wildcard; also sets path->_is_wildcard
//   last       stored as array index -1
//   last - N   stored as array index -1 - N (only whitespace may appear
//              between "last" and '-'; "last" is matched case-insensitively)
//   N          stored as array index N
//
// Numbers are parsed with std::from_chars, which accepts a numeric prefix and
// does not skip leading whitespace. Returns false when the closing bracket is
// missing or the body is not one of the forms above.
//
// Precondition: the stream is positioned on BEGIN_ARRAY.
inline bool JsonbPath::parse_array(Stream* stream, JsonbPath* path) {
    assert(stream->peek() == BEGIN_ARRAY);
    stream->skip(1);
    if (stream->exhausted()) {
        return false;
    }

    if (stream->peek() == WILDCARD) {
        // Called by function_jsonb.cpp, the variables passed in originate from a mutable block;
        // using const_cast is acceptable.
        stream->set_leg_ptr(const_cast<char*>(stream->position()));
        stream->add_leg_len();
        stream->skip(1);
        if (stream->exhausted()) {
            return false;
        }
        // "[*" must be closed immediately.
        if (stream->peek() != END_ARRAY) {
            return false;
        }

        std::unique_ptr<leg_info> leg(
                new leg_info(stream->get_leg_ptr(), stream->get_leg_len(), 0, ARRAY_CODE));
        path->add_leg_to_leg_vector(std::move(leg));
        stream->skip(1);
        path->_is_wildcard = true;
        return true;
    }

    // Called by function_jsonb.cpp, the variables passed in originate from a mutable block;
    // using const_cast is acceptable.
    stream->set_leg_ptr(const_cast<char*>(stream->position()));

    // Collect everything up to (not including) the closing bracket.
    for (; !stream->exhausted() && stream->peek() != END_ARRAY; stream->advance()) {
        stream->add_leg_len();
    }

    // The loop stops either on END_ARRAY or at the end of input; only the
    // former is a well-formed leg.
    if (stream->exhausted()) {
        return false;
    }
    stream->skip(1);

    //parse array index to int
    std::string_view idx_string(stream->get_leg_ptr(), stream->get_leg_len());
    int index = 0;

    // Match "last" case-insensitively for compatibility with existing JSONB
    // paths such as [Last] and [LAST]. The casts keep std::tolower defined for
    // bytes >= 0x80 when char is signed.
    const bool has_last_prefix =
            idx_string.size() >= 4 &&
            std::equal(LAST, LAST + 4, idx_string.data(), [](char c1, char c2) {
                return std::tolower(static_cast<unsigned char>(c1)) ==
                       std::tolower(static_cast<unsigned char>(c2));
            });

    if (has_last_prefix) {
        const auto pos = idx_string.find(MINUS);

        if (pos != std::string_view::npos) {
            // Only whitespace may separate "last" from the minus sign.
            for (size_t i = 4; i < pos; ++i) {
                if (!std::isspace(static_cast<unsigned char>(idx_string[i]))) {
                    LOG(WARNING) << "Non-space char in idx_string: '" << idx_string << "'";
                    return false;
                }
            }
            idx_string = idx_string.substr(pos + 1);
            idx_string = trim(idx_string);

            // Keep numeric-prefix parsing for last-N offsets as existing JSONB
            // path behavior.
            auto result = std::from_chars(idx_string.data(), idx_string.data() + idx_string.size(),
                                          index);
            if (result.ec != std::errc()) {
                LOG(WARNING) << "Invalid index in JSON path: '" << idx_string << "'";
                return false;
            }
        } else if (idx_string.size() > 4) {
            // "last" followed by anything other than an offset is invalid.
            return false;
        }

        // Same value as -index - 1, but written so that it cannot overflow
        // for any int (including INT_MIN).
        std::unique_ptr<leg_info> leg(new leg_info(nullptr, 0, -1 - index, ARRAY_CODE));
        path->add_leg_to_leg_vector(std::move(leg));

        return true;
    }

    // Preserve legacy numeric-prefix parsing for array indexes. std::from_chars
    // may stop before the end (for example [1.5] is parsed as index 1), and
    // current JSONB path semantics treat that as supported behavior.
    auto result = std::from_chars(idx_string.data(), idx_string.data() + idx_string.size(), index);

    if (result.ec != std::errc()) {
        return false;
    }

    std::unique_ptr<leg_info> leg(new leg_info(nullptr, 0, index, ARRAY_CODE));
    path->add_leg_to_leg_vector(std::move(leg));

    return true;
}
1694
1695
9.83k
// Parses a member leg (the part after '.') and appends it to `path`.
//
// Accepted forms:
//   *        wildcard; also sets path->_is_wildcard
//   name     unquoted name, terminated by BEGIN_MEMBER, BEGIN_ARRAY or end of
//            input; unquoted whitespace is rejected
//   "name"   double-quoted name, which may contain whitespace, '.' and '['
// An ESCAPE character takes the following character literally; escapes are
// stripped afterwards via Stream::remove_escapes().
//
// Returns false for an exhausted stream, unquoted whitespace, a trailing
// ESCAPE, an unterminated quote, an empty name, or an opening quote that
// appears after unquoted characters.
inline bool JsonbPath::parse_member(Stream* stream, JsonbPath* path) {
    if (stream->exhausted()) {
        return false;
    }

    if (stream->peek() == WILDCARD) {
        // Called by function_jsonb.cpp, the variables passed in originate from a mutable block;
        // using const_cast is acceptable.
        stream->set_leg_ptr(const_cast<char*>(stream->position()));
        stream->add_leg_len();
        stream->skip(1);
        std::unique_ptr<leg_info> leg(
                new leg_info(stream->get_leg_ptr(), stream->get_leg_len(), 0, MEMBER_CODE));
        path->add_leg_to_leg_vector(std::move(leg));
        path->_is_wildcard = true;
        return true;
    }

    // Called by function_jsonb.cpp, the variables passed in originate from a mutable block;
    // using const_cast is acceptable.
    stream->set_leg_ptr(const_cast<char*>(stream->position()));

    const char* left_quotation_marks = nullptr;
    const char* right_quotation_marks = nullptr;

    for (; !stream->exhausted(); stream->advance()) {
        // Only accept space characters quoted by double quotes.
        // The cast keeps std::isspace defined for bytes >= 0x80 (e.g. UTF-8
        // member names) when char is signed.
        if (std::isspace(static_cast<unsigned char>(stream->peek())) &&
            left_quotation_marks == nullptr) {
            return false;
        } else if (stream->peek() == ESCAPE) {
            // Count both the escape character and the character it protects.
            stream->add_leg_len();
            stream->skip(1);
            stream->add_leg_len();
            stream->set_has_escapes(true);
            if (stream->exhausted()) {
                // A trailing escape has nothing to escape.
                return false;
            }
            continue;
        } else if (stream->peek() == DOUBLE_QUOTE) {
            if (left_quotation_marks == nullptr) {
                // An opening quote is only valid at the very start of the
                // name. Otherwise the leg pointer would be moved past the
                // quote while the length already counted the preceding
                // characters, making the leg extend beyond the path buffer.
                if (stream->get_leg_len() != 0) {
                    return false;
                }
                left_quotation_marks = stream->position();
                // Called by function_jsonb.cpp, the variables passed in originate from a mutable block;
                // using const_cast is acceptable.
                stream->set_leg_ptr(const_cast<char*>(++left_quotation_marks));
                continue;
            } else {
                // Closing quote: consume it and finish the leg.
                right_quotation_marks = stream->position();
                stream->skip(1);
                break;
            }
        } else if (stream->peek() == BEGIN_MEMBER || stream->peek() == BEGIN_ARRAY) {
            // Outside quotes these start the next leg; inside quotes they are
            // ordinary name characters.
            if (left_quotation_marks == nullptr) {
                break;
            }
        }

        stream->add_leg_len();
    }

    if ((left_quotation_marks != nullptr && right_quotation_marks == nullptr) ||
        stream->get_leg_ptr() == nullptr || stream->get_leg_len() == 0) {
        return false; //invalid json path
    }

    if (stream->get_has_escapes()) {
        stream->remove_escapes();
    }

    std::unique_ptr<leg_info> leg(
            new leg_info(stream->get_leg_ptr(), stream->get_leg_len(), 0, MEMBER_CODE));
    path->add_leg_to_leg_vector(std::move(leg));

    return true;
}
1769
1770
static_assert(is_pod_v<JsonbDocument>, "JsonbDocument must be standard layout and trivial");
1771
static_assert(is_pod_v<JsonbValue>, "JsonbValue must be standard layout and trivial");
1772
static_assert(is_pod_v<JsonbDecimal32>, "JsonbDecimal32 must be standard layout and trivial");
1773
static_assert(is_pod_v<JsonbDecimal64>, "JsonbDecimal64 must be standard layout and trivial");
1774
static_assert(is_pod_v<JsonbDecimal128>, "JsonbDecimal128 must be standard layout and trivial");
1775
static_assert(is_pod_v<JsonbDecimal256>, "JsonbDecimal256 must be standard layout and trivial");
1776
static_assert(is_pod_v<JsonbInt8Val>, "JsonbInt8Val must be standard layout and trivial");
1777
static_assert(is_pod_v<JsonbInt32Val>, "JsonbInt32Val must be standard layout and trivial");
1778
static_assert(is_pod_v<JsonbInt64Val>, "JsonbInt64Val must be standard layout and trivial");
1779
static_assert(is_pod_v<JsonbInt128Val>, "JsonbInt128Val must be standard layout and trivial");
1780
static_assert(is_pod_v<JsonbDoubleVal>, "JsonbDoubleVal must be standard layout and trivial");
1781
static_assert(is_pod_v<JsonbFloatVal>, "JsonbFloatVal must be standard layout and trivial");
1782
static_assert(is_pod_v<JsonbBinaryVal>, "JsonbBinaryVal must be standard layout and trivial");
1783
static_assert(is_pod_v<ContainerVal>, "ContainerVal must be standard layout and trivial");
1784
1785
#define ASSERT_DECIMAL_LAYOUT(type)                \
1786
    static_assert(offsetof(type, precision) == 0); \
1787
    static_assert(offsetof(type, scale) == 4);     \
1788
    static_assert(offsetof(type, value) == 8);
1789
1790
ASSERT_DECIMAL_LAYOUT(JsonbDecimal32)
1791
ASSERT_DECIMAL_LAYOUT(JsonbDecimal64)
1792
ASSERT_DECIMAL_LAYOUT(JsonbDecimal128)
1793
ASSERT_DECIMAL_LAYOUT(JsonbDecimal256)
1794
1795
#define ASSERT_NUMERIC_LAYOUT(type) static_assert(offsetof(type, num) == 0);
1796
1797
ASSERT_NUMERIC_LAYOUT(JsonbInt8Val)
1798
ASSERT_NUMERIC_LAYOUT(JsonbInt32Val)
1799
ASSERT_NUMERIC_LAYOUT(JsonbInt64Val)
1800
ASSERT_NUMERIC_LAYOUT(JsonbInt128Val)
1801
ASSERT_NUMERIC_LAYOUT(JsonbDoubleVal)
1802
1803
static_assert(offsetof(JsonbBinaryVal, size) == 0);
1804
static_assert(offsetof(JsonbBinaryVal, payload) == 4);
1805
1806
static_assert(offsetof(ContainerVal, size) == 0);
1807
static_assert(offsetof(ContainerVal, payload) == 4);
1808
1809
#pragma pack(pop)
1810
#if defined(__clang__)
1811
#pragma clang diagnostic pop
1812
#endif
1813
} // namespace doris
1814
1815
#endif // JSONB_JSONBDOCUMENT_H