Coverage Report

Created: 2024-11-21 14:46

/root/doris/be/src/util/jsonb_document.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
 *  Copyright (c) 2014, Facebook, Inc.
3
 *  All rights reserved.
4
 *
5
 *  This source code is licensed under the BSD-style license found in the
6
 *  LICENSE file in the root directory of this source tree. An additional grant
7
 *  of patent rights can be found in the PATENTS file in the same directory.
8
 *
9
 */
10
11
/*
12
 * This header defines JsonbDocument, JsonbKeyValue, and various value classes
13
 * which are derived from JsonbValue, and a forward iterator for container
14
 * values - essentially everything that is related to JSONB binary data
15
 * structures.
16
 *
17
 * Implementation notes:
18
 *
19
 * None of the classes in this header file can be instantiated directly (i.e.
20
 * you cannot create a JsonbKeyValue or JsonbValue object - all constructors
21
 * are declared non-public). We use the classes as wrappers on the packed JSONB
22
 * bytes (serialized), and cast the classes (types) to the underlying packed
23
 * byte array.
24
 *
25
 * For the same reason, we cannot define any JSONB value class to be virtual,
26
 * since we never call constructors, and will not instantiate vtbl and vptrs.
27
 *
28
 * Therefore, the classes are defined as packed structures (i.e. no data
29
 * alignment and padding), and the private member variables of the classes are
30
 * defined precisely in the same order as the JSONB spec. This ensures we
31
 * access the packed JSONB bytes correctly.
32
 *
33
 * The packed structures are highly optimized for in-place operations with low
34
 * overhead. The reads (and in-place writes) are performed directly on packed
35
 * bytes. There is no memory allocation at all at runtime.
36
 *
37
 * For updates/writes of values that will expand the original JSONB size, the
38
 * write will fail, and the caller needs to handle buffer increase.
39
 *
40
 * ** Iterator **
41
 * Both ObjectVal class and ArrayVal class have iterator type that you can use
42
 * to declare an iterator on a container object to go through the key-value
43
 * pairs or value list. The iterator has both non-const and const types.
44
 *
45
 * Note: iterators are forward direction only.
46
 *
47
 * ** Query **
48
 * Querying into containers is through the member functions find (for key/value
49
 * pairs) and get (for array elements), and is in streaming style. We don't
50
 * need to read/scan the whole JSONB packed bytes in order to return results.
51
 * Once the key/index is found, we will stop search.  You can use text to query
52
 * both objects and arrays (for arrays, text will be converted to integer index),
53
 * and use index to retrieve from array. Array index is 0-based.
54
 *
55
 * ** External dictionary **
56
 * During query processing, you can also pass a call-back function, so the
57
 * search will first try to check if the key string exists in the dictionary.
58
 * If so, search will be based on the id instead of the key string.
59
 * @author Tian Xia <tianx@fb.com>
60
 * 
61
 * this file is copied from 
62
 * https://github.com/facebook/mysql-5.6/blob/fb-mysql-5.6.35/fbson/FbsonDocument.h
63
 * and modified by Doris
64
 */
65
66
#ifndef JSONB_JSONBDOCUMENT_H
67
#define JSONB_JSONBDOCUMENT_H
68
69
#include <assert.h>
70
#include <stdint.h>
71
#include <stdlib.h>
72
73
#include <algorithm>
74
#include <cctype>
75
#include <charconv>
76
#include <limits>
77
#include <string>
78
#include <type_traits>
79
80
#include "common/compiler_util.h" // IWYU pragma: keep
81
82
// #include "util/string_parser.hpp"
83
84
namespace doris {
85
#if defined(__clang__)
86
#pragma clang diagnostic push
87
#pragma clang diagnostic ignored "-Wzero-length-array"
88
#endif
89
#pragma pack(push, 1)
90
91
4.26k
#define JSONB_VER 1
92
93
using int128_t = __int128;
94
95
// forward declaration
96
class JsonbValue;
97
class ObjectVal;
98
99
const int MaxNestingLevel = 100;
100
101
/*
102
 * JsonbType defines 10 primitive types and 2 container types, as described
103
 * below. The enum additionally defines T_Int128 (0x0C) and T_Float (0x0D).
104
 *
105
 * primitive_value ::=
106
 *   0x00        //null value (0 byte)
107
 * | 0x01        //boolean true (0 byte)
108
 * | 0x02        //boolean false (0 byte)
109
 * | 0x03 int8   //char/int8 (1 byte)
110
 * | 0x04 int16  //int16 (2 bytes)
111
 * | 0x05 int32  //int32 (4 bytes)
112
 * | 0x06 int64  //int64 (8 bytes)
113
 * | 0x07 double //floating point (8 bytes)
114
 * | 0x08 string //variable length string
115
 * | 0x09 binary //variable length binary
116
 *
117
 * container ::=
118
 *   0x0A int32 key_value_list //object, int32 is the total bytes of the object
119
 * | 0x0B int32 value_list     //array, int32 is the total bytes of the array
120
 */
121
enum class JsonbType : char {
122
    T_Null = 0x00,
123
    T_True = 0x01,
124
    T_False = 0x02,
125
    T_Int8 = 0x03,
126
    T_Int16 = 0x04,
127
    T_Int32 = 0x05,
128
    T_Int64 = 0x06,
129
    T_Double = 0x07,
130
    T_String = 0x08,
131
    T_Binary = 0x09,
132
    T_Object = 0x0A,
133
    T_Array = 0x0B,
134
    T_Int128 = 0x0C,
135
    T_Float = 0x0D,
136
    NUM_TYPES,
137
};
138
139
//for parse json path
140
constexpr char SCOPE = '$';
141
constexpr char BEGIN_MEMBER = '.';
142
constexpr char BEGIN_ARRAY = '[';
143
constexpr char END_ARRAY = ']';
144
constexpr char DOUBLE_QUOTE = '"';
145
constexpr char WILDCARD = '*';
146
constexpr char MINUS = '-';
147
constexpr char LAST[] = "last";
148
constexpr char ESCAPE = '\\';
149
constexpr unsigned int MEMBER_CODE = 0;
150
constexpr unsigned int ARRAY_CODE = 1;
151
152
/*
153
 * JsonbDocument is the main object that accesses and queries JSONB packed
154
 * bytes. NOTE: JsonbDocument only allows object container as the top level
155
 * JSONB value. However, you can use the static method "createValue" to get any
156
 * JsonbValue object from the packed bytes.
157
 *
158
 * JsonbDocument object also dereferences to an object container value
159
 * (ObjectVal) once JSONB is loaded.
160
 *
161
 * ** Load **
162
 * JsonbDocument is usable after loading packed bytes (memory location) into
163
 * the object. We only need the header and first few bytes of the payload after
164
 * header to verify the JSONB.
165
 *
166
 * Note: creating a JsonbDocument (through createDocument) does not allocate
167
 * any memory. The document object is an efficient wrapper on the packed bytes
168
 * which is accessed directly.
169
 *
170
 * ** Query **
171
 * Query is through dereferencing into ObjectVal.
172
 */
173
class JsonbDocument {
174
public:
175
    // Prepare a document in the buffer
176
    static JsonbDocument* makeDocument(char* pb, uint32_t size, JsonbType type);
177
    static JsonbDocument* makeDocument(char* pb, uint32_t size, const JsonbValue* rval);
178
179
    // create an JsonbDocument object from JSONB packed bytes
180
    static JsonbDocument* createDocument(const char* pb, uint32_t size);
181
182
    // create an JsonbValue from JSONB packed bytes
183
    static JsonbValue* createValue(const char* pb, uint32_t size);
184
185
0
    uint8_t version() { return header_.ver_; }
186
187
1.87k
    JsonbValue* getValue() { return ((JsonbValue*)payload_); }
188
189
    void setValue(const JsonbValue* value);
190
191
    unsigned int numPackedBytes() const;
192
193
11.2k
    ObjectVal* operator->() { return ((ObjectVal*)payload_); }
194
195
0
    const ObjectVal* operator->() const { return ((const ObjectVal*)payload_); }
196
197
public:
198
0
    bool operator==(const JsonbDocument& other) const {
199
0
        assert(false);
200
0
        return false;
201
0
    }
202
203
0
    bool operator!=(const JsonbDocument& other) const {
204
0
        assert(false);
205
0
        return false;
206
0
    }
207
208
0
    bool operator<=(const JsonbDocument& other) const {
209
0
        assert(false);
210
0
        return false;
211
0
    }
212
213
0
    bool operator>=(const JsonbDocument& other) const {
214
0
        assert(false);
215
0
        return false;
216
0
    }
217
218
0
    bool operator<(const JsonbDocument& other) const {
219
0
        assert(false);
220
0
        return false;
221
0
    }
222
223
0
    bool operator>(const JsonbDocument& other) const {
224
0
        assert(false);
225
0
        return false;
226
0
    }
227
228
private:
229
    /*
230
   * JsonbHeader class defines JSONB header (internal to JsonbDocument).
231
   *
232
   * Currently it only contains version information (1-byte). We may expand the
233
   * header to include checksum of the JSONB binary for more security.
234
   */
235
    struct JsonbHeader {
236
        uint8_t ver_;
237
    } header_;
238
239
    char payload_[0];
240
};
241
242
/// A simple input stream class for the JSON path parser.
243
class Stream {
244
public:
245
    /// Creates an input stream reading from a character string.
246
    /// @param string  the input string
247
    /// @param length  the length of the input string
248
846
    Stream(const char* string, size_t length) : m_position(string), m_end(string + length) {}
249
250
    /// Returns a pointer to the current position in the stream.
251
850
    const char* position() const { return m_position; }
252
253
    /// Returns a pointer to the position just after the end of the stream.
254
0
    const char* end() const { return m_end; }
255
256
    /// Returns the number of bytes remaining in the stream.
257
16.2k
    size_t remaining() const {
258
16.2k
        assert(m_position <= m_end);
259
0
        return m_end - m_position;
260
16.2k
    }
261
262
    /// Tells if the stream has been exhausted.
263
13.7k
    bool exhausted() const { return remaining() == 0; }
264
265
    /// Reads the next byte from the stream and moves the position forward.
266
846
    char read() {
267
846
        assert(!exhausted());
268
0
        return *m_position++;
269
846
    }
270
271
    /// Reads the next byte from the stream without moving the position forward.
272
6.94k
    char peek() const {
273
6.94k
        assert(!exhausted());
274
0
        return *m_position;
275
6.94k
    }
276
277
    /// Moves the position to the next non-whitespace character.
278
4.24k
    void skip_whitespace() {
279
4.24k
        m_position = std::find_if_not(m_position, m_end, [](char c) { return std::isspace(c); });
280
4.24k
    }
281
282
    /// Moves the position n bytes forward.
283
2.55k
    void skip(size_t n) {
284
2.55k
        assert(remaining() >= n);
285
0
        m_position += n;
286
2.55k
        skip_whitespace();
287
2.55k
    }
288
289
1.70k
    void clear_leg_ptr() { leg_ptr = nullptr; }
290
291
850
    void set_leg_ptr(char* ptr) {
292
850
        clear_leg_ptr();
293
850
        leg_ptr = ptr;
294
850
    }
295
296
1.15k
    char* get_leg_ptr() { return leg_ptr; }
297
298
850
    void clear_leg_len() { leg_len = 0; }
299
300
1.15k
    void add_leg_len() { leg_len++; }
301
302
1.70k
    unsigned int get_leg_len() const { return leg_len; }
303
304
0
    void remove_escapes() {
305
0
        int new_len = 0;
306
0
        for (int i = 0; i < leg_len; i++) {
307
0
            if (leg_ptr[i] != '\\') {
308
0
                leg_ptr[new_len++] = leg_ptr[i];
309
0
            }
310
0
        }
311
0
        leg_ptr[new_len] = '\0';
312
0
        leg_len = new_len;
313
0
    }
314
315
0
    void set_has_escapes(bool has) { has_escapes = has; }
316
317
308
    bool get_has_escapes() const { return has_escapes; }
318
319
private:
320
    /// The current position in the stream.
321
    const char* m_position = nullptr;
322
323
    /// The end of the stream.
324
    const char* const m_end;
325
326
    ///path leg ptr
327
    char* leg_ptr = nullptr;
328
329
    ///path leg len
330
    unsigned int leg_len;
331
332
    ///Whether to contain escape characters
333
    bool has_escapes = false;
334
};
335
336
struct leg_info {
337
    ///path leg ptr
338
    char* leg_ptr = nullptr;
339
340
    ///path leg len
341
    unsigned int leg_len;
342
343
    ///array_index
344
    int array_index;
345
346
    ///type: 0 is member 1 is array
347
    unsigned int type;
348
349
0
    bool to_string(std::string* str) const {
350
0
        if (type == MEMBER_CODE) {
351
0
            str->push_back(BEGIN_MEMBER);
352
0
            str->append(leg_ptr, leg_len);
353
0
            return true;
354
0
        } else if (type == ARRAY_CODE) {
355
0
            str->push_back(BEGIN_ARRAY);
356
0
            std::string int_str = std::to_string(array_index);
357
0
            str->append(int_str);
358
0
            str->push_back(END_ARRAY);
359
0
            return true;
360
0
        } else {
361
0
            return false;
362
0
        }
363
0
    }
364
};
365
366
class JsonbPath {
367
public:
368
    // parse json path
369
    static bool parsePath(Stream* stream, JsonbPath* path);
370
371
    static bool parse_array(Stream* stream, JsonbPath* path);
372
    static bool parse_member(Stream* stream, JsonbPath* path);
373
374
    //return true if json path valid else return false
375
    bool seek(const char* string, size_t length);
376
377
850
    void add_leg_to_leg_vector(std::unique_ptr<leg_info> leg) {
378
850
        leg_vector.emplace_back(leg.release());
379
850
    }
380
381
0
    void pop_leg_from_leg_vector() { leg_vector.pop_back(); }
382
383
0
    bool to_string(std::string* res) const {
384
0
        res->push_back(SCOPE);
385
0
        for (const auto& leg : leg_vector) {
386
0
            auto valid = leg->to_string(res);
387
0
            if (!valid) {
388
0
                return false;
389
0
            }
390
0
        }
391
0
        return true;
392
0
    }
393
394
1.05k
    size_t get_leg_vector_size() { return leg_vector.size(); }
395
396
2.39k
    leg_info* get_leg_from_leg_vector(size_t i) { return leg_vector[i].get(); }
397
398
0
    void clean() { leg_vector.clear(); }
399
400
private:
401
    std::vector<std::unique_ptr<leg_info>> leg_vector;
402
};
403
404
/*
405
 * JsonbFwdIteratorT implements JSONB's iterator template.
406
 *
407
 * Note: it is a FORWARD-only iterator due to the design of the JSONB format.
408
 */
409
template <class Iter_Type, class Cont_Type>
410
class JsonbFwdIteratorT {
411
public:
412
    typedef Iter_Type iterator;
413
    typedef typename std::iterator_traits<Iter_Type>::pointer pointer;
414
    typedef typename std::iterator_traits<Iter_Type>::reference reference;
415
416
public:
417
    explicit JsonbFwdIteratorT() : current_(nullptr) {}
418
11.6k
    explicit JsonbFwdIteratorT(const iterator& i) : current_(i) {}
_ZN5doris17JsonbFwdIteratorTIPNS_13JsonbKeyValueENS_9ObjectValEEC2ERKS2_
Line
Count
Source
418
11.4k
    explicit JsonbFwdIteratorT(const iterator& i) : current_(i) {}
_ZN5doris17JsonbFwdIteratorTIPKNS_13JsonbKeyValueENS_9ObjectValEEC2ERKS3_
Line
Count
Source
418
108
    explicit JsonbFwdIteratorT(const iterator& i) : current_(i) {}
_ZN5doris17JsonbFwdIteratorTIPKNS_10JsonbValueENS_8ArrayValEEC2ERKS3_
Line
Count
Source
418
148
    explicit JsonbFwdIteratorT(const iterator& i) : current_(i) {}
Unexecuted instantiation: _ZN5doris17JsonbFwdIteratorTIPNS_10JsonbValueENS_8ArrayValEEC2ERKS2_
419
420
    // allow non-const to const iterator conversion (same container type)
421
    template <class Iter_Ty>
422
    JsonbFwdIteratorT(const JsonbFwdIteratorT<Iter_Ty, Cont_Type>& rhs) : current_(rhs.base()) {}
423
424
11.0k
    bool operator==(const JsonbFwdIteratorT& rhs) const { return (current_ == rhs.current_); }
_ZNK5doris17JsonbFwdIteratorTIPNS_13JsonbKeyValueENS_9ObjectValEEeqERKS4_
Line
Count
Source
424
10.3k
    bool operator==(const JsonbFwdIteratorT& rhs) const { return (current_ == rhs.current_); }
_ZNK5doris17JsonbFwdIteratorTIPKNS_13JsonbKeyValueENS_9ObjectValEEeqERKS5_
Line
Count
Source
424
122
    bool operator==(const JsonbFwdIteratorT& rhs) const { return (current_ == rhs.current_); }
_ZNK5doris17JsonbFwdIteratorTIPKNS_10JsonbValueENS_8ArrayValEEeqERKS5_
Line
Count
Source
424
564
    bool operator==(const JsonbFwdIteratorT& rhs) const { return (current_ == rhs.current_); }
Unexecuted instantiation: _ZNK5doris17JsonbFwdIteratorTIPNS_10JsonbValueENS_8ArrayValEEeqERKS4_
425
426
10.8k
    bool operator!=(const JsonbFwdIteratorT& rhs) const { return !operator==(rhs); }
_ZNK5doris17JsonbFwdIteratorTIPKNS_13JsonbKeyValueENS_9ObjectValEEneERKS5_
Line
Count
Source
426
68
    bool operator!=(const JsonbFwdIteratorT& rhs) const { return !operator==(rhs); }
_ZNK5doris17JsonbFwdIteratorTIPKNS_10JsonbValueENS_8ArrayValEEneERKS5_
Line
Count
Source
426
490
    bool operator!=(const JsonbFwdIteratorT& rhs) const { return !operator==(rhs); }
_ZNK5doris17JsonbFwdIteratorTIPNS_13JsonbKeyValueENS_9ObjectValEEneERKS4_
Line
Count
Source
426
10.2k
    bool operator!=(const JsonbFwdIteratorT& rhs) const { return !operator==(rhs); }
Unexecuted instantiation: _ZNK5doris17JsonbFwdIteratorTIPNS_10JsonbValueENS_8ArrayValEEneERKS4_
427
428
122
    bool operator<(const JsonbFwdIteratorT& rhs) const { return (current_ < rhs.current_); }
429
430
    bool operator>(const JsonbFwdIteratorT& rhs) const { return !operator<(rhs); }
431
432
9.50k
    JsonbFwdIteratorT& operator++() {
433
9.50k
        current_ = (iterator)(((char*)current_) + current_->numPackedBytes());
434
9.50k
        return *this;
435
9.50k
    }
_ZN5doris17JsonbFwdIteratorTIPKNS_13JsonbKeyValueENS_9ObjectValEEppEv
Line
Count
Source
432
68
    JsonbFwdIteratorT& operator++() {
433
68
        current_ = (iterator)(((char*)current_) + current_->numPackedBytes());
434
68
        return *this;
435
68
    }
_ZN5doris17JsonbFwdIteratorTIPKNS_10JsonbValueENS_8ArrayValEEppEv
Line
Count
Source
432
208
    JsonbFwdIteratorT& operator++() {
433
208
        current_ = (iterator)(((char*)current_) + current_->numPackedBytes());
434
208
        return *this;
435
208
    }
_ZN5doris17JsonbFwdIteratorTIPNS_13JsonbKeyValueENS_9ObjectValEEppEv
Line
Count
Source
432
9.23k
    JsonbFwdIteratorT& operator++() {
433
9.23k
        current_ = (iterator)(((char*)current_) + current_->numPackedBytes());
434
9.23k
        return *this;
435
9.23k
    }
Unexecuted instantiation: _ZN5doris17JsonbFwdIteratorTIPNS_10JsonbValueENS_8ArrayValEEppEv
436
437
    JsonbFwdIteratorT operator++(int) {
438
        auto tmp = *this;
439
        current_ = (iterator)(((char*)current_) + current_->numPackedBytes());
440
        return tmp;
441
    }
442
443
208
    explicit operator pointer() { return current_; }
444
445
0
    reference operator*() const { return *current_; }
446
447
18.7k
    pointer operator->() const { return current_; }
_ZNK5doris17JsonbFwdIteratorTIPNS_13JsonbKeyValueENS_9ObjectValEEptEv
Line
Count
Source
447
18.4k
    pointer operator->() const { return current_; }
_ZNK5doris17JsonbFwdIteratorTIPKNS_13JsonbKeyValueENS_9ObjectValEEptEv
Line
Count
Source
447
272
    pointer operator->() const { return current_; }
448
449
0
    iterator base() const { return current_; }
450
451
private:
452
    iterator current_;
453
};
454
455
typedef int (*hDictInsert)(const char* key, unsigned len);
456
typedef int (*hDictFind)(const char* key, unsigned len);
457
458
typedef std::underlying_type<JsonbType>::type JsonbTypeUnder;
459
460
/*
461
 * JsonbKeyValue class defines JSONB key type, as described below.
462
 *
463
 * key ::=
464
 *   0x00 int8    //1-byte dictionary id
465
 * | int8 (byte*) //int8 (>0) is the size of the key string
466
 *
467
 * value ::= primitive_value | container
468
 *
469
 * JsonbKeyValue can be either an id mapping to the key string in an external
470
 * dictionary, or it is the original key string. Whether to read an id or a
471
 * string is decided by the first byte (size_).
472
 *
473
 * Note: a key object must be followed by a value object. Therefore, a key
474
 * object implicitly refers to a key-value pair, and you can get the value
475
 * object right after the key object. The function numPackedBytes hence
476
 * indicates the total size of the key-value pair, so that we will be able to go
477
 * to next pair from the key.
478
 *
479
 * ** Dictionary size **
480
 * By default, the dictionary size is 255 (1-byte). Users can define
481
 * "USE_LARGE_DICT" to increase the dictionary size to 65535 (2-byte).
482
 */
483
class JsonbKeyValue {
484
public:
485
    // now we use sMaxKeyId to represent an empty key
486
    static const int sMaxKeyId = 65535;
487
    typedef uint16_t keyid_type;
488
489
    static const uint8_t sMaxKeyLen = 64;
490
491
    // size of the key. 0 indicates it is stored as id
492
175
    uint8_t klen() const { return size_; }
493
494
    // get the key string. Note the string may not be null terminated.
495
107
    const char* getKeyStr() const { return key_.str_; }
496
497
9.22k
    keyid_type getKeyId() const { return key_.id_; }
498
499
18.6k
    unsigned int keyPackedBytes() const {
500
18.6k
        return size_ ? (sizeof(size_) + size_) : (sizeof(size_) + sizeof(keyid_type));
501
18.6k
    }
502
503
9.33k
    JsonbValue* value() const { return (JsonbValue*)(((char*)this) + keyPackedBytes()); }
504
505
    // size of the total packed bytes (key+value)
506
    unsigned int numPackedBytes() const;
507
508
private:
509
    uint8_t size_;
510
511
    union key_ {
512
        keyid_type id_;
513
        char str_[1];
514
    } key_;
515
516
    JsonbKeyValue();
517
};
518
519
/*
520
 * JsonbValue is the base class of all JSONB types. It contains only one member
521
 * variable - type info, which can be retrieved by member functions is[Type]()
522
 * or type().
523
 */
524
class JsonbValue {
525
public:
526
    static const uint32_t sMaxValueLen = 1 << 24; // 16M
527
528
3.12k
    bool isNull() const { return (type_ == JsonbType::T_Null); }
529
50
    bool isTrue() const { return (type_ == JsonbType::T_True); }
530
46
    bool isFalse() const { return (type_ == JsonbType::T_False); }
531
0
    bool isInt() const { return isInt8() || isInt16() || isInt32() || isInt64() || isInt128(); }
532
252
    bool isInt8() const { return (type_ == JsonbType::T_Int8); }
533
220
    bool isInt16() const { return (type_ == JsonbType::T_Int16); }
534
198
    bool isInt32() const { return (type_ == JsonbType::T_Int32); }
535
134
    bool isInt64() const { return (type_ == JsonbType::T_Int64); }
536
72
    bool isDouble() const { return (type_ == JsonbType::T_Double); }
537
0
    bool isFloat() const { return (type_ == JsonbType::T_Float); }
538
70
    bool isString() const { return (type_ == JsonbType::T_String); }
539
1.02k
    bool isBinary() const { return (type_ == JsonbType::T_Binary); }
540
0
    bool isObject() const { return (type_ == JsonbType::T_Object); }
541
0
    bool isArray() const { return (type_ == JsonbType::T_Array); }
542
0
    bool isInt128() const { return (type_ == JsonbType::T_Int128); }
543
544
4.74k
    JsonbType type() const { return type_; }
545
546
0
    const char* typeName() const {
547
0
        switch (type_) {
548
0
        case JsonbType::T_Null:
549
0
            return "null";
550
0
        case JsonbType::T_True:
551
0
        case JsonbType::T_False:
552
0
            return "bool";
553
0
        case JsonbType::T_Int8:
554
0
        case JsonbType::T_Int16:
555
0
        case JsonbType::T_Int32:
556
0
            return "int";
557
0
        case JsonbType::T_Int64:
558
0
            return "bigint";
559
0
        case JsonbType::T_Int128:
560
0
            return "largeint";
561
0
        case JsonbType::T_Double:
562
0
            return "double";
563
0
        case JsonbType::T_Float:
564
0
            return "float";
565
0
        case JsonbType::T_String:
566
0
            return "string";
567
0
        case JsonbType::T_Binary:
568
0
            return "binary";
569
0
        case JsonbType::T_Object:
570
0
            return "object";
571
0
        case JsonbType::T_Array:
572
0
            return "array";
573
0
        default:
574
0
            return "unknown";
575
0
        }
576
0
    }
577
578
    // size of the total packed bytes
579
    unsigned int numPackedBytes() const;
580
581
    // size of the value in bytes
582
    unsigned int size() const;
583
584
    //Get the number of jsonbvalue elements
585
    int length() const;
586
587
    //Whether to include the jsonbvalue rhs
588
    bool contains(JsonbValue* rhs) const;
589
590
    // get the raw byte array of the value
591
    const char* getValuePtr() const;
592
593
    // find the JSONB value by JsonbPath
594
    JsonbValue* findValue(JsonbPath& path, hDictFind handler);
595
    friend class JsonbDocument;
596
597
protected:
598
    JsonbType type_; // type info
599
600
    JsonbValue();
601
};
602
603
/*
604
 * NumberValT is the template class (derived from JsonbValue) of all number
605
 * types (integers and double).
606
 */
607
template <class T>
608
class NumberValT : public JsonbValue {
609
public:
610
4.38k
    T val() const { return num_; }
_ZNK5doris10NumberValTIaE3valEv
Line
Count
Source
610
98
    T val() const { return num_; }
_ZNK5doris10NumberValTIsE3valEv
Line
Count
Source
610
85
    T val() const { return num_; }
_ZNK5doris10NumberValTIiE3valEv
Line
Count
Source
610
3.09k
    T val() const { return num_; }
_ZNK5doris10NumberValTIlE3valEv
Line
Count
Source
610
20
    T val() const { return num_; }
_ZNK5doris10NumberValTInE3valEv
Line
Count
Source
610
1.02k
    T val() const { return num_; }
_ZNK5doris10NumberValTIdE3valEv
Line
Count
Source
610
60
    T val() const { return num_; }
Unexecuted instantiation: _ZNK5doris10NumberValTIfE3valEv
611
612
    unsigned int numPackedBytes() const { return sizeof(JsonbValue) + sizeof(T); }
613
614
    // catch-all for unknown specializations of the template class
615
    bool setVal(T value) { return false; }
616
617
private:
618
    T num_;
619
620
    NumberValT();
621
};
622
623
typedef NumberValT<int8_t> JsonbInt8Val;
624
625
// override setVal for Int8Val
626
template <>
627
0
inline bool JsonbInt8Val::setVal(int8_t value) {
628
0
    if (!isInt8()) {
629
0
        return false;
630
0
    }
631
0
632
0
    num_ = value;
633
0
    return true;
634
0
}
635
636
typedef NumberValT<int16_t> JsonbInt16Val;
637
638
// override setVal for Int16Val
639
template <>
640
0
inline bool JsonbInt16Val::setVal(int16_t value) {
641
0
    if (!isInt16()) {
642
0
        return false;
643
0
    }
644
0
645
0
    num_ = value;
646
0
    return true;
647
0
}
648
typedef NumberValT<int32_t> JsonbInt32Val;
649
650
// override setVal for Int32Val
651
template <>
652
0
inline bool JsonbInt32Val::setVal(int32_t value) {
653
0
    if (!isInt32()) {
654
0
        return false;
655
0
    }
656
0
657
0
    num_ = value;
658
0
    return true;
659
0
}
660
661
typedef NumberValT<int64_t> JsonbInt64Val;
662
663
// override setVal for Int64Val
664
template <>
665
0
inline bool JsonbInt64Val::setVal(int64_t value) {
666
0
    if (!isInt64()) {
667
0
        return false;
668
0
    }
669
0
670
0
    num_ = value;
671
0
    return true;
672
0
}
673
674
typedef NumberValT<int128_t> JsonbInt128Val;
675
676
// override setVal for Int128Val
677
template <>
678
0
inline bool JsonbInt128Val::setVal(int128_t value) {
679
0
    if (!isInt128()) {
680
0
        return false;
681
0
    }
682
0
683
0
    num_ = value;
684
0
    return true;
685
0
}
686
687
typedef NumberValT<double> JsonbDoubleVal;
688
689
// override setVal for DoubleVal
690
template <>
691
0
inline bool JsonbDoubleVal::setVal(double value) {
692
0
    if (!isDouble()) {
693
0
        return false;
694
0
    }
695
0
696
0
    num_ = value;
697
0
    return true;
698
0
}
699
700
typedef NumberValT<float> JsonbFloatVal;
701
702
// override setVal for FloatVal
703
template <>
704
0
inline bool JsonbFloatVal::setVal(float value) {
705
0
    if (!isFloat()) {
706
0
        return false;
707
0
    }
708
0
709
0
    num_ = value;
710
0
    return true;
711
0
}
712
713
// A class to get an integer
714
class JsonbIntVal : public JsonbValue {
715
public:
716
52
    int128_t val() const {
717
52
        switch (type_) {
718
24
        case JsonbType::T_Int8:
719
24
            return ((JsonbInt8Val*)this)->val();
720
18
        case JsonbType::T_Int16:
721
18
            return ((JsonbInt16Val*)this)->val();
722
6
        case JsonbType::T_Int32:
723
6
            return ((JsonbInt32Val*)this)->val();
724
4
        case JsonbType::T_Int64:
725
4
            return ((JsonbInt64Val*)this)->val();
726
0
        case JsonbType::T_Int128:
727
0
            return ((JsonbInt128Val*)this)->val();
728
0
        default:
729
0
            return 0;
730
52
        }
731
52
    }
732
0
    bool setVal(int128_t val) {
733
0
        switch (type_) {
734
0
        case JsonbType::T_Int8:
735
0
            if (val < std::numeric_limits<int8_t>::min() ||
736
0
                val > std::numeric_limits<int8_t>::max())
737
0
                return false;
738
0
            return ((JsonbInt8Val*)this)->setVal((int8_t)val);
739
0
        case JsonbType::T_Int16:
740
0
            if (val < std::numeric_limits<int16_t>::min() ||
741
0
                val > std::numeric_limits<int16_t>::max())
742
0
                return false;
743
0
            return ((JsonbInt16Val*)this)->setVal((int16_t)val);
744
0
        case JsonbType::T_Int32:
745
0
            if (val < std::numeric_limits<int32_t>::min() ||
746
0
                val > std::numeric_limits<int32_t>::max())
747
0
                return false;
748
0
            return ((JsonbInt32Val*)this)->setVal((int32_t)val);
749
0
        case JsonbType::T_Int64:
750
0
            return ((JsonbInt64Val*)this)->setVal((int64_t)val);
751
0
        case JsonbType::T_Int128:
752
0
            return ((JsonbInt128Val*)this)->setVal(val);
753
0
        default:
754
0
            return false;
755
0
        }
756
0
    }
757
};
758
759
/*
760
 * BlobVal is the base class (derived from JsonbValue) for string and binary
761
 * types. The size_ indicates the total bytes of the payload_.
762
 */
763
class JsonbBlobVal : public JsonbValue {
764
public:
765
    // size of the blob payload only
766
1.03k
    unsigned int getBlobLen() const { return size_; }
767
768
    // return the blob as byte array
769
3.22k
    const char* getBlob() const { return payload_; }
770
771
    // size of the total packed bytes
772
3.31k
    unsigned int numPackedBytes() const { return sizeof(JsonbValue) + sizeof(size_) + size_; }
773
    friend class JsonbDocument;
774
775
protected:
776
    uint32_t size_;
777
    char payload_[0];
778
779
    // set new blob bytes
780
0
    bool internalSetVal(const char* blob, uint32_t blobSize) {
781
0
        // if we cannot fit the new blob, fail the operation
782
0
        if (blobSize > size_) {
783
0
            return false;
784
0
        }
785
0
786
0
        memcpy(payload_, blob, blobSize);
787
0
788
0
        // Set the rest of the bytes to 0.  Note we cannot change the size_ of the
789
0
        // current payload, as all values are packed.
790
0
        memset(payload_ + blobSize, 0, size_ - blobSize);
791
0
792
0
        return true;
793
0
    }
794
795
    JsonbBlobVal();
796
};
797
798
/*
799
 * Binary type
800
 */
801
class JsonbBinaryVal : public JsonbBlobVal {
802
public:
803
0
    bool setVal(const char* blob, uint32_t blobSize) {
804
0
        if (!isBinary()) {
805
0
            return false;
806
0
        }
807
0
808
0
        return internalSetVal(blob, blobSize);
809
0
    }
810
811
private:
812
    JsonbBinaryVal();
813
};
814
815
/*
816
 * String type
817
 * Note: JSONB string may not be a c-string (NULL-terminated)
818
 */
819
class JsonbStringVal : public JsonbBlobVal {
820
public:
821
0
    bool setVal(const char* str, uint32_t blobSize) {
822
0
        if (!isString()) {
823
0
            return false;
824
0
        }
825
0
826
0
        return internalSetVal(str, blobSize);
827
0
    }
828
    /*
829
    This function return the actual size of a string. Since for
830
    a string, it can be null-terminated with null paddings or it
831
    can take all the space in the payload_ without null in the end.
832
    So we need to check it to get the true actual length of a string.
833
  */
834
137
    size_t length() {
835
        // It's an empty string
836
137
        if (0 == size_) return size_;
837
        // The string stored takes all the spaces in payload_
838
137
        if (payload_[size_ - 1] != 0) {
839
137
            return size_;
840
137
        }
841
        // It's shorter than the size of payload_
842
0
        return strnlen(payload_, size_);
843
137
    }
844
    // convert the string (case insensitive) to a boolean value
845
    // "false": 0
846
    // "true": 1
847
    // all other strings: -1
848
0
    int getBoolVal() {
849
0
        if (size_ == 4 && tolower(payload_[0]) == 't' && tolower(payload_[1]) == 'r' &&
850
0
            tolower(payload_[2]) == 'u' && tolower(payload_[3]) == 'e')
851
0
            return 1;
852
0
        else if (size_ == 5 && tolower(payload_[0]) == 'f' && tolower(payload_[1]) == 'a' &&
853
0
                 tolower(payload_[2]) == 'l' && tolower(payload_[3]) == 's' &&
854
0
                 tolower(payload_[4]) == 'e')
855
0
            return 0;
856
0
        else
857
0
            return -1;
858
0
    }
859
860
private:
861
    JsonbStringVal();
862
};
863
864
/*
865
 * ContainerVal is the base class (derived from JsonbValue) for object and
866
 * array types. The size_ indicates the total bytes of the payload_.
867
 */
868
class ContainerVal : public JsonbValue {
869
public:
870
    // size of the container payload only
871
10
    unsigned int getContainerSize() const { return size_; }
872
873
    // return the container payload as byte array
874
0
    const char* getPayload() const { return payload_; }
875
876
    // size of the total packed bytes
877
1.59k
    unsigned int numPackedBytes() const { return sizeof(JsonbValue) + sizeof(size_) + size_; }
878
    friend class JsonbDocument;
879
880
protected:
881
    uint32_t size_;
882
    char payload_[0];
883
884
    ContainerVal();
885
};
886
887
/*
888
 * Object type
889
 */
890
class ObjectVal : public ContainerVal {
891
public:
892
    typedef JsonbKeyValue value_type;
893
    typedef value_type* pointer;
894
    typedef const value_type* const_pointer;
895
    typedef JsonbFwdIteratorT<pointer, ObjectVal> iterator;
896
    typedef JsonbFwdIteratorT<const_pointer, ObjectVal> const_iterator;
897
898
public:
899
0
    const_iterator search(const char* key, hDictFind handler = nullptr) const {
900
0
        return const_cast<ObjectVal*>(this)->search(key, handler);
901
0
    }
902
903
0
    const_iterator search(const char* key, unsigned int klen, hDictFind handler = nullptr) const {
904
0
        return const_cast<ObjectVal*>(this)->search(key, klen, handler);
905
0
    }
906
907
0
    const_iterator search(int key_id) const { return const_cast<ObjectVal*>(this)->search(key_id); }
908
0
    iterator search(const char* key, hDictFind handler = nullptr) {
909
0
        if (!key) {
910
0
            return end();
911
0
        }
912
0
        return search(key, (unsigned int)strlen(key), handler);
913
0
    }
914
915
51
    iterator search(const char* key, unsigned int klen, hDictFind handler = nullptr) {
916
51
        if (!key || !klen) return end();
917
918
51
        int key_id = -1;
919
51
        if (handler && (key_id = handler(key, klen)) >= 0) {
920
0
            return search(key_id);
921
0
        }
922
51
        return internalSearch(key, klen);
923
51
    }
924
925
0
    iterator search(int key_id) {
926
0
        if (key_id < 0 || key_id > JsonbKeyValue::sMaxKeyId) return end();
927
928
0
        const char* pch = payload_;
929
0
        const char* fence = payload_ + size_;
930
931
0
        while (pch < fence) {
932
0
            JsonbKeyValue* pkey = (JsonbKeyValue*)(pch);
933
0
            if (!pkey->klen() && key_id == pkey->getKeyId()) {
934
0
                return iterator(pkey);
935
0
            }
936
0
            pch += pkey->numPackedBytes();
937
0
        }
938
939
0
        assert(pch == fence);
940
0
        return end();
941
0
    }
942
943
    // Get number of elements in object
944
0
    int numElem() const {
945
0
        const char* pch = payload_;
946
0
        const char* fence = payload_ + size_;
947
948
0
        unsigned int num = 0;
949
0
        while (pch < fence) {
950
0
            JsonbKeyValue* pkey = (JsonbKeyValue*)(pch);
951
0
            ++num;
952
0
            pch += pkey->numPackedBytes();
953
0
        }
954
955
0
        assert(pch == fence);
956
957
0
        return num;
958
0
    }
959
960
0
    JsonbKeyValue* getJsonbKeyValue(unsigned int i) const {
961
0
        const char* pch = payload_;
962
0
        const char* fence = payload_ + size_;
963
964
0
        unsigned int num = 0;
965
0
        while (pch < fence) {
966
0
            JsonbKeyValue* pkey = (JsonbKeyValue*)(pch);
967
0
            if (num == i) return pkey;
968
0
            ++num;
969
0
            pch += pkey->numPackedBytes();
970
0
        }
971
972
0
        assert(pch == fence);
973
974
0
        return nullptr;
975
0
    }
976
977
0
    JsonbValue* find(const char* key, hDictFind handler = nullptr) const {
978
0
        return const_cast<ObjectVal*>(this)->find(key, handler);
979
0
    }
980
981
0
    JsonbValue* find(const char* key, unsigned int klen, hDictFind handler = nullptr) const {
982
0
        return const_cast<ObjectVal*>(this)->find(key, klen, handler);
983
0
    }
984
0
    JsonbValue* find(int key_id) const { return const_cast<ObjectVal*>(this)->find(key_id); }
985
986
    // find the JSONB value by a key string (null terminated)
987
0
    JsonbValue* find(const char* key, hDictFind handler = nullptr) {
988
0
        if (!key) return nullptr;
989
0
        return find(key, (unsigned int)strlen(key), handler);
990
0
    }
991
992
    // find the JSONB value by a key string (with length)
993
51
    JsonbValue* find(const char* key, unsigned int klen, hDictFind handler = nullptr) {
994
51
        iterator kv = search(key, klen, handler);
995
51
        if (end() == kv) return nullptr;
996
33
        return kv->value();
997
51
    }
998
999
    // find the JSONB value by a key dictionary ID
1000
0
    JsonbValue* find(int key_id) {
1001
0
        iterator kv = search(key_id);
1002
0
        if (end() == kv) return nullptr;
1003
0
        return kv->value();
1004
0
    }
1005
1006
1.03k
    iterator begin() { return iterator((pointer)payload_); }
1007
1008
54
    const_iterator begin() const { return const_iterator((pointer)payload_); }
1009
1010
10.3k
    iterator end() { return iterator((pointer)(payload_ + size_)); }
1011
1012
54
    const_iterator end() const { return const_iterator((pointer)(payload_ + size_)); }
1013
1014
private:
1015
51
    iterator internalSearch(const char* key, unsigned int klen) {
1016
51
        const char* pch = payload_;
1017
51
        const char* fence = payload_ + size_;
1018
1019
57
        while (pch < fence) {
1020
39
            JsonbKeyValue* pkey = (JsonbKeyValue*)(pch);
1021
39
            if (klen == pkey->klen() && strncmp(key, pkey->getKeyStr(), klen) == 0) {
1022
33
                return iterator(pkey);
1023
33
            }
1024
6
            pch += pkey->numPackedBytes();
1025
6
        }
1026
1027
18
        assert(pch == fence);
1028
1029
0
        return end();
1030
51
    }
1031
1032
private:
1033
    ObjectVal();
1034
};
1035
1036
/*
1037
 * Array type
1038
 */
1039
class ArrayVal : public ContainerVal {
1040
public:
1041
    // get the JSONB value at index
1042
231
    JsonbValue* get(int idx) const {
1043
231
        if (idx < 0) return nullptr;
1044
1045
231
        const char* pch = payload_;
1046
231
        const char* fence = payload_ + size_;
1047
1048
564
        while (pch < fence && idx-- > 0) pch += ((JsonbValue*)pch)->numPackedBytes();
1049
231
        if (idx > 0 || pch == fence) return nullptr;
1050
1051
168
        return (JsonbValue*)pch;
1052
231
    }
1053
1054
    // Get number of elements in array
1055
0
    int numElem() const {
1056
0
        const char* pch = payload_;
1057
0
        const char* fence = payload_ + size_;
1058
1059
0
        unsigned int num = 0;
1060
0
        while (pch < fence) {
1061
0
            ++num;
1062
0
            pch += ((JsonbValue*)pch)->numPackedBytes();
1063
0
        }
1064
1065
0
        assert(pch == fence);
1066
1067
0
        return num;
1068
0
    }
1069
1070
    typedef JsonbValue value_type;
1071
    typedef value_type* pointer;
1072
    typedef const value_type* const_pointer;
1073
    typedef JsonbFwdIteratorT<pointer, ArrayVal> iterator;
1074
    typedef JsonbFwdIteratorT<const_pointer, ArrayVal> const_iterator;
1075
1076
0
    iterator begin() { return iterator((pointer)payload_); }
1077
1078
74
    const_iterator begin() const { return const_iterator((pointer)payload_); }
1079
1080
0
    iterator end() { return iterator((pointer)(payload_ + size_)); }
1081
1082
74
    const_iterator end() const { return const_iterator((pointer)(payload_ + size_)); }
1083
1084
private:
1085
    ArrayVal();
1086
};
1087
1088
// Prepare an empty document
1089
// input: pb - buuffer/packed bytes for jsonb document
1090
//        size - size of the buffer
1091
//        type - value type in the document
1092
0
inline JsonbDocument* JsonbDocument::makeDocument(char* pb, uint32_t size, JsonbType type) {
1093
0
    if (!pb || size < sizeof(JsonbHeader) + sizeof(JsonbValue)) {
1094
0
        return nullptr;
1095
0
    }
1096
0
1097
0
    if (type < JsonbType::T_Null || type >= JsonbType::NUM_TYPES) {
1098
0
        return nullptr;
1099
0
    }
1100
0
    JsonbDocument* doc = (JsonbDocument*)pb;
1101
0
    // Write header
1102
0
    doc->header_.ver_ = JSONB_VER;
1103
0
    JsonbValue* value = doc->getValue();
1104
0
    // Write type
1105
0
    value->type_ = type;
1106
0
1107
0
    // Set empty JsonbValue
1108
0
    if (type == JsonbType::T_Object || type == JsonbType::T_Array)
1109
0
        ((ContainerVal*)value)->size_ = 0;
1110
0
    if (type == JsonbType::T_String || type == JsonbType::T_Binary)
1111
0
        ((JsonbBlobVal*)value)->size_ = 0;
1112
0
    return doc;
1113
0
}
1114
1115
// Prepare a document from an JsonbValue
1116
// input: pb - buuffer/packed bytes for jsonb document
1117
//        size - size of the buffer
1118
//        rval - jsonb value to be copied into the document
1119
0
inline JsonbDocument* JsonbDocument::makeDocument(char* pb, uint32_t size, const JsonbValue* rval) {
1120
0
    // checking if the buffer is big enough to store the value
1121
0
    if (!pb || !rval || size < sizeof(JsonbHeader) + rval->numPackedBytes()) {
1122
0
        return nullptr;
1123
0
    }
1124
0
1125
0
    JsonbType type = rval->type();
1126
0
    if (type < JsonbType::T_Null || type >= JsonbType::NUM_TYPES) {
1127
0
        return nullptr;
1128
0
    }
1129
0
    JsonbDocument* doc = (JsonbDocument*)pb;
1130
0
    // Write header
1131
0
    doc->header_.ver_ = JSONB_VER;
1132
0
    // get the starting byte of the value
1133
0
    JsonbValue* value = doc->getValue();
1134
0
    // binary copy of the rval
1135
0
    if (value != rval) // copy not necessary if values are the same
1136
0
        memmove(value, rval, rval->numPackedBytes());
1137
0
1138
0
    return doc;
1139
0
}
1140
1141
2.12k
// Interpret the size bytes at pb as a packed JSONB document.
// Returns pb viewed as a JsonbDocument, or nullptr when pb is null, the
// version byte does not match JSONB_VER, the root value type is out of range,
// or size is not exactly header + packed size of the root value.
inline JsonbDocument* JsonbDocument::createDocument(const char* pb, uint32_t size) {
    if (!pb || size < sizeof(JsonbHeader) + sizeof(JsonbValue)) {
        return nullptr;
    }

    JsonbDocument* doc = (JsonbDocument*)pb;
    if (doc->header_.ver_ != JSONB_VER) {
        return nullptr;
    }

    JsonbValue* val = (JsonbValue*)doc->payload_;
    const JsonbType type = val->type();
    if (type < JsonbType::T_Null || type >= JsonbType::NUM_TYPES) {
        return nullptr;
    }

    // For blobs and containers numPackedBytes() reads the 32-bit size field
    // that follows the type byte. Make sure that field lies inside the buffer
    // before it is read.
    const bool has_size_field = type == JsonbType::T_String || type == JsonbType::T_Binary ||
                                type == JsonbType::T_Object || type == JsonbType::T_Array;
    if (has_size_field && size < sizeof(JsonbHeader) + sizeof(JsonbValue) + sizeof(uint32_t)) {
        return nullptr;
    }

    if (size != sizeof(JsonbHeader) + val->numPackedBytes()) {
        return nullptr;
    }

    return doc;
}
1159
0
inline void JsonbDocument::setValue(const JsonbValue* value) {
1160
0
    memcpy(payload_, value, value->numPackedBytes());
1161
0
}
1162
1163
0
// Interpret the size bytes at pb as a packed JSONB document and return its
// root value. Returns nullptr when pb is null, the version byte does not
// match JSONB_VER, or size is not exactly header + packed size of the value.
inline JsonbValue* JsonbDocument::createValue(const char* pb, uint32_t size) {
    if (!pb || size < sizeof(JsonbHeader) + sizeof(JsonbValue)) {
        return nullptr;
    }

    JsonbDocument* doc = (JsonbDocument*)pb;
    if (doc->header_.ver_ != JSONB_VER) {
        return nullptr;
    }

    JsonbValue* val = (JsonbValue*)doc->payload_;

    // For blobs and containers numPackedBytes() reads the 32-bit size field
    // that follows the type byte. Make sure that field lies inside the buffer
    // before it is read.
    const JsonbType type = val->type();
    const bool has_size_field = type == JsonbType::T_String || type == JsonbType::T_Binary ||
                                type == JsonbType::T_Object || type == JsonbType::T_Array;
    if (has_size_field && size < sizeof(JsonbHeader) + sizeof(JsonbValue) + sizeof(uint32_t)) {
        return nullptr;
    }

    if (size != sizeof(JsonbHeader) + val->numPackedBytes()) {
        return nullptr;
    }

    return val;
}
1180
1181
0
inline unsigned int JsonbDocument::numPackedBytes() const {
1182
0
    return ((const JsonbValue*)payload_)->numPackedBytes() + sizeof(header_);
1183
0
}
1184
1185
9.30k
inline unsigned int JsonbKeyValue::numPackedBytes() const {
1186
9.30k
    unsigned int ks = keyPackedBytes();
1187
9.30k
    JsonbValue* val = (JsonbValue*)(((char*)this) + ks);
1188
9.30k
    return ks + val->numPackedBytes();
1189
9.30k
}
1190
1191
// Poor man's "virtual" function JsonbValue::numPackedBytes
1192
11.9k
inline unsigned int JsonbValue::numPackedBytes() const {
1193
11.9k
    switch (type_) {
1194
2.17k
    case JsonbType::T_Null:
1195
2.29k
    case JsonbType::T_True:
1196
2.41k
    case JsonbType::T_False: {
1197
2.41k
        return sizeof(type_);
1198
2.29k
    }
1199
1200
187
    case JsonbType::T_Int8: {
1201
187
        return sizeof(type_) + sizeof(int8_t);
1202
2.29k
    }
1203
119
    case JsonbType::T_Int16: {
1204
119
        return sizeof(type_) + sizeof(int16_t);
1205
2.29k
    }
1206
3.13k
    case JsonbType::T_Int32: {
1207
3.13k
        return sizeof(type_) + sizeof(int32_t);
1208
2.29k
    }
1209
60
    case JsonbType::T_Int64: {
1210
60
        return sizeof(type_) + sizeof(int64_t);
1211
2.29k
    }
1212
123
    case JsonbType::T_Double: {
1213
123
        return sizeof(type_) + sizeof(double);
1214
2.29k
    }
1215
0
    case JsonbType::T_Float: {
1216
0
        return sizeof(type_) + sizeof(float);
1217
2.29k
    }
1218
1.02k
    case JsonbType::T_Int128: {
1219
1.02k
        return sizeof(type_) + sizeof(int128_t);
1220
2.29k
    }
1221
234
    case JsonbType::T_String:
1222
3.31k
    case JsonbType::T_Binary: {
1223
3.31k
        return ((JsonbBlobVal*)(this))->numPackedBytes();
1224
234
    }
1225
1226
1.20k
    case JsonbType::T_Object:
1227
1.59k
    case JsonbType::T_Array: {
1228
1.59k
        return ((ContainerVal*)(this))->numPackedBytes();
1229
1.20k
    }
1230
0
    default:
1231
0
        return 0;
1232
11.9k
    }
1233
11.9k
}
1234
1235
35
inline unsigned int JsonbValue::size() const {
1236
35
    switch (type_) {
1237
4
    case JsonbType::T_Int8: {
1238
4
        return sizeof(int8_t);
1239
0
    }
1240
2
    case JsonbType::T_Int16: {
1241
2
        return sizeof(int16_t);
1242
0
    }
1243
1
    case JsonbType::T_Int32: {
1244
1
        return sizeof(int32_t);
1245
0
    }
1246
1
    case JsonbType::T_Int64: {
1247
1
        return sizeof(int64_t);
1248
0
    }
1249
3
    case JsonbType::T_Double: {
1250
3
        return sizeof(double);
1251
0
    }
1252
0
    case JsonbType::T_Float: {
1253
0
        return sizeof(float);
1254
0
    }
1255
0
    case JsonbType::T_Int128: {
1256
0
        return sizeof(int128_t);
1257
0
    }
1258
8
    case JsonbType::T_String:
1259
8
    case JsonbType::T_Binary: {
1260
8
        return ((JsonbBlobVal*)(this))->getBlobLen();
1261
8
    }
1262
1263
5
    case JsonbType::T_Object:
1264
10
    case JsonbType::T_Array: {
1265
10
        return ((ContainerVal*)(this))->getContainerSize();
1266
5
    }
1267
2
    case JsonbType::T_Null:
1268
4
    case JsonbType::T_True:
1269
6
    case JsonbType::T_False:
1270
6
    default:
1271
6
        return 0;
1272
35
    }
1273
35
}
1274
1275
0
inline int JsonbValue::length() const {
1276
0
    switch (type_) {
1277
0
    case JsonbType::T_Int8:
1278
0
    case JsonbType::T_Int16:
1279
0
    case JsonbType::T_Int32:
1280
0
    case JsonbType::T_Int64:
1281
0
    case JsonbType::T_Double:
1282
0
    case JsonbType::T_Float:
1283
0
    case JsonbType::T_Int128:
1284
0
    case JsonbType::T_String:
1285
0
    case JsonbType::T_Binary:
1286
0
    case JsonbType::T_Null:
1287
0
    case JsonbType::T_True:
1288
0
    case JsonbType::T_False: {
1289
0
        return 1;
1290
0
    }
1291
0
    case JsonbType::T_Object: {
1292
0
        return ((ObjectVal*)this)->numElem();
1293
0
    }
1294
0
    case JsonbType::T_Array: {
1295
0
        return ((ArrayVal*)this)->numElem();
1296
0
    }
1297
0
    default:
1298
0
        return 0;
1299
0
    }
1300
0
}
1301
1302
0
// Returns true when rhs (the candidate) is contained in this value (the
// target):
//  - integers: rhs is an integer type and both integer values are equal
//  - double / float: rhs has the same type and the same value
//  - string / binary: rhs is a string with the same length and bytes
//  - array vs array: every element of rhs is contained in some element of
//    this array
//  - array vs non-array: some element of this array contains rhs
//  - object vs object: every key of rhs exists in this object and the value
//    stored here contains the value stored in rhs
//  - null / true / false: rhs has the same type
// A null rhs is never contained.
inline bool JsonbValue::contains(JsonbValue* rhs) const {
    if (rhs == nullptr) {
        return false;
    }

    switch (type_) {
    case JsonbType::T_Int8:
    case JsonbType::T_Int16:
    case JsonbType::T_Int32:
    case JsonbType::T_Int64:
    case JsonbType::T_Int128: {
        // rhs is read through JsonbIntVal below, so it has to be one of the
        // integer types as well.
        switch (rhs->type_) {
        case JsonbType::T_Int8:
        case JsonbType::T_Int16:
        case JsonbType::T_Int32:
        case JsonbType::T_Int64:
        case JsonbType::T_Int128:
            return ((JsonbIntVal*)(this))->val() == ((JsonbIntVal*)(rhs))->val();
        default:
            return false;
        }
    }
    case JsonbType::T_Double: {
        if (rhs->isDouble()) {
            return ((JsonbDoubleVal*)(this))->val() == ((JsonbDoubleVal*)(rhs))->val();
        }
        return false;
    }
    case JsonbType::T_Float: {
        // rhs is read as a float value, so it has to be tagged as float
        if (rhs->type_ == JsonbType::T_Float) {
            return ((JsonbFloatVal*)(this))->val() == ((JsonbFloatVal*)(rhs))->val();
        }
        return false;
    }
    case JsonbType::T_String:
    case JsonbType::T_Binary: {
        if (rhs->isString()) {
            auto* lhs_str = (JsonbStringVal*)this;
            auto* rhs_str = (JsonbStringVal*)rhs;
            const size_t lhs_len = lhs_str->length();
            return lhs_len == rhs_str->length() &&
                   std::memcmp(lhs_str->getBlob(), rhs_str->getBlob(), lhs_len) == 0;
        }
        return false;
    }
    case JsonbType::T_Array: {
        const auto* lhs_array = (ArrayVal*)this;
        const int lhs_num = lhs_array->numElem();

        if (rhs->isArray()) {
            const auto* rhs_array = (ArrayVal*)rhs;
            const int rhs_num = rhs_array->numElem();
            // NOTE(review): kept from the previous implementation. It also
            // rejects candidates that are longer only because of duplicate
            // elements - confirm that this is the intended semantics.
            if (rhs_num > lhs_num) return false;

            // Every candidate element must be contained in at least one
            // element of this array.
            for (int j = 0; j < rhs_num; ++j) {
                JsonbValue* candidate = rhs_array->get(j);
                bool found = false;
                for (int i = 0; i < lhs_num && !found; ++i) {
                    const JsonbValue* element = lhs_array->get(i);
                    found = element != nullptr && element->contains(candidate);
                }
                if (!found) return false;
            }
            return true;
        }

        // Non-array candidate: it is enough that one element contains it.
        for (int i = 0; i < lhs_num; ++i) {
            const JsonbValue* element = lhs_array->get(i);
            if (element != nullptr && element->contains(rhs)) {
                return true;
            }
        }
        return false;
    }
    case JsonbType::T_Object: {
        if (!rhs->isObject()) {
            return false;
        }
        auto* lhs_object = (ObjectVal*)this;
        auto* rhs_object = (ObjectVal*)rhs;
        const int rhs_num = rhs_object->numElem();
        for (int i = 0; i < rhs_num; ++i) {
            JsonbKeyValue* entry = rhs_object->getJsonbKeyValue(i);
            if (entry == nullptr) return false;

            // Entries with a zero key length are looked up by their key id
            // (the same rule ObjectVal::search(int) applies), all others by
            // their key string.
            JsonbValue* value = entry->klen() == 0
                                        ? lhs_object->find(static_cast<int>(entry->getKeyId()))
                                        : lhs_object->find(entry->getKeyStr(), entry->klen());

            // A key that is missing here cannot be contained.
            if (value == nullptr || !value->contains(entry->value())) {
                return false;
            }
        }
        return true;
    }
    case JsonbType::T_Null: {
        return rhs->isNull();
    }
    case JsonbType::T_True: {
        return rhs->isTrue();
    }
    case JsonbType::T_False: {
        return rhs->isFalse();
    }
    default:
        return false;
    }
}
1384
1385
0
inline const char* JsonbValue::getValuePtr() const {
1386
0
    switch (type_) {
1387
0
    case JsonbType::T_Int8:
1388
0
    case JsonbType::T_Int16:
1389
0
    case JsonbType::T_Int32:
1390
0
    case JsonbType::T_Int64:
1391
0
    case JsonbType::T_Double:
1392
0
    case JsonbType::T_Float:
1393
0
    case JsonbType::T_Int128:
1394
0
        return ((char*)this) + sizeof(JsonbType);
1395
0
1396
0
    case JsonbType::T_String:
1397
0
    case JsonbType::T_Binary:
1398
0
        return ((JsonbBlobVal*)(this))->getBlob();
1399
0
1400
0
    case JsonbType::T_Object:
1401
0
    case JsonbType::T_Array:
1402
0
        return ((ContainerVal*)(this))->getPayload();
1403
0
1404
0
    case JsonbType::T_Null:
1405
0
    case JsonbType::T_True:
1406
0
    case JsonbType::T_False:
1407
0
    default:
1408
0
        return nullptr;
1409
0
    }
1410
0
}
1411
1412
846
inline bool JsonbPath::seek(const char* key_path, size_t kp_len) {
1413
    //path invalid
1414
846
    if (!key_path || kp_len == 0) return false;
1415
846
    Stream stream(key_path, kp_len);
1416
846
    stream.skip_whitespace();
1417
846
    if (stream.exhausted() || stream.read() != SCOPE) {
1418
        //path invalid
1419
0
        return false;
1420
0
    }
1421
1422
1.69k
    while (!stream.exhausted()) {
1423
850
        stream.skip_whitespace();
1424
850
        stream.clear_leg_ptr();
1425
850
        stream.clear_leg_len();
1426
1427
850
        if (!JsonbPath::parsePath(&stream, this)) {
1428
            //path invalid
1429
0
            return false;
1430
0
        }
1431
850
    }
1432
846
    return true;
1433
846
}
1434
1435
816
// Walk the legs of path starting at this value and return the value the
// path ends at, or nullptr when a leg cannot be resolved.
//  - member leg: the current value must be an object; a wildcard leg keeps
//    the current value, any other leg looks the key up (handler is passed on
//    to ObjectVal::find).
//  - array leg: a wildcard leg requires an array and keeps the current
//    value; index 0 on an object keeps the current value; otherwise the
//    current value must be an array and the element at the index is taken
//    (a negative index is added to the element count first).
inline JsonbValue* JsonbValue::findValue(JsonbPath& path, hDictFind handler) {
    JsonbValue* current = this;

    for (size_t i = 0; i < path.get_leg_vector_size(); ++i) {
        const auto& leg = path.get_leg_from_leg_vector(i);
        const bool is_wildcard = leg->leg_len == 1 && *leg->leg_ptr == WILDCARD;

        if (leg->type == MEMBER_CODE) {
            if (!LIKELY(current->type_ == JsonbType::T_Object)) {
                return nullptr;
            }
            if (is_wildcard) {
                continue;
            }

            current = ((ObjectVal*)current)->find(leg->leg_ptr, leg->leg_len, handler);
            if (current == nullptr) {
                return nullptr;
            }
        } else if (leg->type == ARRAY_CODE) {
            if (is_wildcard) {
                if (LIKELY(current->type_ == JsonbType::T_Array)) {
                    continue;
                }
                return nullptr;
            }

            // index 0 applied to an object leaves the current value as is
            if (current->type_ == JsonbType::T_Object && leg->array_index == 0) {
                continue;
            }

            // a plain index leg carries neither a leg pointer nor a length
            const bool indexable = current->type_ == JsonbType::T_Array &&
                                   leg->leg_ptr == nullptr && leg->leg_len == 0;
            if (!indexable) {
                return nullptr;
            }

            ArrayVal* array = (ArrayVal*)current;
            if (leg->array_index >= 0) {
                current = array->get(leg->array_index);
            } else {
                // negative index: offset from the number of elements
                current = array->get(array->numElem() + leg->array_index);
            }

            if (current == nullptr) {
                return nullptr;
            }
        }
        // legs of any other type are skipped
    }

    return current;
}
1491
1492
850
// Parse the next leg of a json path: the character at the stream position
// selects the array-leg or the member-leg parser. Any other character makes
// the path invalid.
inline bool JsonbPath::parsePath(Stream* stream, JsonbPath* path) {
    if (stream->peek() == BEGIN_ARRAY) {
        return parse_array(stream, path);
    }
    if (stream->peek() == BEGIN_MEMBER) {
        return parse_member(stream, path);
    }
    return false; //invalid json path
}
1501
1502
542
// Parse one array leg starting at BEGIN_ARRAY and append it to path.
// Accepted forms between BEGIN_ARRAY and END_ARRAY:
//  - WILDCARD: stored as a leg that points at the wildcard character
//  - an integer index: stored with that index
//  - LAST (case insensitive), optionally followed by MINUS and a
//    non-negative integer n: stored with index -n - 1
// Returns false when the leg is malformed.
inline bool JsonbPath::parse_array(Stream* stream, JsonbPath* path) {
    assert(stream->peek() == BEGIN_ARRAY);
    stream->skip(1);
    if (stream->exhausted()) {
        return false;
    }

    if (stream->peek() == WILDCARD) {
        stream->set_leg_ptr(const_cast<char*>(stream->position()));
        stream->add_leg_len();
        stream->skip(1);
        // Do not peek once the input ended right after the wildcard.
        if (stream->exhausted() || stream->peek() != END_ARRAY) {
            return false;
        }
        path->add_leg_to_leg_vector(std::make_unique<leg_info>(
                stream->get_leg_ptr(), stream->get_leg_len(), 0, ARRAY_CODE));
        stream->skip(1);
        return true;
    }

    // collect everything up to the closing bracket as the index text
    stream->set_leg_ptr(const_cast<char*>(stream->position()));
    for (; !stream->exhausted() && stream->peek() != END_ARRAY; stream->skip(1)) {
        stream->add_leg_len();
    }

    if (stream->exhausted() || stream->peek() != END_ARRAY) {
        return false;
    }
    stream->skip(1);

    //parse array index to int
    std::string_view idx_string(stream->get_leg_ptr(), stream->get_leg_len());
    int index = 0;

    // std::tolower is only defined for values representable as unsigned char
    const auto same_ignore_case = [](char c1, char c2) {
        return std::tolower(static_cast<unsigned char>(c1)) ==
               std::tolower(static_cast<unsigned char>(c2));
    };

    if (stream->get_leg_len() >= 4 &&
        std::equal(LAST, LAST + 4, stream->get_leg_ptr(), same_ignore_case)) {
        auto pos = idx_string.find(MINUS);

        if (pos != std::string::npos) {
            idx_string = idx_string.substr(pos + 1);

            auto result = std::from_chars(idx_string.data(), idx_string.data() + idx_string.size(),
                                          index);
            if (result.ec != std::errc()) {
                return false;
            }
            // A negative offset would flip into a positive index below (and
            // negating INT_MIN overflows), so it is rejected.
            if (index < 0) {
                return false;
            }
        } else if (stream->get_leg_len() > 4) {
            return false;
        }

        // LAST maps to -1, LAST MINUS n to -n - 1
        path->add_leg_to_leg_vector(
                std::make_unique<leg_info>(nullptr, 0, -index - 1, ARRAY_CODE));
        return true;
    }

    auto result = std::from_chars(idx_string.data(), idx_string.data() + idx_string.size(), index);

    if (result.ec != std::errc()) {
        return false;
    }

    path->add_leg_to_leg_vector(std::make_unique<leg_info>(nullptr, 0, index, ARRAY_CODE));

    return true;
}
1576
1577
308
// Parse one member leg starting at BEGIN_MEMBER and append it to path.
// The leg is either a single WILDCARD or a key. A key runs until the next
// BEGIN_MEMBER / BEGIN_ARRAY outside of double quotes (or until the closing
// quote of a quoted key); ESCAPE takes the following character along
// verbatim. Returns false when the leg is malformed.
inline bool JsonbPath::parse_member(Stream* stream, JsonbPath* path) {
    // advance past the .
    assert(stream->peek() == BEGIN_MEMBER);
    stream->skip(1);
    if (stream->exhausted()) {
        return false;
    }

    if (stream->peek() == WILDCARD) {
        stream->set_leg_ptr(const_cast<char*>(stream->position()));
        stream->add_leg_len();
        stream->skip(1);
        std::unique_ptr<leg_info> wildcard_leg(
                new leg_info(stream->get_leg_ptr(), stream->get_leg_len(), 0, MEMBER_CODE));
        path->add_leg_to_leg_vector(std::move(wildcard_leg));
        return true;
    }

    stream->set_leg_ptr(const_cast<char*>(stream->position()));

    const char* open_quote = nullptr;
    const char* close_quote = nullptr;

    while (!stream->exhausted()) {
        const auto current = stream->peek();

        if (current == ESCAPE) {
            // count the escape character and the character it escapes
            stream->add_leg_len();
            stream->skip(1);
            stream->add_leg_len();
            stream->set_has_escapes(true);
            if (stream->exhausted()) {
                return false;
            }
        } else if (current == DOUBLE_QUOTE) {
            if (open_quote != nullptr) {
                // closing quote: the key ends here
                close_quote = stream->position();
                stream->skip(1);
                break;
            }
            // opening quote: the key starts right behind it
            open_quote = stream->position() + 1;
            stream->set_leg_ptr(const_cast<char*>(open_quote));
        } else {
            const bool starts_next_leg = current == BEGIN_MEMBER || current == BEGIN_ARRAY;
            if (starts_next_leg && open_quote == nullptr) {
                break;
            }
            stream->add_leg_len();
        }

        stream->skip(1);
    }

    const bool unterminated_quote = open_quote != nullptr && close_quote == nullptr;
    if (unterminated_quote || stream->get_leg_ptr() == nullptr || stream->get_leg_len() == 0) {
        return false; //invalid json path
    }

    if (stream->get_has_escapes()) {
        stream->remove_escapes();
    }

    std::unique_ptr<leg_info> member_leg(
            new leg_info(stream->get_leg_ptr(), stream->get_leg_len(), 0, MEMBER_CODE));
    path->add_leg_to_leg_vector(std::move(member_leg));

    return true;
}
1644
1645
#pragma pack(pop)
1646
#if defined(__clang__)
1647
#pragma clang diagnostic pop
1648
#endif
1649
} // namespace doris
1650
1651
#endif // JSONB_JSONBDOCUMENT_H