Coverage Report

Created: 2025-06-13 16:38

/root/doris/be/src/util/jsonb_writer.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
 *  Copyright (c) 2014, Facebook, Inc.
3
 *  All rights reserved.
4
 *
5
 *  This source code is licensed under the BSD-style license found in the
6
 *  LICENSE file in the root directory of this source tree. An additional grant
7
 *  of patent rights can be found in the PATENTS file in the same directory.
8
 *
9
 */
10
11
/*
12
 * This file defines JsonbWriterT (template) and JsonbWriter.
13
 *
14
 * JsonbWriterT is a template class which implements a JSONB serializer.
15
 * Users call various write functions of JsonbWriterT object to write values
16
 * directly to JSONB packed bytes. All write functions of value or key return
17
 * the number of bytes written to JSONB, or 0 if there is an error. To write an
18
 * object, an array, or a string, you must call writeStart[..] before writing
19
 * values or key, and call writeEnd[..] after finishing at the end.
20
 *
21
 * By default, a JsonbWriterT object creates an output stream buffer.
22
 * Alternatively, you can also pass any output stream object to a writer, as
23
 * long as the stream object implements some basic functions of std::ostream
24
 * (such as JsonbOutStream, see JsonbStream.h).
25
 *
26
 * JsonbWriter specializes JsonbWriterT with JsonbOutStream type (see
27
 * JsonbStream.h). So unless you want to provide your own, different output stream
28
 * type, use a JsonbWriter object.
29
 *
30
 * @author Tian Xia <tianx@fb.com>
31
 * this file is copied from 
32
 * https://github.com/facebook/mysql-5.6/blob/fb-mysql-5.6.35/fbson/FbsonWriter.h
33
 * and modified by Doris
34
 */
35
36
#ifndef JSONB_JSONBWRITER_H
37
#define JSONB_JSONBWRITER_H
38
39
#include <limits>
40
#include <stack>
41
#include <string>
42
43
#include "jsonb_document.h"
44
#include "jsonb_stream.h"
45
46
namespace doris {
47
48
using int128_t = __int128;
49
50
template <class OS_TYPE>
51
class JsonbWriterT {
52
    /// TODO: maybe we should not use a template class here
53
    static_assert(std::is_same_v<OS_TYPE, JsonbOutStream>);
54
55
public:
56
4.51k
    JsonbWriterT() : alloc_(true), hasHdr_(false), kvState_(WS_Value), str_pos_(0) {
57
4.51k
        os_ = new OS_TYPE();
58
4.51k
    }
59
60
    /// Creates a writer on top of a caller-provided stream.
    /// The stream is only borrowed: alloc_ is false, so the destructor leaves it alone.
    explicit JsonbWriterT(OS_TYPE& os)
            : os_(&os),
              alloc_(false),
              hasHdr_(false),
              kvState_(WS_Value),
              str_pos_(0) {}
62
63
4.51k
    /// Deletes the output stream, but only when this writer allocated it itself.
    // NOTE(review): no copy constructor/assignment is declared in this class, so
    // copying a writer with alloc_ == true would delete os_ twice — confirm that
    // no caller copies writers.
    ~JsonbWriterT() {
        if (!alloc_) {
            return;
        }
        delete os_;
    }
68
69
7.16k
    void reset() {
70
7.16k
        os_->clear();
71
7.16k
        os_->seekp(0);
72
7.16k
        hasHdr_ = false;
73
7.16k
        kvState_ = WS_Value;
74
7.16k
        first_ = true;
75
7.18k
        for (; !stack_.empty(); stack_.pop())
76
11
            ;
77
7.16k
    }
78
79
    /// Writes a NUL-terminated key string.
    ///
    /// The key length is stored in a single byte, so a key longer than 255 bytes
    /// cannot be encoded. Such a key is rejected here; previously the length was
    /// silently narrowed to uint8_t, which truncated the key (a 256-byte key was
    /// even written as an empty key).
    ///
    /// @param key NUL-terminated key string.
    /// @return number of bytes written, or 0 on error (wrong state or key too long).
    uint32_t writeKey(const char* key) {
        const size_t key_len = strlen(key);
        if (key_len > std::numeric_limits<uint8_t>::max()) {
            return 0;
        }
        return writeKey(key, static_cast<uint8_t>(key_len));
    }
80
81
    // write a key string (or key id if an external dict is provided)
82
6.71k
    uint32_t writeKey(const char* key, uint8_t len) {
83
6.71k
        if (!stack_.empty() && verifyKeyState()) {
84
6.71k
            uint32_t size = sizeof(uint8_t);
85
6.71k
            os_->put(len);
86
6.71k
            if (len == 0) {
87
                // NOTE: we use sMaxKeyId to represent an empty key
88
250
                JsonbKeyValue::keyid_type idx = JsonbKeyValue::sMaxKeyId;
89
250
                os_->write((char*)&idx, sizeof(JsonbKeyValue::keyid_type));
90
250
                size += sizeof(JsonbKeyValue::keyid_type);
91
6.46k
            } else {
92
6.46k
                os_->write(key, len);
93
6.46k
                size += len;
94
6.46k
            }
95
6.71k
            kvState_ = WS_Key;
96
6.71k
            return size;
97
6.71k
        }
98
99
0
        return 0;
100
6.71k
    }
101
102
35
    uint32_t writeValue(const JsonbValue* value) {
103
35
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
104
35
            if (!writeFirstHeader()) return 0;
105
35
            os_->write((char*)value, value->numPackedBytes());
106
35
            kvState_ = WS_Value;
107
35
            return value->size();
108
35
        }
109
0
        return 0;
110
35
    }
111
112
    // write a key id
113
18.2k
    /// Writes a key given as a key id: a zero length byte followed by the id.
    ///
    /// @param idx key id to write.
    /// @return number of bytes written, or 0 when no object is open or a key is
    ///         not expected at this point.
    uint32_t writeKey(JsonbKeyValue::keyid_type idx) {
        if (stack_.empty() || !verifyKeyState()) {
            return 0;
        }

        os_->put(0);
        os_->write(reinterpret_cast<const char*>(&idx), sizeof(JsonbKeyValue::keyid_type));
        kvState_ = WS_Key;
        return sizeof(uint8_t) + sizeof(JsonbKeyValue::keyid_type);
    }
123
124
527k
    bool writeFirstHeader() {
125
527k
        if (first_ && stack_.empty()) {
126
1.19k
            first_ = false;
127
            // if this is a new JSONB, write the header
128
1.19k
            if (!hasHdr_) {
129
1.19k
                writeHeader();
130
1.19k
                return true;
131
1.19k
            } else {
132
0
                return false;
133
0
            }
134
526k
        } else {
135
526k
            return true;
136
526k
        }
137
527k
    }
138
139
14.6k
    uint32_t writeNull() {
140
14.6k
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
141
14.6k
            if (!writeFirstHeader()) return 0;
142
14.6k
            os_->put((JsonbTypeUnder)JsonbType::T_Null);
143
14.6k
            kvState_ = WS_Value;
144
14.6k
            return sizeof(JsonbValue);
145
14.6k
        }
146
147
0
        return 0;
148
14.6k
    }
149
150
20.9k
    uint32_t writeBool(bool b) {
151
20.9k
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
152
20.9k
            if (!writeFirstHeader()) return 0;
153
20.9k
            if (b) {
154
10.4k
                os_->put((JsonbTypeUnder)JsonbType::T_True);
155
10.5k
            } else {
156
10.5k
                os_->put((JsonbTypeUnder)JsonbType::T_False);
157
10.5k
            }
158
159
20.9k
            kvState_ = WS_Value;
160
20.9k
            return sizeof(JsonbValue);
161
20.9k
        }
162
163
0
        return 0;
164
20.9k
    }
165
166
    // This function is a helper. It will make use of smallest space to
167
    // write an int
168
    uint32_t writeInt(int64_t val) {
169
        if (val >= std::numeric_limits<int8_t>::min() &&
170
            val <= std::numeric_limits<int8_t>::max()) {
171
            return writeInt8((int8_t)val);
172
        } else if (val >= std::numeric_limits<int16_t>::min() &&
173
                   val <= std::numeric_limits<int16_t>::max()) {
174
            return writeInt16((int16_t)val);
175
        } else if (val >= std::numeric_limits<int32_t>::min() &&
176
                   val <= std::numeric_limits<int32_t>::max()) {
177
            return writeInt32((int32_t)val);
178
        } else {
179
            return writeInt64(val);
180
        }
181
    }
182
183
49.1k
    uint32_t writeInt8(int8_t v) {
184
49.1k
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
185
49.1k
            if (!writeFirstHeader()) return 0;
186
49.1k
            os_->put((JsonbTypeUnder)JsonbType::T_Int8);
187
49.1k
            os_->put(v);
188
49.1k
            kvState_ = WS_Value;
189
49.1k
            return sizeof(JsonbInt8Val);
190
49.1k
        }
191
192
0
        return 0;
193
49.1k
    }
194
195
440
    uint32_t writeInt16(int16_t v) {
196
440
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
197
440
            if (!writeFirstHeader()) return 0;
198
440
            os_->put((JsonbTypeUnder)JsonbType::T_Int16);
199
440
            os_->write((char*)&v, sizeof(int16_t));
200
440
            kvState_ = WS_Value;
201
440
            return sizeof(JsonbInt16Val);
202
440
        }
203
204
0
        return 0;
205
440
    }
206
207
3.52k
    uint32_t writeInt32(int32_t v) {
208
3.52k
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
209
3.52k
            if (!writeFirstHeader()) return 0;
210
3.52k
            os_->put((JsonbTypeUnder)JsonbType::T_Int32);
211
3.52k
            os_->write((char*)&v, sizeof(int32_t));
212
3.52k
            kvState_ = WS_Value;
213
3.52k
            return sizeof(JsonbInt32Val);
214
3.52k
        }
215
216
0
        return 0;
217
3.52k
    }
218
219
62.8k
    uint32_t writeInt64(int64_t v) {
220
62.8k
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
221
62.8k
            if (!writeFirstHeader()) return 0;
222
62.8k
            os_->put((JsonbTypeUnder)JsonbType::T_Int64);
223
62.8k
            os_->write((char*)&v, sizeof(int64_t));
224
62.8k
            kvState_ = WS_Value;
225
62.8k
            return sizeof(JsonbInt64Val);
226
62.8k
        }
227
228
0
        return 0;
229
62.8k
    }
230
231
4.17k
    /// Writes a 128-bit integer: type byte followed by the raw bytes of v.
    ///
    /// @return sizeof(JsonbInt128Val) on success, 0 when a value is not allowed
    ///         in the current state.
    uint32_t writeInt128(int128_t v) {
        const bool can_write =
                (first_ && stack_.empty()) || (!stack_.empty() && verifyValueState());
        if (!can_write || !writeFirstHeader()) {
            return 0;
        }

        os_->put(static_cast<JsonbTypeUnder>(JsonbType::T_Int128));
        os_->write(reinterpret_cast<const char*>(&v), sizeof(v));
        kvState_ = WS_Value;
        return sizeof(JsonbInt128Val);
    }
242
243
115k
    uint32_t writeDouble(double v) {
244
115k
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
245
115k
            if (!writeFirstHeader()) return 0;
246
115k
            os_->put((JsonbTypeUnder)JsonbType::T_Double);
247
115k
            os_->write((char*)&v, sizeof(double));
248
115k
            kvState_ = WS_Value;
249
115k
            return sizeof(JsonbDoubleVal);
250
115k
        }
251
252
0
        return 0;
253
115k
    }
254
255
14
    uint32_t writeFloat(float v) {
256
14
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
257
14
            if (!writeFirstHeader()) return 0;
258
14
            os_->put((JsonbTypeUnder)JsonbType::T_Float);
259
14
            os_->write((char*)&v, sizeof(float));
260
14
            kvState_ = WS_Value;
261
14
            return sizeof(JsonbFloatVal);
262
14
        }
263
264
0
        return 0;
265
14
    }
266
267
    // must call writeStartString before writing a string val
268
251k
    bool writeStartString() {
269
251k
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
270
251k
            if (!writeFirstHeader()) return 0;
271
251k
            os_->put((JsonbTypeUnder)JsonbType::T_String);
272
251k
            str_pos_ = os_->tellp();
273
274
            // fill the size bytes with 0 for now
275
251k
            uint32_t size = 0;
276
251k
            os_->write((char*)&size, sizeof(uint32_t));
277
278
251k
            kvState_ = WS_String;
279
251k
            return true;
280
251k
        }
281
282
0
        return false;
283
251k
    }
284
285
    // finish writing a string val
286
251k
    bool writeEndString() {
287
251k
        if (kvState_ == WS_String) {
288
251k
            std::streampos cur_pos = os_->tellp();
289
251k
            int32_t size = (int32_t)(cur_pos - str_pos_ - sizeof(uint32_t));
290
251k
            assert(size >= 0);
291
292
0
            os_->seekp(str_pos_);
293
251k
            os_->write((char*)&size, sizeof(uint32_t));
294
251k
            os_->seekp(cur_pos);
295
296
251k
            kvState_ = WS_Value;
297
251k
            return true;
298
251k
        }
299
300
0
        return false;
301
251k
    }
302
303
    // TODO: the length here was changed to uint64_t (some other APIs need the same change), but the third-party API still uses uint32_t.
304
    // We need to consider a better way to handle this case.
305
240k
    uint64_t writeString(const char* str, uint64_t len) {
306
240k
        if (kvState_ == WS_String) {
307
240k
            os_->write(str, len);
308
240k
            return len;
309
240k
        }
310
311
0
        return 0;
312
240k
    }
313
314
2
    /// Appends the contents of a std::string to the string that is currently open.
    ///
    /// @return number of bytes appended, or 0 when no string is open.
    // NOTE(review): the pointer/length overload returns uint64_t while this one
    // returns uint32_t, so the result is narrowed for strings of 4 GiB or more.
    uint32_t writeString(const std::string& str) {
        return static_cast<uint32_t>(writeString(str.c_str(), str.size()));
    }
315
    uint32_t writeString(char ch) {
316
        if (kvState_ == WS_String) {
317
            os_->put(ch);
318
            return 1;
319
        }
320
321
        return 0;
322
    }
323
324
    // must call writeStartBinary before writing a binary val
325
4.36k
    bool writeStartBinary() {
326
4.36k
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
327
4.36k
            if (!writeFirstHeader()) return 0;
328
4.36k
            os_->put((JsonbTypeUnder)JsonbType::T_Binary);
329
4.36k
            str_pos_ = os_->tellp();
330
331
            // fill the size bytes with 0 for now
332
4.36k
            uint32_t size = 0;
333
4.36k
            os_->write((char*)&size, sizeof(uint32_t));
334
335
4.36k
            kvState_ = WS_Binary;
336
4.36k
            return true;
337
4.36k
        }
338
339
0
        return false;
340
4.36k
    }
341
342
    // finish writing a binary val
343
4.36k
    bool writeEndBinary() {
344
4.36k
        if (kvState_ == WS_Binary) {
345
4.36k
            std::streampos cur_pos = os_->tellp();
346
4.36k
            int32_t size = (int32_t)(cur_pos - str_pos_ - sizeof(uint32_t));
347
4.36k
            assert(size >= 0);
348
349
0
            os_->seekp(str_pos_);
350
4.36k
            os_->write((char*)&size, sizeof(uint32_t));
351
4.36k
            os_->seekp(cur_pos);
352
353
4.36k
            kvState_ = WS_Value;
354
4.36k
            return true;
355
4.36k
        }
356
357
0
        return false;
358
4.36k
    }
359
360
4.36k
    uint64_t writeBinary(const char* bin, uint64_t len) {
361
4.36k
        if (kvState_ == WS_Binary) {
362
4.36k
            os_->write(bin, len);
363
4.36k
            return len;
364
4.36k
        }
365
366
0
        return 0;
367
4.36k
    }
368
369
    // must call writeStartObject before writing an object val
370
4.34k
    bool writeStartObject() {
371
4.34k
        if (stack_.empty() || verifyValueState()) {
372
4.34k
            if (stack_.empty()) {
373
                // if this is a new JSONB, write the header
374
1.59k
                if (!hasHdr_) {
375
1.59k
                    writeHeader();
376
1.59k
                } else
377
0
                    return false;
378
1.59k
            }
379
380
            // check if the object exceeds the maximum nesting level
381
4.34k
            if (stack_.size() >= MaxNestingLevel) return false;
382
383
4.34k
            os_->put((JsonbTypeUnder)JsonbType::T_Object);
384
            // save the size position
385
4.34k
            stack_.push(WriteInfo({WS_Object, os_->tellp()}));
386
387
            // fill the size bytes with 0 for now
388
4.34k
            uint32_t size = 0;
389
4.34k
            os_->write((char*)&size, sizeof(uint32_t));
390
391
4.34k
            kvState_ = WS_Value;
392
4.34k
            return true;
393
4.34k
        }
394
395
0
        return false;
396
4.34k
    }
397
398
    // finish writing an object val
399
3.96k
    bool writeEndObject() {
400
3.96k
        if (!stack_.empty() && stack_.top().state == WS_Object && kvState_ == WS_Value) {
401
3.96k
            WriteInfo& ci = stack_.top();
402
3.96k
            std::streampos cur_pos = os_->tellp();
403
3.96k
            int32_t size = (int32_t)(cur_pos - ci.sz_pos - sizeof(uint32_t));
404
3.96k
            assert(size >= 0);
405
406
0
            os_->seekp(ci.sz_pos);
407
3.96k
            os_->write((char*)&size, sizeof(uint32_t));
408
3.96k
            os_->seekp(cur_pos);
409
3.96k
            stack_.pop();
410
411
3.96k
            return true;
412
3.96k
        }
413
414
0
        return false;
415
3.96k
    }
416
417
    // must call writeStartArray before writing an array val
418
52.1k
    bool writeStartArray() {
419
52.1k
        if (stack_.empty() || verifyValueState()) {
420
52.1k
            if (stack_.empty()) {
421
                // if this is a new JSONB, write the header
422
1.88k
                if (!hasHdr_) {
423
1.88k
                    writeHeader();
424
1.88k
                } else
425
0
                    return false;
426
1.88k
            }
427
428
            // check if the array exceeds the maximum nesting level
429
52.1k
            if (stack_.size() >= MaxNestingLevel) return false;
430
431
52.1k
            os_->put((JsonbTypeUnder)JsonbType::T_Array);
432
            // save the size position
433
52.1k
            stack_.push(WriteInfo({WS_Array, os_->tellp()}));
434
435
            // fill the size bytes with 0 for now
436
52.1k
            uint32_t size = 0;
437
52.1k
            os_->write((char*)&size, sizeof(uint32_t));
438
439
52.1k
            kvState_ = WS_Value;
440
52.1k
            return true;
441
52.1k
        }
442
443
0
        return false;
444
52.1k
    }
445
446
    // finish writing an array val
447
51.9k
    bool writeEndArray() {
448
51.9k
        if (!stack_.empty() && stack_.top().state == WS_Array && kvState_ == WS_Value) {
449
51.9k
            WriteInfo& ci = stack_.top();
450
51.9k
            std::streampos cur_pos = os_->tellp();
451
51.9k
            int32_t size = (int32_t)(cur_pos - ci.sz_pos - sizeof(uint32_t));
452
51.9k
            assert(size >= 0);
453
454
0
            os_->seekp(ci.sz_pos);
455
51.9k
            os_->write((char*)&size, sizeof(uint32_t));
456
51.9k
            os_->seekp(cur_pos);
457
51.9k
            stack_.pop();
458
459
51.9k
            return true;
460
51.9k
        }
461
462
0
        return false;
463
51.9k
    }
464
465
8.75k
    /// Returns the output stream this writer writes into (ownership is not transferred).
    OS_TYPE* getOutput() {
        return os_;
    }
466
    /// Passes the bytes written so far to JsonbDocument::checkAndCreateDocument()
    /// and returns its result.
    JsonbDocument* getDocument() {
        OS_TYPE* out = getOutput();
        return JsonbDocument::checkAndCreateDocument(out->getBuffer(), out->getSize());
    }
470
471
    /// Passes the bytes written so far to JsonbDocument::createValue() and
    /// returns its result.
    JsonbValue* getValue() {
        OS_TYPE* out = getOutput();
        return JsonbDocument::createValue(out->getBuffer(), out->getSize());
    }
474
475
    bool writeEnd() {
476
        while (!stack_.empty()) {
477
            bool ok = false;
478
            switch (stack_.top().state) {
479
            case WS_Array:
480
                ok = writeEndArray();
481
                break;
482
            case WS_Object:
483
                ok = writeEndObject();
484
                break;
485
            case WS_String:
486
                ok = writeEndString();
487
                break;
488
            case WS_Binary:
489
                ok = writeEndBinary();
490
                break;
491
            default:
492
                ok = false;
493
                break;
494
            }
495
            if (ok == false) return false;
496
        }
497
        return true;
498
    }
499
500
private:
501
    // verify we are in the right state before writing a value
502
579k
    bool verifyValueState() {
503
579k
        assert(!stack_.empty());
504
        // The document can only be an Object or an Array which follows
505
        // the standard.
506
579k
        return (stack_.top().state == WS_Object && kvState_ == WS_Key) ||
507
579k
               (stack_.top().state == WS_Array && kvState_ == WS_Value);
508
579k
    }
509
510
    // verify we are in the right state before writing a key
511
24.9k
    bool verifyKeyState() {
512
24.9k
        assert(!stack_.empty());
513
24.9k
        return stack_.top().state == WS_Object && kvState_ == WS_Value;
514
24.9k
    }
515
516
4.67k
    void writeHeader() {
517
4.67k
        os_->put(JSONB_VER);
518
4.67k
        hasHdr_ = true;
519
4.67k
    }
520
521
private:
522
    /// States used both for the container stack (WS_Array / WS_Object) and for
    /// kvState_ (WS_Key / WS_Value / WS_String / WS_Binary).
    enum WriteState {
        WS_NONE = 0,   // not used by the visible writer code
        WS_Array = 1,  // an array is open
        WS_Object = 2, // an object is open
        WS_Key = 3,    // a key was just written, a value must follow
        WS_Value = 4,  // a complete value was written (also the initial state)
        WS_String = 5, // between writeStartString() and writeEndString()
        WS_Binary = 6, // between writeStartBinary() and writeEndBinary()
    };
531
532
    // Bookkeeping entry for one container that is still open; entries are
    // brace-initialized as {state, sz_pos}, so the field order matters.
    struct WriteInfo {
533
        WriteState state; // kind of container; the writer pushes only WS_Array or WS_Object
534
        std::streampos sz_pos; // stream position of the container's 4-byte size placeholder
535
    };
536
537
private:
538
    OS_TYPE* os_ = nullptr; // output stream all bytes are written to
539
    bool alloc_; // true when os_ was allocated by this writer; the destructor then deletes it
540
    bool hasHdr_; // true once writeHeader() has written the version byte
541
    WriteState kvState_; // key or value state
542
    std::streampos str_pos_; // position of the size placeholder of the open string/binary value
543
    std::stack<WriteInfo> stack_; // open arrays/objects, innermost on top
544
    bool first_ = true; // true until writeFirstHeader() has handled the first root-level value
545
};
546
547
using JsonbWriter = JsonbWriterT<JsonbOutStream>;
548
549
} // namespace doris
550
551
#endif // JSONB_JSONBWRITER_H