Coverage Report

Created: 2025-06-09 22:36

/root/doris/be/src/util/jsonb_writer.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
 *  Copyright (c) 2014, Facebook, Inc.
3
 *  All rights reserved.
4
 *
5
 *  This source code is licensed under the BSD-style license found in the
6
 *  LICENSE file in the root directory of this source tree. An additional grant
7
 *  of patent rights can be found in the PATENTS file in the same directory.
8
 *
9
 */
10
11
/*
12
 * This file defines JsonbWriterT (template) and JsonbWriter.
13
 *
14
 * JsonbWriterT is a template class which implements a JSONB serializer.
15
 * Users call various write functions of JsonbWriterT object to write values
16
 * directly to JSONB packed bytes. All write functions of value or key return
17
 * the number of bytes written to JSONB, or 0 if there is an error. To write an
18
 * object, an array, or a string, you must call writeStart[..] before writing
19
 * values or key, and call writeEnd[..] after finishing at the end.
20
 *
21
 * By default, a JsonbWriterT object creates an output stream buffer.
22
 * Alternatively, you can also pass any output stream object to a writer, as
23
 * long as the stream object implements some basic functions of std::ostream
24
 * (such as JsonbOutStream, see jsonb_stream.h).
25
 *
26
 * JsonbWriter specializes JsonbWriterT with JsonbOutStream type (see
27
 * jsonb_stream.h). So unless you want to provide your own, different output stream
28
 * type, use a JsonbWriter object.
29
 *
30
 * @author Tian Xia <tianx@fb.com>
31
 * this file is copied from 
32
 * https://github.com/facebook/mysql-5.6/blob/fb-mysql-5.6.35/fbson/FbsonWriter.h
33
 * and modified by Doris
34
 */
35
36
#ifndef JSONB_JSONBWRITER_H
37
#define JSONB_JSONBWRITER_H
38
39
#include <limits>
40
#include <stack>
41
#include <string>
42
43
#include "jsonb_document.h"
44
#include "jsonb_stream.h"
45
46
namespace doris {
47
48
using int128_t = __int128;
49
50
template <class OS_TYPE>
51
class JsonbWriterT {
52
    /// TODO: maybe we should not use a template class here
53
    static_assert(std::is_same_v<OS_TYPE, JsonbOutStream>);
54
55
public:
56
2.48k
    JsonbWriterT() : alloc_(true), hasHdr_(false), kvState_(WS_Value), str_pos_(0) {
57
2.48k
        os_ = new OS_TYPE();
58
2.48k
    }
59
60
    explicit JsonbWriterT(OS_TYPE& os)
61
            : os_(&os), alloc_(false), hasHdr_(false), kvState_(WS_Value), str_pos_(0) {}
62
63
2.48k
    ~JsonbWriterT() {
64
2.48k
        if (alloc_) {
65
2.48k
            delete os_;
66
2.48k
        }
67
2.48k
    }
68
69
3.08k
    void reset() {
70
3.08k
        os_->clear();
71
3.08k
        os_->seekp(0);
72
3.08k
        hasHdr_ = false;
73
3.08k
        kvState_ = WS_Value;
74
3.08k
        first_ = true;
75
3.09k
        for (; !stack_.empty(); stack_.pop())
76
11
            ;
77
3.08k
    }
78
79
    uint32_t writeKey(const char* key) { return writeKey(key, strlen(key)); }
80
81
    // write a key string (or key id if an external dict is provided)
82
476
    uint32_t writeKey(const char* key, uint8_t len) {
83
476
        if (!stack_.empty() && verifyKeyState()) {
84
476
            uint32_t size = sizeof(uint8_t);
85
476
            os_->put(len);
86
476
            if (len == 0) {
87
                // NOTE: we use sMaxKeyId to represent an empty key
88
1
                JsonbKeyValue::keyid_type idx = JsonbKeyValue::sMaxKeyId;
89
1
                os_->write((char*)&idx, sizeof(JsonbKeyValue::keyid_type));
90
1
                size += sizeof(JsonbKeyValue::keyid_type);
91
475
            } else {
92
475
                os_->write(key, len);
93
475
                size += len;
94
475
            }
95
476
            kvState_ = WS_Key;
96
476
            return size;
97
476
        }
98
99
0
        return 0;
100
476
    }
101
102
35
    uint32_t writeValue(const JsonbValue* value) {
103
35
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
104
35
            if (!writeFirstHeader()) return 0;
105
35
            os_->write((char*)value, value->numPackedBytes());
106
35
            kvState_ = WS_Value;
107
35
            return value->size();
108
35
        }
109
0
        return 0;
110
35
    }
111
112
    // write a key id
113
18.2k
    uint32_t writeKey(JsonbKeyValue::keyid_type idx) {
114
18.2k
        if (!stack_.empty() && verifyKeyState()) {
115
16.7k
            os_->put(0);
116
16.7k
            os_->write((char*)&idx, sizeof(JsonbKeyValue::keyid_type));
117
16.7k
            kvState_ = WS_Key;
118
16.7k
            return sizeof(uint8_t) + sizeof(JsonbKeyValue::keyid_type);
119
16.7k
        }
120
121
1.42k
        return 0;
122
18.2k
    }
123
124
19.4k
    bool writeFirstHeader() {
125
19.4k
        if (first_ && stack_.empty()) {
126
784
            first_ = false;
127
            // if this is a new JSONB, write the header
128
784
            if (!hasHdr_) {
129
784
                writeHeader();
130
784
                return true;
131
784
            } else {
132
0
                return false;
133
0
            }
134
18.6k
        } else {
135
18.6k
            return true;
136
18.6k
        }
137
19.4k
    }
138
139
2.87k
    uint32_t writeNull() {
140
2.87k
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
141
2.87k
            if (!writeFirstHeader()) return 0;
142
2.87k
            os_->put((JsonbTypeUnder)JsonbType::T_Null);
143
2.87k
            kvState_ = WS_Value;
144
2.87k
            return sizeof(JsonbValue);
145
2.87k
        }
146
147
0
        return 0;
148
2.87k
    }
149
150
409
    uint32_t writeBool(bool b) {
151
409
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
152
409
            if (!writeFirstHeader()) return 0;
153
409
            if (b) {
154
208
                os_->put((JsonbTypeUnder)JsonbType::T_True);
155
208
            } else {
156
201
                os_->put((JsonbTypeUnder)JsonbType::T_False);
157
201
            }
158
159
409
            kvState_ = WS_Value;
160
409
            return sizeof(JsonbValue);
161
409
        }
162
163
0
        return 0;
164
409
    }
165
166
    // This function is a helper. It will make use of the smallest space to
167
    // write an int
168
    uint32_t writeInt(int64_t val) {
169
        if (val >= std::numeric_limits<int8_t>::min() &&
170
            val <= std::numeric_limits<int8_t>::max()) {
171
            return writeInt8((int8_t)val);
172
        } else if (val >= std::numeric_limits<int16_t>::min() &&
173
                   val <= std::numeric_limits<int16_t>::max()) {
174
            return writeInt16((int16_t)val);
175
        } else if (val >= std::numeric_limits<int32_t>::min() &&
176
                   val <= std::numeric_limits<int32_t>::max()) {
177
            return writeInt32((int32_t)val);
178
        } else {
179
            return writeInt64(val);
180
        }
181
    }
182
183
540
    uint32_t writeInt8(int8_t v) {
184
540
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
185
540
            if (!writeFirstHeader()) return 0;
186
540
            os_->put((JsonbTypeUnder)JsonbType::T_Int8);
187
540
            os_->put(v);
188
540
            kvState_ = WS_Value;
189
540
            return sizeof(JsonbInt8Val);
190
540
        }
191
192
0
        return 0;
193
540
    }
194
195
440
    uint32_t writeInt16(int16_t v) {
196
440
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
197
440
            if (!writeFirstHeader()) return 0;
198
440
            os_->put((JsonbTypeUnder)JsonbType::T_Int16);
199
440
            os_->write((char*)&v, sizeof(int16_t));
200
440
            kvState_ = WS_Value;
201
440
            return sizeof(JsonbInt16Val);
202
440
        }
203
204
0
        return 0;
205
440
    }
206
207
3.52k
    uint32_t writeInt32(int32_t v) {
208
3.52k
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
209
3.52k
            if (!writeFirstHeader()) return 0;
210
3.52k
            os_->put((JsonbTypeUnder)JsonbType::T_Int32);
211
3.52k
            os_->write((char*)&v, sizeof(int32_t));
212
3.52k
            kvState_ = WS_Value;
213
3.52k
            return sizeof(JsonbInt32Val);
214
3.52k
        }
215
216
0
        return 0;
217
3.52k
    }
218
219
1.94k
    uint32_t writeInt64(int64_t v) {
220
1.94k
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
221
1.94k
            if (!writeFirstHeader()) return 0;
222
1.94k
            os_->put((JsonbTypeUnder)JsonbType::T_Int64);
223
1.94k
            os_->write((char*)&v, sizeof(int64_t));
224
1.94k
            kvState_ = WS_Value;
225
1.94k
            return sizeof(JsonbInt64Val);
226
1.94k
        }
227
228
0
        return 0;
229
1.94k
    }
230
231
4.15k
    uint32_t writeInt128(int128_t v) {
232
4.15k
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
233
4.15k
            if (!writeFirstHeader()) return 0;
234
4.15k
            os_->put((JsonbTypeUnder)JsonbType::T_Int128);
235
4.15k
            os_->write((char*)&v, sizeof(int128_t));
236
4.15k
            kvState_ = WS_Value;
237
4.15k
            return sizeof(JsonbInt128Val);
238
4.15k
        }
239
240
0
        return 0;
241
4.15k
    }
242
243
456
    uint32_t writeDouble(double v) {
244
456
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
245
456
            if (!writeFirstHeader()) return 0;
246
456
            os_->put((JsonbTypeUnder)JsonbType::T_Double);
247
456
            os_->write((char*)&v, sizeof(double));
248
456
            kvState_ = WS_Value;
249
456
            return sizeof(JsonbDoubleVal);
250
456
        }
251
252
0
        return 0;
253
456
    }
254
255
14
    uint32_t writeFloat(float v) {
256
14
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
257
14
            if (!writeFirstHeader()) return 0;
258
14
            os_->put((JsonbTypeUnder)JsonbType::T_Float);
259
14
            os_->write((char*)&v, sizeof(float));
260
14
            kvState_ = WS_Value;
261
14
            return sizeof(JsonbFloatVal);
262
14
        }
263
264
0
        return 0;
265
14
    }
266
267
    // must call writeStartString before writing a string val
268
716
    bool writeStartString() {
269
716
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
270
716
            if (!writeFirstHeader()) return 0;
271
716
            os_->put((JsonbTypeUnder)JsonbType::T_String);
272
716
            str_pos_ = os_->tellp();
273
274
            // fill the size bytes with 0 for now
275
716
            uint32_t size = 0;
276
716
            os_->write((char*)&size, sizeof(uint32_t));
277
278
716
            kvState_ = WS_String;
279
716
            return true;
280
716
        }
281
282
0
        return false;
283
716
    }
284
285
    // finish writing a string val
286
716
    bool writeEndString() {
287
716
        if (kvState_ == WS_String) {
288
716
            std::streampos cur_pos = os_->tellp();
289
716
            int32_t size = (int32_t)(cur_pos - str_pos_ - sizeof(uint32_t));
290
716
            assert(size >= 0);
291
292
0
            os_->seekp(str_pos_);
293
716
            os_->write((char*)&size, sizeof(uint32_t));
294
716
            os_->seekp(cur_pos);
295
296
716
            kvState_ = WS_Value;
297
716
            return true;
298
716
        }
299
300
0
        return false;
301
716
    }
302
303
    // TODO: the length was changed to uint64_t here, and some other APIs also need to change, but the third-party API uses uint32_t.
304
    // We need to consider a better way to handle this case.
305
716
    uint64_t writeString(const char* str, uint64_t len) {
306
716
        if (kvState_ == WS_String) {
307
716
            os_->write(str, len);
308
716
            return len;
309
716
        }
310
311
0
        return 0;
312
716
    }
313
314
2
    uint32_t writeString(const std::string& str) { return writeString(str.c_str(), str.size()); }
315
    uint32_t writeString(char ch) {
316
        if (kvState_ == WS_String) {
317
            os_->put(ch);
318
            return 1;
319
        }
320
321
        return 0;
322
    }
323
324
    // must call writeStartBinary before writing a binary val
325
4.35k
    bool writeStartBinary() {
326
4.35k
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
327
4.35k
            if (!writeFirstHeader()) return 0;
328
4.35k
            os_->put((JsonbTypeUnder)JsonbType::T_Binary);
329
4.35k
            str_pos_ = os_->tellp();
330
331
            // fill the size bytes with 0 for now
332
4.35k
            uint32_t size = 0;
333
4.35k
            os_->write((char*)&size, sizeof(uint32_t));
334
335
4.35k
            kvState_ = WS_Binary;
336
4.35k
            return true;
337
4.35k
        }
338
339
0
        return false;
340
4.35k
    }
341
342
    // finish writing a binary val
343
4.35k
    bool writeEndBinary() {
344
4.35k
        if (kvState_ == WS_Binary) {
345
4.35k
            std::streampos cur_pos = os_->tellp();
346
4.35k
            int32_t size = (int32_t)(cur_pos - str_pos_ - sizeof(uint32_t));
347
4.35k
            assert(size >= 0);
348
349
0
            os_->seekp(str_pos_);
350
4.35k
            os_->write((char*)&size, sizeof(uint32_t));
351
4.35k
            os_->seekp(cur_pos);
352
353
4.35k
            kvState_ = WS_Value;
354
4.35k
            return true;
355
4.35k
        }
356
357
0
        return false;
358
4.35k
    }
359
360
4.35k
    uint64_t writeBinary(const char* bin, uint64_t len) {
361
4.35k
        if (kvState_ == WS_Binary) {
362
4.35k
            os_->write(bin, len);
363
4.35k
            return len;
364
4.35k
        }
365
366
0
        return 0;
367
4.35k
    }
368
369
    // must call writeStartObject before writing an object val
370
1.49k
    bool writeStartObject() {
371
1.49k
        if (stack_.empty() || verifyValueState()) {
372
1.49k
            if (stack_.empty()) {
373
                // if this is a new JSONB, write the header
374
1.36k
                if (!hasHdr_) {
375
1.36k
                    writeHeader();
376
1.36k
                } else
377
0
                    return false;
378
1.36k
            }
379
380
            // check if the object exceeds the maximum nesting level
381
1.49k
            if (stack_.size() >= MaxNestingLevel) return false;
382
383
1.49k
            os_->put((JsonbTypeUnder)JsonbType::T_Object);
384
            // save the size position
385
1.49k
            stack_.push(WriteInfo({WS_Object, os_->tellp()}));
386
387
            // fill the size bytes with 0 for now
388
1.49k
            uint32_t size = 0;
389
1.49k
            os_->write((char*)&size, sizeof(uint32_t));
390
391
1.49k
            kvState_ = WS_Value;
392
1.49k
            return true;
393
1.49k
        }
394
395
0
        return false;
396
1.49k
    }
397
398
    // finish writing an object val
399
1.47k
    bool writeEndObject() {
400
1.47k
        if (!stack_.empty() && stack_.top().state == WS_Object && kvState_ == WS_Value) {
401
1.47k
            WriteInfo& ci = stack_.top();
402
1.47k
            std::streampos cur_pos = os_->tellp();
403
1.47k
            int32_t size = (int32_t)(cur_pos - ci.sz_pos - sizeof(uint32_t));
404
1.47k
            assert(size >= 0);
405
406
0
            os_->seekp(ci.sz_pos);
407
1.47k
            os_->write((char*)&size, sizeof(uint32_t));
408
1.47k
            os_->seekp(cur_pos);
409
1.47k
            stack_.pop();
410
411
1.47k
            return true;
412
1.47k
        }
413
414
0
        return false;
415
1.47k
    }
416
417
    // must call writeStartArray before writing an array val
418
506
    bool writeStartArray() {
419
506
        if (stack_.empty() || verifyValueState()) {
420
506
            if (stack_.empty()) {
421
                // if this is a new JSONB, write the header
422
489
                if (!hasHdr_) {
423
489
                    writeHeader();
424
489
                } else
425
0
                    return false;
426
489
            }
427
428
            // check if the array exceeds the maximum nesting level
429
506
            if (stack_.size() >= MaxNestingLevel) return false;
430
431
506
            os_->put((JsonbTypeUnder)JsonbType::T_Array);
432
            // save the size position
433
506
            stack_.push(WriteInfo({WS_Array, os_->tellp()}));
434
435
            // fill the size bytes with 0 for now
436
506
            uint32_t size = 0;
437
506
            os_->write((char*)&size, sizeof(uint32_t));
438
439
506
            kvState_ = WS_Value;
440
506
            return true;
441
506
        }
442
443
0
        return false;
444
506
    }
445
446
    // finish writing an array val
447
494
    bool writeEndArray() {
448
494
        if (!stack_.empty() && stack_.top().state == WS_Array && kvState_ == WS_Value) {
449
494
            WriteInfo& ci = stack_.top();
450
494
            std::streampos cur_pos = os_->tellp();
451
494
            int32_t size = (int32_t)(cur_pos - ci.sz_pos - sizeof(uint32_t));
452
494
            assert(size >= 0);
453
454
0
            os_->seekp(ci.sz_pos);
455
494
            os_->write((char*)&size, sizeof(uint32_t));
456
494
            os_->seekp(cur_pos);
457
494
            stack_.pop();
458
459
494
            return true;
460
494
        }
461
462
0
        return false;
463
494
    }
464
465
5.20k
    OS_TYPE* getOutput() { return os_; }
466
    JsonbDocument* getDocument() {
467
        return JsonbDocument::checkAndCreateDocument(getOutput()->getBuffer(),
468
                                                     getOutput()->getSize());
469
    }
470
471
    JsonbValue* getValue() {
472
        return JsonbDocument::createValue(getOutput()->getBuffer(), getOutput()->getSize());
473
    }
474
475
    bool writeEnd() {
476
        while (!stack_.empty()) {
477
            bool ok = false;
478
            switch (stack_.top().state) {
479
            case WS_Array:
480
                ok = writeEndArray();
481
                break;
482
            case WS_Object:
483
                ok = writeEndObject();
484
                break;
485
            case WS_String:
486
                ok = writeEndString();
487
                break;
488
            case WS_Binary:
489
                ok = writeEndBinary();
490
                break;
491
            default:
492
                ok = false;
493
                break;
494
            }
495
            if (ok == false) return false;
496
        }
497
        return true;
498
    }
499
500
private:
501
    // verify we are in the right state before writing a value
502
18.8k
    bool verifyValueState() {
503
18.8k
        assert(!stack_.empty());
504
        // The document can only be an Object or an Array which follows
505
        // the standard.
506
18.8k
        return (stack_.top().state == WS_Object && kvState_ == WS_Key) ||
507
18.8k
               (stack_.top().state == WS_Array && kvState_ == WS_Value);
508
18.8k
    }
509
510
    // verify we are in the right state before writing a key
511
18.6k
    bool verifyKeyState() {
512
18.6k
        assert(!stack_.empty());
513
18.6k
        return stack_.top().state == WS_Object && kvState_ == WS_Value;
514
18.6k
    }
515
516
2.63k
    void writeHeader() {
517
2.63k
        os_->put(JSONB_VER);
518
2.63k
        hasHdr_ = true;
519
2.63k
    }
520
521
private:
522
    enum WriteState {
523
        WS_NONE,
524
        WS_Array,
525
        WS_Object,
526
        WS_Key,
527
        WS_Value,
528
        WS_String,
529
        WS_Binary,
530
    };
531
532
    struct WriteInfo {
533
        WriteState state;
534
        std::streampos sz_pos;
535
    };
536
537
private:
538
    OS_TYPE* os_ = nullptr;
539
    bool alloc_;
540
    bool hasHdr_;
541
    WriteState kvState_; // key or value state
542
    std::streampos str_pos_;
543
    std::stack<WriteInfo> stack_;
544
    bool first_ = true;
545
};
546
547
using JsonbWriter = JsonbWriterT<JsonbOutStream>;
548
549
} // namespace doris
550
551
#endif // JSONB_JSONBWRITER_H