Coverage Report

Created: 2025-03-27 13:57

/root/doris/be/src/util/jsonb_writer.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
 *  Copyright (c) 2014, Facebook, Inc.
3
 *  All rights reserved.
4
 *
5
 *  This source code is licensed under the BSD-style license found in the
6
 *  LICENSE file in the root directory of this source tree. An additional grant
7
 *  of patent rights can be found in the PATENTS file in the same directory.
8
 *
9
 */
10
11
/*
12
 * This file defines JsonbWriterT (template) and JsonbWriter.
13
 *
14
 * JsonbWriterT is a template class which implements a JSONB serializer.
15
 * Users call various write functions of JsonbWriterT object to write values
16
 * directly to JSONB packed bytes. All write functions of value or key return
17
 * the number of bytes written to JSONB, or 0 if there is an error. To write an
18
 * object, an array, or a string, you must call writeStart[..] before writing
19
 * values or key, and call writeEnd[..] after finishing at the end.
20
 *
21
 * By default, a JsonbWriterT object creates an output stream buffer.
22
 * Alternatively, you can also pass any output stream object to a writer, as
23
 * long as the stream object implements some basic functions of std::ostream
24
 * (such as JsonbOutStream, see JsonbStream.h).
25
 *
26
 * JsonbWriter specializes JsonbWriterT with JsonbOutStream type (see
27
 * JsonbStream.h). So unless you want to provide your own, different output stream
28
 * type, use a JsonbWriter object.
29
 *
30
 * @author Tian Xia <tianx@fb.com>
31
 * this file is copied from 
32
 * https://github.com/facebook/mysql-5.6/blob/fb-mysql-5.6.35/fbson/FbsonWriter.h
33
 * and modified by Doris
34
 */
35
36
#ifndef JSONB_JSONBWRITER_H
37
#define JSONB_JSONBWRITER_H
38
39
#include <limits>
40
#include <stack>
41
#include <string>
42
43
#include "jsonb_document.h"
44
#include "jsonb_stream.h"
45
46
namespace doris {
47
48
// 128-bit signed integer alias over the compiler's __int128 type; used by
// JsonbWriterT::writeInt128.
using int128_t = __int128;
49
50
template <class OS_TYPE>
51
class JsonbWriterT {
52
public:
53
2.21k
    JsonbWriterT() : alloc_(true), hasHdr_(false), kvState_(WS_Value), str_pos_(0) {
54
2.21k
        os_ = new OS_TYPE();
55
2.21k
    }
56
57
    explicit JsonbWriterT(OS_TYPE& os)
58
            : os_(&os), alloc_(false), hasHdr_(false), kvState_(WS_Value), str_pos_(0) {}
59
60
2.21k
    ~JsonbWriterT() {
61
2.21k
        if (alloc_) {
62
2.21k
            delete os_;
63
2.21k
        }
64
2.21k
    }
65
66
1.30k
    void reset() {
67
1.30k
        os_->clear();
68
1.30k
        os_->seekp(0);
69
1.30k
        hasHdr_ = false;
70
1.30k
        kvState_ = WS_Value;
71
1.30k
        first_ = true;
72
1.32k
        for (; !stack_.empty(); stack_.pop())
73
11
            ;
74
1.30k
    }
75
76
    uint32_t writeKey(const char* key, hDictInsert handler = nullptr) {
77
        return writeKey(key, strlen(key), handler);
78
    }
79
80
    // write a key string (or key id if an external dict is provided)
81
424
    uint32_t writeKey(const char* key, uint8_t len, hDictInsert handler = nullptr) {
82
424
        if (!stack_.empty() && verifyKeyState()) {
83
424
            int key_id = -1;
84
424
            if (handler) {
85
0
                key_id = handler(key, len);
86
0
            }
87
88
424
            uint32_t size = sizeof(uint8_t);
89
424
            if (key_id < 0) {
90
424
                os_->put(len);
91
424
                if (len == 0) {
92
                    // NOTE: we use sMaxKeyId to represent an empty key
93
1
                    JsonbKeyValue::keyid_type idx = JsonbKeyValue::sMaxKeyId;
94
1
                    os_->write((char*)&idx, sizeof(JsonbKeyValue::keyid_type));
95
1
                    size += sizeof(JsonbKeyValue::keyid_type);
96
423
                } else {
97
423
                    os_->write(key, len);
98
423
                    size += len;
99
423
                }
100
424
            } else if (key_id < JsonbKeyValue::sMaxKeyId) {
101
0
                JsonbKeyValue::keyid_type idx = key_id;
102
0
                os_->put(0);
103
0
                os_->write((char*)&idx, sizeof(JsonbKeyValue::keyid_type));
104
0
                size += sizeof(JsonbKeyValue::keyid_type);
105
0
            } else { // key id overflow
106
0
                assert(0);
107
0
                return 0;
108
0
            }
109
110
424
            kvState_ = WS_Key;
111
424
            return size;
112
424
        }
113
114
0
        return 0;
115
424
    }
116
117
35
    uint32_t writeValue(const JsonbValue* value) {
118
35
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
119
35
            if (!writeFirstHeader()) return 0;
120
35
            os_->write((char*)value, value->numPackedBytes());
121
35
            kvState_ = WS_Value;
122
35
            return value->size();
123
35
        }
124
0
        return 0;
125
35
    }
126
127
    // write a key id
128
18.4k
    uint32_t writeKey(JsonbKeyValue::keyid_type idx) {
129
18.4k
        if (!stack_.empty() && verifyKeyState()) {
130
17.0k
            os_->put(0);
131
17.0k
            os_->write((char*)&idx, sizeof(JsonbKeyValue::keyid_type));
132
17.0k
            kvState_ = WS_Key;
133
17.0k
            return sizeof(uint8_t) + sizeof(JsonbKeyValue::keyid_type);
134
17.0k
        }
135
136
1.42k
        return 0;
137
18.4k
    }
138
139
19.3k
    bool writeFirstHeader() {
140
19.3k
        if (first_ && stack_.empty()) {
141
632
            first_ = false;
142
            // if this is a new JSONB, write the header
143
632
            if (!hasHdr_) {
144
632
                writeHeader();
145
632
                return true;
146
632
            } else {
147
0
                return false;
148
0
            }
149
18.7k
        } else {
150
18.7k
            return true;
151
18.7k
        }
152
19.3k
    }
153
154
2.85k
    uint32_t writeNull() {
155
2.85k
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
156
2.85k
            if (!writeFirstHeader()) return 0;
157
2.85k
            os_->put((JsonbTypeUnder)JsonbType::T_Null);
158
2.85k
            kvState_ = WS_Value;
159
2.85k
            return sizeof(JsonbValue);
160
2.85k
        }
161
162
0
        return 0;
163
2.85k
    }
164
165
355
    uint32_t writeBool(bool b) {
166
355
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
167
355
            if (!writeFirstHeader()) return 0;
168
355
            if (b) {
169
181
                os_->put((JsonbTypeUnder)JsonbType::T_True);
170
181
            } else {
171
174
                os_->put((JsonbTypeUnder)JsonbType::T_False);
172
174
            }
173
174
355
            kvState_ = WS_Value;
175
355
            return sizeof(JsonbValue);
176
355
        }
177
178
0
        return 0;
179
355
    }
180
181
    // This function is a helper. It will make use of smallest space to
182
    // write an int
183
    uint32_t writeInt(int64_t val) {
184
        if (val >= std::numeric_limits<int8_t>::min() &&
185
            val <= std::numeric_limits<int8_t>::max()) {
186
            return writeInt8((int8_t)val);
187
        } else if (val >= std::numeric_limits<int16_t>::min() &&
188
                   val <= std::numeric_limits<int16_t>::max()) {
189
            return writeInt16((int16_t)val);
190
        } else if (val >= std::numeric_limits<int32_t>::min() &&
191
                   val <= std::numeric_limits<int32_t>::max()) {
192
            return writeInt32((int32_t)val);
193
        } else {
194
            return writeInt64(val);
195
        }
196
    }
197
198
481
    uint32_t writeInt8(int8_t v) {
199
481
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
200
481
            if (!writeFirstHeader()) return 0;
201
481
            os_->put((JsonbTypeUnder)JsonbType::T_Int8);
202
481
            os_->put(v);
203
481
            kvState_ = WS_Value;
204
481
            return sizeof(JsonbInt8Val);
205
481
        }
206
207
0
        return 0;
208
481
    }
209
210
461
    uint32_t writeInt16(int16_t v) {
211
461
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
212
461
            if (!writeFirstHeader()) return 0;
213
461
            os_->put((JsonbTypeUnder)JsonbType::T_Int16);
214
461
            os_->write((char*)&v, sizeof(int16_t));
215
461
            kvState_ = WS_Value;
216
461
            return sizeof(JsonbInt16Val);
217
461
        }
218
219
0
        return 0;
220
461
    }
221
222
3.59k
    uint32_t writeInt32(int32_t v) {
223
3.59k
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
224
3.59k
            if (!writeFirstHeader()) return 0;
225
3.59k
            os_->put((JsonbTypeUnder)JsonbType::T_Int32);
226
3.59k
            os_->write((char*)&v, sizeof(int32_t));
227
3.59k
            kvState_ = WS_Value;
228
3.59k
            return sizeof(JsonbInt32Val);
229
3.59k
        }
230
231
0
        return 0;
232
3.59k
    }
233
234
2.00k
    uint32_t writeInt64(int64_t v) {
235
2.00k
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
236
2.00k
            if (!writeFirstHeader()) return 0;
237
2.00k
            os_->put((JsonbTypeUnder)JsonbType::T_Int64);
238
2.00k
            os_->write((char*)&v, sizeof(int64_t));
239
2.00k
            kvState_ = WS_Value;
240
2.00k
            return sizeof(JsonbInt64Val);
241
2.00k
        }
242
243
0
        return 0;
244
2.00k
    }
245
246
4.15k
    uint32_t writeInt128(int128_t v) {
247
4.15k
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
248
4.15k
            if (!writeFirstHeader()) return 0;
249
4.15k
            os_->put((JsonbTypeUnder)JsonbType::T_Int128);
250
4.15k
            os_->write((char*)&v, sizeof(int128_t));
251
4.15k
            kvState_ = WS_Value;
252
4.15k
            return sizeof(JsonbInt128Val);
253
4.15k
        }
254
255
0
        return 0;
256
4.15k
    }
257
258
413
    uint32_t writeDouble(double v) {
259
413
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
260
413
            if (!writeFirstHeader()) return 0;
261
413
            os_->put((JsonbTypeUnder)JsonbType::T_Double);
262
413
            os_->write((char*)&v, sizeof(double));
263
413
            kvState_ = WS_Value;
264
413
            return sizeof(JsonbDoubleVal);
265
413
        }
266
267
0
        return 0;
268
413
    }
269
270
14
    uint32_t writeFloat(float v) {
271
14
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
272
14
            if (!writeFirstHeader()) return 0;
273
14
            os_->put((JsonbTypeUnder)JsonbType::T_Float);
274
14
            os_->write((char*)&v, sizeof(float));
275
14
            kvState_ = WS_Value;
276
14
            return sizeof(JsonbFloatVal);
277
14
        }
278
279
0
        return 0;
280
14
    }
281
282
    // must call writeStartString before writing a string val
283
612
    bool writeStartString() {
284
612
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
285
612
            if (!writeFirstHeader()) return 0;
286
612
            os_->put((JsonbTypeUnder)JsonbType::T_String);
287
612
            str_pos_ = os_->tellp();
288
289
            // fill the size bytes with 0 for now
290
612
            uint32_t size = 0;
291
612
            os_->write((char*)&size, sizeof(uint32_t));
292
293
612
            kvState_ = WS_String;
294
612
            return true;
295
612
        }
296
297
0
        return false;
298
612
    }
299
300
    // finish writing a string val
301
612
    bool writeEndString() {
302
612
        if (kvState_ == WS_String) {
303
612
            std::streampos cur_pos = os_->tellp();
304
612
            int32_t size = (int32_t)(cur_pos - str_pos_ - sizeof(uint32_t));
305
612
            assert(size >= 0);
306
307
0
            os_->seekp(str_pos_);
308
612
            os_->write((char*)&size, sizeof(uint32_t));
309
612
            os_->seekp(cur_pos);
310
311
612
            kvState_ = WS_Value;
312
612
            return true;
313
612
        }
314
315
0
        return false;
316
612
    }
317
318
    // TODO: here changed length to uint64_t, as some api also need changed, But the thirdparty api is uint_32t
319
    // need consider a better way to handle case.
320
612
    uint64_t writeString(const char* str, uint64_t len) {
321
612
        if (kvState_ == WS_String) {
322
612
            os_->write(str, len);
323
612
            return len;
324
612
        }
325
326
0
        return 0;
327
612
    }
328
329
2
    uint32_t writeString(const std::string& str) { return writeString(str.c_str(), str.size()); }
330
    uint32_t writeString(char ch) {
331
        if (kvState_ == WS_String) {
332
            os_->put(ch);
333
            return 1;
334
        }
335
336
        return 0;
337
    }
338
339
    // must call writeStartBinary before writing a binary val
340
4.35k
    bool writeStartBinary() {
341
4.35k
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
342
4.35k
            if (!writeFirstHeader()) return 0;
343
4.35k
            os_->put((JsonbTypeUnder)JsonbType::T_Binary);
344
4.35k
            str_pos_ = os_->tellp();
345
346
            // fill the size bytes with 0 for now
347
4.35k
            uint32_t size = 0;
348
4.35k
            os_->write((char*)&size, sizeof(uint32_t));
349
350
4.35k
            kvState_ = WS_Binary;
351
4.35k
            return true;
352
4.35k
        }
353
354
0
        return false;
355
4.35k
    }
356
357
    // finish writing a binary val
358
4.35k
    bool writeEndBinary() {
359
4.35k
        if (kvState_ == WS_Binary) {
360
4.35k
            std::streampos cur_pos = os_->tellp();
361
4.35k
            int32_t size = (int32_t)(cur_pos - str_pos_ - sizeof(uint32_t));
362
4.35k
            assert(size >= 0);
363
364
0
            os_->seekp(str_pos_);
365
4.35k
            os_->write((char*)&size, sizeof(uint32_t));
366
4.35k
            os_->seekp(cur_pos);
367
368
4.35k
            kvState_ = WS_Value;
369
4.35k
            return true;
370
4.35k
        }
371
372
0
        return false;
373
4.35k
    }
374
375
4.35k
    uint64_t writeBinary(const char* bin, uint64_t len) {
376
4.35k
        if (kvState_ == WS_Binary) {
377
4.35k
            os_->write(bin, len);
378
4.35k
            return len;
379
4.35k
        }
380
381
0
        return 0;
382
4.35k
    }
383
384
    // must call writeStartObject before writing an object val
385
1.45k
    bool writeStartObject() {
386
1.45k
        if (stack_.empty() || verifyValueState()) {
387
1.45k
            if (stack_.empty()) {
388
                // if this is a new JSONB, write the header
389
1.33k
                if (!hasHdr_) {
390
1.33k
                    writeHeader();
391
1.33k
                } else
392
0
                    return false;
393
1.33k
            }
394
395
            // check if the object exceeds the maximum nesting level
396
1.45k
            if (stack_.size() >= MaxNestingLevel) return false;
397
398
1.45k
            os_->put((JsonbTypeUnder)JsonbType::T_Object);
399
            // save the size position
400
1.45k
            stack_.push(WriteInfo({WS_Object, os_->tellp()}));
401
402
            // fill the size bytes with 0 for now
403
1.45k
            uint32_t size = 0;
404
1.45k
            os_->write((char*)&size, sizeof(uint32_t));
405
406
1.45k
            kvState_ = WS_Value;
407
1.45k
            return true;
408
1.45k
        }
409
410
0
        return false;
411
1.45k
    }
412
413
    // finish writing an object val
414
1.43k
    bool writeEndObject() {
415
1.43k
        if (!stack_.empty() && stack_.top().state == WS_Object && kvState_ == WS_Value) {
416
1.43k
            WriteInfo& ci = stack_.top();
417
1.43k
            std::streampos cur_pos = os_->tellp();
418
1.43k
            int32_t size = (int32_t)(cur_pos - ci.sz_pos - sizeof(uint32_t));
419
1.43k
            assert(size >= 0);
420
421
0
            os_->seekp(ci.sz_pos);
422
1.43k
            os_->write((char*)&size, sizeof(uint32_t));
423
1.43k
            os_->seekp(cur_pos);
424
1.43k
            stack_.pop();
425
426
1.43k
            return true;
427
1.43k
        }
428
429
0
        return false;
430
1.43k
    }
431
432
    // must call writeStartArray before writing an array val
433
491
    bool writeStartArray() {
434
491
        if (stack_.empty() || verifyValueState()) {
435
491
            if (stack_.empty()) {
436
                // if this is a new JSONB, write the header
437
474
                if (!hasHdr_) {
438
474
                    writeHeader();
439
474
                } else
440
0
                    return false;
441
474
            }
442
443
            // check if the array exceeds the maximum nesting level
444
491
            if (stack_.size() >= MaxNestingLevel) return false;
445
446
491
            os_->put((JsonbTypeUnder)JsonbType::T_Array);
447
            // save the size position
448
491
            stack_.push(WriteInfo({WS_Array, os_->tellp()}));
449
450
            // fill the size bytes with 0 for now
451
491
            uint32_t size = 0;
452
491
            os_->write((char*)&size, sizeof(uint32_t));
453
454
491
            kvState_ = WS_Value;
455
491
            return true;
456
491
        }
457
458
0
        return false;
459
491
    }
460
461
    // finish writing an array val
462
479
    bool writeEndArray() {
463
479
        if (!stack_.empty() && stack_.top().state == WS_Array && kvState_ == WS_Value) {
464
479
            WriteInfo& ci = stack_.top();
465
479
            std::streampos cur_pos = os_->tellp();
466
479
            int32_t size = (int32_t)(cur_pos - ci.sz_pos - sizeof(uint32_t));
467
479
            assert(size >= 0);
468
469
0
            os_->seekp(ci.sz_pos);
470
479
            os_->write((char*)&size, sizeof(uint32_t));
471
479
            os_->seekp(cur_pos);
472
479
            stack_.pop();
473
474
479
            return true;
475
479
        }
476
477
0
        return false;
478
479
    }
479
480
4.72k
    OS_TYPE* getOutput() { return os_; }
481
    JsonbDocument* getDocument() {
482
        return JsonbDocument::checkAndCreateDocument(getOutput()->getBuffer(),
483
                                                     getOutput()->getSize());
484
    }
485
486
    JsonbValue* getValue() {
487
        return JsonbDocument::createValue(getOutput()->getBuffer(), getOutput()->getSize());
488
    }
489
490
    bool writeEnd() {
491
        while (!stack_.empty()) {
492
            bool ok = false;
493
            switch (stack_.top().state) {
494
            case WS_Array:
495
                ok = writeEndArray();
496
                break;
497
            case WS_Object:
498
                ok = writeEndObject();
499
                break;
500
            case WS_String:
501
                ok = writeEndString();
502
                break;
503
            case WS_Binary:
504
                ok = writeEndBinary();
505
                break;
506
            default:
507
                ok = false;
508
                break;
509
            }
510
            if (ok == false) return false;
511
        }
512
        return true;
513
    }
514
515
private:
516
    // verify we are in the right state before writing a value
517
18.8k
    bool verifyValueState() {
518
18.8k
        assert(!stack_.empty());
519
        // The document can only be an Object or an Array which follows
520
        // the standard.
521
18.8k
        return (stack_.top().state == WS_Object && kvState_ == WS_Key) ||
522
18.8k
               (stack_.top().state == WS_Array && kvState_ == WS_Value);
523
18.8k
    }
524
525
    // verify we are in the right state before writing a key
526
18.8k
    bool verifyKeyState() {
527
18.8k
        assert(!stack_.empty());
528
18.8k
        return stack_.top().state == WS_Object && kvState_ == WS_Value;
529
18.8k
    }
530
531
2.44k
    void writeHeader() {
532
2.44k
        os_->put(JSONB_VER);
533
2.44k
        hasHdr_ = true;
534
2.44k
    }
535
536
private:
537
    enum WriteState {
538
        WS_NONE,
539
        WS_Array,
540
        WS_Object,
541
        WS_Key,
542
        WS_Value,
543
        WS_String,
544
        WS_Binary,
545
    };
546
547
    struct WriteInfo {
548
        WriteState state;
549
        std::streampos sz_pos;
550
    };
551
552
private:
553
    OS_TYPE* os_ = nullptr;
554
    bool alloc_;
555
    bool hasHdr_;
556
    WriteState kvState_; // key or value state
557
    std::streampos str_pos_;
558
    std::stack<WriteInfo> stack_;
559
    bool first_ = true;
560
};
561
562
// Default writer: JsonbWriterT bound to the JsonbOutStream stream type.
using JsonbWriter = JsonbWriterT<JsonbOutStream>;
563
564
} // namespace doris
565
566
#endif // JSONB_JSONBWRITER_H