Coverage Report

Created: 2024-11-21 12:31

/root/doris/be/src/util/jsonb_writer.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
 *  Copyright (c) 2014, Facebook, Inc.
3
 *  All rights reserved.
4
 *
5
 *  This source code is licensed under the BSD-style license found in the
6
 *  LICENSE file in the root directory of this source tree. An additional grant
7
 *  of patent rights can be found in the PATENTS file in the same directory.
8
 *
9
 */
10
11
/*
12
 * This file defines JsonbWriterT (template) and JsonbWriter.
13
 *
14
 * JsonbWriterT is a template class which implements a JSONB serializer.
15
 * Users call various write functions of JsonbWriterT object to write values
16
 * directly to JSONB packed bytes. All write functions of value or key return
17
 * the number of bytes written to JSONB, or 0 if there is an error. To write an
18
 * object, an array, or a string, you must call writeStart[..] before writing
19
 * values or key, and call writeEnd[..] after finishing at the end.
20
 *
21
 * By default, a JsonbWriterT object creates an output stream buffer.
22
 * Alternatively, you can also pass any output stream object to a writer, as
23
 * long as the stream object implements some basic functions of std::ostream
24
 * (such as JsonbOutStream, see jsonb_stream.h).
25
 *
26
 * JsonbWriter specializes JsonbWriterT with JsonbOutStream type (see
27
 * jsonb_stream.h). So unless you want to provide your own, different output stream
28
 * type, use a JsonbWriter object.
29
 *
30
 * @author Tian Xia <tianx@fb.com>
31
 * this file is copied from 
32
 * https://github.com/facebook/mysql-5.6/blob/fb-mysql-5.6.35/fbson/FbsonWriter.h
33
 * and modified by Doris
34
 */
35
36
#ifndef JSONB_JSONBWRITER_H
37
#define JSONB_JSONBWRITER_H
38
39
#include <limits>
40
#include <stack>
41
#include <string>
42
43
#include "jsonb_document.h"
44
#include "jsonb_stream.h"
45
46
namespace doris {
47
48
using int128_t = __int128;
49
50
template <class OS_TYPE>
51
class JsonbWriterT {
52
public:
53
2.05k
    JsonbWriterT() : alloc_(true), hasHdr_(false), kvState_(WS_Value), str_pos_(0) {
54
2.05k
        os_ = new OS_TYPE();
55
2.05k
    }
56
57
    explicit JsonbWriterT(OS_TYPE& os)
58
            : os_(&os), alloc_(false), hasHdr_(false), kvState_(WS_Value), str_pos_(0) {}
59
60
2.05k
    ~JsonbWriterT() {
61
2.05k
        if (alloc_) {
62
2.05k
            delete os_;
63
2.05k
        }
64
2.05k
    }
65
66
1.24k
    void reset() {
67
1.24k
        os_->clear();
68
1.24k
        os_->seekp(0);
69
1.24k
        hasHdr_ = false;
70
1.24k
        kvState_ = WS_Value;
71
1.24k
        first_ = true;
72
1.25k
        for (; !stack_.empty(); stack_.pop())
73
11
            ;
74
1.24k
    }
75
76
    uint32_t writeKey(const char* key, hDictInsert handler = nullptr) {
77
        return writeKey(key, strlen(key), handler);
78
    }
79
80
    // write a key string (or key id if an external dict is provided)
81
318
    uint32_t writeKey(const char* key, uint8_t len, hDictInsert handler = nullptr) {
82
318
        if (!stack_.empty() && verifyKeyState()) {
83
318
            int key_id = -1;
84
318
            if (handler) {
85
0
                key_id = handler(key, len);
86
0
            }
87
88
318
            uint32_t size = sizeof(uint8_t);
89
318
            if (key_id < 0) {
90
318
                os_->put(len);
91
318
                if (len == 0) {
92
                    // NOTE: we use sMaxKeyId to represent an empty key
93
0
                    JsonbKeyValue::keyid_type idx = JsonbKeyValue::sMaxKeyId;
94
0
                    os_->write((char*)&idx, sizeof(JsonbKeyValue::keyid_type));
95
0
                    size += sizeof(JsonbKeyValue::keyid_type);
96
318
                } else {
97
318
                    os_->write(key, len);
98
318
                    size += len;
99
318
                }
100
318
            } else if (key_id < JsonbKeyValue::sMaxKeyId) {
101
0
                JsonbKeyValue::keyid_type idx = key_id;
102
0
                os_->put(0);
103
0
                os_->write((char*)&idx, sizeof(JsonbKeyValue::keyid_type));
104
0
                size += sizeof(JsonbKeyValue::keyid_type);
105
0
            } else { // key id overflow
106
0
                assert(0);
107
0
                return 0;
108
0
            }
109
110
318
            kvState_ = WS_Key;
111
318
            return size;
112
318
        }
113
114
0
        return 0;
115
318
    }
116
117
35
    uint32_t writeValue(const JsonbValue* value) {
118
35
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
119
35
            if (!writeFirstHeader()) return 0;
120
35
            os_->write((char*)value, value->numPackedBytes());
121
35
            kvState_ = WS_Value;
122
35
            return value->size();
123
35
        }
124
0
        return 0;
125
35
    }
126
127
    // write a key id
128
10.2k
    uint32_t writeKey(JsonbKeyValue::keyid_type idx) {
129
10.2k
        if (!stack_.empty() && verifyKeyState()) {
130
9.23k
            os_->put(0);
131
9.23k
            os_->write((char*)&idx, sizeof(JsonbKeyValue::keyid_type));
132
9.23k
            kvState_ = WS_Key;
133
9.23k
            return sizeof(uint8_t) + sizeof(JsonbKeyValue::keyid_type);
134
9.23k
        }
135
136
1.02k
        return 0;
137
10.2k
    }
138
139
11.4k
    bool writeFirstHeader() {
140
11.4k
        if (first_ && stack_.empty()) {
141
630
            first_ = false;
142
            // if this is a new JSONB, write the header
143
630
            if (!hasHdr_) {
144
630
                writeHeader();
145
630
                return true;
146
630
            } else {
147
0
                return false;
148
0
            }
149
10.7k
        } else {
150
10.7k
            return true;
151
10.7k
        }
152
11.4k
    }
153
154
2.22k
    uint32_t writeNull() {
155
2.22k
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
156
2.22k
            if (!writeFirstHeader()) return 0;
157
2.22k
            os_->put((JsonbTypeUnder)JsonbType::T_Null);
158
2.22k
            kvState_ = WS_Value;
159
2.22k
            return sizeof(JsonbValue);
160
2.22k
        }
161
162
0
        return 0;
163
2.22k
    }
164
165
348
    uint32_t writeBool(bool b) {
166
348
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
167
348
            if (!writeFirstHeader()) return 0;
168
348
            if (b) {
169
174
                os_->put((JsonbTypeUnder)JsonbType::T_True);
170
174
            } else {
171
174
                os_->put((JsonbTypeUnder)JsonbType::T_False);
172
174
            }
173
174
348
            kvState_ = WS_Value;
175
348
            return sizeof(JsonbValue);
176
348
        }
177
178
0
        return 0;
179
348
    }
180
181
    // This function is a helper. It will make use of the smallest space to
182
    // write an int
183
    uint32_t writeInt(int64_t val) {
184
        if (val >= std::numeric_limits<int8_t>::min() &&
185
            val <= std::numeric_limits<int8_t>::max()) {
186
            return writeInt8((int8_t)val);
187
        } else if (val >= std::numeric_limits<int16_t>::min() &&
188
                   val <= std::numeric_limits<int16_t>::max()) {
189
            return writeInt16((int16_t)val);
190
        } else if (val >= std::numeric_limits<int32_t>::min() &&
191
                   val <= std::numeric_limits<int32_t>::max()) {
192
            return writeInt32((int32_t)val);
193
        } else {
194
            return writeInt64(val);
195
        }
196
    }
197
198
359
    uint32_t writeInt8(int8_t v) {
199
359
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
200
359
            if (!writeFirstHeader()) return 0;
201
359
            os_->put((JsonbTypeUnder)JsonbType::T_Int8);
202
359
            os_->put(v);
203
359
            kvState_ = WS_Value;
204
359
            return sizeof(JsonbInt8Val);
205
359
        }
206
207
0
        return 0;
208
359
    }
209
210
296
    uint32_t writeInt16(int16_t v) {
211
296
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
212
296
            if (!writeFirstHeader()) return 0;
213
296
            os_->put((JsonbTypeUnder)JsonbType::T_Int16);
214
296
            os_->write((char*)&v, sizeof(int16_t));
215
296
            kvState_ = WS_Value;
216
296
            return sizeof(JsonbInt16Val);
217
296
        }
218
219
0
        return 0;
220
296
    }
221
222
3.13k
    uint32_t writeInt32(int32_t v) {
223
3.13k
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
224
3.13k
            if (!writeFirstHeader()) return 0;
225
3.13k
            os_->put((JsonbTypeUnder)JsonbType::T_Int32);
226
3.13k
            os_->write((char*)&v, sizeof(int32_t));
227
3.13k
            kvState_ = WS_Value;
228
3.13k
            return sizeof(JsonbInt32Val);
229
3.13k
        }
230
231
0
        return 0;
232
3.13k
    }
233
234
65
    uint32_t writeInt64(int64_t v) {
235
65
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
236
65
            if (!writeFirstHeader()) return 0;
237
65
            os_->put((JsonbTypeUnder)JsonbType::T_Int64);
238
65
            os_->write((char*)&v, sizeof(int64_t));
239
65
            kvState_ = WS_Value;
240
65
            return sizeof(JsonbInt64Val);
241
65
        }
242
243
0
        return 0;
244
65
    }
245
246
1.02k
    uint32_t writeInt128(int128_t v) {
247
1.02k
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
248
1.02k
            if (!writeFirstHeader()) return 0;
249
1.02k
            os_->put((JsonbTypeUnder)JsonbType::T_Int128);
250
1.02k
            os_->write((char*)&v, sizeof(int128_t));
251
1.02k
            kvState_ = WS_Value;
252
1.02k
            return sizeof(JsonbInt128Val);
253
1.02k
        }
254
255
0
        return 0;
256
1.02k
    }
257
258
273
    uint32_t writeDouble(double v) {
259
273
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
260
273
            if (!writeFirstHeader()) return 0;
261
273
            os_->put((JsonbTypeUnder)JsonbType::T_Double);
262
273
            os_->write((char*)&v, sizeof(double));
263
273
            kvState_ = WS_Value;
264
273
            return sizeof(JsonbDoubleVal);
265
273
        }
266
267
0
        return 0;
268
273
    }
269
270
0
    uint32_t writeFloat(float v) {
271
0
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
272
0
            if (!writeFirstHeader()) return 0;
273
0
            os_->put((JsonbTypeUnder)JsonbType::T_Float);
274
0
            os_->write((char*)&v, sizeof(float));
275
0
            kvState_ = WS_Value;
276
0
            return sizeof(JsonbFloatVal);
277
0
        }
278
279
0
        return 0;
280
0
    }
281
282
    // must call writeStartString before writing a string val
283
582
    bool writeStartString() {
284
582
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
285
582
            if (!writeFirstHeader()) return 0;
286
582
            os_->put((JsonbTypeUnder)JsonbType::T_String);
287
582
            str_pos_ = os_->tellp();
288
289
            // fill the size bytes with 0 for now
290
582
            uint32_t size = 0;
291
582
            os_->write((char*)&size, sizeof(uint32_t));
292
293
582
            kvState_ = WS_String;
294
582
            return true;
295
582
        }
296
297
0
        return false;
298
582
    }
299
300
    // finish writing a string val
301
582
    bool writeEndString() {
302
582
        if (kvState_ == WS_String) {
303
582
            std::streampos cur_pos = os_->tellp();
304
582
            int32_t size = (int32_t)(cur_pos - str_pos_ - sizeof(uint32_t));
305
582
            assert(size >= 0);
306
307
0
            os_->seekp(str_pos_);
308
582
            os_->write((char*)&size, sizeof(uint32_t));
309
582
            os_->seekp(cur_pos);
310
311
582
            kvState_ = WS_Value;
312
582
            return true;
313
582
        }
314
315
0
        return false;
316
582
    }
317
318
    // TODO: the length here was changed to uint64_t (some APIs also need to be changed), but the third-party API uses uint32_t;
319
    // need to consider a better way to handle this case.
320
582
    uint64_t writeString(const char* str, uint64_t len) {
321
582
        if (kvState_ == WS_String) {
322
582
            os_->write(str, len);
323
582
            return len;
324
582
        }
325
326
0
        return 0;
327
582
    }
328
329
0
    uint32_t writeString(const std::string& str) { return writeString(str.c_str(), str.size()); }
330
    uint32_t writeString(char ch) {
331
        if (kvState_ == WS_String) {
332
            os_->put(ch);
333
            return 1;
334
        }
335
336
        return 0;
337
    }
338
339
    // must call writeStartBinary before writing a binary val
340
3.08k
    bool writeStartBinary() {
341
3.08k
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
342
3.08k
            if (!writeFirstHeader()) return 0;
343
3.08k
            os_->put((JsonbTypeUnder)JsonbType::T_Binary);
344
3.08k
            str_pos_ = os_->tellp();
345
346
            // fill the size bytes with 0 for now
347
3.08k
            uint32_t size = 0;
348
3.08k
            os_->write((char*)&size, sizeof(uint32_t));
349
350
3.08k
            kvState_ = WS_Binary;
351
3.08k
            return true;
352
3.08k
        }
353
354
0
        return false;
355
3.08k
    }
356
357
    // finish writing a binary val
358
3.08k
    bool writeEndBinary() {
359
3.08k
        if (kvState_ == WS_Binary) {
360
3.08k
            std::streampos cur_pos = os_->tellp();
361
3.08k
            int32_t size = (int32_t)(cur_pos - str_pos_ - sizeof(uint32_t));
362
3.08k
            assert(size >= 0);
363
364
0
            os_->seekp(str_pos_);
365
3.08k
            os_->write((char*)&size, sizeof(uint32_t));
366
3.08k
            os_->seekp(cur_pos);
367
368
3.08k
            kvState_ = WS_Value;
369
3.08k
            return true;
370
3.08k
        }
371
372
0
        return false;
373
3.08k
    }
374
375
3.08k
    uint64_t writeBinary(const char* bin, uint64_t len) {
376
3.08k
        if (kvState_ == WS_Binary) {
377
3.08k
            os_->write(bin, len);
378
3.08k
            return len;
379
3.08k
        }
380
381
0
        return 0;
382
3.08k
    }
383
384
    // must call writeStartObject before writing an object val
385
1.27k
    bool writeStartObject() {
386
1.27k
        if (stack_.empty() || verifyValueState()) {
387
1.27k
            if (stack_.empty()) {
388
                // if this is a new JSONB, write the header
389
1.17k
                if (!hasHdr_) {
390
1.17k
                    writeHeader();
391
1.17k
                } else
392
0
                    return false;
393
1.17k
            }
394
395
            // check if the object exceeds the maximum nesting level
396
1.27k
            if (stack_.size() >= MaxNestingLevel) return false;
397
398
1.27k
            os_->put((JsonbTypeUnder)JsonbType::T_Object);
399
            // save the size position
400
1.27k
            stack_.push(WriteInfo({WS_Object, os_->tellp()}));
401
402
            // fill the size bytes with 0 for now
403
1.27k
            uint32_t size = 0;
404
1.27k
            os_->write((char*)&size, sizeof(uint32_t));
405
406
1.27k
            kvState_ = WS_Value;
407
1.27k
            return true;
408
1.27k
        }
409
410
0
        return false;
411
1.27k
    }
412
413
    // finish writing an object val
414
1.25k
    bool writeEndObject() {
415
1.25k
        if (!stack_.empty() && stack_.top().state == WS_Object && kvState_ == WS_Value) {
416
1.25k
            WriteInfo& ci = stack_.top();
417
1.25k
            std::streampos cur_pos = os_->tellp();
418
1.25k
            int32_t size = (int32_t)(cur_pos - ci.sz_pos - sizeof(uint32_t));
419
1.25k
            assert(size >= 0);
420
421
0
            os_->seekp(ci.sz_pos);
422
1.25k
            os_->write((char*)&size, sizeof(uint32_t));
423
1.25k
            os_->seekp(cur_pos);
424
1.25k
            stack_.pop();
425
426
1.25k
            return true;
427
1.25k
        }
428
429
0
        return false;
430
1.25k
    }
431
432
    // must call writeStartArray before writing an array val
433
472
    bool writeStartArray() {
434
472
        if (stack_.empty() || verifyValueState()) {
435
472
            if (stack_.empty()) {
436
                // if this is a new JSONB, write the header
437
472
                if (!hasHdr_) {
438
472
                    writeHeader();
439
472
                } else
440
0
                    return false;
441
472
            }
442
443
            // check if the array exceeds the maximum nesting level
444
472
            if (stack_.size() >= MaxNestingLevel) return false;
445
446
472
            os_->put((JsonbTypeUnder)JsonbType::T_Array);
447
            // save the size position
448
472
            stack_.push(WriteInfo({WS_Array, os_->tellp()}));
449
450
            // fill the size bytes with 0 for now
451
472
            uint32_t size = 0;
452
472
            os_->write((char*)&size, sizeof(uint32_t));
453
454
472
            kvState_ = WS_Value;
455
472
            return true;
456
472
        }
457
458
0
        return false;
459
472
    }
460
461
    // finish writing an array val
462
461
    bool writeEndArray() {
463
461
        if (!stack_.empty() && stack_.top().state == WS_Array && kvState_ == WS_Value) {
464
461
            WriteInfo& ci = stack_.top();
465
461
            std::streampos cur_pos = os_->tellp();
466
461
            int32_t size = (int32_t)(cur_pos - ci.sz_pos - sizeof(uint32_t));
467
461
            assert(size >= 0);
468
469
0
            os_->seekp(ci.sz_pos);
470
461
            os_->write((char*)&size, sizeof(uint32_t));
471
461
            os_->seekp(cur_pos);
472
461
            stack_.pop();
473
474
461
            return true;
475
461
        }
476
477
0
        return false;
478
461
    }
479
480
4.41k
    OS_TYPE* getOutput() { return os_; }
481
    JsonbDocument* getDocument() {
482
        return JsonbDocument::createDocument(getOutput()->getBuffer(), getOutput()->getSize());
483
    }
484
485
    JsonbValue* getValue() {
486
        return JsonbDocument::createValue(getOutput()->getBuffer(), getOutput()->getSize());
487
    }
488
489
    bool writeEnd() {
490
        while (!stack_.empty()) {
491
            bool ok = false;
492
            switch (stack_.top().state) {
493
            case WS_Array:
494
                ok = writeEndArray();
495
                break;
496
            case WS_Object:
497
                ok = writeEndObject();
498
                break;
499
            case WS_String:
500
                ok = writeEndString();
501
                break;
502
            case WS_Binary:
503
                ok = writeEndBinary();
504
                break;
505
            default:
506
                ok = false;
507
                break;
508
            }
509
            if (ok == false) return false;
510
        }
511
        return true;
512
    }
513
514
private:
515
    // verify we are in the right state before writing a value
516
10.8k
    bool verifyValueState() {
517
10.8k
        assert(!stack_.empty());
518
        // The document can only be an Object or an Array which follows
519
        // the standard.
520
10.8k
        return (stack_.top().state == WS_Object && kvState_ == WS_Key) ||
521
10.8k
               (stack_.top().state == WS_Array && kvState_ == WS_Value);
522
10.8k
    }
523
524
    // verify we are in the right state before writing a key
525
10.5k
    bool verifyKeyState() {
526
10.5k
        assert(!stack_.empty());
527
10.5k
        return stack_.top().state == WS_Object && kvState_ == WS_Value;
528
10.5k
    }
529
530
2.27k
    void writeHeader() {
531
2.27k
        os_->put(JSONB_VER);
532
2.27k
        hasHdr_ = true;
533
2.27k
    }
534
535
private:
536
    enum WriteState {
537
        WS_NONE,
538
        WS_Array,
539
        WS_Object,
540
        WS_Key,
541
        WS_Value,
542
        WS_String,
543
        WS_Binary,
544
    };
545
546
    struct WriteInfo {
547
        WriteState state;
548
        std::streampos sz_pos;
549
    };
550
551
private:
552
    OS_TYPE* os_ = nullptr;
553
    bool alloc_;
554
    bool hasHdr_;
555
    WriteState kvState_; // key or value state
556
    std::streampos str_pos_;
557
    std::stack<WriteInfo> stack_;
558
    bool first_ = true;
559
};
560
561
typedef JsonbWriterT<JsonbOutStream> JsonbWriter;
562
563
} // namespace doris
564
565
#endif // JSONB_JSONBWRITER_H