Coverage Report

Created: 2024-11-20 12:30

/root/doris/be/src/util/jsonb_writer.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
 *  Copyright (c) 2014, Facebook, Inc.
3
 *  All rights reserved.
4
 *
5
 *  This source code is licensed under the BSD-style license found in the
6
 *  LICENSE file in the root directory of this source tree. An additional grant
7
 *  of patent rights can be found in the PATENTS file in the same directory.
8
 *
9
 */
10
11
/*
12
 * This file defines JsonbWriterT (template) and JsonbWriter.
13
 *
14
 * JsonbWriterT is a template class which implements a JSONB serializer.
15
 * Users call various write functions of JsonbWriterT object to write values
16
 * directly to JSONB packed bytes. All write functions of value or key return
17
 * the number of bytes written to JSONB, or 0 if there is an error. To write an
18
 * object, an array, or a string, you must call writeStart[..] before writing
19
 * values or key, and call writeEnd[..] after finishing at the end.
20
 *
21
 * By default, a JsonbWriterT object creates an output stream buffer.
22
 * Alternatively, you can also pass any output stream object to a writer, as
23
 * long as the stream object implements some basic functions of std::ostream
24
 * (such as JsonbOutStream, see JsonbStream.h).
25
 *
26
 * JsonbWriter specializes JsonbWriterT with JsonbOutStream type (see
27
 * JsonbStream.h). So unless you want to provide your own, different output stream
28
 * type, use a JsonbWriter object.
29
 *
30
 * @author Tian Xia <tianx@fb.com>
31
 * this file is copied from 
32
 * https://github.com/facebook/mysql-5.6/blob/fb-mysql-5.6.35/fbson/FbsonWriter.h
33
 * and modified by Doris
34
 */
35
36
#ifndef JSONB_JSONBWRITER_H
37
#define JSONB_JSONBWRITER_H
38
39
#include <limits>
40
#include <stack>
41
#include <string>
42
43
#include "jsonb_document.h"
44
#include "jsonb_stream.h"
45
46
namespace doris {
47
48
using int128_t = __int128;
49
50
template <class OS_TYPE>
51
class JsonbWriterT {
52
public:
53
1.93k
    JsonbWriterT() : alloc_(true), hasHdr_(false), kvState_(WS_Value), str_pos_(0) {
54
1.93k
        os_ = new OS_TYPE();
55
1.93k
    }
56
57
    // Wraps a caller-provided output stream. The writer only stores its
    // address and never deletes it (alloc_ == false).
    explicit JsonbWriterT(OS_TYPE& os)
            : os_(&os),
              alloc_(false),
              hasHdr_(false),
              kvState_(WS_Value),
              str_pos_(0) {}
59
60
1.93k
    // Releases the output stream, but only when this writer allocated it.
    // NOTE(review): no copy constructor/assignment is declared in this class,
    // so copying an owning writer would delete os_ twice -- confirm writers
    // are never copied.
    ~JsonbWriterT() {
        if (!alloc_) {
            return;
        }
        delete os_;
    }
65
66
1.13k
    void reset() {
67
1.13k
        os_->clear();
68
1.13k
        os_->seekp(0);
69
1.13k
        hasHdr_ = false;
70
1.13k
        kvState_ = WS_Value;
71
1.13k
        first_ = true;
72
1.14k
        for (; !stack_.empty(); stack_.pop())
73
11
            ;
74
1.13k
    }
75
76
    uint32_t writeKey(const char* key, hDictInsert handler = nullptr) {
77
        return writeKey(key, strlen(key), handler);
78
    }
79
80
    // write a key string (or key id if an external dict is provided)
81
290
    uint32_t writeKey(const char* key, uint8_t len, hDictInsert handler = nullptr) {
82
290
        if (!stack_.empty() && verifyKeyState()) {
83
290
            int key_id = -1;
84
290
            if (handler) {
85
0
                key_id = handler(key, len);
86
0
            }
87
88
290
            uint32_t size = sizeof(uint8_t);
89
290
            if (key_id < 0) {
90
290
                os_->put(len);
91
290
                if (len == 0) {
92
                    // NOTE: we use sMaxKeyId to represent an empty key
93
0
                    JsonbKeyValue::keyid_type idx = JsonbKeyValue::sMaxKeyId;
94
0
                    os_->write((char*)&idx, sizeof(JsonbKeyValue::keyid_type));
95
0
                    size += sizeof(JsonbKeyValue::keyid_type);
96
290
                } else {
97
290
                    os_->write(key, len);
98
290
                    size += len;
99
290
                }
100
290
            } else if (key_id < JsonbKeyValue::sMaxKeyId) {
101
0
                JsonbKeyValue::keyid_type idx = key_id;
102
0
                os_->put(0);
103
0
                os_->write((char*)&idx, sizeof(JsonbKeyValue::keyid_type));
104
0
                size += sizeof(JsonbKeyValue::keyid_type);
105
0
            } else { // key id overflow
106
0
                assert(0);
107
0
                return 0;
108
0
            }
109
110
290
            kvState_ = WS_Key;
111
290
            return size;
112
290
        }
113
114
0
        return 0;
115
290
    }
116
117
35
    uint32_t writeValue(const JsonbValue* value) {
118
35
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
119
35
            if (!writeFirstHeader()) return 0;
120
35
            os_->write((char*)value, value->numPackedBytes());
121
35
            kvState_ = WS_Value;
122
35
            return value->size();
123
35
        }
124
0
        return 0;
125
35
    }
126
127
    // write a key id
128
10.2k
    uint32_t writeKey(JsonbKeyValue::keyid_type idx) {
129
10.2k
        if (!stack_.empty() && verifyKeyState()) {
130
9.23k
            os_->put(0);
131
9.23k
            os_->write((char*)&idx, sizeof(JsonbKeyValue::keyid_type));
132
9.23k
            kvState_ = WS_Key;
133
9.23k
            return sizeof(uint8_t) + sizeof(JsonbKeyValue::keyid_type);
134
9.23k
        }
135
136
1.02k
        return 0;
137
10.2k
    }
138
139
11.2k
    bool writeFirstHeader() {
140
11.2k
        if (first_ && stack_.empty()) {
141
567
            first_ = false;
142
            // if this is a new JSONB, write the header
143
567
            if (!hasHdr_) {
144
567
                writeHeader();
145
567
                return true;
146
567
            } else {
147
0
                return false;
148
0
            }
149
10.6k
        } else {
150
10.6k
            return true;
151
10.6k
        }
152
11.2k
    }
153
154
2.20k
    uint32_t writeNull() {
155
2.20k
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
156
2.20k
            if (!writeFirstHeader()) return 0;
157
2.20k
            os_->put((JsonbTypeUnder)JsonbType::T_Null);
158
2.20k
            kvState_ = WS_Value;
159
2.20k
            return sizeof(JsonbValue);
160
2.20k
        }
161
162
0
        return 0;
163
2.20k
    }
164
165
320
    uint32_t writeBool(bool b) {
166
320
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
167
320
            if (!writeFirstHeader()) return 0;
168
320
            if (b) {
169
160
                os_->put((JsonbTypeUnder)JsonbType::T_True);
170
160
            } else {
171
160
                os_->put((JsonbTypeUnder)JsonbType::T_False);
172
160
            }
173
174
320
            kvState_ = WS_Value;
175
320
            return sizeof(JsonbValue);
176
320
        }
177
178
0
        return 0;
179
320
    }
180
181
    // This function is a helper. It will make use of smallest space to
182
    // write an int
183
    uint32_t writeInt(int64_t val) {
184
        if (val >= std::numeric_limits<int8_t>::min() &&
185
            val <= std::numeric_limits<int8_t>::max()) {
186
            return writeInt8((int8_t)val);
187
        } else if (val >= std::numeric_limits<int16_t>::min() &&
188
                   val <= std::numeric_limits<int16_t>::max()) {
189
            return writeInt16((int16_t)val);
190
        } else if (val >= std::numeric_limits<int32_t>::min() &&
191
                   val <= std::numeric_limits<int32_t>::max()) {
192
            return writeInt32((int32_t)val);
193
        } else {
194
            return writeInt64(val);
195
        }
196
    }
197
198
331
    uint32_t writeInt8(int8_t v) {
199
331
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
200
331
            if (!writeFirstHeader()) return 0;
201
331
            os_->put((JsonbTypeUnder)JsonbType::T_Int8);
202
331
            os_->put(v);
203
331
            kvState_ = WS_Value;
204
331
            return sizeof(JsonbInt8Val);
205
331
        }
206
207
0
        return 0;
208
331
    }
209
210
268
    uint32_t writeInt16(int16_t v) {
211
268
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
212
268
            if (!writeFirstHeader()) return 0;
213
268
            os_->put((JsonbTypeUnder)JsonbType::T_Int16);
214
268
            os_->write((char*)&v, sizeof(int16_t));
215
268
            kvState_ = WS_Value;
216
268
            return sizeof(JsonbInt16Val);
217
268
        }
218
219
0
        return 0;
220
268
    }
221
222
3.13k
    uint32_t writeInt32(int32_t v) {
223
3.13k
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
224
3.13k
            if (!writeFirstHeader()) return 0;
225
3.13k
            os_->put((JsonbTypeUnder)JsonbType::T_Int32);
226
3.13k
            os_->write((char*)&v, sizeof(int32_t));
227
3.13k
            kvState_ = WS_Value;
228
3.13k
            return sizeof(JsonbInt32Val);
229
3.13k
        }
230
231
0
        return 0;
232
3.13k
    }
233
234
58
    uint32_t writeInt64(int64_t v) {
235
58
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
236
58
            if (!writeFirstHeader()) return 0;
237
58
            os_->put((JsonbTypeUnder)JsonbType::T_Int64);
238
58
            os_->write((char*)&v, sizeof(int64_t));
239
58
            kvState_ = WS_Value;
240
58
            return sizeof(JsonbInt64Val);
241
58
        }
242
243
0
        return 0;
244
58
    }
245
246
1.02k
    // Writes a 128-bit integer: the T_Int128 type byte followed by the raw
    // in-memory bytes of `v`.
    // Returns sizeof(JsonbInt128Val), or 0 if a value is not allowed here.
    uint32_t writeInt128(int128_t v) {
        const bool at_root = first_ && stack_.empty();
        if (!at_root && (stack_.empty() || !verifyValueState())) {
            return 0;
        }
        if (!writeFirstHeader()) {
            return 0;
        }

        os_->put(static_cast<JsonbTypeUnder>(JsonbType::T_Int128));
        os_->write(reinterpret_cast<const char*>(&v), sizeof(int128_t));
        kvState_ = WS_Value;
        return sizeof(JsonbInt128Val);
    }
257
258
252
    uint32_t writeDouble(double v) {
259
252
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
260
252
            if (!writeFirstHeader()) return 0;
261
252
            os_->put((JsonbTypeUnder)JsonbType::T_Double);
262
252
            os_->write((char*)&v, sizeof(double));
263
252
            kvState_ = WS_Value;
264
252
            return sizeof(JsonbDoubleVal);
265
252
        }
266
267
0
        return 0;
268
252
    }
269
270
0
    uint32_t writeFloat(float v) {
271
0
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
272
0
            if (!writeFirstHeader()) return 0;
273
0
            os_->put((JsonbTypeUnder)JsonbType::T_Float);
274
0
            os_->write((char*)&v, sizeof(float));
275
0
            kvState_ = WS_Value;
276
0
            return sizeof(JsonbFloatVal);
277
0
        }
278
279
0
        return 0;
280
0
    }
281
282
    // must call writeStartString before writing a string val
283
533
    bool writeStartString() {
284
533
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
285
533
            if (!writeFirstHeader()) return 0;
286
533
            os_->put((JsonbTypeUnder)JsonbType::T_String);
287
533
            str_pos_ = os_->tellp();
288
289
            // fill the size bytes with 0 for now
290
533
            uint32_t size = 0;
291
533
            os_->write((char*)&size, sizeof(uint32_t));
292
293
533
            kvState_ = WS_String;
294
533
            return true;
295
533
        }
296
297
0
        return false;
298
533
    }
299
300
    // finish writing a string val
301
533
    bool writeEndString() {
302
533
        if (kvState_ == WS_String) {
303
533
            std::streampos cur_pos = os_->tellp();
304
533
            int32_t size = (int32_t)(cur_pos - str_pos_ - sizeof(uint32_t));
305
533
            assert(size >= 0);
306
307
0
            os_->seekp(str_pos_);
308
533
            os_->write((char*)&size, sizeof(uint32_t));
309
533
            os_->seekp(cur_pos);
310
311
533
            kvState_ = WS_Value;
312
533
            return true;
313
533
        }
314
315
0
        return false;
316
533
    }
317
318
533
    uint32_t writeString(const char* str, uint32_t len) {
319
533
        if (kvState_ == WS_String) {
320
533
            os_->write(str, len);
321
533
            return len;
322
533
        }
323
324
0
        return 0;
325
533
    }
326
327
0
    uint32_t writeString(const std::string& str) {
328
0
        return writeString(str.c_str(), (uint32_t)str.size());
329
0
    }
330
    uint32_t writeString(char ch) {
331
        if (kvState_ == WS_String) {
332
            os_->put(ch);
333
            return 1;
334
        }
335
336
        return 0;
337
    }
338
339
    // must call writeStartBinary before writing a binary val
340
3.08k
    bool writeStartBinary() {
341
3.08k
        if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
342
3.08k
            if (!writeFirstHeader()) return 0;
343
3.08k
            os_->put((JsonbTypeUnder)JsonbType::T_Binary);
344
3.08k
            str_pos_ = os_->tellp();
345
346
            // fill the size bytes with 0 for now
347
3.08k
            uint32_t size = 0;
348
3.08k
            os_->write((char*)&size, sizeof(uint32_t));
349
350
3.08k
            kvState_ = WS_Binary;
351
3.08k
            return true;
352
3.08k
        }
353
354
0
        return false;
355
3.08k
    }
356
357
    // finish writing a binary val
358
3.08k
    bool writeEndBinary() {
359
3.08k
        if (kvState_ == WS_Binary) {
360
3.08k
            std::streampos cur_pos = os_->tellp();
361
3.08k
            int32_t size = (int32_t)(cur_pos - str_pos_ - sizeof(uint32_t));
362
3.08k
            assert(size >= 0);
363
364
0
            os_->seekp(str_pos_);
365
3.08k
            os_->write((char*)&size, sizeof(uint32_t));
366
3.08k
            os_->seekp(cur_pos);
367
368
3.08k
            kvState_ = WS_Value;
369
3.08k
            return true;
370
3.08k
        }
371
372
0
        return false;
373
3.08k
    }
374
375
3.08k
    uint32_t writeBinary(const char* bin, uint32_t len) {
376
3.08k
        if (kvState_ == WS_Binary) {
377
3.08k
            os_->write(bin, len);
378
3.08k
            return len;
379
3.08k
        }
380
381
0
        return 0;
382
3.08k
    }
383
384
    // must call writeStartObject before writing an object val
385
1.24k
    bool writeStartObject() {
386
1.24k
        if (stack_.empty() || verifyValueState()) {
387
1.24k
            if (stack_.empty()) {
388
                // if this is a new JSONB, write the header
389
1.16k
                if (!hasHdr_) {
390
1.16k
                    writeHeader();
391
1.16k
                } else
392
0
                    return false;
393
1.16k
            }
394
395
            // check if the object exceeds the maximum nesting level
396
1.24k
            if (stack_.size() >= MaxNestingLevel) return false;
397
398
1.24k
            os_->put((JsonbTypeUnder)JsonbType::T_Object);
399
            // save the size position
400
1.24k
            stack_.push(WriteInfo({WS_Object, os_->tellp()}));
401
402
            // fill the size bytes with 0 for now
403
1.24k
            uint32_t size = 0;
404
1.24k
            os_->write((char*)&size, sizeof(uint32_t));
405
406
1.24k
            kvState_ = WS_Value;
407
1.24k
            return true;
408
1.24k
        }
409
410
0
        return false;
411
1.24k
    }
412
413
    // finish writing an object val
414
1.23k
    bool writeEndObject() {
415
1.23k
        if (!stack_.empty() && stack_.top().state == WS_Object && kvState_ == WS_Value) {
416
1.23k
            WriteInfo& ci = stack_.top();
417
1.23k
            std::streampos cur_pos = os_->tellp();
418
1.23k
            int32_t size = (int32_t)(cur_pos - ci.sz_pos - sizeof(uint32_t));
419
1.23k
            assert(size >= 0);
420
421
0
            os_->seekp(ci.sz_pos);
422
1.23k
            os_->write((char*)&size, sizeof(uint32_t));
423
1.23k
            os_->seekp(cur_pos);
424
1.23k
            stack_.pop();
425
426
1.23k
            return true;
427
1.23k
        }
428
429
0
        return false;
430
1.23k
    }
431
432
    // must call writeStartArray before writing an array val
433
437
    bool writeStartArray() {
434
437
        if (stack_.empty() || verifyValueState()) {
435
437
            if (stack_.empty()) {
436
                // if this is a new JSONB, write the header
437
437
                if (!hasHdr_) {
438
437
                    writeHeader();
439
437
                } else
440
0
                    return false;
441
437
            }
442
443
            // check if the array exceeds the maximum nesting level
444
437
            if (stack_.size() >= MaxNestingLevel) return false;
445
446
437
            os_->put((JsonbTypeUnder)JsonbType::T_Array);
447
            // save the size position
448
437
            stack_.push(WriteInfo({WS_Array, os_->tellp()}));
449
450
            // fill the size bytes with 0 for now
451
437
            uint32_t size = 0;
452
437
            os_->write((char*)&size, sizeof(uint32_t));
453
454
437
            kvState_ = WS_Value;
455
437
            return true;
456
437
        }
457
458
0
        return false;
459
437
    }
460
461
    // finish writing an array val
462
426
    bool writeEndArray() {
463
426
        if (!stack_.empty() && stack_.top().state == WS_Array && kvState_ == WS_Value) {
464
426
            WriteInfo& ci = stack_.top();
465
426
            std::streampos cur_pos = os_->tellp();
466
426
            int32_t size = (int32_t)(cur_pos - ci.sz_pos - sizeof(uint32_t));
467
426
            assert(size >= 0);
468
469
0
            os_->seekp(ci.sz_pos);
470
426
            os_->write((char*)&size, sizeof(uint32_t));
471
426
            os_->seekp(cur_pos);
472
426
            stack_.pop();
473
474
426
            return true;
475
426
        }
476
477
0
        return false;
478
426
    }
479
480
4.19k
    // Returns the output stream this writer serializes into. Ownership is
    // not transferred.
    OS_TYPE* getOutput() {
        return os_;
    }
481
    // Builds a JsonbDocument via JsonbDocument::createDocument() from the
    // output stream's buffer and size.
    JsonbDocument* getDocument() {
        OS_TYPE* out = getOutput();
        return JsonbDocument::createDocument(out->getBuffer(), out->getSize());
    }
484
485
    // Builds a JsonbValue via JsonbDocument::createValue() from the output
    // stream's buffer and its size (converted to uint32_t).
    JsonbValue* getValue() {
        OS_TYPE* out = getOutput();
        return JsonbDocument::createValue(out->getBuffer(),
                                          static_cast<uint32_t>(out->getSize()));
    }
489
490
    bool writeEnd() {
491
        while (!stack_.empty()) {
492
            bool ok = false;
493
            switch (stack_.top().state) {
494
            case WS_Array:
495
                ok = writeEndArray();
496
                break;
497
            case WS_Object:
498
                ok = writeEndObject();
499
                break;
500
            case WS_String:
501
                ok = writeEndString();
502
                break;
503
            case WS_Binary:
504
                ok = writeEndBinary();
505
                break;
506
            default:
507
                ok = false;
508
                break;
509
            }
510
            if (ok == false) return false;
511
        }
512
        return true;
513
    }
514
515
private:
516
    // verify we are in the right state before writing a value
517
10.7k
    bool verifyValueState() {
518
10.7k
        assert(!stack_.empty());
519
        // The document can only be an Object or an Array which follows
520
        // the standard.
521
10.7k
        return (stack_.top().state == WS_Object && kvState_ == WS_Key) ||
522
10.7k
               (stack_.top().state == WS_Array && kvState_ == WS_Value);
523
10.7k
    }
524
525
    // verify we are in the right state before writing a key
526
10.5k
    bool verifyKeyState() {
527
10.5k
        assert(!stack_.empty());
528
10.5k
        return stack_.top().state == WS_Object && kvState_ == WS_Value;
529
10.5k
    }
530
531
2.16k
    void writeHeader() {
532
2.16k
        os_->put(JSONB_VER);
533
2.16k
        hasHdr_ = true;
534
2.16k
    }
535
536
private:
537
    // Writer states. WS_Array / WS_Object mark the open containers stored on
    // stack_; the remaining values are held in kvState_.
    enum WriteState {
        WS_NONE = 0,   // not assigned anywhere in this class
        WS_Array = 1,  // open container is an array
        WS_Object = 2, // open container is an object
        WS_Key = 3,    // a key was just written; a value must follow
        WS_Value = 4,  // a value was just written (also the initial state)
        WS_String = 5, // between writeStartString() and writeEndString()
        WS_Binary = 6, // between writeStartBinary() and writeEndBinary()
    };
546
547
    struct WriteInfo {
548
        WriteState state;
549
        std::streampos sz_pos;
550
    };
551
552
private:
553
    OS_TYPE* os_ = nullptr;
554
    bool alloc_;
555
    bool hasHdr_;
556
    WriteState kvState_; // key or value state
557
    std::streampos str_pos_;
558
    std::stack<WriteInfo> stack_;
559
    bool first_ = true;
560
};
561
562
// Default writer: JsonbWriterT specialized with the JsonbOutStream type.
using JsonbWriter = JsonbWriterT<JsonbOutStream>;
563
564
} // namespace doris
565
566
#endif // JSONB_JSONBWRITER_H