Coverage Report

Created: 2026-03-12 17:15

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/segment/column_writer.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <gen_cpp/segment_v2.pb.h>
21
#include <stddef.h>
22
#include <stdint.h>
23
24
#include <algorithm>
25
#include <memory> // for unique_ptr
26
#include <ostream>
27
#include <string>
28
#include <unordered_map>
29
#include <utility>
30
#include <vector>
31
32
#include "common/status.h" // for Status
33
#include "core/column/column_variant.h"
34
#include "storage/field.h" // for StorageField
35
#include "storage/index/ann/ann_index_writer.h"
36
#include "storage/index/bloom_filter/bloom_filter.h"
37
#include "storage/index/inverted/inverted_index_writer.h"
38
#include "storage/segment/common.h"
39
#include "storage/segment/options.h"
40
#include "storage/segment/variant/nested_group_provider.h"
41
#include "storage/segment/variant/variant_statistics.h"
42
#include "util/bitmap.h" // for BitmapChange
43
#include "util/slice.h"  // for OwnedSlice
44
45
namespace doris {
46
47
class BlockCompressionCodec;
48
class TabletColumn;
49
class TabletIndex;
50
struct RowsetWriterContext;
51
52
namespace io {
53
class FileWriter;
54
}
55
56
namespace segment_v2 {
57
58
struct ColumnWriterOptions {
59
    // input and output parameter:
60
    // - input: column_id/unique_id/type/length/encoding/compression/is_nullable members
61
    // - output: encoding/indexes/dict_page members
62
    ColumnMetaPB* meta = nullptr;
63
    size_t data_page_size = STORAGE_PAGE_SIZE_DEFAULT_VALUE;
64
    size_t dict_page_size = STORAGE_DICT_PAGE_SIZE_DEFAULT_VALUE;
65
    // store compressed page only when space saving is above the threshold.
66
    // space saving = 1 - compressed_size / uncompressed_size
67
    double compression_min_space_saving = 0.1;
68
    bool need_zone_map = false;
69
    bool need_bloom_filter = false;
70
    bool is_ngram_bf_index = false;
71
    bool need_inverted_index = false;
72
    bool need_ann_index = false;
73
    uint8_t gram_size;
74
    uint16_t gram_bf_size;
75
    BloomFilterOptions bf_options;
76
    std::vector<const TabletIndex*> inverted_indexes;
77
    IndexFileWriter* index_file_writer = nullptr;
78
79
    SegmentFooterPB* footer = nullptr;
80
    io::FileWriter* file_writer = nullptr;
81
    CompressionTypePB compression_type = UNKNOWN_COMPRESSION;
82
    RowsetWriterContext* rowset_ctx = nullptr;
83
    // For collect segment statistics for compaction
84
    std::vector<RowsetReaderSharedPtr> input_rs_readers;
85
    const TabletIndex* ann_index = nullptr;
86
87
    EncodingPreference encoding_preference {};
88
89
0
    std::string to_string() const {
90
0
        std::stringstream ss;
91
0
        ss << std::boolalpha << "meta=" << meta->DebugString()
92
0
           << ", data_page_size=" << data_page_size << ", dict_page_size=" << dict_page_size
93
0
           << ", compression_min_space_saving = " << compression_min_space_saving
94
0
           << ", need_zone_map=" << need_zone_map << ", need_bloom_filter" << need_bloom_filter;
95
0
        return ss.str();
96
0
    }
97
};
98
99
class EncodingInfo;
100
class NullBitmapBuilder;
101
class OrdinalIndexWriter;
102
class PageBuilder;
103
class BloomFilterIndexWriter;
104
class ZoneMapIndexWriter;
105
class VariantColumnWriterImpl;
106
class ColumnWriter;
107
108
class ColumnWriter {
109
public:
110
    static Status create(const ColumnWriterOptions& opts, const TabletColumn* column,
111
                         io::FileWriter* file_writer, std::unique_ptr<ColumnWriter>* writer);
112
    static Status create_struct_writer(const ColumnWriterOptions& opts, const TabletColumn* column,
113
                                       io::FileWriter* file_writer,
114
                                       std::unique_ptr<ColumnWriter>* writer);
115
    static Status create_array_writer(const ColumnWriterOptions& opts, const TabletColumn* column,
116
                                      io::FileWriter* file_writer,
117
                                      std::unique_ptr<ColumnWriter>* writer);
118
    static Status create_map_writer(const ColumnWriterOptions& opts, const TabletColumn* column,
119
                                    io::FileWriter* file_writer,
120
                                    std::unique_ptr<ColumnWriter>* writer);
121
122
    static Status create_variant_writer(const ColumnWriterOptions& opts, const TabletColumn* column,
123
                                        io::FileWriter* file_writer,
124
                                        std::unique_ptr<ColumnWriter>* writer);
125
126
    static Status create_agg_state_writer(const ColumnWriterOptions& opts,
127
                                          const TabletColumn* column, io::FileWriter* file_writer,
128
                                          std::unique_ptr<ColumnWriter>* writer);
129
130
    explicit ColumnWriter(std::unique_ptr<StorageField> field, bool is_nullable,
131
                          ColumnMetaPB* meta);
132
133
89.4k
    virtual ~ColumnWriter() = default;
134
135
    virtual Status init() = 0;
136
137
    template <typename CellType>
138
651k
    Status append(const CellType& cell) {
139
651k
        if (_is_nullable) {
140
651k
            uint8_t nullmap = 0;
141
651k
            BitmapChange(&nullmap, 0, cell.is_null());
142
651k
            return append_nullable(&nullmap, cell.cell_ptr(), 1);
143
651k
        } else {
144
4
            auto* cel_ptr = cell.cell_ptr();
145
4
            return append_data((const uint8_t**)&cel_ptr, 1);
146
4
        }
147
651k
    }
148
149
    // Now we only support append one by one, we should support append
150
    // multi rows in one call
151
0
    Status append(bool is_null, void* data) {
152
0
        uint8_t nullmap = 0;
153
0
        BitmapChange(&nullmap, 0, is_null);
154
0
        return append_nullable(&nullmap, data, 1);
155
0
    }
156
157
    Status append(const uint8_t* nullmap, const void* data, size_t num_rows);
158
159
    Status append_nullable(const uint8_t* nullmap, const void* data, size_t num_rows);
160
161
    // use only in vectorized load
162
    virtual Status append_nullable(const uint8_t* null_map, const uint8_t** data, size_t num_rows);
163
164
    virtual Status append_nulls(size_t num_rows) = 0;
165
166
    virtual Status finish_current_page() = 0;
167
168
    virtual uint64_t estimate_buffer_size() = 0;
169
170
    // finish append data
171
    virtual Status finish() = 0;
172
173
    // write all data into file
174
    virtual Status write_data() = 0;
175
176
    virtual Status write_ordinal_index() = 0;
177
178
    virtual Status write_zone_map() = 0;
179
180
    virtual Status write_inverted_index() = 0;
181
182
74.4k
    virtual Status write_ann_index() { return Status::OK(); }
183
184
    virtual Status write_bloom_filter_index() = 0;
185
186
    virtual ordinal_t get_next_rowid() const = 0;
187
188
    virtual uint64_t get_raw_data_bytes() const = 0;
189
    virtual uint64_t get_total_uncompressed_data_pages_bytes() const = 0;
190
    virtual uint64_t get_total_compressed_data_pages_bytes() const = 0;
191
192
    // used for append not null data.
193
    virtual Status append_data(const uint8_t** ptr, size_t num_rows) = 0;
194
195
965k
    bool is_nullable() const { return _is_nullable; }
196
197
991k
    StorageField* get_field() const { return _field.get(); }
198
199
76.2k
    ColumnMetaPB* get_column_meta() const { return _column_meta; }
200
201
protected:
202
    DataTypePtr _data_type;
203
204
private:
205
    std::unique_ptr<StorageField> _field;
206
    bool _is_nullable;
207
    ColumnMetaPB* _column_meta;
208
    std::vector<uint8_t> _null_bitmap;
209
};
210
211
class FlushPageCallback {
212
public:
213
3.16k
    virtual ~FlushPageCallback() = default;
214
0
    virtual void put_extra_info_in_page(DataPageFooterPB* footer) {}
215
};
216
217
// Encode one column's data into some memory slice.
218
// Because some columns would be stored in a file, we should wait
219
// until all columns has been finished, and then data can be written
220
// to file
221
class ScalarColumnWriter : public ColumnWriter {
222
public:
223
    ScalarColumnWriter(const ColumnWriterOptions& opts, std::unique_ptr<StorageField> field,
224
                       io::FileWriter* file_writer);
225
226
    ~ScalarColumnWriter() override;
227
228
    Status init() override;
229
230
    Status append_nulls(size_t num_rows) override;
231
232
    Status finish_current_page() override;
233
234
    uint64_t estimate_buffer_size() override;
235
236
    // finish append data
237
    Status finish() override;
238
239
    Status write_data() override;
240
    Status write_ordinal_index() override;
241
    Status write_zone_map() override;
242
    Status write_inverted_index() override;
243
    Status write_bloom_filter_index() override;
244
8.65k
    ordinal_t get_next_rowid() const override { return _next_rowid; }
245
246
83.4k
    uint64_t get_raw_data_bytes() const override { return _raw_data_bytes; }
247
248
83.4k
    uint64_t get_total_uncompressed_data_pages_bytes() const override {
249
83.4k
        return _total_uncompressed_data_pages_size;
250
83.4k
    }
251
252
83.4k
    uint64_t get_total_compressed_data_pages_bytes() const override {
253
83.4k
        return _total_compressed_data_pages_size;
254
83.4k
    }
255
256
3.16k
    void register_flush_page_callback(FlushPageCallback* flush_page_callback) {
257
3.16k
        _new_page_callback = flush_page_callback;
258
3.16k
    }
259
    Status append_data(const uint8_t** ptr, size_t num_rows) override;
260
    Status append_nullable(const uint8_t* null_map, const uint8_t** ptr, size_t num_rows) override;
261
262
    // used for append not null data. When page is full, will append data not reach num_rows.
263
    Status append_data_in_current_page(const uint8_t** ptr, size_t* num_written);
264
265
777k
    Status append_data_in_current_page(const uint8_t* ptr, size_t* num_written) {
266
777k
        RETURN_IF_CATCH_EXCEPTION(
267
777k
                { return _internal_append_data_in_current_page(ptr, num_written); });
268
777k
    }
269
    friend class ArrayColumnWriter;
270
    friend class OffsetColumnWriter;
271
272
private:
273
    Status _internal_append_data_in_current_page(const uint8_t* ptr, size_t* num_written);
274
275
private:
276
    struct NullRun {
277
        bool is_null;
278
        uint32_t len;
279
    };
280
281
    std::vector<NullRun> _null_run_buffer;
282
    std::unique_ptr<PageBuilder> _page_builder;
283
284
    std::unique_ptr<NullBitmapBuilder> _null_bitmap_builder;
285
286
    ColumnWriterOptions _opts;
287
288
    const EncodingInfo* _encoding_info = nullptr;
289
290
    ordinal_t _next_rowid = 0;
291
292
    // All Pages will be organized into a linked list
293
    struct Page {
294
        // the data vector may contain:
295
        //     1. one OwnedSlice if the page body is compressed
296
        //     2. one OwnedSlice if the page body is not compressed and doesn't have nullmap
297
        //     3. two OwnedSlice if the page body is not compressed and has nullmap
298
        // use vector for easier management for lifetime of OwnedSlice
299
        std::vector<OwnedSlice> data;
300
        PageFooterPB footer;
301
    };
302
303
90.4k
    void _push_back_page(std::unique_ptr<Page> page) {
304
166k
        for (auto& data_slice : page->data) {
305
166k
            _data_size += data_slice.slice().size;
306
166k
        }
307
        // estimate (page footer + footer size + checksum) took 20 bytes
308
90.4k
        _data_size += 20;
309
        // add page to pages' tail
310
90.4k
        _pages.emplace_back(std::move(page));
311
90.4k
    }
312
313
    Status _write_data_page(Page* page);
314
315
private:
316
    io::FileWriter* _file_writer = nullptr;
317
    // total size of data page list
318
    uint64_t _data_size;
319
320
    uint64_t _raw_data_bytes {0};
321
    uint64_t _total_uncompressed_data_pages_size {0};
322
    uint64_t _total_compressed_data_pages_size {0};
323
324
    // cached generated pages,
325
    std::vector<std::unique_ptr<Page>> _pages;
326
    ordinal_t _first_rowid = 0;
327
328
    BlockCompressionCodec* _compress_codec;
329
330
    std::unique_ptr<OrdinalIndexWriter> _ordinal_index_builder;
331
    std::unique_ptr<ZoneMapIndexWriter> _zone_map_index_builder;
332
    std::vector<std::unique_ptr<IndexColumnWriter>> _inverted_index_builders;
333
    std::unique_ptr<BloomFilterIndexWriter> _bloom_filter_index_builder;
334
335
    // call before flush data page.
336
    FlushPageCallback* _new_page_callback = nullptr;
337
};
338
339
// offsetColumnWriter is used column which has offset column, like array, map.
340
//  column type is only uint64 and should response for whole column value [start, end], end will set
341
//  in footer.next_array_item_ordinal which in finish_cur_page() callback put_extra_info_in_page()
342
class OffsetColumnWriter final : public ScalarColumnWriter, FlushPageCallback {
343
public:
344
    OffsetColumnWriter(const ColumnWriterOptions& opts, std::unique_ptr<StorageField> field,
345
                       io::FileWriter* file_writer);
346
347
    ~OffsetColumnWriter() override;
348
349
    Status init() override;
350
351
    Status append_data(const uint8_t** ptr, size_t num_rows) override;
352
353
private:
354
    void put_extra_info_in_page(DataPageFooterPB* footer) override;
355
356
    uint64_t _next_offset;
357
};
358
359
class StructColumnWriter final : public ColumnWriter {
360
public:
361
    explicit StructColumnWriter(const ColumnWriterOptions& opts,
362
                                std::unique_ptr<StorageField> field,
363
                                ScalarColumnWriter* null_writer,
364
                                std::vector<std::unique_ptr<ColumnWriter>>& sub_column_writers);
365
188
    ~StructColumnWriter() override = default;
366
367
    Status init() override;
368
369
    Status append_nullable(const uint8_t* null_map, const uint8_t** data, size_t num_rows) override;
370
    Status append_data(const uint8_t** ptr, size_t num_rows) override;
371
372
    uint64_t estimate_buffer_size() override;
373
374
    Status finish() override;
375
    Status write_data() override;
376
    Status write_ordinal_index() override;
377
    Status append_nulls(size_t num_rows) override;
378
379
    Status finish_current_page() override;
380
381
188
    Status write_zone_map() override {
382
188
        if (_opts.need_zone_map) {
383
0
            return Status::NotSupported("struct not support zone map");
384
0
        }
385
188
        return Status::OK();
386
188
    }
387
388
    Status write_inverted_index() override;
389
188
    Status write_bloom_filter_index() override {
390
188
        if (_opts.need_bloom_filter) {
391
0
            return Status::NotSupported("struct not support bloom filter index");
392
0
        }
393
188
        return Status::OK();
394
188
    }
395
396
188
    ordinal_t get_next_rowid() const override { return _sub_column_writers[0]->get_next_rowid(); }
397
398
188
    uint64_t get_raw_data_bytes() const override {
399
188
        return _get_total_data_pages_bytes(&ColumnWriter::get_raw_data_bytes);
400
188
    }
401
402
188
    uint64_t get_total_uncompressed_data_pages_bytes() const override {
403
188
        return _get_total_data_pages_bytes(&ColumnWriter::get_total_uncompressed_data_pages_bytes);
404
188
    }
405
406
188
    uint64_t get_total_compressed_data_pages_bytes() const override {
407
188
        return _get_total_data_pages_bytes(&ColumnWriter::get_total_compressed_data_pages_bytes);
408
188
    }
409
410
private:
411
    template <typename Func>
412
564
    uint64_t _get_total_data_pages_bytes(Func func) const {
413
564
        uint64_t size = is_nullable() ? std::invoke(func, _null_writer.get()) : 0;
414
3.25k
        for (const auto& writer : _sub_column_writers) {
415
3.25k
            size += std::invoke(func, writer.get());
416
3.25k
        }
417
564
        return size;
418
564
    }
419
420
private:
421
    size_t _num_sub_column_writers;
422
    std::unique_ptr<ScalarColumnWriter> _null_writer;
423
    std::vector<std::unique_ptr<ColumnWriter>> _sub_column_writers;
424
    ColumnWriterOptions _opts;
425
};
426
427
class ArrayColumnWriter final : public ColumnWriter {
428
public:
429
    explicit ArrayColumnWriter(const ColumnWriterOptions& opts, std::unique_ptr<StorageField> field,
430
                               OffsetColumnWriter* offset_writer, ScalarColumnWriter* null_writer,
431
                               std::unique_ptr<ColumnWriter> item_writer);
432
1.82k
    ~ArrayColumnWriter() override = default;
433
434
    Status init() override;
435
436
    Status append_data(const uint8_t** ptr, size_t num_rows) override;
437
438
    uint64_t estimate_buffer_size() override;
439
440
    Status finish() override;
441
    Status write_data() override;
442
    Status write_ordinal_index() override;
443
    Status append_nulls(size_t num_rows) override;
444
    Status append_nullable(const uint8_t* null_map, const uint8_t** ptr, size_t num_rows) override;
445
446
    Status finish_current_page() override;
447
448
1.71k
    Status write_zone_map() override {
449
1.71k
        if (_opts.need_zone_map) {
450
0
            return Status::NotSupported("array not support zone map");
451
0
        }
452
1.71k
        return Status::OK();
453
1.71k
    }
454
455
    Status write_inverted_index() override;
456
    Status write_ann_index() override;
457
1.71k
    Status write_bloom_filter_index() override {
458
1.71k
        if (_opts.need_bloom_filter) {
459
0
            return Status::NotSupported("array not support bloom filter index");
460
0
        }
461
1.71k
        return Status::OK();
462
1.71k
    }
463
1.92k
    ordinal_t get_next_rowid() const override { return _offset_writer->get_next_rowid(); }
464
465
1.81k
    uint64_t get_raw_data_bytes() const override {
466
1.81k
        return _get_total_data_pages_bytes(&ColumnWriter::get_raw_data_bytes);
467
1.81k
    }
468
469
1.81k
    uint64_t get_total_uncompressed_data_pages_bytes() const override {
470
1.81k
        return _get_total_data_pages_bytes(&ColumnWriter::get_total_uncompressed_data_pages_bytes);
471
1.81k
    }
472
473
1.81k
    uint64_t get_total_compressed_data_pages_bytes() const override {
474
1.81k
        return _get_total_data_pages_bytes(&ColumnWriter::get_total_compressed_data_pages_bytes);
475
1.81k
    }
476
477
private:
478
    template <typename Func>
479
5.45k
    uint64_t _get_total_data_pages_bytes(Func func) const {
480
5.45k
        uint64_t size = std::invoke(func, _offset_writer.get());
481
5.45k
        if (is_nullable()) {
482
4.91k
            size += std::invoke(func, _null_writer.get());
483
4.91k
        }
484
5.45k
        size += std::invoke(func, _item_writer.get());
485
5.45k
        return size;
486
5.45k
    }
487
488
private:
489
    Status write_null_column(size_t num_rows, bool is_null); // 写入num_rows个null标记
490
1.79k
    bool has_empty_items() const { return _item_writer->get_next_rowid() == 0; }
491
492
private:
493
    std::unique_ptr<OffsetColumnWriter> _offset_writer;
494
    std::unique_ptr<ScalarColumnWriter> _null_writer;
495
    std::unique_ptr<ColumnWriter> _item_writer;
496
    std::unique_ptr<IndexColumnWriter> _inverted_index_writer;
497
    std::unique_ptr<AnnIndexColumnWriter> _ann_index_writer;
498
    ColumnWriterOptions _opts;
499
};
500
501
class MapColumnWriter final : public ColumnWriter {
502
public:
503
    explicit MapColumnWriter(const ColumnWriterOptions& opts, std::unique_ptr<StorageField> field,
504
                             ScalarColumnWriter* null_writer, OffsetColumnWriter* offsets_writer,
505
                             std::vector<std::unique_ptr<ColumnWriter>>& _kv_writers);
506
507
1.33k
    ~MapColumnWriter() override = default;
508
509
    Status init() override;
510
511
    Status append_data(const uint8_t** ptr, size_t num_rows) override;
512
    Status append_nullable(const uint8_t* null_map, const uint8_t** ptr, size_t num_rows) override;
513
    uint64_t estimate_buffer_size() override;
514
515
    Status finish() override;
516
    Status write_data() override;
517
    Status write_ordinal_index() override;
518
    Status write_inverted_index() override;
519
    Status append_nulls(size_t num_rows) override;
520
521
    Status finish_current_page() override;
522
523
994
    Status write_zone_map() override {
524
994
        if (_opts.need_zone_map) {
525
0
            return Status::NotSupported("map not support zone map");
526
0
        }
527
994
        return Status::OK();
528
994
    }
529
530
994
    Status write_bloom_filter_index() override {
531
994
        if (_opts.need_bloom_filter) {
532
0
            return Status::NotSupported("map not support bloom filter index");
533
0
        }
534
994
        return Status::OK();
535
994
    }
536
537
    // according key writer to get next rowid
538
1.33k
    ordinal_t get_next_rowid() const override { return _offsets_writer->get_next_rowid(); }
539
540
1.00k
    uint64_t get_raw_data_bytes() const override {
541
1.00k
        return _get_total_data_pages_bytes(&ColumnWriter::get_raw_data_bytes);
542
1.00k
    }
543
544
1.00k
    uint64_t get_total_uncompressed_data_pages_bytes() const override {
545
1.00k
        return _get_total_data_pages_bytes(&ColumnWriter::get_total_uncompressed_data_pages_bytes);
546
1.00k
    }
547
548
1.00k
    uint64_t get_total_compressed_data_pages_bytes() const override {
549
1.00k
        return _get_total_data_pages_bytes(&ColumnWriter::get_total_compressed_data_pages_bytes);
550
1.00k
    }
551
552
private:
553
    template <typename Func>
554
3.00k
    uint64_t _get_total_data_pages_bytes(Func func) const {
555
3.00k
        uint64_t size = std::invoke(func, _offsets_writer.get());
556
3.00k
        if (is_nullable()) {
557
3.00k
            size += std::invoke(func, _null_writer.get());
558
3.00k
        }
559
6.01k
        for (const auto& writer : _kv_writers) {
560
6.01k
            size += std::invoke(func, writer.get());
561
6.01k
        }
562
3.00k
        return size;
563
3.00k
    }
564
565
private:
566
    std::vector<std::unique_ptr<ColumnWriter>> _kv_writers;
567
    // we need null writer to make sure a row is null or not
568
    std::unique_ptr<ScalarColumnWriter> _null_writer;
569
    std::unique_ptr<OffsetColumnWriter> _offsets_writer;
570
    std::unique_ptr<IndexColumnWriter> _index_builder;
571
    ColumnWriterOptions _opts;
572
};
573
574
// used for compaction to write sub variant column
575
class VariantSubcolumnWriter : public ColumnWriter {
576
public:
577
    explicit VariantSubcolumnWriter(const ColumnWriterOptions& opts, const TabletColumn* column,
578
                                    std::unique_ptr<StorageField> field);
579
580
1
    ~VariantSubcolumnWriter() override = default;
581
582
    Status init() override;
583
584
    Status append_data(const uint8_t** ptr, size_t num_rows) override;
585
586
    uint64_t estimate_buffer_size() override;
587
588
    Status finish() override;
589
    Status write_data() override;
590
    Status write_ordinal_index() override;
591
592
    Status write_zone_map() override;
593
594
    Status write_inverted_index() override;
595
    Status write_bloom_filter_index() override;
596
0
    ordinal_t get_next_rowid() const override { return _next_rowid; }
597
598
0
    uint64_t get_raw_data_bytes() const override {
599
0
        return 0; // TODO
600
0
    }
601
602
0
    uint64_t get_total_uncompressed_data_pages_bytes() const override {
603
0
        return 0; // TODO
604
0
    }
605
606
0
    uint64_t get_total_compressed_data_pages_bytes() const override {
607
0
        return 0; // TODO
608
0
    }
609
610
0
    Status append_nulls(size_t num_rows) override {
611
0
        return Status::NotSupported("variant writer can not append_nulls");
612
0
    }
613
    Status append_nullable(const uint8_t* null_map, const uint8_t** ptr, size_t num_rows) override;
614
615
0
    Status finish_current_page() override {
616
0
        return Status::NotSupported("variant writer has no data, can not finish_current_page");
617
0
    }
618
619
0
    size_t get_non_null_size() const { return none_null_size; }
620
621
    Status finalize();
622
623
private:
624
    bool is_finalized() const;
625
    bool _is_finalized = false;
626
    ordinal_t _next_rowid = 0;
627
    size_t none_null_size = 0;
628
    ColumnVariant::MutablePtr _column;
629
    const TabletColumn* _tablet_column = nullptr;
630
    ColumnWriterOptions _opts;
631
    std::unique_ptr<ColumnWriter> _writer;
632
    TabletIndexes _indexes;
633
634
    std::unique_ptr<NestedGroupWriteProvider> _nested_group_provider;
635
    VariantStatistics _statistics;
636
};
637
638
class VariantColumnWriter : public ColumnWriter {
639
public:
640
    explicit VariantColumnWriter(const ColumnWriterOptions& opts, const TabletColumn* column,
641
                                 std::unique_ptr<StorageField> field);
642
643
321
    ~VariantColumnWriter() override = default;
644
645
    Status init() override;
646
647
    Status append_data(const uint8_t** ptr, size_t num_rows) override;
648
649
    uint64_t estimate_buffer_size() override;
650
651
    Status finish() override;
652
    Status write_data() override;
653
    Status write_ordinal_index() override;
654
655
    Status write_zone_map() override;
656
657
    Status write_inverted_index() override;
658
    Status write_bloom_filter_index() override;
659
0
    ordinal_t get_next_rowid() const override { return _next_rowid; }
660
661
300
    uint64_t get_raw_data_bytes() const override {
662
300
        return 0; // TODO
663
300
    }
664
665
300
    uint64_t get_total_uncompressed_data_pages_bytes() const override {
666
300
        return 0; // TODO
667
300
    }
668
669
300
    uint64_t get_total_compressed_data_pages_bytes() const override {
670
300
        return 0; // TODO
671
300
    }
672
673
0
    Status append_nulls(size_t num_rows) override {
674
0
        return Status::NotSupported("variant writer can not append_nulls");
675
0
    }
676
    Status append_nullable(const uint8_t* null_map, const uint8_t** ptr, size_t num_rows) override;
677
678
0
    Status finish_current_page() override {
679
0
        return Status::NotSupported("variant writer has no data, can not finish_current_page");
680
0
    }
681
682
private:
683
    std::unique_ptr<VariantColumnWriterImpl> _impl;
684
    ordinal_t _next_rowid = 0;
685
};
686
687
} // namespace segment_v2
688
} // namespace doris