Coverage Report

Created: 2026-05-21 18:55

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/segment/column_writer.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <gen_cpp/segment_v2.pb.h>
21
#include <stddef.h>
22
#include <stdint.h>
23
24
#include <algorithm>
25
#include <memory> // for unique_ptr
26
#include <ostream>
27
#include <string>
28
#include <unordered_map>
29
#include <utility>
30
#include <vector>
31
32
#include "common/status.h" // for Status
33
#include "core/column/column_variant.h"
34
#include "storage/index/ann/ann_index_writer.h"
35
#include "storage/index/bloom_filter/bloom_filter.h"
36
#include "storage/index/inverted/inverted_index_writer.h"
37
#include "storage/segment/common.h"
38
#include "storage/segment/options.h"
39
#include "storage/segment/variant/nested_group_provider.h"
40
#include "storage/segment/variant/variant_statistics.h"
41
#include "storage/tablet/tablet_schema.h" // for TabletColumnPtr
42
#include "storage/types.h"                // for field_type_size
43
#include "util/bitmap.h"                  // for BitmapChange
44
#include "util/slice.h"                   // for OwnedSlice
45
46
namespace doris {
47
48
class BlockCompressionCodec;
49
class TabletColumn;
50
class TabletIndex;
51
struct RowsetWriterContext;
52
53
namespace io {
54
class FileWriter;
55
}
56
57
namespace segment_v2 {
58
59
struct ColumnWriterOptions {
60
    // input and output parameter:
61
    // - input: column_id/unique_id/type/length/encoding/compression/is_nullable members
62
    // - output: encoding/indexes/dict_page members
63
    ColumnMetaPB* meta = nullptr;
64
    size_t data_page_size = STORAGE_PAGE_SIZE_DEFAULT_VALUE;
65
    size_t dict_page_size = STORAGE_DICT_PAGE_SIZE_DEFAULT_VALUE;
66
    // store compressed page only when space saving is above the threshold.
67
    // space saving = 1 - compressed_size / uncompressed_size
68
    double compression_min_space_saving = 0.1;
69
    bool need_zone_map = false;
70
    bool need_bloom_filter = false;
71
    bool is_ngram_bf_index = false;
72
    bool need_inverted_index = false;
73
    bool need_ann_index = false;
74
    uint8_t gram_size;
75
    uint16_t gram_bf_size;
76
    BloomFilterOptions bf_options;
77
    std::vector<const TabletIndex*> inverted_indexes;
78
    IndexFileWriter* index_file_writer = nullptr;
79
80
    SegmentFooterPB* footer = nullptr;
81
    io::FileWriter* file_writer = nullptr;
82
    CompressionTypePB compression_type = UNKNOWN_COMPRESSION;
83
    RowsetWriterContext* rowset_ctx = nullptr;
84
    // For collect segment statistics for compaction
85
    std::vector<RowsetReaderSharedPtr> input_rs_readers;
86
    const TabletIndex* ann_index = nullptr;
87
88
    EncodingPreference encoding_preference {};
89
90
0
    std::string to_string() const {
91
0
        std::stringstream ss;
92
0
        ss << std::boolalpha << "meta=" << meta->DebugString()
93
0
           << ", data_page_size=" << data_page_size << ", dict_page_size=" << dict_page_size
94
0
           << ", compression_min_space_saving = " << compression_min_space_saving
95
0
           << ", need_zone_map=" << need_zone_map << ", need_bloom_filter" << need_bloom_filter;
96
0
        return ss.str();
97
0
    }
98
};
99
100
class EncodingInfo;
101
class NullBitmapBuilder;
102
class OrdinalIndexWriter;
103
class PageBuilder;
104
class BloomFilterIndexWriter;
105
class ZoneMapIndexWriter;
106
class VariantColumnWriterImpl;
107
class ColumnWriter;
108
109
class ColumnWriter {
110
public:
111
    static Status create(const ColumnWriterOptions& opts, const TabletColumn* column,
112
                         io::FileWriter* file_writer, std::unique_ptr<ColumnWriter>* writer);
113
    static Status create_struct_writer(const ColumnWriterOptions& opts, const TabletColumn* column,
114
                                       io::FileWriter* file_writer,
115
                                       std::unique_ptr<ColumnWriter>* writer);
116
    static Status create_array_writer(const ColumnWriterOptions& opts, const TabletColumn* column,
117
                                      io::FileWriter* file_writer,
118
                                      std::unique_ptr<ColumnWriter>* writer);
119
    static Status create_map_writer(const ColumnWriterOptions& opts, const TabletColumn* column,
120
                                    io::FileWriter* file_writer,
121
                                    std::unique_ptr<ColumnWriter>* writer);
122
123
    static Status create_variant_writer(const ColumnWriterOptions& opts, const TabletColumn* column,
124
                                        io::FileWriter* file_writer,
125
                                        std::unique_ptr<ColumnWriter>* writer);
126
127
    static Status create_agg_state_writer(const ColumnWriterOptions& opts,
128
                                          const TabletColumn* column, io::FileWriter* file_writer,
129
                                          std::unique_ptr<ColumnWriter>* writer);
130
131
    explicit ColumnWriter(TabletColumnPtr column, bool is_nullable, ColumnMetaPB* meta);
132
133
12.9k
    virtual ~ColumnWriter() = default;
134
135
    virtual Status init() = 0;
136
137
    template <typename CellType>
138
    Status append(const CellType& cell) {
139
        if (_is_nullable) {
140
            uint8_t nullmap = 0;
141
            BitmapChange(&nullmap, 0, cell.is_null());
142
            return append_nullable(&nullmap, cell.cell_ptr(), 1);
143
        } else {
144
            auto* cel_ptr = cell.cell_ptr();
145
            return append_data((const uint8_t**)&cel_ptr, 1);
146
        }
147
    }
148
149
    // Now we only support append one by one, we should support append
150
    // multi rows in one call
151
651k
    Status append(bool is_null, void* data) {
152
651k
        uint8_t nullmap = 0;
153
651k
        BitmapChange(&nullmap, 0, is_null);
154
651k
        return append_nullable(&nullmap, data, 1);
155
651k
    }
156
157
    Status append(const uint8_t* nullmap, const void* data, size_t num_rows);
158
159
    Status append_nullable(const uint8_t* nullmap, const void* data, size_t num_rows);
160
161
    // use only in vectorized load
162
    virtual Status append_nullable(const uint8_t* null_map, const uint8_t** data, size_t num_rows);
163
164
    virtual Status append_nulls(size_t num_rows) = 0;
165
166
    virtual Status finish_current_page() = 0;
167
168
    virtual uint64_t estimate_buffer_size() = 0;
169
170
    // finish append data
171
    virtual Status finish() = 0;
172
173
    // write all data into file
174
    virtual Status write_data() = 0;
175
176
    virtual Status write_ordinal_index() = 0;
177
178
    virtual Status write_zone_map() = 0;
179
180
    virtual Status write_inverted_index() = 0;
181
182
10.3k
    virtual Status write_ann_index() { return Status::OK(); }
183
184
    virtual Status write_bloom_filter_index() = 0;
185
186
    virtual ordinal_t get_next_rowid() const = 0;
187
188
    virtual uint64_t get_raw_data_bytes() const = 0;
189
    virtual uint64_t get_total_uncompressed_data_pages_bytes() const = 0;
190
    virtual uint64_t get_total_compressed_data_pages_bytes() const = 0;
191
192
    // used for append not null data.
193
    virtual Status append_data(const uint8_t** ptr, size_t num_rows) = 0;
194
195
728k
    bool is_nullable() const { return _is_nullable; }
196
197
30.1k
    const TabletColumn* get_column() const { return _column.get(); }
198
199
    // Per-row in-memory cell footprint of this writer's column, used to step
200
    // the input pointer across rows in append_*/null-run loops.
201
724k
    size_t cell_size() const { return field_type_size(_column->type()); }
202
203
10.3k
    ColumnMetaPB* get_column_meta() const { return _column_meta; }
204
205
protected:
206
    DataTypePtr _data_type;
207
208
private:
209
    TabletColumnPtr _column;
210
    bool _is_nullable;
211
    ColumnMetaPB* _column_meta;
212
    std::vector<uint8_t> _null_bitmap;
213
};
214
215
class FlushPageCallback {
216
public:
217
337
    virtual ~FlushPageCallback() = default;
218
0
    virtual void put_extra_info_in_page(DataPageFooterPB* footer) {}
219
};
220
221
// Encodes one column's data into in-memory slices.
222
// Because multiple columns are stored in one file, we must wait
223
// until all columns have been finished before the data can be written
224
// to the file.
225
class ScalarColumnWriter : public ColumnWriter {
226
public:
227
    ScalarColumnWriter(const ColumnWriterOptions& opts, TabletColumnPtr column,
228
                       io::FileWriter* file_writer);
229
230
    ~ScalarColumnWriter() override;
231
232
    Status init() override;
233
234
    Status append_nulls(size_t num_rows) override;
235
236
    Status finish_current_page() override;
237
238
    uint64_t estimate_buffer_size() override;
239
240
    // finish append data
241
    Status finish() override;
242
243
    Status write_data() override;
244
    Status write_ordinal_index() override;
245
    Status write_zone_map() override;
246
    Status write_inverted_index() override;
247
    Status write_bloom_filter_index() override;
248
988
    ordinal_t get_next_rowid() const override { return _next_rowid; }
249
250
10.0k
    uint64_t get_raw_data_bytes() const override { return _raw_data_bytes; }
251
252
10.0k
    uint64_t get_total_uncompressed_data_pages_bytes() const override {
253
10.0k
        return _total_uncompressed_data_pages_size;
254
10.0k
    }
255
256
10.0k
    uint64_t get_total_compressed_data_pages_bytes() const override {
257
10.0k
        return _total_compressed_data_pages_size;
258
10.0k
    }
259
260
337
    void register_flush_page_callback(FlushPageCallback* flush_page_callback) {
261
337
        _new_page_callback = flush_page_callback;
262
337
    }
263
    Status append_data(const uint8_t** ptr, size_t num_rows) override;
264
    Status append_nullable(const uint8_t* null_map, const uint8_t** ptr, size_t num_rows) override;
265
266
    // Used to append non-null data. When the page is full, fewer rows than requested may be appended.
267
    Status append_data_in_current_page(const uint8_t** ptr, size_t* num_written);
268
269
696k
    Status append_data_in_current_page(const uint8_t* ptr, size_t* num_written) {
270
696k
        RETURN_IF_CATCH_EXCEPTION(
271
696k
                { return _internal_append_data_in_current_page(ptr, num_written); });
272
696k
    }
273
    friend class ArrayColumnWriter;
274
    friend class OffsetColumnWriter;
275
276
private:
277
    Status _internal_append_data_in_current_page(const uint8_t* ptr, size_t* num_written);
278
279
private:
280
    struct NullRun {
281
        bool is_null;
282
        uint32_t len;
283
    };
284
285
    std::vector<NullRun> _null_run_buffer;
286
    std::unique_ptr<PageBuilder> _page_builder;
287
288
    std::unique_ptr<NullBitmapBuilder> _null_bitmap_builder;
289
290
    ColumnWriterOptions _opts;
291
292
    const EncodingInfo* _encoding_info = nullptr;
293
294
    ordinal_t _next_rowid = 0;
295
296
    // All Pages will be organized into a linked list
297
    struct Page {
298
        // the data vector may contain:
299
        //     1. one OwnedSlice if the page body is compressed
300
        //     2. one OwnedSlice if the page body is not compressed and doesn't have nullmap
301
        //     3. two OwnedSlice if the page body is not compressed and has nullmap
302
        // use vector for easier management for lifetime of OwnedSlice
303
        std::vector<OwnedSlice> data;
304
        PageFooterPB footer;
305
    };
306
307
16.3k
    void _push_back_page(std::unique_ptr<Page> page) {
308
24.9k
        for (auto& data_slice : page->data) {
309
24.9k
            _data_size += data_slice.slice().size;
310
24.9k
        }
311
        // estimate (page footer + footer size + checksum) took 20 bytes
312
16.3k
        _data_size += 20;
313
        // add page to pages' tail
314
16.3k
        _pages.emplace_back(std::move(page));
315
16.3k
    }
316
317
    Status _write_data_page(Page* page);
318
319
private:
320
    io::FileWriter* _file_writer = nullptr;
321
    // total size of data page list
322
    uint64_t _data_size;
323
324
    uint64_t _raw_data_bytes {0};
325
    uint64_t _total_uncompressed_data_pages_size {0};
326
    uint64_t _total_compressed_data_pages_size {0};
327
328
    // cached generated pages,
329
    std::vector<std::unique_ptr<Page>> _pages;
330
    ordinal_t _first_rowid = 0;
331
332
    BlockCompressionCodec* _compress_codec;
333
334
    std::unique_ptr<OrdinalIndexWriter> _ordinal_index_builder;
335
    std::unique_ptr<ZoneMapIndexWriter> _zone_map_index_builder;
336
    std::vector<std::unique_ptr<IndexColumnWriter>> _inverted_index_builders;
337
    std::unique_ptr<BloomFilterIndexWriter> _bloom_filter_index_builder;
338
339
    // call before flush data page.
340
    FlushPageCallback* _new_page_callback = nullptr;
341
};
342
343
// OffsetColumnWriter is used for columns that have an offset column, such as array and map.
344
//  The column type is always uint64 and covers the whole value range [start, end]; end is stored
345
//  in footer.next_array_item_ordinal by the put_extra_info_in_page() callback when a page is finished.
346
class OffsetColumnWriter final : public ScalarColumnWriter, FlushPageCallback {
347
public:
348
    OffsetColumnWriter(const ColumnWriterOptions& opts, TabletColumnPtr column,
349
                       io::FileWriter* file_writer);
350
351
    ~OffsetColumnWriter() override;
352
353
    Status init() override;
354
355
    Status append_data(const uint8_t** ptr, size_t num_rows) override;
356
357
private:
358
    void put_extra_info_in_page(DataPageFooterPB* footer) override;
359
360
    uint64_t _next_offset;
361
};
362
363
class StructColumnWriter final : public ColumnWriter {
364
public:
365
    explicit StructColumnWriter(const ColumnWriterOptions& opts, TabletColumnPtr column,
366
                                ScalarColumnWriter* null_writer,
367
                                std::vector<std::unique_ptr<ColumnWriter>>& sub_column_writers);
368
0
    ~StructColumnWriter() override = default;
369
370
    Status init() override;
371
372
    Status append_nullable(const uint8_t* null_map, const uint8_t** data, size_t num_rows) override;
373
    Status append_data(const uint8_t** ptr, size_t num_rows) override;
374
375
    uint64_t estimate_buffer_size() override;
376
377
    Status finish() override;
378
    Status write_data() override;
379
    Status write_ordinal_index() override;
380
    Status append_nulls(size_t num_rows) override;
381
382
    Status finish_current_page() override;
383
384
0
    Status write_zone_map() override {
385
0
        if (_opts.need_zone_map) {
386
0
            return Status::NotSupported("struct not support zone map");
387
0
        }
388
0
        return Status::OK();
389
0
    }
390
391
    Status write_inverted_index() override;
392
0
    Status write_bloom_filter_index() override {
393
0
        if (_opts.need_bloom_filter) {
394
0
            return Status::NotSupported("struct not support bloom filter index");
395
0
        }
396
0
        return Status::OK();
397
0
    }
398
399
0
    ordinal_t get_next_rowid() const override { return _sub_column_writers[0]->get_next_rowid(); }
400
401
0
    uint64_t get_raw_data_bytes() const override {
402
0
        return _get_total_data_pages_bytes(&ColumnWriter::get_raw_data_bytes);
403
0
    }
404
405
0
    uint64_t get_total_uncompressed_data_pages_bytes() const override {
406
0
        return _get_total_data_pages_bytes(&ColumnWriter::get_total_uncompressed_data_pages_bytes);
407
0
    }
408
409
0
    uint64_t get_total_compressed_data_pages_bytes() const override {
410
0
        return _get_total_data_pages_bytes(&ColumnWriter::get_total_compressed_data_pages_bytes);
411
0
    }
412
413
private:
414
    template <typename Func>
415
0
    uint64_t _get_total_data_pages_bytes(Func func) const {
416
0
        uint64_t size = is_nullable() ? std::invoke(func, _null_writer.get()) : 0;
417
0
        for (const auto& writer : _sub_column_writers) {
418
0
            size += std::invoke(func, writer.get());
419
0
        }
420
0
        return size;
421
0
    }
422
423
private:
424
    size_t _num_sub_column_writers;
425
    std::unique_ptr<ScalarColumnWriter> _null_writer;
426
    std::vector<std::unique_ptr<ColumnWriter>> _sub_column_writers;
427
    ColumnWriterOptions _opts;
428
};
429
430
class ArrayColumnWriter final : public ColumnWriter {
431
public:
432
    explicit ArrayColumnWriter(const ColumnWriterOptions& opts, TabletColumnPtr column,
433
                               OffsetColumnWriter* offset_writer, ScalarColumnWriter* null_writer,
434
                               std::unique_ptr<ColumnWriter> item_writer);
435
14
    ~ArrayColumnWriter() override = default;
436
437
    Status init() override;
438
439
    Status append_data(const uint8_t** ptr, size_t num_rows) override;
440
441
    uint64_t estimate_buffer_size() override;
442
443
    Status finish() override;
444
    Status write_data() override;
445
    Status write_ordinal_index() override;
446
    Status append_nulls(size_t num_rows) override;
447
    Status append_nullable(const uint8_t* null_map, const uint8_t** ptr, size_t num_rows) override;
448
449
    Status finish_current_page() override;
450
451
3
    Status write_zone_map() override {
452
3
        if (_opts.need_zone_map) {
453
0
            return Status::NotSupported("array not support zone map");
454
0
        }
455
3
        return Status::OK();
456
3
    }
457
458
    Status write_inverted_index() override;
459
    Status write_ann_index() override;
460
3
    Status write_bloom_filter_index() override {
461
3
        if (_opts.need_bloom_filter) {
462
0
            return Status::NotSupported("array not support bloom filter index");
463
0
        }
464
3
        return Status::OK();
465
3
    }
466
16
    ordinal_t get_next_rowid() const override { return _offset_writer->get_next_rowid(); }
467
468
3
    uint64_t get_raw_data_bytes() const override {
469
3
        return _get_total_data_pages_bytes(&ColumnWriter::get_raw_data_bytes);
470
3
    }
471
472
3
    uint64_t get_total_uncompressed_data_pages_bytes() const override {
473
3
        return _get_total_data_pages_bytes(&ColumnWriter::get_total_uncompressed_data_pages_bytes);
474
3
    }
475
476
3
    uint64_t get_total_compressed_data_pages_bytes() const override {
477
3
        return _get_total_data_pages_bytes(&ColumnWriter::get_total_compressed_data_pages_bytes);
478
3
    }
479
480
private:
481
    template <typename Func>
482
9
    uint64_t _get_total_data_pages_bytes(Func func) const {
483
9
        uint64_t size = std::invoke(func, _offset_writer.get());
484
9
        if (is_nullable()) {
485
0
            size += std::invoke(func, _null_writer.get());
486
0
        }
487
9
        size += std::invoke(func, _item_writer.get());
488
9
        return size;
489
9
    }
490
491
private:
492
    Status write_null_column(size_t num_rows, bool is_null); // writes num_rows null markers
493
14
    bool has_empty_items() const { return _item_writer->get_next_rowid() == 0; }
494
495
private:
496
    std::unique_ptr<OffsetColumnWriter> _offset_writer;
497
    std::unique_ptr<ScalarColumnWriter> _null_writer;
498
    std::unique_ptr<ColumnWriter> _item_writer;
499
    std::unique_ptr<IndexColumnWriter> _inverted_index_writer;
500
    std::unique_ptr<AnnIndexColumnWriter> _ann_index_writer;
501
    ColumnWriterOptions _opts;
502
};
503
504
class MapColumnWriter final : public ColumnWriter {
505
public:
506
    explicit MapColumnWriter(const ColumnWriterOptions& opts, TabletColumnPtr column,
507
                             ScalarColumnWriter* null_writer, OffsetColumnWriter* offsets_writer,
508
                             std::vector<std::unique_ptr<ColumnWriter>>& _kv_writers);
509
510
323
    ~MapColumnWriter() override = default;
511
512
    Status init() override;
513
514
    Status append_data(const uint8_t** ptr, size_t num_rows) override;
515
    Status append_nullable(const uint8_t* null_map, const uint8_t** ptr, size_t num_rows) override;
516
    uint64_t estimate_buffer_size() override;
517
518
    Status finish() override;
519
    Status write_data() override;
520
    Status write_ordinal_index() override;
521
    Status write_inverted_index() override;
522
    Status append_nulls(size_t num_rows) override;
523
524
    Status finish_current_page() override;
525
526
1
    Status write_zone_map() override {
527
1
        if (_opts.need_zone_map) {
528
0
            return Status::NotSupported("map not support zone map");
529
0
        }
530
1
        return Status::OK();
531
1
    }
532
533
1
    Status write_bloom_filter_index() override {
534
1
        if (_opts.need_bloom_filter) {
535
0
            return Status::NotSupported("map not support bloom filter index");
536
0
        }
537
1
        return Status::OK();
538
1
    }
539
540
    // the next rowid is taken from the offsets writer
541
318
    ordinal_t get_next_rowid() const override { return _offsets_writer->get_next_rowid(); }
542
543
1
    uint64_t get_raw_data_bytes() const override {
544
1
        return _get_total_data_pages_bytes(&ColumnWriter::get_raw_data_bytes);
545
1
    }
546
547
1
    uint64_t get_total_uncompressed_data_pages_bytes() const override {
548
1
        return _get_total_data_pages_bytes(&ColumnWriter::get_total_uncompressed_data_pages_bytes);
549
1
    }
550
551
1
    uint64_t get_total_compressed_data_pages_bytes() const override {
552
1
        return _get_total_data_pages_bytes(&ColumnWriter::get_total_compressed_data_pages_bytes);
553
1
    }
554
555
private:
556
    template <typename Func>
557
3
    uint64_t _get_total_data_pages_bytes(Func func) const {
558
3
        uint64_t size = std::invoke(func, _offsets_writer.get());
559
3
        if (is_nullable()) {
560
0
            size += std::invoke(func, _null_writer.get());
561
0
        }
562
6
        for (const auto& writer : _kv_writers) {
563
6
            size += std::invoke(func, writer.get());
564
6
        }
565
3
        return size;
566
3
    }
567
568
private:
569
    std::vector<std::unique_ptr<ColumnWriter>> _kv_writers;
570
    // we need null writer to make sure a row is null or not
571
    std::unique_ptr<ScalarColumnWriter> _null_writer;
572
    std::unique_ptr<OffsetColumnWriter> _offsets_writer;
573
    std::unique_ptr<IndexColumnWriter> _index_builder;
574
    ColumnWriterOptions _opts;
575
};
576
577
// used for compaction to write sub variant column
578
class VariantSubcolumnWriter : public ColumnWriter {
579
public:
580
    explicit VariantSubcolumnWriter(const ColumnWriterOptions& opts, TabletColumnPtr column);
581
582
1
    ~VariantSubcolumnWriter() override = default;
583
584
    Status init() override;
585
586
    Status append_data(const uint8_t** ptr, size_t num_rows) override;
587
588
    uint64_t estimate_buffer_size() override;
589
590
    Status finish() override;
591
    Status write_data() override;
592
    Status write_ordinal_index() override;
593
594
    Status write_zone_map() override;
595
596
    Status write_inverted_index() override;
597
    Status write_bloom_filter_index() override;
598
0
    ordinal_t get_next_rowid() const override { return _next_rowid; }
599
600
0
    uint64_t get_raw_data_bytes() const override {
601
0
        return 0; // TODO
602
0
    }
603
604
0
    uint64_t get_total_uncompressed_data_pages_bytes() const override {
605
0
        return 0; // TODO
606
0
    }
607
608
0
    uint64_t get_total_compressed_data_pages_bytes() const override {
609
0
        return 0; // TODO
610
0
    }
611
612
0
    Status append_nulls(size_t num_rows) override {
613
0
        return Status::NotSupported("variant writer can not append_nulls");
614
0
    }
615
    Status append_nullable(const uint8_t* null_map, const uint8_t** ptr, size_t num_rows) override;
616
617
0
    Status finish_current_page() override {
618
0
        return Status::NotSupported("variant writer has no data, can not finish_current_page");
619
0
    }
620
621
0
    size_t get_non_null_size() const { return none_null_size; }
622
623
    Status finalize();
624
625
private:
626
    bool is_finalized() const;
627
    bool _is_finalized = false;
628
    ordinal_t _next_rowid = 0;
629
    size_t none_null_size = 0;
630
    ColumnVariant::MutablePtr _column;
631
    ColumnWriterOptions _opts;
632
    std::unique_ptr<ColumnWriter> _writer;
633
    TabletIndexes _indexes;
634
635
    std::unique_ptr<NestedGroupWriteProvider> _nested_group_provider;
636
    VariantStatistics _statistics;
637
};
638
639
class VariantColumnWriter : public ColumnWriter {
640
public:
641
    explicit VariantColumnWriter(const ColumnWriterOptions& opts, TabletColumnPtr column);
642
643
310
    ~VariantColumnWriter() override = default;
644
645
    Status init() override;
646
647
    Status append_data(const uint8_t** ptr, size_t num_rows) override;
648
649
    uint64_t estimate_buffer_size() override;
650
651
    Status finish() override;
652
    Status write_data() override;
653
    Status write_ordinal_index() override;
654
655
    Status write_zone_map() override;
656
657
    Status write_inverted_index() override;
658
    Status write_bloom_filter_index() override;
659
1
    ordinal_t get_next_rowid() const override { return _next_rowid; }
660
661
285
    uint64_t get_raw_data_bytes() const override {
662
285
        return 0; // TODO
663
285
    }
664
665
285
    uint64_t get_total_uncompressed_data_pages_bytes() const override {
666
285
        return 0; // TODO
667
285
    }
668
669
285
    uint64_t get_total_compressed_data_pages_bytes() const override {
670
285
        return 0; // TODO
671
285
    }
672
673
0
    Status append_nulls(size_t num_rows) override {
674
0
        return Status::NotSupported("variant writer can not append_nulls");
675
0
    }
676
    Status append_nullable(const uint8_t* null_map, const uint8_t** ptr, size_t num_rows) override;
677
678
0
    Status finish_current_page() override {
679
0
        return Status::NotSupported("variant writer has no data, can not finish_current_page");
680
0
    }
681
682
1
    VariantColumnWriterImpl* impl_for_test() const { return _impl.get(); }
683
684
private:
685
    std::unique_ptr<VariantColumnWriterImpl> _impl;
686
    ordinal_t _next_rowid = 0;
687
};
688
689
} // namespace segment_v2
690
} // namespace doris