Coverage Report

Created: 2026-03-12 17:42

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/segment/column_writer.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <gen_cpp/segment_v2.pb.h>
#include <stddef.h>
#include <stdint.h>

#include <algorithm>
#include <functional> // for std::invoke
#include <memory>     // for unique_ptr
#include <ostream>
#include <sstream> // for std::ostringstream
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "common/status.h" // for Status
#include "core/column/column_variant.h"
#include "storage/field.h" // for StorageField
#include "storage/index/ann/ann_index_writer.h"
#include "storage/index/bloom_filter/bloom_filter.h"
#include "storage/index/inverted/inverted_index_writer.h"
#include "storage/segment/common.h"
#include "storage/segment/options.h"
#include "storage/segment/variant/nested_group_provider.h"
#include "storage/segment/variant/variant_statistics.h"
#include "util/bitmap.h" // for BitmapChange
#include "util/slice.h"  // for OwnedSlice
44
45
namespace doris {
46
47
class BlockCompressionCodec;
48
class TabletColumn;
49
class TabletIndex;
50
struct RowsetWriterContext;
51
52
namespace io {
53
class FileWriter;
54
}
55
56
namespace segment_v2 {
57
58
struct ColumnWriterOptions {
59
    // input and output parameter:
60
    // - input: column_id/unique_id/type/length/encoding/compression/is_nullable members
61
    // - output: encoding/indexes/dict_page members
62
    ColumnMetaPB* meta = nullptr;
63
    size_t data_page_size = STORAGE_PAGE_SIZE_DEFAULT_VALUE;
64
    size_t dict_page_size = STORAGE_DICT_PAGE_SIZE_DEFAULT_VALUE;
65
    // store compressed page only when space saving is above the threshold.
66
    // space saving = 1 - compressed_size / uncompressed_size
67
    double compression_min_space_saving = 0.1;
68
    bool need_zone_map = false;
69
    bool need_bloom_filter = false;
70
    bool is_ngram_bf_index = false;
71
    bool need_inverted_index = false;
72
    bool need_ann_index = false;
73
    uint8_t gram_size;
74
    uint16_t gram_bf_size;
75
    BloomFilterOptions bf_options;
76
    std::vector<const TabletIndex*> inverted_indexes;
77
    IndexFileWriter* index_file_writer = nullptr;
78
79
    SegmentFooterPB* footer = nullptr;
80
    io::FileWriter* file_writer = nullptr;
81
    CompressionTypePB compression_type = UNKNOWN_COMPRESSION;
82
    RowsetWriterContext* rowset_ctx = nullptr;
83
    // For collect segment statistics for compaction
84
    std::vector<RowsetReaderSharedPtr> input_rs_readers;
85
    const TabletIndex* ann_index = nullptr;
86
87
    EncodingPreference encoding_preference {};
88
89
0
    std::string to_string() const {
90
0
        std::stringstream ss;
91
0
        ss << std::boolalpha << "meta=" << meta->DebugString()
92
0
           << ", data_page_size=" << data_page_size << ", dict_page_size=" << dict_page_size
93
0
           << ", compression_min_space_saving = " << compression_min_space_saving
94
0
           << ", need_zone_map=" << need_zone_map << ", need_bloom_filter" << need_bloom_filter;
95
0
        return ss.str();
96
0
    }
97
};
98
99
class EncodingInfo;
100
class NullBitmapBuilder;
101
class OrdinalIndexWriter;
102
class PageBuilder;
103
class BloomFilterIndexWriter;
104
class ZoneMapIndexWriter;
105
class VariantColumnWriterImpl;
106
class ColumnWriter;
107
108
class ColumnWriter {
109
public:
110
    static Status create(const ColumnWriterOptions& opts, const TabletColumn* column,
111
                         io::FileWriter* file_writer, std::unique_ptr<ColumnWriter>* writer);
112
    static Status create_struct_writer(const ColumnWriterOptions& opts, const TabletColumn* column,
113
                                       io::FileWriter* file_writer,
114
                                       std::unique_ptr<ColumnWriter>* writer);
115
    static Status create_array_writer(const ColumnWriterOptions& opts, const TabletColumn* column,
116
                                      io::FileWriter* file_writer,
117
                                      std::unique_ptr<ColumnWriter>* writer);
118
    static Status create_map_writer(const ColumnWriterOptions& opts, const TabletColumn* column,
119
                                    io::FileWriter* file_writer,
120
                                    std::unique_ptr<ColumnWriter>* writer);
121
122
    static Status create_variant_writer(const ColumnWriterOptions& opts, const TabletColumn* column,
123
                                        io::FileWriter* file_writer,
124
                                        std::unique_ptr<ColumnWriter>* writer);
125
126
    static Status create_agg_state_writer(const ColumnWriterOptions& opts,
127
                                          const TabletColumn* column, io::FileWriter* file_writer,
128
                                          std::unique_ptr<ColumnWriter>* writer);
129
130
    explicit ColumnWriter(std::unique_ptr<StorageField> field, bool is_nullable,
131
                          ColumnMetaPB* meta);
132
133
1.04M
    virtual ~ColumnWriter() = default;
134
135
    virtual Status init() = 0;
136
137
    template <typename CellType>
138
651k
    Status append(const CellType& cell) {
139
651k
        if (_is_nullable) {
140
651k
            uint8_t nullmap = 0;
141
651k
            BitmapChange(&nullmap, 0, cell.is_null());
142
651k
            return append_nullable(&nullmap, cell.cell_ptr(), 1);
143
651k
        } else {
144
4
            auto* cel_ptr = cell.cell_ptr();
145
4
            return append_data((const uint8_t**)&cel_ptr, 1);
146
4
        }
147
651k
    }
148
149
    // Now we only support append one by one, we should support append
150
    // multi rows in one call
151
0
    Status append(bool is_null, void* data) {
152
0
        uint8_t nullmap = 0;
153
0
        BitmapChange(&nullmap, 0, is_null);
154
0
        return append_nullable(&nullmap, data, 1);
155
0
    }
156
157
    Status append(const uint8_t* nullmap, const void* data, size_t num_rows);
158
159
    Status append_nullable(const uint8_t* nullmap, const void* data, size_t num_rows);
160
161
    // use only in vectorized load
162
    virtual Status append_nullable(const uint8_t* null_map, const uint8_t** data, size_t num_rows);
163
164
    virtual Status append_nulls(size_t num_rows) = 0;
165
166
    virtual Status finish_current_page() = 0;
167
168
    virtual uint64_t estimate_buffer_size() = 0;
169
170
    // finish append data
171
    virtual Status finish() = 0;
172
173
    // write all data into file
174
    virtual Status write_data() = 0;
175
176
    virtual Status write_ordinal_index() = 0;
177
178
    virtual Status write_zone_map() = 0;
179
180
    virtual Status write_inverted_index() = 0;
181
182
674k
    virtual Status write_ann_index() { return Status::OK(); }
183
184
    virtual Status write_bloom_filter_index() = 0;
185
186
    virtual ordinal_t get_next_rowid() const = 0;
187
188
    virtual uint64_t get_raw_data_bytes() const = 0;
189
    virtual uint64_t get_total_uncompressed_data_pages_bytes() const = 0;
190
    virtual uint64_t get_total_compressed_data_pages_bytes() const = 0;
191
192
    // used for append not null data.
193
    virtual Status append_data(const uint8_t** ptr, size_t num_rows) = 0;
194
195
6.54M
    bool is_nullable() const { return _is_nullable; }
196
197
8.81M
    StorageField* get_field() const { return _field.get(); }
198
199
716k
    ColumnMetaPB* get_column_meta() const { return _column_meta; }
200
201
protected:
202
    DataTypePtr _data_type;
203
204
private:
205
    std::unique_ptr<StorageField> _field;
206
    bool _is_nullable;
207
    ColumnMetaPB* _column_meta;
208
    std::vector<uint8_t> _null_bitmap;
209
};
210
211
class FlushPageCallback {
212
public:
213
69.4k
    virtual ~FlushPageCallback() = default;
214
0
    virtual void put_extra_info_in_page(DataPageFooterPB* footer) {}
215
};
216
217
// Encode one column's data into some memory slice.
218
// Because some columns would be stored in a file, we should wait
219
// until all columns has been finished, and then data can be written
220
// to file
221
class ScalarColumnWriter : public ColumnWriter {
222
public:
223
    ScalarColumnWriter(const ColumnWriterOptions& opts, std::unique_ptr<StorageField> field,
224
                       io::FileWriter* file_writer);
225
226
    ~ScalarColumnWriter() override;
227
228
    Status init() override;
229
230
    Status append_nulls(size_t num_rows) override;
231
232
    Status finish_current_page() override;
233
234
    uint64_t estimate_buffer_size() override;
235
236
    // finish append data
237
    Status finish() override;
238
239
    Status write_data() override;
240
    Status write_ordinal_index() override;
241
    Status write_zone_map() override;
242
    Status write_inverted_index() override;
243
    Status write_bloom_filter_index() override;
244
175k
    ordinal_t get_next_rowid() const override { return _next_rowid; }
245
246
818k
    uint64_t get_raw_data_bytes() const override { return _raw_data_bytes; }
247
248
818k
    uint64_t get_total_uncompressed_data_pages_bytes() const override {
249
818k
        return _total_uncompressed_data_pages_size;
250
818k
    }
251
252
818k
    uint64_t get_total_compressed_data_pages_bytes() const override {
253
818k
        return _total_compressed_data_pages_size;
254
818k
    }
255
256
69.4k
    void register_flush_page_callback(FlushPageCallback* flush_page_callback) {
257
69.4k
        _new_page_callback = flush_page_callback;
258
69.4k
    }
259
    Status append_data(const uint8_t** ptr, size_t num_rows) override;
260
    Status append_nullable(const uint8_t* null_map, const uint8_t** ptr, size_t num_rows) override;
261
262
    // used for append not null data. When page is full, will append data not reach num_rows.
263
    Status append_data_in_current_page(const uint8_t** ptr, size_t* num_written);
264
265
4.44M
    Status append_data_in_current_page(const uint8_t* ptr, size_t* num_written) {
266
4.44M
        RETURN_IF_CATCH_EXCEPTION(
267
4.44M
                { return _internal_append_data_in_current_page(ptr, num_written); });
268
4.44M
    }
269
    friend class ArrayColumnWriter;
270
    friend class OffsetColumnWriter;
271
272
private:
273
    Status _internal_append_data_in_current_page(const uint8_t* ptr, size_t* num_written);
274
275
private:
276
    struct NullRun {
277
        bool is_null;
278
        uint32_t len;
279
    };
280
281
    std::vector<NullRun> _null_run_buffer;
282
    std::unique_ptr<PageBuilder> _page_builder;
283
284
    std::unique_ptr<NullBitmapBuilder> _null_bitmap_builder;
285
286
    ColumnWriterOptions _opts;
287
288
    const EncodingInfo* _encoding_info = nullptr;
289
290
    ordinal_t _next_rowid = 0;
291
292
    // All Pages will be organized into a linked list
293
    struct Page {
294
        // the data vector may contain:
295
        //     1. one OwnedSlice if the page body is compressed
296
        //     2. one OwnedSlice if the page body is not compressed and doesn't have nullmap
297
        //     3. two OwnedSlice if the page body is not compressed and has nullmap
298
        // use vector for easier management for lifetime of OwnedSlice
299
        std::vector<OwnedSlice> data;
300
        PageFooterPB footer;
301
    };
302
303
1.00M
    void _push_back_page(std::unique_ptr<Page> page) {
304
1.91M
        for (auto& data_slice : page->data) {
305
1.91M
            _data_size += data_slice.slice().size;
306
1.91M
        }
307
        // estimate (page footer + footer size + checksum) took 20 bytes
308
1.00M
        _data_size += 20;
309
        // add page to pages' tail
310
1.00M
        _pages.emplace_back(std::move(page));
311
1.00M
    }
312
313
    Status _write_data_page(Page* page);
314
315
private:
316
    io::FileWriter* _file_writer = nullptr;
317
    // total size of data page list
318
    uint64_t _data_size;
319
320
    uint64_t _raw_data_bytes {0};
321
    uint64_t _total_uncompressed_data_pages_size {0};
322
    uint64_t _total_compressed_data_pages_size {0};
323
324
    // cached generated pages,
325
    std::vector<std::unique_ptr<Page>> _pages;
326
    ordinal_t _first_rowid = 0;
327
328
    BlockCompressionCodec* _compress_codec;
329
330
    std::unique_ptr<OrdinalIndexWriter> _ordinal_index_builder;
331
    std::unique_ptr<ZoneMapIndexWriter> _zone_map_index_builder;
332
    std::vector<std::unique_ptr<IndexColumnWriter>> _inverted_index_builders;
333
    std::unique_ptr<BloomFilterIndexWriter> _bloom_filter_index_builder;
334
335
    // call before flush data page.
336
    FlushPageCallback* _new_page_callback = nullptr;
337
};
338
339
// offsetColumnWriter is used column which has offset column, like array, map.
340
//  column type is only uint64 and should response for whole column value [start, end], end will set
341
//  in footer.next_array_item_ordinal which in finish_cur_page() callback put_extra_info_in_page()
342
class OffsetColumnWriter final : public ScalarColumnWriter, FlushPageCallback {
343
public:
344
    OffsetColumnWriter(const ColumnWriterOptions& opts, std::unique_ptr<StorageField> field,
345
                       io::FileWriter* file_writer);
346
347
    ~OffsetColumnWriter() override;
348
349
    Status init() override;
350
351
    Status append_data(const uint8_t** ptr, size_t num_rows) override;
352
353
private:
354
    void put_extra_info_in_page(DataPageFooterPB* footer) override;
355
356
    uint64_t _next_offset;
357
};
358
359
class StructColumnWriter final : public ColumnWriter {
360
public:
361
    explicit StructColumnWriter(const ColumnWriterOptions& opts,
362
                                std::unique_ptr<StorageField> field,
363
                                ScalarColumnWriter* null_writer,
364
                                std::vector<std::unique_ptr<ColumnWriter>>& sub_column_writers);
365
2.30k
    ~StructColumnWriter() override = default;
366
367
    Status init() override;
368
369
    Status append_nullable(const uint8_t* null_map, const uint8_t** data, size_t num_rows) override;
370
    Status append_data(const uint8_t** ptr, size_t num_rows) override;
371
372
    uint64_t estimate_buffer_size() override;
373
374
    Status finish() override;
375
    Status write_data() override;
376
    Status write_ordinal_index() override;
377
    Status append_nulls(size_t num_rows) override;
378
379
    Status finish_current_page() override;
380
381
1.62k
    Status write_zone_map() override {
382
1.62k
        if (_opts.need_zone_map) {
383
0
            return Status::NotSupported("struct not support zone map");
384
0
        }
385
1.62k
        return Status::OK();
386
1.62k
    }
387
388
    Status write_inverted_index() override;
389
1.62k
    Status write_bloom_filter_index() override {
390
1.62k
        if (_opts.need_bloom_filter) {
391
0
            return Status::NotSupported("struct not support bloom filter index");
392
0
        }
393
1.62k
        return Status::OK();
394
1.62k
    }
395
396
2.69k
    ordinal_t get_next_rowid() const override { return _sub_column_writers[0]->get_next_rowid(); }
397
398
2.30k
    uint64_t get_raw_data_bytes() const override {
399
2.30k
        return _get_total_data_pages_bytes(&ColumnWriter::get_raw_data_bytes);
400
2.30k
    }
401
402
2.30k
    uint64_t get_total_uncompressed_data_pages_bytes() const override {
403
2.30k
        return _get_total_data_pages_bytes(&ColumnWriter::get_total_uncompressed_data_pages_bytes);
404
2.30k
    }
405
406
2.30k
    uint64_t get_total_compressed_data_pages_bytes() const override {
407
2.30k
        return _get_total_data_pages_bytes(&ColumnWriter::get_total_compressed_data_pages_bytes);
408
2.30k
    }
409
410
private:
411
    template <typename Func>
412
6.91k
    uint64_t _get_total_data_pages_bytes(Func func) const {
413
6.91k
        uint64_t size = is_nullable() ? std::invoke(func, _null_writer.get()) : 0;
414
28.7k
        for (const auto& writer : _sub_column_writers) {
415
28.7k
            size += std::invoke(func, writer.get());
416
28.7k
        }
417
6.91k
        return size;
418
6.91k
    }
419
420
private:
421
    size_t _num_sub_column_writers;
422
    std::unique_ptr<ScalarColumnWriter> _null_writer;
423
    std::vector<std::unique_ptr<ColumnWriter>> _sub_column_writers;
424
    ColumnWriterOptions _opts;
425
};
426
427
class ArrayColumnWriter final : public ColumnWriter {
428
public:
429
    explicit ArrayColumnWriter(const ColumnWriterOptions& opts, std::unique_ptr<StorageField> field,
430
                               OffsetColumnWriter* offset_writer, ScalarColumnWriter* null_writer,
431
                               std::unique_ptr<ColumnWriter> item_writer);
432
45.7k
    ~ArrayColumnWriter() override = default;
433
434
    Status init() override;
435
436
    Status append_data(const uint8_t** ptr, size_t num_rows) override;
437
438
    uint64_t estimate_buffer_size() override;
439
440
    Status finish() override;
441
    Status write_data() override;
442
    Status write_ordinal_index() override;
443
    Status append_nulls(size_t num_rows) override;
444
    Status append_nullable(const uint8_t* null_map, const uint8_t** ptr, size_t num_rows) override;
445
446
    Status finish_current_page() override;
447
448
41.7k
    Status write_zone_map() override {
449
41.7k
        if (_opts.need_zone_map) {
450
0
            return Status::NotSupported("array not support zone map");
451
0
        }
452
41.7k
        return Status::OK();
453
41.7k
    }
454
455
    Status write_inverted_index() override;
456
    Status write_ann_index() override;
457
41.7k
    Status write_bloom_filter_index() override {
458
41.7k
        if (_opts.need_bloom_filter) {
459
0
            return Status::NotSupported("array not support bloom filter index");
460
0
        }
461
41.7k
        return Status::OK();
462
41.7k
    }
463
49.3k
    ordinal_t get_next_rowid() const override { return _offset_writer->get_next_rowid(); }
464
465
43.7k
    uint64_t get_raw_data_bytes() const override {
466
43.7k
        return _get_total_data_pages_bytes(&ColumnWriter::get_raw_data_bytes);
467
43.7k
    }
468
469
43.7k
    uint64_t get_total_uncompressed_data_pages_bytes() const override {
470
43.7k
        return _get_total_data_pages_bytes(&ColumnWriter::get_total_uncompressed_data_pages_bytes);
471
43.7k
    }
472
473
43.7k
    uint64_t get_total_compressed_data_pages_bytes() const override {
474
43.7k
        return _get_total_data_pages_bytes(&ColumnWriter::get_total_compressed_data_pages_bytes);
475
43.7k
    }
476
477
private:
478
    template <typename Func>
479
131k
    uint64_t _get_total_data_pages_bytes(Func func) const {
480
131k
        uint64_t size = std::invoke(func, _offset_writer.get());
481
131k
        if (is_nullable()) {
482
81.3k
            size += std::invoke(func, _null_writer.get());
483
81.3k
        }
484
131k
        size += std::invoke(func, _item_writer.get());
485
131k
        return size;
486
131k
    }
487
488
private:
489
    Status write_null_column(size_t num_rows, bool is_null); // 写入num_rows个null标记
490
45.2k
    bool has_empty_items() const { return _item_writer->get_next_rowid() == 0; }
491
492
private:
493
    std::unique_ptr<OffsetColumnWriter> _offset_writer;
494
    std::unique_ptr<ScalarColumnWriter> _null_writer;
495
    std::unique_ptr<ColumnWriter> _item_writer;
496
    std::unique_ptr<IndexColumnWriter> _inverted_index_writer;
497
    std::unique_ptr<AnnIndexColumnWriter> _ann_index_writer;
498
    ColumnWriterOptions _opts;
499
};
500
501
class MapColumnWriter final : public ColumnWriter {
502
public:
503
    explicit MapColumnWriter(const ColumnWriterOptions& opts, std::unique_ptr<StorageField> field,
504
                             ScalarColumnWriter* null_writer, OffsetColumnWriter* offsets_writer,
505
                             std::vector<std::unique_ptr<ColumnWriter>>& _kv_writers);
506
507
23.6k
    ~MapColumnWriter() override = default;
508
509
    Status init() override;
510
511
    Status append_data(const uint8_t** ptr, size_t num_rows) override;
512
    Status append_nullable(const uint8_t* null_map, const uint8_t** ptr, size_t num_rows) override;
513
    uint64_t estimate_buffer_size() override;
514
515
    Status finish() override;
516
    Status write_data() override;
517
    Status write_ordinal_index() override;
518
    Status write_inverted_index() override;
519
    Status append_nulls(size_t num_rows) override;
520
521
    Status finish_current_page() override;
522
523
9.45k
    Status write_zone_map() override {
524
9.45k
        if (_opts.need_zone_map) {
525
0
            return Status::NotSupported("map not support zone map");
526
0
        }
527
9.45k
        return Status::OK();
528
9.45k
    }
529
530
9.45k
    Status write_bloom_filter_index() override {
531
9.45k
        if (_opts.need_bloom_filter) {
532
0
            return Status::NotSupported("map not support bloom filter index");
533
0
        }
534
9.45k
        return Status::OK();
535
9.45k
    }
536
537
    // according key writer to get next rowid
538
24.6k
    ordinal_t get_next_rowid() const override { return _offsets_writer->get_next_rowid(); }
539
540
10.2k
    uint64_t get_raw_data_bytes() const override {
541
10.2k
        return _get_total_data_pages_bytes(&ColumnWriter::get_raw_data_bytes);
542
10.2k
    }
543
544
10.2k
    uint64_t get_total_uncompressed_data_pages_bytes() const override {
545
10.2k
        return _get_total_data_pages_bytes(&ColumnWriter::get_total_uncompressed_data_pages_bytes);
546
10.2k
    }
547
548
10.2k
    uint64_t get_total_compressed_data_pages_bytes() const override {
549
10.2k
        return _get_total_data_pages_bytes(&ColumnWriter::get_total_compressed_data_pages_bytes);
550
10.2k
    }
551
552
private:
553
    template <typename Func>
554
30.6k
    uint64_t _get_total_data_pages_bytes(Func func) const {
555
30.6k
        uint64_t size = std::invoke(func, _offsets_writer.get());
556
30.6k
        if (is_nullable()) {
557
23.9k
            size += std::invoke(func, _null_writer.get());
558
23.9k
        }
559
61.2k
        for (const auto& writer : _kv_writers) {
560
61.2k
            size += std::invoke(func, writer.get());
561
61.2k
        }
562
30.6k
        return size;
563
30.6k
    }
564
565
private:
566
    std::vector<std::unique_ptr<ColumnWriter>> _kv_writers;
567
    // we need null writer to make sure a row is null or not
568
    std::unique_ptr<ScalarColumnWriter> _null_writer;
569
    std::unique_ptr<OffsetColumnWriter> _offsets_writer;
570
    std::unique_ptr<IndexColumnWriter> _index_builder;
571
    ColumnWriterOptions _opts;
572
};
573
574
// used for compaction to write sub variant column
575
class VariantSubcolumnWriter : public ColumnWriter {
576
public:
577
    explicit VariantSubcolumnWriter(const ColumnWriterOptions& opts, const TabletColumn* column,
578
                                    std::unique_ptr<StorageField> field);
579
580
22
    ~VariantSubcolumnWriter() override = default;
581
582
    Status init() override;
583
584
    Status append_data(const uint8_t** ptr, size_t num_rows) override;
585
586
    uint64_t estimate_buffer_size() override;
587
588
    Status finish() override;
589
    Status write_data() override;
590
    Status write_ordinal_index() override;
591
592
    Status write_zone_map() override;
593
594
    Status write_inverted_index() override;
595
    Status write_bloom_filter_index() override;
596
0
    ordinal_t get_next_rowid() const override { return _next_rowid; }
597
598
21
    uint64_t get_raw_data_bytes() const override {
599
21
        return 0; // TODO
600
21
    }
601
602
21
    uint64_t get_total_uncompressed_data_pages_bytes() const override {
603
21
        return 0; // TODO
604
21
    }
605
606
21
    uint64_t get_total_compressed_data_pages_bytes() const override {
607
21
        return 0; // TODO
608
21
    }
609
610
0
    Status append_nulls(size_t num_rows) override {
611
0
        return Status::NotSupported("variant writer can not append_nulls");
612
0
    }
613
    Status append_nullable(const uint8_t* null_map, const uint8_t** ptr, size_t num_rows) override;
614
615
0
    Status finish_current_page() override {
616
0
        return Status::NotSupported("variant writer has no data, can not finish_current_page");
617
0
    }
618
619
0
    size_t get_non_null_size() const { return none_null_size; }
620
621
    Status finalize();
622
623
private:
624
    bool is_finalized() const;
625
    bool _is_finalized = false;
626
    ordinal_t _next_rowid = 0;
627
    size_t none_null_size = 0;
628
    ColumnVariant::MutablePtr _column;
629
    const TabletColumn* _tablet_column = nullptr;
630
    ColumnWriterOptions _opts;
631
    std::unique_ptr<ColumnWriter> _writer;
632
    TabletIndexes _indexes;
633
634
    std::unique_ptr<NestedGroupWriteProvider> _nested_group_provider;
635
    VariantStatistics _statistics;
636
};
637
638
class VariantColumnWriter : public ColumnWriter {
639
public:
640
    explicit VariantColumnWriter(const ColumnWriterOptions& opts, const TabletColumn* column,
641
                                 std::unique_ptr<StorageField> field);
642
643
6.02k
    ~VariantColumnWriter() override = default;
644
645
    Status init() override;
646
647
    Status append_data(const uint8_t** ptr, size_t num_rows) override;
648
649
    uint64_t estimate_buffer_size() override;
650
651
    Status finish() override;
652
    Status write_data() override;
653
    Status write_ordinal_index() override;
654
655
    Status write_zone_map() override;
656
657
    Status write_inverted_index() override;
658
    Status write_bloom_filter_index() override;
659
16
    ordinal_t get_next_rowid() const override { return _next_rowid; }
660
661
6.00k
    uint64_t get_raw_data_bytes() const override {
662
6.00k
        return 0; // TODO
663
6.00k
    }
664
665
6.00k
    uint64_t get_total_uncompressed_data_pages_bytes() const override {
666
6.00k
        return 0; // TODO
667
6.00k
    }
668
669
6.00k
    uint64_t get_total_compressed_data_pages_bytes() const override {
670
6.00k
        return 0; // TODO
671
6.00k
    }
672
673
0
    Status append_nulls(size_t num_rows) override {
674
0
        return Status::NotSupported("variant writer can not append_nulls");
675
0
    }
676
    Status append_nullable(const uint8_t* null_map, const uint8_t** ptr, size_t num_rows) override;
677
678
0
    Status finish_current_page() override {
679
0
        return Status::NotSupported("variant writer has no data, can not finish_current_page");
680
0
    }
681
682
private:
683
    std::unique_ptr<VariantColumnWriterImpl> _impl;
684
    ordinal_t _next_rowid = 0;
685
};
686
687
} // namespace segment_v2
688
} // namespace doris