Coverage Report

Created: 2026-05-28 15:30

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/segment/column_writer.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <gen_cpp/AgentService_types.h>
#include <gen_cpp/olap_file.pb.h>
#include <gen_cpp/segment_v2.pb.h>
#include <stddef.h>
#include <stdint.h>

#include <algorithm>
#include <functional> // for std::invoke
#include <memory>     // for unique_ptr
#include <ostream>
#include <sstream> // for std::ostringstream
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "common/status.h" // for Status
#include "core/column/column_variant.h"
#include "storage/index/ann/ann_index_writer.h"
#include "storage/index/bloom_filter/bloom_filter.h"
#include "storage/index/inverted/inverted_index_writer.h"
#include "storage/segment/common.h"
#include "storage/segment/options.h"
#include "storage/segment/variant/nested_group_provider.h"
#include "storage/segment/variant/variant_statistics.h"
#include "storage/tablet/tablet_schema.h" // for TabletColumnPtr
#include "storage/types.h"                // for field_type_size
#include "util/bitmap.h"                  // for BitmapChange
#include "util/slice.h"                   // for OwnedSlice
48
namespace doris {
49
50
class BlockCompressionCodec;
51
class TabletColumn;
52
class TabletIndex;
53
struct RowsetWriterContext;
54
55
namespace io {
56
class FileWriter;
57
}
58
59
namespace segment_v2 {
60
61
struct ColumnWriterOptions {
62
    // input and output parameter:
63
    // - input: column_id/unique_id/type/length/encoding/compression/is_nullable members
64
    // - output: encoding/indexes/dict_page members
65
    ColumnMetaPB* meta = nullptr;
66
    size_t data_page_size = STORAGE_PAGE_SIZE_DEFAULT_VALUE;
67
    size_t dict_page_size = STORAGE_DICT_PAGE_SIZE_DEFAULT_VALUE;
68
    // store compressed page only when space saving is above the threshold.
69
    // space saving = 1 - compressed_size / uncompressed_size
70
    double compression_min_space_saving = 0.1;
71
    bool need_zone_map = false;
72
    bool need_bloom_filter = false;
73
    bool is_ngram_bf_index = false;
74
    bool need_inverted_index = false;
75
    bool need_ann_index = false;
76
    uint8_t gram_size;
77
    uint16_t gram_bf_size;
78
    BloomFilterOptions bf_options;
79
    std::vector<const TabletIndex*> inverted_indexes;
80
    IndexFileWriter* index_file_writer = nullptr;
81
82
    SegmentFooterPB* footer = nullptr;
83
    io::FileWriter* file_writer = nullptr;
84
    CompressionTypePB compression_type = UNKNOWN_COMPRESSION;
85
    RowsetWriterContext* rowset_ctx = nullptr;
86
    // For collect segment statistics for compaction
87
    std::vector<RowsetReaderSharedPtr> input_rs_readers;
88
    const TabletIndex* ann_index = nullptr;
89
90
    // Storage format of the owning tablet (V2 or V3). Set once by the segment writer
91
    // (from TabletMeta::storage_format()) and propagated down to aux child writers
92
    // (null / array-length / map-length), struct subcolumn writers and variant subcolumn
93
    // writers. All encoding-default decisions consult this via resolve_default_encoding().
94
    // Also forwarded to BinaryDictPageBuilder via PageBuilderOptions::binary_plain_encoding.
95
    TabletStorageFormatPB storage_format = TabletStorageFormatPB::TABLET_STORAGE_FORMAT_V2;
96
97
0
    std::string to_string() const {
98
0
        std::stringstream ss;
99
0
        ss << std::boolalpha << "meta=" << meta->DebugString()
100
0
           << ", data_page_size=" << data_page_size << ", dict_page_size=" << dict_page_size
101
0
           << ", compression_min_space_saving = " << compression_min_space_saving
102
0
           << ", need_zone_map=" << need_zone_map << ", need_bloom_filter" << need_bloom_filter;
103
0
        return ss.str();
104
0
    }
105
};
106
107
class EncodingInfo;
108
class NullBitmapBuilder;
109
class OrdinalIndexWriter;
110
class PageBuilder;
111
class BloomFilterIndexWriter;
112
class ZoneMapIndexWriter;
113
class VariantColumnWriterImpl;
114
class ColumnWriter;
115
116
class ColumnWriter {
117
public:
118
    static Status create(const ColumnWriterOptions& opts, const TabletColumn* column,
119
                         io::FileWriter* file_writer, std::unique_ptr<ColumnWriter>* writer);
120
    static Status create_struct_writer(const ColumnWriterOptions& opts, const TabletColumn* column,
121
                                       io::FileWriter* file_writer,
122
                                       std::unique_ptr<ColumnWriter>* writer);
123
    static Status create_array_writer(const ColumnWriterOptions& opts, const TabletColumn* column,
124
                                      io::FileWriter* file_writer,
125
                                      std::unique_ptr<ColumnWriter>* writer);
126
    static Status create_map_writer(const ColumnWriterOptions& opts, const TabletColumn* column,
127
                                    io::FileWriter* file_writer,
128
                                    std::unique_ptr<ColumnWriter>* writer);
129
130
    static Status create_variant_writer(const ColumnWriterOptions& opts, const TabletColumn* column,
131
                                        io::FileWriter* file_writer,
132
                                        std::unique_ptr<ColumnWriter>* writer);
133
134
    static Status create_agg_state_writer(const ColumnWriterOptions& opts,
135
                                          const TabletColumn* column, io::FileWriter* file_writer,
136
                                          std::unique_ptr<ColumnWriter>* writer);
137
138
    explicit ColumnWriter(TabletColumnPtr column, bool is_nullable, ColumnMetaPB* meta);
139
140
13.0k
    virtual ~ColumnWriter() = default;
141
142
    virtual Status init() = 0;
143
144
    template <typename CellType>
145
    Status append(const CellType& cell) {
146
        if (_is_nullable) {
147
            uint8_t nullmap = 0;
148
            BitmapChange(&nullmap, 0, cell.is_null());
149
            return append_nullable(&nullmap, cell.cell_ptr(), 1);
150
        } else {
151
            auto* cel_ptr = cell.cell_ptr();
152
            return append_data((const uint8_t**)&cel_ptr, 1);
153
        }
154
    }
155
156
    // Now we only support append one by one, we should support append
157
    // multi rows in one call
158
651k
    Status append(bool is_null, void* data) {
159
651k
        uint8_t nullmap = 0;
160
651k
        BitmapChange(&nullmap, 0, is_null);
161
651k
        return append_nullable(&nullmap, data, 1);
162
651k
    }
163
164
    Status append(const uint8_t* nullmap, const void* data, size_t num_rows);
165
166
    Status append_nullable(const uint8_t* nullmap, const void* data, size_t num_rows);
167
168
    // use only in vectorized load
169
    virtual Status append_nullable(const uint8_t* null_map, const uint8_t** data, size_t num_rows);
170
171
    virtual Status append_nulls(size_t num_rows) = 0;
172
173
    virtual Status finish_current_page() = 0;
174
175
    virtual uint64_t estimate_buffer_size() = 0;
176
177
    // finish append data
178
    virtual Status finish() = 0;
179
180
    // write all data into file
181
    virtual Status write_data() = 0;
182
183
    virtual Status write_ordinal_index() = 0;
184
185
    virtual Status write_zone_map() = 0;
186
187
    virtual Status write_inverted_index() = 0;
188
189
10.3k
    virtual Status write_ann_index() { return Status::OK(); }
190
191
    virtual Status write_bloom_filter_index() = 0;
192
193
    virtual ordinal_t get_next_rowid() const = 0;
194
195
    virtual uint64_t get_raw_data_bytes() const = 0;
196
    virtual uint64_t get_total_uncompressed_data_pages_bytes() const = 0;
197
    virtual uint64_t get_total_compressed_data_pages_bytes() const = 0;
198
199
    // used for append not null data.
200
    virtual Status append_data(const uint8_t** ptr, size_t num_rows) = 0;
201
202
729k
    bool is_nullable() const { return _is_nullable; }
203
204
30.1k
    const TabletColumn* get_column() const { return _column.get(); }
205
206
    // Per-row in-memory cell footprint of this writer's column, used to step
207
    // the input pointer across rows in append_*/null-run loops.
208
724k
    size_t cell_size() const { return field_type_size(_column->type()); }
209
210
10.3k
    ColumnMetaPB* get_column_meta() const { return _column_meta; }
211
212
protected:
213
    DataTypePtr _data_type;
214
215
private:
216
    TabletColumnPtr _column;
217
    bool _is_nullable;
218
    ColumnMetaPB* _column_meta;
219
    std::vector<uint8_t> _null_bitmap;
220
};
221
222
class FlushPageCallback {
223
public:
224
337
    virtual ~FlushPageCallback() = default;
225
0
    virtual void put_extra_info_in_page(DataPageFooterPB* footer) {}
226
};
227
228
// Encode one column's data into some memory slice.
229
// Because some columns would be stored in a file, we should wait
230
// until all columns has been finished, and then data can be written
231
// to file
232
class ScalarColumnWriter : public ColumnWriter {
233
public:
234
    ScalarColumnWriter(const ColumnWriterOptions& opts, TabletColumnPtr column,
235
                       io::FileWriter* file_writer);
236
237
    ~ScalarColumnWriter() override;
238
239
    Status init() override;
240
241
    Status append_nulls(size_t num_rows) override;
242
243
    Status finish_current_page() override;
244
245
    uint64_t estimate_buffer_size() override;
246
247
    // finish append data
248
    Status finish() override;
249
250
    Status write_data() override;
251
    Status write_ordinal_index() override;
252
    Status write_zone_map() override;
253
    Status write_inverted_index() override;
254
    Status write_bloom_filter_index() override;
255
988
    ordinal_t get_next_rowid() const override { return _next_rowid; }
256
257
10.0k
    uint64_t get_raw_data_bytes() const override { return _raw_data_bytes; }
258
259
10.0k
    uint64_t get_total_uncompressed_data_pages_bytes() const override {
260
10.0k
        return _total_uncompressed_data_pages_size;
261
10.0k
    }
262
263
10.0k
    uint64_t get_total_compressed_data_pages_bytes() const override {
264
10.0k
        return _total_compressed_data_pages_size;
265
10.0k
    }
266
267
337
    void register_flush_page_callback(FlushPageCallback* flush_page_callback) {
268
337
        _new_page_callback = flush_page_callback;
269
337
    }
270
    Status append_data(const uint8_t** ptr, size_t num_rows) override;
271
    Status append_nullable(const uint8_t* null_map, const uint8_t** ptr, size_t num_rows) override;
272
273
    // used for append not null data. When page is full, will append data not reach num_rows.
274
    Status append_data_in_current_page(const uint8_t** ptr, size_t* num_written);
275
276
696k
    Status append_data_in_current_page(const uint8_t* ptr, size_t* num_written) {
277
696k
        RETURN_IF_CATCH_EXCEPTION(
278
696k
                { return _internal_append_data_in_current_page(ptr, num_written); });
279
696k
    }
280
    friend class ArrayColumnWriter;
281
    friend class OffsetColumnWriter;
282
283
private:
284
    Status _internal_append_data_in_current_page(const uint8_t* ptr, size_t* num_written);
285
286
private:
287
    struct NullRun {
288
        bool is_null;
289
        uint32_t len;
290
    };
291
292
    std::vector<NullRun> _null_run_buffer;
293
    std::unique_ptr<PageBuilder> _page_builder;
294
295
    std::unique_ptr<NullBitmapBuilder> _null_bitmap_builder;
296
297
    ColumnWriterOptions _opts;
298
299
    const EncodingInfo* _encoding_info = nullptr;
300
301
    ordinal_t _next_rowid = 0;
302
303
    // All Pages will be organized into a linked list
304
    struct Page {
305
        // the data vector may contain:
306
        //     1. one OwnedSlice if the page body is compressed
307
        //     2. one OwnedSlice if the page body is not compressed and doesn't have nullmap
308
        //     3. two OwnedSlice if the page body is not compressed and has nullmap
309
        // use vector for easier management for lifetime of OwnedSlice
310
        std::vector<OwnedSlice> data;
311
        PageFooterPB footer;
312
    };
313
314
16.3k
    void _push_back_page(std::unique_ptr<Page> page) {
315
24.7k
        for (auto& data_slice : page->data) {
316
24.7k
            _data_size += data_slice.slice().size;
317
24.7k
        }
318
        // estimate (page footer + footer size + checksum) took 20 bytes
319
16.3k
        _data_size += 20;
320
        // add page to pages' tail
321
16.3k
        _pages.emplace_back(std::move(page));
322
16.3k
    }
323
324
    Status _write_data_page(Page* page);
325
326
private:
327
    io::FileWriter* _file_writer = nullptr;
328
    // total size of data page list
329
    uint64_t _data_size;
330
331
    uint64_t _raw_data_bytes {0};
332
    uint64_t _total_uncompressed_data_pages_size {0};
333
    uint64_t _total_compressed_data_pages_size {0};
334
335
    // cached generated pages,
336
    std::vector<std::unique_ptr<Page>> _pages;
337
    ordinal_t _first_rowid = 0;
338
339
    BlockCompressionCodec* _compress_codec;
340
341
    std::unique_ptr<OrdinalIndexWriter> _ordinal_index_builder;
342
    std::unique_ptr<ZoneMapIndexWriter> _zone_map_index_builder;
343
    std::vector<std::unique_ptr<IndexColumnWriter>> _inverted_index_builders;
344
    std::unique_ptr<BloomFilterIndexWriter> _bloom_filter_index_builder;
345
346
    // call before flush data page.
347
    FlushPageCallback* _new_page_callback = nullptr;
348
};
349
350
// offsetColumnWriter is used column which has offset column, like array, map.
351
//  column type is only uint64 and should response for whole column value [start, end], end will set
352
//  in footer.next_array_item_ordinal which in finish_cur_page() callback put_extra_info_in_page()
353
class OffsetColumnWriter final : public ScalarColumnWriter, FlushPageCallback {
354
public:
355
    OffsetColumnWriter(const ColumnWriterOptions& opts, TabletColumnPtr column,
356
                       io::FileWriter* file_writer);
357
358
    ~OffsetColumnWriter() override;
359
360
    Status init() override;
361
362
    Status append_data(const uint8_t** ptr, size_t num_rows) override;
363
364
private:
365
    void put_extra_info_in_page(DataPageFooterPB* footer) override;
366
367
    uint64_t _next_offset;
368
};
369
370
class StructColumnWriter final : public ColumnWriter {
371
public:
372
    explicit StructColumnWriter(const ColumnWriterOptions& opts, TabletColumnPtr column,
373
                                ScalarColumnWriter* null_writer,
374
                                std::vector<std::unique_ptr<ColumnWriter>>& sub_column_writers);
375
0
    ~StructColumnWriter() override = default;
376
377
    Status init() override;
378
379
    Status append_nullable(const uint8_t* null_map, const uint8_t** data, size_t num_rows) override;
380
    Status append_data(const uint8_t** ptr, size_t num_rows) override;
381
382
    uint64_t estimate_buffer_size() override;
383
384
    Status finish() override;
385
    Status write_data() override;
386
    Status write_ordinal_index() override;
387
    Status append_nulls(size_t num_rows) override;
388
389
    Status finish_current_page() override;
390
391
0
    Status write_zone_map() override {
392
0
        if (_opts.need_zone_map) {
393
0
            return Status::NotSupported("struct not support zone map");
394
0
        }
395
0
        return Status::OK();
396
0
    }
397
398
    Status write_inverted_index() override;
399
0
    Status write_bloom_filter_index() override {
400
0
        if (_opts.need_bloom_filter) {
401
0
            return Status::NotSupported("struct not support bloom filter index");
402
0
        }
403
0
        return Status::OK();
404
0
    }
405
406
0
    ordinal_t get_next_rowid() const override { return _sub_column_writers[0]->get_next_rowid(); }
407
408
0
    uint64_t get_raw_data_bytes() const override {
409
0
        return _get_total_data_pages_bytes(&ColumnWriter::get_raw_data_bytes);
410
0
    }
411
412
0
    uint64_t get_total_uncompressed_data_pages_bytes() const override {
413
0
        return _get_total_data_pages_bytes(&ColumnWriter::get_total_uncompressed_data_pages_bytes);
414
0
    }
415
416
0
    uint64_t get_total_compressed_data_pages_bytes() const override {
417
0
        return _get_total_data_pages_bytes(&ColumnWriter::get_total_compressed_data_pages_bytes);
418
0
    }
419
420
private:
421
    template <typename Func>
422
0
    uint64_t _get_total_data_pages_bytes(Func func) const {
423
0
        uint64_t size = is_nullable() ? std::invoke(func, _null_writer.get()) : 0;
424
0
        for (const auto& writer : _sub_column_writers) {
425
0
            size += std::invoke(func, writer.get());
426
0
        }
427
0
        return size;
428
0
    }
429
430
private:
431
    size_t _num_sub_column_writers;
432
    std::unique_ptr<ScalarColumnWriter> _null_writer;
433
    std::vector<std::unique_ptr<ColumnWriter>> _sub_column_writers;
434
    ColumnWriterOptions _opts;
435
};
436
437
class ArrayColumnWriter final : public ColumnWriter {
438
public:
439
    explicit ArrayColumnWriter(const ColumnWriterOptions& opts, TabletColumnPtr column,
440
                               OffsetColumnWriter* offset_writer, ScalarColumnWriter* null_writer,
441
                               std::unique_ptr<ColumnWriter> item_writer);
442
14
    ~ArrayColumnWriter() override = default;
443
444
    Status init() override;
445
446
    Status append_data(const uint8_t** ptr, size_t num_rows) override;
447
448
    uint64_t estimate_buffer_size() override;
449
450
    Status finish() override;
451
    Status write_data() override;
452
    Status write_ordinal_index() override;
453
    Status append_nulls(size_t num_rows) override;
454
    Status append_nullable(const uint8_t* null_map, const uint8_t** ptr, size_t num_rows) override;
455
456
    Status finish_current_page() override;
457
458
3
    Status write_zone_map() override {
459
3
        if (_opts.need_zone_map) {
460
0
            return Status::NotSupported("array not support zone map");
461
0
        }
462
3
        return Status::OK();
463
3
    }
464
465
    Status write_inverted_index() override;
466
    Status write_ann_index() override;
467
3
    Status write_bloom_filter_index() override {
468
3
        if (_opts.need_bloom_filter) {
469
0
            return Status::NotSupported("array not support bloom filter index");
470
0
        }
471
3
        return Status::OK();
472
3
    }
473
16
    ordinal_t get_next_rowid() const override { return _offset_writer->get_next_rowid(); }
474
475
3
    uint64_t get_raw_data_bytes() const override {
476
3
        return _get_total_data_pages_bytes(&ColumnWriter::get_raw_data_bytes);
477
3
    }
478
479
3
    uint64_t get_total_uncompressed_data_pages_bytes() const override {
480
3
        return _get_total_data_pages_bytes(&ColumnWriter::get_total_uncompressed_data_pages_bytes);
481
3
    }
482
483
3
    uint64_t get_total_compressed_data_pages_bytes() const override {
484
3
        return _get_total_data_pages_bytes(&ColumnWriter::get_total_compressed_data_pages_bytes);
485
3
    }
486
487
private:
488
    template <typename Func>
489
9
    uint64_t _get_total_data_pages_bytes(Func func) const {
490
9
        uint64_t size = std::invoke(func, _offset_writer.get());
491
9
        if (is_nullable()) {
492
0
            size += std::invoke(func, _null_writer.get());
493
0
        }
494
9
        size += std::invoke(func, _item_writer.get());
495
9
        return size;
496
9
    }
497
498
private:
499
    Status write_null_column(size_t num_rows, bool is_null); // 写入num_rows个null标记
500
14
    bool has_empty_items() const { return _item_writer->get_next_rowid() == 0; }
501
502
private:
503
    std::unique_ptr<OffsetColumnWriter> _offset_writer;
504
    std::unique_ptr<ScalarColumnWriter> _null_writer;
505
    std::unique_ptr<ColumnWriter> _item_writer;
506
    std::unique_ptr<IndexColumnWriter> _inverted_index_writer;
507
    std::unique_ptr<AnnIndexColumnWriter> _ann_index_writer;
508
    ColumnWriterOptions _opts;
509
};
510
511
class MapColumnWriter final : public ColumnWriter {
512
public:
513
    explicit MapColumnWriter(const ColumnWriterOptions& opts, TabletColumnPtr column,
514
                             ScalarColumnWriter* null_writer, OffsetColumnWriter* offsets_writer,
515
                             std::vector<std::unique_ptr<ColumnWriter>>& _kv_writers);
516
517
323
    ~MapColumnWriter() override = default;
518
519
    Status init() override;
520
521
    Status append_data(const uint8_t** ptr, size_t num_rows) override;
522
    Status append_nullable(const uint8_t* null_map, const uint8_t** ptr, size_t num_rows) override;
523
    uint64_t estimate_buffer_size() override;
524
525
    Status finish() override;
526
    Status write_data() override;
527
    Status write_ordinal_index() override;
528
    Status write_inverted_index() override;
529
    Status append_nulls(size_t num_rows) override;
530
531
    Status finish_current_page() override;
532
533
1
    Status write_zone_map() override {
534
1
        if (_opts.need_zone_map) {
535
0
            return Status::NotSupported("map not support zone map");
536
0
        }
537
1
        return Status::OK();
538
1
    }
539
540
1
    Status write_bloom_filter_index() override {
541
1
        if (_opts.need_bloom_filter) {
542
0
            return Status::NotSupported("map not support bloom filter index");
543
0
        }
544
1
        return Status::OK();
545
1
    }
546
547
    // according key writer to get next rowid
548
318
    ordinal_t get_next_rowid() const override { return _offsets_writer->get_next_rowid(); }
549
550
1
    uint64_t get_raw_data_bytes() const override {
551
1
        return _get_total_data_pages_bytes(&ColumnWriter::get_raw_data_bytes);
552
1
    }
553
554
1
    uint64_t get_total_uncompressed_data_pages_bytes() const override {
555
1
        return _get_total_data_pages_bytes(&ColumnWriter::get_total_uncompressed_data_pages_bytes);
556
1
    }
557
558
1
    uint64_t get_total_compressed_data_pages_bytes() const override {
559
1
        return _get_total_data_pages_bytes(&ColumnWriter::get_total_compressed_data_pages_bytes);
560
1
    }
561
562
private:
563
    template <typename Func>
564
3
    uint64_t _get_total_data_pages_bytes(Func func) const {
565
3
        uint64_t size = std::invoke(func, _offsets_writer.get());
566
3
        if (is_nullable()) {
567
0
            size += std::invoke(func, _null_writer.get());
568
0
        }
569
6
        for (const auto& writer : _kv_writers) {
570
6
            size += std::invoke(func, writer.get());
571
6
        }
572
3
        return size;
573
3
    }
574
575
private:
576
    std::vector<std::unique_ptr<ColumnWriter>> _kv_writers;
577
    // we need null writer to make sure a row is null or not
578
    std::unique_ptr<ScalarColumnWriter> _null_writer;
579
    std::unique_ptr<OffsetColumnWriter> _offsets_writer;
580
    std::unique_ptr<IndexColumnWriter> _index_builder;
581
    ColumnWriterOptions _opts;
582
};
583
584
// used for compaction to write sub variant column
585
class VariantSubcolumnWriter : public ColumnWriter {
586
public:
587
    explicit VariantSubcolumnWriter(const ColumnWriterOptions& opts, TabletColumnPtr column);
588
589
1
    ~VariantSubcolumnWriter() override = default;
590
591
    Status init() override;
592
593
    Status append_data(const uint8_t** ptr, size_t num_rows) override;
594
595
    uint64_t estimate_buffer_size() override;
596
597
    Status finish() override;
598
    Status write_data() override;
599
    Status write_ordinal_index() override;
600
601
    Status write_zone_map() override;
602
603
    Status write_inverted_index() override;
604
    Status write_bloom_filter_index() override;
605
0
    ordinal_t get_next_rowid() const override { return _next_rowid; }
606
607
0
    uint64_t get_raw_data_bytes() const override {
608
0
        return 0; // TODO
609
0
    }
610
611
0
    uint64_t get_total_uncompressed_data_pages_bytes() const override {
612
0
        return 0; // TODO
613
0
    }
614
615
0
    uint64_t get_total_compressed_data_pages_bytes() const override {
616
0
        return 0; // TODO
617
0
    }
618
619
0
    Status append_nulls(size_t num_rows) override {
620
0
        return Status::NotSupported("variant writer can not append_nulls");
621
0
    }
622
    Status append_nullable(const uint8_t* null_map, const uint8_t** ptr, size_t num_rows) override;
623
624
0
    Status finish_current_page() override {
625
0
        return Status::NotSupported("variant writer has no data, can not finish_current_page");
626
0
    }
627
628
0
    size_t get_non_null_size() const { return none_null_size; }
629
630
    Status finalize();
631
632
private:
633
    bool is_finalized() const;
634
    bool _is_finalized = false;
635
    ordinal_t _next_rowid = 0;
636
    size_t none_null_size = 0;
637
    ColumnVariant::MutablePtr _column;
638
    ColumnWriterOptions _opts;
639
    std::unique_ptr<ColumnWriter> _writer;
640
    TabletIndexes _indexes;
641
642
    std::unique_ptr<NestedGroupWriteProvider> _nested_group_provider;
643
    VariantStatistics _statistics;
644
};
645
646
class VariantColumnWriter : public ColumnWriter {
647
public:
648
    explicit VariantColumnWriter(const ColumnWriterOptions& opts, TabletColumnPtr column);
649
650
311
    ~VariantColumnWriter() override = default;
651
652
    Status init() override;
653
654
    Status append_data(const uint8_t** ptr, size_t num_rows) override;
655
656
    uint64_t estimate_buffer_size() override;
657
658
    Status finish() override;
659
    Status write_data() override;
660
    Status write_ordinal_index() override;
661
662
    Status write_zone_map() override;
663
664
    Status write_inverted_index() override;
665
    Status write_bloom_filter_index() override;
666
1
    ordinal_t get_next_rowid() const override { return _next_rowid; }
667
668
285
    uint64_t get_raw_data_bytes() const override {
669
285
        return 0; // TODO
670
285
    }
671
672
285
    uint64_t get_total_uncompressed_data_pages_bytes() const override {
673
285
        return 0; // TODO
674
285
    }
675
676
285
    uint64_t get_total_compressed_data_pages_bytes() const override {
677
285
        return 0; // TODO
678
285
    }
679
680
0
    Status append_nulls(size_t num_rows) override {
681
0
        return Status::NotSupported("variant writer can not append_nulls");
682
0
    }
683
    Status append_nullable(const uint8_t* null_map, const uint8_t** ptr, size_t num_rows) override;
684
685
0
    Status finish_current_page() override {
686
0
        return Status::NotSupported("variant writer has no data, can not finish_current_page");
687
0
    }
688
689
1
    VariantColumnWriterImpl* impl_for_test() const { return _impl.get(); }
690
691
private:
692
    std::unique_ptr<VariantColumnWriterImpl> _impl;
693
    ordinal_t _next_rowid = 0;
694
};
695
696
} // namespace segment_v2
697
} // namespace doris