Coverage Report

Created: 2025-04-24 12:23

/root/doris/be/src/olap/tablet_schema.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <gen_cpp/Types_types.h>
21
#include <gen_cpp/olap_common.pb.h>
22
#include <gen_cpp/olap_file.pb.h>
23
#include <gen_cpp/segment_v2.pb.h>
24
#include <parallel_hashmap/phmap.h>
25
#include <stddef.h>
26
#include <stdint.h>
27
28
#include <map>
29
#include <memory>
30
#include <string>
31
#include <unordered_map>
32
#include <unordered_set>
33
#include <utility>
34
#include <vector>
35
36
#include "common/status.h"
37
#include "gutil/stringprintf.h"
38
#include "olap/metadata_adder.h"
39
#include "olap/olap_common.h"
40
#include "olap/rowset/segment_v2/options.h"
41
#include "runtime/define_primitive_type.h"
42
#include "runtime/descriptors.h"
43
#include "runtime/memory/lru_cache_policy.h"
44
#include "util/string_util.h"
45
#include "vec/aggregate_functions/aggregate_function.h"
46
#include "vec/common/string_ref.h"
47
#include "vec/common/string_utils/string_utils.h"
48
#include "vec/core/types.h"
49
#include "vec/json/path_in_data.h"
50
51
namespace doris {
52
namespace vectorized {
53
class Block;
54
class PathInData;
55
class IDataType;
56
} // namespace vectorized
57
58
struct OlapTableIndexSchema;
59
class TColumn;
60
class TOlapTableIndex;
61
class TabletColumn;
62
63
using TabletColumnPtr = std::shared_ptr<TabletColumn>;
64
65
class TabletColumn : public MetadataAdder<TabletColumn> {
66
public:
67
    TabletColumn();
68
    TabletColumn(const ColumnPB& column);
69
    TabletColumn(const TColumn& column);
70
    TabletColumn(FieldAggregationMethod agg, FieldType type);
71
    TabletColumn(FieldAggregationMethod agg, FieldType filed_type, bool is_nullable);
72
    TabletColumn(FieldAggregationMethod agg, FieldType filed_type, bool is_nullable,
73
                 int32_t unique_id, size_t length);
74
    void init_from_pb(const ColumnPB& column);
75
    void init_from_thrift(const TColumn& column);
76
    void to_schema_pb(ColumnPB* column) const;
77
78
205k
    int32_t unique_id() const { return _unique_id; }
79
74
    void set_unique_id(int32_t id) { _unique_id = id; }
80
74.4k
    const std::string& name() const { return _col_name; }
81
0
    const std::string& name_lower_case() const { return _col_name_lower_case; }
82
108
    void set_name(std::string col_name) {
83
108
        _col_name = col_name;
84
108
        _col_name_lower_case = to_lower(_col_name);
85
108
    }
86
795k
    FieldType type() const { return _type; }
87
103
    void set_type(FieldType type) { _type = type; }
88
47.0k
    bool is_key() const { return _is_key; }
89
149k
    bool is_nullable() const { return _is_nullable; }
90
0
    bool is_auto_increment() const { return _is_auto_increment; }
91
50.2k
    bool is_variant_type() const { return _type == FieldType::OLAP_FIELD_TYPE_VARIANT; }
92
10.4k
    bool is_bf_column() const { return _is_bf_column; }
93
10.4k
    bool has_bitmap_index() const { return _has_bitmap_index; }
94
10.5k
    bool is_array_type() const { return _type == FieldType::OLAP_FIELD_TYPE_ARRAY; }
95
0
    bool is_jsonb_type() const { return _type == FieldType::OLAP_FIELD_TYPE_JSONB; }
96
0
    bool is_length_variable_type() const {
97
0
        return _type == FieldType::OLAP_FIELD_TYPE_CHAR ||
98
0
               _type == FieldType::OLAP_FIELD_TYPE_VARCHAR ||
99
0
               _type == FieldType::OLAP_FIELD_TYPE_STRING ||
100
0
               _type == FieldType::OLAP_FIELD_TYPE_HLL ||
101
0
               _type == FieldType::OLAP_FIELD_TYPE_OBJECT ||
102
0
               _type == FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE ||
103
0
               _type == FieldType::OLAP_FIELD_TYPE_AGG_STATE;
104
0
    }
105
    // Such columns do not exist in the frontend schema info, so we need to
106
    // add them into tablet_schema for later column indexing.
107
    static TabletColumn create_materialized_variant_column(const std::string& root,
108
                                                           const std::vector<std::string>& paths,
109
                                                           int32_t parent_unique_id);
110
738
    bool has_default_value() const { return _has_default_value; }
111
10.4k
    std::string default_value() const { return _default_value; }
112
31.1k
    size_t length() const { return _length; }
113
31
    void set_length(size_t length) { _length = length; }
114
0
    void set_default_value(const std::string& default_value) {
115
0
        _default_value = default_value;
116
0
        _has_default_value = true;
117
0
    }
118
23.9k
    size_t index_length() const { return _index_length; }
119
17
    void set_index_length(size_t index_length) { _index_length = index_length; }
120
61
    void set_is_key(bool is_key) { _is_key = is_key; }
121
28
    void set_is_nullable(bool is_nullable) { _is_nullable = is_nullable; }
122
0
    void set_is_auto_increment(bool is_auto_increment) { _is_auto_increment = is_auto_increment; }
123
0
    void set_has_default_value(bool has) { _has_default_value = has; }
124
    void set_path_info(const vectorized::PathInData& path);
125
14.2k
    FieldAggregationMethod aggregation() const { return _aggregation; }
126
    vectorized::AggregateFunctionPtr get_aggregate_function_union(
127
            vectorized::DataTypePtr type) const;
128
    vectorized::AggregateFunctionPtr get_aggregate_function(std::string suffix) const;
129
127k
    int precision() const { return _precision; }
130
127k
    int frac() const { return _frac; }
131
0
    bool visible() const { return _visible; } // Returns the stored _visible flag.
132
133
4
    void set_aggregation_method(FieldAggregationMethod agg) {
134
4
        _aggregation = agg;
135
4
        _aggregation_name = get_string_by_aggregation_type(agg);
136
4
    }
137
138
    /**
139
     * Add a sub column.
140
     */
141
    void add_sub_column(TabletColumn& sub_column);
142
143
10.4k
    uint32_t get_subtype_count() const { return _sub_column_count; }
144
57
    const TabletColumn& get_sub_column(uint32_t i) const { return *_sub_columns[i]; }
145
0
    const std::vector<TabletColumnPtr>& get_sub_columns() const { return _sub_columns; }
146
147
    friend bool operator==(const TabletColumn& a, const TabletColumn& b);
148
    friend bool operator!=(const TabletColumn& a, const TabletColumn& b);
149
150
    static std::string get_string_by_field_type(FieldType type);
151
    static std::string get_string_by_aggregation_type(FieldAggregationMethod aggregation_type);
152
    static FieldType get_field_type_by_string(const std::string& str);
153
    static FieldType get_field_type_by_type(PrimitiveType type);
154
    static FieldAggregationMethod get_aggregation_type_by_string(const std::string& str);
155
    static uint32_t get_field_length_by_type(TPrimitiveType::type type, uint32_t string_length);
156
    bool is_row_store_column() const;
157
0
    std::string get_aggregation_name() const { return _aggregation_name; }
158
0
    bool get_result_is_nullable() const { return _result_is_nullable; }
159
91.3k
    bool has_path_info() const { return _column_path != nullptr && !_column_path->empty(); }
160
20.6k
    const vectorized::PathInDataPtr& path_info_ptr() const { return _column_path; }
161
    // Whether this is a column extracted from a variant column
162
46.5k
    bool is_extracted_column() const {
163
46.5k
        return _column_path != nullptr && !_column_path->empty() && _parent_col_unique_id > 0;
164
46.5k
    };
165
20.6k
    int32_t parent_unique_id() const { return _parent_col_unique_id; }
166
5
    void set_parent_unique_id(int32_t col_unique_id) { _parent_col_unique_id = col_unique_id; }
167
11
    void set_is_bf_column(bool is_bf_column) { _is_bf_column = is_bf_column; }
168
0
    void set_has_bitmap_index(bool has_bitmap_index) { _has_bitmap_index = has_bitmap_index; }
169
    std::shared_ptr<const vectorized::IDataType> get_vec_type() const;
170
171
    void append_sparse_column(TabletColumn column);
172
    const TabletColumn& sparse_column_at(size_t oridinal) const;
173
    const std::vector<TabletColumnPtr>& sparse_columns() const;
174
10.4k
    size_t num_sparse_columns() const { return _num_sparse_columns; }
175
176
private:
177
    int32_t _unique_id = -1;
178
    std::string _col_name;
179
    std::string _col_name_lower_case;
180
    // the field _type will change from TPrimitiveType
181
    // to string by 'EnumToString(TPrimitiveType, tcolumn.column_type.type, data_type);' (reference: TabletMeta::init_column_from_tcolumn)
182
    // to FieldType by 'TabletColumn::get_field_type_by_string' (reference: TabletColumn::init_from_pb).
183
    // The _type in ColumnPB is a string; it is converted from FieldType by 'get_string_by_field_type' (reference: TabletColumn::to_schema_pb).
184
    FieldType _type;
185
    bool _is_key = false;
186
    FieldAggregationMethod _aggregation;
187
    std::string _aggregation_name;
188
    bool _is_nullable = false;
189
    bool _is_auto_increment = false;
190
191
    bool _has_default_value = false;
192
    std::string _default_value;
193
194
    bool _is_decimal = false;
195
    int32_t _precision = -1;
196
    int32_t _frac = -1;
197
198
    int32_t _length = -1;
199
    int32_t _index_length = -1;
200
201
    bool _is_bf_column = false;
202
203
    bool _has_bitmap_index = false;
204
    bool _visible = true;
205
    int32_t _parent_col_unique_id = -1;
206
    std::vector<TabletColumnPtr> _sub_columns;
207
    uint32_t _sub_column_count = 0;
208
209
    bool _result_is_nullable = false;
210
    vectorized::PathInDataPtr _column_path;
211
212
    // Record information about columns merged into a sparse column within a variant
213
    // `{"id": 100, "name" : "jack", "point" : 3.9}`
214
    // If the information mentioned above is inserted into the variant column,
215
    // 'id' and 'name' are correctly extracted, while 'point' is merged into the sparse column due to its sparsity.
216
    // The path_info and type of 'point' will be recorded using the TabletColumn.
217
    // Use shared_ptr for reuse and reducing column memory usage
218
    std::vector<TabletColumnPtr> _sparse_cols;
219
    size_t _num_sparse_columns = 0;
220
};
221
222
bool operator==(const TabletColumn& a, const TabletColumn& b);
223
bool operator!=(const TabletColumn& a, const TabletColumn& b);
224
225
class TabletSchema;
226
227
class TabletIndex : public MetadataAdder<TabletIndex> {
228
public:
229
70
    TabletIndex() = default;
230
    void init_from_thrift(const TOlapTableIndex& index, const TabletSchema& tablet_schema);
231
    void init_from_thrift(const TOlapTableIndex& index, const std::vector<int32_t>& column_uids);
232
    void init_from_pb(const TabletIndexPB& index);
233
    void to_schema_pb(TabletIndexPB* index) const;
234
235
366
    int64_t index_id() const { return _index_id; }
236
0
    const std::string& index_name() const { return _index_name; }
237
643
    IndexType index_type() const { return _index_type; }
238
584
    const vector<int32_t>& col_unique_ids() const { return _col_unique_ids; }
239
3.39k
    const std::map<string, string>& properties() const { return _properties; }
240
0
    int32_t get_gram_size() const {
241
0
        if (_properties.count("gram_size")) {
242
0
            return std::stoi(_properties.at("gram_size"));
243
0
        }
244
245
0
        return 0;
246
0
    }
247
0
    int32_t get_gram_bf_size() const {
248
0
        if (_properties.count("bf_size")) {
249
0
            return std::stoi(_properties.at("bf_size"));
250
0
        }
251
252
0
        return 0;
253
0
    }
254
255
549
    const std::string& get_index_suffix() const { return _escaped_index_suffix_path; }
256
257
    void set_escaped_escaped_index_suffix_path(const std::string& name);
258
259
private:
260
    int64_t _index_id = -1;
261
    // Distinguishes different indexes that share the same _index_id
262
    std::string _escaped_index_suffix_path;
263
    std::string _index_name;
264
    IndexType _index_type;
265
    std::vector<int32_t> _col_unique_ids;
266
    std::map<std::string, std::string> _properties; // String key/value properties; "gram_size" and "bf_size" are read from here.
267
};
268
269
class TabletSchema : public MetadataAdder<TabletSchema> {
270
public:
271
    enum ColumnType { NORMAL = 0, DROPPED = 1, VARIANT = 2 };
272
    // TODO(yingchun): better to make the constructor private to avoid
273
    // manually initializing members incorrectly, and define a new function like
274
    // void create_from_pb(const TabletSchemaPB& schema, TabletSchema* tablet_schema).
275
    TabletSchema();
276
    virtual ~TabletSchema();
277
278
    // Init from pb
279
    // ignore_extracted_columns: ignore the extracted columns from variant column
280
    // reuse_cached_column: reuse the cached column in the schema if they are the same, to reduce memory usage
281
    void init_from_pb(const TabletSchemaPB& schema, bool ignore_extracted_columns = false,
282
                      bool reuse_cached_column = false);
283
    // Notice: Use a deterministic way to serialize protobuf,
284
    // since serializing a Map in protobuf may be non-deterministic by default
285
    template <class PbType>
286
3.27k
    static std::string deterministic_string_serialize(const PbType& pb) {
287
3.27k
        std::string output;
288
3.27k
        google::protobuf::io::StringOutputStream string_output_stream(&output);
289
3.27k
        google::protobuf::io::CodedOutputStream output_stream(&string_output_stream);
290
3.27k
        output_stream.SetSerializationDeterministic(true);
291
3.27k
        pb.SerializeToCodedStream(&output_stream);
292
3.27k
        return output;
293
3.27k
    }
_ZN5doris12TabletSchema30deterministic_string_serializeINS_14TabletSchemaPBEEENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKT_
Line
Count
Source
286
3.11k
    static std::string deterministic_string_serialize(const PbType& pb) {
287
3.11k
        std::string output;
288
3.11k
        google::protobuf::io::StringOutputStream string_output_stream(&output);
289
3.11k
        google::protobuf::io::CodedOutputStream output_stream(&string_output_stream);
290
3.11k
        output_stream.SetSerializationDeterministic(true);
291
3.11k
        pb.SerializeToCodedStream(&output_stream);
292
3.11k
        return output;
293
3.11k
    }
_ZN5doris12TabletSchema30deterministic_string_serializeINS_8ColumnPBEEENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKT_
Line
Count
Source
286
152
    static std::string deterministic_string_serialize(const PbType& pb) {
287
152
        std::string output;
288
152
        google::protobuf::io::StringOutputStream string_output_stream(&output);
289
152
        google::protobuf::io::CodedOutputStream output_stream(&string_output_stream);
290
152
        output_stream.SetSerializationDeterministic(true);
291
152
        pb.SerializeToCodedStream(&output_stream);
292
152
        return output;
293
152
    }
294
    void to_schema_pb(TabletSchemaPB* tablet_meta_pb) const;
295
    void append_column(TabletColumn column, ColumnType col_type = ColumnType::NORMAL);
296
    void append_index(TabletIndex index);
297
    void update_index(const TabletColumn& column, const IndexType& index_type, TabletIndex&& index);
298
    void remove_index(int64_t index_id);
299
    void clear_index();
300
    // Must make sure the row column is always the last column
301
    void add_row_column();
302
    void copy_from(const TabletSchema& tablet_schema);
303
    // lightweight copy, take care of lifecycle of TabletColumn
304
    void shawdow_copy_without_columns(const TabletSchema& tablet_schema);
305
    void update_index_info_from(const TabletSchema& tablet_schema);
306
    std::string to_key() const;
307
    // Don't use.
308
    // TODO: memory size of TabletSchema cannot be accurately tracked.
309
    // In some places, temporarily use num_columns() as TabletSchema size.
310
548
    int64_t mem_size() const { return _mem_size; }
311
    size_t row_size() const;
312
    int32_t field_index(const std::string& field_name) const;
313
    int32_t field_index(const vectorized::PathInData& path) const;
314
    int32_t field_index(int32_t col_unique_id) const;
315
    const TabletColumn& column(size_t ordinal) const;
316
    const TabletColumn& column(const std::string& field_name) const;
317
    Status have_column(const std::string& field_name) const;
318
    const TabletColumn& column_by_uid(int32_t col_unique_id) const;
319
    TabletColumn& mutable_column_by_uid(int32_t col_unique_id);
320
    TabletColumn& mutable_column(size_t ordinal);
321
    void replace_column(size_t pos, TabletColumn new_col);
322
    const std::vector<TabletColumnPtr>& columns() const;
323
623k
    size_t num_columns() const { return _num_columns; }
324
1.07M
    size_t num_key_columns() const { return _num_key_columns; }
325
10.5k
    const std::vector<uint32_t>& cluster_key_idxes() const { return _cluster_key_idxes; }
326
0
    size_t num_null_columns() const { return _num_null_columns; }
327
4.86k
    size_t num_short_key_columns() const { return _num_short_key_columns; }
328
0
    size_t num_rows_per_row_block() const { return _num_rows_per_row_block; }
329
234
    size_t num_variant_columns() const { return _num_variant_columns; } // Returns the stored _num_variant_columns counter.
330
6.95M
    KeysType keys_type() const { return _keys_type; }
331
4.95k
    SortType sort_type() const { return _sort_type; }
332
0
    size_t sort_col_num() const { return _sort_col_num; }
333
0
    CompressKind compress_kind() const { return _compress_kind; }
334
0
    size_t next_column_unique_id() const { return _next_column_unique_id; }
335
0
    bool has_bf_fpp() const { return _has_bf_fpp; }
336
0
    double bloom_filter_fpp() const { return _bf_fpp; }
337
10.7k
    bool is_in_memory() const { return _is_in_memory; }
338
0
    void set_is_in_memory(bool is_in_memory) { _is_in_memory = is_in_memory; }
339
0
    void set_disable_auto_compaction(bool disable_auto_compaction) {
340
0
        _disable_auto_compaction = disable_auto_compaction;
341
0
    }
342
280
    bool disable_auto_compaction() const { return _disable_auto_compaction; }
343
0
    void set_enable_single_replica_compaction(bool enable_single_replica_compaction) {
344
0
        _enable_single_replica_compaction = enable_single_replica_compaction;
345
0
    }
346
10
    bool enable_single_replica_compaction() const { return _enable_single_replica_compaction; }
347
0
    void set_store_row_column(bool store_row_column) { _store_row_column = store_row_column; }
348
8.89k
    bool store_row_column() const { return _store_row_column; }
349
0
    void set_skip_write_index_on_load(bool skip) { _skip_write_index_on_load = skip; }
350
54
    bool skip_write_index_on_load() const { return _skip_write_index_on_load; }
351
4.15k
    int32_t delete_sign_idx() const { return _delete_sign_idx; }
352
0
    void set_delete_sign_idx(int32_t delete_sign_idx) { _delete_sign_idx = delete_sign_idx; }
353
139k
    bool has_sequence_col() const { return _sequence_col_idx != -1; }
354
64.7k
    int32_t sequence_col_idx() const { return _sequence_col_idx; }
355
0
    void set_version_col_idx(int32_t version_col_idx) { _version_col_idx = version_col_idx; }
356
0
    int32_t version_col_idx() const { return _version_col_idx; }
357
4.76k
    segment_v2::CompressionTypePB compression_type() const { return _compression_type; }
358
0
    void set_row_store_page_size(long page_size) { _row_store_page_size = page_size; }
359
0
    long row_store_page_size() const { return _row_store_page_size; }
360
361
20
    const std::vector<TabletIndex>& indexes() const { return _indexes; }
362
4.84k
    bool has_inverted_index() const {
363
4.84k
        for (const auto& index : _indexes) {
364
22
            if (index.index_type() == IndexType::INVERTED) {
365
                // Ignore the index if it has no column unique ids or its first column unique id is negative.
366
22
                if (!index.col_unique_ids().empty() && index.col_unique_ids()[0] >= 0) {
367
22
                    return true;
368
22
                }
369
22
            }
370
22
        }
371
4.82k
        return false;
372
4.84k
    }
373
    std::vector<const TabletIndex*> get_indexes_for_column(const TabletColumn& col) const;
374
    bool has_inverted_index(const TabletColumn& col) const;
375
    bool has_inverted_index_with_index_id(int64_t index_id, const std::string& suffix_path) const;
376
    const TabletIndex* get_inverted_index_with_index_id(int64_t index_id,
377
                                                        const std::string& suffix_name) const;
378
    // check_valid: check if this column supports inverted index
379
    // Some columns (Float, Double, JSONB ...) from the variant do not support index, but they are listed in TabletIndex.
380
    // If returned, the index file will not be found.
381
    const TabletIndex* get_inverted_index(const TabletColumn& col, bool check_valid = true) const;
382
    const TabletIndex* get_inverted_index(int32_t col_unique_id,
383
                                          const std::string& suffix_path) const;
384
    bool has_ngram_bf_index(int32_t col_unique_id) const;
385
    const TabletIndex* get_ngram_bf_index(int32_t col_unique_id) const;
386
    void update_indexes_from_thrift(const std::vector<doris::TOlapTableIndex>& indexes);
387
    // If schema version is not set, it should be -1
388
394
    int32_t schema_version() const { return _schema_version; }
389
    void clear_columns();
390
    vectorized::Block create_block(
391
            const std::vector<uint32_t>& return_columns,
392
            const std::unordered_set<uint32_t>* tablet_columns_need_convert_null = nullptr) const;
393
    vectorized::Block create_block(bool ignore_dropped_col = true) const;
394
0
    void set_schema_version(int32_t version) { _schema_version = version; }
395
0
    void set_auto_increment_column(const std::string& auto_increment_column) {
396
0
        _auto_increment_column = auto_increment_column;
397
0
    }
398
369
    std::string auto_increment_column() const { return _auto_increment_column; }
399
400
24
    void set_table_id(int64_t table_id) { _table_id = table_id; }
401
266
    int64_t table_id() const { return _table_id; }
402
24
    void set_db_id(int64_t db_id) { _db_id = db_id; }
403
0
    int64_t db_id() const { return _db_id; }
404
    void build_current_tablet_schema(int64_t index_id, int32_t version,
405
                                     const OlapTableIndexSchema* index,
406
                                     const TabletSchema& out_tablet_schema);
407
408
    // Merge columns that do not exist in the current schema; these columns were dropped from the current schema
409
    // but they are useful in some cases. For example,
410
    // 1. origin schema is  ColA, ColB
411
    // 2. insert values     1, 2
412
    // 3. delete where ColB = 2
413
    // 4. drop ColB
414
    // 5. insert values  3
415
    // 6. add column ColB; although it is named ColB, it is different from the previous ColB, so we call the new one ColB'
416
    // 7. insert value  4, 5
417
    // Then the read schema should be ColA, ColB, ColB' because the delete predicate needs ColB to remove related data.
418
    // Because they have the same name, the dropped column should not be added to the name map, only to the unique id map.
419
    void merge_dropped_columns(const TabletSchema& src_schema);
420
421
    bool is_dropped_column(const TabletColumn& col) const;
422
423
    // copy extracted columns from src_schema
424
    void copy_extracted_columns(const TabletSchema& src_schema);
425
426
    // only reserve extracted columns
427
    void reserve_extracted_columns();
428
429
0
    string get_all_field_names() const {
430
0
        string str = "[";
431
0
        for (auto p : _field_name_to_index) {
432
0
            if (str.size() > 1) {
433
0
                str += ", ";
434
0
            }
435
0
            str += p.first.to_string() + "(" + std::to_string(_cols[p.second]->unique_id()) + ")";
436
0
        }
437
0
        str += "]";
438
0
        return str;
439
0
    }
440
441
    // Dump [(name, type, is_nullable), ...]
442
0
    string dump_structure() const {
443
0
        string str = "[";
444
0
        for (auto p : _cols) {
445
0
            if (str.size() > 1) {
446
0
                str += ", ";
447
0
            }
448
0
            str += "(";
449
0
            str += p->name();
450
0
            str += ", ";
451
0
            str += TabletColumn::get_string_by_field_type(p->type());
452
0
            str += ", ";
453
0
            str += "is_nullable:";
454
0
            str += (p->is_nullable() ? "true" : "false");
455
0
            str += ")";
456
0
        }
457
0
        str += "]";
458
0
        return str;
459
0
    }
460
461
0
    string dump_full_schema() const {
462
0
        string str = "[";
463
0
        for (auto p : _cols) {
464
0
            if (str.size() > 1) {
465
0
                str += ", ";
466
0
            }
467
0
            ColumnPB col_pb;
468
0
            p->to_schema_pb(&col_pb);
469
0
            str += "(";
470
0
            str += col_pb.ShortDebugString();
471
0
            str += ")";
472
0
        }
473
0
        str += "]";
474
0
        return str;
475
0
    }
476
477
    vectorized::Block create_block_by_cids(const std::vector<uint32_t>& cids);
478
479
    std::shared_ptr<TabletSchema> copy_without_variant_extracted_columns();
480
5.18k
    InvertedIndexStorageFormatPB get_inverted_index_storage_format() const {
481
5.18k
        return _inverted_index_storage_format;
482
5.18k
    }
483
484
    int64_t get_metadata_size() const override;
485
486
private:
487
    friend bool operator==(const TabletSchema& a, const TabletSchema& b);
488
    friend bool operator!=(const TabletSchema& a, const TabletSchema& b);
489
    TabletSchema(const TabletSchema&) = default;
490
491
    void clear_column_cache_handlers();
492
493
    KeysType _keys_type = DUP_KEYS;
494
    SortType _sort_type = SortType::LEXICAL;
495
    size_t _sort_col_num = 0;
496
    std::vector<TabletColumnPtr> _cols;
497
    std::vector<Cache::Handle*> _column_cache_handlers;
498
499
    std::vector<TabletIndex> _indexes;
500
    std::unordered_map<StringRef, int32_t, StringRefHash> _field_name_to_index;
501
    std::unordered_map<int32_t, int32_t> _field_id_to_index;
502
    std::unordered_map<vectorized::PathInDataRef, int32_t, vectorized::PathInDataRef::Hash>
503
            _field_path_to_index;
504
    size_t _num_columns = 0;
505
    size_t _num_variant_columns = 0;
506
    size_t _num_key_columns = 0;
507
    std::vector<uint32_t> _cluster_key_idxes;
508
    size_t _num_null_columns = 0;
509
    size_t _num_short_key_columns = 0;
510
    size_t _num_rows_per_row_block = 0;
511
    CompressKind _compress_kind = COMPRESS_NONE;
512
    segment_v2::CompressionTypePB _compression_type = segment_v2::CompressionTypePB::LZ4F;
513
    long _row_store_page_size = segment_v2::ROW_STORE_PAGE_SIZE_DEFAULT_VALUE;
514
    size_t _next_column_unique_id = 0;
515
    std::string _auto_increment_column;
516
517
    bool _has_bf_fpp = false;
518
    double _bf_fpp = 0;
519
    bool _is_in_memory = false;
520
    int32_t _delete_sign_idx = -1;
521
    int32_t _sequence_col_idx = -1;
522
    int32_t _version_col_idx = -1;
523
    int32_t _schema_version = -1;
524
    int64_t _table_id = -1;
525
    int64_t _db_id = -1;
526
    bool _disable_auto_compaction = false;
527
    bool _enable_single_replica_compaction = false;
528
    int64_t _mem_size = 0;
529
    bool _store_row_column = false;
530
    bool _skip_write_index_on_load = false;
531
    InvertedIndexStorageFormatPB _inverted_index_storage_format = InvertedIndexStorageFormatPB::V1;
532
533
    int64_t _vl_field_mem_size {0}; // variable length field
534
};
535
536
bool operator==(const TabletSchema& a, const TabletSchema& b);
537
bool operator!=(const TabletSchema& a, const TabletSchema& b);
538
539
using TabletSchemaSPtr = std::shared_ptr<TabletSchema>;
540
541
} // namespace doris