Coverage Report

Created: 2025-07-23 19:59

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/olap/tablet_schema.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <gen_cpp/Types_types.h>
21
#include <gen_cpp/olap_common.pb.h>
22
#include <gen_cpp/olap_file.pb.h>
23
#include <gen_cpp/segment_v2.pb.h>
24
#include <parallel_hashmap/phmap.h>
25
26
#include <algorithm>
27
#include <map>
28
#include <memory>
29
#include <string>
30
#include <unordered_map>
31
#include <unordered_set>
32
#include <utility>
33
#include <vector>
34
35
#include "common/consts.h"
36
#include "common/status.h"
37
#include "olap/metadata_adder.h"
38
#include "olap/olap_common.h"
39
#include "olap/rowset/segment_v2/options.h"
40
#include "runtime/define_primitive_type.h"
41
#include "runtime/descriptors.h"
42
#include "runtime/memory/lru_cache_policy.h"
43
#include "util/debug_points.h"
44
#include "util/string_parser.hpp"
45
#include "util/string_util.h"
46
#include "vec/aggregate_functions/aggregate_function.h"
47
#include "vec/common/string_ref.h"
48
#include "vec/common/string_utils/string_utils.h"
49
#include "vec/core/types.h"
50
#include "vec/json/path_in_data.h"
51
52
namespace doris {
53
namespace vectorized {
54
class Block;
55
class PathInData;
56
class IDataType;
57
} // namespace vectorized
58
59
#include "common/compile_check_begin.h"
60
61
struct OlapTableIndexSchema;
62
class TColumn;
63
class TOlapTableIndex;
64
class TabletColumn;
65
66
using TabletColumnPtr = std::shared_ptr<TabletColumn>;
67
68
class TabletColumn : public MetadataAdder<TabletColumn> {
69
public:
70
    TabletColumn();
71
    TabletColumn(const ColumnPB& column);
72
    TabletColumn(const TColumn& column);
73
    TabletColumn(FieldAggregationMethod agg, FieldType type);
74
    TabletColumn(FieldAggregationMethod agg, FieldType filed_type, bool is_nullable);
75
    TabletColumn(FieldAggregationMethod agg, FieldType filed_type, bool is_nullable,
76
                 int32_t unique_id, size_t length);
77
    void init_from_pb(const ColumnPB& column);
78
    void init_from_thrift(const TColumn& column);
79
    void to_schema_pb(ColumnPB* column) const;
80
81
322k
    int32_t unique_id() const { return _unique_id; }
82
258
    void set_unique_id(int32_t id) { _unique_id = id; }
83
184k
    const std::string& name() const { return _col_name; }
84
0
    const std::string& name_lower_case() const { return _col_name_lower_case; }
85
303
    // Sets the column name and refreshes the cached lower-case copy.
    // `col_name` is received by value, so it is moved into the member rather
    // than copied a second time.
    void set_name(std::string col_name) {
86
303
        _col_name = std::move(col_name);
87
303
        _col_name_lower_case = to_lower(_col_name);
88
303
    }
89
945k
    FieldType type() const { return _type; }
90
300
    void set_type(FieldType type) { _type = type; }
91
87.1k
    bool is_key() const { return _is_key; }
92
168k
    bool is_nullable() const { return _is_nullable; }
93
0
    bool is_auto_increment() const { return _is_auto_increment; }
94
0
    bool is_seqeunce_col() const { return _col_name == SEQUENCE_COL; }
95
0
    bool is_on_update_current_timestamp() const { return _is_on_update_current_timestamp; }
96
76.4k
    bool is_variant_type() const { return _type == FieldType::OLAP_FIELD_TYPE_VARIANT; }
97
13.2k
    bool is_bf_column() const { return _is_bf_column; }
98
13.2k
    bool has_bitmap_index() const { return _has_bitmap_index; }
99
14.0k
    bool is_array_type() const { return _type == FieldType::OLAP_FIELD_TYPE_ARRAY; }
100
12.4k
    bool is_agg_state_type() const { return _type == FieldType::OLAP_FIELD_TYPE_AGG_STATE; }
101
0
    bool is_jsonb_type() const { return _type == FieldType::OLAP_FIELD_TYPE_JSONB; }
102
0
    // Returns true when the column's field type is one of CHAR, VARCHAR,
    // STRING, HLL, BITMAP, QUANTILE_STATE or AGG_STATE.
    bool is_length_variable_type() const {
103
0
        return _type == FieldType::OLAP_FIELD_TYPE_CHAR ||
104
0
               _type == FieldType::OLAP_FIELD_TYPE_VARCHAR ||
105
0
               _type == FieldType::OLAP_FIELD_TYPE_STRING ||
106
0
               _type == FieldType::OLAP_FIELD_TYPE_HLL ||
107
0
               _type == FieldType::OLAP_FIELD_TYPE_BITMAP ||
108
0
               _type == FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE ||
109
0
               _type == FieldType::OLAP_FIELD_TYPE_AGG_STATE;
110
0
    }
111
    // Such columns do not exist in the frontend schema info, so we need to
112
    // add them into tablet_schema for later column indexing.
113
    static TabletColumn create_materialized_variant_column(const std::string& root,
114
                                                           const std::vector<std::string>& paths,
115
                                                           int32_t parent_unique_id);
116
188
    bool has_default_value() const { return _has_default_value; }
117
13.2k
    std::string default_value() const { return _default_value; }
118
39.6k
    int32_t length() const { return _length; }
119
222
    void set_length(int32_t length) { _length = length; }
120
0
    // Stores `default_value` and marks the column as having a default, so
    // has_default_value() returns true afterwards.
    void set_default_value(const std::string& default_value) {
121
0
        _default_value = default_value;
122
0
        _has_default_value = true;
123
0
    }
124
29.8k
    int32_t index_length() const { return _index_length; }
125
183
    void set_index_length(int32_t index_length) { _index_length = index_length; }
126
252
    void set_is_key(bool is_key) { _is_key = is_key; }
127
217
    void set_is_nullable(bool is_nullable) { _is_nullable = is_nullable; }
128
0
    void set_is_auto_increment(bool is_auto_increment) { _is_auto_increment = is_auto_increment; }
129
0
    // Sets the flag reported by is_on_update_current_timestamp().
    void set_is_on_update_current_timestamp(bool is_on_update_current_timestamp) {
130
0
        _is_on_update_current_timestamp = is_on_update_current_timestamp;
131
0
    }
132
    void set_path_info(const vectorized::PathInData& path);
133
19.5k
    FieldAggregationMethod aggregation() const { return _aggregation; }
134
    vectorized::AggregateFunctionPtr get_aggregate_function_union(
135
            vectorized::DataTypePtr type, int current_be_exec_version) const;
136
    vectorized::AggregateFunctionPtr get_aggregate_function(std::string suffix,
137
                                                            int current_be_exec_version) const;
138
148k
    int precision() const { return _precision; }
139
148k
    int frac() const { return _frac; }
140
0
    inline bool visible() const { return _visible; }
141
    bool has_char_type() const;
142
143
4
    // Sets the aggregation method and keeps the cached aggregation name in
    // sync by deriving it from get_string_by_aggregation_type(agg).
    void set_aggregation_method(FieldAggregationMethod agg) {
144
4
        _aggregation = agg;
145
4
        _aggregation_name = get_string_by_aggregation_type(agg);
146
4
    }
147
148
    /**
149
     * Add a sub column.
150
     */
151
    void add_sub_column(TabletColumn& sub_column);
152
153
13.2k
    uint32_t get_subtype_count() const { return _sub_column_count; }
154
66
    const TabletColumn& get_sub_column(uint64_t i) const { return *_sub_columns[i]; }
155
0
    const std::vector<TabletColumnPtr>& get_sub_columns() const { return _sub_columns; }
156
157
    friend bool operator==(const TabletColumn& a, const TabletColumn& b);
158
    friend bool operator!=(const TabletColumn& a, const TabletColumn& b);
159
160
    static std::string get_string_by_field_type(FieldType type);
161
    static std::string get_string_by_aggregation_type(FieldAggregationMethod aggregation_type);
162
    static FieldType get_field_type_by_string(const std::string& str);
163
    static FieldType get_field_type_by_type(PrimitiveType type);
164
    static FieldAggregationMethod get_aggregation_type_by_string(const std::string& str);
165
    static uint32_t get_field_length_by_type(TPrimitiveType::type type, uint32_t string_length);
166
    bool is_row_store_column() const;
167
13.2k
    std::string get_aggregation_name() const { return _aggregation_name; }
168
13.2k
    bool get_result_is_nullable() const { return _result_is_nullable; }
169
13.2k
    int get_be_exec_version() const { return _be_exec_version; }
170
109k
    bool has_path_info() const { return _column_path != nullptr && !_column_path->empty(); }
171
26.3k
    const vectorized::PathInDataPtr& path_info_ptr() const { return _column_path; }
172
    // If it is an extracted column from variant column
173
91.8k
    // True when a non-empty column path is set and the parent column unique
    // id is positive. (The stray `;` after the body has been dropped.)
    bool is_extracted_column() const {
174
91.8k
        return _column_path != nullptr && !_column_path->empty() && _parent_col_unique_id > 0;
175
91.8k
    }
176
26.4k
    // Returns the column path's get_path() string when is_extracted_column()
    // holds, and an empty string otherwise.
    std::string suffix_path() const {
177
26.4k
        return is_extracted_column() ? _column_path->get_path() : "";
178
26.4k
    }
179
0
    // True when a column path is set and its has_nested_part() reports true.
    bool is_nested_subcolumn() const {
180
0
        return _column_path != nullptr && _column_path->has_nested_part();
181
0
    }
182
26.3k
    int32_t parent_unique_id() const { return _parent_col_unique_id; }
183
6
    void set_parent_unique_id(int32_t col_unique_id) { _parent_col_unique_id = col_unique_id; }
184
11
    void set_is_bf_column(bool is_bf_column) { _is_bf_column = is_bf_column; }
185
0
    void set_has_bitmap_index(bool has_bitmap_index) { _has_bitmap_index = has_bitmap_index; }
186
    std::shared_ptr<const vectorized::IDataType> get_vec_type() const;
187
188
    void append_sparse_column(TabletColumn column);
189
    const TabletColumn& sparse_column_at(size_t oridinal) const;
190
    const std::vector<TabletColumnPtr>& sparse_columns() const;
191
13.2k
    size_t num_sparse_columns() const { return _num_sparse_columns; }
192
193
1
    // Checks the index flags of complex-typed columns.
    // Columns that are not ARRAY, STRUCT or MAP always yield Status::OK().
    // For ARRAY, STRUCT and MAP columns, Status::NotSupported is returned when
    // the column is flagged as a bloom filter column or has a bitmap index;
    // otherwise Status::OK().
    Status check_valid() const {
194
1
        if (type() != FieldType::OLAP_FIELD_TYPE_ARRAY &&
195
1
            type() != FieldType::OLAP_FIELD_TYPE_STRUCT &&
196
1
            type() != FieldType::OLAP_FIELD_TYPE_MAP) {
197
1
            return Status::OK();
198
1
        }
199
0
        if (is_bf_column()) {
200
0
            return Status::NotSupported("Do not support bloom filter index, type={}",
201
0
                                        get_string_by_field_type(type()));
202
0
        }
203
0
        if (has_bitmap_index()) {
204
0
            return Status::NotSupported("Do not support bitmap index, type={}",
205
0
                                        get_string_by_field_type(type()));
206
0
        }
207
0
        return Status::OK();
208
0
    }
209
210
private:
211
    int32_t _unique_id = -1;
212
    std::string _col_name;
213
    std::string _col_name_lower_case;
214
    // the field _type will change from TPrimitiveType
215
    // to string by 'EnumToString(TPrimitiveType, tcolumn.column_type.type, data_type);' (reference: TabletMeta::init_column_from_tcolumn)
216
    // to FieldType by 'TabletColumn::get_field_type_by_string' (reference: TabletColumn::init_from_pb).
217
    // And the _type in columnPB is string and it changed from FieldType by 'get_string_by_field_type' (reference: TabletColumn::to_schema_pb).
218
    FieldType _type;
219
    bool _is_key = false;
220
    FieldAggregationMethod _aggregation;
221
    std::string _aggregation_name;
222
    bool _is_nullable = false;
223
    bool _is_auto_increment = false;
224
    bool _is_on_update_current_timestamp {false};
225
226
    bool _has_default_value = false;
227
    std::string _default_value;
228
229
    bool _is_decimal = false;
230
    int32_t _precision = -1;
231
    int32_t _frac = -1;
232
233
    int32_t _length = -1;
234
    int32_t _index_length = -1;
235
236
    bool _is_bf_column = false;
237
238
    bool _has_bitmap_index = false;
239
    bool _visible = true;
240
241
    std::vector<TabletColumnPtr> _sub_columns;
242
    uint32_t _sub_column_count = 0;
243
244
    bool _result_is_nullable = false;
245
    int _be_exec_version = -1;
246
247
    // The extracted sub-columns from "variant" contain the following information:
248
    int32_t _parent_col_unique_id = -1;     // "variant" -> col_unique_id
249
    vectorized::PathInDataPtr _column_path; // the path of the sub-columns themselves
250
251
    // Record information about columns merged into a sparse column within a variant
252
    // `{"id": 100, "name" : "jack", "point" : 3.9}`
253
    // If the information mentioned above is inserted into the variant column,
254
    // 'id' and 'name' are correctly extracted, while 'point' is merged into the sparse column due to its sparsity.
255
    // The path_info and type of 'point' will be recorded using the TabletColumn.
256
    // Use shared_ptr for reuse and reducing column memory usage
257
    std::vector<TabletColumnPtr> _sparse_cols;
258
    size_t _num_sparse_columns = 0;
259
};
260
261
bool operator==(const TabletColumn& a, const TabletColumn& b);
262
bool operator!=(const TabletColumn& a, const TabletColumn& b);
263
264
class TabletIndex : public MetadataAdder<TabletIndex> {
265
public:
266
7.42k
    TabletIndex() = default;
267
    void init_from_thrift(const TOlapTableIndex& index, const TabletSchema& tablet_schema);
268
    void init_from_thrift(const TOlapTableIndex& index, const std::vector<int32_t>& column_uids);
269
    void init_from_pb(const TabletIndexPB& index);
270
    void to_schema_pb(TabletIndexPB* index) const;
271
272
11.2k
    int64_t index_id() const { return _index_id; }
273
0
    const std::string& index_name() const { return _index_name; }
274
9.35k
    IndexType index_type() const { return _index_type; }
275
9.20k
    const std::vector<int32_t>& col_unique_ids() const { return _col_unique_ids; }
276
25.5k
    const std::map<std::string, std::string>& properties() const { return _properties; }
277
0
    int32_t get_gram_size() const {
278
0
        if (_properties.contains("gram_size")) {
279
0
            return std::stoi(_properties.at("gram_size"));
280
0
        }
281
282
0
        return 0;
283
0
    }
284
0
    int32_t get_gram_bf_size() const {
285
0
        if (_properties.contains("bf_size")) {
286
0
            return std::stoi(_properties.at("bf_size"));
287
0
        }
288
289
0
        return 0;
290
0
    }
291
292
15.7k
    const std::string& get_index_suffix() const { return _escaped_index_suffix_path; }
293
294
    void set_escaped_escaped_index_suffix_path(const std::string& name);
295
296
private:
297
    int64_t _index_id = -1;
298
    // Identify the different index with the same _index_id
299
    std::string _escaped_index_suffix_path;
300
    std::string _index_name;
301
    IndexType _index_type;
302
    std::vector<int32_t> _col_unique_ids;
303
    std::map<std::string, std::string> _properties;
304
};
305
306
using TabletIndexPtr = std::shared_ptr<TabletIndex>;
307
308
class TabletSchema : public MetadataAdder<TabletSchema> {
309
public:
310
    enum class ColumnType { NORMAL = 0, DROPPED = 1, VARIANT = 2 };
311
    // TODO(yingchun): better to make constructor as private to avoid
312
    // manually init members incorrectly, and define a new function like
313
    // void create_from_pb(const TabletSchemaPB& schema, TabletSchema* tablet_schema).
314
    TabletSchema();
315
    ~TabletSchema() override;
316
317
    // Init from pb
318
    // ignore_extracted_columns: ignore the extracted columns from variant column
319
    // reuse_cached_column: reuse the cached column in the schema if they are the same, to reduce memory usage
320
    void init_from_pb(const TabletSchemaPB& schema, bool ignore_extracted_columns = false,
321
                      bool reuse_cached_column = false);
322
    // Notice: Use a deterministic way to serialize protobuf,
323
    // since serializing a Map in protobuf may be non-deterministic by default
324
    // Serializes `pb` into a std::string through a CodedOutputStream on which
    // SetSerializationDeterministic(true) has been set.
    // PbType is any type that provides SerializeToCodedStream(CodedOutputStream*).
    // NOTE(review): both streams are still alive at `return output;`; the
    // result presumably relies on them finalizing `output` when they are
    // destroyed, together with copy elision of the returned local - confirm
    // against the protobuf stream documentation.
    template <class PbType>
325
4.01k
    static std::string deterministic_string_serialize(const PbType& pb) {
326
4.01k
        std::string output;
327
4.01k
        google::protobuf::io::StringOutputStream string_output_stream(&output);
328
4.01k
        google::protobuf::io::CodedOutputStream output_stream(&string_output_stream);
329
4.01k
        output_stream.SetSerializationDeterministic(true);
330
4.01k
        pb.SerializeToCodedStream(&output_stream);
331
4.01k
        return output;
332
4.01k
    }
_ZN5doris12TabletSchema30deterministic_string_serializeINS_14TabletSchemaPBEEENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKT_
Line
Count
Source
325
3.64k
    static std::string deterministic_string_serialize(const PbType& pb) {
326
3.64k
        std::string output;
327
3.64k
        google::protobuf::io::StringOutputStream string_output_stream(&output);
328
3.64k
        google::protobuf::io::CodedOutputStream output_stream(&string_output_stream);
329
3.64k
        output_stream.SetSerializationDeterministic(true);
330
3.64k
        pb.SerializeToCodedStream(&output_stream);
331
3.64k
        return output;
332
3.64k
    }
_ZN5doris12TabletSchema30deterministic_string_serializeINS_8ColumnPBEEENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKT_
Line
Count
Source
325
258
    static std::string deterministic_string_serialize(const PbType& pb) {
326
258
        std::string output;
327
258
        google::protobuf::io::StringOutputStream string_output_stream(&output);
328
258
        google::protobuf::io::CodedOutputStream output_stream(&string_output_stream);
329
258
        output_stream.SetSerializationDeterministic(true);
330
258
        pb.SerializeToCodedStream(&output_stream);
331
258
        return output;
332
258
    }
_ZN5doris12TabletSchema30deterministic_string_serializeINS_13TabletIndexPBEEENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKT_
Line
Count
Source
325
112
    static std::string deterministic_string_serialize(const PbType& pb) {
326
112
        std::string output;
327
112
        google::protobuf::io::StringOutputStream string_output_stream(&output);
328
112
        google::protobuf::io::CodedOutputStream output_stream(&string_output_stream);
329
112
        output_stream.SetSerializationDeterministic(true);
330
112
        pb.SerializeToCodedStream(&output_stream);
331
112
        return output;
332
112
    }
333
    void to_schema_pb(TabletSchemaPB* tablet_meta_pb) const;
334
    void append_column(TabletColumn column, ColumnType col_type = ColumnType::NORMAL);
335
    void append_index(TabletIndex&& index);
336
    void update_index(const TabletColumn& column, const IndexType& index_type, TabletIndex&& index);
337
    void remove_index(int64_t index_id);
338
    void clear_index();
339
    // Must make sure the row column is always the last column
340
    void add_row_column();
341
    void copy_from(const TabletSchema& tablet_schema);
342
    // lightweight copy, take care of lifecycle of TabletColumn
343
    void shawdow_copy_without_columns(const TabletSchema& tablet_schema);
344
    void update_index_info_from(const TabletSchema& tablet_schema);
345
    std::string to_key() const;
346
    // get_metadata_size only counts the memory of the TabletSchema itself, not including child objects.
347
52
    int64_t mem_size() const { return get_metadata_size(); }
348
    size_t row_size() const;
349
    int32_t field_index(const std::string& field_name) const;
350
    int32_t field_index(const vectorized::PathInData& path) const;
351
    int32_t field_index(int32_t col_unique_id) const;
352
    const TabletColumn& column(size_t ordinal) const;
353
    Result<const TabletColumn*> column(const std::string& field_name) const;
354
    Status have_column(const std::string& field_name) const;
355
    bool exist_column(const std::string& field_name) const;
356
    bool has_column_unique_id(int32_t col_unique_id) const;
357
    const TabletColumn& column_by_uid(int32_t col_unique_id) const;
358
    TabletColumn& mutable_column_by_uid(int32_t col_unique_id);
359
    TabletColumn& mutable_column(size_t ordinal);
360
    void replace_column(size_t pos, TabletColumn new_col);
361
    const std::vector<TabletColumnPtr>& columns() const;
362
660k
    size_t num_columns() const { return _num_columns; }
363
1.08M
    size_t num_key_columns() const { return _num_key_columns; }
364
136k
    const std::vector<uint32_t>& cluster_key_uids() const { return _cluster_key_uids; }
365
0
    size_t num_null_columns() const { return _num_null_columns; }
366
5.21k
    size_t num_short_key_columns() const { return _num_short_key_columns; }
367
0
    size_t num_rows_per_row_block() const { return _num_rows_per_row_block; }
368
1.28k
    // Returns _num_variant_columns. (The stray `;` after the body has been dropped.)
    size_t num_variant_columns() const { return _num_variant_columns; }
369
7.11M
    KeysType keys_type() const { return _keys_type; }
370
5.52k
    SortType sort_type() const { return _sort_type; }
371
0
    size_t sort_col_num() const { return _sort_col_num; }
372
0
    CompressKind compress_kind() const { return _compress_kind; }
373
0
    size_t next_column_unique_id() const { return _next_column_unique_id; }
374
4
    bool has_bf_fpp() const { return _has_bf_fpp; }
375
4
    double bloom_filter_fpp() const { return _bf_fpp; }
376
27.9k
    bool is_in_memory() const { return _is_in_memory; }
377
0
    void set_is_in_memory(bool is_in_memory) { _is_in_memory = is_in_memory; }
378
2
    // Sets the flag reported by disable_auto_compaction().
    void set_disable_auto_compaction(bool disable_auto_compaction) {
379
2
        _disable_auto_compaction = disable_auto_compaction;
380
2
    }
381
288
    bool disable_auto_compaction() const { return _disable_auto_compaction; }
382
0
    // Sets the flag reported by variant_flatten_nested().
    void set_enable_variant_flatten_nested(bool flatten_nested) {
383
0
        _enable_variant_flatten_nested = flatten_nested;
384
0
    }
385
0
    bool variant_flatten_nested() const { return _enable_variant_flatten_nested; }
386
0
    // Sets the flag reported by enable_single_replica_compaction().
    void set_enable_single_replica_compaction(bool enable_single_replica_compaction) {
387
0
        _enable_single_replica_compaction = enable_single_replica_compaction;
388
0
    }
389
351
    bool enable_single_replica_compaction() const { return _enable_single_replica_compaction; }
390
    // indicate if full row store column(all the columns encodes as row) exists
391
0
    // True when _store_row_column is set and row_columns_uids() is empty,
    // i.e. no explicit list of row-store column unique ids was recorded.
    bool has_row_store_for_all_columns() const {
392
0
        return _store_row_column && row_columns_uids().empty();
393
0
    }
394
0
    void set_skip_write_index_on_load(bool skip) { _skip_write_index_on_load = skip; }
395
73
    bool skip_write_index_on_load() const { return _skip_write_index_on_load; }
396
8.20k
    int32_t delete_sign_idx() const { return _delete_sign_idx; }
397
0
    void set_delete_sign_idx(int32_t delete_sign_idx) { _delete_sign_idx = delete_sign_idx; }
398
142k
    bool has_sequence_col() const { return _sequence_col_idx != -1; }
399
64.7k
    int32_t sequence_col_idx() const { return _sequence_col_idx; }
400
0
    void set_version_col_idx(int32_t version_col_idx) { _version_col_idx = version_col_idx; }
401
0
    int32_t version_col_idx() const { return _version_col_idx; }
402
0
    bool has_skip_bitmap_col() const { return _skip_bitmap_col_idx != -1; }
403
0
    int32_t skip_bitmap_col_idx() const { return _skip_bitmap_col_idx; }
404
5.09k
    segment_v2::CompressionTypePB compression_type() const { return _compression_type; }
405
0
    void set_row_store_page_size(long page_size) { _row_store_page_size = page_size; }
406
0
    long row_store_page_size() const { return _row_store_page_size; }
407
0
    void set_storage_page_size(long storage_page_size) { _storage_page_size = storage_page_size; }
408
13.2k
    long storage_page_size() const { return _storage_page_size; }
409
0
    // Sets the value reported by storage_dict_page_size().
    void set_storage_dict_page_size(long storage_dict_page_size) {
410
0
        _storage_dict_page_size = storage_dict_page_size;
411
0
    }
412
13.2k
    long storage_dict_page_size() const { return _storage_dict_page_size; }
413
0
    // Returns true if any field name registered in _field_name_to_index
    // starts with BeConsts::GLOBAL_ROWID_COL. Every entry of the map is
    // visited until a match is found.
    bool has_global_row_id() const {
414
0
        for (auto [col_name, _] : _field_name_to_index) {
415
0
            if (col_name.start_with(StringRef(BeConsts::GLOBAL_ROWID_COL.data(),
416
0
                                              BeConsts::GLOBAL_ROWID_COL.size()))) {
417
0
                return true;
418
0
            }
419
0
        }
420
0
        return false;
421
0
    }
422
423
112
    // Collects a pointer to every entry of _indexes whose type is
    // IndexType::INVERTED, in _indexes order. The pointers come from get() on
    // the TabletIndexPtr entries, so they do not share ownership of the
    // indexes.
    const std::vector<const TabletIndex*> inverted_indexes() const {
424
112
        std::vector<const TabletIndex*> inverted_indexes;
425
1.25k
        for (const auto& index : _indexes) {
426
1.25k
            if (index->index_type() == IndexType::INVERTED) {
427
1.24k
                inverted_indexes.emplace_back(index.get());
428
1.24k
            }
429
1.25k
        }
430
112
        return inverted_indexes;
431
112
    }
432
11.1k
    // Returns true when at least one INVERTED index in _indexes has a
    // non-empty column-unique-id list whose first entry is >= 0.
    // The DBUG_EXECUTE_IF block (debug point "tablet_schema::has_inverted_index")
    // throws if an index has no column unique ids; presumably it only runs
    // when that debug point is enabled - confirm in util/debug_points.h.
    bool has_inverted_index() const {
433
11.1k
        for (const auto& index : _indexes) {
434
699
            DBUG_EXECUTE_IF("tablet_schema::has_inverted_index", {
435
699
                if (index->col_unique_ids().empty()) {
436
699
                    throw Exception(Status::InternalError("col unique ids cannot be empty"));
437
699
                }
438
699
            });
439
440
699
            if (index->index_type() == IndexType::INVERTED) {
441
                // Only count the index when its first column unique id is non-negative.
442
699
                if (!index->col_unique_ids().empty() && index->col_unique_ids()[0] >= 0) {
443
699
                    return true;
444
699
                }
445
699
            }
446
699
        }
447
10.4k
        return false;
448
11.1k
    }
449
    bool has_inverted_index_with_index_id(int64_t index_id) const;
450
    // Check whether this column supports inverted index
451
    // Some columns (Float, Double, JSONB ...) from the variant do not support index, but they are listed in TabletIndex.
452
    const TabletIndex* inverted_index(const TabletColumn& col) const;
453
454
    // Regardless of whether this column supports inverted index
455
    // TabletIndex information will be returned as long as it exists.
456
    const TabletIndex* inverted_index(int32_t col_unique_id,
457
                                      const std::string& suffix_path = "") const;
458
    bool has_ngram_bf_index(int32_t col_unique_id) const;
459
    const TabletIndex* get_ngram_bf_index(int32_t col_unique_id) const;
460
    void update_indexes_from_thrift(const std::vector<doris::TOlapTableIndex>& indexes);
461
    // If schema version is not set, it should be -1
462
1.54k
    int32_t schema_version() const { return _schema_version; }
463
    void clear_columns();
464
    vectorized::Block create_block(
465
            const std::vector<uint32_t>& return_columns,
466
            const std::unordered_set<uint32_t>* tablet_columns_need_convert_null = nullptr) const;
467
    vectorized::Block create_block(bool ignore_dropped_col = true) const;
468
0
    void set_schema_version(int32_t version) { _schema_version = version; }
469
0
    void set_auto_increment_column(const std::string& auto_increment_column) {
470
0
        _auto_increment_column = auto_increment_column;
471
0
    }
472
0
    std::string auto_increment_column() const { return _auto_increment_column; }
473
474
28
    void set_table_id(int64_t table_id) { _table_id = table_id; }
475
465
    int64_t table_id() const { return _table_id; }
476
28
    void set_db_id(int64_t db_id) { _db_id = db_id; }
477
0
    int64_t db_id() const { return _db_id; }
478
    void build_current_tablet_schema(int64_t index_id, int32_t version,
479
                                     const OlapTableIndexSchema* index,
480
                                     const TabletSchema& out_tablet_schema);
481
482
    // Merge columns that do not exist in the current schema; these columns were dropped from the current schema
483
    // but they are useful in some cases. For example,
484
    // 1. origin schema is  ColA, ColB
485
    // 2. insert values     1, 2
486
    // 3. delete where ColB = 2
487
    // 4. drop ColB
488
    // 5. insert values  3
489
    // 6. add column ColB; although it is named ColB, it is different from the previous ColB, so we call the new one ColB'
490
    // 7. insert value  4, 5
491
    // Then the read schema should be ColA, ColB, ColB' because the delete predicate needs ColB to remove related data.
492
    // Because they have the same name, the dropped column should not be added to the map; it is only tracked by unique id.
493
    void merge_dropped_columns(const TabletSchema& src_schema);
494
495
    bool is_dropped_column(const TabletColumn& col) const;
496
497
    // copy extracted columns from src_schema
498
    void copy_extracted_columns(const TabletSchema& src_schema);
499
500
    // only reserve extracted columns
501
    void reserve_extracted_columns();
502
503
4.04k
    // Builds a string of the form "[name(uid), name(uid), ...]" from
    // _field_name_to_index; the uid is read from _cols at the mapped index.
    // Entries follow the iteration order of the unordered_map, so their order
    // in the output is unspecified.
    std::string get_all_field_names() const {
504
4.04k
        std::string str = "[";
505
4.04k
        for (auto p : _field_name_to_index) {
506
4.04k
            if (str.size() > 1) {
507
0
                str += ", ";
508
0
            }
509
4.04k
            str += p.first.to_string() + "(" + std::to_string(_cols[p.second]->unique_id()) + ")";
510
4.04k
        }
511
4.04k
        str += "]";
512
4.04k
        return str;
513
4.04k
    }
514
515
    // Dump [(name, type, is_nullable), ...]
516
0
    // Builds "[(name, TYPE, is_nullable:true|false), ...]" with one tuple per
    // entry of _cols, in _cols order. The type text comes from
    // TabletColumn::get_string_by_field_type().
    // Columns are visited by const reference so the shared_ptr is not copied
    // (no reference-count update) on each iteration.
    std::string dump_structure() const {
517
0
        std::string str = "[";
518
0
        for (const auto& p : _cols) {
519
0
            if (str.size() > 1) {
520
0
                str += ", ";
521
0
            }
522
0
            str += "(";
523
0
            str += p->name();
524
0
            str += ", ";
525
0
            str += TabletColumn::get_string_by_field_type(p->type());
526
0
            str += ", ";
527
0
            str += "is_nullable:";
528
0
            str += (p->is_nullable() ? "true" : "false");
529
0
            str += ")";
530
0
        }
531
0
        str += "]";
532
0
        return str;
533
0
    }
534
535
1
    // Builds "[(<column pb>), ...]" with one entry per element of _cols, in
    // _cols order. Each column is converted with to_schema_pb() and rendered
    // with ColumnPB::ShortDebugString().
    // Columns are visited by const reference so the shared_ptr is not copied
    // (no reference-count update) on each iteration.
    std::string dump_full_schema() const {
536
1
        std::string str = "[";
537
4
        for (const auto& p : _cols) {
538
4
            if (str.size() > 1) {
539
3
                str += ", ";
540
3
            }
541
4
            ColumnPB col_pb;
542
4
            p->to_schema_pb(&col_pb);
543
4
            str += "(";
544
4
            str += col_pb.ShortDebugString();
545
4
            str += ")";
546
4
        }
547
1
        str += "]";
548
1
        return str;
549
1
    }
550
551
    vectorized::Block create_block_by_cids(const std::vector<uint32_t>& cids) const;
552
553
    std::shared_ptr<TabletSchema> copy_without_variant_extracted_columns();
554
9.21k
    // Returns the inverted index storage format recorded in this schema
    // (_inverted_index_storage_format, which defaults to V1).
    InvertedIndexStorageFormatPB get_inverted_index_storage_format() const {
555
9.21k
        return _inverted_index_storage_format;
556
9.21k
    }
557
558
    void update_tablet_columns(const TabletSchema& tablet_schema,
559
                               const std::vector<TColumn>& t_columns);
560
561
0
    const std::vector<int32_t>& row_columns_uids() const { return _row_store_column_unique_ids; }
562
563
    int64_t get_metadata_size() const override;
564
565
private:
566
    friend bool operator==(const TabletSchema& a, const TabletSchema& b);
567
    friend bool operator!=(const TabletSchema& a, const TabletSchema& b);
568
0
    TabletSchema(const TabletSchema&) = default;
569
570
    KeysType _keys_type = DUP_KEYS;
571
    SortType _sort_type = SortType::LEXICAL;
572
    size_t _sort_col_num = 0;
573
    std::vector<TabletColumnPtr> _cols;
574
575
    std::vector<TabletIndexPtr> _indexes;
576
    std::unordered_map<StringRef, int32_t, StringRefHash> _field_name_to_index;
577
    std::unordered_map<int32_t, int32_t> _field_uniqueid_to_index;
578
    std::unordered_map<vectorized::PathInDataRef, int32_t, vectorized::PathInDataRef::Hash>
579
            _field_path_to_index;
580
581
    // index_type/col_unique_id/suffix -> idx in _indexes
582
    using IndexKey = std::tuple<IndexType, int32_t, std::string>;
583
    // Hash functor for IndexKey, used by _col_id_suffix_to_index.
    // It chains doris::HashUtil::hash over the three tuple members: the raw
    // bytes of the index type, the raw bytes of the column unique id, and the
    // characters of the suffix string. Each call's result is passed as the
    // seed of the next call, starting from a seed of 0.
    struct IndexKeyHash {
584
49.6k
        size_t operator()(const IndexKey& t) const {
585
49.6k
            uint32_t seed = 0;
586
49.6k
            seed = doris::HashUtil::hash((const char*)&std::get<0>(t), sizeof(std::get<0>(t)),
587
49.6k
                                         seed);
588
49.6k
            seed = doris::HashUtil::hash((const char*)&std::get<1>(t), sizeof(std::get<1>(t)),
589
49.6k
                                         seed);
590
49.6k
            seed = doris::HashUtil::hash((const char*)std::get<2>(t).c_str(),
591
49.6k
                                         static_cast<uint32_t>(std::get<2>(t).size()), seed);
592
49.6k
            return seed;
593
49.6k
        }
594
    };
595
    std::unordered_map<IndexKey, int32_t, IndexKeyHash> _col_id_suffix_to_index;
596
597
    int32_t _num_columns = 0;
598
    size_t _num_variant_columns = 0;
599
    size_t _num_key_columns = 0;
600
    std::vector<uint32_t> _cluster_key_uids;
601
    size_t _num_null_columns = 0;
602
    size_t _num_short_key_columns = 0;
603
    size_t _num_rows_per_row_block = 0;
604
    CompressKind _compress_kind = COMPRESS_NONE;
605
    segment_v2::CompressionTypePB _compression_type = segment_v2::CompressionTypePB::LZ4F;
606
    long _row_store_page_size = segment_v2::ROW_STORE_PAGE_SIZE_DEFAULT_VALUE;
607
    long _storage_page_size = segment_v2::STORAGE_PAGE_SIZE_DEFAULT_VALUE;
608
    long _storage_dict_page_size = segment_v2::STORAGE_DICT_PAGE_SIZE_DEFAULT_VALUE;
609
    size_t _next_column_unique_id = 0;
610
    std::string _auto_increment_column;
611
612
    bool _has_bf_fpp = false;
613
    double _bf_fpp = 0;
614
    bool _is_in_memory = false;
615
    int32_t _delete_sign_idx = -1;
616
    int32_t _sequence_col_idx = -1;
617
    int32_t _version_col_idx = -1;
618
    int32_t _skip_bitmap_col_idx = -1;
619
    int32_t _schema_version = -1;
620
    int64_t _table_id = -1;
621
    int64_t _db_id = -1;
622
    bool _disable_auto_compaction = false;
623
    bool _enable_single_replica_compaction = false;
624
    bool _store_row_column = false;
625
    bool _skip_write_index_on_load = false;
626
    InvertedIndexStorageFormatPB _inverted_index_storage_format = InvertedIndexStorageFormatPB::V1;
627
628
    // Contains column ids of which columns should be encoded into row store.
629
    // ATTN: For compatibility reasons, empty cids means all columns of the tablet schema are encoded into the row column
630
    std::vector<int32_t> _row_store_column_unique_ids;
631
    bool _enable_variant_flatten_nested = false;
632
};
633
634
bool operator==(const TabletSchema& a, const TabletSchema& b);
635
bool operator!=(const TabletSchema& a, const TabletSchema& b);
636
637
using TabletSchemaSPtr = std::shared_ptr<TabletSchema>;
638
639
#include "common/compile_check_end.h"
640
} // namespace doris