Coverage Report

Created: 2026-04-01 07:58

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/tablet/tablet_schema.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "storage/tablet/tablet_schema.h"
19
20
#include <gen_cpp/Descriptors_types.h>
21
#include <gen_cpp/olap_file.pb.h>
22
#include <glog/logging.h>
23
#include <google/protobuf/io/coded_stream.h>
24
#include <google/protobuf/io/zero_copy_stream.h>
25
#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
26
27
#include <algorithm>
28
#include <cctype>
29
// IWYU pragma: no_include <bits/std_abs.h>
30
#include <cmath> // IWYU pragma: keep
31
#include <memory>
32
#include <ostream>
33
#include <vector>
34
35
#include "common/compiler_util.h" // IWYU pragma: keep
36
#include "common/consts.h"
37
#include "common/status.h"
38
#include "core/block/block.h"
39
#include "core/column/column_nothing.h"
40
#include "core/data_type/data_type.h"
41
#include "core/data_type/data_type_factory.hpp"
42
#include "core/string_ref.h"
43
#include "exec/common/hex.h"
44
#include "exprs/aggregate/aggregate_function_simple_factory.h"
45
#include "exprs/aggregate/aggregate_function_state_union.h"
46
#include "storage/index/inverted/analyzer/analyzer.h"
47
#include "storage/index/inverted/inverted_index_parser.h"
48
#include "storage/olap_common.h"
49
#include "storage/olap_define.h"
50
#include "storage/tablet/tablet_column_object_pool.h"
51
#include "storage/tablet/tablet_meta.h"
52
#include "storage/tablet_info.h"
53
#include "storage/types.h"
54
#include "storage/utils.h"
55
#include "util/json/path_in_data.h"
56
57
namespace doris {
58
#include "common/compile_check_begin.h"
59
4.31M
// Translates a PrimitiveType into the FieldType used by the tablet schema.
//
// INVALID_TYPE, TYPE_BINARY, TYPE_DECIMALV2, TYPE_LAMBDA_FUNCTION and every
// value that is not listed explicitly map to OLAP_FIELD_TYPE_UNKNOWN.
FieldType TabletColumn::get_field_type_by_type(PrimitiveType primitiveType) {
    switch (primitiveType) {
    // Null and boolean.
    case PrimitiveType::TYPE_NULL:
        return FieldType::OLAP_FIELD_TYPE_NONE;
    case PrimitiveType::TYPE_BOOLEAN:
        return FieldType::OLAP_FIELD_TYPE_BOOL;

    // Integers.
    case PrimitiveType::TYPE_TINYINT:
        return FieldType::OLAP_FIELD_TYPE_TINYINT;
    case PrimitiveType::TYPE_SMALLINT:
        return FieldType::OLAP_FIELD_TYPE_SMALLINT;
    case PrimitiveType::TYPE_INT:
        return FieldType::OLAP_FIELD_TYPE_INT;
    case PrimitiveType::TYPE_BIGINT:
        return FieldType::OLAP_FIELD_TYPE_BIGINT;
    case PrimitiveType::TYPE_LARGEINT:
        return FieldType::OLAP_FIELD_TYPE_LARGEINT;

    // Floating point.
    case PrimitiveType::TYPE_FLOAT:
        return FieldType::OLAP_FIELD_TYPE_FLOAT;
    case PrimitiveType::TYPE_DOUBLE:
        return FieldType::OLAP_FIELD_TYPE_DOUBLE;

    // Decimals.
    case PrimitiveType::TYPE_DECIMAL32:
        return FieldType::OLAP_FIELD_TYPE_DECIMAL32;
    case PrimitiveType::TYPE_DECIMAL64:
        return FieldType::OLAP_FIELD_TYPE_DECIMAL64;
    case PrimitiveType::TYPE_DECIMAL128I:
        return FieldType::OLAP_FIELD_TYPE_DECIMAL128I;
    case PrimitiveType::TYPE_DECIMAL256:
        return FieldType::OLAP_FIELD_TYPE_DECIMAL256;

    // Character data.
    case PrimitiveType::TYPE_CHAR:
        return FieldType::OLAP_FIELD_TYPE_CHAR;
    case PrimitiveType::TYPE_VARCHAR:
        return FieldType::OLAP_FIELD_TYPE_VARCHAR;
    case PrimitiveType::TYPE_STRING:
        return FieldType::OLAP_FIELD_TYPE_STRING;
    case PrimitiveType::TYPE_JSONB:
        return FieldType::OLAP_FIELD_TYPE_JSONB;
    case PrimitiveType::TYPE_VARIANT:
        return FieldType::OLAP_FIELD_TYPE_VARIANT;

    // Date and time.
    case PrimitiveType::TYPE_DATE:
        return FieldType::OLAP_FIELD_TYPE_DATE;
    case PrimitiveType::TYPE_DATETIME:
        return FieldType::OLAP_FIELD_TYPE_DATETIME;
    case PrimitiveType::TYPE_DATEV2:
        return FieldType::OLAP_FIELD_TYPE_DATEV2;
    case PrimitiveType::TYPE_DATETIMEV2:
        return FieldType::OLAP_FIELD_TYPE_DATETIMEV2;
    case PrimitiveType::TYPE_TIMESTAMPTZ:
        return FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ;
    case PrimitiveType::TYPE_TIMEV2:
        return FieldType::OLAP_FIELD_TYPE_TIMEV2;

    // Network addresses.
    case PrimitiveType::TYPE_IPV4:
        return FieldType::OLAP_FIELD_TYPE_IPV4;
    case PrimitiveType::TYPE_IPV6:
        return FieldType::OLAP_FIELD_TYPE_IPV6;

    // Nested types.
    case PrimitiveType::TYPE_STRUCT:
        return FieldType::OLAP_FIELD_TYPE_STRUCT;
    case PrimitiveType::TYPE_ARRAY:
        return FieldType::OLAP_FIELD_TYPE_ARRAY;
    case PrimitiveType::TYPE_MAP:
        return FieldType::OLAP_FIELD_TYPE_MAP;

    // Aggregation-oriented types.
    case PrimitiveType::TYPE_HLL:
        return FieldType::OLAP_FIELD_TYPE_HLL;
    case PrimitiveType::TYPE_BITMAP:
        return FieldType::OLAP_FIELD_TYPE_BITMAP;
    case PrimitiveType::TYPE_QUANTILE_STATE:
        return FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE;
    case PrimitiveType::TYPE_AGG_STATE:
        return FieldType::OLAP_FIELD_TYPE_AGG_STATE;

    // Not implemented for storage, or not a known value at all.
    case PrimitiveType::INVALID_TYPE:
    case PrimitiveType::TYPE_BINARY:
    case PrimitiveType::TYPE_DECIMALV2:
    case PrimitiveType::TYPE_LAMBDA_FUNCTION:
    default:
        return FieldType::OLAP_FIELD_TYPE_UNKNOWN;
    }
}
139
140
19.2M
// Translates a storage FieldType into the matching PrimitiveType.
//
// The lookup table is indexed by the numeric value of FieldType; the number
// in each entry's comment is the index that entry occupies. Field types that
// have no PrimitiveType counterpart in the table map to INVALID_TYPE.
//
// A value that falls outside the table (negative, or larger than the last
// entry) also yields INVALID_TYPE instead of reading past the end of the
// array.
PrimitiveType TabletColumn::get_primitive_type_by_field_type(FieldType type) {
    static constexpr PrimitiveType mapping[] = {
            /*  0 */ PrimitiveType::INVALID_TYPE,
            /*  1 OLAP_FIELD_TYPE_TINYINT           */ PrimitiveType::TYPE_TINYINT,
            /*  2 OLAP_FIELD_TYPE_UNSIGNED_TINYINT  */ PrimitiveType::INVALID_TYPE,
            /*  3 OLAP_FIELD_TYPE_SMALLINT          */ PrimitiveType::TYPE_SMALLINT,
            /*  4 OLAP_FIELD_TYPE_UNSIGNED_SMALLINT */ PrimitiveType::INVALID_TYPE,
            /*  5 OLAP_FIELD_TYPE_INT               */ PrimitiveType::TYPE_INT,
            /*  6 OLAP_FIELD_TYPE_UNSIGNED_INT      */ PrimitiveType::INVALID_TYPE,
            /*  7 OLAP_FIELD_TYPE_BIGINT            */ PrimitiveType::TYPE_BIGINT,
            /*  8 OLAP_FIELD_TYPE_UNSIGNED_BIGINT   */ PrimitiveType::INVALID_TYPE,
            /*  9 OLAP_FIELD_TYPE_LARGEINT          */ PrimitiveType::TYPE_LARGEINT,
            /* 10 OLAP_FIELD_TYPE_FLOAT             */ PrimitiveType::TYPE_FLOAT,
            /* 11 OLAP_FIELD_TYPE_DOUBLE            */ PrimitiveType::TYPE_DOUBLE,
            /* 12 OLAP_FIELD_TYPE_DISCRETE_DOUBLE   */ PrimitiveType::INVALID_TYPE,
            /* 13 OLAP_FIELD_TYPE_CHAR              */ PrimitiveType::TYPE_CHAR,
            /* 14 OLAP_FIELD_TYPE_DATE              */ PrimitiveType::TYPE_DATE,
            /* 15 OLAP_FIELD_TYPE_DATETIME          */ PrimitiveType::TYPE_DATETIME,
            /* 16 OLAP_FIELD_TYPE_DECIMAL           */ PrimitiveType::INVALID_TYPE,
            /* 17 OLAP_FIELD_TYPE_VARCHAR           */ PrimitiveType::TYPE_VARCHAR,
            /* 18 OLAP_FIELD_TYPE_STRUCT            */ PrimitiveType::TYPE_STRUCT,
            /* 19 OLAP_FIELD_TYPE_ARRAY             */ PrimitiveType::TYPE_ARRAY,
            /* 20 OLAP_FIELD_TYPE_MAP               */ PrimitiveType::TYPE_MAP,
            /* 21 OLAP_FIELD_TYPE_UNKNOWN           */ PrimitiveType::INVALID_TYPE,
            /* 22 OLAP_FIELD_TYPE_NONE              */ PrimitiveType::TYPE_NULL,
            /* 23 OLAP_FIELD_TYPE_HLL               */ PrimitiveType::TYPE_HLL,
            /* 24 OLAP_FIELD_TYPE_BOOL              */ PrimitiveType::TYPE_BOOLEAN,
            /* 25 OLAP_FIELD_TYPE_BITMAP            */ PrimitiveType::TYPE_BITMAP,
            /* 26 OLAP_FIELD_TYPE_STRING            */ PrimitiveType::TYPE_STRING,
            /* 27 OLAP_FIELD_TYPE_QUANTILE_STATE    */ PrimitiveType::TYPE_QUANTILE_STATE,
            /* 28 OLAP_FIELD_TYPE_DATEV2            */ PrimitiveType::TYPE_DATEV2,
            /* 29 OLAP_FIELD_TYPE_DATETIMEV2        */ PrimitiveType::TYPE_DATETIMEV2,
            /* 30 OLAP_FIELD_TYPE_TIMEV2            */ PrimitiveType::TYPE_TIMEV2,
            /* 31 OLAP_FIELD_TYPE_DECIMAL32         */ PrimitiveType::TYPE_DECIMAL32,
            /* 32 OLAP_FIELD_TYPE_DECIMAL64         */ PrimitiveType::TYPE_DECIMAL64,
            /* 33 OLAP_FIELD_TYPE_DECIMAL128I       */ PrimitiveType::TYPE_DECIMAL128I,
            /* 34 OLAP_FIELD_TYPE_JSONB             */ PrimitiveType::TYPE_JSONB,
            /* 35 OLAP_FIELD_TYPE_VARIANT           */ PrimitiveType::TYPE_VARIANT,
            /* 36 OLAP_FIELD_TYPE_AGG_STATE         */ PrimitiveType::TYPE_AGG_STATE,
            /* 37 OLAP_FIELD_TYPE_DECIMAL256        */ PrimitiveType::TYPE_DECIMAL256,
            /* 38 OLAP_FIELD_TYPE_IPV4              */ PrimitiveType::TYPE_IPV4,
            /* 39 OLAP_FIELD_TYPE_IPV6              */ PrimitiveType::TYPE_IPV6,
            /* 40 OLAP_FIELD_TYPE_TIMESTAMPTZ       */ PrimitiveType::TYPE_TIMESTAMPTZ,
    };
    constexpr int mapping_size = static_cast<int>(sizeof(mapping) / sizeof(mapping[0]));

    const int idx = static_cast<int>(type);
    // Guard the table: an enum value that is not covered by `mapping` (for
    // example a value added to FieldType later, or one decoded from damaged
    // metadata) must not turn into an out-of-bounds read.
    if (idx < 0 || idx >= mapping_size) {
        return PrimitiveType::INVALID_TYPE;
    }
    return mapping[idx];
}
188
189
20.9M
// Parses a type name (case-insensitive) into a FieldType.
//
// Most names must match exactly; "DECIMAL", "VARCHAR" and "HLL" are matched
// as prefixes, so any string that merely starts with one of them is accepted.
// The table below is scanned top to bottom and the first hit wins, which is
// why the exact "DECIMAL32"/"DECIMAL64"/... names sit above the "DECIMAL"
// prefix entry.
//
// Returns OLAP_FIELD_TYPE_UNKNOWN (and logs a warning) when nothing matches,
// including for an empty string.
FieldType TabletColumn::get_field_type_by_string(const std::string& type_str) {
    struct TypeName {
        const char* name;
        bool is_prefix; // true: match `name` as a prefix; false: exact match
        FieldType type;
    };
    static constexpr TypeName kTypeNames[] = {
            {"TINYINT", false, FieldType::OLAP_FIELD_TYPE_TINYINT},
            {"SMALLINT", false, FieldType::OLAP_FIELD_TYPE_SMALLINT},
            {"INT", false, FieldType::OLAP_FIELD_TYPE_INT},
            {"BIGINT", false, FieldType::OLAP_FIELD_TYPE_BIGINT},
            {"LARGEINT", false, FieldType::OLAP_FIELD_TYPE_LARGEINT},
            {"UNSIGNED_TINYINT", false, FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT},
            {"UNSIGNED_SMALLINT", false, FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT},
            {"UNSIGNED_INT", false, FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT},
            {"UNSIGNED_BIGINT", false, FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT},
            {"IPV4", false, FieldType::OLAP_FIELD_TYPE_IPV4},
            {"IPV6", false, FieldType::OLAP_FIELD_TYPE_IPV6},
            {"FLOAT", false, FieldType::OLAP_FIELD_TYPE_FLOAT},
            {"DISCRETE_DOUBLE", false, FieldType::OLAP_FIELD_TYPE_DISCRETE_DOUBLE},
            {"DOUBLE", false, FieldType::OLAP_FIELD_TYPE_DOUBLE},
            {"CHAR", false, FieldType::OLAP_FIELD_TYPE_CHAR},
            {"DATE", false, FieldType::OLAP_FIELD_TYPE_DATE},
            {"DATEV2", false, FieldType::OLAP_FIELD_TYPE_DATEV2},
            {"DATETIMEV2", false, FieldType::OLAP_FIELD_TYPE_DATETIMEV2},
            {"DATETIME", false, FieldType::OLAP_FIELD_TYPE_DATETIME},
            {"TIMESTAMPTZ", false, FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ},
            {"DECIMAL32", false, FieldType::OLAP_FIELD_TYPE_DECIMAL32},
            {"DECIMAL64", false, FieldType::OLAP_FIELD_TYPE_DECIMAL64},
            {"DECIMAL128I", false, FieldType::OLAP_FIELD_TYPE_DECIMAL128I},
            {"DECIMAL256", false, FieldType::OLAP_FIELD_TYPE_DECIMAL256},
            {"DECIMAL", true, FieldType::OLAP_FIELD_TYPE_DECIMAL},
            {"VARCHAR", true, FieldType::OLAP_FIELD_TYPE_VARCHAR},
            {"STRING", false, FieldType::OLAP_FIELD_TYPE_STRING},
            {"JSONB", false, FieldType::OLAP_FIELD_TYPE_JSONB},
            {"VARIANT", false, FieldType::OLAP_FIELD_TYPE_VARIANT},
            {"BOOLEAN", false, FieldType::OLAP_FIELD_TYPE_BOOL},
            {"HLL", true, FieldType::OLAP_FIELD_TYPE_HLL},
            {"STRUCT", false, FieldType::OLAP_FIELD_TYPE_STRUCT},
            {"LIST", false, FieldType::OLAP_FIELD_TYPE_ARRAY},
            {"MAP", false, FieldType::OLAP_FIELD_TYPE_MAP},
            {"OBJECT", false, FieldType::OLAP_FIELD_TYPE_BITMAP},
            {"BITMAP", false, FieldType::OLAP_FIELD_TYPE_BITMAP},
            {"ARRAY", false, FieldType::OLAP_FIELD_TYPE_ARRAY},
            {"QUANTILE_STATE", false, FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE},
            {"AGG_STATE", false, FieldType::OLAP_FIELD_TYPE_AGG_STATE},
    };

    std::string upper_type_str = type_str;
    // std::toupper has undefined behaviour for negative arguments other than
    // EOF, so each byte is passed as unsigned char.
    std::transform(type_str.begin(), type_str.end(), upper_type_str.begin(),
                   [](unsigned char c) { return static_cast<char>(std::toupper(c)); });

    for (const TypeName& entry : kTypeNames) {
        // rfind(name, 0) == 0 is true exactly when the string starts with `name`.
        const bool matched = entry.is_prefix ? upper_type_str.rfind(entry.name, 0) == 0
                                             : upper_type_str == entry.name;
        if (matched) {
            return entry.type;
        }
    }

    LOG(WARNING) << "invalid type string. [type='" << type_str << "']";
    return FieldType::OLAP_FIELD_TYPE_UNKNOWN;
}
280
281
20.3M
// Parses an aggregation name (case-insensitive) into a FieldAggregationMethod.
//
// Names in the table below map to their dedicated method. Any other
// non-empty string maps to OLAP_FIELD_AGGREGATION_GENERIC, and an empty
// string maps to OLAP_FIELD_AGGREGATION_UNKNOWN.
FieldAggregationMethod TabletColumn::get_aggregation_type_by_string(const std::string& str) {
    struct AggregationName {
        const char* name;
        FieldAggregationMethod method;
    };
    static constexpr AggregationName kAggregationNames[] = {
            {"NONE", FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE},
            {"SUM", FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM},
            {"MIN", FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MIN},
            {"MAX", FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MAX},
            {"REPLACE", FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE},
            {"REPLACE_IF_NOT_NULL",
             FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL},
            {"HLL_UNION", FieldAggregationMethod::OLAP_FIELD_AGGREGATION_HLL_UNION},
            {"BITMAP_UNION", FieldAggregationMethod::OLAP_FIELD_AGGREGATION_BITMAP_UNION},
            {"QUANTILE_UNION", FieldAggregationMethod::OLAP_FIELD_AGGREGATION_QUANTILE_UNION},
    };

    std::string upper_str = str;
    // std::toupper has undefined behaviour for negative arguments other than
    // EOF, so each byte is passed as unsigned char.
    std::transform(str.begin(), str.end(), upper_str.begin(),
                   [](unsigned char c) { return static_cast<char>(std::toupper(c)); });

    for (const AggregationName& entry : kAggregationNames) {
        if (upper_str == entry.name) {
            return entry.method;
        }
    }

    // Not one of the built-in names: anything non-empty is treated as a
    // generic aggregation, an empty name as unknown.
    return upper_str.empty() ? FieldAggregationMethod::OLAP_FIELD_AGGREGATION_UNKNOWN
                             : FieldAggregationMethod::OLAP_FIELD_AGGREGATION_GENERIC;
}
313
314
28.1M
// Returns the textual name of a FieldType.
//
// OLAP_FIELD_TYPE_BITMAP is rendered as "OBJECT" and OLAP_FIELD_TYPE_BOOL as
// "BOOLEAN". Every field type without an explicit case below is rendered as
// "UNKNOWN".
std::string TabletColumn::get_string_by_field_type(FieldType type) {
    const char* name = "UNKNOWN";
    switch (type) {
    case FieldType::OLAP_FIELD_TYPE_TINYINT:
        name = "TINYINT";
        break;
    case FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT:
        name = "UNSIGNED_TINYINT";
        break;
    case FieldType::OLAP_FIELD_TYPE_SMALLINT:
        name = "SMALLINT";
        break;
    case FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT:
        name = "UNSIGNED_SMALLINT";
        break;
    case FieldType::OLAP_FIELD_TYPE_INT:
        name = "INT";
        break;
    case FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT:
        name = "UNSIGNED_INT";
        break;
    case FieldType::OLAP_FIELD_TYPE_BIGINT:
        name = "BIGINT";
        break;
    case FieldType::OLAP_FIELD_TYPE_LARGEINT:
        name = "LARGEINT";
        break;
    case FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT:
        name = "UNSIGNED_BIGINT";
        break;
    case FieldType::OLAP_FIELD_TYPE_IPV4:
        name = "IPV4";
        break;
    case FieldType::OLAP_FIELD_TYPE_IPV6:
        name = "IPV6";
        break;
    case FieldType::OLAP_FIELD_TYPE_FLOAT:
        name = "FLOAT";
        break;
    case FieldType::OLAP_FIELD_TYPE_DOUBLE:
        name = "DOUBLE";
        break;
    case FieldType::OLAP_FIELD_TYPE_DISCRETE_DOUBLE:
        name = "DISCRETE_DOUBLE";
        break;
    case FieldType::OLAP_FIELD_TYPE_CHAR:
        name = "CHAR";
        break;
    case FieldType::OLAP_FIELD_TYPE_DATE:
        name = "DATE";
        break;
    case FieldType::OLAP_FIELD_TYPE_DATEV2:
        name = "DATEV2";
        break;
    case FieldType::OLAP_FIELD_TYPE_DATETIME:
        name = "DATETIME";
        break;
    case FieldType::OLAP_FIELD_TYPE_DATETIMEV2:
        name = "DATETIMEV2";
        break;
    case FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ:
        name = "TIMESTAMPTZ";
        break;
    case FieldType::OLAP_FIELD_TYPE_DECIMAL:
        name = "DECIMAL";
        break;
    case FieldType::OLAP_FIELD_TYPE_DECIMAL32:
        name = "DECIMAL32";
        break;
    case FieldType::OLAP_FIELD_TYPE_DECIMAL64:
        name = "DECIMAL64";
        break;
    case FieldType::OLAP_FIELD_TYPE_DECIMAL128I:
        name = "DECIMAL128I";
        break;
    case FieldType::OLAP_FIELD_TYPE_DECIMAL256:
        name = "DECIMAL256";
        break;
    case FieldType::OLAP_FIELD_TYPE_VARCHAR:
        name = "VARCHAR";
        break;
    case FieldType::OLAP_FIELD_TYPE_JSONB:
        name = "JSONB";
        break;
    case FieldType::OLAP_FIELD_TYPE_VARIANT:
        name = "VARIANT";
        break;
    case FieldType::OLAP_FIELD_TYPE_STRING:
        name = "STRING";
        break;
    case FieldType::OLAP_FIELD_TYPE_BOOL:
        name = "BOOLEAN";
        break;
    case FieldType::OLAP_FIELD_TYPE_HLL:
        name = "HLL";
        break;
    case FieldType::OLAP_FIELD_TYPE_STRUCT:
        name = "STRUCT";
        break;
    case FieldType::OLAP_FIELD_TYPE_ARRAY:
        name = "ARRAY";
        break;
    case FieldType::OLAP_FIELD_TYPE_MAP:
        name = "MAP";
        break;
    case FieldType::OLAP_FIELD_TYPE_BITMAP:
        name = "OBJECT";
        break;
    case FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE:
        name = "QUANTILE_STATE";
        break;
    case FieldType::OLAP_FIELD_TYPE_AGG_STATE:
        name = "AGG_STATE";
        break;
    default:
        // Keep the "UNKNOWN" placeholder.
        break;
    }
    return name;
}
428
429
200k
// Returns the textual name of an aggregation method. Methods without an
// explicit case below are rendered as "UNKNOWN".
std::string TabletColumn::get_string_by_aggregation_type(FieldAggregationMethod type) {
    const char* name = "UNKNOWN";
    switch (type) {
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE:
        name = "NONE";
        break;
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM:
        name = "SUM";
        break;
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MIN:
        name = "MIN";
        break;
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MAX:
        name = "MAX";
        break;
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE:
        name = "REPLACE";
        break;
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL:
        name = "REPLACE_IF_NOT_NULL";
        break;
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_HLL_UNION:
        name = "HLL_UNION";
        break;
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_BITMAP_UNION:
        name = "BITMAP_UNION";
        break;
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_QUANTILE_UNION:
        name = "QUANTILE_UNION";
        break;
    default:
        // Keep the "UNKNOWN" placeholder.
        break;
    }
    return name;
}
462
463
6.56M
// Returns the field length for a thrift primitive type.
//
// Fixed-width types return a constant. CHAR returns `string_length` as is.
// The other string-like types return `string_length` plus the size of the
// corresponding OLAP_*_MAX_LENGTH constant. STRUCT, ARRAY and MAP return
// their OLAP_*_MAX_LENGTH constant. Any other type logs a warning and
// returns 0.
uint32_t TabletColumn::get_field_length_by_type(TPrimitiveType::type type, uint32_t string_length) {
    switch (type) {
    // ---- fixed-width types, grouped by width ----
    case TPrimitiveType::TINYINT:
    case TPrimitiveType::BOOLEAN:
        return 1;

    case TPrimitiveType::SMALLINT:
        return 2;

    case TPrimitiveType::DATE:
        return 3;

    case TPrimitiveType::INT:
    case TPrimitiveType::IPV4:
    case TPrimitiveType::DATEV2:
    case TPrimitiveType::FLOAT:
    case TPrimitiveType::DECIMAL32:
        return 4;

    case TPrimitiveType::BIGINT:
    case TPrimitiveType::DATETIME:
    case TPrimitiveType::DATETIMEV2:
    case TPrimitiveType::TIMESTAMPTZ:
    case TPrimitiveType::DOUBLE:
    case TPrimitiveType::DECIMAL64:
        return 8;

    case TPrimitiveType::DECIMALV2:
        return 12; // use 12 bytes in olap engine.

    case TPrimitiveType::LARGEINT:
    case TPrimitiveType::IPV6:
    case TPrimitiveType::QUANTILE_STATE:
    case TPrimitiveType::BITMAP:
    case TPrimitiveType::DECIMAL128I:
        return 16;

    case TPrimitiveType::DECIMAL256:
        return 32;

    // ---- types whose length depends on `string_length` ----
    case TPrimitiveType::CHAR:
        return string_length;

    case TPrimitiveType::VARCHAR:
    case TPrimitiveType::HLL:
    case TPrimitiveType::AGG_STATE:
        return string_length + sizeof(OLAP_VARCHAR_MAX_LENGTH);

    case TPrimitiveType::STRING:
    case TPrimitiveType::VARIANT:
        return string_length + sizeof(OLAP_STRING_MAX_LENGTH);

    case TPrimitiveType::JSONB:
        return string_length + sizeof(OLAP_JSONB_MAX_LENGTH);

    // ---- nested types ----
    case TPrimitiveType::STRUCT:
        // Length of the struct type itself; the lengths of its subtypes are
        // not included.
        return OLAP_STRUCT_MAX_LENGTH;

    case TPrimitiveType::ARRAY:
        return OLAP_ARRAY_MAX_LENGTH;

    case TPrimitiveType::MAP:
        return OLAP_MAP_MAX_LENGTH;

    default:
        LOG(WARNING) << "unknown field type. [type=" << type << "]";
        return 0;
    }
}
530
531
17
bool TabletColumn::has_char_type() const {
532
17
    switch (_type) {
533
4
    case FieldType::OLAP_FIELD_TYPE_CHAR: {
534
4
        return true;
535
0
    }
536
4
    case FieldType::OLAP_FIELD_TYPE_ARRAY:
537
4
    case FieldType::OLAP_FIELD_TYPE_MAP:
538
4
    case FieldType::OLAP_FIELD_TYPE_STRUCT: {
539
4
        return std::any_of(_sub_columns.begin(), _sub_columns.end(),
540
4
                           [&](const auto& sub) -> bool { return sub->has_char_type(); });
541
4
    }
542
9
    default:
543
9
        return false;
544
17
    }
545
17
}
546
547
16.3M
// Default constructor: the aggregation method is set to
// OLAP_FIELD_AGGREGATION_NONE; no other member is assigned here.
TabletColumn::TabletColumn() {
    _aggregation = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE;
}
548
549
37
// Builds a column from an aggregation method and a field type only.
TabletColumn::TabletColumn(FieldAggregationMethod agg, FieldType type) {
    _type = type;
    _aggregation = agg;
}
553
554
17
// Builds a column from an aggregation method, a field type and nullability.
// The length is taken from the size reported by get_scalar_type_info() for
// `filed_type`.
TabletColumn::TabletColumn(FieldAggregationMethod agg, FieldType filed_type, bool is_nullable) {
    _aggregation = agg;
    _type = filed_type;
    _is_nullable = is_nullable;

    const auto type_info = get_scalar_type_info(filed_type);
    _length = cast_set<int32_t>(type_info->size());
}
560
561
// Builds a column from explicitly supplied aggregation method, field type,
// nullability, unique id and length. `length` is narrowed to int32_t via
// cast_set.
TabletColumn::TabletColumn(FieldAggregationMethod agg, FieldType filed_type, bool is_nullable,
                           int32_t unique_id, size_t length) {
    _unique_id = unique_id;
    _type = filed_type;
    _aggregation = agg;
    _is_nullable = is_nullable;
    _length = cast_set<int32_t>(length);
}
569
570
4.92k
// Builds a column from its protobuf description; all work is delegated to
// init_from_pb().
TabletColumn::TabletColumn(const ColumnPB& column) {
    this->init_from_pb(column);
}
573
574
4.63M
// Builds a column from its thrift description; all work is delegated to
// init_from_thrift().
TabletColumn::TabletColumn(const TColumn& column) {
    this->init_from_thrift(column);
}
577
578
5.23M
void TabletColumn::init_from_thrift(const TColumn& tcolumn) {
579
5.23M
    ColumnPB column_pb;
580
5.23M
    TabletMeta::init_column_from_tcolumn(tcolumn.col_unique_id, tcolumn, &column_pb);
581
5.23M
    init_from_pb(column_pb);
582
5.23M
}
583
584
20.3M
void TabletColumn::init_from_pb(const ColumnPB& column) {
585
20.3M
    _unique_id = column.unique_id();
586
20.3M
    _col_name = column.name();
587
20.3M
    _col_name_lower_case = to_lower(_col_name);
588
20.3M
    _type = TabletColumn::get_field_type_by_string(column.type());
589
20.3M
    _is_key = column.is_key();
590
20.3M
    _is_nullable = column.is_nullable();
591
20.3M
    _is_auto_increment = column.is_auto_increment();
592
20.3M
    if (column.has_is_on_update_current_timestamp()) {
593
16.3M
        _is_on_update_current_timestamp = column.is_on_update_current_timestamp();
594
16.3M
    }
595
596
20.3M
    _has_default_value = column.has_default_value();
597
20.3M
    if (_has_default_value) {
598
3.26M
        _default_value = column.default_value();
599
3.26M
    }
600
601
20.3M
    if (column.has_precision()) {
602
20.3M
        _is_decimal = true;
603
20.3M
        _precision = column.precision();
604
18.4E
    } else {
605
18.4E
        _is_decimal = false;
606
18.4E
    }
607
20.4M
    if (column.has_frac()) {
608
20.4M
        _frac = column.frac();
609
20.4M
    }
610
20.3M
    _length = column.length();
611
20.3M
    _index_length = column.index_length();
612
20.3M
    if (column.has_is_bf_column()) {
613
4.21M
        _is_bf_column = column.is_bf_column();
614
16.1M
    } else {
615
16.1M
        _is_bf_column = false;
616
16.1M
    }
617
20.4M
    if (column.has_aggregation()) {
618
20.4M
        _aggregation = get_aggregation_type_by_string(column.aggregation());
619
20.4M
        _aggregation_name = column.aggregation();
620
20.4M
    }
621
622
20.3M
    if (_type == FieldType::OLAP_FIELD_TYPE_AGG_STATE) {
623
12.3k
        _result_is_nullable = column.result_is_nullable();
624
12.3k
        _be_exec_version = column.be_exec_version();
625
12.3k
    }
626
627
20.4M
    if (column.has_visible()) {
628
20.4M
        _visible = column.visible();
629
20.4M
    }
630
20.3M
    if (_type == FieldType::OLAP_FIELD_TYPE_ARRAY) {
631
18.4E
        CHECK(column.children_columns_size() == 1)
632
18.4E
                << "ARRAY type should has 1 children types, but got "
633
18.4E
                << column.children_columns_size();
634
1.47M
    }
635
20.3M
    if (_type == FieldType::OLAP_FIELD_TYPE_MAP) {
636
18.4E
        DCHECK(column.children_columns_size() == 2)
637
18.4E
                << "MAP type should has 2 children types, but got "
638
18.4E
                << column.children_columns_size();
639
709k
        if (UNLIKELY(column.children_columns_size() != 2)) {
640
0
            LOG(WARNING) << "MAP type should has 2 children types, but got "
641
0
                         << column.children_columns_size();
642
0
        }
643
709k
    }
644
23.8M
    for (int i = 0; i < column.children_columns_size(); i++) {
645
3.45M
        TabletColumn child_column;
646
3.45M
        child_column.init_from_pb(column.children_columns(i));
647
3.45M
        add_sub_column(child_column);
648
3.45M
    }
649
20.3M
    if (column.has_column_path_info()) {
650
49.4k
        _column_path = std::make_shared<PathInData>();
651
49.4k
        _column_path->from_protobuf(column.column_path_info());
652
49.4k
        _parent_col_unique_id = column.column_path_info().parrent_column_unique_id();
653
49.4k
    }
654
20.3M
    if (is_variant_type() && !column.has_column_path_info()) {
655
        // set path info for variant root column, to prevent from missing
656
34.5k
        _column_path = std::make_shared<PathInData>(_col_name_lower_case);
657
        // _parent_col_unique_id = _unique_id;
658
34.5k
    }
659
20.3M
    if (column.has_variant_max_subcolumns_count()) {
660
16.3M
        _variant.max_subcolumns_count = column.variant_max_subcolumns_count();
661
16.3M
    }
662
20.3M
    if (column.has_variant_enable_typed_paths_to_sparse()) {
663
16.3M
        _variant.enable_typed_paths_to_sparse = column.variant_enable_typed_paths_to_sparse();
664
16.3M
    }
665
20.3M
    if (column.has_variant_max_sparse_column_statistics_size()) {
666
16.3M
        _variant.max_sparse_column_statistics_size =
667
16.3M
                column.variant_max_sparse_column_statistics_size();
668
16.3M
    }
669
20.3M
    if (column.has_variant_sparse_hash_shard_count()) {
670
15.0M
        _variant.sparse_hash_shard_count = column.variant_sparse_hash_shard_count();
671
15.0M
    }
672
20.3M
    if (column.has_variant_enable_doc_mode()) {
673
16.3M
        _variant.enable_doc_mode = column.variant_enable_doc_mode();
674
16.3M
    }
675
20.3M
    if (column.has_variant_doc_materialization_min_rows()) {
676
15.0M
        _variant.doc_materialization_min_rows = column.variant_doc_materialization_min_rows();
677
15.0M
    }
678
20.3M
    if (column.has_variant_doc_hash_shard_count()) {
679
15.0M
        _variant.doc_hash_shard_count = column.variant_doc_hash_shard_count();
680
15.0M
    }
681
20.3M
    if (column.has_variant_enable_nested_group()) {
682
15.0M
        _variant.enable_nested_group = column.variant_enable_nested_group();
683
15.0M
    }
684
20.3M
    if (column.has_pattern_type()) {
685
9.83M
        _pattern_type = column.pattern_type();
686
9.83M
    }
687
20.3M
}
688
689
// Creates a nullable VARIANT sub column for the path `root` + `paths`.
//
// The returned column has unique id -1, records `parent_unique_id` as its
// parent, is named after the full path string, and carries the given
// max-subcolumns count and doc-mode flag.
TabletColumn TabletColumn::create_materialized_variant_column(const std::string& root,
                                                              const std::vector<std::string>& paths,
                                                              int32_t parent_unique_id,
                                                              int32_t max_subcolumns_count,
                                                              bool enable_doc_mode) {
    // The path is built from the arguments only, so it can be prepared up front.
    PathInData full_path(root, paths);

    TabletColumn result;
    result.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
    result.set_is_nullable(true);
    result.set_unique_id(-1);
    result.set_parent_unique_id(parent_unique_id);
    result.set_path_info(full_path);
    result.set_name(full_path.get_path());
    result.set_variant_max_subcolumns_count(max_subcolumns_count);
    result.set_variant_enable_doc_mode(enable_doc_mode);
    return result;
}
706
707
27.6M
// Serializes this column (and, recursively, its sub-columns) into `column`.
// Optional attributes (default value, decimal precision/frac, bloom-filter flag,
// aggregation name) are written only when they are set on this object.
void TabletColumn::to_schema_pb(ColumnPB* column) const {
    // Identity and basic flags.
    column->set_unique_id(_unique_id);
    column->set_name(_col_name);
    column->set_type(get_string_by_field_type(_type));
    column->set_is_key(_is_key);
    column->set_is_nullable(_is_nullable);
    column->set_is_auto_increment(_is_auto_increment);
    column->set_is_on_update_current_timestamp(_is_on_update_current_timestamp);

    if (_has_default_value) {
        column->set_default_value(_default_value);
    }
    if (_is_decimal) {
        column->set_precision(_precision);
        column->set_frac(_frac);
    }

    column->set_length(_length);
    column->set_index_length(_index_length);

    if (_is_bf_column) {
        column->set_is_bf_column(_is_bf_column);
    }
    if (!_aggregation_name.empty()) {
        column->set_aggregation(_aggregation_name);
    }

    column->set_result_is_nullable(_result_is_nullable);
    column->set_be_exec_version(_be_exec_version);
    column->set_visible(_visible);

    // Sanity-check the child count of nested types: ARRAY aborts on mismatch (CHECK),
    // MAP asserts in debug builds and only logs a warning otherwise.
    switch (_type) {
    case FieldType::OLAP_FIELD_TYPE_ARRAY:
        CHECK(_sub_columns.size() == 1)
                << "ARRAY type should has 1 children types, but got " << _sub_columns.size();
        break;
    case FieldType::OLAP_FIELD_TYPE_MAP:
        DCHECK(_sub_columns.size() == 2)
                << "MAP type should has 2 children types, but got " << _sub_columns.size();
        if (UNLIKELY(_sub_columns.size() != 2)) {
            LOG(WARNING) << "MAP type should has 2 children types, but got " << _sub_columns.size();
        }
        break;
    default:
        break;
    }

    // Children are emitted in their stored order.
    for (const auto& sub_column : _sub_columns) {
        ColumnPB* child_pb = column->add_children_columns();
        sub_column->to_schema_pb(child_pb);
    }

    // set parts info
    if (has_path_info()) {
        // CHECK_GT(_parent_col_unique_id, 0);
        _column_path->to_protobuf(column->mutable_column_path_info(), _parent_col_unique_id);
        // Update unstable information for variant columns. Some of the fields in the tablet schema
        // are irrelevant for variant sub-columns, but retaining them may lead to an excessive growth
        // in the number of tablet schema cache entries.
        if (_type == FieldType::OLAP_FIELD_TYPE_STRING) {
            column->set_length(INT_MAX);
        }
        column->set_index_length(0);
    }

    // Variant settings and pattern type are always written.
    column->set_variant_max_subcolumns_count(_variant.max_subcolumns_count);
    column->set_pattern_type(_pattern_type);
    column->set_variant_enable_typed_paths_to_sparse(_variant.enable_typed_paths_to_sparse);
    column->set_variant_max_sparse_column_statistics_size(
            _variant.max_sparse_column_statistics_size);
    column->set_variant_sparse_hash_shard_count(_variant.sparse_hash_shard_count);
    column->set_variant_enable_doc_mode(_variant.enable_doc_mode);
    column->set_variant_doc_materialization_min_rows(_variant.doc_materialization_min_rows);
    column->set_variant_doc_hash_shard_count(_variant.doc_hash_shard_count);
    column->set_variant_enable_nested_group(_variant.enable_nested_group);
}
774
775
3.50M
// Appends a copy of `sub_column` to this column's children and bumps the child counter.
// NOTE(review): the copy is taken *before* the parent unique id is written, so only the
// caller's `sub_column` object receives `_parent_col_unique_id`; the stored copy keeps
// whatever value it had. Confirm this ordering is intentional before changing it.
void TabletColumn::add_sub_column(TabletColumn& sub_column) {
    auto stored_copy = std::make_shared<TabletColumn>(sub_column);
    _sub_columns.push_back(std::move(stored_copy));
    sub_column._parent_col_unique_id = _unique_id;
    ++_sub_column_count;
}
780
781
38.3M
bool TabletColumn::is_row_store_column() const {
782
38.3M
    return _col_name == BeConsts::ROW_STORE_COL;
783
38.3M
}
784
785
// Creates the "union" aggregate function for an AGG_STATE column.
// `type` must be a DataTypeAggState (enforced by assert_cast). Before creating the
// function, the nested function's name is passed through the BE exec-version
// compatibility check together with the current and the column's stored version.
AggregateFunctionPtr TabletColumn::get_aggregate_function_union(DataTypePtr type,
                                                                int current_be_exec_version) const {
    const auto* agg_state_type = assert_cast<const DataTypeAggState*>(type.get());

    BeExecVersionManager::check_function_compatibility(
            current_be_exec_version, _be_exec_version,
            agg_state_type->get_nested_function()->get_name());

    auto union_function =
            AggregateStateUnion::create(agg_state_type->get_nested_function(), {type}, type);
    return union_function;
}
793
794
// Resolves the aggregate function used for this column.
//  - AGG_STATE columns get the state-union function.
//  - Any other column looks up "<aggregation name><suffix>" (lower-cased) in the simple
//    factory, using the newest BE exec version for the lookup.
// On success the function's version is set to the column's stored BE exec version.
// Returns nullptr (after logging a warning) when the factory lookup fails.
AggregateFunctionPtr TabletColumn::get_aggregate_function(std::string suffix,
                                                          int current_be_exec_version) const {
    AggregateFunctionPtr function = nullptr;
    auto type = DataTypeFactory::instance().create_data_type(*this);

    const bool is_agg_state =
            type && type->get_primitive_type() == PrimitiveType::TYPE_AGG_STATE;
    if (is_agg_state) {
        function = get_aggregate_function_union(type, current_be_exec_version);
    } else {
        std::string origin_name = TabletColumn::get_string_by_aggregation_type(_aggregation);
        std::string agg_name = origin_name + suffix;
        // Factory keys are lower case; go through unsigned char to keep tolower well-defined.
        for (char& ch : agg_name) {
            ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
        }
        function = AggregateFunctionSimpleFactory::instance().get(
                agg_name, {type}, type, type->is_nullable(),
                BeExecVersionManager::get_newest_version());
        if (!function) {
            LOG(WARNING) << "get column aggregate function failed, aggregation_name=" << origin_name
                         << ", column_type=" << type->get_name();
        }
    }

    if (!function) {
        return nullptr;
    }
    function->set_version(_be_exec_version);
    return function;
}
820
821
132k
void TabletColumn::set_path_info(const PathInData& path) {
822
132k
    _column_path = std::make_shared<PathInData>(path);
823
132k
}
824
825
14.8k
// Returns the data type the DataTypeFactory creates for this column.
DataTypePtr TabletColumn::get_vec_type() const {
    auto&& factory = DataTypeFactory::instance();
    return factory.create_data_type(*this);
}
828
829
// escape '.' and '_'
830
46.0M
std::string escape_for_path_name(const std::string& s) {
831
46.0M
    std::string res;
832
46.0M
    const char* pos = s.data();
833
46.0M
    const char* end = pos + s.size();
834
46.6M
    while (pos != end) {
835
586k
        unsigned char c = *pos;
836
586k
        if (c == '.' || c == '_') {
837
74.2k
            res += '%';
838
74.2k
            res += hex_digit_uppercase(c / 16);
839
74.2k
            res += hex_digit_uppercase(c % 16);
840
512k
        } else {
841
512k
            res += c;
842
512k
        }
843
586k
        ++pos;
844
586k
    }
845
46.0M
    return res;
846
46.0M
}
847
848
9.47k
void TabletIndex::set_escaped_escaped_index_suffix_path(const std::string& path_name) {
849
9.47k
    std::string escaped_path = escape_for_path_name(path_name);
850
9.47k
    _escaped_index_suffix_path = escaped_path;
851
9.47k
}
852
853
void TabletIndex::init_from_thrift(const TOlapTableIndex& index,
854
164k
                                   const TabletSchema& tablet_schema) {
855
164k
    _index_id = index.index_id;
856
164k
    _index_name = index.index_name;
857
    // init col_unique_id in index at be side, since col_unique_id may be -1 at fe side
858
    // get column unique id by name
859
164k
    std::vector<int32_t> col_unique_ids(index.columns.size());
860
329k
    for (size_t i = 0; i < index.columns.size(); i++) {
861
164k
        auto column_idx = tablet_schema.field_index(index.columns[i]);
862
164k
        if (column_idx >= 0) {
863
164k
            col_unique_ids[i] = tablet_schema.column(column_idx).unique_id();
864
164k
        } else {
865
            // if column unique id not found by column name, find by column unique id
866
            // column unique id can not found means this column is a new column added by light schema change
867
190
            if (index.__isset.column_unique_ids && !index.column_unique_ids.empty() &&
868
190
                tablet_schema.has_column_unique_id(index.column_unique_ids[i])) {
869
114
                col_unique_ids[i] = index.column_unique_ids[i];
870
114
            } else {
871
76
                col_unique_ids[i] = -1;
872
76
            }
873
190
        }
874
164k
    }
875
164k
    _col_unique_ids = std::move(col_unique_ids);
876
877
164k
    switch (index.index_type) {
878
0
    case TIndexType::BITMAP:
879
0
        _index_type = IndexType::BITMAP;
880
0
        break;
881
154k
    case TIndexType::INVERTED:
882
154k
        _index_type = IndexType::INVERTED;
883
154k
        break;
884
406
    case TIndexType::ANN:
885
406
        _index_type = IndexType::ANN;
886
406
        break;
887
0
    case TIndexType::BLOOMFILTER:
888
0
        _index_type = IndexType::BLOOMFILTER;
889
0
        break;
890
9.50k
    case TIndexType::NGRAM_BF:
891
9.50k
        _index_type = IndexType::NGRAM_BF;
892
9.50k
        break;
893
164k
    }
894
164k
    if (index.__isset.properties) {
895
164k
        for (auto kv : index.properties) {
896
97.1k
            _properties[kv.first] = kv.second;
897
97.1k
        }
898
164k
    }
899
164k
}
900
901
void TabletIndex::init_from_thrift(const TOlapTableIndex& index,
902
11.0k
                                   const std::vector<int32_t>& column_uids) {
903
11.0k
    _index_id = index.index_id;
904
11.0k
    _index_name = index.index_name;
905
11.0k
    _col_unique_ids = column_uids;
906
907
11.0k
    switch (index.index_type) {
908
0
    case TIndexType::BITMAP:
909
0
        _index_type = IndexType::BITMAP;
910
0
        break;
911
10.4k
    case TIndexType::INVERTED:
912
10.4k
        _index_type = IndexType::INVERTED;
913
10.4k
        break;
914
93
    case TIndexType::ANN:
915
93
        _index_type = IndexType::ANN;
916
93
        break;
917
0
    case TIndexType::BLOOMFILTER:
918
0
        _index_type = IndexType::BLOOMFILTER;
919
0
        break;
920
466
    case TIndexType::NGRAM_BF:
921
466
        _index_type = IndexType::NGRAM_BF;
922
466
        break;
923
11.0k
    }
924
11.0k
    if (index.__isset.properties) {
925
11.9k
        for (auto kv : index.properties) {
926
11.9k
            _properties[kv.first] = kv.second;
927
11.9k
        }
928
11.0k
    }
929
11.0k
}
930
931
979k
void TabletIndex::init_from_pb(const TabletIndexPB& index) {
932
979k
    _index_id = index.index_id();
933
979k
    _index_name = index.index_name();
934
979k
    _col_unique_ids.clear();
935
979k
    for (auto col_unique_id : index.col_unique_id()) {
936
979k
        _col_unique_ids.push_back(col_unique_id);
937
979k
    }
938
979k
    _index_type = index.index_type();
939
979k
    for (const auto& kv : index.properties()) {
940
639k
        _properties[kv.first] = kv.second;
941
639k
    }
942
979k
    _escaped_index_suffix_path = index.index_suffix_name();
943
979k
}
944
945
2.00M
// Serializes this index into `index`.
// All stored properties are copied. In addition, when properties exist but carry no
// explicit lower-case setting, a default lower_case=true entry is added for indexes
// that use a parser key or a built-in (or unnamed) analyzer.
// The "tablet_schema.to_schema_pb" debug point, when active, skips the lower-case
// property while copying and returns before the default is added.
void TabletIndex::to_schema_pb(TabletIndexPB* index) const {
    index->set_index_id(_index_id);
    index->set_index_name(_index_name);

    index->clear_col_unique_id();
    for (const auto uid : _col_unique_ids) {
        index->add_col_unique_id(uid);
    }

    index->set_index_type(_index_type);

    for (const auto& kv : _properties) {
        DBUG_EXECUTE_IF("tablet_schema.to_schema_pb", {
            if (kv.first == INVERTED_INDEX_PARSER_LOWERCASE_KEY) {
                continue;
            }
        })
        auto& out_properties = *index->mutable_properties();
        out_properties[kv.first] = kv.second;
    }
    index->set_index_suffix_name(_escaped_index_suffix_path);

    DBUG_EXECUTE_IF("tablet_schema.to_schema_pb", { return; })

    // Nothing to default when there are no properties at all, or when the lower-case
    // setting was given explicitly.
    if (_properties.empty() || _properties.contains(INVERTED_INDEX_PARSER_LOWERCASE_KEY)) {
        return;
    }

    // Only add lower_case=true default for built-in analyzers/parsers, NOT for custom analyzers
    // Custom analyzer: lower_case is determined by analyzer's internal token filter
    bool has_parser = _properties.contains(INVERTED_INDEX_PARSER_KEY) ||
                      _properties.contains(INVERTED_INDEX_PARSER_KEY_ALIAS);
    std::string analyzer_name = get_analyzer_name_from_properties(_properties);
    bool is_builtin = analyzer_name.empty() ||
                      segment_v2::inverted_index::InvertedIndexAnalyzer::is_builtin_analyzer(
                              analyzer_name);
    if (has_parser || is_builtin) {
        auto& out_properties = *index->mutable_properties();
        out_properties[INVERTED_INDEX_PARSER_LOWERCASE_KEY] = INVERTED_INDEX_PARSER_TRUE;
    }
}
980
981
2.13M
// Construction relies entirely on the members' in-class initializers.
TabletSchema::TabletSchema() = default;

// Nothing to release explicitly; members clean up after themselves.
TabletSchema::~TabletSchema() = default;
984
985
1.12M
int64_t TabletSchema::get_metadata_size() const {
986
1.12M
    return sizeof(TabletSchema);
987
1.12M
}
988
989
4.76M
// Appends `column` at position `_num_columns` and updates every derived structure:
// key/null/variant counters, the special-column indexes (delete sign, sequence,
// version, skip bitmap, virtual columns), the unique-id / name / path lookup maps and
// the sequence-map index tables.
//
// A variant column without path info gets one built from its lower-cased name.
// Columns appended with a `col_type` other than NORMAL/VARIANT (and that are neither
// variant nor extracted) are registered by unique id only, not by name.
void TabletSchema::append_column(TabletColumn column, ColumnType col_type) {
    if (column.is_key()) {
        _num_key_columns++;
    }
    if (column.is_nullable()) {
        _num_null_columns++;
    }
    if (column.is_variant_type()) {
        ++_num_variant_columns;
        if (!column.has_path_info()) {
            const std::string& col_name = column.name_lower_case();
            PathInData path(col_name);
            column.set_path_info(path);
        }
    }
    if (UNLIKELY(column.name() == DELETE_SIGN)) {
        _delete_sign_idx = _num_columns;
    } else if (UNLIKELY(column.name() == SEQUENCE_COL)) {
        _sequence_col_idx = _num_columns;
    } else if (UNLIKELY(column.name() == VERSION_COL)) {
        _version_col_idx = _num_columns;
    } else if (UNLIKELY(column.name() == SKIP_BITMAP_COL)) {
        _skip_bitmap_col_idx = _num_columns;
    } else if (UNLIKELY(column.name().starts_with(BeConsts::VIRTUAL_COLUMN_PREFIX))) {
        _vir_col_idx_to_unique_id[_num_columns] = column.unique_id();
    }

    // Read the unique id once, *before* `column` is moved from below; every later use
    // goes through this copy instead of touching the moved-from object.
    const auto col_uid = column.unique_id();

    _field_uniqueid_to_index[col_uid] = _num_columns;
    _cols.push_back(std::make_shared<TabletColumn>(std::move(column)));
    // The dropped column may have same name with exsiting column, so that
    // not add to name to index map, only for uid to index map
    if (col_type == ColumnType::VARIANT || _cols.back()->is_variant_type() ||
        _cols.back()->is_extracted_column()) {
        _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns);
        _field_path_to_index[_cols.back()->path_info_ptr().get()] = _num_columns;
    } else if (col_type == ColumnType::NORMAL) {
        _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns);
    }
    _num_columns++;
    _num_virtual_columns = _vir_col_idx_to_unique_id.size();

    // generate column index mapping for seq map
    if (_seq_col_uid_to_value_cols_uid.contains(col_uid)) {
        // The new column is itself a sequence column: make sure it has an entry.
        const auto seq_idx = _field_uniqueid_to_index[col_uid];
        if (!_seq_col_idx_to_value_cols_idx.contains(seq_idx)) {
            _seq_col_idx_to_value_cols_idx[seq_idx] = {};
        }
    }
    if (_value_col_uid_to_seq_col_uid.contains(col_uid)) {
        const auto seq_uid = _value_col_uid_to_seq_col_uid[col_uid];
        if (_field_uniqueid_to_index.contains(seq_uid)) {
            // Index tables for a group are filled only once the sequence column and
            // all of its value columns have been appended.
            bool all_uid_index_found = true;
            std::vector<int32_t> value_cols_index;
            for (const auto value_col_uid : _seq_col_uid_to_value_cols_uid[seq_uid]) {
                if (!_field_uniqueid_to_index.contains(value_col_uid)) {
                    all_uid_index_found = false;
                    break;
                }
                value_cols_index.push_back(_field_uniqueid_to_index[value_col_uid]);
            }
            if (all_uid_index_found) {
                const auto seq_idx = _field_uniqueid_to_index[seq_uid];
                for (const auto col_idx : value_cols_index) {
                    _seq_col_idx_to_value_cols_idx[seq_idx].push_back(col_idx);
                    _value_col_idx_to_seq_col_idx[col_idx] = seq_idx;
                }
                _value_col_idx_to_seq_col_idx[seq_idx] = seq_idx;
            }
        }
    }
}
1058
1059
1.50k
// Takes ownership of `index`, appends it to the schema and registers it for lookup.
// For every indexed column: an index with a non-empty field pattern is registered in
// the per-column pattern map; otherwise its position is registered under the
// (index type, column unique id, index suffix) key.
void TabletSchema::append_index(TabletIndex&& index) {
    const size_t index_pos = _indexes.size();
    // `index` is a named rvalue reference, i.e. an lvalue here: it has to be moved
    // explicitly, otherwise the copy constructor would be selected.
    _indexes.push_back(std::make_shared<TabletIndex>(std::move(index)));
    const auto& appended = _indexes.back();
    for (int32_t id : appended->col_unique_ids()) {
        if (auto field_pattern = appended->field_pattern(); !field_pattern.empty()) {
            auto& pattern_to_index_map = _index_by_unique_id_with_pattern[id];
            pattern_to_index_map[field_pattern].emplace_back(appended);
        } else {
            IndexKey key =
                    std::make_tuple(appended->index_type(), id, appended->get_index_suffix());
            _col_id_suffix_to_index[key].push_back(index_pos);
        }
    }
}
1073
1074
0
void TabletSchema::replace_column(size_t pos, TabletColumn new_col) {
1075
0
    CHECK_LT(pos, num_columns()) << " outof range";
1076
0
    _cols[pos] = std::make_shared<TabletColumn>(std::move(new_col));
1077
0
}
1078
1079
808
void TabletSchema::clear_index() {
1080
808
    _indexes.clear();
1081
808
    _col_id_suffix_to_index.clear();
1082
808
    _index_by_unique_id_with_pattern.clear();
1083
808
}
1084
1085
7
void TabletSchema::remove_index(int64_t index_id) {
1086
7
    std::vector<TabletIndexPtr> new_indexes;
1087
11
    for (auto& index : _indexes) {
1088
11
        if (index->index_id() != index_id) {
1089
4
            new_indexes.emplace_back(std::move(index));
1090
4
        }
1091
11
    }
1092
7
    _indexes = std::move(new_indexes);
1093
7
    _col_id_suffix_to_index.clear();
1094
7
    _index_by_unique_id_with_pattern.clear();
1095
11
    for (size_t new_pos = 0; new_pos < _indexes.size(); ++new_pos) {
1096
4
        const auto& index = _indexes[new_pos];
1097
4
        for (int32_t col_uid : index->col_unique_ids()) {
1098
4
            if (auto field_pattern = index->field_pattern(); !field_pattern.empty()) {
1099
0
                auto& pattern_to_index_map = _index_by_unique_id_with_pattern[col_uid];
1100
0
                pattern_to_index_map[field_pattern].emplace_back(index);
1101
4
            } else {
1102
4
                IndexKey key = std::make_tuple(_indexes.back()->index_type(), col_uid,
1103
4
                                               _indexes.back()->get_index_suffix());
1104
4
                _col_id_suffix_to_index[key].push_back(new_pos);
1105
4
            }
1106
4
        }
1107
4
    }
1108
7
}
1109
1110
330k
void TabletSchema::clear_columns() {
1111
330k
    _field_path_to_index.clear();
1112
330k
    _field_name_to_index.clear();
1113
330k
    _field_uniqueid_to_index.clear();
1114
330k
    _num_columns = 0;
1115
330k
    _num_variant_columns = 0;
1116
330k
    _num_null_columns = 0;
1117
330k
    _num_key_columns = 0;
1118
330k
    _seq_col_idx_to_value_cols_idx.clear();
1119
330k
    _value_col_idx_to_seq_col_idx.clear();
1120
330k
    _cols.clear();
1121
330k
}
1122
1123
void TabletSchema::init_from_pb(const TabletSchemaPB& schema, bool ignore_extracted_columns,
1124
956k
                                bool reuse_cache_column) {
1125
956k
    _keys_type = schema.keys_type();
1126
956k
    _num_columns = 0;
1127
956k
    _num_variant_columns = 0;
1128
956k
    _num_key_columns = 0;
1129
956k
    _num_null_columns = 0;
1130
956k
    _cols.clear();
1131
956k
    _indexes.clear();
1132
956k
    _index_by_unique_id_with_pattern.clear();
1133
956k
    _col_id_suffix_to_index.clear();
1134
956k
    _field_name_to_index.clear();
1135
956k
    _field_uniqueid_to_index.clear();
1136
956k
    _cluster_key_uids.clear();
1137
956k
    for (const auto& i : schema.cluster_key_uids()) {
1138
43.1k
        _cluster_key_uids.push_back(i);
1139
43.1k
    }
1140
11.8M
    for (auto& column_pb : schema.column()) {
1141
11.8M
        TabletColumnPtr column;
1142
11.8M
        if (reuse_cache_column) {
1143
469k
            auto pair = TabletColumnObjectPool::instance()->insert(
1144
469k
                    deterministic_string_serialize(column_pb));
1145
469k
            column = pair.second;
1146
            // Release the handle quickly, because we use shared ptr to manage column.
1147
            // It often core during tablet schema copy to another schema because handle's
1148
            // reference count should be managed mannually.
1149
469k
            TabletColumnObjectPool::instance()->release(pair.first);
1150
11.3M
        } else {
1151
11.3M
            column = std::make_shared<TabletColumn>();
1152
11.3M
            column->init_from_pb(column_pb);
1153
11.3M
        }
1154
11.8M
        if (ignore_extracted_columns && column->is_extracted_column()) {
1155
0
            continue;
1156
0
        }
1157
11.8M
        if (column->is_key()) {
1158
2.06M
            _num_key_columns++;
1159
2.06M
        }
1160
11.8M
        if (column->is_nullable()) {
1161
6.59M
            _num_null_columns++;
1162
6.59M
        }
1163
11.8M
        if (column->is_variant_type()) {
1164
40.5k
            ++_num_variant_columns;
1165
40.5k
        }
1166
1167
11.8M
        _cols.emplace_back(std::move(column));
1168
11.8M
        if (!_cols.back()->is_extracted_column()) {
1169
11.8M
            _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns);
1170
11.8M
            _field_uniqueid_to_index[_cols.back()->unique_id()] = _num_columns;
1171
11.8M
        }
1172
11.8M
        _num_columns++;
1173
11.8M
    }
1174
1.00M
    for (const auto& index_pb : schema.index()) {
1175
1.00M
        TabletIndexPtr index;
1176
1.00M
        if (reuse_cache_column) {
1177
40.8k
            auto pair = TabletColumnObjectPool::instance()->insert_index(
1178
40.8k
                    deterministic_string_serialize(index_pb));
1179
40.8k
            index = pair.second;
1180
            //  Only need the value to be cached by the pool, release it quickly because the handle need
1181
            // record reference count mannually, or it will core during tablet schema copy method.
1182
40.8k
            TabletColumnObjectPool::instance()->release(pair.first);
1183
961k
        } else {
1184
961k
            index = std::make_shared<TabletIndex>();
1185
961k
            index->init_from_pb(index_pb);
1186
961k
        }
1187
1.00M
        size_t index_pos = _indexes.size();
1188
1.00M
        _indexes.emplace_back(std::move(index));
1189
1.00M
        for (int32_t col_uid : _indexes.back()->col_unique_ids()) {
1190
1.00M
            if (auto field_pattern = _indexes.back()->field_pattern(); !field_pattern.empty()) {
1191
26.2k
                auto& pattern_to_index_map = _index_by_unique_id_with_pattern[col_uid];
1192
26.2k
                pattern_to_index_map[field_pattern].emplace_back(_indexes.back());
1193
976k
            } else {
1194
976k
                IndexKey key = std::make_tuple(_indexes.back()->index_type(), col_uid,
1195
976k
                                               _indexes.back()->get_index_suffix());
1196
976k
                _col_id_suffix_to_index[key].push_back(index_pos);
1197
976k
            }
1198
1.00M
        }
1199
1.00M
    }
1200
956k
    _num_short_key_columns = schema.num_short_key_columns();
1201
956k
    _num_rows_per_row_block = schema.num_rows_per_row_block();
1202
956k
    _compress_kind = schema.compress_kind();
1203
956k
    _next_column_unique_id = schema.next_column_unique_id();
1204
956k
    if (schema.has_bf_fpp()) {
1205
549k
        _has_bf_fpp = true;
1206
549k
        _bf_fpp = schema.bf_fpp();
1207
549k
    } else {
1208
407k
        _has_bf_fpp = false;
1209
407k
        _bf_fpp = BLOOM_FILTER_DEFAULT_FPP;
1210
407k
    }
1211
956k
    _is_in_memory = schema.is_in_memory();
1212
956k
    _disable_auto_compaction = schema.disable_auto_compaction();
1213
956k
    _enable_single_replica_compaction = schema.enable_single_replica_compaction();
1214
956k
    _store_row_column = schema.store_row_column();
1215
956k
    _skip_write_index_on_load = schema.skip_write_index_on_load();
1216
956k
    _delete_sign_idx = schema.delete_sign_idx();
1217
956k
    _sequence_col_idx = schema.sequence_col_idx();
1218
956k
    _version_col_idx = schema.version_col_idx();
1219
956k
    _skip_bitmap_col_idx = schema.skip_bitmap_col_idx();
1220
956k
    _sort_type = schema.sort_type();
1221
956k
    _sort_col_num = schema.sort_col_num();
1222
956k
    _compression_type = schema.compression_type();
1223
956k
    _row_store_page_size = schema.row_store_page_size();
1224
956k
    _storage_page_size = schema.storage_page_size();
1225
956k
    _storage_dict_page_size = schema.storage_dict_page_size();
1226
956k
    _schema_version = schema.schema_version();
1227
956k
    if (schema.has_seq_map()) {
1228
955k
        auto column_groups_pb = schema.seq_map();
1229
955k
        _seq_col_uid_to_value_cols_uid.clear();
1230
955k
        _value_col_uid_to_seq_col_uid.clear();
1231
955k
        _seq_col_idx_to_value_cols_idx.clear();
1232
955k
        _value_col_idx_to_seq_col_idx.clear();
1233
        /*
1234
         * ColumnGroupsPB is a list of cg_pb, and
1235
         * ColumnGroupsPB do not have begin() or end() method.
1236
         * we must use for(i=0;i<xx;i++) loop
1237
         */
1238
956k
        for (int i = 0; i < column_groups_pb.cg_size(); i++) {
1239
308
            ColumnGroupPB cg_pb = column_groups_pb.cg(i);
1240
308
            uint32_t key_uid = cg_pb.sequence_column();
1241
308
            auto found = _field_uniqueid_to_index.find(key_uid);
1242
308
            DCHECK(found != _field_uniqueid_to_index.end())
1243
0
                    << "could not find sequence col with unique id = " << key_uid
1244
0
                    << " table_id=" << _table_id;
1245
308
            int32_t seq_index = found->second;
1246
308
            _seq_col_uid_to_value_cols_uid[key_uid] = {};
1247
308
            _seq_col_idx_to_value_cols_idx[seq_index] = {};
1248
464
            for (auto val_uid : cg_pb.columns_in_group()) {
1249
464
                _seq_col_uid_to_value_cols_uid[key_uid].push_back(val_uid);
1250
464
                found = _field_uniqueid_to_index.find(val_uid);
1251
464
                DCHECK(found != _field_uniqueid_to_index.end())
1252
0
                        << "could not find value col with unique id = " << key_uid
1253
0
                        << " table_id=" << _table_id;
1254
464
                int32_t val_index = found->second;
1255
464
                _seq_col_idx_to_value_cols_idx[seq_index].push_back(val_index);
1256
464
            }
1257
308
        }
1258
1259
955k
        if (!_seq_col_uid_to_value_cols_uid.empty()) {
1260
            /*
1261
                |** KEY **|        ** VALUE **     |
1262
                ------------------------------------
1263
                |** KEY **|  CDE is value| sequence|
1264
                |----|----|----|----|----|----|----|
1265
                A    B    C    D    E   S1   S2
1266
                0    1    2    3    4    5    6
1267
                for example: _seq_map is {5:{2,3}, 6:{4}}
1268
                then, _value_to_seq = {2:5,3:5,5:5,4:6,6:6}
1269
            */
1270
308
            for (auto& [seq_uid, cols_uid] : _seq_col_uid_to_value_cols_uid) {
1271
464
                for (auto col_uid : cols_uid) {
1272
464
                    _value_col_uid_to_seq_col_uid[col_uid] = seq_uid;
1273
464
                }
1274
308
                _value_col_uid_to_seq_col_uid[seq_uid] = seq_uid;
1275
308
            }
1276
1277
308
            for (auto& [seq_idx, value_cols_idx] : _seq_col_idx_to_value_cols_idx) {
1278
464
                for (auto col_idx : value_cols_idx) {
1279
464
                    _value_col_idx_to_seq_col_idx[col_idx] = seq_idx;
1280
464
                }
1281
308
                _value_col_idx_to_seq_col_idx[seq_idx] = seq_idx;
1282
308
            }
1283
219
        }
1284
955k
    }
1285
    // Default to V1 inverted index storage format for backward compatibility if not specified in schema.
1286
956k
    if (!schema.has_inverted_index_storage_format()) {
1287
295
        _inverted_index_storage_format = InvertedIndexStorageFormatPB::V1;
1288
956k
    } else {
1289
956k
        _inverted_index_storage_format = schema.inverted_index_storage_format();
1290
956k
    }
1291
1292
956k
    _row_store_column_unique_ids.assign(schema.row_store_column_unique_ids().begin(),
1293
956k
                                        schema.row_store_column_unique_ids().end());
1294
956k
    _deprecated_enable_variant_flatten_nested = schema.enable_variant_flatten_nested();
1295
956k
    if (schema.has_is_external_segment_column_meta_used()) {
1296
955k
        _is_external_segment_column_meta_used = schema.is_external_segment_column_meta_used();
1297
955k
    } else {
1298
1.06k
        _is_external_segment_column_meta_used = false;
1299
1.06k
    }
1300
956k
    if (schema.has_integer_type_default_use_plain_encoding()) {
1301
784k
        _integer_type_default_use_plain_encoding = schema.integer_type_default_use_plain_encoding();
1302
784k
    }
1303
956k
    if (schema.has_binary_plain_encoding_default_impl()) {
1304
784k
        _binary_plain_encoding_default_impl = schema.binary_plain_encoding_default_impl();
1305
784k
    }
1306
956k
    update_metadata_size();
1307
956k
}
1308
1309
185k
void TabletSchema::copy_from(const TabletSchema& tablet_schema) {
1310
185k
    TabletSchemaPB tablet_schema_pb;
1311
185k
    tablet_schema.to_schema_pb(&tablet_schema_pb);
1312
185k
    init_from_pb(tablet_schema_pb);
1313
185k
    _table_id = tablet_schema.table_id();
1314
185k
    _path_set_info_map = tablet_schema._path_set_info_map;
1315
185k
}
1316
1317
188k
void TabletSchema::shawdow_copy_without_columns(const TabletSchema& tablet_schema) {
1318
188k
    *this = tablet_schema;
1319
188k
    _field_path_to_index.clear();
1320
188k
    _field_name_to_index.clear();
1321
188k
    _field_uniqueid_to_index.clear();
1322
188k
    _num_columns = 0;
1323
188k
    _num_variant_columns = 0;
1324
188k
    _num_null_columns = 0;
1325
188k
    _num_key_columns = 0;
1326
188k
    _cols.clear();
1327
188k
    _delete_sign_idx = -1;
1328
188k
    _sequence_col_idx = -1;
1329
188k
    _version_col_idx = -1;
1330
188k
}
1331
1332
0
void TabletSchema::update_index_info_from(const TabletSchema& tablet_schema) {
1333
0
    for (auto& col : _cols) {
1334
0
        if (col->unique_id() < 0) {
1335
0
            continue;
1336
0
        }
1337
0
        const auto iter = tablet_schema._field_uniqueid_to_index.find(col->unique_id());
1338
0
        if (iter == tablet_schema._field_uniqueid_to_index.end()) {
1339
0
            continue;
1340
0
        }
1341
0
        auto col_idx = iter->second;
1342
0
        if (col_idx < 0 || col_idx >= tablet_schema._cols.size()) {
1343
0
            continue;
1344
0
        }
1345
0
        col->set_is_bf_column(tablet_schema._cols[col_idx]->is_bf_column());
1346
0
    }
1347
0
}
1348
1349
1.12M
std::string TabletSchema::to_key() const {
1350
1.12M
    TabletSchemaPB pb;
1351
1.12M
    to_schema_pb(&pb);
1352
1.12M
    return TabletSchema::deterministic_string_serialize(pb);
1353
1.12M
}
1354
1355
void TabletSchema::build_current_tablet_schema(int64_t index_id, int32_t version,
1356
                                               const OlapTableIndexSchema* index,
1357
170k
                                               const TabletSchema& ori_tablet_schema) {
1358
    // copy from ori_tablet_schema
1359
170k
    _keys_type = ori_tablet_schema.keys_type();
1360
170k
    _num_short_key_columns = ori_tablet_schema.num_short_key_columns();
1361
170k
    _num_rows_per_row_block = ori_tablet_schema.num_rows_per_row_block();
1362
170k
    _compress_kind = ori_tablet_schema.compress_kind();
1363
1364
    // todo(yixiu): unique_id
1365
170k
    _next_column_unique_id = ori_tablet_schema.next_column_unique_id();
1366
170k
    _is_in_memory = ori_tablet_schema.is_in_memory();
1367
170k
    _disable_auto_compaction = ori_tablet_schema.disable_auto_compaction();
1368
170k
    _enable_single_replica_compaction = ori_tablet_schema.enable_single_replica_compaction();
1369
170k
    _skip_write_index_on_load = ori_tablet_schema.skip_write_index_on_load();
1370
170k
    _sort_type = ori_tablet_schema.sort_type();
1371
170k
    _sort_col_num = ori_tablet_schema.sort_col_num();
1372
170k
    _row_store_page_size = ori_tablet_schema.row_store_page_size();
1373
170k
    _storage_page_size = ori_tablet_schema.storage_page_size();
1374
170k
    _storage_dict_page_size = ori_tablet_schema.storage_dict_page_size();
1375
170k
    _deprecated_enable_variant_flatten_nested =
1376
170k
            ori_tablet_schema.deprecated_variant_flatten_nested();
1377
1378
    // copy from table_schema_param
1379
170k
    _schema_version = version;
1380
170k
    _num_columns = 0;
1381
170k
    _num_variant_columns = 0;
1382
170k
    _num_key_columns = 0;
1383
170k
    _num_null_columns = 0;
1384
170k
    bool has_bf_columns = false;
1385
170k
    _cols.clear();
1386
170k
    _indexes.clear();
1387
170k
    _col_id_suffix_to_index.clear();
1388
170k
    _index_by_unique_id_with_pattern.clear();
1389
170k
    _field_name_to_index.clear();
1390
170k
    _field_uniqueid_to_index.clear();
1391
170k
    _delete_sign_idx = -1;
1392
170k
    _sequence_col_idx = -1;
1393
170k
    _version_col_idx = -1;
1394
170k
    _skip_bitmap_col_idx = -1;
1395
170k
    _cluster_key_uids.clear();
1396
170k
    for (const auto& i : ori_tablet_schema._cluster_key_uids) {
1397
6.33k
        _cluster_key_uids.push_back(i);
1398
6.33k
    }
1399
1.96M
    for (auto& column : index->columns) {
1400
1.96M
        if (column->is_key()) {
1401
458k
            _num_key_columns++;
1402
458k
        }
1403
1.96M
        if (column->is_nullable()) {
1404
1.06M
            _num_null_columns++;
1405
1.06M
        }
1406
1.96M
        if (column->is_bf_column()) {
1407
8.50k
            has_bf_columns = true;
1408
8.50k
        }
1409
1.96M
        if (column->is_variant_type()) {
1410
11.1k
            ++_num_variant_columns;
1411
11.1k
        }
1412
1.96M
        if (UNLIKELY(column->name() == DELETE_SIGN)) {
1413
54.0k
            _delete_sign_idx = _num_columns;
1414
1.91M
        } else if (UNLIKELY(column->name() == SEQUENCE_COL)) {
1415
3.83k
            _sequence_col_idx = _num_columns;
1416
1.91M
        } else if (UNLIKELY(column->name() == VERSION_COL)) {
1417
53.7k
            _version_col_idx = _num_columns;
1418
1.85M
        } else if (UNLIKELY(column->name() == SKIP_BITMAP_COL)) {
1419
338
            _skip_bitmap_col_idx = _num_columns;
1420
338
        }
1421
        // Reuse TabletColumn object from pool to reduce memory consumption
1422
1.96M
        TabletColumnPtr new_column;
1423
1.96M
        ColumnPB column_pb;
1424
1.96M
        column->to_schema_pb(&column_pb);
1425
1.96M
        auto pair = TabletColumnObjectPool::instance()->insert(
1426
1.96M
                deterministic_string_serialize(column_pb));
1427
1.96M
        new_column = pair.second;
1428
        // Release the handle quickly, because we use shared ptr to manage column
1429
1.96M
        TabletColumnObjectPool::instance()->release(pair.first);
1430
1.96M
        _cols.emplace_back(std::move(new_column));
1431
1.96M
        _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns);
1432
1.96M
        _field_uniqueid_to_index[_cols.back()->unique_id()] = _num_columns;
1433
1.96M
        _num_columns++;
1434
1.96M
    }
1435
1436
170k
    for (const auto& i : index->indexes) {
1437
128k
        size_t index_pos = _indexes.size();
1438
        // Reuse TabletIndex object from pool to reduce memory consumption
1439
128k
        TabletIndexPtr new_index;
1440
128k
        TabletIndexPB index_pb;
1441
128k
        i->to_schema_pb(&index_pb);
1442
128k
        auto pair = TabletColumnObjectPool::instance()->insert_index(
1443
128k
                deterministic_string_serialize(index_pb));
1444
128k
        new_index = pair.second;
1445
        // Release the handle quickly, because we use shared ptr to manage index
1446
128k
        TabletColumnObjectPool::instance()->release(pair.first);
1447
128k
        _indexes.emplace_back(std::move(new_index));
1448
128k
        for (int32_t col_uid : _indexes.back()->col_unique_ids()) {
1449
128k
            if (auto field_pattern = _indexes.back()->field_pattern(); !field_pattern.empty()) {
1450
1.83k
                auto& pattern_to_index_map = _index_by_unique_id_with_pattern[col_uid];
1451
1.83k
                pattern_to_index_map[field_pattern].emplace_back(_indexes.back());
1452
126k
            } else {
1453
126k
                IndexKey key = std::make_tuple(_indexes.back()->index_type(), col_uid,
1454
126k
                                               _indexes.back()->get_index_suffix());
1455
126k
                _col_id_suffix_to_index[key].push_back(index_pos);
1456
126k
            }
1457
128k
        }
1458
128k
    }
1459
1460
170k
    if (has_bf_columns) {
1461
7.93k
        _has_bf_fpp = true;
1462
7.93k
        _bf_fpp = ori_tablet_schema.bloom_filter_fpp();
1463
162k
    } else {
1464
162k
        _has_bf_fpp = false;
1465
162k
        _bf_fpp = BLOOM_FILTER_DEFAULT_FPP;
1466
162k
    }
1467
170k
}
1468
1469
7.42k
void TabletSchema::merge_dropped_columns(const TabletSchema& src_schema) {
1470
    // If they are the same tablet schema object, then just return
1471
7.42k
    if (this == &src_schema) {
1472
0
        return;
1473
0
    }
1474
56.7k
    for (const auto& src_col : src_schema.columns()) {
1475
56.7k
        if (_field_uniqueid_to_index.find(src_col->unique_id()) == _field_uniqueid_to_index.end()) {
1476
51
            CHECK(!src_col->is_key())
1477
0
                    << src_col->name() << " is key column, should not be dropped.";
1478
51
            ColumnPB src_col_pb;
1479
            // There are some pointer in tablet column, not sure the reference relation, so
1480
            // that deep copy it.
1481
51
            src_col->to_schema_pb(&src_col_pb);
1482
51
            TabletColumn new_col(src_col_pb);
1483
51
            append_column(new_col, TabletSchema::ColumnType::DROPPED);
1484
51
        }
1485
56.7k
    }
1486
7.42k
}
1487
1488
8.32k
// Returns a new schema that carries this schema's non-column state and every column of
// this schema for which is_extracted_column() is false.
TabletSchemaSPtr TabletSchema::copy_without_variant_extracted_columns() {
    auto result = std::make_shared<TabletSchema>();
    result->shawdow_copy_without_columns(*this);
    for (auto& col : this->columns()) {
        if (!col->is_extracted_column()) {
            result->append_column(*col);
        }
    }
    return result;
}
1499
1500
// Dropped column is in _field_uniqueid_to_index but not in _field_name_to_index
1501
// Could refer to append_column method
1502
499k
bool TabletSchema::is_dropped_column(const TabletColumn& col) const {
1503
18.4E
    CHECK(_field_uniqueid_to_index.find(col.unique_id()) != _field_uniqueid_to_index.end())
1504
18.4E
            << "could not find col with unique id = " << col.unique_id()
1505
18.4E
            << " and name = " << col.name() << " table_id=" << _table_id;
1506
499k
    auto it = _field_name_to_index.find(StringRef {col.name()});
1507
500k
    return it == _field_name_to_index.end() || _cols[it->second]->unique_id() != col.unique_id();
1508
499k
}
1509
1510
98
void TabletSchema::copy_extracted_columns(const TabletSchema& src_schema) {
1511
98
    std::unordered_set<int32_t> variant_columns;
1512
330
    for (const auto& col : columns()) {
1513
330
        if (col->is_variant_type()) {
1514
206
            variant_columns.insert(col->unique_id());
1515
206
        }
1516
330
    }
1517
286
    for (const TabletColumnPtr& col : src_schema.columns()) {
1518
286
        if (col->is_extracted_column() && variant_columns.contains(col->parent_unique_id())) {
1519
0
            ColumnPB col_pb;
1520
0
            col->to_schema_pb(&col_pb);
1521
0
            TabletColumn new_col(col_pb);
1522
0
            append_column(new_col, ColumnType::VARIANT);
1523
0
        }
1524
286
    }
1525
98
}
1526
1527
107
void TabletSchema::reserve_extracted_columns() {
1528
650
    for (auto it = _cols.begin(); it != _cols.end();) {
1529
543
        if (!(*it)->is_extracted_column()) {
1530
219
            it = _cols.erase(it);
1531
324
        } else {
1532
324
            ++it;
1533
324
        }
1534
543
    }
1535
107
}
1536
1537
1.71M
// Writes this schema into `tablet_schema_pb`.
//
// Repeated fields (cluster key uids, columns, indexes, sequence-map groups) are appended
// to whatever the message already holds; scalar fields are overwritten. bf_fpp is only
// written when _has_bf_fpp is set.
void TabletSchema::to_schema_pb(TabletSchemaPB* tablet_schema_pb) const {
    for (const auto& cluster_key_uid : _cluster_key_uids) {
        tablet_schema_pb->add_cluster_key_uids(cluster_key_uid);
    }
    tablet_schema_pb->set_keys_type(_keys_type);

    // Each column / index serializes itself into a freshly added repeated element.
    for (const auto& col : _cols) {
        col->to_schema_pb(tablet_schema_pb->add_column());
    }
    for (const auto& index : _indexes) {
        index->to_schema_pb(tablet_schema_pb->add_index());
    }

    tablet_schema_pb->set_num_short_key_columns(cast_set<int32_t>(_num_short_key_columns));
    tablet_schema_pb->set_num_rows_per_row_block(cast_set<int32_t>(_num_rows_per_row_block));
    tablet_schema_pb->set_compress_kind(_compress_kind);
    if (_has_bf_fpp) {
        tablet_schema_pb->set_bf_fpp(_bf_fpp);
    }
    tablet_schema_pb->set_next_column_unique_id(cast_set<uint32_t>(_next_column_unique_id));
    tablet_schema_pb->set_is_in_memory(_is_in_memory);
    tablet_schema_pb->set_disable_auto_compaction(_disable_auto_compaction);
    tablet_schema_pb->set_enable_single_replica_compaction(_enable_single_replica_compaction);
    tablet_schema_pb->set_store_row_column(_store_row_column);
    tablet_schema_pb->set_skip_write_index_on_load(_skip_write_index_on_load);
    tablet_schema_pb->set_delete_sign_idx(_delete_sign_idx);
    tablet_schema_pb->set_sequence_col_idx(_sequence_col_idx);
    tablet_schema_pb->set_sort_type(_sort_type);
    tablet_schema_pb->set_sort_col_num(cast_set<int32_t>(_sort_col_num));
    tablet_schema_pb->set_schema_version(_schema_version);
    tablet_schema_pb->set_compression_type(_compression_type);
    tablet_schema_pb->set_row_store_page_size(_row_store_page_size);
    tablet_schema_pb->set_storage_page_size(_storage_page_size);
    tablet_schema_pb->set_storage_dict_page_size(_storage_dict_page_size);
    tablet_schema_pb->set_version_col_idx(_version_col_idx);
    tablet_schema_pb->set_skip_bitmap_col_idx(_skip_bitmap_col_idx);
    tablet_schema_pb->set_inverted_index_storage_format(_inverted_index_storage_format);
    tablet_schema_pb->mutable_row_store_column_unique_ids()->Assign(
            _row_store_column_unique_ids.begin(), _row_store_column_unique_ids.end());
    tablet_schema_pb->set_enable_variant_flatten_nested(_deprecated_enable_variant_flatten_nested);
    tablet_schema_pb->set_is_external_segment_column_meta_used(
            _is_external_segment_column_meta_used);
    tablet_schema_pb->set_integer_type_default_use_plain_encoding(
            _integer_type_default_use_plain_encoding);
    tablet_schema_pb->set_binary_plain_encoding_default_impl(_binary_plain_encoding_default_impl);

    // One ColumnGroupPB per sequence column: {sequence column uid: {value column uids}}.
    auto seq_map_pb = tablet_schema_pb->mutable_seq_map();
    for (const auto& entry : _seq_col_uid_to_value_cols_uid) {
        const uint32_t seq_col_uid = entry.first;
        ColumnGroupPB* group_pb = seq_map_pb->add_cg();
        group_pb->set_sequence_column(seq_col_uid);
        for (auto value_col_uid : entry.second) {
            group_pb->add_columns_in_group(value_col_uid);
        }
    }
}
1592
1593
12.3k
size_t TabletSchema::row_size() const {
1594
12.3k
    size_t size = 0;
1595
194k
    for (const auto& column : _cols) {
1596
194k
        size += column->length();
1597
194k
    }
1598
12.3k
    size += (_num_columns + 7) / 8;
1599
1600
12.3k
    return size;
1601
12.3k
}
1602
1603
7.37M
int32_t TabletSchema::field_index(const std::string& field_name) const {
1604
7.37M
    const auto& found = _field_name_to_index.find(StringRef(field_name));
1605
7.37M
    return (found == _field_name_to_index.end()) ? -1 : found->second;
1606
7.37M
}
1607
1608
13.5k
int32_t TabletSchema::field_index(const PathInData& path) const {
1609
13.5k
    const auto& found = _field_path_to_index.find(PathInDataRef(&path));
1610
13.5k
    return (found == _field_path_to_index.end()) ? -1 : found->second;
1611
13.5k
}
1612
1613
9.97M
int32_t TabletSchema::field_index(int32_t col_unique_id) const {
1614
9.97M
    const auto& found = _field_uniqueid_to_index.find(col_unique_id);
1615
9.97M
    return (found == _field_uniqueid_to_index.end()) ? -1 : found->second;
1616
9.97M
}
1617
1618
44.9M
const std::vector<TabletColumnPtr>& TabletSchema::columns() const {
1619
44.9M
    return _cols;
1620
44.9M
}
1621
1622
100M
// Returns the column stored at position `ordinal`. The bound is verified only through
// DCHECK; the element access itself is unchecked.
const TabletColumn& TabletSchema::column(size_t ordinal) const {
    DCHECK(ordinal < _num_columns) << "ordinal:" << ordinal << ", _num_columns:" << _num_columns;
    const TabletColumnPtr& col = _cols[ordinal];
    return *col;
}
1626
1627
1.76M
// Returns the column whose unique id is `col_unique_id`. Both lookups go through at(),
// so an unknown unique id or an out-of-range position is reported by at() rather than
// being accessed unchecked.
const TabletColumn& TabletSchema::column_by_uid(int32_t col_unique_id) const {
    const auto ordinal = _field_uniqueid_to_index.at(col_unique_id);
    return *_cols.at(ordinal);
}
1630
1631
1
// Mutable counterpart of column_by_uid(): returns the column whose unique id is
// `col_unique_id`, with both lookups performed through at().
TabletColumn& TabletSchema::mutable_column_by_uid(int32_t col_unique_id) {
    const auto ordinal = _field_uniqueid_to_index.at(col_unique_id);
    return *_cols.at(ordinal);
}
1634
1635
86.2k
// Returns a mutable reference to the column at position `ordinal`; the position is
// bounds-checked by at().
TabletColumn& TabletSchema::mutable_column(size_t ordinal) {
    const TabletColumnPtr& col = _cols.at(ordinal);
    return *col;
}
1638
1639
320k
// Replaces the index list with indexes built from `tindexes` and rebuilds both index
// lookup tables from scratch.
void TabletSchema::update_indexes_from_thrift(const std::vector<doris::TOlapTableIndex>& tindexes) {
    // Build the replacement list first; init_from_thrift receives *this while the
    // current index list is still in place.
    std::vector<TabletIndexPtr> rebuilt;
    for (const auto& tindex : tindexes) {
        TabletIndex parsed;
        parsed.init_from_thrift(tindex, *this);
        rebuilt.emplace_back(std::make_shared<TabletIndex>(std::move(parsed)));
    }
    _indexes = std::move(rebuilt);

    _col_id_suffix_to_index.clear();
    _index_by_unique_id_with_pattern.clear();
    for (size_t index_pos = 0; index_pos < _indexes.size(); ++index_pos) {
        auto& index = _indexes[index_pos];
        for (int32_t col_uid : index->col_unique_ids()) {
            auto field_pattern = index->field_pattern();
            if (field_pattern.empty()) {
                // No pattern: addressable by (index type, column uid, suffix).
                IndexKey key =
                        std::make_tuple(index->index_type(), col_uid, index->get_index_suffix());
                _col_id_suffix_to_index[key].push_back(index_pos);
            } else {
                // Pattern indexes are grouped per column uid and pattern.
                auto& pattern_to_index_map = _index_by_unique_id_with_pattern[col_uid];
                pattern_to_index_map[field_pattern].emplace_back(index);
            }
        }
    }
}
1664
1665
4.70k
bool TabletSchema::exist_column(const std::string& field_name) const {
1666
4.70k
    return _field_name_to_index.contains(StringRef {field_name});
1667
4.70k
}
1668
1669
23.1M
bool TabletSchema::has_column_unique_id(int32_t col_unique_id) const {
1670
23.1M
    return _field_uniqueid_to_index.contains(col_unique_id);
1671
23.1M
}
1672
1673
4.20k
// Status-returning variant of the name lookup: OK when `field_name` is registered in
// _field_name_to_index, otherwise an INTERNAL_ERROR whose message contains the name and
// the output of get_all_field_names().
Status TabletSchema::have_column(const std::string& field_name) const {
    if (_field_name_to_index.contains(StringRef(field_name))) {
        return Status::OK();
    }
    return Status::Error<ErrorCode::INTERNAL_ERROR>(
            "Not found field_name, field_name:{}, schema:{}", field_name,
            get_all_field_names());
}
1681
1682
5.75k
// Looks a column up by name. Returns a pointer to the column when the name is
// registered; otherwise trips a DCHECK and returns an InternalError result.
Result<const TabletColumn*> TabletSchema::column(const std::string& field_name) const {
    const auto by_name = _field_name_to_index.find(StringRef {field_name});
    if (by_name != _field_name_to_index.end()) {
        return _cols[by_name->second].get();
    }

    // Unknown name: log through DCHECK, then report the failure to the caller.
    DCHECK(false) << "field_name=" << field_name << ", table_id=" << _table_id
                  << ", field_name_to_index=" << get_all_field_names();
    return ResultError(
            Status::InternalError("column not found, name={}, table_id={}, schema_version={}",
                                  field_name, _table_id, _schema_version));
}
1693
1694
void TabletSchema::update_tablet_columns(const TabletSchema& tablet_schema,
1695
12.7k
                                         const std::vector<TColumn>& t_columns) {
1696
12.7k
    copy_from(tablet_schema);
1697
12.7k
    if (!t_columns.empty() && t_columns[0].col_unique_id >= 0) {
1698
12.7k
        clear_columns();
1699
128k
        for (const auto& column : t_columns) {
1700
128k
            append_column(TabletColumn(column));
1701
128k
        }
1702
12.7k
    }
1703
12.7k
}
1704
1705
67
bool TabletSchema::has_inverted_index_with_index_id(int64_t index_id) const {
1706
86
    for (size_t i = 0; i < _indexes.size(); i++) {
1707
48
        if ((_indexes[i]->index_type() == IndexType::INVERTED ||
1708
48
             _indexes[i]->index_type() == IndexType::ANN) &&
1709
48
            _indexes[i]->index_id() == index_id) {
1710
29
            return true;
1711
29
        }
1712
48
    }
1713
38
    return false;
1714
67
}
1715
1716
std::vector<const TabletIndex*> TabletSchema::inverted_indexs(
1717
23.1M
        int32_t col_unique_id, const std::string& suffix_path) const {
1718
23.1M
    std::vector<const TabletIndex*> result;
1719
23.1M
    const std::string escaped_suffix = escape_for_path_name(suffix_path);
1720
23.1M
    auto it = _col_id_suffix_to_index.find(
1721
23.1M
            std::make_tuple(IndexType::INVERTED, col_unique_id, escaped_suffix));
1722
23.1M
    if (it != _col_id_suffix_to_index.end()) {
1723
157k
        for (size_t pos : it->second) {
1724
157k
            if (pos < _indexes.size()) {
1725
157k
                result.push_back(_indexes[pos].get());
1726
157k
            }
1727
157k
        }
1728
156k
    }
1729
23.1M
    return result;
1730
23.1M
}
1731
1732
std::vector<TabletIndexPtr> TabletSchema::inverted_index_by_field_pattern(
1733
9.91k
        int32_t col_unique_id, const std::string& field_pattern) const {
1734
9.91k
    auto id_to_pattern_map = _index_by_unique_id_with_pattern.find(col_unique_id);
1735
9.91k
    if (id_to_pattern_map == _index_by_unique_id_with_pattern.end()) {
1736
5.58k
        return {};
1737
5.58k
    }
1738
4.33k
    auto pattern_to_index_map = id_to_pattern_map->second.find(field_pattern);
1739
4.33k
    if (pattern_to_index_map == id_to_pattern_map->second.end()) {
1740
620
        return {};
1741
620
    }
1742
3.71k
    return pattern_to_index_map->second;
1743
4.33k
}
1744
1745
22.9M
std::vector<const TabletIndex*> TabletSchema::inverted_indexs(const TabletColumn& col) const {
1746
    // Some columns(Float, Double, JSONB ...) from the variant do not support inverted index
1747
22.9M
    if (!segment_v2::IndexColumnWriter::check_support_inverted_index(col)) {
1748
60.4k
        return {};
1749
60.4k
    }
1750
1751
    // TODO use more efficient impl
1752
    // Use parent id if unique not assigned, this could happend when accessing subcolumns of variants
1753
22.8M
    int32_t col_unique_id = col.is_extracted_column() ? col.parent_unique_id() : col.unique_id();
1754
22.8M
    std::vector<const TabletIndex*> result;
1755
22.8M
    if (result = inverted_indexs(col_unique_id, escape_for_path_name(col.suffix_path()));
1756
22.8M
        !result.empty()) {
1757
122k
        return result;
1758
122k
    }
1759
    // variant's typed column has it's own index
1760
22.7M
    else if (col.is_extracted_column() && col.path_info_ptr()->get_is_typed()) {
1761
535
        std::string relative_path = col.path_info_ptr()->copy_pop_front().get_path();
1762
535
        if (_path_set_info_map.find(col_unique_id) == _path_set_info_map.end()) {
1763
0
            return result;
1764
0
        }
1765
535
        const auto& path_set_info = _path_set_info_map.at(col_unique_id);
1766
535
        if (path_set_info.typed_path_set.find(relative_path) ==
1767
535
            path_set_info.typed_path_set.end()) {
1768
0
            return result;
1769
0
        }
1770
535
        for (const auto& index : path_set_info.typed_path_set.at(relative_path).indexes) {
1771
47
            result.push_back(index.get());
1772
47
        }
1773
535
        return result;
1774
535
    }
1775
    // variant's subcolumns has it's own index
1776
22.7M
    else if (col.is_extracted_column()) {
1777
2.46k
        std::string relative_path = col.path_info_ptr()->copy_pop_front().get_path();
1778
2.46k
        if (_path_set_info_map.find(col_unique_id) == _path_set_info_map.end()) {
1779
1
            return result;
1780
1
        }
1781
2.46k
        const auto& path_set_info = _path_set_info_map.at(col_unique_id);
1782
2.46k
        if (path_set_info.subcolumn_indexes.find(relative_path) ==
1783
2.46k
            path_set_info.subcolumn_indexes.end()) {
1784
772
            return result;
1785
772
        }
1786
1.69k
        for (const auto& index : path_set_info.subcolumn_indexes.at(relative_path)) {
1787
34
            result.push_back(index.get());
1788
34
        }
1789
1.69k
    }
1790
22.7M
    return result;
1791
22.8M
}
1792
1793
// Scans the index list and returns the first ANN index that covers `col_unique_id` and
// whose suffix equals escape_for_path_name(suffix_path); nullptr when there is none.
const TabletIndex* TabletSchema::ann_index(int32_t col_unique_id,
                                           const std::string& suffix_path) const {
    for (const auto& index : _indexes) {
        if (index->index_type() != IndexType::ANN) {
            continue;
        }
        for (int32_t covered_uid : index->col_unique_ids()) {
            // The suffix is escaped only after the unique id matched.
            if (covered_uid == col_unique_id &&
                index->get_index_suffix() == escape_for_path_name(suffix_path)) {
                return index.get();
            }
        }
    }
    return nullptr;
}
1807
1808
22.2M
// Returns the ANN index that applies to `col`, or nullptr when
// IndexColumnWriter::check_support_ann_index(col) is false or no index matches.
//
// NOTE(review): the suffix path is escaped here and escaped again inside the
// two-argument overload; confirm escape_for_path_name() is idempotent.
const TabletIndex* TabletSchema::ann_index(const TabletColumn& col) const {
    if (!segment_v2::IndexColumnWriter::check_support_ann_index(col)) {
        return nullptr;
    }
    // TODO use more efficient impl
    // Use parent id if unique not assigned, this could happen when accessing subcolumns
    // of variants
    int32_t lookup_uid;
    if (col.is_extracted_column()) {
        lookup_uid = col.parent_unique_id();
    } else {
        lookup_uid = col.unique_id();
    }
    return ann_index(lookup_uid, escape_for_path_name(col.suffix_path()));
}
1817
1818
0
bool TabletSchema::has_ngram_bf_index(int32_t col_unique_id) const {
1819
0
    IndexKey index_key(IndexType::NGRAM_BF, col_unique_id, "");
1820
0
    auto it = _col_id_suffix_to_index.find(index_key);
1821
0
    return it != _col_id_suffix_to_index.end();
1822
0
}
1823
1824
672k
// Returns the first index registered for (NGRAM_BF, col_unique_id, empty suffix), or
// nullptr when there is no such entry, the entry is empty, or its first recorded
// position does not address an element of _indexes.
const TabletIndex* TabletSchema::get_ngram_bf_index(int32_t col_unique_id) const {
    const IndexKey ngram_key(IndexType::NGRAM_BF, col_unique_id, "");
    const auto entry = _col_id_suffix_to_index.find(ngram_key);
    if (entry == _col_id_suffix_to_index.end()) {
        return nullptr;
    }
    const auto& positions = entry->second;
    if (positions.empty() || positions[0] >= _indexes.size()) {
        return nullptr;
    }
    return _indexes[positions[0]].get();
}
1835
1836
// Returns the first index registered for (index_type, col_unique_id, suffix_path), or
// nullptr when there is no such entry, the entry is empty, or its first recorded
// position does not address an element of _indexes. `suffix_path` is used as given.
const TabletIndex* TabletSchema::get_index(int32_t col_unique_id, IndexType index_type,
                                           const std::string& suffix_path) const {
    const IndexKey lookup_key(index_type, col_unique_id, suffix_path);
    const auto entry = _col_id_suffix_to_index.find(lookup_key);
    if (entry == _col_id_suffix_to_index.end()) {
        return nullptr;
    }
    const auto& positions = entry->second;
    if (positions.empty() || positions[0] >= _indexes.size()) {
        return nullptr;
    }
    return _indexes[positions[0]].get();
}
1847
1848
// Builds a Block with one entry per id in `return_columns`, in that order.
//
// - A column id contained in `tablet_columns_need_convert_null` (when that pointer is
//   non-null) has its data type created with is_nullable = true.
// - STRUCT / MAP / ARRAY columns use the type stored in _pruned_columns_data_type for
//   their unique id when one is present.
// - A column id present in _vir_col_idx_to_unique_id is inserted with a ColumnNothing
//   column instead of a column created from the data type.
Block TabletSchema::create_block(
        const std::vector<uint32_t>& return_columns,
        const std::unordered_set<uint32_t>* tablet_columns_need_convert_null) const {
    Block block;
    for (const ColumnId cid : return_columns) {
        const auto& col = *_cols[cid];
        const bool is_nullable = tablet_columns_need_convert_null != nullptr &&
                                 tablet_columns_need_convert_null->contains(cid);
        auto data_type = DataTypeFactory::instance().create_data_type(col, is_nullable);

        const bool is_complex_type = col.type() == FieldType::OLAP_FIELD_TYPE_STRUCT ||
                                     col.type() == FieldType::OLAP_FIELD_TYPE_MAP ||
                                     col.type() == FieldType::OLAP_FIELD_TYPE_ARRAY;
        if (is_complex_type) {
            if (const auto pruned = _pruned_columns_data_type.find(col.unique_id());
                pruned != _pruned_columns_data_type.end()) {
                data_type = pruned->second;
            }
        }

        if (!_vir_col_idx_to_unique_id.contains(cid)) {
            block.insert({data_type->create_column(), data_type, col.name()});
            continue;
        }
        block.insert({ColumnNothing::create(0), data_type, col.name()});
        VLOG_DEBUG << fmt::format(
                "Create block from tablet schema, column cid {} is virtual column, col_name: "
                "{}, col_unique_id: {}, type {}",
                cid, col.name(), col.unique_id(), data_type->get_name());
    }
    return block;
}
1879
1880
41.8k
// Builds a Block with one entry per column of this schema, skipping columns for which
// is_dropped_column() is true. STRUCT columns use the type stored in
// _pruned_columns_data_type for their unique id when one is present.
//
// NOTE(review): the create_block(return_columns, ...) overload also applies pruned
// types to MAP and ARRAY columns; confirm that limiting this overload to STRUCT is
// intended.
Block TabletSchema::create_block() const {
    Block block;
    for (const auto& col : _cols) {
        if (is_dropped_column(*col)) {
            continue;
        }

        auto data_type = DataTypeFactory::instance().create_data_type(*col);
        if (col->type() == FieldType::OLAP_FIELD_TYPE_STRUCT) {
            if (const auto pruned = _pruned_columns_data_type.find(col->unique_id());
                pruned != _pruned_columns_data_type.end()) {
                data_type = pruned->second;
            }
        }
        block.insert({data_type->create_column(), data_type, col->name()});
    }
    return block;
}
1897
1898
2.22k
// Builds a Block with one entry per column id in `cids`, in that order. STRUCT columns
// use the type stored in _pruned_columns_data_type for their unique id when one is
// present. Column ids are used to index _cols without a bounds check.
//
// NOTE(review): the create_block(return_columns, ...) overload also applies pruned
// types to MAP and ARRAY columns; confirm that limiting this function to STRUCT is
// intended.
Block TabletSchema::create_block_by_cids(const std::vector<uint32_t>& cids) const {
    Block block;
    for (const auto& cid : cids) {
        const auto& col = *_cols[cid];
        auto data_type = DataTypeFactory::instance().create_data_type(col);
        if (col.type() == FieldType::OLAP_FIELD_TYPE_STRUCT) {
            if (const auto pruned = _pruned_columns_data_type.find(col.unique_id());
                pruned != _pruned_columns_data_type.end()) {
                data_type = pruned->second;
            }
        }
        block.insert({data_type->create_column(), data_type, col.name()});
    }
    return block;
}
1912
1913
831
// Field-by-field equality of two TabletColumn objects.
//
// Compared: unique id, name, type, key flag, aggregation, nullability, default value
// (value only when a default is present), decimal precision/frac (only for decimal
// columns), length, index length, bloom-filter flag and column path.
//
// Column paths are equal when both are null, or both are non-null and the pointed-to
// paths compare equal. The previous null check compared `a._column_path` against
// itself and therefore never fired, so a column without a path compared equal to a
// column with one.
bool operator==(const TabletColumn& a, const TabletColumn& b) {
    if (a._unique_id != b._unique_id) return false;
    if (a._col_name != b._col_name) return false;
    if (a._type != b._type) return false;
    if (a._is_key != b._is_key) return false;
    if (a._aggregation != b._aggregation) return false;
    if (a._is_nullable != b._is_nullable) return false;
    if (a._has_default_value != b._has_default_value) return false;
    if (a._has_default_value) {
        if (a._default_value != b._default_value) return false;
    }
    if (a._is_decimal != b._is_decimal) return false;
    if (a._is_decimal) {
        if (a._precision != b._precision) return false;
        if (a._frac != b._frac) return false;
    }
    if (a._length != b._length) return false;
    if (a._index_length != b._index_length) return false;
    if (a._is_bf_column != b._is_bf_column) return false;

    // Exactly one side having a path means the columns differ.
    const bool a_has_path = a._column_path != nullptr;
    const bool b_has_path = b._column_path != nullptr;
    if (a_has_path != b_has_path) return false;
    if (a_has_path && *a._column_path != *b._column_path) return false;
    return true;
}
1939
1940
828
// Negation of operator==(const TabletColumn&, const TabletColumn&).
bool operator!=(const TabletColumn& a, const TabletColumn& b) {
    const bool columns_equal = (a == b);
    return !columns_equal;
}
1943
1944
108
// Equality for TabletSchema.
//
// Compares the keys type, the column list (pairwise, via TabletColumn's
// operator!=), the column counters, compression kind, next column unique id,
// bloom-filter settings and the remaining per-schema flags and page sizes
// listed below. Members not referenced here do not take part in the
// comparison.
bool operator==(const TabletSchema& a, const TabletSchema& b) {
    // The column lists must have the same length before they are walked in
    // lockstep.
    if (a._keys_type != b._keys_type || a._cols.size() != b._cols.size()) {
        return false;
    }
    const size_t column_count = a._cols.size();
    for (size_t idx = 0; idx < column_count; ++idx) {
        if (*a._cols[idx] != *b._cols[idx]) {
            return false;
        }
    }

    // Column counters and block layout.
    if (a._num_columns != b._num_columns || a._num_key_columns != b._num_key_columns ||
        a._num_null_columns != b._num_null_columns ||
        a._num_short_key_columns != b._num_short_key_columns ||
        a._num_rows_per_row_block != b._num_rows_per_row_block) {
        return false;
    }

    if (a._compress_kind != b._compress_kind ||
        a._next_column_unique_id != b._next_column_unique_id) {
        return false;
    }

    // The bloom-filter fpp is compared with an absolute tolerance of 1e-6, and
    // only when it is set (the flags are known to match by then).
    if (a._has_bf_fpp != b._has_bf_fpp) {
        return false;
    }
    if (a._has_bf_fpp && std::abs(a._bf_fpp - b._bf_fpp) > 1e-6) {
        return false;
    }

    // Table-level switches.
    if (a._is_in_memory != b._is_in_memory || a._delete_sign_idx != b._delete_sign_idx ||
        a._disable_auto_compaction != b._disable_auto_compaction ||
        a._enable_single_replica_compaction != b._enable_single_replica_compaction ||
        a._store_row_column != b._store_row_column) {
        return false;
    }

    // Page sizes and load-time index behaviour.
    if (a._row_store_page_size != b._row_store_page_size ||
        a._storage_page_size != b._storage_page_size ||
        a._storage_dict_page_size != b._storage_dict_page_size ||
        a._skip_write_index_on_load != b._skip_write_index_on_load) {
        return false;
    }

    // Variant / segment-meta / encoding settings.
    if (a._deprecated_enable_variant_flatten_nested !=
                b._deprecated_enable_variant_flatten_nested ||
        a._is_external_segment_column_meta_used != b._is_external_segment_column_meta_used ||
        a._integer_type_default_use_plain_encoding !=
                b._integer_type_default_use_plain_encoding ||
        a._binary_plain_encoding_default_impl != b._binary_plain_encoding_default_impl) {
        return false;
    }

    return true;
}
1982
1983
108
// Inequality for TabletSchema, defined as the negation of operator== so the
// two operators always agree.
bool operator!=(const TabletSchema& a, const TabletSchema& b) {
    const bool schemas_equal = (a == b);
    return !schemas_equal;
}
1986
#include "common/compile_check_end.h"
1987
} // namespace doris