Coverage Report

Created: 2025-06-08 11:30

/root/doris/be/src/olap/tablet_schema.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "olap/tablet_schema.h"
19
20
#include <gen_cpp/Descriptors_types.h>
21
#include <gen_cpp/olap_file.pb.h>
22
#include <glog/logging.h>
23
#include <google/protobuf/io/coded_stream.h>
24
#include <google/protobuf/io/zero_copy_stream.h>
25
#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
26
27
#include <algorithm>
28
#include <cctype>
29
// IWYU pragma: no_include <bits/std_abs.h>
30
#include <cmath> // IWYU pragma: keep
31
#include <memory>
32
#include <ostream>
33
#include <vector>
34
35
#include "common/compiler_util.h" // IWYU pragma: keep
36
#include "common/consts.h"
37
#include "common/status.h"
38
#include "exec/tablet_info.h"
39
#include "olap/inverted_index_parser.h"
40
#include "olap/olap_define.h"
41
#include "olap/tablet_column_object_pool.h"
42
#include "olap/types.h"
43
#include "olap/utils.h"
44
#include "runtime/memory/lru_cache_policy.h"
45
#include "runtime/thread_context.h"
46
#include "tablet_meta.h"
47
#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
48
#include "vec/aggregate_functions/aggregate_function_state_union.h"
49
#include "vec/common/hex.h"
50
#include "vec/common/string_ref.h"
51
#include "vec/core/block.h"
52
#include "vec/data_types/data_type.h"
53
#include "vec/data_types/data_type_factory.hpp"
54
#include "vec/json/path_in_data.h"
55
56
namespace doris {
57
58
0
// Translates a PrimitiveType into the corresponding storage FieldType.
// Primitive types that have no mapping here (including the ones marked
// "Not implemented") and any unrecognized value yield OLAP_FIELD_TYPE_UNKNOWN.
FieldType TabletColumn::get_field_type_by_type(PrimitiveType primitiveType) {
    switch (primitiveType) {
    case PrimitiveType::TYPE_NULL:
        return FieldType::OLAP_FIELD_TYPE_NONE;
    case PrimitiveType::TYPE_BOOLEAN:
        return FieldType::OLAP_FIELD_TYPE_BOOL;

    // Integers and floating point.
    case PrimitiveType::TYPE_TINYINT:
        return FieldType::OLAP_FIELD_TYPE_TINYINT;
    case PrimitiveType::TYPE_SMALLINT:
        return FieldType::OLAP_FIELD_TYPE_SMALLINT;
    case PrimitiveType::TYPE_INT:
        return FieldType::OLAP_FIELD_TYPE_INT;
    case PrimitiveType::TYPE_BIGINT:
        return FieldType::OLAP_FIELD_TYPE_BIGINT;
    case PrimitiveType::TYPE_LARGEINT:
        return FieldType::OLAP_FIELD_TYPE_LARGEINT;
    case PrimitiveType::TYPE_FLOAT:
        return FieldType::OLAP_FIELD_TYPE_FLOAT;
    case PrimitiveType::TYPE_DOUBLE:
        return FieldType::OLAP_FIELD_TYPE_DOUBLE;

    // Character data.
    case PrimitiveType::TYPE_VARCHAR:
        return FieldType::OLAP_FIELD_TYPE_VARCHAR;
    case PrimitiveType::TYPE_CHAR:
        return FieldType::OLAP_FIELD_TYPE_CHAR;
    case PrimitiveType::TYPE_STRING:
        return FieldType::OLAP_FIELD_TYPE_STRING;

    // Date and time.
    case PrimitiveType::TYPE_DATE:
        return FieldType::OLAP_FIELD_TYPE_DATE;
    case PrimitiveType::TYPE_DATETIME:
        return FieldType::OLAP_FIELD_TYPE_DATETIME;
    case PrimitiveType::TYPE_DATEV2:
        return FieldType::OLAP_FIELD_TYPE_DATEV2;
    case PrimitiveType::TYPE_DATETIMEV2:
        return FieldType::OLAP_FIELD_TYPE_DATETIMEV2;
    case PrimitiveType::TYPE_TIMEV2:
        return FieldType::OLAP_FIELD_TYPE_TIMEV2;

    // Decimals.
    case PrimitiveType::TYPE_DECIMAL32:
        return FieldType::OLAP_FIELD_TYPE_DECIMAL32;
    case PrimitiveType::TYPE_DECIMAL64:
        return FieldType::OLAP_FIELD_TYPE_DECIMAL64;
    case PrimitiveType::TYPE_DECIMAL128I:
        return FieldType::OLAP_FIELD_TYPE_DECIMAL128I;

    // Nested types.
    case PrimitiveType::TYPE_STRUCT:
        return FieldType::OLAP_FIELD_TYPE_STRUCT;
    case PrimitiveType::TYPE_ARRAY:
        return FieldType::OLAP_FIELD_TYPE_ARRAY;
    case PrimitiveType::TYPE_MAP:
        return FieldType::OLAP_FIELD_TYPE_MAP;

    // Remaining one-to-one mappings.
    case PrimitiveType::TYPE_HLL:
        return FieldType::OLAP_FIELD_TYPE_HLL;
    case PrimitiveType::TYPE_OBJECT:
        return FieldType::OLAP_FIELD_TYPE_OBJECT;
    case PrimitiveType::TYPE_QUANTILE_STATE:
        return FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE;
    case PrimitiveType::TYPE_JSONB:
        return FieldType::OLAP_FIELD_TYPE_JSONB;
    case PrimitiveType::TYPE_VARIANT:
        return FieldType::OLAP_FIELD_TYPE_VARIANT;
    case PrimitiveType::TYPE_AGG_STATE:
        return FieldType::OLAP_FIELD_TYPE_AGG_STATE;

    // Everything below has no storage field type in this mapping.
    case PrimitiveType::INVALID_TYPE:
    case PrimitiveType::TYPE_BINARY:          // Not implemented
    case PrimitiveType::TYPE_DECIMALV2:       // Not implemented
    case PrimitiveType::TYPE_TIME:
    case PrimitiveType::TYPE_LAMBDA_FUNCTION: // Not implemented
    default:
        return FieldType::OLAP_FIELD_TYPE_UNKNOWN;
    }
}
132
133
4.83k
// Parses a column type name (case-insensitive) into a FieldType.
//
// Most names must match exactly. "DECIMAL", "VARCHAR" and "HLL" are matched as
// prefixes, so any string starting with one of them maps to that type; the
// exact DECIMAL32/64/128I/256 names are tested first so the DECIMAL prefix
// does not swallow them. Unrecognized names are logged and mapped to
// OLAP_FIELD_TYPE_UNKNOWN.
FieldType TabletColumn::get_field_type_by_string(const std::string& type_str) {
    std::string upper_type_str = type_str;
    // std::toupper has undefined behaviour for negative char values, so the
    // character is taken as unsigned char before the call.
    std::transform(upper_type_str.begin(), upper_type_str.end(), upper_type_str.begin(),
                   [](unsigned char c) { return static_cast<char>(std::toupper(c)); });

    if (upper_type_str == "TINYINT") {
        return FieldType::OLAP_FIELD_TYPE_TINYINT;
    }
    if (upper_type_str == "SMALLINT") {
        return FieldType::OLAP_FIELD_TYPE_SMALLINT;
    }
    if (upper_type_str == "INT") {
        return FieldType::OLAP_FIELD_TYPE_INT;
    }
    if (upper_type_str == "BIGINT") {
        return FieldType::OLAP_FIELD_TYPE_BIGINT;
    }
    if (upper_type_str == "LARGEINT") {
        return FieldType::OLAP_FIELD_TYPE_LARGEINT;
    }
    if (upper_type_str == "UNSIGNED_TINYINT") {
        return FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT;
    }
    if (upper_type_str == "UNSIGNED_SMALLINT") {
        return FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT;
    }
    if (upper_type_str == "UNSIGNED_INT") {
        return FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT;
    }
    if (upper_type_str == "UNSIGNED_BIGINT") {
        return FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT;
    }
    if (upper_type_str == "IPV4") {
        return FieldType::OLAP_FIELD_TYPE_IPV4;
    }
    if (upper_type_str == "IPV6") {
        return FieldType::OLAP_FIELD_TYPE_IPV6;
    }
    if (upper_type_str == "FLOAT") {
        return FieldType::OLAP_FIELD_TYPE_FLOAT;
    }
    if (upper_type_str == "DISCRETE_DOUBLE") {
        return FieldType::OLAP_FIELD_TYPE_DISCRETE_DOUBLE;
    }
    if (upper_type_str == "DOUBLE") {
        return FieldType::OLAP_FIELD_TYPE_DOUBLE;
    }
    if (upper_type_str == "CHAR") {
        return FieldType::OLAP_FIELD_TYPE_CHAR;
    }
    if (upper_type_str == "DATE") {
        return FieldType::OLAP_FIELD_TYPE_DATE;
    }
    if (upper_type_str == "DATEV2") {
        return FieldType::OLAP_FIELD_TYPE_DATEV2;
    }
    if (upper_type_str == "DATETIMEV2") {
        return FieldType::OLAP_FIELD_TYPE_DATETIMEV2;
    }
    if (upper_type_str == "DATETIME") {
        return FieldType::OLAP_FIELD_TYPE_DATETIME;
    }
    if (upper_type_str == "DECIMAL32") {
        return FieldType::OLAP_FIELD_TYPE_DECIMAL32;
    }
    if (upper_type_str == "DECIMAL64") {
        return FieldType::OLAP_FIELD_TYPE_DECIMAL64;
    }
    if (upper_type_str == "DECIMAL128I") {
        return FieldType::OLAP_FIELD_TYPE_DECIMAL128I;
    }
    if (upper_type_str == "DECIMAL256") {
        return FieldType::OLAP_FIELD_TYPE_DECIMAL256;
    }
    // Prefix match: must come after the exact DECIMAL* names above.
    if (upper_type_str.compare(0, 7, "DECIMAL") == 0) {
        return FieldType::OLAP_FIELD_TYPE_DECIMAL;
    }
    // Prefix match.
    if (upper_type_str.compare(0, 7, "VARCHAR") == 0) {
        return FieldType::OLAP_FIELD_TYPE_VARCHAR;
    }
    if (upper_type_str == "STRING") {
        return FieldType::OLAP_FIELD_TYPE_STRING;
    }
    if (upper_type_str == "JSONB") {
        return FieldType::OLAP_FIELD_TYPE_JSONB;
    }
    if (upper_type_str == "VARIANT") {
        return FieldType::OLAP_FIELD_TYPE_VARIANT;
    }
    if (upper_type_str == "BOOLEAN") {
        return FieldType::OLAP_FIELD_TYPE_BOOL;
    }
    // Prefix match.
    if (upper_type_str.compare(0, 3, "HLL") == 0) {
        return FieldType::OLAP_FIELD_TYPE_HLL;
    }
    if (upper_type_str == "STRUCT") {
        return FieldType::OLAP_FIELD_TYPE_STRUCT;
    }
    // "LIST" and "ARRAY" both map to the array field type.
    if (upper_type_str == "LIST") {
        return FieldType::OLAP_FIELD_TYPE_ARRAY;
    }
    if (upper_type_str == "MAP") {
        return FieldType::OLAP_FIELD_TYPE_MAP;
    }
    if (upper_type_str == "OBJECT") {
        return FieldType::OLAP_FIELD_TYPE_OBJECT;
    }
    if (upper_type_str == "ARRAY") {
        return FieldType::OLAP_FIELD_TYPE_ARRAY;
    }
    if (upper_type_str == "QUANTILE_STATE") {
        return FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE;
    }
    if (upper_type_str == "AGG_STATE") {
        return FieldType::OLAP_FIELD_TYPE_AGG_STATE;
    }

    LOG(WARNING) << "invalid type string. [type='" << type_str << "']";
    return FieldType::OLAP_FIELD_TYPE_UNKNOWN;
}
220
221
3.96k
// Parses an aggregation name (case-insensitive) into a FieldAggregationMethod.
//
// The known names are matched exactly. Any other non-empty string maps to
// OLAP_FIELD_AGGREGATION_GENERIC, and the empty string maps to
// OLAP_FIELD_AGGREGATION_UNKNOWN.
FieldAggregationMethod TabletColumn::get_aggregation_type_by_string(const std::string& str) {
    std::string upper_str = str;
    // std::toupper has undefined behaviour for negative char values, so the
    // character is taken as unsigned char before the call.
    std::transform(upper_str.begin(), upper_str.end(), upper_str.begin(),
                   [](unsigned char c) { return static_cast<char>(std::toupper(c)); });

    if (upper_str == "NONE") {
        return FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE;
    }
    if (upper_str == "SUM") {
        return FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM;
    }
    if (upper_str == "MIN") {
        return FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MIN;
    }
    if (upper_str == "MAX") {
        return FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MAX;
    }
    if (upper_str == "REPLACE") {
        return FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE;
    }
    if (upper_str == "REPLACE_IF_NOT_NULL") {
        return FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL;
    }
    if (upper_str == "HLL_UNION") {
        return FieldAggregationMethod::OLAP_FIELD_AGGREGATION_HLL_UNION;
    }
    if (upper_str == "BITMAP_UNION") {
        return FieldAggregationMethod::OLAP_FIELD_AGGREGATION_BITMAP_UNION;
    }
    if (upper_str == "QUANTILE_UNION") {
        return FieldAggregationMethod::OLAP_FIELD_AGGREGATION_QUANTILE_UNION;
    }
    // Any other non-empty name is treated as a generic aggregation.
    if (!upper_str.empty()) {
        return FieldAggregationMethod::OLAP_FIELD_AGGREGATION_GENERIC;
    }
    return FieldAggregationMethod::OLAP_FIELD_AGGREGATION_UNKNOWN;
}
253
254
19.3k
// Returns the textual name of a FieldType. Field types not listed below are
// reported as "UNKNOWN".
std::string TabletColumn::get_string_by_field_type(FieldType type) {
    const char* type_name = "UNKNOWN";
    switch (type) {
    case FieldType::OLAP_FIELD_TYPE_TINYINT:
        type_name = "TINYINT";
        break;
    case FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT:
        type_name = "UNSIGNED_TINYINT";
        break;
    case FieldType::OLAP_FIELD_TYPE_SMALLINT:
        type_name = "SMALLINT";
        break;
    case FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT:
        type_name = "UNSIGNED_SMALLINT";
        break;
    case FieldType::OLAP_FIELD_TYPE_INT:
        type_name = "INT";
        break;
    case FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT:
        type_name = "UNSIGNED_INT";
        break;
    case FieldType::OLAP_FIELD_TYPE_BIGINT:
        type_name = "BIGINT";
        break;
    case FieldType::OLAP_FIELD_TYPE_LARGEINT:
        type_name = "LARGEINT";
        break;
    case FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT:
        type_name = "UNSIGNED_BIGINT";
        break;
    case FieldType::OLAP_FIELD_TYPE_IPV4:
        type_name = "IPV4";
        break;
    case FieldType::OLAP_FIELD_TYPE_IPV6:
        type_name = "IPV6";
        break;
    case FieldType::OLAP_FIELD_TYPE_FLOAT:
        type_name = "FLOAT";
        break;
    case FieldType::OLAP_FIELD_TYPE_DOUBLE:
        type_name = "DOUBLE";
        break;
    case FieldType::OLAP_FIELD_TYPE_DISCRETE_DOUBLE:
        type_name = "DISCRETE_DOUBLE";
        break;
    case FieldType::OLAP_FIELD_TYPE_CHAR:
        type_name = "CHAR";
        break;
    case FieldType::OLAP_FIELD_TYPE_DATE:
        type_name = "DATE";
        break;
    case FieldType::OLAP_FIELD_TYPE_DATEV2:
        type_name = "DATEV2";
        break;
    case FieldType::OLAP_FIELD_TYPE_DATETIME:
        type_name = "DATETIME";
        break;
    case FieldType::OLAP_FIELD_TYPE_DATETIMEV2:
        type_name = "DATETIMEV2";
        break;
    case FieldType::OLAP_FIELD_TYPE_DECIMAL:
        type_name = "DECIMAL";
        break;
    case FieldType::OLAP_FIELD_TYPE_DECIMAL32:
        type_name = "DECIMAL32";
        break;
    case FieldType::OLAP_FIELD_TYPE_DECIMAL64:
        type_name = "DECIMAL64";
        break;
    case FieldType::OLAP_FIELD_TYPE_DECIMAL128I:
        type_name = "DECIMAL128I";
        break;
    case FieldType::OLAP_FIELD_TYPE_DECIMAL256:
        type_name = "DECIMAL256";
        break;
    case FieldType::OLAP_FIELD_TYPE_VARCHAR:
        type_name = "VARCHAR";
        break;
    case FieldType::OLAP_FIELD_TYPE_JSONB:
        type_name = "JSONB";
        break;
    case FieldType::OLAP_FIELD_TYPE_VARIANT:
        type_name = "VARIANT";
        break;
    case FieldType::OLAP_FIELD_TYPE_STRING:
        type_name = "STRING";
        break;
    case FieldType::OLAP_FIELD_TYPE_BOOL:
        type_name = "BOOLEAN";
        break;
    case FieldType::OLAP_FIELD_TYPE_HLL:
        type_name = "HLL";
        break;
    case FieldType::OLAP_FIELD_TYPE_STRUCT:
        type_name = "STRUCT";
        break;
    case FieldType::OLAP_FIELD_TYPE_ARRAY:
        type_name = "ARRAY";
        break;
    case FieldType::OLAP_FIELD_TYPE_MAP:
        type_name = "MAP";
        break;
    case FieldType::OLAP_FIELD_TYPE_OBJECT:
        type_name = "OBJECT";
        break;
    case FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE:
        type_name = "QUANTILE_STATE";
        break;
    case FieldType::OLAP_FIELD_TYPE_AGG_STATE:
        type_name = "AGG_STATE";
        break;
    default:
        // Keep the "UNKNOWN" fallback.
        break;
    }
    return type_name;
}
365
366
28
// Returns the textual name of an aggregation method. Methods not listed below
// are reported as "UNKNOWN".
std::string TabletColumn::get_string_by_aggregation_type(FieldAggregationMethod type) {
    const char* agg_name = "UNKNOWN";
    switch (type) {
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE:
        agg_name = "NONE";
        break;
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM:
        agg_name = "SUM";
        break;
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MIN:
        agg_name = "MIN";
        break;
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MAX:
        agg_name = "MAX";
        break;
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE:
        agg_name = "REPLACE";
        break;
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL:
        agg_name = "REPLACE_IF_NOT_NULL";
        break;
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_HLL_UNION:
        agg_name = "HLL_UNION";
        break;
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_BITMAP_UNION:
        agg_name = "BITMAP_UNION";
        break;
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_QUANTILE_UNION:
        agg_name = "QUANTILE_UNION";
        break;
    default:
        // Keep the "UNKNOWN" fallback.
        break;
    }
    return agg_name;
}
399
400
2.09k
// Returns the field length used for a thrift primitive type.
//
// Fixed-width types return a constant. CHAR returns `string_length` as is;
// the other string-like types return `string_length` plus the size of their
// respective OLAP_*_MAX_LENGTH constant. Unknown types are logged and
// return 0.
uint32_t TabletColumn::get_field_length_by_type(TPrimitiveType::type type, uint32_t string_length) {
    switch (type) {
    // ---- fixed-width types, grouped by length ----
    case TPrimitiveType::TINYINT:
    case TPrimitiveType::BOOLEAN:
        return 1;

    case TPrimitiveType::SMALLINT:
        return 2;

    case TPrimitiveType::DATE:
        return 3;

    case TPrimitiveType::INT:
    case TPrimitiveType::IPV4:
    case TPrimitiveType::DATEV2:
    case TPrimitiveType::FLOAT:
    case TPrimitiveType::DECIMAL32:
        return 4;

    case TPrimitiveType::BIGINT:
    case TPrimitiveType::DATETIME:
    case TPrimitiveType::DATETIMEV2:
    case TPrimitiveType::DOUBLE:
    case TPrimitiveType::DECIMAL64:
        return 8;

    case TPrimitiveType::DECIMALV2:
        return 12; // use 12 bytes in olap engine.

    case TPrimitiveType::LARGEINT:
    case TPrimitiveType::IPV6:
    case TPrimitiveType::QUANTILE_STATE:
    case TPrimitiveType::OBJECT:
    case TPrimitiveType::DECIMAL128I:
        return 16;

    case TPrimitiveType::DECIMAL256:
        return 32;

    // ---- string-like types, derived from the caller-provided length ----
    case TPrimitiveType::CHAR:
        return string_length;

    case TPrimitiveType::VARCHAR:
    case TPrimitiveType::HLL:
    case TPrimitiveType::AGG_STATE:
        return string_length + sizeof(OLAP_VARCHAR_MAX_LENGTH);

    case TPrimitiveType::STRING:
    case TPrimitiveType::VARIANT:
        return string_length + sizeof(OLAP_STRING_MAX_LENGTH);

    case TPrimitiveType::JSONB:
        return string_length + sizeof(OLAP_JSONB_MAX_LENGTH);

    // ---- nested types ----
    case TPrimitiveType::STRUCT:
        // This is the length of the struct type itself; the lengths of its
        // subtypes are not included.
        return OLAP_STRUCT_MAX_LENGTH;

    case TPrimitiveType::ARRAY:
        return OLAP_ARRAY_MAX_LENGTH;

    case TPrimitiveType::MAP:
        return OLAP_MAP_MAX_LENGTH;

    default:
        LOG(WARNING) << "unknown field type. [type=" << type << "]";
        return 0;
    }
}
466
467
4.89k
// Default constructor: the column starts with no aggregation method.
TabletColumn::TabletColumn() {
    _aggregation = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE;
}
468
469
133
// Builds a column that only carries an aggregation method and a field type.
TabletColumn::TabletColumn(FieldAggregationMethod agg, FieldType type) {
    this->_type = type;
    this->_aggregation = agg;
}
473
474
17
// Builds a column from aggregation method, field type and nullability.
// The length is taken from the scalar type info registered for `filed_type`.
TabletColumn::TabletColumn(FieldAggregationMethod agg, FieldType filed_type, bool is_nullable) {
    this->_type = filed_type;
    this->_aggregation = agg;
    this->_is_nullable = is_nullable;
    this->_length = get_scalar_type_info(filed_type)->size();
}
480
481
// Builds a column with an explicit unique id and length in addition to the
// aggregation method, field type and nullability.
TabletColumn::TabletColumn(FieldAggregationMethod agg, FieldType filed_type, bool is_nullable,
                           int32_t unique_id, size_t length) {
    this->_unique_id = unique_id;
    this->_type = filed_type;
    this->_aggregation = agg;
    this->_is_nullable = is_nullable;
    this->_length = length;
}
489
490
0
// Builds a column from its protobuf description; see init_from_pb().
TabletColumn::TabletColumn(const ColumnPB& column) {
    this->init_from_pb(column);
}
493
494
0
// Builds a column from its thrift description; see init_from_thrift().
TabletColumn::TabletColumn(const TColumn& column) {
    this->init_from_thrift(column);
}
497
498
0
void TabletColumn::init_from_thrift(const TColumn& tcolumn) {
499
0
    ColumnPB column_pb;
500
0
    TabletMeta::init_column_from_tcolumn(tcolumn.col_unique_id, tcolumn, &column_pb);
501
0
    init_from_pb(column_pb);
502
0
}
503
504
4.75k
void TabletColumn::init_from_pb(const ColumnPB& column) {
505
4.75k
    _unique_id = column.unique_id();
506
4.75k
    _col_name = column.name();
507
4.75k
    _col_name_lower_case = to_lower(_col_name);
508
4.75k
    _type = TabletColumn::get_field_type_by_string(column.type());
509
4.75k
    _is_key = column.is_key();
510
4.75k
    _is_nullable = column.is_nullable();
511
4.75k
    _is_auto_increment = column.is_auto_increment();
512
513
4.75k
    _has_default_value = column.has_default_value();
514
4.75k
    if (_has_default_value) {
515
29
        _default_value = column.default_value();
516
29
    }
517
518
4.75k
    if (column.has_precision()) {
519
3.91k
        _is_decimal = true;
520
3.91k
        _precision = column.precision();
521
3.91k
    } else {
522
840
        _is_decimal = false;
523
840
    }
524
4.75k
    if (column.has_frac()) {
525
3.91k
        _frac = column.frac();
526
3.91k
    }
527
4.75k
    _length = column.length();
528
4.75k
    _index_length = column.index_length();
529
4.75k
    if (column.has_is_bf_column()) {
530
345
        _is_bf_column = column.is_bf_column();
531
4.40k
    } else {
532
4.40k
        _is_bf_column = false;
533
4.40k
    }
534
4.75k
    if (column.has_has_bitmap_index()) {
535
2.09k
        _has_bitmap_index = column.has_bitmap_index();
536
2.65k
    } else {
537
2.65k
        _has_bitmap_index = false;
538
2.65k
    }
539
4.75k
    if (column.has_aggregation()) {
540
3.96k
        _aggregation = get_aggregation_type_by_string(column.aggregation());
541
3.96k
        _aggregation_name = column.aggregation();
542
3.96k
    }
543
4.75k
    if (column.has_result_is_nullable()) {
544
2.26k
        _result_is_nullable = column.result_is_nullable();
545
2.26k
    }
546
4.75k
    if (column.has_visible()) {
547
2.26k
        _visible = column.visible();
548
2.26k
    }
549
4.75k
    if (_type == FieldType::OLAP_FIELD_TYPE_ARRAY) {
550
2
        CHECK(column.children_columns_size() == 1)
551
0
                << "ARRAY type should has 1 children types, but got "
552
0
                << column.children_columns_size();
553
2
    }
554
4.75k
    if (_type == FieldType::OLAP_FIELD_TYPE_MAP) {
555
0
        DCHECK(column.children_columns_size() == 2)
556
0
                << "MAP type should has 2 children types, but got "
557
0
                << column.children_columns_size();
558
0
        if (UNLIKELY(column.children_columns_size() != 2)) {
559
0
            LOG(WARNING) << "MAP type should has 2 children types, but got "
560
0
                         << column.children_columns_size();
561
0
        }
562
0
    }
563
4.75k
    for (size_t i = 0; i < column.children_columns_size(); i++) {
564
2
        TabletColumn child_column;
565
2
        child_column.init_from_pb(column.children_columns(i));
566
2
        add_sub_column(child_column);
567
2
    }
568
4.75k
    if (column.has_column_path_info()) {
569
0
        _column_path = std::make_shared<vectorized::PathInData>();
570
0
        _column_path->from_protobuf(column.column_path_info());
571
0
        _parent_col_unique_id = column.column_path_info().parrent_column_unique_id();
572
0
    }
573
4.75k
    if (is_variant_type() && !column.has_column_path_info()) {
574
        // set path info for variant root column, to prevent from missing
575
2
        _column_path = std::make_shared<vectorized::PathInData>(_col_name_lower_case);
576
2
    }
577
4.75k
    for (auto& column_pb : column.sparse_columns()) {
578
0
        TabletColumn column;
579
0
        column.init_from_pb(column_pb);
580
0
        _sparse_cols.emplace_back(std::make_shared<TabletColumn>(std::move(column)));
581
0
        _num_sparse_columns++;
582
0
    }
583
4.75k
}
584
585
// Creates a nullable VARIANT sub-column located at `root` + `paths`.
// The column gets unique id -1, records `parent_unique_id` as its parent, and
// is named after the full path built from `root` and `paths`.
TabletColumn TabletColumn::create_materialized_variant_column(const std::string& root,
                                                              const std::vector<std::string>& paths,
                                                              int32_t parent_unique_id) {
    vectorized::PathInData full_path(root, paths);

    TabletColumn variant_col;
    variant_col.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
    variant_col.set_is_nullable(true);
    variant_col.set_unique_id(-1);
    variant_col.set_parent_unique_id(parent_unique_id);
    variant_col.set_path_info(full_path);
    variant_col.set_name(full_path.get_path());
    return variant_col;
}
598
599
19.3k
// Serializes this column into `column`.
//
// Optional attributes (default value, precision/frac, bloom-filter flag,
// aggregation name, bitmap-index flag) are written only when set on this
// column. Sub-columns and sparse columns are serialized recursively.
void TabletColumn::to_schema_pb(ColumnPB* column) const {
    // Always-present attributes.
    column->set_unique_id(_unique_id);
    column->set_name(_col_name);
    column->set_type(get_string_by_field_type(_type));
    column->set_is_key(_is_key);
    column->set_is_nullable(_is_nullable);

    if (_has_default_value) {
        column->set_default_value(_default_value);
    }
    if (_is_decimal) {
        column->set_precision(_precision);
        column->set_frac(_frac);
    }

    column->set_length(_length);
    column->set_index_length(_index_length);

    if (_is_bf_column) {
        column->set_is_bf_column(_is_bf_column);
    }
    if (!_aggregation_name.empty()) {
        column->set_aggregation(_aggregation_name);
    }
    column->set_result_is_nullable(_result_is_nullable);
    if (_has_bitmap_index) {
        column->set_has_bitmap_index(_has_bitmap_index);
    }
    column->set_visible(_visible);

    // Sanity checks on the number of sub-columns of nested types. A malformed
    // ARRAY aborts via CHECK; a malformed MAP trips DCHECK and is otherwise
    // only logged.
    if (_type == FieldType::OLAP_FIELD_TYPE_ARRAY) {
        CHECK(_sub_columns.size() == 1)
                << "ARRAY type should has 1 children types, but got " << _sub_columns.size();
    }
    if (_type == FieldType::OLAP_FIELD_TYPE_MAP) {
        DCHECK(_sub_columns.size() == 2)
                << "MAP type should has 2 children types, but got " << _sub_columns.size();
        if (UNLIKELY(_sub_columns.size() != 2)) {
            LOG(WARNING) << "MAP type should has 2 children types, but got " << _sub_columns.size();
        }
    }

    for (const auto& sub_column : _sub_columns) {
        ColumnPB* child_pb = column->add_children_columns();
        sub_column->to_schema_pb(child_pb);
    }

    // Path information, when this column has any.
    if (has_path_info()) {
        // CHECK_GT(_parent_col_unique_id, 0);
        _column_path->to_protobuf(column->mutable_column_path_info(), _parent_col_unique_id);
        // Normalize unstable information for variant columns. Some fields in the
        // tablet schema are irrelevant for variant sub-columns, and retaining them
        // may lead to an excessive growth in the number of tablet schema cache
        // entries. This overrides the length / index length written above.
        if (_type == FieldType::OLAP_FIELD_TYPE_STRING) {
            column->set_length(INT_MAX);
        }
        column->set_index_length(0);
    }

    for (const auto& sparse_col : _sparse_cols) {
        ColumnPB* sparse_pb = column->add_sparse_columns();
        sparse_col->to_schema_pb(sparse_pb);
    }
}
660
661
32
// Appends a copy of `sub_column` to this column's sub-columns and bumps the
// sub-column count. Afterwards this column's unique id is recorded as the
// parent id on the caller's `sub_column` object.
// NOTE(review): the parent id is written to the argument after the copy was
// taken, so the stored copy does not receive it - confirm this is intended.
void TabletColumn::add_sub_column(TabletColumn& sub_column) {
    _sub_columns.emplace_back(std::make_shared<TabletColumn>(sub_column));
    ++_sub_column_count;
    sub_column._parent_col_unique_id = _unique_id;
}
666
667
19.6k
bool TabletColumn::is_row_store_column() const {
668
19.6k
    return _col_name == BeConsts::ROW_STORE_COL;
669
19.6k
}
670
671
// Builds the "state union" aggregate function for an agg-state column.
// `type` is cast to DataTypeAggState via assert_cast, and the union is created
// from that type's nested function.
vectorized::AggregateFunctionPtr TabletColumn::get_aggregate_function_union(
        vectorized::DataTypePtr type) const {
    const auto* agg_state_type = assert_cast<const vectorized::DataTypeAggState*>(type.get());
    return vectorized::AggregateStateUnion::create(agg_state_type->get_nested_function(), {type},
                                                   type);
}
676
677
24
// Resolves the aggregate function to use for this column.
//
// Agg-state columns get the state-union function. Otherwise the function is
// looked up in the simple factory under the lower-cased name formed by the
// column's aggregation method name followed by `suffix`.
//
// Returns nullptr (after logging a warning) when the column's data type cannot
// be created or when no matching function is registered.
vectorized::AggregateFunctionPtr TabletColumn::get_aggregate_function(std::string suffix) const {
    auto type = vectorized::DataTypeFactory::instance().create_data_type(*this);
    const std::string origin_name = TabletColumn::get_string_by_aggregation_type(_aggregation);

    // Guard against a missing data type: the code below dereferences `type`
    // unconditionally, so bail out here instead of crashing.
    if (!type) {
        LOG(WARNING) << "get column aggregate function failed, aggregation_name=" << origin_name
                     << ", column data type could not be created";
        return nullptr;
    }

    if (type->get_type_as_type_descriptor().type == PrimitiveType::TYPE_AGG_STATE) {
        return get_aggregate_function_union(type);
    }

    // Factory names are lower-case: e.g. aggregation name + suffix.
    std::string agg_name = origin_name + suffix;
    std::transform(agg_name.begin(), agg_name.end(), agg_name.begin(),
                   [](unsigned char c) { return std::tolower(c); });

    auto function = vectorized::AggregateFunctionSimpleFactory::instance().get(agg_name, {type},
                                                                               type->is_nullable());
    if (function) {
        return function;
    }
    LOG(WARNING) << "get column aggregate function failed, aggregation_name=" << origin_name
                 << ", column_type=" << type->get_name();
    return nullptr;
}
697
698
4
// Stores a freshly allocated copy of `path` as this column's path info.
void TabletColumn::set_path_info(const vectorized::PathInData& path) {
    auto path_copy = std::make_shared<vectorized::PathInData>(path);
    _column_path = path_copy;
}
701
702
0
// Returns the vectorized data type that DataTypeFactory creates for this column.
vectorized::DataTypePtr TabletColumn::get_vec_type() const {
    auto& factory = vectorized::DataTypeFactory::instance();
    return factory.create_data_type(*this);
}
705
706
// escape '.' and '_'
707
10.0k
std::string escape_for_path_name(const std::string& s) {
708
10.0k
    std::string res;
709
10.0k
    const char* pos = s.data();
710
10.0k
    const char* end = pos + s.size();
711
10.1k
    while (pos != end) {
712
76
        unsigned char c = *pos;
713
76
        if (c == '.' || c == '_') {
714
10
            res += '%';
715
10
            res += vectorized::hex_digit_uppercase(c / 16);
716
10
            res += vectorized::hex_digit_uppercase(c % 16);
717
66
        } else {
718
66
            res += c;
719
66
        }
720
76
        ++pos;
721
76
    }
722
10.0k
    return res;
723
10.0k
}
724
725
4
void TabletIndex::set_escaped_escaped_index_suffix_path(const std::string& path_name) {
726
4
    std::string escaped_path = escape_for_path_name(path_name);
727
4
    _escaped_index_suffix_path = escaped_path;
728
4
}
729
730
void TabletIndex::init_from_thrift(const TOlapTableIndex& index,
731
15
                                   const TabletSchema& tablet_schema) {
732
15
    _index_id = index.index_id;
733
15
    _index_name = index.index_name;
734
    // init col_unique_id in index at be side, since col_unique_id may be -1 at fe side
735
    // get column unique id by name
736
15
    std::vector<int32_t> col_unique_ids(index.columns.size());
737
30
    for (size_t i = 0; i < index.columns.size(); i++) {
738
15
        auto column_idx = tablet_schema.field_index(index.columns[i]);
739
15
        if (column_idx >= 0) {
740
11
            col_unique_ids[i] = tablet_schema.column(column_idx).unique_id();
741
11
        } else {
742
            // if column unique id not found by column name, find by column unique id
743
            // column unique id can not found means this column is a new column added by light schema change
744
4
            if (index.__isset.column_unique_ids && !index.column_unique_ids.empty() &&
745
4
                tablet_schema.has_column_unique_id(index.column_unique_ids[i])) {
746
1
                col_unique_ids[i] = index.column_unique_ids[i];
747
3
            } else {
748
3
                col_unique_ids[i] = -1;
749
3
            }
750
4
        }
751
15
    }
752
15
    _col_unique_ids = std::move(col_unique_ids);
753
754
15
    switch (index.index_type) {
755
0
    case TIndexType::BITMAP:
756
0
        _index_type = IndexType::BITMAP;
757
0
        break;
758
15
    case TIndexType::INVERTED:
759
15
        _index_type = IndexType::INVERTED;
760
15
        break;
761
0
    case TIndexType::BLOOMFILTER:
762
0
        _index_type = IndexType::BLOOMFILTER;
763
0
        break;
764
0
    case TIndexType::NGRAM_BF:
765
0
        _index_type = IndexType::NGRAM_BF;
766
0
        break;
767
15
    }
768
15
    if (index.__isset.properties) {
769
0
        for (auto kv : index.properties) {
770
0
            _properties[kv.first] = kv.second;
771
0
        }
772
0
    }
773
15
}
774
775
void TabletIndex::init_from_thrift(const TOlapTableIndex& index,
776
0
                                   const std::vector<int32_t>& column_uids) {
777
0
    _index_id = index.index_id;
778
0
    _index_name = index.index_name;
779
0
    _col_unique_ids = column_uids;
780
781
0
    switch (index.index_type) {
782
0
    case TIndexType::BITMAP:
783
0
        _index_type = IndexType::BITMAP;
784
0
        break;
785
0
    case TIndexType::INVERTED:
786
0
        _index_type = IndexType::INVERTED;
787
0
        break;
788
0
    case TIndexType::BLOOMFILTER:
789
0
        _index_type = IndexType::BLOOMFILTER;
790
0
        break;
791
0
    case TIndexType::NGRAM_BF:
792
0
        _index_type = IndexType::NGRAM_BF;
793
0
        break;
794
0
    }
795
0
    if (index.__isset.properties) {
796
0
        for (auto kv : index.properties) {
797
0
            _properties[kv.first] = kv.second;
798
0
        }
799
0
    }
800
0
}
801
802
53
void TabletIndex::init_from_pb(const TabletIndexPB& index) {
803
53
    _index_id = index.index_id();
804
53
    _index_name = index.index_name();
805
53
    _col_unique_ids.clear();
806
53
    for (auto col_unique_id : index.col_unique_id()) {
807
53
        _col_unique_ids.push_back(col_unique_id);
808
53
    }
809
53
    _index_type = index.index_type();
810
53
    for (auto& kv : index.properties()) {
811
8
        _properties[kv.first] = kv.second;
812
8
    }
813
53
    _escaped_index_suffix_path = index.index_suffix_name();
814
53
}
815
816
88
// Serializes this index into `index`.
// When the index has at least one property but none under INVERTED_INDEX_PARSER_LOWERCASE_KEY,
// that key is written to the protobuf with INVERTED_INDEX_PARSER_TRUE.
// The "tablet_schema.to_schema_pb" debug point drops the lowercase key from the copied
// properties and returns before the default is applied.
void TabletIndex::to_schema_pb(TabletIndexPB* index) const {
    index->set_index_id(_index_id);
    index->set_index_name(_index_name);
    index->clear_col_unique_id();
    for (const auto uid : _col_unique_ids) {
        index->add_col_unique_id(uid);
    }
    index->set_index_type(_index_type);
    for (const auto& property : _properties) {
        DBUG_EXECUTE_IF("tablet_schema.to_schema_pb", {
            if (property.first == INVERTED_INDEX_PARSER_LOWERCASE_KEY) {
                continue;
            }
        })
        (*index->mutable_properties())[property.first] = property.second;
    }
    index->set_index_suffix_name(_escaped_index_suffix_path);

    DBUG_EXECUTE_IF("tablet_schema.to_schema_pb", { return; })

    // lowercase by default
    const bool needs_default_lowercase =
            !_properties.empty() && !_properties.contains(INVERTED_INDEX_PARSER_LOWERCASE_KEY);
    if (needs_default_lowercase) {
        (*index->mutable_properties())[INVERTED_INDEX_PARSER_LOWERCASE_KEY] =
                INVERTED_INDEX_PARSER_TRUE;
    }
}
844
845
2.31k
// Compiler-generated default constructor, defined out of line in this translation unit.
TabletSchema::TabletSchema() = default;
846
847
2.27k
// Compiler-generated destructor, defined out of line in this translation unit.
TabletSchema::~TabletSchema() = default;
848
849
1.29k
int64_t TabletSchema::get_metadata_size() const {
850
1.29k
    return sizeof(TabletSchema) + _vl_field_mem_size;
851
1.29k
}
852
853
118
// Appends `column` at ordinal `_num_columns` and updates the counters and lookup maps.
//
// * Key / nullable / variant counters are bumped according to the column's flags.
// * A variant column without path info gets a path built from its lower-cased name.
// * The DELETE_SIGN, SEQUENCE_COL and VERSION_COL names record the new ordinal in the
//   corresponding index member.
// * The unique-id map is always updated. The name map is updated for NORMAL columns and for
//   variant/extracted columns (which are also added to the path map); any other column type is
//   reachable by unique id only.
void TabletSchema::append_column(TabletColumn column, ColumnType col_type) {
    if (column.is_key()) {
        ++_num_key_columns;
    }
    if (column.is_nullable()) {
        ++_num_null_columns;
    }
    if (column.is_variant_type()) {
        ++_num_variant_columns;
        if (!column.has_path_info()) {
            const std::string& lowered_name = column.name_lower_case();
            vectorized::PathInData root_path(lowered_name);
            column.set_path_info(root_path);
        }
    }

    if (UNLIKELY(column.name() == DELETE_SIGN)) {
        _delete_sign_idx = _num_columns;
    } else if (UNLIKELY(column.name() == SEQUENCE_COL)) {
        _sequence_col_idx = _num_columns;
    } else if (UNLIKELY(column.name() == VERSION_COL)) {
        _version_col_idx = _num_columns;
    }

    _field_id_to_index[column.unique_id()] = _num_columns;
    _cols.push_back(std::make_shared<TabletColumn>(std::move(column)));
    const auto& appended = _cols.back();

    // A dropped column may share its name with an existing column, so it is registered in the
    // unique-id map only and never in the name map.
    const bool is_variant_like = col_type == ColumnType::VARIANT || appended->is_variant_type() ||
                                 appended->is_extracted_column();
    if (is_variant_like) {
        _field_name_to_index.emplace(StringRef(appended->name()), _num_columns);
        _field_path_to_index[appended->path_info_ptr().get()] = _num_columns;
    } else if (col_type == ColumnType::NORMAL) {
        _field_name_to_index.emplace(StringRef(appended->name()), _num_columns);
    }
    _num_columns++;
}
888
889
0
// Appends `column` to this column's sparse sub-columns and bumps the sparse column counter.
void TabletColumn::append_sparse_column(TabletColumn column) {
    // `column` is already a private by-value copy; move it into the shared object instead of
    // copying it a second time.
    _sparse_cols.push_back(std::make_shared<TabletColumn>(std::move(column)));
    _num_sparse_columns++;
}
893
894
24
// Appends `index` to the schema's index list, taking ownership of the by-value argument.
void TabletSchema::append_index(TabletIndex index) {
    _indexes.emplace_back(std::move(index));
}
897
898
void TabletSchema::update_index(const TabletColumn& col, const IndexType& index_type,
899
0
                                TabletIndex&& index) {
900
0
    int32_t col_unique_id = col.is_extracted_column() ? col.parent_unique_id() : col.unique_id();
901
0
    const std::string& suffix_path =
902
0
            col.has_path_info() ? escape_for_path_name(col.path_info_ptr()->get_path()) : "";
903
0
    for (size_t i = 0; i < _indexes.size(); i++) {
904
0
        for (int32_t id : _indexes[i].col_unique_ids()) {
905
0
            if (_indexes[i].index_type() == index_type && id == col_unique_id &&
906
0
                _indexes[i].get_index_suffix() == suffix_path) {
907
0
                _indexes[i] = std::move(index);
908
0
                break;
909
0
            }
910
0
        }
911
0
    }
912
0
}
913
914
0
void TabletSchema::replace_column(size_t pos, TabletColumn new_col) {
915
0
    CHECK_LT(pos, num_columns()) << " outof range";
916
0
    _cols[pos] = std::make_shared<TabletColumn>(std::move(new_col));
917
0
}
918
919
0
void TabletSchema::clear_index() {
920
0
    _indexes.clear();
921
0
}
922
923
2
void TabletSchema::remove_index(int64_t index_id) {
924
2
    std::vector<TabletIndex> indexes;
925
2
    for (auto index : _indexes) {
926
2
        if (index.index_id() == index_id) {
927
2
            continue;
928
2
        }
929
0
        indexes.emplace_back(std::move(index));
930
0
    }
931
2
    _indexes = std::move(indexes);
932
2
}
933
934
0
void TabletSchema::clear_columns() {
935
0
    _field_path_to_index.clear();
936
0
    _field_name_to_index.clear();
937
0
    _field_id_to_index.clear();
938
0
    _num_columns = 0;
939
0
    _num_variant_columns = 0;
940
0
    _num_null_columns = 0;
941
0
    _num_key_columns = 0;
942
0
    _cols.clear();
943
0
    clear_column_cache_handlers();
944
0
}
945
946
1.29k
void TabletSchema::clear_column_cache_handlers() {
947
1.29k
    for (auto* cache_handle : _column_cache_handlers) {
948
0
        TabletColumnObjectPool::instance()->release(cache_handle);
949
0
    }
950
1.29k
    _column_cache_handlers.clear();
951
1.29k
}
952
953
void TabletSchema::init_from_pb(const TabletSchemaPB& schema, bool ignore_extracted_columns,
954
1.29k
                                bool reuse_cache_column) {
955
1.29k
    _keys_type = schema.keys_type();
956
1.29k
    _num_columns = 0;
957
1.29k
    _num_variant_columns = 0;
958
1.29k
    _num_key_columns = 0;
959
1.29k
    _num_null_columns = 0;
960
1.29k
    _cols.clear();
961
1.29k
    _indexes.clear();
962
1.29k
    _field_name_to_index.clear();
963
1.29k
    _field_id_to_index.clear();
964
1.29k
    _cluster_key_idxes.clear();
965
1.29k
    clear_column_cache_handlers();
966
1.29k
    for (const auto& i : schema.cluster_key_idxes()) {
967
0
        _cluster_key_idxes.push_back(i);
968
0
    }
969
4.84k
    for (auto& column_pb : schema.column()) {
970
4.84k
        TabletColumnPtr column;
971
4.84k
        if (reuse_cache_column) {
972
156
            auto pair = TabletColumnObjectPool::instance()->insert(
973
156
                    deterministic_string_serialize(column_pb));
974
156
            column = pair.second;
975
156
            _column_cache_handlers.push_back(pair.first);
976
4.68k
        } else {
977
4.68k
            column = std::make_shared<TabletColumn>();
978
4.68k
            column->init_from_pb(column_pb);
979
4.68k
        }
980
4.84k
        if (ignore_extracted_columns && column->is_extracted_column()) {
981
0
            continue;
982
0
        }
983
4.84k
        if (column->is_key()) {
984
2.06k
            _num_key_columns++;
985
2.06k
        }
986
4.84k
        if (column->is_nullable()) {
987
129
            _num_null_columns++;
988
129
        }
989
4.84k
        if (column->is_variant_type()) {
990
2
            ++_num_variant_columns;
991
2
        }
992
993
4.84k
        _cols.emplace_back(std::move(column));
994
4.84k
        if (!_cols.back()->is_extracted_column()) {
995
4.84k
            _vl_field_mem_size += sizeof(StringRef) + sizeof(char) * _cols.back()->name().size() +
996
4.84k
                                  sizeof(int32_t);
997
4.84k
            _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns);
998
4.84k
            _field_id_to_index[_cols.back()->unique_id()] = _num_columns;
999
4.84k
            _vl_field_mem_size += sizeof(int32_t) * 2;
1000
4.84k
        }
1001
4.84k
        _num_columns++;
1002
4.84k
    }
1003
1.29k
    for (auto& index_pb : schema.index()) {
1004
40
        TabletIndex index;
1005
40
        index.init_from_pb(index_pb);
1006
40
        _indexes.emplace_back(std::move(index));
1007
40
    }
1008
1.29k
    _num_short_key_columns = schema.num_short_key_columns();
1009
1.29k
    _num_rows_per_row_block = schema.num_rows_per_row_block();
1010
1.29k
    _compress_kind = schema.compress_kind();
1011
1.29k
    _next_column_unique_id = schema.next_column_unique_id();
1012
1.29k
    if (schema.has_bf_fpp()) {
1013
0
        _has_bf_fpp = true;
1014
0
        _bf_fpp = schema.bf_fpp();
1015
1.29k
    } else {
1016
1.29k
        _has_bf_fpp = false;
1017
1.29k
        _bf_fpp = BLOOM_FILTER_DEFAULT_FPP;
1018
1.29k
    }
1019
1.29k
    _is_in_memory = schema.is_in_memory();
1020
1.29k
    _disable_auto_compaction = schema.disable_auto_compaction();
1021
1.29k
    _enable_single_replica_compaction = schema.enable_single_replica_compaction();
1022
1.29k
    _store_row_column = schema.store_row_column();
1023
1.29k
    _skip_write_index_on_load = schema.skip_write_index_on_load();
1024
1.29k
    _delete_sign_idx = schema.delete_sign_idx();
1025
1.29k
    _sequence_col_idx = schema.sequence_col_idx();
1026
1.29k
    _version_col_idx = schema.version_col_idx();
1027
1.29k
    _sort_type = schema.sort_type();
1028
1.29k
    _sort_col_num = schema.sort_col_num();
1029
1.29k
    _compression_type = schema.compression_type();
1030
1.29k
    _row_store_page_size = schema.row_store_page_size();
1031
1.29k
    _schema_version = schema.schema_version();
1032
    // Default to V1 inverted index storage format for backward compatibility if not specified in schema.
1033
1.29k
    if (!schema.has_inverted_index_storage_format()) {
1034
164
        _inverted_index_storage_format = InvertedIndexStorageFormatPB::V1;
1035
1.13k
    } else {
1036
1.13k
        _inverted_index_storage_format = schema.inverted_index_storage_format();
1037
1.13k
    }
1038
1039
1.29k
    update_metadata_size();
1040
1.29k
}
1041
1042
179
void TabletSchema::copy_from(const TabletSchema& tablet_schema) {
1043
179
    TabletSchemaPB tablet_schema_pb;
1044
179
    tablet_schema.to_schema_pb(&tablet_schema_pb);
1045
179
    init_from_pb(tablet_schema_pb);
1046
179
    _table_id = tablet_schema.table_id();
1047
179
}
1048
1049
0
void TabletSchema::shawdow_copy_without_columns(const TabletSchema& tablet_schema) {
1050
0
    *this = tablet_schema;
1051
0
    _field_path_to_index.clear();
1052
0
    _field_name_to_index.clear();
1053
0
    _field_id_to_index.clear();
1054
0
    _num_columns = 0;
1055
0
    _num_variant_columns = 0;
1056
0
    _num_null_columns = 0;
1057
0
    _num_key_columns = 0;
1058
0
    _cols.clear();
1059
0
    _vl_field_mem_size = 0;
1060
    // notice : do not ref columns
1061
0
    _column_cache_handlers.clear();
1062
0
}
1063
1064
0
void TabletSchema::update_index_info_from(const TabletSchema& tablet_schema) {
1065
0
    for (auto& col : _cols) {
1066
0
        if (col->unique_id() < 0) {
1067
0
            continue;
1068
0
        }
1069
0
        const auto iter = tablet_schema._field_id_to_index.find(col->unique_id());
1070
0
        if (iter == tablet_schema._field_id_to_index.end()) {
1071
0
            continue;
1072
0
        }
1073
0
        int32_t col_idx = iter->second;
1074
0
        if (col_idx < 0 || col_idx >= tablet_schema._cols.size()) {
1075
0
            continue;
1076
0
        }
1077
0
        col->set_is_bf_column(tablet_schema._cols[col_idx]->is_bf_column());
1078
0
        col->set_has_bitmap_index(tablet_schema._cols[col_idx]->has_bitmap_index());
1079
0
    }
1080
0
}
1081
1082
2.58k
std::string TabletSchema::to_key() const {
1083
2.58k
    TabletSchemaPB pb;
1084
2.58k
    to_schema_pb(&pb);
1085
2.58k
    return TabletSchema::deterministic_string_serialize(pb);
1086
2.58k
}
1087
1088
void TabletSchema::build_current_tablet_schema(int64_t index_id, int32_t version,
1089
                                               const OlapTableIndexSchema* index,
1090
0
                                               const TabletSchema& ori_tablet_schema) {
1091
    // copy from ori_tablet_schema
1092
0
    _keys_type = ori_tablet_schema.keys_type();
1093
0
    _num_short_key_columns = ori_tablet_schema.num_short_key_columns();
1094
0
    _num_rows_per_row_block = ori_tablet_schema.num_rows_per_row_block();
1095
0
    _compress_kind = ori_tablet_schema.compress_kind();
1096
1097
    // todo(yixiu): unique_id
1098
0
    _next_column_unique_id = ori_tablet_schema.next_column_unique_id();
1099
0
    _is_in_memory = ori_tablet_schema.is_in_memory();
1100
0
    _disable_auto_compaction = ori_tablet_schema.disable_auto_compaction();
1101
0
    _enable_single_replica_compaction = ori_tablet_schema.enable_single_replica_compaction();
1102
0
    _store_row_column = ori_tablet_schema.store_row_column();
1103
0
    _skip_write_index_on_load = ori_tablet_schema.skip_write_index_on_load();
1104
0
    _sort_type = ori_tablet_schema.sort_type();
1105
0
    _sort_col_num = ori_tablet_schema.sort_col_num();
1106
0
    _row_store_page_size = ori_tablet_schema.row_store_page_size();
1107
1108
    // copy from table_schema_param
1109
0
    _schema_version = version;
1110
0
    _num_columns = 0;
1111
0
    _num_variant_columns = 0;
1112
0
    _num_key_columns = 0;
1113
0
    _num_null_columns = 0;
1114
0
    bool has_bf_columns = false;
1115
0
    _cols.clear();
1116
0
    _indexes.clear();
1117
0
    _field_name_to_index.clear();
1118
0
    _field_id_to_index.clear();
1119
0
    _delete_sign_idx = -1;
1120
0
    _sequence_col_idx = -1;
1121
0
    _version_col_idx = -1;
1122
0
    _cluster_key_idxes.clear();
1123
0
    clear_column_cache_handlers();
1124
0
    for (const auto& i : ori_tablet_schema._cluster_key_idxes) {
1125
0
        _cluster_key_idxes.push_back(i);
1126
0
    }
1127
0
    for (auto& column : index->columns) {
1128
0
        if (column->is_key()) {
1129
0
            _num_key_columns++;
1130
0
        }
1131
0
        if (column->is_nullable()) {
1132
0
            _num_null_columns++;
1133
0
        }
1134
0
        if (column->is_bf_column()) {
1135
0
            has_bf_columns = true;
1136
0
        }
1137
0
        if (column->is_variant_type()) {
1138
0
            ++_num_variant_columns;
1139
0
        }
1140
0
        if (UNLIKELY(column->name() == DELETE_SIGN)) {
1141
0
            _delete_sign_idx = _num_columns;
1142
0
        } else if (UNLIKELY(column->name() == SEQUENCE_COL)) {
1143
0
            _sequence_col_idx = _num_columns;
1144
0
        } else if (UNLIKELY(column->name() == VERSION_COL)) {
1145
0
            _version_col_idx = _num_columns;
1146
0
        }
1147
0
        _cols.emplace_back(std::make_shared<TabletColumn>(*column));
1148
0
        _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns);
1149
0
        _field_id_to_index[_cols.back()->unique_id()] = _num_columns;
1150
0
        _num_columns++;
1151
0
    }
1152
1153
0
    for (auto& i : index->indexes) {
1154
0
        _indexes.emplace_back(*i);
1155
0
    }
1156
1157
0
    if (has_bf_columns) {
1158
0
        _has_bf_fpp = true;
1159
0
        _bf_fpp = ori_tablet_schema.bloom_filter_fpp();
1160
0
    } else {
1161
0
        _has_bf_fpp = false;
1162
0
        _bf_fpp = BLOOM_FILTER_DEFAULT_FPP;
1163
0
    }
1164
0
}
1165
1166
77
void TabletSchema::merge_dropped_columns(const TabletSchema& src_schema) {
1167
    // If they are the same tablet schema object, then just return
1168
77
    if (this == &src_schema) {
1169
0
        return;
1170
0
    }
1171
206
    for (const auto& src_col : src_schema.columns()) {
1172
206
        if (_field_id_to_index.find(src_col->unique_id()) == _field_id_to_index.end()) {
1173
0
            CHECK(!src_col->is_key())
1174
0
                    << src_col->name() << " is key column, should not be dropped.";
1175
0
            ColumnPB src_col_pb;
1176
            // There are some pointer in tablet column, not sure the reference relation, so
1177
            // that deep copy it.
1178
0
            src_col->to_schema_pb(&src_col_pb);
1179
0
            TabletColumn new_col(src_col_pb);
1180
0
            append_column(new_col, TabletSchema::ColumnType::DROPPED);
1181
0
        }
1182
206
    }
1183
77
}
1184
1185
2
// Returns a new schema built from this schema's protobuf form with extracted columns skipped.
TabletSchemaSPtr TabletSchema::copy_without_variant_extracted_columns() {
    TabletSchemaPB serialized;
    to_schema_pb(&serialized);

    auto stripped = std::make_shared<TabletSchema>();
    stripped->init_from_pb(serialized, true /*ignore extracted_columns*/);
    return stripped;
}
1192
1193
// Dropped column is in _field_id_to_index but not in _field_name_to_index
1194
// Could refer to append_column method
1195
4.67k
bool TabletSchema::is_dropped_column(const TabletColumn& col) const {
1196
4.67k
    CHECK(_field_id_to_index.find(col.unique_id()) != _field_id_to_index.end())
1197
0
            << "could not find col with unique id = " << col.unique_id()
1198
0
            << " and name = " << col.name();
1199
4.67k
    return _field_name_to_index.find(StringRef(col.name())) == _field_name_to_index.end() ||
1200
4.67k
           column(col.name()).unique_id() != col.unique_id();
1201
4.67k
}
1202
1203
0
void TabletSchema::copy_extracted_columns(const TabletSchema& src_schema) {
1204
0
    std::unordered_set<int32_t> variant_columns;
1205
0
    for (const auto& col : columns()) {
1206
0
        if (col->is_variant_type()) {
1207
0
            variant_columns.insert(col->unique_id());
1208
0
        }
1209
0
    }
1210
0
    for (const TabletColumnPtr& col : src_schema.columns()) {
1211
0
        if (col->is_extracted_column() && variant_columns.contains(col->parent_unique_id())) {
1212
0
            ColumnPB col_pb;
1213
0
            col->to_schema_pb(&col_pb);
1214
0
            TabletColumn new_col(col_pb);
1215
0
            append_column(new_col, ColumnType::VARIANT);
1216
0
        }
1217
0
    }
1218
0
}
1219
1220
0
void TabletSchema::reserve_extracted_columns() {
1221
0
    for (auto it = _cols.begin(); it != _cols.end();) {
1222
0
        if (!(*it)->is_extracted_column()) {
1223
0
            it = _cols.erase(it);
1224
0
        } else {
1225
0
            ++it;
1226
0
        }
1227
0
    }
1228
0
}
1229
1230
4.52k
// Serializes this schema into `tablet_schema_pb`.
// Cluster key indexes, columns and indexes are appended to the corresponding repeated fields;
// scalar settings are written with their setters. `bf_fpp` is written only when `_has_bf_fpp`
// is set.
void TabletSchema::to_schema_pb(TabletSchemaPB* tablet_schema_pb) const {
    // Repeated fields.
    for (const auto& cluster_key_idx : _cluster_key_idxes) {
        tablet_schema_pb->add_cluster_key_idxes(cluster_key_idx);
    }
    tablet_schema_pb->set_keys_type(_keys_type);
    for (const auto& tablet_col : _cols) {
        tablet_col->to_schema_pb(tablet_schema_pb->add_column());
    }
    for (const auto& tablet_index : _indexes) {
        tablet_index.to_schema_pb(tablet_schema_pb->add_index());
    }

    // Scalar settings.
    tablet_schema_pb->set_num_short_key_columns(_num_short_key_columns);
    tablet_schema_pb->set_num_rows_per_row_block(_num_rows_per_row_block);
    tablet_schema_pb->set_compress_kind(_compress_kind);
    if (_has_bf_fpp) {
        tablet_schema_pb->set_bf_fpp(_bf_fpp);
    }
    tablet_schema_pb->set_next_column_unique_id(_next_column_unique_id);
    tablet_schema_pb->set_is_in_memory(_is_in_memory);
    tablet_schema_pb->set_disable_auto_compaction(_disable_auto_compaction);
    tablet_schema_pb->set_enable_single_replica_compaction(_enable_single_replica_compaction);
    tablet_schema_pb->set_store_row_column(_store_row_column);
    tablet_schema_pb->set_skip_write_index_on_load(_skip_write_index_on_load);
    tablet_schema_pb->set_delete_sign_idx(_delete_sign_idx);
    tablet_schema_pb->set_sequence_col_idx(_sequence_col_idx);
    tablet_schema_pb->set_sort_type(_sort_type);
    tablet_schema_pb->set_sort_col_num(_sort_col_num);
    tablet_schema_pb->set_schema_version(_schema_version);
    tablet_schema_pb->set_compression_type(_compression_type);
    tablet_schema_pb->set_row_store_page_size(_row_store_page_size);
    tablet_schema_pb->set_version_col_idx(_version_col_idx);
    tablet_schema_pb->set_inverted_index_storage_format(_inverted_index_storage_format);
}
1265
1266
0
size_t TabletSchema::row_size() const {
1267
0
    size_t size = 0;
1268
0
    for (const auto& column : _cols) {
1269
0
        size += column->length();
1270
0
    }
1271
0
    size += (_num_columns + 7) / 8;
1272
1273
0
    return size;
1274
0
}
1275
1276
1.19k
int32_t TabletSchema::field_index(const std::string& field_name) const {
1277
1.19k
    const auto& found = _field_name_to_index.find(StringRef(field_name));
1278
1.19k
    return (found == _field_name_to_index.end()) ? -1 : found->second;
1279
1.19k
}
1280
1281
0
// Returns the ordinal registered for `path` in the path map, or -1 when it is absent.
int32_t TabletSchema::field_index(const vectorized::PathInData& path) const {
    if (const auto it = _field_path_to_index.find(vectorized::PathInDataRef(&path));
        it != _field_path_to_index.end()) {
        return it->second;
    }
    return -1;
}
1285
1286
106
int32_t TabletSchema::field_index(int32_t col_unique_id) const {
1287
106
    const auto& found = _field_id_to_index.find(col_unique_id);
1288
106
    return (found == _field_id_to_index.end()) ? -1 : found->second;
1289
106
}
1290
1291
21.2k
const std::vector<TabletColumnPtr>& TabletSchema::columns() const {
1292
21.2k
    return _cols;
1293
21.2k
}
1294
1295
0
const std::vector<TabletColumnPtr>& TabletColumn::sparse_columns() const {
1296
0
    return _sparse_cols;
1297
0
}
1298
1299
108k
// Returns the column at `ordinal`. The bound is verified by DCHECK only; the element access
// itself is unchecked.
const TabletColumn& TabletSchema::column(size_t ordinal) const {
    DCHECK(ordinal < _num_columns) << "ordinal:" << ordinal << ", _num_columns:" << _num_columns;
    const auto& entry = _cols[ordinal];
    return *entry;
}
1303
1304
0
// Returns the sparse sub-column at `ordinal`. The bound is verified by DCHECK only; the
// element access itself is unchecked.
const TabletColumn& TabletColumn::sparse_column_at(size_t ordinal) const {
    DCHECK(ordinal < _sparse_cols.size())
            << "ordinal:" << ordinal << ", _num_columns:" << _sparse_cols.size();
    const auto& entry = _sparse_cols[ordinal];
    return *entry;
}
1309
1310
122
// Returns the column registered under `col_unique_id`.
// Both lookups use at(), so an unknown id or an out-of-range ordinal is reported by at().
const TabletColumn& TabletSchema::column_by_uid(int32_t col_unique_id) const {
    const auto ordinal = _field_id_to_index.at(col_unique_id);
    return *_cols.at(ordinal);
}
1313
1314
0
// Returns a mutable reference to the column registered under `col_unique_id`.
// Both lookups use at(), so an unknown id or an out-of-range ordinal is reported by at().
TabletColumn& TabletSchema::mutable_column_by_uid(int32_t col_unique_id) {
    const auto ordinal = _field_id_to_index.at(col_unique_id);
    return *_cols.at(ordinal);
}
1317
1318
8
// Returns a mutable reference to the column at `ordinal`; the bound is checked by at().
TabletColumn& TabletSchema::mutable_column(size_t ordinal) {
    auto& entry = _cols.at(ordinal);
    return *entry;
}
1321
1322
0
// Replaces the schema's indexes with ones initialized from `tindexes`, resolving column unique
// ids against this schema.
void TabletSchema::update_indexes_from_thrift(const std::vector<doris::TOlapTableIndex>& tindexes) {
    std::vector<TabletIndex> rebuilt;
    for (const auto& thrift_index : tindexes) {
        rebuilt.emplace_back().init_from_thrift(thrift_index, *this);
    }
    _indexes = std::move(rebuilt);
}
1331
1332
5
bool TabletSchema::has_column_unique_id(int32_t col_unique_id) const {
1333
5
    return _field_id_to_index.contains(col_unique_id);
1334
5
}
1335
1336
0
// Returns Status::OK() when `field_name` is registered in the name map; otherwise an
// INTERNAL_ERROR status whose message lists all field names of the schema.
Status TabletSchema::have_column(const std::string& field_name) const {
    if (_field_name_to_index.contains(StringRef(field_name))) {
        return Status::OK();
    }
    return Status::Error<ErrorCode::INTERNAL_ERROR>(
            "Not found field_name, field_name:{}, schema:{}", field_name,
            get_all_field_names());
}
1344
1345
4.77k
// Returns the column registered under `field_name`.
// Presence of the name is verified by DCHECK only; the lookup result is dereferenced without a
// further check, so callers must pass a name that is registered.
const TabletColumn& TabletSchema::column(const std::string& field_name) const {
    DCHECK(_field_name_to_index.contains(StringRef(field_name)) != 0)
            << ", field_name=" << field_name << ", field_name_to_index=" << get_all_field_names();
    const auto entry = _field_name_to_index.find(StringRef(field_name));
    const auto ordinal = entry->second;
    return *_cols[ordinal];
}
1351
1352
std::vector<const TabletIndex*> TabletSchema::get_indexes_for_column(
1353
10.4k
        const TabletColumn& col) const {
1354
10.4k
    std::vector<const TabletIndex*> indexes_for_column;
1355
    // Some columns (Float, Double, JSONB ...) from the variant do not support index, but they are listed in TabltetIndex.
1356
10.4k
    if (!segment_v2::InvertedIndexColumnWriter::check_support_inverted_index(col)) {
1357
0
        return indexes_for_column;
1358
0
    }
1359
10.4k
    int32_t col_unique_id = col.is_extracted_column() ? col.parent_unique_id() : col.unique_id();
1360
10.4k
    const std::string& suffix_path =
1361
10.4k
            col.has_path_info() ? escape_for_path_name(col.path_info_ptr()->get_path()) : "";
1362
    // TODO use more efficient impl
1363
10.6k
    for (size_t i = 0; i < _indexes.size(); i++) {
1364
206
        for (int32_t id : _indexes[i].col_unique_ids()) {
1365
206
            if (id == col_unique_id && _indexes[i].get_index_suffix() == suffix_path) {
1366
55
                indexes_for_column.push_back(&(_indexes[i]));
1367
55
            }
1368
206
        }
1369
206
    }
1370
1371
10.4k
    return indexes_for_column;
1372
10.4k
}
1373
1374
357
bool TabletSchema::has_inverted_index(const TabletColumn& col) const {
1375
    // TODO use more efficient impl
1376
357
    int32_t col_unique_id = col.is_extracted_column() ? col.parent_unique_id() : col.unique_id();
1377
357
    const std::string& suffix_path =
1378
357
            col.has_path_info() ? escape_for_path_name(col.path_info_ptr()->get_path()) : "";
1379
357
    for (size_t i = 0; i < _indexes.size(); i++) {
1380
0
        if (_indexes[i].index_type() == IndexType::INVERTED) {
1381
0
            for (int32_t id : _indexes[i].col_unique_ids()) {
1382
0
                if (id == col_unique_id && _indexes[i].get_index_suffix() == suffix_path) {
1383
0
                    return true;
1384
0
                }
1385
0
            }
1386
0
        }
1387
0
    }
1388
1389
357
    return false;
1390
357
}
1391
1392
bool TabletSchema::has_inverted_index_with_index_id(int64_t index_id,
1393
55
                                                    const std::string& suffix_name) const {
1394
72
    for (size_t i = 0; i < _indexes.size(); i++) {
1395
37
        if (_indexes[i].index_type() == IndexType::INVERTED &&
1396
37
            _indexes[i].get_index_suffix() == suffix_name && _indexes[i].index_id() == index_id) {
1397
20
            return true;
1398
20
        }
1399
37
    }
1400
35
    return false;
1401
55
}
1402
1403
// Returns the first INVERTED index with id `index_id` and index suffix `suffix_name`, or
// nullptr when none exists. `suffix_name` is compared as given (no escaping is applied here).
const TabletIndex* TabletSchema::get_inverted_index_with_index_id(
        int64_t index_id, const std::string& suffix_name) const {
    for (const auto& candidate : _indexes) {
        const bool matches = candidate.index_type() == IndexType::INVERTED &&
                             candidate.get_index_suffix() == suffix_name &&
                             candidate.index_id() == index_id;
        if (matches) {
            return &candidate;
        }
    }

    return nullptr;
}
1414
1415
// Returns the first INVERTED index that lists `col_unique_id` and whose index suffix equals the
// escaped form of `suffix_path`, or nullptr when none exists.
const TabletIndex* TabletSchema::get_inverted_index(int32_t col_unique_id,
                                                    const std::string& suffix_path) const {
    const std::string wanted_suffix = escape_for_path_name(suffix_path);
    for (const auto& candidate : _indexes) {
        if (candidate.index_type() != IndexType::INVERTED) {
            continue;
        }
        for (int32_t id : candidate.col_unique_ids()) {
            if (id == col_unique_id && candidate.get_index_suffix() == wanted_suffix) {
                return &candidate;
            }
        }
    }
    return nullptr;
}
1429
1430
// Returns the INVERTED index covering `col`, or nullptr when there is none.
// When `check_valid` is set, columns rejected by
// InvertedIndexColumnWriter::check_support_inverted_index (e.g. Float, Double, JSONB coming
// from a variant) yield nullptr immediately.
// The suffix is escaped here and then handed to the (unique id, suffix) overload, which applies
// escape_for_path_name() to it again.
const TabletIndex* TabletSchema::get_inverted_index(const TabletColumn& col,
                                                    bool check_valid) const {
    const bool rejected =
            check_valid && !segment_v2::InvertedIndexColumnWriter::check_support_inverted_index(col);
    if (rejected) {
        return nullptr;
    }
    // TODO use more efficient impl
    // Extracted (variant sub-)columns are looked up through their parent's unique id.
    const int32_t wanted_uid =
            col.is_extracted_column() ? col.parent_unique_id() : col.unique_id();
    const std::string wanted_suffix =
            col.has_path_info() ? escape_for_path_name(col.path_info_ptr()->get_path())
                                : std::string();
    return get_inverted_index(wanted_uid, wanted_suffix);
}
1444
1445
0
bool TabletSchema::has_ngram_bf_index(int32_t col_unique_id) const {
1446
    // TODO use more efficient impl
1447
0
    for (size_t i = 0; i < _indexes.size(); i++) {
1448
0
        if (_indexes[i].index_type() == IndexType::NGRAM_BF) {
1449
0
            for (int32_t id : _indexes[i].col_unique_ids()) {
1450
0
                if (id == col_unique_id) {
1451
0
                    return true;
1452
0
                }
1453
0
            }
1454
0
        }
1455
0
    }
1456
1457
0
    return false;
1458
0
}
1459
1460
10.4k
// Returns the first NGRAM_BF index (in `_indexes` order) that lists `col_unique_id`
// among its columns, or nullptr when there is none.
const TabletIndex* TabletSchema::get_ngram_bf_index(int32_t col_unique_id) const {
    // TODO use more efficient impl
    for (const auto& index : _indexes) {
        if (index.index_type() != IndexType::NGRAM_BF) {
            continue;
        }
        for (int32_t id : index.col_unique_ids()) {
            if (id == col_unique_id) {
                return &index;
            }
        }
    }
    return nullptr;
}
1473
1474
// Builds a block with one freshly created, empty column per entry of `return_columns`
// (each entry indexes into `_cols`), in the given order.
// A column's data type is created as nullable when `tablet_columns_need_convert_null`
// is non-null and contains that column's index.
vectorized::Block TabletSchema::create_block(
        const std::vector<uint32_t>& return_columns,
        const std::unordered_set<uint32_t>* tablet_columns_need_convert_null) const {
    vectorized::Block block;
    for (const uint32_t cid : return_columns) {
        const auto& col = *_cols[cid];
        const bool is_nullable = tablet_columns_need_convert_null != nullptr &&
                                 tablet_columns_need_convert_null->count(cid) != 0;
        auto data_type = vectorized::DataTypeFactory::instance().create_data_type(col, is_nullable);
        block.insert({data_type->create_column(), data_type, col.name()});
    }
    return block;
}
1489
1490
1.75k
// Builds a block with one freshly created, empty column per schema column, in `_cols` order.
// When `ignore_dropped_col` is true, columns for which is_dropped_column() holds are left out.
vectorized::Block TabletSchema::create_block(bool ignore_dropped_col) const {
    vectorized::Block block;
    for (const auto& col : _cols) {
        const bool skip = ignore_dropped_col && is_dropped_column(*col);
        if (!skip) {
            auto data_type = vectorized::DataTypeFactory::instance().create_data_type(*col);
            block.insert({data_type->create_column(), data_type, col->name()});
        }
    }
    return block;
}
1501
1502
0
// Builds a block with one freshly created, empty column for every id in `cids`
// (each id indexes into `_cols`), preserving the order of `cids`.
vectorized::Block TabletSchema::create_block_by_cids(const std::vector<uint32_t>& cids) {
    vectorized::Block block;
    for (const uint32_t cid : cids) {
        const auto& column_meta = *_cols[cid];
        auto data_type = vectorized::DataTypeFactory::instance().create_data_type(column_meta);
        block.insert({data_type->create_column(), data_type, column_meta.name()});
    }
    return block;
}
1511
1512
0
// Field-wise equality of two tablet columns.
//
// Compared: unique id, name, type, key flag, aggregation, nullability, default value
// (the value itself only when one is present), decimal precision/frac (only for decimal
// columns), length, index length, bloom-filter flag, bitmap-index flag and the column path.
//
// Column paths are equal when both are absent, or both are present and the pointed-to
// paths compare equal. The previous code tested `a._column_path == nullptr &&
// a._column_path != nullptr`, which can never be true, so a column without a path compared
// equal to an otherwise identical column that has one (and the comparison was asymmetric).
bool operator==(const TabletColumn& a, const TabletColumn& b) {
    if (a._unique_id != b._unique_id) return false;
    if (a._col_name != b._col_name) return false;
    if (a._type != b._type) return false;
    if (a._is_key != b._is_key) return false;
    if (a._aggregation != b._aggregation) return false;
    if (a._is_nullable != b._is_nullable) return false;
    if (a._has_default_value != b._has_default_value) return false;
    // The default value is only meaningful when the column declares one.
    if (a._has_default_value) {
        if (a._default_value != b._default_value) return false;
    }
    if (a._is_decimal != b._is_decimal) return false;
    // Precision and fraction are only compared for decimal columns.
    if (a._is_decimal) {
        if (a._precision != b._precision) return false;
        if (a._frac != b._frac) return false;
    }
    if (a._length != b._length) return false;
    if (a._index_length != b._index_length) return false;
    if (a._is_bf_column != b._is_bf_column) return false;
    if (a._has_bitmap_index != b._has_bitmap_index) return false;

    // Exactly one side having a path means the columns differ.
    const bool a_has_path = (a._column_path != nullptr);
    const bool b_has_path = (b._column_path != nullptr);
    if (a_has_path != b_has_path) return false;
    // Both present: compare the paths themselves, not the pointers.
    if (a_has_path && *a._column_path != *b._column_path) return false;
    return true;
}
1539
1540
0
// Inequality of two tablet columns, defined as the negation of operator==.
bool operator!=(const TabletColumn& a, const TabletColumn& b) {
    const bool columns_equal = (a == b);
    return !columns_equal;
}
1543
1544
1
// Field-wise equality of two tablet schemas: keys type, the column list (element by
// element), the column counters, and the schema-level settings listed below.
// `_bf_fpp` is only compared when both schemas have it set, with a tolerance of 1e-6.
bool operator==(const TabletSchema& a, const TabletSchema& b) {
    if (a._keys_type != b._keys_type) {
        return false;
    }

    // Columns: same count and pairwise-equal contents.
    if (a._cols.size() != b._cols.size()) {
        return false;
    }
    const bool same_columns =
            std::equal(a._cols.begin(), a._cols.end(), b._cols.begin(),
                       [](const auto& lhs, const auto& rhs) { return !(*lhs != *rhs); });
    if (!same_columns) {
        return false;
    }

    // Column counters.
    if (a._num_columns != b._num_columns || a._num_key_columns != b._num_key_columns ||
        a._num_null_columns != b._num_null_columns ||
        a._num_short_key_columns != b._num_short_key_columns) {
        return false;
    }

    if (a._num_rows_per_row_block != b._num_rows_per_row_block ||
        a._compress_kind != b._compress_kind ||
        a._next_column_unique_id != b._next_column_unique_id) {
        return false;
    }

    // Bloom filter false-positive probability: floating point, so compare with a tolerance.
    if (a._has_bf_fpp != b._has_bf_fpp) {
        return false;
    }
    if (a._has_bf_fpp && std::abs(a._bf_fpp - b._bf_fpp) > 1e-6) {
        return false;
    }

    // Remaining schema-level settings.
    if (a._is_in_memory != b._is_in_memory || a._delete_sign_idx != b._delete_sign_idx ||
        a._disable_auto_compaction != b._disable_auto_compaction ||
        a._enable_single_replica_compaction != b._enable_single_replica_compaction ||
        a._store_row_column != b._store_row_column ||
        a._row_store_page_size != b._row_store_page_size ||
        a._skip_write_index_on_load != b._skip_write_index_on_load) {
        return false;
    }
    return true;
}
1570
1571
1
// Inequality of two tablet schemas, defined as the negation of operator==.
bool operator!=(const TabletSchema& a, const TabletSchema& b) {
    const bool schemas_equal = (a == b);
    return !schemas_equal;
}
1574
1575
} // namespace doris