Coverage Report

Created: 2024-11-18 11:49

/root/doris/be/src/olap/tablet_schema.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "olap/tablet_schema.h"
19
20
#include <gen_cpp/Descriptors_types.h>
21
#include <gen_cpp/olap_file.pb.h>
22
#include <glog/logging.h>
23
#include <google/protobuf/io/coded_stream.h>
24
#include <google/protobuf/io/zero_copy_stream.h>
25
#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
26
27
#include <algorithm>
28
#include <cctype>
29
// IWYU pragma: no_include <bits/std_abs.h>
30
#include <cmath> // IWYU pragma: keep
31
#include <memory>
32
#include <ostream>
33
#include <vector>
34
35
#include "common/compiler_util.h" // IWYU pragma: keep
36
#include "common/consts.h"
37
#include "common/status.h"
38
#include "exec/tablet_info.h"
39
#include "olap/inverted_index_parser.h"
40
#include "olap/olap_define.h"
41
#include "olap/tablet_column_object_pool.h"
42
#include "olap/types.h"
43
#include "olap/utils.h"
44
#include "runtime/memory/lru_cache_policy.h"
45
#include "runtime/thread_context.h"
46
#include "tablet_meta.h"
47
#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
48
#include "vec/aggregate_functions/aggregate_function_state_union.h"
49
#include "vec/common/hex.h"
50
#include "vec/common/string_ref.h"
51
#include "vec/core/block.h"
52
#include "vec/data_types/data_type.h"
53
#include "vec/data_types/data_type_factory.hpp"
54
#include "vec/json/path_in_data.h"
55
56
namespace doris {
57
58
0
// Maps a PrimitiveType to the corresponding FieldType.
//
// Primitive types that have no mapping here (INVALID_TYPE, TYPE_BINARY, TYPE_DECIMALV2,
// TYPE_LAMBDA_FUNCTION and every value not listed below) yield OLAP_FIELD_TYPE_UNKNOWN.
FieldType TabletColumn::get_field_type_by_type(PrimitiveType primitiveType) {
    // Start from the fallback so that only real mappings need an assignment.
    FieldType field_type = FieldType::OLAP_FIELD_TYPE_UNKNOWN;

    switch (primitiveType) {
    case PrimitiveType::TYPE_NULL:
        field_type = FieldType::OLAP_FIELD_TYPE_NONE;
        break;
    case PrimitiveType::TYPE_BOOLEAN:
        field_type = FieldType::OLAP_FIELD_TYPE_BOOL;
        break;
    case PrimitiveType::TYPE_TINYINT:
        field_type = FieldType::OLAP_FIELD_TYPE_TINYINT;
        break;
    case PrimitiveType::TYPE_SMALLINT:
        field_type = FieldType::OLAP_FIELD_TYPE_SMALLINT;
        break;
    case PrimitiveType::TYPE_INT:
        field_type = FieldType::OLAP_FIELD_TYPE_INT;
        break;
    case PrimitiveType::TYPE_BIGINT:
        field_type = FieldType::OLAP_FIELD_TYPE_BIGINT;
        break;
    case PrimitiveType::TYPE_LARGEINT:
        field_type = FieldType::OLAP_FIELD_TYPE_LARGEINT;
        break;
    case PrimitiveType::TYPE_FLOAT:
        field_type = FieldType::OLAP_FIELD_TYPE_FLOAT;
        break;
    case PrimitiveType::TYPE_DOUBLE:
        field_type = FieldType::OLAP_FIELD_TYPE_DOUBLE;
        break;
    case PrimitiveType::TYPE_VARCHAR:
        field_type = FieldType::OLAP_FIELD_TYPE_VARCHAR;
        break;
    case PrimitiveType::TYPE_DATE:
        field_type = FieldType::OLAP_FIELD_TYPE_DATE;
        break;
    case PrimitiveType::TYPE_DATETIME:
        field_type = FieldType::OLAP_FIELD_TYPE_DATETIME;
        break;
    case PrimitiveType::TYPE_CHAR:
        field_type = FieldType::OLAP_FIELD_TYPE_CHAR;
        break;
    case PrimitiveType::TYPE_STRUCT:
        field_type = FieldType::OLAP_FIELD_TYPE_STRUCT;
        break;
    case PrimitiveType::TYPE_ARRAY:
        field_type = FieldType::OLAP_FIELD_TYPE_ARRAY;
        break;
    case PrimitiveType::TYPE_MAP:
        field_type = FieldType::OLAP_FIELD_TYPE_MAP;
        break;
    case PrimitiveType::TYPE_HLL:
        field_type = FieldType::OLAP_FIELD_TYPE_HLL;
        break;
    case PrimitiveType::TYPE_OBJECT:
        field_type = FieldType::OLAP_FIELD_TYPE_OBJECT;
        break;
    case PrimitiveType::TYPE_STRING:
        field_type = FieldType::OLAP_FIELD_TYPE_STRING;
        break;
    case PrimitiveType::TYPE_QUANTILE_STATE:
        field_type = FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE;
        break;
    case PrimitiveType::TYPE_DATEV2:
        field_type = FieldType::OLAP_FIELD_TYPE_DATEV2;
        break;
    case PrimitiveType::TYPE_DATETIMEV2:
        field_type = FieldType::OLAP_FIELD_TYPE_DATETIMEV2;
        break;
    case PrimitiveType::TYPE_TIMEV2:
        field_type = FieldType::OLAP_FIELD_TYPE_TIMEV2;
        break;
    case PrimitiveType::TYPE_DECIMAL32:
        field_type = FieldType::OLAP_FIELD_TYPE_DECIMAL32;
        break;
    case PrimitiveType::TYPE_DECIMAL64:
        field_type = FieldType::OLAP_FIELD_TYPE_DECIMAL64;
        break;
    case PrimitiveType::TYPE_DECIMAL128I:
        field_type = FieldType::OLAP_FIELD_TYPE_DECIMAL128I;
        break;
    case PrimitiveType::TYPE_JSONB:
        field_type = FieldType::OLAP_FIELD_TYPE_JSONB;
        break;
    case PrimitiveType::TYPE_VARIANT:
        field_type = FieldType::OLAP_FIELD_TYPE_VARIANT;
        break;
    case PrimitiveType::TYPE_AGG_STATE:
        field_type = FieldType::OLAP_FIELD_TYPE_AGG_STATE;
        break;
    // Listed explicitly to document that these are known but unmapped.
    case PrimitiveType::INVALID_TYPE:
    case PrimitiveType::TYPE_BINARY:          // Not implemented
    case PrimitiveType::TYPE_DECIMALV2:       // Not implemented
    case PrimitiveType::TYPE_LAMBDA_FUNCTION: // Not implemented
    default:
        break;
    }

    return field_type;
}
130
131
2.97k
// Parses a type name into a FieldType. Matching is case-insensitive.
//
// Most names must match exactly. Three are matched by prefix only, so any suffix is
// accepted: "DECIMAL" (after the exact DECIMAL32/64/128I/256 names have been tried),
// "VARCHAR" and "HLL". "LIST" is accepted as an alias of "ARRAY".
//
// @param type_str type name to parse
// @return the matching FieldType, or OLAP_FIELD_TYPE_UNKNOWN (with a WARNING log) when
//         nothing matches
FieldType TabletColumn::get_field_type_by_string(const std::string& type_str) {
    struct NamedType {
        const char* name;
        FieldType type;
    };
    // Names that must match the whole (upper-cased) input.
    static const NamedType kExactTypes[] = {
            {"TINYINT", FieldType::OLAP_FIELD_TYPE_TINYINT},
            {"SMALLINT", FieldType::OLAP_FIELD_TYPE_SMALLINT},
            {"INT", FieldType::OLAP_FIELD_TYPE_INT},
            {"BIGINT", FieldType::OLAP_FIELD_TYPE_BIGINT},
            {"LARGEINT", FieldType::OLAP_FIELD_TYPE_LARGEINT},
            {"UNSIGNED_TINYINT", FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT},
            {"UNSIGNED_SMALLINT", FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT},
            {"UNSIGNED_INT", FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT},
            {"UNSIGNED_BIGINT", FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT},
            {"IPV4", FieldType::OLAP_FIELD_TYPE_IPV4},
            {"IPV6", FieldType::OLAP_FIELD_TYPE_IPV6},
            {"FLOAT", FieldType::OLAP_FIELD_TYPE_FLOAT},
            {"DISCRETE_DOUBLE", FieldType::OLAP_FIELD_TYPE_DISCRETE_DOUBLE},
            {"DOUBLE", FieldType::OLAP_FIELD_TYPE_DOUBLE},
            {"CHAR", FieldType::OLAP_FIELD_TYPE_CHAR},
            {"DATE", FieldType::OLAP_FIELD_TYPE_DATE},
            {"DATEV2", FieldType::OLAP_FIELD_TYPE_DATEV2},
            {"DATETIMEV2", FieldType::OLAP_FIELD_TYPE_DATETIMEV2},
            {"DATETIME", FieldType::OLAP_FIELD_TYPE_DATETIME},
            {"DECIMAL32", FieldType::OLAP_FIELD_TYPE_DECIMAL32},
            {"DECIMAL64", FieldType::OLAP_FIELD_TYPE_DECIMAL64},
            {"DECIMAL128I", FieldType::OLAP_FIELD_TYPE_DECIMAL128I},
            {"DECIMAL256", FieldType::OLAP_FIELD_TYPE_DECIMAL256},
            {"STRING", FieldType::OLAP_FIELD_TYPE_STRING},
            {"JSONB", FieldType::OLAP_FIELD_TYPE_JSONB},
            {"VARIANT", FieldType::OLAP_FIELD_TYPE_VARIANT},
            {"BOOLEAN", FieldType::OLAP_FIELD_TYPE_BOOL},
            {"STRUCT", FieldType::OLAP_FIELD_TYPE_STRUCT},
            {"LIST", FieldType::OLAP_FIELD_TYPE_ARRAY},
            {"MAP", FieldType::OLAP_FIELD_TYPE_MAP},
            {"OBJECT", FieldType::OLAP_FIELD_TYPE_OBJECT},
            {"ARRAY", FieldType::OLAP_FIELD_TYPE_ARRAY},
            {"QUANTILE_STATE", FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE},
            {"AGG_STATE", FieldType::OLAP_FIELD_TYPE_AGG_STATE},
    };

    // std::toupper requires a value representable as unsigned char (or EOF); passing a
    // plain, possibly negative, char is undefined behaviour, hence the unsigned parameter.
    std::string upper_type_str = type_str;
    std::transform(upper_type_str.begin(), upper_type_str.end(), upper_type_str.begin(),
                   [](unsigned char c) { return static_cast<char>(std::toupper(c)); });

    for (const auto& candidate : kExactTypes) {
        if (upper_type_str == candidate.name) {
            return candidate.type;
        }
    }

    // Prefix matches. None of the exact names above starts with one of these prefixes
    // except the DECIMALxx family, which has already been resolved by the exact lookup.
    if (0 == upper_type_str.compare(0, 7, "DECIMAL")) {
        return FieldType::OLAP_FIELD_TYPE_DECIMAL;
    }
    if (0 == upper_type_str.compare(0, 7, "VARCHAR")) {
        return FieldType::OLAP_FIELD_TYPE_VARCHAR;
    }
    if (0 == upper_type_str.compare(0, 3, "HLL")) {
        return FieldType::OLAP_FIELD_TYPE_HLL;
    }

    LOG(WARNING) << "invalid type string. [type='" << type_str << "']";
    return FieldType::OLAP_FIELD_TYPE_UNKNOWN;
}
218
219
2.12k
// Parses an aggregation name into a FieldAggregationMethod. Matching is case-insensitive.
//
// @param str aggregation name to parse
// @return the matching built-in method; OLAP_FIELD_AGGREGATION_GENERIC for any other
//         non-empty name; OLAP_FIELD_AGGREGATION_UNKNOWN for an empty string
FieldAggregationMethod TabletColumn::get_aggregation_type_by_string(const std::string& str) {
    struct NamedMethod {
        const char* name;
        FieldAggregationMethod method;
    };
    static const NamedMethod kKnownMethods[] = {
            {"NONE", FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE},
            {"SUM", FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM},
            {"MIN", FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MIN},
            {"MAX", FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MAX},
            {"REPLACE", FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE},
            {"REPLACE_IF_NOT_NULL",
             FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL},
            {"HLL_UNION", FieldAggregationMethod::OLAP_FIELD_AGGREGATION_HLL_UNION},
            {"BITMAP_UNION", FieldAggregationMethod::OLAP_FIELD_AGGREGATION_BITMAP_UNION},
            {"QUANTILE_UNION", FieldAggregationMethod::OLAP_FIELD_AGGREGATION_QUANTILE_UNION},
    };

    // std::toupper requires a value representable as unsigned char (or EOF); passing a
    // plain, possibly negative, char is undefined behaviour, hence the unsigned parameter.
    std::string upper_str = str;
    std::transform(upper_str.begin(), upper_str.end(), upper_str.begin(),
                   [](unsigned char c) { return static_cast<char>(std::toupper(c)); });

    for (const auto& candidate : kKnownMethods) {
        if (upper_str == candidate.name) {
            return candidate.method;
        }
    }

    // Any other non-empty name is classified as a generic aggregation.
    return upper_str.empty() ? FieldAggregationMethod::OLAP_FIELD_AGGREGATION_UNKNOWN
                             : FieldAggregationMethod::OLAP_FIELD_AGGREGATION_GENERIC;
}
251
252
10.9k
// Returns the upper-case type name for a FieldType; "UNKNOWN" for any value that is not
// listed below. Note that OLAP_FIELD_TYPE_BOOL is spelled "BOOLEAN".
std::string TabletColumn::get_string_by_field_type(FieldType type) {
    const char* type_name = "UNKNOWN";

    switch (type) {
    case FieldType::OLAP_FIELD_TYPE_TINYINT:
        type_name = "TINYINT";
        break;
    case FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT:
        type_name = "UNSIGNED_TINYINT";
        break;
    case FieldType::OLAP_FIELD_TYPE_SMALLINT:
        type_name = "SMALLINT";
        break;
    case FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT:
        type_name = "UNSIGNED_SMALLINT";
        break;
    case FieldType::OLAP_FIELD_TYPE_INT:
        type_name = "INT";
        break;
    case FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT:
        type_name = "UNSIGNED_INT";
        break;
    case FieldType::OLAP_FIELD_TYPE_BIGINT:
        type_name = "BIGINT";
        break;
    case FieldType::OLAP_FIELD_TYPE_LARGEINT:
        type_name = "LARGEINT";
        break;
    case FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT:
        type_name = "UNSIGNED_BIGINT";
        break;
    case FieldType::OLAP_FIELD_TYPE_IPV4:
        type_name = "IPV4";
        break;
    case FieldType::OLAP_FIELD_TYPE_IPV6:
        type_name = "IPV6";
        break;
    case FieldType::OLAP_FIELD_TYPE_FLOAT:
        type_name = "FLOAT";
        break;
    case FieldType::OLAP_FIELD_TYPE_DOUBLE:
        type_name = "DOUBLE";
        break;
    case FieldType::OLAP_FIELD_TYPE_DISCRETE_DOUBLE:
        type_name = "DISCRETE_DOUBLE";
        break;
    case FieldType::OLAP_FIELD_TYPE_CHAR:
        type_name = "CHAR";
        break;
    case FieldType::OLAP_FIELD_TYPE_DATE:
        type_name = "DATE";
        break;
    case FieldType::OLAP_FIELD_TYPE_DATEV2:
        type_name = "DATEV2";
        break;
    case FieldType::OLAP_FIELD_TYPE_DATETIME:
        type_name = "DATETIME";
        break;
    case FieldType::OLAP_FIELD_TYPE_DATETIMEV2:
        type_name = "DATETIMEV2";
        break;
    case FieldType::OLAP_FIELD_TYPE_DECIMAL:
        type_name = "DECIMAL";
        break;
    case FieldType::OLAP_FIELD_TYPE_DECIMAL32:
        type_name = "DECIMAL32";
        break;
    case FieldType::OLAP_FIELD_TYPE_DECIMAL64:
        type_name = "DECIMAL64";
        break;
    case FieldType::OLAP_FIELD_TYPE_DECIMAL128I:
        type_name = "DECIMAL128I";
        break;
    case FieldType::OLAP_FIELD_TYPE_DECIMAL256:
        type_name = "DECIMAL256";
        break;
    case FieldType::OLAP_FIELD_TYPE_VARCHAR:
        type_name = "VARCHAR";
        break;
    case FieldType::OLAP_FIELD_TYPE_JSONB:
        type_name = "JSONB";
        break;
    case FieldType::OLAP_FIELD_TYPE_VARIANT:
        type_name = "VARIANT";
        break;
    case FieldType::OLAP_FIELD_TYPE_STRING:
        type_name = "STRING";
        break;
    case FieldType::OLAP_FIELD_TYPE_BOOL:
        type_name = "BOOLEAN";
        break;
    case FieldType::OLAP_FIELD_TYPE_HLL:
        type_name = "HLL";
        break;
    case FieldType::OLAP_FIELD_TYPE_STRUCT:
        type_name = "STRUCT";
        break;
    case FieldType::OLAP_FIELD_TYPE_ARRAY:
        type_name = "ARRAY";
        break;
    case FieldType::OLAP_FIELD_TYPE_MAP:
        type_name = "MAP";
        break;
    case FieldType::OLAP_FIELD_TYPE_OBJECT:
        type_name = "OBJECT";
        break;
    case FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE:
        type_name = "QUANTILE_STATE";
        break;
    case FieldType::OLAP_FIELD_TYPE_AGG_STATE:
        type_name = "AGG_STATE";
        break;
    default:
        break;
    }

    return type_name;
}
363
364
28
// Returns the upper-case name of a built-in aggregation method. Every value without an
// explicit case below falls through to "UNKNOWN".
std::string TabletColumn::get_string_by_aggregation_type(FieldAggregationMethod type) {
    const char* method_name = "UNKNOWN";

    switch (type) {
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE:
        method_name = "NONE";
        break;
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM:
        method_name = "SUM";
        break;
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MIN:
        method_name = "MIN";
        break;
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MAX:
        method_name = "MAX";
        break;
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE:
        method_name = "REPLACE";
        break;
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL:
        method_name = "REPLACE_IF_NOT_NULL";
        break;
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_HLL_UNION:
        method_name = "HLL_UNION";
        break;
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_BITMAP_UNION:
        method_name = "BITMAP_UNION";
        break;
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_QUANTILE_UNION:
        method_name = "QUANTILE_UNION";
        break;
    default:
        break;
    }

    return method_name;
}
397
398
1.48k
// Returns the field length used for a thrift primitive type.
//
// @param type          thrift primitive type of the column
// @param string_length declared length; only consulted for CHAR and the variable-length
//                      types (VARCHAR/HLL/AGG_STATE/STRING/VARIANT/JSONB)
// @return the length for the type, or 0 (with a WARNING log) for an unhandled type
uint32_t TabletColumn::get_field_length_by_type(TPrimitiveType::type type, uint32_t string_length) {
    switch (type) {
    // ---- fixed-width types, grouped by width ----
    case TPrimitiveType::TINYINT:
    case TPrimitiveType::BOOLEAN:
        return 1;

    case TPrimitiveType::SMALLINT:
        return 2;

    case TPrimitiveType::DATE:
        return 3;

    case TPrimitiveType::INT:
    case TPrimitiveType::IPV4:
    case TPrimitiveType::DATEV2:
    case TPrimitiveType::FLOAT:
    case TPrimitiveType::DECIMAL32:
        return 4;

    case TPrimitiveType::BIGINT:
    case TPrimitiveType::DATETIME:
    case TPrimitiveType::DATETIMEV2:
    case TPrimitiveType::DOUBLE:
    case TPrimitiveType::DECIMAL64:
        return 8;

    case TPrimitiveType::DECIMALV2:
        return 12; // use 12 bytes in olap engine.

    case TPrimitiveType::LARGEINT:
    case TPrimitiveType::IPV6:
    case TPrimitiveType::QUANTILE_STATE:
    case TPrimitiveType::OBJECT:
    case TPrimitiveType::DECIMAL128I:
        return 16;

    case TPrimitiveType::DECIMAL256:
        return 32;

    // ---- length-dependent types ----
    case TPrimitiveType::CHAR:
        return string_length;

    // For the variable-length types, sizeof() of the max-length constant is added, i.e.
    // the byte width of that constant's type, not its value.
    // NOTE(review): presumably this reserves room for a length prefix — confirm.
    case TPrimitiveType::VARCHAR:
    case TPrimitiveType::HLL:
    case TPrimitiveType::AGG_STATE:
        return string_length + sizeof(OLAP_VARCHAR_MAX_LENGTH);

    case TPrimitiveType::STRING:
    case TPrimitiveType::VARIANT:
        return string_length + sizeof(OLAP_STRING_MAX_LENGTH);

    case TPrimitiveType::JSONB:
        return string_length + sizeof(OLAP_JSONB_MAX_LENGTH);

    // ---- nested types ----
    case TPrimitiveType::STRUCT:
        // Note that(xy): this is the length of struct type itself,
        // the length of its subtypes are not included.
        return OLAP_STRUCT_MAX_LENGTH;

    case TPrimitiveType::ARRAY:
        return OLAP_ARRAY_MAX_LENGTH;

    case TPrimitiveType::MAP:
        return OLAP_MAP_MAX_LENGTH;

    default:
        LOG(WARNING) << "unknown field type. [type=" << type << "]";
        return 0;
    }
}
464
465
2.94k
// Default constructor: explicitly initializes only the aggregation method, to
// OLAP_FIELD_AGGREGATION_NONE. No other member is assigned here.
TabletColumn::TabletColumn() : _aggregation(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE) {}
466
467
133
// Constructs a column from an aggregation method and a field type only; no other member
// is assigned by this constructor.
TabletColumn::TabletColumn(FieldAggregationMethod agg, FieldType type) {
    this->_type = type;
    this->_aggregation = agg;
}
471
472
15
// Constructs a column from aggregation method, field type and nullability. The length is
// taken from the size reported by the scalar type info of `filed_type`.
// NOTE(review): assumes get_scalar_type_info() returns a non-null descriptor for
// `filed_type` — confirm for non-scalar types.
TabletColumn::TabletColumn(FieldAggregationMethod agg, FieldType filed_type, bool is_nullable) {
    const auto* scalar_info = get_scalar_type_info(filed_type);

    this->_aggregation = agg;
    this->_type = filed_type;
    this->_length = scalar_info->size();
    this->_is_nullable = is_nullable;
}
478
479
// Constructs a column with an explicit unique id and length, in addition to the
// aggregation method, field type and nullability.
TabletColumn::TabletColumn(FieldAggregationMethod agg, FieldType filed_type, bool is_nullable,
                           int32_t unique_id, size_t length) {
    // Identity
    this->_unique_id = unique_id;
    this->_type = filed_type;
    // Storage attributes
    this->_length = length;
    this->_is_nullable = is_nullable;
    this->_aggregation = agg;
}
487
488
0
// Constructs a column from its protobuf description; all work is delegated to
// init_from_pb().
TabletColumn::TabletColumn(const ColumnPB& column) {
    this->init_from_pb(column);
}
491
492
0
// Constructs a column from its thrift description; all work is delegated to
// init_from_thrift().
TabletColumn::TabletColumn(const TColumn& column) {
    this->init_from_thrift(column);
}
495
496
0
void TabletColumn::init_from_thrift(const TColumn& tcolumn) {
497
0
    ColumnPB column_pb;
498
0
    TabletMeta::init_column_from_tcolumn(tcolumn.col_unique_id, tcolumn, &column_pb);
499
0
    init_from_pb(column_pb);
500
0
}
501
502
2.88k
void TabletColumn::init_from_pb(const ColumnPB& column) {
503
2.88k
    _unique_id = column.unique_id();
504
2.88k
    _col_name = column.name();
505
2.88k
    _col_name_lower_case = to_lower(_col_name);
506
2.88k
    _type = TabletColumn::get_field_type_by_string(column.type());
507
2.88k
    _is_key = column.is_key();
508
2.88k
    _is_nullable = column.is_nullable();
509
2.88k
    _is_auto_increment = column.is_auto_increment();
510
511
2.88k
    _has_default_value = column.has_default_value();
512
2.88k
    if (_has_default_value) {
513
28
        _default_value = column.default_value();
514
28
    }
515
516
2.88k
    if (column.has_precision()) {
517
2.07k
        _is_decimal = true;
518
2.07k
        _precision = column.precision();
519
2.07k
    } else {
520
812
        _is_decimal = false;
521
812
    }
522
2.88k
    if (column.has_frac()) {
523
2.07k
        _frac = column.frac();
524
2.07k
    }
525
2.88k
    _length = column.length();
526
2.88k
    _index_length = column.index_length();
527
2.88k
    if (column.has_is_bf_column()) {
528
351
        _is_bf_column = column.is_bf_column();
529
2.53k
    } else {
530
2.53k
        _is_bf_column = false;
531
2.53k
    }
532
2.88k
    if (column.has_has_bitmap_index()) {
533
1.48k
        _has_bitmap_index = column.has_bitmap_index();
534
1.48k
    } else {
535
1.40k
        _has_bitmap_index = false;
536
1.40k
    }
537
2.88k
    if (column.has_aggregation()) {
538
2.12k
        _aggregation = get_aggregation_type_by_string(column.aggregation());
539
2.12k
        _aggregation_name = column.aggregation();
540
2.12k
    }
541
542
2.88k
    if (_type == FieldType::OLAP_FIELD_TYPE_AGG_STATE) {
543
0
        _result_is_nullable = column.result_is_nullable();
544
0
        _be_exec_version = column.be_exec_version();
545
0
    }
546
547
2.88k
    if (column.has_visible()) {
548
998
        _visible = column.visible();
549
998
    }
550
2.88k
    if (_type == FieldType::OLAP_FIELD_TYPE_ARRAY) {
551
0
        CHECK(column.children_columns_size() == 1)
552
0
                << "ARRAY type should has 1 children types, but got "
553
0
                << column.children_columns_size();
554
0
    }
555
2.88k
    if (_type == FieldType::OLAP_FIELD_TYPE_MAP) {
556
0
        DCHECK(column.children_columns_size() == 2)
557
0
                << "MAP type should has 2 children types, but got "
558
0
                << column.children_columns_size();
559
0
        if (UNLIKELY(column.children_columns_size() != 2)) {
560
0
            LOG(WARNING) << "MAP type should has 2 children types, but got "
561
0
                         << column.children_columns_size();
562
0
        }
563
0
    }
564
2.88k
    for (size_t i = 0; i < column.children_columns_size(); i++) {
565
0
        TabletColumn child_column;
566
0
        child_column.init_from_pb(column.children_columns(i));
567
0
        add_sub_column(child_column);
568
0
    }
569
2.88k
    if (column.has_column_path_info()) {
570
0
        _column_path = std::make_shared<vectorized::PathInData>();
571
0
        _column_path->from_protobuf(column.column_path_info());
572
0
        _parent_col_unique_id = column.column_path_info().parrent_column_unique_id();
573
0
    }
574
2.88k
    if (is_variant_type() && !column.has_column_path_info()) {
575
        // set path info for variant root column, to prevent from missing
576
2
        _column_path = std::make_shared<vectorized::PathInData>(_col_name_lower_case);
577
2
    }
578
2.88k
    for (auto& column_pb : column.sparse_columns()) {
579
0
        TabletColumn column;
580
0
        column.init_from_pb(column_pb);
581
0
        _sparse_cols.emplace_back(std::make_shared<TabletColumn>(std::move(column)));
582
0
        _num_sparse_columns++;
583
0
    }
584
2.88k
}
585
586
// Builds a nullable VARIANT sub-column whose path is formed from `root` and `paths`.
// The column's name is the resulting path string, its unique id is -1 and its parent
// unique id is `parent_unique_id`.
TabletColumn TabletColumn::create_materialized_variant_column(const std::string& root,
                                                              const std::vector<std::string>& paths,
                                                              int32_t parent_unique_id) {
    const vectorized::PathInData full_path(root, paths);

    TabletColumn variant_subcolumn;
    variant_subcolumn.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
    variant_subcolumn.set_is_nullable(true);
    variant_subcolumn.set_unique_id(-1);
    variant_subcolumn.set_parent_unique_id(parent_unique_id);
    variant_subcolumn.set_path_info(full_path);
    variant_subcolumn.set_name(full_path.get_path());
    return variant_subcolumn;
}
599
600
10.9k
// Serializes this column into `column`.
//
// Default value, precision/frac, bloom-filter flag, aggregation name and bitmap-index flag
// are only written when set on this column. Sub-columns and sparse columns are serialized
// recursively. For columns carrying path info, the length (STRING only) and index length
// are overwritten with fixed values after the path has been written.
void TabletColumn::to_schema_pb(ColumnPB* column) const {
    // ---- always-present attributes ----
    column->set_unique_id(_unique_id);
    column->set_name(_col_name);
    column->set_type(get_string_by_field_type(_type));
    column->set_is_key(_is_key);
    column->set_is_nullable(_is_nullable);
    column->set_length(_length);
    column->set_index_length(_index_length);
    column->set_result_is_nullable(_result_is_nullable);
    column->set_be_exec_version(_be_exec_version);
    column->set_visible(_visible);

    // ---- attributes written only when set ----
    if (_has_default_value) {
        column->set_default_value(_default_value);
    }
    if (_is_decimal) {
        column->set_precision(_precision);
        column->set_frac(_frac);
    }
    if (_is_bf_column) {
        column->set_is_bf_column(true);
    }
    if (!_aggregation_name.empty()) {
        column->set_aggregation(_aggregation_name);
    }
    if (_has_bitmap_index) {
        column->set_has_bitmap_index(true);
    }

    // ---- sanity checks on the number of children for nested types ----
    switch (_type) {
    case FieldType::OLAP_FIELD_TYPE_ARRAY: {
        CHECK(_sub_columns.size() == 1)
                << "ARRAY type should has 1 children types, but got " << _sub_columns.size();
        break;
    }
    case FieldType::OLAP_FIELD_TYPE_MAP: {
        DCHECK(_sub_columns.size() == 2)
                << "MAP type should has 2 children types, but got " << _sub_columns.size();
        if (UNLIKELY(_sub_columns.size() != 2)) {
            LOG(WARNING) << "MAP type should has 2 children types, but got " << _sub_columns.size();
        }
        break;
    }
    default:
        break;
    }

    // ---- children ----
    for (const auto& sub_column : _sub_columns) {
        sub_column->to_schema_pb(column->add_children_columns());
    }

    // set parts info
    if (has_path_info()) {
        // CHECK_GT(_parent_col_unique_id, 0);
        _column_path->to_protobuf(column->mutable_column_path_info(), _parent_col_unique_id);
        // Update unstable information for variant columns. Some of the fields in the tablet schema
        // are irrelevant for variant sub-columns, but retaining them may lead to an excessive growth
        // in the number of tablet schema cache entries.
        if (_type == FieldType::OLAP_FIELD_TYPE_STRING) {
            column->set_length(INT_MAX);
        }
        column->set_index_length(0);
    }

    // ---- sparse columns ----
    for (const auto& sparse_col : _sparse_cols) {
        sparse_col->to_schema_pb(column->add_sparse_columns());
    }
}
662
663
16
// Appends a copy of `sub_column` as a child of this column.
//
// The parent unique id is written to `sub_column` *before* it is copied, so that both the
// caller's object and the stored child carry this column's unique id. (Assigning it after
// the copy, as was done previously, left the stored child without a parent id.)
//
// @param sub_column child column; its _parent_col_unique_id is updated as a side effect
void TabletColumn::add_sub_column(TabletColumn& sub_column) {
    sub_column._parent_col_unique_id = this->_unique_id;
    _sub_columns.push_back(std::make_shared<TabletColumn>(sub_column));
    _sub_column_count += 1;
}
668
669
19.7k
bool TabletColumn::is_row_store_column() const {
670
19.7k
    return _col_name == BeConsts::ROW_STORE_COL;
671
19.7k
}
672
673
// Creates the state-union aggregate function for an agg-state data type.
//
// `type` is cast to DataTypeAggState; its nested function is first passed to
// BeExecVersionManager::check_function_compatibility() together with the current and the
// column's BE exec versions, and is then wrapped in an AggregateStateUnion.
vectorized::AggregateFunctionPtr TabletColumn::get_aggregate_function_union(
        vectorized::DataTypePtr type, int current_be_exec_version) const {
    const auto* agg_state_type = assert_cast<const vectorized::DataTypeAggState*>(type.get());
    auto nested_function = agg_state_type->get_nested_function();

    BeExecVersionManager::check_function_compatibility(current_be_exec_version, _be_exec_version,
                                                       nested_function->get_name());

    return vectorized::AggregateStateUnion::create(nested_function, {type}, type);
}
681
682
// Resolves the aggregate function for this column.
//
// For agg-state columns the state-union function is returned. Otherwise the function is
// looked up in the simple factory under the lower-cased name
// "<aggregation name><suffix>". On success the function's version is set to this column's
// BE exec version.
//
// @param suffix                  appended to the aggregation name before the lookup
// @param current_be_exec_version only used for the agg-state compatibility check
// @return the function, or nullptr when the data type cannot be created or no function is
//         registered under the computed name (a WARNING is logged in both cases)
vectorized::AggregateFunctionPtr TabletColumn::get_aggregate_function(
        std::string suffix, int current_be_exec_version) const {
    auto type = vectorized::DataTypeFactory::instance().create_data_type(*this);
    if (!type) {
        // Without a data type neither branch below can proceed; bail out instead of
        // dereferencing a null pointer.
        LOG(WARNING) << "get column aggregate function failed, cannot create data type"
                     << ", aggregation_name="
                     << TabletColumn::get_string_by_aggregation_type(_aggregation);
        return nullptr;
    }

    vectorized::AggregateFunctionPtr function = nullptr;
    if (type->get_type_as_type_descriptor().type == PrimitiveType::TYPE_AGG_STATE) {
        function = get_aggregate_function_union(type, current_be_exec_version);
    } else {
        std::string origin_name = TabletColumn::get_string_by_aggregation_type(_aggregation);
        std::string agg_name = origin_name + suffix;
        // The factory is queried with a lower-case name.
        std::transform(agg_name.begin(), agg_name.end(), agg_name.begin(),
                       [](unsigned char c) { return std::tolower(c); });
        function = vectorized::AggregateFunctionSimpleFactory::instance().get(
                agg_name, {type}, type->is_nullable(), BeExecVersionManager::get_newest_version());
        if (!function) {
            LOG(WARNING) << "get column aggregate function failed, aggregation_name=" << origin_name
                         << ", column_type=" << type->get_name();
        }
    }

    if (function) {
        function->set_version(_be_exec_version);
        return function;
    }
    return nullptr;
}
707
708
4
// Replaces this column's path with a freshly allocated copy of `path`.
void TabletColumn::set_path_info(const vectorized::PathInData& path) {
    auto path_copy = std::make_shared<vectorized::PathInData>(path);
    _column_path = std::move(path_copy);
}
711
712
0
// Returns the data type that DataTypeFactory creates for this column.
vectorized::DataTypePtr TabletColumn::get_vec_type() const {
    auto& factory = vectorized::DataTypeFactory::instance();
    return factory.create_data_type(*this);
}
715
716
// escape '.' and '_'
717
11.4k
std::string escape_for_path_name(const std::string& s) {
718
11.4k
    std::string res;
719
11.4k
    const char* pos = s.data();
720
11.4k
    const char* end = pos + s.size();
721
11.5k
    while (pos != end) {
722
78
        unsigned char c = *pos;
723
78
        if (c == '.' || c == '_') {
724
12
            res += '%';
725
12
            res += vectorized::hex_digit_uppercase(c / 16);
726
12
            res += vectorized::hex_digit_uppercase(c % 16);
727
66
        } else {
728
66
            res += c;
729
66
        }
730
78
        ++pos;
731
78
    }
732
11.4k
    return res;
733
11.4k
}
734
735
4
void TabletIndex::set_escaped_escaped_index_suffix_path(const std::string& path_name) {
736
4
    std::string escaped_path = escape_for_path_name(path_name);
737
4
    _escaped_index_suffix_path = escaped_path;
738
4
}
739
740
void TabletIndex::init_from_thrift(const TOlapTableIndex& index,
741
0
                                   const TabletSchema& tablet_schema) {
742
0
    _index_id = index.index_id;
743
0
    _index_name = index.index_name;
744
    // init col_unique_id in index at be side, since col_unique_id may be -1 at fe side
745
    // get column unique id by name
746
0
    std::vector<int32_t> col_unique_ids(index.columns.size());
747
0
    for (size_t i = 0; i < index.columns.size(); i++) {
748
0
        auto column_idx = tablet_schema.field_index(index.columns[i]);
749
0
        if (column_idx >= 0) {
750
0
            col_unique_ids[i] = tablet_schema.column(column_idx).unique_id();
751
0
        } else {
752
            // if column unique id not found by column name, find by column unique id
753
            // column unique id can not bigger than tablet schema column size, if bigger than column size means
754
            // this column is a new column added by light schema change
755
0
            if (index.__isset.column_unique_ids &&
756
0
                index.column_unique_ids[i] < tablet_schema.num_columns()) {
757
0
                col_unique_ids[i] = index.column_unique_ids[i];
758
0
            } else {
759
0
                col_unique_ids[i] = -1;
760
0
            }
761
0
        }
762
0
    }
763
0
    _col_unique_ids = std::move(col_unique_ids);
764
765
0
    switch (index.index_type) {
766
0
    case TIndexType::BITMAP:
767
0
        _index_type = IndexType::BITMAP;
768
0
        break;
769
0
    case TIndexType::INVERTED:
770
0
        _index_type = IndexType::INVERTED;
771
0
        break;
772
0
    case TIndexType::BLOOMFILTER:
773
0
        _index_type = IndexType::BLOOMFILTER;
774
0
        break;
775
0
    case TIndexType::NGRAM_BF:
776
0
        _index_type = IndexType::NGRAM_BF;
777
0
        break;
778
0
    }
779
0
    if (index.__isset.properties) {
780
0
        for (auto kv : index.properties) {
781
0
            _properties[kv.first] = kv.second;
782
0
        }
783
0
    }
784
0
}
785
786
void TabletIndex::init_from_thrift(const TOlapTableIndex& index,
787
0
                                   const std::vector<int32_t>& column_uids) {
788
0
    _index_id = index.index_id;
789
0
    _index_name = index.index_name;
790
0
    _col_unique_ids = column_uids;
791
792
0
    switch (index.index_type) {
793
0
    case TIndexType::BITMAP:
794
0
        _index_type = IndexType::BITMAP;
795
0
        break;
796
0
    case TIndexType::INVERTED:
797
0
        _index_type = IndexType::INVERTED;
798
0
        break;
799
0
    case TIndexType::BLOOMFILTER:
800
0
        _index_type = IndexType::BLOOMFILTER;
801
0
        break;
802
0
    case TIndexType::NGRAM_BF:
803
0
        _index_type = IndexType::NGRAM_BF;
804
0
        break;
805
0
    }
806
0
    if (index.__isset.properties) {
807
0
        for (auto kv : index.properties) {
808
0
            _properties[kv.first] = kv.second;
809
0
        }
810
0
    }
811
0
}
812
813
64
void TabletIndex::init_from_pb(const TabletIndexPB& index) {
814
64
    _index_id = index.index_id();
815
64
    _index_name = index.index_name();
816
64
    _col_unique_ids.clear();
817
64
    for (auto col_unique_id : index.col_unique_id()) {
818
57
        _col_unique_ids.push_back(col_unique_id);
819
57
    }
820
64
    _index_type = index.index_type();
821
64
    for (auto& kv : index.properties()) {
822
15
        _properties[kv.first] = kv.second;
823
15
    }
824
64
    _escaped_index_suffix_path = index.index_suffix_name();
825
64
}
826
827
96
// Serializes this index into `index`.
//
// When the index has at least one property but none keyed by
// INVERTED_INDEX_PARSER_LOWERCASE_KEY, that key is written with
// INVERTED_INDEX_PARSER_TRUE (lowercase by default). The
// "tablet_schema.to_schema_pb" debug point, when active, skips both the lowercase
// property and this default.
void TabletIndex::to_schema_pb(TabletIndexPB* index) const {
    index->set_index_id(_index_id);
    index->set_index_name(_index_name);
    index->set_index_type(_index_type);
    index->set_index_suffix_name(_escaped_index_suffix_path);

    index->clear_col_unique_id();
    for (auto uid : _col_unique_ids) {
        index->add_col_unique_id(uid);
    }

    for (const auto& property : _properties) {
        DBUG_EXECUTE_IF("tablet_schema.to_schema_pb", {
            if (property.first == INVERTED_INDEX_PARSER_LOWERCASE_KEY) {
                continue;
            }
        })
        (*index->mutable_properties())[property.first] = property.second;
    }

    DBUG_EXECUTE_IF("tablet_schema.to_schema_pb", { return; })

    // lowercase by default
    const bool needs_default_lowercase =
            !_properties.empty() && !_properties.contains(INVERTED_INDEX_PARSER_LOWERCASE_KEY);
    if (needs_default_lowercase) {
        (*index->mutable_properties())[INVERTED_INDEX_PARSER_LOWERCASE_KEY] =
                INVERTED_INDEX_PARSER_TRUE;
    }
}
855
856
687
// Compiler-generated default constructor, defined out of line in this file.
TabletSchema::TabletSchema() = default;
857
858
663
// Hands every column cache handle still held by this schema back to the pool.
TabletSchema::~TabletSchema() {
    this->clear_column_cache_handlers();
}
861
862
616
int64_t TabletSchema::get_metadata_size() const {
863
616
    return sizeof(TabletSchema) + _vl_field_mem_size;
864
616
}
865
866
45
// Appends `column` at position _num_columns and updates the key / nullable /
// variant counters, the special-column indexes and the lookup maps.
//
// column: taken by value and moved into the schema; a variant column without path
//         info gets one built from its lower-cased name.
// col_type: decides which lookup maps the column is registered in (see below).
void TabletSchema::append_column(TabletColumn column, ColumnType col_type) {
    if (column.is_key()) {
        ++_num_key_columns;
    }
    if (column.is_nullable()) {
        ++_num_null_columns;
    }
    if (column.is_variant_type()) {
        ++_num_variant_columns;
        if (!column.has_path_info()) {
            const std::string& lower_name = column.name_lower_case();
            vectorized::PathInData name_path(lower_name);
            column.set_path_info(name_path);
        }
    }

    // Remember the position of the hidden/special columns.
    if (UNLIKELY(column.name() == DELETE_SIGN)) {
        _delete_sign_idx = _num_columns;
    } else if (UNLIKELY(column.name() == SEQUENCE_COL)) {
        _sequence_col_idx = _num_columns;
    } else if (UNLIKELY(column.name() == VERSION_COL)) {
        _version_col_idx = _num_columns;
    } else if (UNLIKELY(column.name() == SKIP_BITMAP_COL)) {
        _skip_bitmap_col_idx = _num_columns;
    }

    // Must happen before `column` is moved away.
    _field_id_to_index[column.unique_id()] = _num_columns;
    _cols.push_back(std::make_shared<TabletColumn>(std::move(column)));
    auto& appended = *_cols.back();

    // A dropped column may have the same name as an existing column, so it is not
    // added to the name -> index map; it is only reachable through the uid map.
    const bool indexed_by_path = col_type == ColumnType::VARIANT || appended.is_variant_type() ||
                                 appended.is_extracted_column();
    if (indexed_by_path) {
        _field_name_to_index.emplace(StringRef(appended.name()), _num_columns);
        _field_path_to_index[appended.path_info_ptr().get()] = _num_columns;
    } else if (col_type == ColumnType::NORMAL) {
        _field_name_to_index.emplace(StringRef(appended.name()), _num_columns);
    }
    ++_num_columns;
}
903
904
0
// Appends `column` to this column's sparse sub-columns and bumps the counter.
// The parameter is already a private copy, so it is moved rather than copied again.
void TabletColumn::append_sparse_column(TabletColumn column) {
    _sparse_cols.push_back(std::make_shared<TabletColumn>(std::move(column)));
    _num_sparse_columns++;
}
908
909
4
// Moves `index` to the end of the schema's index list.
void TabletSchema::append_index(TabletIndex&& index) {
    _indexes.emplace_back(std::move(index));
}
912
913
void TabletSchema::update_index(const TabletColumn& col, const IndexType& index_type,
914
0
                                TabletIndex&& index) {
915
0
    int32_t col_unique_id = col.is_extracted_column() ? col.parent_unique_id() : col.unique_id();
916
0
    const std::string& suffix_path = escape_for_path_name(col.suffix_path());
917
0
    for (size_t i = 0; i < _indexes.size(); i++) {
918
0
        for (int32_t id : _indexes[i].col_unique_ids()) {
919
0
            if (_indexes[i].index_type() == index_type && id == col_unique_id &&
920
0
                _indexes[i].get_index_suffix() == suffix_path) {
921
0
                _indexes[i] = std::move(index);
922
0
                break;
923
0
            }
924
0
        }
925
0
    }
926
0
}
927
928
0
void TabletSchema::replace_column(size_t pos, TabletColumn new_col) {
929
0
    CHECK_LT(pos, num_columns()) << " outof range";
930
0
    _cols[pos] = std::make_shared<TabletColumn>(std::move(new_col));
931
0
}
932
933
0
void TabletSchema::clear_index() {
934
0
    _indexes.clear();
935
0
}
936
937
0
void TabletSchema::remove_index(int64_t index_id) {
938
0
    std::vector<TabletIndex> indexes;
939
0
    for (auto index : _indexes) {
940
0
        if (index.index_id() == index_id) {
941
0
            continue;
942
0
        }
943
0
        indexes.emplace_back(std::move(index));
944
0
    }
945
0
    _indexes = std::move(indexes);
946
0
}
947
948
0
void TabletSchema::clear_columns() {
949
0
    _field_path_to_index.clear();
950
0
    _field_name_to_index.clear();
951
0
    _field_id_to_index.clear();
952
0
    _num_columns = 0;
953
0
    _num_variant_columns = 0;
954
0
    _num_null_columns = 0;
955
0
    _num_key_columns = 0;
956
0
    _cols.clear();
957
0
    clear_column_cache_handlers();
958
0
}
959
960
1.27k
void TabletSchema::clear_column_cache_handlers() {
961
1.27k
    for (auto* cache_handle : _column_cache_handlers) {
962
0
        TabletColumnObjectPool::instance()->release(cache_handle);
963
0
    }
964
1.27k
    _column_cache_handlers.clear();
965
1.27k
}
966
967
void TabletSchema::init_from_pb(const TabletSchemaPB& schema, bool ignore_extracted_columns,
968
616
                                bool reuse_cache_column) {
969
616
    _keys_type = schema.keys_type();
970
616
    _num_columns = 0;
971
616
    _num_variant_columns = 0;
972
616
    _num_key_columns = 0;
973
616
    _num_null_columns = 0;
974
616
    _cols.clear();
975
616
    _indexes.clear();
976
616
    _field_name_to_index.clear();
977
616
    _field_id_to_index.clear();
978
616
    _cluster_key_idxes.clear();
979
616
    clear_column_cache_handlers();
980
616
    for (const auto& i : schema.cluster_key_idxes()) {
981
6
        _cluster_key_idxes.push_back(i);
982
6
    }
983
2.95k
    for (auto& column_pb : schema.column()) {
984
2.95k
        TabletColumnPtr column;
985
2.95k
        if (reuse_cache_column) {
986
124
            auto pair = TabletColumnObjectPool::instance()->insert(
987
124
                    deterministic_string_serialize(column_pb));
988
124
            column = pair.second;
989
124
            _column_cache_handlers.push_back(pair.first);
990
2.83k
        } else {
991
2.83k
            column = std::make_shared<TabletColumn>();
992
2.83k
            column->init_from_pb(column_pb);
993
2.83k
        }
994
2.95k
        if (ignore_extracted_columns && column->is_extracted_column()) {
995
0
            continue;
996
0
        }
997
2.95k
        if (column->is_key()) {
998
1.41k
            _num_key_columns++;
999
1.41k
        }
1000
2.95k
        if (column->is_nullable()) {
1001
145
            _num_null_columns++;
1002
145
        }
1003
2.95k
        if (column->is_variant_type()) {
1004
2
            ++_num_variant_columns;
1005
2
        }
1006
1007
2.95k
        _cols.emplace_back(std::move(column));
1008
2.95k
        if (!_cols.back()->is_extracted_column()) {
1009
2.95k
            _vl_field_mem_size +=
1010
2.95k
                    sizeof(StringRef) + sizeof(char) * _cols.back()->name().size() + sizeof(size_t);
1011
2.95k
            _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns);
1012
2.95k
            _vl_field_mem_size += sizeof(int32_t) * 2;
1013
2.95k
            _field_id_to_index[_cols.back()->unique_id()] = _num_columns;
1014
2.95k
        }
1015
2.95k
        _num_columns++;
1016
2.95k
    }
1017
616
    for (auto& index_pb : schema.index()) {
1018
56
        TabletIndex index;
1019
56
        index.init_from_pb(index_pb);
1020
56
        _indexes.emplace_back(std::move(index));
1021
56
    }
1022
616
    _num_short_key_columns = schema.num_short_key_columns();
1023
616
    _num_rows_per_row_block = schema.num_rows_per_row_block();
1024
616
    _compress_kind = schema.compress_kind();
1025
616
    _next_column_unique_id = schema.next_column_unique_id();
1026
616
    if (schema.has_bf_fpp()) {
1027
0
        _has_bf_fpp = true;
1028
0
        _bf_fpp = schema.bf_fpp();
1029
616
    } else {
1030
616
        _has_bf_fpp = false;
1031
616
        _bf_fpp = BLOOM_FILTER_DEFAULT_FPP;
1032
616
    }
1033
616
    _is_in_memory = schema.is_in_memory();
1034
616
    _disable_auto_compaction = schema.disable_auto_compaction();
1035
616
    _enable_single_replica_compaction = schema.enable_single_replica_compaction();
1036
616
    _store_row_column = schema.store_row_column();
1037
616
    _skip_write_index_on_load = schema.skip_write_index_on_load();
1038
616
    _delete_sign_idx = schema.delete_sign_idx();
1039
616
    _sequence_col_idx = schema.sequence_col_idx();
1040
616
    _version_col_idx = schema.version_col_idx();
1041
616
    _skip_bitmap_col_idx = schema.skip_bitmap_col_idx();
1042
616
    _sort_type = schema.sort_type();
1043
616
    _sort_col_num = schema.sort_col_num();
1044
616
    _compression_type = schema.compression_type();
1045
616
    _row_store_page_size = schema.row_store_page_size();
1046
616
    _storage_page_size = schema.storage_page_size();
1047
616
    _schema_version = schema.schema_version();
1048
    // Default to V1 inverted index storage format for backward compatibility if not specified in schema.
1049
616
    if (!schema.has_inverted_index_storage_format()) {
1050
122
        _inverted_index_storage_format = InvertedIndexStorageFormatPB::V1;
1051
494
    } else {
1052
494
        _inverted_index_storage_format = schema.inverted_index_storage_format();
1053
494
    }
1054
1055
616
    _row_store_column_unique_ids.assign(schema.row_store_column_unique_ids().begin(),
1056
616
                                        schema.row_store_column_unique_ids().end());
1057
616
    _variant_enable_flatten_nested = schema.variant_enable_flatten_nested();
1058
616
    _vl_field_mem_size += _row_store_column_unique_ids.capacity() * sizeof(int32_t);
1059
616
    update_metadata_size();
1060
616
}
1061
1062
166
void TabletSchema::copy_from(const TabletSchema& tablet_schema) {
1063
166
    TabletSchemaPB tablet_schema_pb;
1064
166
    tablet_schema.to_schema_pb(&tablet_schema_pb);
1065
166
    init_from_pb(tablet_schema_pb);
1066
166
    _table_id = tablet_schema.table_id();
1067
166
}
1068
1069
0
void TabletSchema::update_index_info_from(const TabletSchema& tablet_schema) {
1070
0
    for (auto& col : _cols) {
1071
0
        if (col->unique_id() < 0) {
1072
0
            continue;
1073
0
        }
1074
0
        const auto iter = tablet_schema._field_id_to_index.find(col->unique_id());
1075
0
        if (iter == tablet_schema._field_id_to_index.end()) {
1076
0
            continue;
1077
0
        }
1078
0
        auto col_idx = iter->second;
1079
0
        if (col_idx < 0 || col_idx >= tablet_schema._cols.size()) {
1080
0
            continue;
1081
0
        }
1082
0
        col->set_is_bf_column(tablet_schema._cols[col_idx]->is_bf_column());
1083
0
        col->set_has_bitmap_index(tablet_schema._cols[col_idx]->has_bitmap_index());
1084
0
    }
1085
0
}
1086
1087
1.19k
std::string TabletSchema::to_key() const {
1088
1.19k
    TabletSchemaPB pb;
1089
1.19k
    to_schema_pb(&pb);
1090
1.19k
    return TabletSchema::deterministic_string_serialize(pb);
1091
1.19k
}
1092
1093
void TabletSchema::build_current_tablet_schema(int64_t index_id, int32_t version,
1094
                                               const OlapTableIndexSchema* index,
1095
0
                                               const TabletSchema& ori_tablet_schema) {
1096
    // copy from ori_tablet_schema
1097
0
    _keys_type = ori_tablet_schema.keys_type();
1098
0
    _num_short_key_columns = ori_tablet_schema.num_short_key_columns();
1099
0
    _num_rows_per_row_block = ori_tablet_schema.num_rows_per_row_block();
1100
0
    _compress_kind = ori_tablet_schema.compress_kind();
1101
1102
    // todo(yixiu): unique_id
1103
0
    _next_column_unique_id = ori_tablet_schema.next_column_unique_id();
1104
0
    _is_in_memory = ori_tablet_schema.is_in_memory();
1105
0
    _disable_auto_compaction = ori_tablet_schema.disable_auto_compaction();
1106
0
    _enable_single_replica_compaction = ori_tablet_schema.enable_single_replica_compaction();
1107
0
    _skip_write_index_on_load = ori_tablet_schema.skip_write_index_on_load();
1108
0
    _sort_type = ori_tablet_schema.sort_type();
1109
0
    _sort_col_num = ori_tablet_schema.sort_col_num();
1110
0
    _row_store_page_size = ori_tablet_schema.row_store_page_size();
1111
0
    _storage_page_size = ori_tablet_schema.storage_page_size();
1112
0
    _variant_enable_flatten_nested = ori_tablet_schema.variant_flatten_nested();
1113
1114
    // copy from table_schema_param
1115
0
    _schema_version = version;
1116
0
    _num_columns = 0;
1117
0
    _num_variant_columns = 0;
1118
0
    _num_key_columns = 0;
1119
0
    _num_null_columns = 0;
1120
0
    bool has_bf_columns = false;
1121
0
    _cols.clear();
1122
0
    _indexes.clear();
1123
0
    _field_name_to_index.clear();
1124
0
    _field_id_to_index.clear();
1125
0
    _delete_sign_idx = -1;
1126
0
    _sequence_col_idx = -1;
1127
0
    _version_col_idx = -1;
1128
0
    _skip_bitmap_col_idx = -1;
1129
0
    _cluster_key_idxes.clear();
1130
0
    clear_column_cache_handlers();
1131
0
    for (const auto& i : ori_tablet_schema._cluster_key_idxes) {
1132
0
        _cluster_key_idxes.push_back(i);
1133
0
    }
1134
0
    for (auto& column : index->columns) {
1135
0
        if (column->is_key()) {
1136
0
            _num_key_columns++;
1137
0
        }
1138
0
        if (column->is_nullable()) {
1139
0
            _num_null_columns++;
1140
0
        }
1141
0
        if (column->is_bf_column()) {
1142
0
            has_bf_columns = true;
1143
0
        }
1144
0
        if (column->is_variant_type()) {
1145
0
            ++_num_variant_columns;
1146
0
        }
1147
0
        if (UNLIKELY(column->name() == DELETE_SIGN)) {
1148
0
            _delete_sign_idx = _num_columns;
1149
0
        } else if (UNLIKELY(column->name() == SEQUENCE_COL)) {
1150
0
            _sequence_col_idx = _num_columns;
1151
0
        } else if (UNLIKELY(column->name() == VERSION_COL)) {
1152
0
            _version_col_idx = _num_columns;
1153
0
        } else if (UNLIKELY(column->name() == SKIP_BITMAP_COL)) {
1154
0
            _skip_bitmap_col_idx = _num_columns;
1155
0
        }
1156
0
        _cols.emplace_back(std::make_shared<TabletColumn>(*column));
1157
0
        _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns);
1158
0
        _field_id_to_index[_cols.back()->unique_id()] = _num_columns;
1159
0
        _num_columns++;
1160
0
    }
1161
1162
0
    for (auto& i : index->indexes) {
1163
0
        _indexes.emplace_back(*i);
1164
0
    }
1165
1166
0
    if (has_bf_columns) {
1167
0
        _has_bf_fpp = true;
1168
0
        _bf_fpp = ori_tablet_schema.bloom_filter_fpp();
1169
0
    } else {
1170
0
        _has_bf_fpp = false;
1171
0
        _bf_fpp = BLOOM_FILTER_DEFAULT_FPP;
1172
0
    }
1173
0
}
1174
1175
77
void TabletSchema::merge_dropped_columns(const TabletSchema& src_schema) {
1176
    // If they are the same tablet schema object, then just return
1177
77
    if (this == &src_schema) {
1178
0
        return;
1179
0
    }
1180
206
    for (const auto& src_col : src_schema.columns()) {
1181
206
        if (_field_id_to_index.find(src_col->unique_id()) == _field_id_to_index.end()) {
1182
0
            CHECK(!src_col->is_key())
1183
0
                    << src_col->name() << " is key column, should not be dropped.";
1184
0
            ColumnPB src_col_pb;
1185
            // There are some pointer in tablet column, not sure the reference relation, so
1186
            // that deep copy it.
1187
0
            src_col->to_schema_pb(&src_col_pb);
1188
0
            TabletColumn new_col(src_col_pb);
1189
0
            append_column(new_col, TabletSchema::ColumnType::DROPPED);
1190
0
        }
1191
206
    }
1192
77
}
1193
1194
0
// Returns a new schema built from this schema's protobuf form with extracted
// columns left out.
TabletSchemaSPtr TabletSchema::copy_without_variant_extracted_columns() {
    TabletSchemaPB snapshot;
    to_schema_pb(&snapshot);

    TabletSchemaSPtr stripped = std::make_shared<TabletSchema>();
    stripped->init_from_pb(snapshot, true /*ignore extracted_columns*/);
    return stripped;
}
1201
1202
// Dropped column is in _field_id_to_index but not in _field_name_to_index
1203
// Could refer to append_column method
1204
4.64k
bool TabletSchema::is_dropped_column(const TabletColumn& col) const {
1205
4.64k
    CHECK(_field_id_to_index.find(col.unique_id()) != _field_id_to_index.end())
1206
0
            << "could not find col with unique id = " << col.unique_id()
1207
0
            << " and name = " << col.name() << " table_id=" << _table_id;
1208
4.64k
    auto it = _field_name_to_index.find(StringRef {col.name()});
1209
4.64k
    return it == _field_name_to_index.end() || _cols[it->second]->unique_id() != col.unique_id();
1210
4.64k
}
1211
1212
0
void TabletSchema::copy_extracted_columns(const TabletSchema& src_schema) {
1213
0
    std::unordered_set<int32_t> variant_columns;
1214
0
    for (const auto& col : columns()) {
1215
0
        if (col->is_variant_type()) {
1216
0
            variant_columns.insert(col->unique_id());
1217
0
        }
1218
0
    }
1219
0
    for (const TabletColumnPtr& col : src_schema.columns()) {
1220
0
        if (col->is_extracted_column() && variant_columns.contains(col->parent_unique_id())) {
1221
0
            ColumnPB col_pb;
1222
0
            col->to_schema_pb(&col_pb);
1223
0
            TabletColumn new_col(col_pb);
1224
0
            append_column(new_col, ColumnType::VARIANT);
1225
0
        }
1226
0
    }
1227
0
}
1228
1229
0
void TabletSchema::reserve_extracted_columns() {
1230
0
    for (auto it = _cols.begin(); it != _cols.end();) {
1231
0
        if (!(*it)->is_extracted_column()) {
1232
0
            it = _cols.erase(it);
1233
0
        } else {
1234
0
            ++it;
1235
0
        }
1236
0
    }
1237
0
}
1238
1239
1.90k
// Writes this schema into `tablet_schema_pb`. Repeated fields (cluster key
// indexes, columns, indexes) are appended to whatever the message already holds;
// bf_fpp is only written when _has_bf_fpp is set.
void TabletSchema::to_schema_pb(TabletSchemaPB* tablet_schema_pb) const {
    auto& pb = *tablet_schema_pb;

    // Repeated fields.
    for (const auto& key_idx : _cluster_key_idxes) {
        pb.add_cluster_key_idxes(key_idx);
    }
    for (const auto& col : _cols) {
        col->to_schema_pb(pb.add_column());
    }
    for (const auto& index : _indexes) {
        index.to_schema_pb(pb.add_index());
    }
    pb.mutable_row_store_column_unique_ids()->Assign(_row_store_column_unique_ids.begin(),
                                                     _row_store_column_unique_ids.end());

    // Key / layout settings.
    pb.set_keys_type(_keys_type);
    pb.set_num_short_key_columns(_num_short_key_columns);
    pb.set_num_rows_per_row_block(_num_rows_per_row_block);
    pb.set_next_column_unique_id(_next_column_unique_id);
    pb.set_schema_version(_schema_version);
    pb.set_sort_type(_sort_type);
    pb.set_sort_col_num(_sort_col_num);

    // Special column positions.
    pb.set_delete_sign_idx(_delete_sign_idx);
    pb.set_sequence_col_idx(_sequence_col_idx);
    pb.set_version_col_idx(_version_col_idx);
    pb.set_skip_bitmap_col_idx(_skip_bitmap_col_idx);

    // Storage settings.
    pb.set_compress_kind(_compress_kind);
    pb.set_compression_type(_compression_type);
    pb.set_row_store_page_size(_row_store_page_size);
    pb.set_storage_page_size(_storage_page_size);
    pb.set_store_row_column(_store_row_column);
    pb.set_inverted_index_storage_format(_inverted_index_storage_format);
    if (_has_bf_fpp) {
        pb.set_bf_fpp(_bf_fpp);
    }

    // Behavior flags.
    pb.set_is_in_memory(_is_in_memory);
    pb.set_disable_auto_compaction(_disable_auto_compaction);
    pb.set_enable_single_replica_compaction(_enable_single_replica_compaction);
    pb.set_skip_write_index_on_load(_skip_write_index_on_load);
    pb.set_variant_enable_flatten_nested(_variant_enable_flatten_nested);
}
1279
1280
0
size_t TabletSchema::row_size() const {
1281
0
    size_t size = 0;
1282
0
    for (const auto& column : _cols) {
1283
0
        size += column->length();
1284
0
    }
1285
0
    size += (_num_columns + 7) / 8;
1286
1287
0
    return size;
1288
0
}
1289
1290
1.14k
int32_t TabletSchema::field_index(const std::string& field_name) const {
1291
1.14k
    const auto& found = _field_name_to_index.find(StringRef(field_name));
1292
1.14k
    return (found == _field_name_to_index.end()) ? -1 : found->second;
1293
1.14k
}
1294
1295
0
// Position of the column registered under `path`, or -1 if there is none.
int32_t TabletSchema::field_index(const vectorized::PathInData& path) const {
    auto it = _field_path_to_index.find(vectorized::PathInDataRef(&path));
    if (it == _field_path_to_index.end()) {
        return -1;
    }
    return it->second;
}
1299
1300
106
int32_t TabletSchema::field_index(int32_t col_unique_id) const {
1301
106
    const auto& found = _field_id_to_index.find(col_unique_id);
1302
106
    return (found == _field_id_to_index.end()) ? -1 : found->second;
1303
106
}
1304
1305
21.2k
const std::vector<TabletColumnPtr>& TabletSchema::columns() const {
1306
21.2k
    return _cols;
1307
21.2k
}
1308
1309
0
const std::vector<TabletColumnPtr>& TabletColumn::sparse_columns() const {
1310
0
    return _sparse_cols;
1311
0
}
1312
1313
97.9k
// Column at position `ordinal`. The bound is only verified by a DCHECK; the
// element access itself is unchecked.
const TabletColumn& TabletSchema::column(size_t ordinal) const {
    DCHECK(ordinal < _num_columns) << "ordinal:" << ordinal << ", _num_columns:" << _num_columns;
    const auto& col_ptr = _cols[ordinal];
    return *col_ptr;
}
1317
1318
0
// Sparse sub-column at position `ordinal`. The bound is only verified by a DCHECK;
// the element access itself is unchecked.
const TabletColumn& TabletColumn::sparse_column_at(size_t ordinal) const {
    // The diagnostic used to label this value "_num_columns", although it prints the
    // number of sparse columns.
    DCHECK(ordinal < _sparse_cols.size())
            << "ordinal:" << ordinal << ", _sparse_cols.size():" << _sparse_cols.size();
    return *_sparse_cols[ordinal];
}
1323
1324
133
// Column with unique id `col_unique_id`. Both lookups use at(), so an unknown id
// or an out-of-range position surfaces through at()'s own failure behavior.
const TabletColumn& TabletSchema::column_by_uid(int32_t col_unique_id) const {
    const auto position = _field_id_to_index.at(col_unique_id);
    return *_cols.at(position);
}
1327
1328
0
// Mutable access to the column with unique id `col_unique_id`. Both lookups use
// at(), so an unknown id or an out-of-range position surfaces through at()'s own
// failure behavior.
TabletColumn& TabletSchema::mutable_column_by_uid(int32_t col_unique_id) {
    const auto position = _field_id_to_index.at(col_unique_id);
    return *_cols.at(position);
}
1331
1332
7
// Mutable access to the column at position `ordinal`, bounds-checked through at().
TabletColumn& TabletSchema::mutable_column(size_t ordinal) {
    auto& col_ptr = _cols.at(ordinal);
    return *col_ptr;
}
1335
1336
0
// Replaces the whole index list with indexes built from `tindexes`, resolving
// column unique ids against this schema.
void TabletSchema::update_indexes_from_thrift(const std::vector<doris::TOlapTableIndex>& tindexes) {
    std::vector<TabletIndex> rebuilt;
    for (const auto& thrift_index : tindexes) {
        TabletIndex built;
        built.init_from_thrift(thrift_index, *this);
        rebuilt.push_back(std::move(built));
    }
    _indexes = std::move(rebuilt);
}
1345
1346
0
bool TabletSchema::exist_column(const std::string& field_name) const {
1347
0
    return _field_name_to_index.contains(StringRef {field_name});
1348
0
}
1349
1350
0
// Status flavour of exist_column(): OK when `field_name` is registered, otherwise
// an INTERNAL_ERROR that lists all known field names.
Status TabletSchema::have_column(const std::string& field_name) const {
    if (_field_name_to_index.contains(StringRef(field_name))) {
        return Status::OK();
    }
    return Status::Error<ErrorCode::INTERNAL_ERROR>(
            "Not found field_name, field_name:{}, schema:{}", field_name, get_all_field_names());
}
1358
1359
100
// Looks a column up by name. Returns a pointer to it, or an InternalError result
// when the name is unknown (which additionally trips a DCHECK in debug builds).
Result<const TabletColumn*> TabletSchema::column(const std::string& field_name) const {
    auto it = _field_name_to_index.find(StringRef {field_name});
    if (it != _field_name_to_index.end()) {
        return _cols[it->second].get();
    }

    DCHECK(false) << "field_name=" << field_name << ", table_id=" << _table_id
                  << ", field_name_to_index=" << get_all_field_names();
    return ResultError(
            Status::InternalError("column not found, name={}, table_id={}, schema_version={}",
                                  field_name, _table_id, _schema_version));
}
1370
1371
void TabletSchema::update_tablet_columns(const TabletSchema& tablet_schema,
1372
0
                                         const std::vector<TColumn>& t_columns) {
1373
0
    copy_from(tablet_schema);
1374
0
    if (!t_columns.empty() && t_columns[0].col_unique_id >= 0) {
1375
0
        clear_columns();
1376
0
        for (const auto& column : t_columns) {
1377
0
            append_column(TabletColumn(column));
1378
0
        }
1379
0
    }
1380
0
}
1381
1382
0
bool TabletSchema::has_inverted_index_with_index_id(int64_t index_id) const {
1383
0
    for (size_t i = 0; i < _indexes.size(); i++) {
1384
0
        if (_indexes[i].index_type() == IndexType::INVERTED && _indexes[i].index_id() == index_id) {
1385
0
            return true;
1386
0
        }
1387
0
    }
1388
0
    return false;
1389
0
}
1390
1391
// Finds the first INVERTED index that covers `col_unique_id` and whose suffix equals
// the escaped form of `suffix_path`.
//
// col_unique_id: unique id of the indexed column.
// suffix_path: unescaped suffix path; escaped with escape_for_path_name() before
//              the comparison.
//
// Returns a pointer into _indexes, or nullptr when nothing matches.
const TabletIndex* TabletSchema::inverted_index(int32_t col_unique_id,
                                                const std::string& suffix_path) const {
    // The escaped suffix does not depend on the loop, so it is computed at most once,
    // and only when some inverted index actually covers the column. Schemas without
    // a candidate index therefore pay nothing for it.
    std::string escaped_suffix;
    bool suffix_escaped = false;

    for (const auto& index : _indexes) {
        if (!(index.index_type() == IndexType::INVERTED)) {
            continue;
        }
        for (int32_t id : index.col_unique_ids()) {
            if (id != col_unique_id) {
                continue;
            }
            if (!suffix_escaped) {
                escaped_suffix = escape_for_path_name(suffix_path);
                suffix_escaped = true;
            }
            if (index.get_index_suffix() == escaped_suffix) {
                return &index;
            }
        }
    }
    return nullptr;
}
1405
1406
10.8k
// Finds the inverted index that applies to `col`, or nullptr when the column type
// does not support inverted indexes or no index matches.
const TabletIndex* TabletSchema::inverted_index(const TabletColumn& col) const {
    // Some columns(Float, Double, JSONB ...) from the variant do not support inverted index
    if (!segment_v2::InvertedIndexColumnWriter::check_support_inverted_index(col)) {
        return nullptr;
    }
    // TODO use more efficient impl
    // Use the parent id for extracted columns; this happens when accessing subcolumns
    // of variants, whose own unique id is not assigned.
    const int32_t col_unique_id =
            col.is_extracted_column() ? col.parent_unique_id() : col.unique_id();
    // The two-argument overload escapes the suffix itself, so the raw suffix is
    // passed. Escaping here as well was redundant: escape_for_path_name() only
    // rewrites '.' and '_', and its output contains neither (assuming
    // hex_digit_uppercase yields hex digits), so escaping twice equals escaping once.
    return inverted_index(col_unique_id, col.suffix_path());
}
1416
1417
0
bool TabletSchema::has_ngram_bf_index(int32_t col_unique_id) const {
1418
    // TODO use more efficient impl
1419
0
    for (size_t i = 0; i < _indexes.size(); i++) {
1420
0
        if (_indexes[i].index_type() == IndexType::NGRAM_BF) {
1421
0
            for (int32_t id : _indexes[i].col_unique_ids()) {
1422
0
                if (id == col_unique_id) {
1423
0
                    return true;
1424
0
                }
1425
0
            }
1426
0
        }
1427
0
    }
1428
1429
0
    return false;
1430
0
}
1431
1432
10.4k
// Returns a pointer to the first NGRAM_BF index in _indexes that lists `col_unique_id`
// among its column unique ids, or nullptr when there is none.
const TabletIndex* TabletSchema::get_ngram_bf_index(int32_t col_unique_id) const {
    // TODO use more efficient impl
    for (const auto& index : _indexes) {
        if (index.index_type() != IndexType::NGRAM_BF) {
            continue;
        }
        for (int32_t id : index.col_unique_ids()) {
            if (id == col_unique_id) {
                return &index;
            }
        }
    }
    return nullptr;
}
1445
1446
// Builds a Block containing one freshly created column per entry of `return_columns`
// (each entry is used as a position in _cols), in the given order.
// For every column, the is_nullable flag handed to the data type factory is true only when
// `tablet_columns_need_convert_null` is non-null and contains that column's position.
vectorized::Block TabletSchema::create_block(
        const std::vector<uint32_t>& return_columns,
        const std::unordered_set<uint32_t>* tablet_columns_need_convert_null) const {
    vectorized::Block result;
    for (const uint32_t cid : return_columns) {
        const auto& tablet_col = *_cols[cid];
        const bool convert_to_nullable = tablet_columns_need_convert_null != nullptr &&
                                         tablet_columns_need_convert_null->count(cid) != 0;
        auto type = vectorized::DataTypeFactory::instance().create_data_type(tablet_col,
                                                                             convert_to_nullable);
        result.insert({type->create_column(), type, tablet_col.name()});
    }
    return result;
}
1461
1462
1.74k
// Builds a Block with one freshly created column per entry of _cols, in schema order.
// When `ignore_dropped_col` is true, columns for which is_dropped_column() holds are left out.
vectorized::Block TabletSchema::create_block(bool ignore_dropped_col) const {
    vectorized::Block result;
    for (const auto& col_ptr : _cols) {
        const auto& tablet_col = *col_ptr;
        const bool skip = ignore_dropped_col && is_dropped_column(tablet_col);
        if (!skip) {
            auto type = vectorized::DataTypeFactory::instance().create_data_type(tablet_col);
            result.insert({type->create_column(), type, tablet_col.name()});
        }
    }
    return result;
}
1473
1474
0
// Builds a Block with one freshly created column for each id in `cids`, in the given order.
// Each id is used directly as a position in _cols; no bounds check is performed here.
vectorized::Block TabletSchema::create_block_by_cids(const std::vector<uint32_t>& cids) const {
    vectorized::Block result;
    for (size_t pos = 0; pos < cids.size(); ++pos) {
        const auto& tablet_col = *_cols[cids[pos]];
        auto type = vectorized::DataTypeFactory::instance().create_data_type(tablet_col);
        result.insert({type->create_column(), type, tablet_col.name()});
    }
    return result;
}
1483
1484
0
// Field-wise equality of two TabletColumn objects.
// Compares unique id, name, type, key flag, aggregation, nullability, default value (only when
// one is set), decimal precision/frac (only for decimal columns), length, index length,
// bloom-filter flag, bitmap-index flag and the column path.
// Two column paths are equal when both are null, or both are non-null and the pointed-to
// paths compare equal.
bool operator==(const TabletColumn& a, const TabletColumn& b) {
    if (a._unique_id != b._unique_id) return false;
    if (a._col_name != b._col_name) return false;
    if (a._type != b._type) return false;
    if (a._is_key != b._is_key) return false;
    if (a._aggregation != b._aggregation) return false;
    if (a._is_nullable != b._is_nullable) return false;
    if (a._has_default_value != b._has_default_value) return false;
    // The default value is only meaningful when both sides actually have one.
    if (a._has_default_value) {
        if (a._default_value != b._default_value) return false;
    }
    if (a._is_decimal != b._is_decimal) return false;
    // Precision and frac are only compared for decimal columns.
    if (a._is_decimal) {
        if (a._precision != b._precision) return false;
        if (a._frac != b._frac) return false;
    }
    if (a._length != b._length) return false;
    if (a._index_length != b._index_length) return false;
    if (a._is_bf_column != b._is_bf_column) return false;
    if (a._has_bitmap_index != b._has_bitmap_index) return false;

    // Exactly one side having a path means the columns differ. The previous code tested
    // `a` against itself for the "a is null, b is not" case, so that mismatch went unnoticed.
    const bool a_has_path = a._column_path != nullptr;
    const bool b_has_path = b._column_path != nullptr;
    if (a_has_path != b_has_path) return false;
    if (a_has_path && *a._column_path != *b._column_path) return false;
    return true;
}
1511
1512
0
// Inequality for TabletColumn, defined as the negation of operator==.
bool operator!=(const TabletColumn& a, const TabletColumn& b) {
    const bool equal = (a == b);
    return !equal;
}
1515
1516
1
// Equality of two TabletSchema objects: keys type, every column (compared by value through
// the stored pointers), the column counters, and the listed schema-level settings.
// Only the fields named below take part in the comparison.
bool operator==(const TabletSchema& a, const TabletSchema& b) {
    if (a._keys_type != b._keys_type || a._cols.size() != b._cols.size()) {
        return false;
    }
    for (size_t idx = 0; idx < a._cols.size(); ++idx) {
        if (*a._cols[idx] != *b._cols[idx]) {
            return false;
        }
    }

    // Column counters and storage layout settings.
    if (a._num_columns != b._num_columns || a._num_key_columns != b._num_key_columns ||
        a._num_null_columns != b._num_null_columns ||
        a._num_short_key_columns != b._num_short_key_columns ||
        a._num_rows_per_row_block != b._num_rows_per_row_block ||
        a._compress_kind != b._compress_kind ||
        a._next_column_unique_id != b._next_column_unique_id ||
        a._has_bf_fpp != b._has_bf_fpp) {
        return false;
    }

    // The bloom filter fpp is compared with a 1e-6 tolerance, and only when it is set.
    if (a._has_bf_fpp && std::abs(a._bf_fpp - b._bf_fpp) > 1e-6) {
        return false;
    }

    // Remaining tablet-level flags and page sizes.
    if (a._is_in_memory != b._is_in_memory || a._delete_sign_idx != b._delete_sign_idx ||
        a._disable_auto_compaction != b._disable_auto_compaction ||
        a._enable_single_replica_compaction != b._enable_single_replica_compaction ||
        a._store_row_column != b._store_row_column ||
        a._row_store_page_size != b._row_store_page_size ||
        a._storage_page_size != b._storage_page_size ||
        a._skip_write_index_on_load != b._skip_write_index_on_load ||
        a._variant_enable_flatten_nested != b._variant_enable_flatten_nested) {
        return false;
    }
    return true;
}
1544
1545
1
// Inequality for TabletSchema, defined as the negation of operator==.
bool operator!=(const TabletSchema& a, const TabletSchema& b) {
    const bool equal = (a == b);
    return !equal;
}
1548
1549
} // namespace doris