Coverage Report

Created: 2025-06-20 14:56

/root/doris/be/src/olap/tablet_schema.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "olap/tablet_schema.h"
19
20
#include <gen_cpp/Descriptors_types.h>
21
#include <gen_cpp/olap_file.pb.h>
22
#include <glog/logging.h>
23
#include <google/protobuf/io/coded_stream.h>
24
#include <google/protobuf/io/zero_copy_stream.h>
25
#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
26
27
#include <algorithm>
28
#include <cctype>
29
// IWYU pragma: no_include <bits/std_abs.h>
30
#include <cmath> // IWYU pragma: keep
31
#include <memory>
32
#include <ostream>
33
#include <vector>
34
35
#include "common/compiler_util.h" // IWYU pragma: keep
36
#include "common/consts.h"
37
#include "common/status.h"
38
#include "exec/tablet_info.h"
39
#include "olap/inverted_index_parser.h"
40
#include "olap/olap_common.h"
41
#include "olap/olap_define.h"
42
#include "olap/tablet_column_object_pool.h"
43
#include "olap/types.h"
44
#include "olap/utils.h"
45
#include "tablet_meta.h"
46
#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
47
#include "vec/aggregate_functions/aggregate_function_state_union.h"
48
#include "vec/common/hex.h"
49
#include "vec/common/string_ref.h"
50
#include "vec/core/block.h"
51
#include "vec/data_types/data_type.h"
52
#include "vec/data_types/data_type_factory.hpp"
53
#include "vec/data_types/data_type_map.h"
54
#include "vec/data_types/data_type_struct.h"
55
#include "vec/json/path_in_data.h"
56
57
namespace doris {
58
59
0
// Maps a query-layer PrimitiveType onto the storage-layer FieldType.
//
// Any primitive type without a dedicated storage mapping below (including the
// ones explicitly listed as "not implemented") yields OLAP_FIELD_TYPE_UNKNOWN.
FieldType TabletColumn::get_field_type_by_type(PrimitiveType primitiveType) {
    FieldType mapped = FieldType::OLAP_FIELD_TYPE_UNKNOWN;
    switch (primitiveType) {
    case PrimitiveType::TYPE_NULL:
        mapped = FieldType::OLAP_FIELD_TYPE_NONE;
        break;
    case PrimitiveType::TYPE_BOOLEAN:
        mapped = FieldType::OLAP_FIELD_TYPE_BOOL;
        break;

    // integers
    case PrimitiveType::TYPE_TINYINT:
        mapped = FieldType::OLAP_FIELD_TYPE_TINYINT;
        break;
    case PrimitiveType::TYPE_SMALLINT:
        mapped = FieldType::OLAP_FIELD_TYPE_SMALLINT;
        break;
    case PrimitiveType::TYPE_INT:
        mapped = FieldType::OLAP_FIELD_TYPE_INT;
        break;
    case PrimitiveType::TYPE_BIGINT:
        mapped = FieldType::OLAP_FIELD_TYPE_BIGINT;
        break;
    case PrimitiveType::TYPE_LARGEINT:
        mapped = FieldType::OLAP_FIELD_TYPE_LARGEINT;
        break;

    // floating point
    case PrimitiveType::TYPE_FLOAT:
        mapped = FieldType::OLAP_FIELD_TYPE_FLOAT;
        break;
    case PrimitiveType::TYPE_DOUBLE:
        mapped = FieldType::OLAP_FIELD_TYPE_DOUBLE;
        break;

    // decimals
    case PrimitiveType::TYPE_DECIMAL32:
        mapped = FieldType::OLAP_FIELD_TYPE_DECIMAL32;
        break;
    case PrimitiveType::TYPE_DECIMAL64:
        mapped = FieldType::OLAP_FIELD_TYPE_DECIMAL64;
        break;
    case PrimitiveType::TYPE_DECIMAL128I:
        mapped = FieldType::OLAP_FIELD_TYPE_DECIMAL128I;
        break;

    // character data
    case PrimitiveType::TYPE_CHAR:
        mapped = FieldType::OLAP_FIELD_TYPE_CHAR;
        break;
    case PrimitiveType::TYPE_VARCHAR:
        mapped = FieldType::OLAP_FIELD_TYPE_VARCHAR;
        break;
    case PrimitiveType::TYPE_STRING:
        mapped = FieldType::OLAP_FIELD_TYPE_STRING;
        break;

    // date and time
    case PrimitiveType::TYPE_DATE:
        mapped = FieldType::OLAP_FIELD_TYPE_DATE;
        break;
    case PrimitiveType::TYPE_DATETIME:
        mapped = FieldType::OLAP_FIELD_TYPE_DATETIME;
        break;
    case PrimitiveType::TYPE_DATEV2:
        mapped = FieldType::OLAP_FIELD_TYPE_DATEV2;
        break;
    case PrimitiveType::TYPE_DATETIMEV2:
        mapped = FieldType::OLAP_FIELD_TYPE_DATETIMEV2;
        break;
    case PrimitiveType::TYPE_TIMEV2:
        mapped = FieldType::OLAP_FIELD_TYPE_TIMEV2;
        break;

    // nested types
    case PrimitiveType::TYPE_STRUCT:
        mapped = FieldType::OLAP_FIELD_TYPE_STRUCT;
        break;
    case PrimitiveType::TYPE_ARRAY:
        mapped = FieldType::OLAP_FIELD_TYPE_ARRAY;
        break;
    case PrimitiveType::TYPE_MAP:
        mapped = FieldType::OLAP_FIELD_TYPE_MAP;
        break;

    // semi-structured and aggregate-state types
    case PrimitiveType::TYPE_JSONB:
        mapped = FieldType::OLAP_FIELD_TYPE_JSONB;
        break;
    case PrimitiveType::TYPE_VARIANT:
        mapped = FieldType::OLAP_FIELD_TYPE_VARIANT;
        break;
    case PrimitiveType::TYPE_HLL:
        mapped = FieldType::OLAP_FIELD_TYPE_HLL;
        break;
    case PrimitiveType::TYPE_BITMAP:
        mapped = FieldType::OLAP_FIELD_TYPE_BITMAP;
        break;
    case PrimitiveType::TYPE_QUANTILE_STATE:
        mapped = FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE;
        break;
    case PrimitiveType::TYPE_AGG_STATE:
        mapped = FieldType::OLAP_FIELD_TYPE_AGG_STATE;
        break;

    // No storage mapping: keep the UNKNOWN default.
    case PrimitiveType::INVALID_TYPE:
    case PrimitiveType::TYPE_BINARY:          // Not implemented
    case PrimitiveType::TYPE_DECIMALV2:       // Not implemented
    case PrimitiveType::TYPE_LAMBDA_FUNCTION: // Not implemented
    default:
        break;
    }
    return mapped;
}
131
132
24.3k
// Parses a type name (case-insensitive) into the storage-layer FieldType.
//
// Most names must match exactly. "DECIMAL", "VARCHAR" and "HLL" are matched as
// prefixes, so any string starting with one of them is accepted; the exact
// names DECIMAL32 / DECIMAL64 / DECIMAL128I / DECIMAL256 take precedence over
// the generic "DECIMAL" prefix. "LIST" is accepted as an alias of ARRAY and
// "OBJECT" as an alias of BITMAP.
//
// @param type_str type name in any letter case.
// @return the matching FieldType, or OLAP_FIELD_TYPE_UNKNOWN (after logging a
//         warning) when the name is not recognised.
FieldType TabletColumn::get_field_type_by_string(const std::string& type_str) {
    std::string upper_type_str = type_str;
    // std::toupper has undefined behaviour for negative values other than EOF,
    // so each byte is widened through unsigned char first (same convention as
    // the std::tolower call in get_aggregate_function).
    std::transform(upper_type_str.begin(), upper_type_str.end(), upper_type_str.begin(),
                   [](unsigned char c) { return static_cast<char>(std::toupper(c)); });

    struct NamedType {
        const char* name;
        FieldType type;
    };
    // Names that must match the whole string.
    static constexpr NamedType kExactNames[] = {
            {"TINYINT", FieldType::OLAP_FIELD_TYPE_TINYINT},
            {"SMALLINT", FieldType::OLAP_FIELD_TYPE_SMALLINT},
            {"INT", FieldType::OLAP_FIELD_TYPE_INT},
            {"BIGINT", FieldType::OLAP_FIELD_TYPE_BIGINT},
            {"LARGEINT", FieldType::OLAP_FIELD_TYPE_LARGEINT},
            {"UNSIGNED_TINYINT", FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT},
            {"UNSIGNED_SMALLINT", FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT},
            {"UNSIGNED_INT", FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT},
            {"UNSIGNED_BIGINT", FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT},
            {"IPV4", FieldType::OLAP_FIELD_TYPE_IPV4},
            {"IPV6", FieldType::OLAP_FIELD_TYPE_IPV6},
            {"FLOAT", FieldType::OLAP_FIELD_TYPE_FLOAT},
            {"DISCRETE_DOUBLE", FieldType::OLAP_FIELD_TYPE_DISCRETE_DOUBLE},
            {"DOUBLE", FieldType::OLAP_FIELD_TYPE_DOUBLE},
            {"CHAR", FieldType::OLAP_FIELD_TYPE_CHAR},
            {"DATE", FieldType::OLAP_FIELD_TYPE_DATE},
            {"DATEV2", FieldType::OLAP_FIELD_TYPE_DATEV2},
            {"DATETIMEV2", FieldType::OLAP_FIELD_TYPE_DATETIMEV2},
            {"DATETIME", FieldType::OLAP_FIELD_TYPE_DATETIME},
            {"DECIMAL32", FieldType::OLAP_FIELD_TYPE_DECIMAL32},
            {"DECIMAL64", FieldType::OLAP_FIELD_TYPE_DECIMAL64},
            {"DECIMAL128I", FieldType::OLAP_FIELD_TYPE_DECIMAL128I},
            {"DECIMAL256", FieldType::OLAP_FIELD_TYPE_DECIMAL256},
            {"STRING", FieldType::OLAP_FIELD_TYPE_STRING},
            {"JSONB", FieldType::OLAP_FIELD_TYPE_JSONB},
            {"VARIANT", FieldType::OLAP_FIELD_TYPE_VARIANT},
            {"BOOLEAN", FieldType::OLAP_FIELD_TYPE_BOOL},
            {"STRUCT", FieldType::OLAP_FIELD_TYPE_STRUCT},
            {"LIST", FieldType::OLAP_FIELD_TYPE_ARRAY},
            {"MAP", FieldType::OLAP_FIELD_TYPE_MAP},
            {"OBJECT", FieldType::OLAP_FIELD_TYPE_BITMAP},
            {"BITMAP", FieldType::OLAP_FIELD_TYPE_BITMAP},
            {"ARRAY", FieldType::OLAP_FIELD_TYPE_ARRAY},
            {"QUANTILE_STATE", FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE},
            {"AGG_STATE", FieldType::OLAP_FIELD_TYPE_AGG_STATE},
    };
    for (const auto& entry : kExactNames) {
        if (upper_type_str == entry.name) {
            return entry.type;
        }
    }

    // Prefix matches. These run after the exact table so that e.g. "DECIMAL32"
    // is never swallowed by the generic "DECIMAL" prefix. No exact name above
    // starts with "VARCHAR" or "HLL", so the relative order is otherwise moot.
    if (0 == upper_type_str.compare(0, 7, "DECIMAL")) {
        return FieldType::OLAP_FIELD_TYPE_DECIMAL;
    }
    if (0 == upper_type_str.compare(0, 7, "VARCHAR")) {
        return FieldType::OLAP_FIELD_TYPE_VARCHAR;
    }
    if (0 == upper_type_str.compare(0, 3, "HLL")) {
        return FieldType::OLAP_FIELD_TYPE_HLL;
    }

    LOG(WARNING) << "invalid type string. [type='" << type_str << "']";
    return FieldType::OLAP_FIELD_TYPE_UNKNOWN;
}
221
222
7.99k
// Parses an aggregation name (case-insensitive) into a FieldAggregationMethod.
//
// @param str aggregation name in any letter case.
// @return the matching built-in method; OLAP_FIELD_AGGREGATION_GENERIC for any
//         other non-empty name; OLAP_FIELD_AGGREGATION_UNKNOWN for an empty
//         string.
FieldAggregationMethod TabletColumn::get_aggregation_type_by_string(const std::string& str) {
    std::string upper_str = str;
    // std::toupper has undefined behaviour for negative values other than EOF,
    // so each byte is widened through unsigned char first (same convention as
    // the std::tolower call in get_aggregate_function).
    std::transform(upper_str.begin(), upper_str.end(), upper_str.begin(),
                   [](unsigned char c) { return static_cast<char>(std::toupper(c)); });

    if (upper_str == "NONE") {
        return FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE;
    }
    if (upper_str == "SUM") {
        return FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM;
    }
    if (upper_str == "MIN") {
        return FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MIN;
    }
    if (upper_str == "MAX") {
        return FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MAX;
    }
    if (upper_str == "REPLACE") {
        return FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE;
    }
    if (upper_str == "REPLACE_IF_NOT_NULL") {
        return FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL;
    }
    if (upper_str == "HLL_UNION") {
        return FieldAggregationMethod::OLAP_FIELD_AGGREGATION_HLL_UNION;
    }
    if (upper_str == "BITMAP_UNION") {
        return FieldAggregationMethod::OLAP_FIELD_AGGREGATION_BITMAP_UNION;
    }
    if (upper_str == "QUANTILE_UNION") {
        return FieldAggregationMethod::OLAP_FIELD_AGGREGATION_QUANTILE_UNION;
    }

    // Any other non-empty name is classified as GENERIC; only an empty string
    // is reported as UNKNOWN.
    return upper_str.empty() ? FieldAggregationMethod::OLAP_FIELD_AGGREGATION_UNKNOWN
                             : FieldAggregationMethod::OLAP_FIELD_AGGREGATION_GENERIC;
}
254
255
62.1k
// Returns the canonical upper-case name of a storage FieldType.
//
// Note that OLAP_FIELD_TYPE_BITMAP is spelled "OBJECT" here. Types without an
// entry below are rendered as "UNKNOWN".
std::string TabletColumn::get_string_by_field_type(FieldType type) {
    const char* name = "UNKNOWN";
    switch (type) {
    // integers
    case FieldType::OLAP_FIELD_TYPE_TINYINT:
        name = "TINYINT";
        break;
    case FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT:
        name = "UNSIGNED_TINYINT";
        break;
    case FieldType::OLAP_FIELD_TYPE_SMALLINT:
        name = "SMALLINT";
        break;
    case FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT:
        name = "UNSIGNED_SMALLINT";
        break;
    case FieldType::OLAP_FIELD_TYPE_INT:
        name = "INT";
        break;
    case FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT:
        name = "UNSIGNED_INT";
        break;
    case FieldType::OLAP_FIELD_TYPE_BIGINT:
        name = "BIGINT";
        break;
    case FieldType::OLAP_FIELD_TYPE_LARGEINT:
        name = "LARGEINT";
        break;
    case FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT:
        name = "UNSIGNED_BIGINT";
        break;

    // network addresses
    case FieldType::OLAP_FIELD_TYPE_IPV4:
        name = "IPV4";
        break;
    case FieldType::OLAP_FIELD_TYPE_IPV6:
        name = "IPV6";
        break;

    // floating point
    case FieldType::OLAP_FIELD_TYPE_FLOAT:
        name = "FLOAT";
        break;
    case FieldType::OLAP_FIELD_TYPE_DOUBLE:
        name = "DOUBLE";
        break;
    case FieldType::OLAP_FIELD_TYPE_DISCRETE_DOUBLE:
        name = "DISCRETE_DOUBLE";
        break;

    // character data
    case FieldType::OLAP_FIELD_TYPE_CHAR:
        name = "CHAR";
        break;
    case FieldType::OLAP_FIELD_TYPE_VARCHAR:
        name = "VARCHAR";
        break;
    case FieldType::OLAP_FIELD_TYPE_STRING:
        name = "STRING";
        break;

    // date and time
    case FieldType::OLAP_FIELD_TYPE_DATE:
        name = "DATE";
        break;
    case FieldType::OLAP_FIELD_TYPE_DATEV2:
        name = "DATEV2";
        break;
    case FieldType::OLAP_FIELD_TYPE_DATETIME:
        name = "DATETIME";
        break;
    case FieldType::OLAP_FIELD_TYPE_DATETIMEV2:
        name = "DATETIMEV2";
        break;

    // decimals
    case FieldType::OLAP_FIELD_TYPE_DECIMAL:
        name = "DECIMAL";
        break;
    case FieldType::OLAP_FIELD_TYPE_DECIMAL32:
        name = "DECIMAL32";
        break;
    case FieldType::OLAP_FIELD_TYPE_DECIMAL64:
        name = "DECIMAL64";
        break;
    case FieldType::OLAP_FIELD_TYPE_DECIMAL128I:
        name = "DECIMAL128I";
        break;
    case FieldType::OLAP_FIELD_TYPE_DECIMAL256:
        name = "DECIMAL256";
        break;

    // semi-structured
    case FieldType::OLAP_FIELD_TYPE_JSONB:
        name = "JSONB";
        break;
    case FieldType::OLAP_FIELD_TYPE_VARIANT:
        name = "VARIANT";
        break;

    case FieldType::OLAP_FIELD_TYPE_BOOL:
        name = "BOOLEAN";
        break;

    // nested types
    case FieldType::OLAP_FIELD_TYPE_STRUCT:
        name = "STRUCT";
        break;
    case FieldType::OLAP_FIELD_TYPE_ARRAY:
        name = "ARRAY";
        break;
    case FieldType::OLAP_FIELD_TYPE_MAP:
        name = "MAP";
        break;

    // aggregate-state types
    case FieldType::OLAP_FIELD_TYPE_HLL:
        name = "HLL";
        break;
    case FieldType::OLAP_FIELD_TYPE_BITMAP:
        name = "OBJECT";
        break;
    case FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE:
        name = "QUANTILE_STATE";
        break;
    case FieldType::OLAP_FIELD_TYPE_AGG_STATE:
        name = "AGG_STATE";
        break;

    default:
        break;
    }
    return name;
}
366
367
56
// Returns the upper-case name of a built-in aggregation method.
// Any method without an entry below is rendered as "UNKNOWN".
std::string TabletColumn::get_string_by_aggregation_type(FieldAggregationMethod type) {
    const char* label = "UNKNOWN";
    switch (type) {
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE:
        label = "NONE";
        break;
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM:
        label = "SUM";
        break;
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MIN:
        label = "MIN";
        break;
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MAX:
        label = "MAX";
        break;
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE:
        label = "REPLACE";
        break;
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL:
        label = "REPLACE_IF_NOT_NULL";
        break;
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_HLL_UNION:
        label = "HLL_UNION";
        break;
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_BITMAP_UNION:
        label = "BITMAP_UNION";
        break;
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_QUANTILE_UNION:
        label = "QUANTILE_UNION";
        break;
    default:
        break;
    }
    return label;
}
400
401
4.25k
// Returns the field length used for a thrift primitive type.
//
// Cases are grouped by the value they return. For CHAR the result is
// string_length itself; for the other string-like types it is string_length
// plus sizeof() of the corresponding OLAP_*_MAX_LENGTH constant.
//
// @param type          thrift primitive type of the column.
// @param string_length declared length; only read for the string-like types.
// @return the length, or 0 (after logging a warning) for an unhandled type.
uint32_t TabletColumn::get_field_length_by_type(TPrimitiveType::type type, uint32_t string_length) {
    switch (type) {
    // ---- fixed-width types ----
    case TPrimitiveType::TINYINT:
    case TPrimitiveType::BOOLEAN:
        return 1;

    case TPrimitiveType::SMALLINT:
        return 2;

    case TPrimitiveType::DATE:
        return 3;

    case TPrimitiveType::INT:
    case TPrimitiveType::IPV4:
    case TPrimitiveType::DATEV2:
    case TPrimitiveType::FLOAT:
    case TPrimitiveType::DECIMAL32:
        return 4;

    case TPrimitiveType::BIGINT:
    case TPrimitiveType::DATETIME:
    case TPrimitiveType::DATETIMEV2:
    case TPrimitiveType::DOUBLE:
    case TPrimitiveType::DECIMAL64:
        return 8;

    case TPrimitiveType::DECIMALV2:
        return 12; // use 12 bytes in olap engine.

    case TPrimitiveType::LARGEINT:
    case TPrimitiveType::IPV6:
    case TPrimitiveType::QUANTILE_STATE:
    case TPrimitiveType::BITMAP:
    case TPrimitiveType::DECIMAL128I:
        return 16;

    case TPrimitiveType::DECIMAL256:
        return 32;

    // ---- string-like types, sized from string_length ----
    case TPrimitiveType::CHAR:
        return string_length;

    case TPrimitiveType::VARCHAR:
    case TPrimitiveType::HLL:
    case TPrimitiveType::AGG_STATE:
        return string_length + sizeof(OLAP_VARCHAR_MAX_LENGTH);

    case TPrimitiveType::STRING:
    case TPrimitiveType::VARIANT:
        return string_length + sizeof(OLAP_STRING_MAX_LENGTH);

    case TPrimitiveType::JSONB:
        return string_length + sizeof(OLAP_JSONB_MAX_LENGTH);

    // ---- nested types ----
    case TPrimitiveType::STRUCT:
        // Note that(xy): this is the length of struct type itself,
        // the length of its subtypes are not included.
        return OLAP_STRUCT_MAX_LENGTH;

    case TPrimitiveType::ARRAY:
        return OLAP_ARRAY_MAX_LENGTH;

    case TPrimitiveType::MAP:
        return OLAP_MAP_MAX_LENGTH;

    default:
        LOG(WARNING) << "unknown field type. [type=" << type << "]";
        return 0;
    }
}
467
468
18
bool TabletColumn::has_char_type() const {
469
18
    switch (_type) {
470
6
    case FieldType::OLAP_FIELD_TYPE_CHAR: {
471
6
        return true;
472
0
    }
473
8
    case FieldType::OLAP_FIELD_TYPE_ARRAY:
474
8
    case FieldType::OLAP_FIELD_TYPE_MAP:
475
8
    case FieldType::OLAP_FIELD_TYPE_STRUCT: {
476
8
        return std::any_of(_sub_columns.begin(), _sub_columns.end(),
477
8
                           [&](const auto& sub) -> bool { return sub->has_char_type(); });
478
8
    }
479
4
    default:
480
4
        return false;
481
18
    }
482
18
}
483
484
24.5k
// Default constructor: the only member initialised explicitly here is the
// aggregation method, which is set to NONE.
TabletColumn::TabletColumn()
        : _aggregation(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE) {
}
485
486
246
// Builds a column from an aggregation method and a storage type only.
// No other member is assigned in this constructor body.
TabletColumn::TabletColumn(FieldAggregationMethod agg, FieldType type) {
    this->_type = type;
    this->_aggregation = agg;
}
490
491
34
// Builds a column from aggregation method, storage type and nullability.
// The length is taken from the scalar type info registered for `filed_type`.
TabletColumn::TabletColumn(FieldAggregationMethod agg, FieldType filed_type, bool is_nullable) {
    this->_type = filed_type;
    this->_aggregation = agg;
    this->_is_nullable = is_nullable;
    // NOTE(review): the result of get_scalar_type_info() is dereferenced without
    // a null check — presumably callers only pass scalar types; confirm.
    this->_length = get_scalar_type_info(filed_type)->size();
}
497
498
// Builds a column with an explicit unique id and length in addition to the
// aggregation method, storage type and nullability.
TabletColumn::TabletColumn(FieldAggregationMethod agg, FieldType filed_type, bool is_nullable,
                           int32_t unique_id, size_t length) {
    // identity
    this->_unique_id = unique_id;
    this->_type = filed_type;
    // storage attributes
    this->_length = length;
    this->_is_nullable = is_nullable;
    this->_aggregation = agg;
}
506
507
0
// Builds a column from its protobuf description; all work is delegated to
// init_from_pb().
TabletColumn::TabletColumn(const ColumnPB& column) {
    this->init_from_pb(column);
}
510
511
0
// Builds a column from its thrift description; all work is delegated to
// init_from_thrift().
TabletColumn::TabletColumn(const TColumn& column) {
    this->init_from_thrift(column);
}
514
515
0
void TabletColumn::init_from_thrift(const TColumn& tcolumn) {
516
0
    ColumnPB column_pb;
517
0
    TabletMeta::init_column_from_tcolumn(tcolumn.col_unique_id, tcolumn, &column_pb);
518
0
    init_from_pb(column_pb);
519
0
}
520
521
24.2k
void TabletColumn::init_from_pb(const ColumnPB& column) {
522
24.2k
    _unique_id = column.unique_id();
523
24.2k
    _col_name = column.name();
524
24.2k
    _col_name_lower_case = to_lower(_col_name);
525
24.2k
    _type = TabletColumn::get_field_type_by_string(column.type());
526
24.2k
    _is_key = column.is_key();
527
24.2k
    _is_nullable = column.is_nullable();
528
24.2k
    _is_auto_increment = column.is_auto_increment();
529
530
24.2k
    _has_default_value = column.has_default_value();
531
24.2k
    if (_has_default_value) {
532
58
        _default_value = column.default_value();
533
58
    }
534
535
24.2k
    if (column.has_precision()) {
536
7.89k
        _is_decimal = true;
537
7.89k
        _precision = column.precision();
538
16.3k
    } else {
539
16.3k
        _is_decimal = false;
540
16.3k
    }
541
24.2k
    if (column.has_frac()) {
542
7.89k
        _frac = column.frac();
543
7.89k
    }
544
24.2k
    _length = column.length();
545
24.2k
    _index_length = column.index_length();
546
24.2k
    if (column.has_is_bf_column()) {
547
746
        _is_bf_column = column.is_bf_column();
548
23.4k
    } else {
549
23.4k
        _is_bf_column = false;
550
23.4k
    }
551
24.2k
    if (column.has_has_bitmap_index()) {
552
4.25k
        _has_bitmap_index = column.has_bitmap_index();
553
19.9k
    } else {
554
19.9k
        _has_bitmap_index = false;
555
19.9k
    }
556
24.2k
    if (column.has_aggregation()) {
557
7.99k
        _aggregation = get_aggregation_type_by_string(column.aggregation());
558
7.99k
        _aggregation_name = column.aggregation();
559
7.99k
    }
560
561
24.2k
    if (_type == FieldType::OLAP_FIELD_TYPE_AGG_STATE) {
562
0
        _result_is_nullable = column.result_is_nullable();
563
0
        _be_exec_version = column.be_exec_version();
564
0
    }
565
566
24.2k
    if (column.has_visible()) {
567
18.2k
        _visible = column.visible();
568
18.2k
    }
569
24.2k
    if (_type == FieldType::OLAP_FIELD_TYPE_ARRAY) {
570
4
        CHECK(column.children_columns_size() == 1)
571
0
                << "ARRAY type should has 1 children types, but got "
572
0
                << column.children_columns_size();
573
4
    }
574
24.2k
    if (_type == FieldType::OLAP_FIELD_TYPE_MAP) {
575
0
        DCHECK(column.children_columns_size() == 2)
576
0
                << "MAP type should has 2 children types, but got "
577
0
                << column.children_columns_size();
578
0
        if (UNLIKELY(column.children_columns_size() != 2)) {
579
0
            LOG(WARNING) << "MAP type should has 2 children types, but got "
580
0
                         << column.children_columns_size();
581
0
        }
582
0
    }
583
24.2k
    for (size_t i = 0; i < column.children_columns_size(); i++) {
584
4
        TabletColumn child_column;
585
4
        child_column.init_from_pb(column.children_columns(i));
586
4
        add_sub_column(child_column);
587
4
    }
588
24.2k
    if (column.has_column_path_info()) {
589
0
        _column_path = std::make_shared<vectorized::PathInData>();
590
0
        _column_path->from_protobuf(column.column_path_info());
591
0
        _parent_col_unique_id = column.column_path_info().parrent_column_unique_id();
592
0
    }
593
24.2k
    if (is_variant_type() && !column.has_column_path_info()) {
594
        // set path info for variant root column, to prevent from missing
595
4
        _column_path = std::make_shared<vectorized::PathInData>(_col_name_lower_case);
596
4
    }
597
24.2k
    for (const auto& column_pb : column.sparse_columns()) {
598
0
        TabletColumn column;
599
0
        column.init_from_pb(column_pb);
600
0
        _sparse_cols.emplace_back(std::make_shared<TabletColumn>(std::move(column)));
601
0
        _num_sparse_columns++;
602
0
    }
603
24.2k
}
604
605
// Creates a nullable VARIANT column for the path `root` + `paths`.
//
// The column gets unique id -1, records `parent_unique_id` as its parent, and
// is named after the full path string built from `root` and `paths`.
TabletColumn TabletColumn::create_materialized_variant_column(const std::string& root,
                                                              const std::vector<std::string>& paths,
                                                              int32_t parent_unique_id) {
    const vectorized::PathInData full_path(root, paths);

    TabletColumn variant_col;
    variant_col.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
    variant_col.set_is_nullable(true);
    variant_col.set_unique_id(-1);
    variant_col.set_parent_unique_id(parent_unique_id);
    variant_col.set_path_info(full_path);
    variant_col.set_name(full_path.get_path());
    return variant_col;
}
618
619
62.1k
// Serialises this column into `column`.
//
// Default value, precision/frac, bloom-filter flag, aggregation name and
// bitmap-index flag are written only when set on this column. Sub-columns and
// sparse columns are serialised recursively.
void TabletColumn::to_schema_pb(ColumnPB* column) const {
    // ---- identity and basic flags ----
    column->set_unique_id(_unique_id);
    column->set_name(_col_name);
    column->set_type(get_string_by_field_type(_type));
    column->set_is_key(_is_key);
    column->set_is_nullable(_is_nullable);

    // ---- optional scalar attributes ----
    if (_has_default_value) {
        column->set_default_value(_default_value);
    }
    if (_is_decimal) {
        column->set_precision(_precision);
        column->set_frac(_frac);
    }
    column->set_length(_length);
    column->set_index_length(_index_length);
    if (_is_bf_column) {
        column->set_is_bf_column(true);
    }
    if (!_aggregation_name.empty()) {
        column->set_aggregation(_aggregation_name);
    }
    column->set_result_is_nullable(_result_is_nullable);
    column->set_be_exec_version(_be_exec_version);
    if (_has_bitmap_index) {
        column->set_has_bitmap_index(true);
    }
    column->set_visible(_visible);

    // ---- sanity checks on the number of sub-columns for nested types ----
    if (_type == FieldType::OLAP_FIELD_TYPE_ARRAY) {
        // A malformed ARRAY is fatal.
        CHECK(_sub_columns.size() == 1)
                << "ARRAY type should has 1 children types, but got " << _sub_columns.size();
    } else if (_type == FieldType::OLAP_FIELD_TYPE_MAP) {
        // A malformed MAP only trips the debug check; otherwise it is logged.
        DCHECK(_sub_columns.size() == 2)
                << "MAP type should has 2 children types, but got " << _sub_columns.size();
        if (UNLIKELY(_sub_columns.size() != 2)) {
            LOG(WARNING) << "MAP type should has 2 children types, but got " << _sub_columns.size();
        }
    }

    // ---- sub-columns ----
    for (const auto& sub : _sub_columns) {
        sub->to_schema_pb(column->add_children_columns());
    }

    // ---- path info ----
    if (has_path_info()) {
        // CHECK_GT(_parent_col_unique_id, 0);
        _column_path->to_protobuf(column->mutable_column_path_info(), _parent_col_unique_id);
        // Update unstable information for variant columns. Some of the fields in the tablet schema
        // are irrelevant for variant sub-columns, but retaining them may lead to an excessive growth
        // in the number of tablet schema cache entries.
        if (_type == FieldType::OLAP_FIELD_TYPE_STRING) {
            column->set_length(INT_MAX);
        }
        column->set_index_length(0);
    }

    // ---- sparse columns ----
    for (const auto& sparse : _sparse_cols) {
        sparse->to_schema_pb(column->add_sparse_columns());
    }
}
681
682
72
// Appends a copy of `sub_column` to this column's sub-columns and records this
// column's unique id as the parent id on the caller's `sub_column`.
//
// NOTE(review): the copy stored in `_sub_columns` is taken before the parent id
// is assigned, so only the caller's object receives it and the stored copy
// keeps whatever parent id it already had. Confirm whether that is intended.
void TabletColumn::add_sub_column(TabletColumn& sub_column) {
    _sub_columns.emplace_back(std::make_shared<TabletColumn>(sub_column));
    sub_column._parent_col_unique_id = _unique_id;
    ++_sub_column_count;
}
687
688
44.8k
bool TabletColumn::is_row_store_column() const {
689
44.8k
    return _col_name == BeConsts::ROW_STORE_COL;
690
44.8k
}
691
692
// Builds the union aggregate function for an AGG_STATE column.
//
// `type` is cast to DataTypeAggState via assert_cast. Before the union function
// is created, the nested function's name is passed to
// BeExecVersionManager::check_function_compatibility() together with the
// current and the column's BE exec versions.
vectorized::AggregateFunctionPtr TabletColumn::get_aggregate_function_union(
        vectorized::DataTypePtr type, int current_be_exec_version) const {
    const auto* agg_state_type = assert_cast<const vectorized::DataTypeAggState*>(type.get());
    auto nested_function = agg_state_type->get_nested_function();

    BeExecVersionManager::check_function_compatibility(current_be_exec_version, _be_exec_version,
                                                       nested_function->get_name());

    return vectorized::AggregateStateUnion::create(nested_function, {type}, type);
}
700
701
// Resolves the aggregate function for this column.
//
// AGG_STATE columns use the union function built by
// get_aggregate_function_union(). Every other column looks up
// lower-case(<aggregation name> + suffix) in the simple aggregate-function
// factory. On success the function's version is set to this column's BE exec
// version.
//
// @param suffix                  appended to the aggregation name before lookup.
// @param current_be_exec_version forwarded to the AGG_STATE compatibility check.
// @return the function, or nullptr when the data type cannot be created or no
//         matching function exists.
vectorized::AggregateFunctionPtr TabletColumn::get_aggregate_function(
        std::string suffix, int current_be_exec_version) const {
    auto type = vectorized::DataTypeFactory::instance().create_data_type(*this);
    // Guard once up front: previously only the AGG_STATE test checked `type` for
    // null, while the fallback branch dereferenced it unconditionally.
    if (!type) {
        LOG(WARNING) << "get column aggregate function failed, cannot create data type, column="
                     << _col_name;
        return nullptr;
    }

    vectorized::AggregateFunctionPtr function = nullptr;
    if (type->get_primitive_type() == PrimitiveType::TYPE_AGG_STATE) {
        function = get_aggregate_function_union(type, current_be_exec_version);
    } else {
        const std::string origin_name = TabletColumn::get_string_by_aggregation_type(_aggregation);
        std::string agg_name = origin_name + suffix;
        std::transform(agg_name.begin(), agg_name.end(), agg_name.begin(),
                       [](unsigned char c) { return static_cast<char>(std::tolower(c)); });

        function = vectorized::AggregateFunctionSimpleFactory::instance().get(
                agg_name, {type}, type->is_nullable(), BeExecVersionManager::get_newest_version());
        if (!function) {
            LOG(WARNING) << "get column aggregate function failed, aggregation_name=" << origin_name
                         << ", column_type=" << type->get_name();
        }
    }

    if (!function) {
        return nullptr;
    }
    function->set_version(_be_exec_version);
    return function;
}
726
727
10
// Stores a private, shared-owned copy of `path` as this column's path info.
void TabletColumn::set_path_info(const vectorized::PathInData& path) {
    auto owned_path = std::make_shared<vectorized::PathInData>(path);
    _column_path = std::move(owned_path);
}
730
731
0
// Asks the data type factory for the vectorized data type matching this column.
vectorized::DataTypePtr TabletColumn::get_vec_type() const {
    auto&& factory = vectorized::DataTypeFactory::instance();
    return factory.create_data_type(*this);
}
734
735
// escape '.' and '_'
736
85.4k
std::string escape_for_path_name(const std::string& s) {
737
85.4k
    std::string res;
738
85.4k
    const char* pos = s.data();
739
85.4k
    const char* end = pos + s.size();
740
85.9k
    while (pos != end) {
741
430
        unsigned char c = *pos;
742
430
        if (c == '.' || c == '_') {
743
38
            res += '%';
744
38
            res += vectorized::hex_digit_uppercase(c / 16);
745
38
            res += vectorized::hex_digit_uppercase(c % 16);
746
392
        } else {
747
392
            res += c;
748
392
        }
749
430
        ++pos;
750
430
    }
751
85.4k
    return res;
752
85.4k
}
753
754
36
void TabletIndex::set_escaped_escaped_index_suffix_path(const std::string& path_name) {
755
36
    std::string escaped_path = escape_for_path_name(path_name);
756
36
    _escaped_index_suffix_path = escaped_path;
757
36
}
758
759
void TabletIndex::init_from_thrift(const TOlapTableIndex& index,
760
30
                                   const TabletSchema& tablet_schema) {
761
30
    _index_id = index.index_id;
762
30
    _index_name = index.index_name;
763
    // init col_unique_id in index at be side, since col_unique_id may be -1 at fe side
764
    // get column unique id by name
765
30
    std::vector<int32_t> col_unique_ids(index.columns.size());
766
60
    for (size_t i = 0; i < index.columns.size(); i++) {
767
30
        auto column_idx = tablet_schema.field_index(index.columns[i]);
768
30
        if (column_idx >= 0) {
769
22
            col_unique_ids[i] = tablet_schema.column(column_idx).unique_id();
770
22
        } else {
771
            // if column unique id not found by column name, find by column unique id
772
            // column unique id can not found means this column is a new column added by light schema change
773
8
            if (index.__isset.column_unique_ids && !index.column_unique_ids.empty() &&
774
8
                tablet_schema.has_column_unique_id(index.column_unique_ids[i])) {
775
2
                col_unique_ids[i] = index.column_unique_ids[i];
776
6
            } else {
777
6
                col_unique_ids[i] = -1;
778
6
            }
779
8
        }
780
30
    }
781
30
    _col_unique_ids = std::move(col_unique_ids);
782
783
30
    switch (index.index_type) {
784
0
    case TIndexType::BITMAP:
785
0
        _index_type = IndexType::BITMAP;
786
0
        break;
787
30
    case TIndexType::INVERTED:
788
30
        _index_type = IndexType::INVERTED;
789
30
        break;
790
0
    case TIndexType::BLOOMFILTER:
791
0
        _index_type = IndexType::BLOOMFILTER;
792
0
        break;
793
0
    case TIndexType::NGRAM_BF:
794
0
        _index_type = IndexType::NGRAM_BF;
795
0
        break;
796
30
    }
797
30
    if (index.__isset.properties) {
798
0
        for (auto kv : index.properties) {
799
0
            _properties[kv.first] = kv.second;
800
0
        }
801
0
    }
802
30
}
803
804
void TabletIndex::init_from_thrift(const TOlapTableIndex& index,
805
0
                                   const std::vector<int32_t>& column_uids) {
806
0
    _index_id = index.index_id;
807
0
    _index_name = index.index_name;
808
0
    _col_unique_ids = column_uids;
809
810
0
    switch (index.index_type) {
811
0
    case TIndexType::BITMAP:
812
0
        _index_type = IndexType::BITMAP;
813
0
        break;
814
0
    case TIndexType::INVERTED:
815
0
        _index_type = IndexType::INVERTED;
816
0
        break;
817
0
    case TIndexType::BLOOMFILTER:
818
0
        _index_type = IndexType::BLOOMFILTER;
819
0
        break;
820
0
    case TIndexType::NGRAM_BF:
821
0
        _index_type = IndexType::NGRAM_BF;
822
0
        break;
823
0
    }
824
0
    if (index.__isset.properties) {
825
0
        for (auto kv : index.properties) {
826
0
            _properties[kv.first] = kv.second;
827
0
        }
828
0
    }
829
0
}
830
831
14.6k
void TabletIndex::init_from_pb(const TabletIndexPB& index) {
832
14.6k
    _index_id = index.index_id();
833
14.6k
    _index_name = index.index_name();
834
14.6k
    _col_unique_ids.clear();
835
14.6k
    for (auto col_unique_id : index.col_unique_id()) {
836
14.5k
        _col_unique_ids.push_back(col_unique_id);
837
14.5k
    }
838
14.6k
    _index_type = index.index_type();
839
81.9k
    for (const auto& kv : index.properties()) {
840
81.9k
        _properties[kv.first] = kv.second;
841
81.9k
    }
842
14.6k
    _escaped_index_suffix_path = index.index_suffix_name();
843
14.6k
}
844
845
21.9k
// Writes this index definition into `index`.
//
// When `_properties` is non-empty but has no INVERTED_INDEX_PARSER_LOWERCASE_KEY
// entry, the protobuf additionally gets that key set to
// INVERTED_INDEX_PARSER_TRUE ("lowercase by default"). The
// "tablet_schema.to_schema_pb" debug point skips both the lowercase key while
// copying properties and the default-lowercase step.
void TabletIndex::to_schema_pb(TabletIndexPB* index) const {
    index->set_index_id(_index_id);
    index->set_index_name(_index_name);
    index->set_index_type(_index_type);
    index->set_index_suffix_name(_escaped_index_suffix_path);

    index->clear_col_unique_id();
    for (const auto uid : _col_unique_ids) {
        index->add_col_unique_id(uid);
    }

    auto* pb_properties = index->mutable_properties();
    for (const auto& kv : _properties) {
        DBUG_EXECUTE_IF("tablet_schema.to_schema_pb", {
            if (kv.first == INVERTED_INDEX_PARSER_LOWERCASE_KEY) {
                continue;
            }
        })
        (*pb_properties)[kv.first] = kv.second;
    }

    DBUG_EXECUTE_IF("tablet_schema.to_schema_pb", { return; })

    // lowercase by default
    const bool needs_default_lowercase =
            !_properties.empty() && !_properties.contains(INVERTED_INDEX_PARSER_LOWERCASE_KEY);
    if (needs_default_lowercase) {
        (*pb_properties)[INVERTED_INDEX_PARSER_LOWERCASE_KEY] = INVERTED_INDEX_PARSER_TRUE;
    }
}
873
874
5.31k
// Construction and destruction need no custom logic; both are defined here,
// out of line, with compiler-generated bodies.
TabletSchema::TabletSchema() = default;

TabletSchema::~TabletSchema() = default;
877
878
3.28k
int64_t TabletSchema::get_metadata_size() const {
879
3.28k
    return sizeof(TabletSchema);
880
3.28k
}
881
882
322
// Appends `column` at the end of the schema and updates all derived state:
// key / nullable / variant counters, the positions of the hidden special
// columns, and the lookup maps.
//
// The unique-id map is always updated. The name map is only updated for
// NORMAL columns and for variant-related columns (which are also registered in
// the path map); other column types are deliberately left out of the name map
// because a dropped column may share its name with an existing column.
void TabletSchema::append_column(TabletColumn column, ColumnType col_type) {
    const auto position = _num_columns;

    if (column.is_key()) {
        _num_key_columns++;
    }
    if (column.is_nullable()) {
        _num_null_columns++;
    }
    if (column.is_variant_type()) {
        ++_num_variant_columns;
        // A variant column without path info gets one derived from its
        // lower-cased name.
        if (!column.has_path_info()) {
            const std::string& lower_name = column.name_lower_case();
            column.set_path_info(vectorized::PathInData(lower_name));
        }
    }

    // Remember where the hidden special columns live.
    if (UNLIKELY(column.name() == DELETE_SIGN)) {
        _delete_sign_idx = position;
    } else if (UNLIKELY(column.name() == SEQUENCE_COL)) {
        _sequence_col_idx = position;
    } else if (UNLIKELY(column.name() == VERSION_COL)) {
        _version_col_idx = position;
    } else if (UNLIKELY(column.name() == SKIP_BITMAP_COL)) {
        _skip_bitmap_col_idx = position;
    }

    _field_uniqueid_to_index[column.unique_id()] = position;
    _cols.push_back(std::make_shared<TabletColumn>(std::move(column)));

    // From here on only the stored column is used: the name map keeps a
    // StringRef built from the stored column's name.
    const auto& stored = *_cols.back();
    const bool is_variant_related = col_type == ColumnType::VARIANT || stored.is_variant_type() ||
                                    stored.is_extracted_column();
    if (is_variant_related || col_type == ColumnType::NORMAL) {
        _field_name_to_index.emplace(StringRef(stored.name()), position);
    }
    if (is_variant_related) {
        _field_path_to_index[stored.path_info_ptr().get()] = position;
    }

    _num_columns++;
}
919
920
0
// Appends `column` to this column's sparse columns and bumps the sparse
// column counter.
void TabletColumn::append_sparse_column(TabletColumn column) {
    // `column` is already a private copy (passed by value): move it into the
    // shared object instead of copying it a second time.
    _sparse_cols.push_back(std::make_shared<TabletColumn>(std::move(column)));
    _num_sparse_columns++;
}
924
925
72
// Appends `index` to the schema and registers it in the
// (index type, column unique id, index suffix) lookup map once per indexed
// column. Keys that are already present keep their existing mapping
// (`emplace` does not overwrite).
void TabletSchema::append_index(TabletIndex&& index) {
    // Register the lookup keys first: `index` is consumed by the move below.
    for (int32_t id : index.col_unique_ids()) {
        _col_id_suffix_to_index.emplace(
                std::make_tuple(index.index_type(), id, index.get_index_suffix()), _indexes.size());
    }
    // A named rvalue reference is an lvalue; without std::move this would copy
    // the index even though the caller handed over ownership.
    _indexes.push_back(std::make_shared<TabletIndex>(std::move(index)));
}
932
933
void TabletSchema::update_index(const TabletColumn& col, const IndexType& index_type,
934
4
                                TabletIndex&& index) {
935
4
    int32_t col_unique_id = col.is_extracted_column() ? col.parent_unique_id() : col.unique_id();
936
4
    const std::string& suffix_path = escape_for_path_name(col.suffix_path());
937
4
    IndexKey key(index_type, col_unique_id, suffix_path);
938
4
    auto iter = _col_id_suffix_to_index.find(key);
939
4
    if (iter != _col_id_suffix_to_index.end()) {
940
2
        _indexes[iter->second] = std::make_shared<TabletIndex>(std::move(index));
941
2
        return;
942
2
    }
943
2
    LOG(WARNING) << " failed to update_index: " << index_type << " " << col_unique_id << " "
944
2
                 << suffix_path;
945
2
}
946
947
0
void TabletSchema::replace_column(size_t pos, TabletColumn new_col) {
948
0
    CHECK_LT(pos, num_columns()) << " outof range";
949
0
    _cols[pos] = std::make_shared<TabletColumn>(std::move(new_col));
950
0
}
951
952
2
void TabletSchema::clear_index() {
953
2
    _indexes.clear();
954
2
    _col_id_suffix_to_index.clear();
955
2
}
956
957
8
void TabletSchema::remove_index(int64_t index_id) {
958
8
    std::vector<TabletIndexPtr> indexes;
959
8
    std::unordered_map<IndexKey, int32_t, IndexKeyHash> col_id_suffix_to_index;
960
10
    for (auto index : _indexes) {
961
10
        if (index->index_id() == index_id) {
962
8
            continue;
963
8
        }
964
2
        for (int32_t col_uid : index->col_unique_ids()) {
965
2
            col_id_suffix_to_index.emplace(
966
2
                    std::make_tuple(index->index_type(), col_uid, index->get_index_suffix()),
967
2
                    indexes.size());
968
2
        }
969
2
        indexes.emplace_back(std::move(index));
970
2
    }
971
8
    _indexes = std::move(indexes);
972
8
    _col_id_suffix_to_index = std::move(col_id_suffix_to_index);
973
8
}
974
975
0
void TabletSchema::clear_columns() {
976
0
    _field_path_to_index.clear();
977
0
    _field_name_to_index.clear();
978
0
    _field_uniqueid_to_index.clear();
979
0
    _num_columns = 0;
980
0
    _num_variant_columns = 0;
981
0
    _num_null_columns = 0;
982
0
    _num_key_columns = 0;
983
0
    _cols.clear();
984
0
}
985
986
void TabletSchema::init_from_pb(const TabletSchemaPB& schema, bool ignore_extracted_columns,
987
3.18k
                                bool reuse_cache_column) {
988
3.18k
    _keys_type = schema.keys_type();
989
3.18k
    _num_columns = 0;
990
3.18k
    _num_variant_columns = 0;
991
3.18k
    _num_key_columns = 0;
992
3.18k
    _num_null_columns = 0;
993
3.18k
    _cols.clear();
994
3.18k
    _indexes.clear();
995
3.18k
    _col_id_suffix_to_index.clear();
996
3.18k
    _field_name_to_index.clear();
997
3.18k
    _field_uniqueid_to_index.clear();
998
3.18k
    _cluster_key_uids.clear();
999
3.18k
    for (const auto& i : schema.cluster_key_uids()) {
1000
12
        _cluster_key_uids.push_back(i);
1001
12
    }
1002
24.5k
    for (auto& column_pb : schema.column()) {
1003
24.5k
        TabletColumnPtr column;
1004
24.5k
        if (reuse_cache_column) {
1005
516
            auto pair = TabletColumnObjectPool::instance()->insert(
1006
516
                    deterministic_string_serialize(column_pb));
1007
516
            column = pair.second;
1008
            // Release the handle quickly, because we use shared ptr to manage column.
1009
            // It often core during tablet schema copy to another schema because handle's
1010
            // reference count should be managed mannually.
1011
516
            TabletColumnObjectPool::instance()->release(pair.first);
1012
23.9k
        } else {
1013
23.9k
            column = std::make_shared<TabletColumn>();
1014
23.9k
            column->init_from_pb(column_pb);
1015
23.9k
        }
1016
24.5k
        if (ignore_extracted_columns && column->is_extracted_column()) {
1017
0
            continue;
1018
0
        }
1019
24.5k
        if (column->is_key()) {
1020
4.57k
            _num_key_columns++;
1021
4.57k
        }
1022
24.5k
        if (column->is_nullable()) {
1023
14.7k
            _num_null_columns++;
1024
14.7k
        }
1025
24.5k
        if (column->is_variant_type()) {
1026
4
            ++_num_variant_columns;
1027
4
        }
1028
1029
24.5k
        _cols.emplace_back(std::move(column));
1030
24.5k
        if (!_cols.back()->is_extracted_column()) {
1031
24.5k
            _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns);
1032
24.5k
            _field_uniqueid_to_index[_cols.back()->unique_id()] = _num_columns;
1033
24.5k
        }
1034
24.5k
        _num_columns++;
1035
24.5k
    }
1036
14.6k
    for (const auto& index_pb : schema.index()) {
1037
14.6k
        TabletIndexPtr index;
1038
14.6k
        if (reuse_cache_column) {
1039
224
            auto pair = TabletColumnObjectPool::instance()->insert_index(
1040
224
                    deterministic_string_serialize(index_pb));
1041
224
            index = pair.second;
1042
            //  Only need the value to be cached by the pool, release it quickly because the handle need
1043
            // record reference count mannually, or it will core during tablet schema copy method.
1044
224
            TabletColumnObjectPool::instance()->release(pair.first);
1045
14.3k
        } else {
1046
14.3k
            index = std::make_shared<TabletIndex>();
1047
14.3k
            index->init_from_pb(index_pb);
1048
14.3k
        }
1049
14.6k
        for (int32_t col_uid : index->col_unique_ids()) {
1050
14.6k
            _col_id_suffix_to_index.emplace(
1051
14.6k
                    std::make_tuple(index->index_type(), col_uid, index->get_index_suffix()),
1052
14.6k
                    _indexes.size());
1053
14.6k
        }
1054
14.6k
        _indexes.emplace_back(std::move(index));
1055
14.6k
    }
1056
3.18k
    _num_short_key_columns = schema.num_short_key_columns();
1057
3.18k
    _num_rows_per_row_block = schema.num_rows_per_row_block();
1058
3.18k
    _compress_kind = schema.compress_kind();
1059
3.18k
    _next_column_unique_id = schema.next_column_unique_id();
1060
3.18k
    if (schema.has_bf_fpp()) {
1061
6
        _has_bf_fpp = true;
1062
6
        _bf_fpp = schema.bf_fpp();
1063
3.17k
    } else {
1064
3.17k
        _has_bf_fpp = false;
1065
3.17k
        _bf_fpp = BLOOM_FILTER_DEFAULT_FPP;
1066
3.17k
    }
1067
3.18k
    _is_in_memory = schema.is_in_memory();
1068
3.18k
    _disable_auto_compaction = schema.disable_auto_compaction();
1069
3.18k
    _enable_single_replica_compaction = schema.enable_single_replica_compaction();
1070
3.18k
    _store_row_column = schema.store_row_column();
1071
3.18k
    _skip_write_index_on_load = schema.skip_write_index_on_load();
1072
3.18k
    _delete_sign_idx = schema.delete_sign_idx();
1073
3.18k
    _sequence_col_idx = schema.sequence_col_idx();
1074
3.18k
    _version_col_idx = schema.version_col_idx();
1075
3.18k
    _skip_bitmap_col_idx = schema.skip_bitmap_col_idx();
1076
3.18k
    _sort_type = schema.sort_type();
1077
3.18k
    _sort_col_num = schema.sort_col_num();
1078
3.18k
    _compression_type = schema.compression_type();
1079
3.18k
    _row_store_page_size = schema.row_store_page_size();
1080
3.18k
    _storage_page_size = schema.storage_page_size();
1081
3.18k
    _schema_version = schema.schema_version();
1082
    // Default to V1 inverted index storage format for backward compatibility if not specified in schema.
1083
3.18k
    if (!schema.has_inverted_index_storage_format()) {
1084
355
        _inverted_index_storage_format = InvertedIndexStorageFormatPB::V1;
1085
2.82k
    } else {
1086
2.82k
        _inverted_index_storage_format = schema.inverted_index_storage_format();
1087
2.82k
    }
1088
1089
3.18k
    _row_store_column_unique_ids.assign(schema.row_store_column_unique_ids().begin(),
1090
3.18k
                                        schema.row_store_column_unique_ids().end());
1091
3.18k
    _enable_variant_flatten_nested = schema.enable_variant_flatten_nested();
1092
3.18k
    update_metadata_size();
1093
3.18k
}
1094
1095
748
void TabletSchema::copy_from(const TabletSchema& tablet_schema) {
1096
748
    TabletSchemaPB tablet_schema_pb;
1097
748
    tablet_schema.to_schema_pb(&tablet_schema_pb);
1098
748
    init_from_pb(tablet_schema_pb);
1099
748
    _table_id = tablet_schema.table_id();
1100
748
}
1101
1102
0
void TabletSchema::shawdow_copy_without_columns(const TabletSchema& tablet_schema) {
1103
0
    *this = tablet_schema;
1104
0
    _field_path_to_index.clear();
1105
0
    _field_name_to_index.clear();
1106
0
    _field_uniqueid_to_index.clear();
1107
0
    _num_columns = 0;
1108
0
    _num_variant_columns = 0;
1109
0
    _num_null_columns = 0;
1110
0
    _num_key_columns = 0;
1111
0
    _cols.clear();
1112
0
}
1113
1114
0
void TabletSchema::update_index_info_from(const TabletSchema& tablet_schema) {
1115
0
    for (auto& col : _cols) {
1116
0
        if (col->unique_id() < 0) {
1117
0
            continue;
1118
0
        }
1119
0
        const auto iter = tablet_schema._field_uniqueid_to_index.find(col->unique_id());
1120
0
        if (iter == tablet_schema._field_uniqueid_to_index.end()) {
1121
0
            continue;
1122
0
        }
1123
0
        auto col_idx = iter->second;
1124
0
        if (col_idx < 0 || col_idx >= tablet_schema._cols.size()) {
1125
0
            continue;
1126
0
        }
1127
0
        col->set_is_bf_column(tablet_schema._cols[col_idx]->is_bf_column());
1128
0
        col->set_has_bitmap_index(tablet_schema._cols[col_idx]->has_bitmap_index());
1129
0
    }
1130
0
}
1131
1132
6.16k
std::string TabletSchema::to_key() const {
1133
6.16k
    TabletSchemaPB pb;
1134
6.16k
    to_schema_pb(&pb);
1135
6.16k
    return TabletSchema::deterministic_string_serialize(pb);
1136
6.16k
}
1137
1138
void TabletSchema::build_current_tablet_schema(int64_t index_id, int32_t version,
1139
                                               const OlapTableIndexSchema* index,
1140
0
                                               const TabletSchema& ori_tablet_schema) {
1141
    // copy from ori_tablet_schema
1142
0
    _keys_type = ori_tablet_schema.keys_type();
1143
0
    _num_short_key_columns = ori_tablet_schema.num_short_key_columns();
1144
0
    _num_rows_per_row_block = ori_tablet_schema.num_rows_per_row_block();
1145
0
    _compress_kind = ori_tablet_schema.compress_kind();
1146
1147
    // todo(yixiu): unique_id
1148
0
    _next_column_unique_id = ori_tablet_schema.next_column_unique_id();
1149
0
    _is_in_memory = ori_tablet_schema.is_in_memory();
1150
0
    _disable_auto_compaction = ori_tablet_schema.disable_auto_compaction();
1151
0
    _enable_single_replica_compaction = ori_tablet_schema.enable_single_replica_compaction();
1152
0
    _skip_write_index_on_load = ori_tablet_schema.skip_write_index_on_load();
1153
0
    _sort_type = ori_tablet_schema.sort_type();
1154
0
    _sort_col_num = ori_tablet_schema.sort_col_num();
1155
0
    _row_store_page_size = ori_tablet_schema.row_store_page_size();
1156
0
    _storage_page_size = ori_tablet_schema.storage_page_size();
1157
0
    _enable_variant_flatten_nested = ori_tablet_schema.variant_flatten_nested();
1158
1159
    // copy from table_schema_param
1160
0
    _schema_version = version;
1161
0
    _num_columns = 0;
1162
0
    _num_variant_columns = 0;
1163
0
    _num_key_columns = 0;
1164
0
    _num_null_columns = 0;
1165
0
    bool has_bf_columns = false;
1166
0
    _cols.clear();
1167
0
    _indexes.clear();
1168
0
    _col_id_suffix_to_index.clear();
1169
0
    _field_name_to_index.clear();
1170
0
    _field_uniqueid_to_index.clear();
1171
0
    _delete_sign_idx = -1;
1172
0
    _sequence_col_idx = -1;
1173
0
    _version_col_idx = -1;
1174
0
    _skip_bitmap_col_idx = -1;
1175
0
    _cluster_key_uids.clear();
1176
0
    for (const auto& i : ori_tablet_schema._cluster_key_uids) {
1177
0
        _cluster_key_uids.push_back(i);
1178
0
    }
1179
0
    for (auto& column : index->columns) {
1180
0
        if (column->is_key()) {
1181
0
            _num_key_columns++;
1182
0
        }
1183
0
        if (column->is_nullable()) {
1184
0
            _num_null_columns++;
1185
0
        }
1186
0
        if (column->is_bf_column()) {
1187
0
            has_bf_columns = true;
1188
0
        }
1189
0
        if (column->is_variant_type()) {
1190
0
            ++_num_variant_columns;
1191
0
        }
1192
0
        if (UNLIKELY(column->name() == DELETE_SIGN)) {
1193
0
            _delete_sign_idx = _num_columns;
1194
0
        } else if (UNLIKELY(column->name() == SEQUENCE_COL)) {
1195
0
            _sequence_col_idx = _num_columns;
1196
0
        } else if (UNLIKELY(column->name() == VERSION_COL)) {
1197
0
            _version_col_idx = _num_columns;
1198
0
        } else if (UNLIKELY(column->name() == SKIP_BITMAP_COL)) {
1199
0
            _skip_bitmap_col_idx = _num_columns;
1200
0
        }
1201
0
        _cols.emplace_back(std::make_shared<TabletColumn>(*column));
1202
0
        _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns);
1203
0
        _field_uniqueid_to_index[_cols.back()->unique_id()] = _num_columns;
1204
0
        _num_columns++;
1205
0
    }
1206
1207
0
    for (const auto& i : index->indexes) {
1208
0
        for (int32_t col_uid : i->col_unique_ids()) {
1209
0
            _col_id_suffix_to_index.emplace(
1210
0
                    std::make_tuple(i->index_type(), col_uid, i->get_index_suffix()),
1211
0
                    _indexes.size());
1212
0
        }
1213
0
        _indexes.emplace_back(std::make_shared<TabletIndex>(*i));
1214
0
    }
1215
1216
0
    if (has_bf_columns) {
1217
0
        _has_bf_fpp = true;
1218
0
        _bf_fpp = ori_tablet_schema.bloom_filter_fpp();
1219
0
    } else {
1220
0
        _has_bf_fpp = false;
1221
0
        _bf_fpp = BLOOM_FILTER_DEFAULT_FPP;
1222
0
    }
1223
0
}
1224
1225
298
void TabletSchema::merge_dropped_columns(const TabletSchema& src_schema) {
1226
    // If they are the same tablet schema object, then just return
1227
298
    if (this == &src_schema) {
1228
0
        return;
1229
0
    }
1230
5.81k
    for (const auto& src_col : src_schema.columns()) {
1231
5.81k
        if (_field_uniqueid_to_index.find(src_col->unique_id()) == _field_uniqueid_to_index.end()) {
1232
0
            CHECK(!src_col->is_key())
1233
0
                    << src_col->name() << " is key column, should not be dropped.";
1234
0
            ColumnPB src_col_pb;
1235
            // There are some pointer in tablet column, not sure the reference relation, so
1236
            // that deep copy it.
1237
0
            src_col->to_schema_pb(&src_col_pb);
1238
0
            TabletColumn new_col(src_col_pb);
1239
0
            append_column(new_col, TabletSchema::ColumnType::DROPPED);
1240
0
        }
1241
5.81k
    }
1242
298
}
1243
1244
0
// Returns a new schema built from this schema's protobuf form with extracted
// columns left out.
TabletSchemaSPtr TabletSchema::copy_without_variant_extracted_columns() {
    TabletSchemaPB snapshot;
    this->to_schema_pb(&snapshot);

    TabletSchemaSPtr result = std::make_shared<TabletSchema>();
    result->init_from_pb(snapshot, true /*ignore extracted_columns*/);
    return result;
}
1251
1252
// Dropped column is in _field_uniqueid_to_index but not in _field_name_to_index
1253
// Could refer to append_column method
1254
12.3k
bool TabletSchema::is_dropped_column(const TabletColumn& col) const {
1255
12.3k
    CHECK(_field_uniqueid_to_index.find(col.unique_id()) != _field_uniqueid_to_index.end())
1256
0
            << "could not find col with unique id = " << col.unique_id()
1257
0
            << " and name = " << col.name() << " table_id=" << _table_id;
1258
12.3k
    auto it = _field_name_to_index.find(StringRef {col.name()});
1259
12.3k
    return it == _field_name_to_index.end() || _cols[it->second]->unique_id() != col.unique_id();
1260
12.3k
}
1261
1262
0
void TabletSchema::copy_extracted_columns(const TabletSchema& src_schema) {
1263
0
    std::unordered_set<int32_t> variant_columns;
1264
0
    for (const auto& col : columns()) {
1265
0
        if (col->is_variant_type()) {
1266
0
            variant_columns.insert(col->unique_id());
1267
0
        }
1268
0
    }
1269
0
    for (const TabletColumnPtr& col : src_schema.columns()) {
1270
0
        if (col->is_extracted_column() && variant_columns.contains(col->parent_unique_id())) {
1271
0
            ColumnPB col_pb;
1272
0
            col->to_schema_pb(&col_pb);
1273
0
            TabletColumn new_col(col_pb);
1274
0
            append_column(new_col, ColumnType::VARIANT);
1275
0
        }
1276
0
    }
1277
0
}
1278
1279
0
void TabletSchema::reserve_extracted_columns() {
1280
0
    for (auto it = _cols.begin(); it != _cols.end();) {
1281
0
        if (!(*it)->is_extracted_column()) {
1282
0
            it = _cols.erase(it);
1283
0
        } else {
1284
0
            ++it;
1285
0
        }
1286
0
    }
1287
0
}
1288
1289
10.5k
// Writes this schema into `tablet_schema_pb`.
// Repeated fields (cluster key uids, columns, indexes) are appended to whatever
// the protobuf already holds; the bloom filter fpp is only written when this
// schema has one.
void TabletSchema::to_schema_pb(TabletSchemaPB* tablet_schema_pb) const {
    // ---- repeated fields ---------------------------------------------------
    for (const auto& uid : _cluster_key_uids) {
        tablet_schema_pb->add_cluster_key_uids(uid);
    }
    for (const auto& col : _cols) {
        col->to_schema_pb(tablet_schema_pb->add_column());
    }
    for (const auto& index : _indexes) {
        index->to_schema_pb(tablet_schema_pb->add_index());
    }
    tablet_schema_pb->mutable_row_store_column_unique_ids()->Assign(
            _row_store_column_unique_ids.begin(), _row_store_column_unique_ids.end());

    // ---- layout and compression -------------------------------------------
    tablet_schema_pb->set_keys_type(_keys_type);
    tablet_schema_pb->set_num_short_key_columns(_num_short_key_columns);
    tablet_schema_pb->set_num_rows_per_row_block(_num_rows_per_row_block);
    tablet_schema_pb->set_compress_kind(_compress_kind);
    tablet_schema_pb->set_compression_type(_compression_type);
    tablet_schema_pb->set_sort_type(_sort_type);
    tablet_schema_pb->set_sort_col_num(_sort_col_num);
    tablet_schema_pb->set_row_store_page_size(_row_store_page_size);
    tablet_schema_pb->set_storage_page_size(_storage_page_size);
    tablet_schema_pb->set_inverted_index_storage_format(_inverted_index_storage_format);
    if (_has_bf_fpp) {
        tablet_schema_pb->set_bf_fpp(_bf_fpp);
    }

    // ---- versions and ids --------------------------------------------------
    tablet_schema_pb->set_next_column_unique_id(_next_column_unique_id);
    tablet_schema_pb->set_schema_version(_schema_version);

    // ---- positions of the hidden special columns --------------------------
    tablet_schema_pb->set_delete_sign_idx(_delete_sign_idx);
    tablet_schema_pb->set_sequence_col_idx(_sequence_col_idx);
    tablet_schema_pb->set_version_col_idx(_version_col_idx);
    tablet_schema_pb->set_skip_bitmap_col_idx(_skip_bitmap_col_idx);

    // ---- boolean switches --------------------------------------------------
    tablet_schema_pb->set_is_in_memory(_is_in_memory);
    tablet_schema_pb->set_disable_auto_compaction(_disable_auto_compaction);
    tablet_schema_pb->set_enable_single_replica_compaction(_enable_single_replica_compaction);
    tablet_schema_pb->set_store_row_column(_store_row_column);
    tablet_schema_pb->set_skip_write_index_on_load(_skip_write_index_on_load);
    tablet_schema_pb->set_enable_variant_flatten_nested(_enable_variant_flatten_nested);
}
1329
1330
0
size_t TabletSchema::row_size() const {
1331
0
    size_t size = 0;
1332
0
    for (const auto& column : _cols) {
1333
0
        size += column->length();
1334
0
    }
1335
0
    size += (_num_columns + 7) / 8;
1336
1337
0
    return size;
1338
0
}
1339
1340
2.43k
int32_t TabletSchema::field_index(const std::string& field_name) const {
1341
2.43k
    const auto& found = _field_name_to_index.find(StringRef(field_name));
1342
2.43k
    return (found == _field_name_to_index.end()) ? -1 : found->second;
1343
2.43k
}
1344
1345
0
// Returns the position of the column registered under `path`, or -1 when the
// path is not registered.
int32_t TabletSchema::field_index(const vectorized::PathInData& path) const {
    const auto it = _field_path_to_index.find(vectorized::PathInDataRef(&path));
    if (it == _field_path_to_index.end()) {
        return -1;
    }
    return it->second;
}
1349
1350
360
int32_t TabletSchema::field_index(int32_t col_unique_id) const {
1351
360
    const auto& found = _field_uniqueid_to_index.find(col_unique_id);
1352
360
    return (found == _field_uniqueid_to_index.end()) ? -1 : found->second;
1353
360
}
1354
1355
52.9k
const std::vector<TabletColumnPtr>& TabletSchema::columns() const {
1356
52.9k
    return _cols;
1357
52.9k
}
1358
1359
0
const std::vector<TabletColumnPtr>& TabletColumn::sparse_columns() const {
1360
0
    return _sparse_cols;
1361
0
}
1362
1363
292k
// Returns the column at position `ordinal`.
// `ordinal` must be smaller than the column count; this is only verified with
// DCHECK, the element access itself is unchecked.
const TabletColumn& TabletSchema::column(size_t ordinal) const {
    DCHECK(ordinal < _num_columns) << "ordinal:" << ordinal << ", _num_columns:" << _num_columns;
    const auto& slot = _cols[ordinal];
    return *slot;
}
1367
1368
0
// Returns the sparse column at position `ordinal`.
// `ordinal` must be smaller than the number of sparse columns; this is only
// verified with DCHECK, the element access itself is unchecked.
const TabletColumn& TabletColumn::sparse_column_at(size_t ordinal) const {
    // The diagnostic names the container that is actually being checked.
    DCHECK(ordinal < _sparse_cols.size())
            << "ordinal:" << ordinal << ", _sparse_cols.size():" << _sparse_cols.size();
    return *_sparse_cols[ordinal];
}
1373
1374
1.85k
// Returns the column with unique id `col_unique_id`.
// Both lookups use the containers' checked `at()` accessors.
const TabletColumn& TabletSchema::column_by_uid(int32_t col_unique_id) const {
    const auto position = _field_uniqueid_to_index.at(col_unique_id);
    return *_cols.at(position);
}
1377
1378
0
// Returns a mutable reference to the column with unique id `col_unique_id`.
// Both lookups use the containers' checked `at()` accessors.
TabletColumn& TabletSchema::mutable_column_by_uid(int32_t col_unique_id) {
    const auto position = _field_uniqueid_to_index.at(col_unique_id);
    return *_cols.at(position);
}
1381
1382
16
// Returns a mutable reference to the column at position `ordinal`, using the
// vector's checked `at()` accessor.
TabletColumn& TabletSchema::mutable_column(size_t ordinal) {
    auto& slot = _cols.at(ordinal);
    return *slot;
}
1385
1386
0
// Replaces all indexes of this schema with the ones described by `tindexes`
// and rebuilds the (type, column uid, suffix) lookup map accordingly.
void TabletSchema::update_indexes_from_thrift(const std::vector<doris::TOlapTableIndex>& tindexes) {
    // Step 1: materialise the new index list; column ids are resolved against
    // this schema.
    std::vector<TabletIndexPtr> rebuilt;
    for (const auto& tindex : tindexes) {
        TabletIndex parsed;
        parsed.init_from_thrift(tindex, *this);
        rebuilt.emplace_back(std::make_shared<TabletIndex>(std::move(parsed)));
    }
    _indexes = std::move(rebuilt);

    // Step 2: rebuild the lookup map from the installed list.
    std::unordered_map<IndexKey, int32_t, IndexKeyHash> lookup;
    size_t position = 0;
    for (const auto& installed : _indexes) {
        for (int32_t col_uid : installed->col_unique_ids()) {
            lookup.emplace(std::make_tuple(installed->index_type(), col_uid,
                                           installed->get_index_suffix()),
                           position);
        }
        ++position;
    }
    _col_id_suffix_to_index = std::move(lookup);
}
1404
1405
0
bool TabletSchema::exist_column(const std::string& field_name) const {
1406
0
    return _field_name_to_index.contains(StringRef {field_name});
1407
0
}
1408
1409
830
bool TabletSchema::has_column_unique_id(int32_t col_unique_id) const {
1410
830
    return _field_uniqueid_to_index.contains(col_unique_id);
1411
830
}
1412
1413
8.06k
// Status-returning variant of the column existence check.
// Returns Status::OK() when `field_name` is registered in _field_name_to_index;
// otherwise returns an INTERNAL_ERROR status whose message carries the
// requested name and the result of get_all_field_names().
Status TabletSchema::have_column(const std::string& field_name) const {
    if (_field_name_to_index.contains(StringRef(field_name))) {
        return Status::OK();
    }
    return Status::Error<ErrorCode::INTERNAL_ERROR>(
            "Not found field_name, field_name:{}, schema:{}", field_name, get_all_field_names());
}
1421
1422
344
// Looks a column up by name.
// On success returns a non-owning pointer to the column. On a miss it trips a
// DCHECK and returns an InternalError that names the field, the table id and
// the schema version.
Result<const TabletColumn*> TabletSchema::column(const std::string& field_name) const {
    const auto found = _field_name_to_index.find(StringRef {field_name});
    if (found != _field_name_to_index.end()) {
        return _cols[found->second].get();
    }

    DCHECK(false) << "field_name=" << field_name << ", table_id=" << _table_id
                  << ", field_name_to_index=" << get_all_field_names();
    return ResultError(
            Status::InternalError("column not found, name={}, table_id={}, schema_version={}",
                                  field_name, _table_id, _schema_version));
}
1433
1434
void TabletSchema::update_tablet_columns(const TabletSchema& tablet_schema,
1435
0
                                         const std::vector<TColumn>& t_columns) {
1436
0
    copy_from(tablet_schema);
1437
0
    if (!t_columns.empty() && t_columns[0].col_unique_id >= 0) {
1438
0
        clear_columns();
1439
0
        for (const auto& column : t_columns) {
1440
0
            append_column(TabletColumn(column));
1441
0
        }
1442
0
    }
1443
0
}
1444
1445
110
bool TabletSchema::has_inverted_index_with_index_id(int64_t index_id) const {
1446
144
    for (size_t i = 0; i < _indexes.size(); i++) {
1447
74
        if (_indexes[i]->index_type() == IndexType::INVERTED &&
1448
74
            _indexes[i]->index_id() == index_id) {
1449
40
            return true;
1450
40
        }
1451
74
    }
1452
70
    return false;
1453
110
}
1454
1455
// Finds the inverted index registered for (col_unique_id, suffix_path).
// `suffix_path` is passed through escape_for_path_name() before the lookup.
// Returns a non-owning pointer into _indexes, or nullptr when no such index
// is registered.
const TabletIndex* TabletSchema::inverted_index(int32_t col_unique_id,
                                                const std::string& suffix_path) const {
    const IndexKey key(IndexType::INVERTED, col_unique_id, escape_for_path_name(suffix_path));
    const auto found = _col_id_suffix_to_index.find(key);
    if (found == _col_id_suffix_to_index.end()) {
        return nullptr;
    }
    return _indexes[found->second].get();
}
1465
1466
27.8k
// Finds the inverted index that applies to `col`.
// Returns nullptr when the column's type does not support inverted indexes or
// when no matching index is registered.
const TabletIndex* TabletSchema::inverted_index(const TabletColumn& col) const {
    // Some columns (Float, Double, JSONB ...) from the variant do not support inverted index
    const bool supported =
            segment_v2::InvertedIndexColumnWriter::check_support_inverted_index(col);
    if (!supported) {
        return nullptr;
    }
    // TODO use more efficient impl
    // Use the parent id if a unique id is not assigned; this can happen when accessing
    // subcolumns of variants
    int32_t lookup_uid;
    if (col.is_extracted_column()) {
        lookup_uid = col.parent_unique_id();
    } else {
        lookup_uid = col.unique_id();
    }
    // NOTE(review): the suffix is escaped here and again inside the two-argument
    // overload -- confirm the double escaping is intended.
    return inverted_index(lookup_uid, escape_for_path_name(col.suffix_path()));
}
1476
1477
0
bool TabletSchema::has_ngram_bf_index(int32_t col_unique_id) const {
1478
0
    IndexKey index_key(IndexType::NGRAM_BF, col_unique_id, "");
1479
0
    auto it = _col_id_suffix_to_index.find(index_key);
1480
0
    return it != _col_id_suffix_to_index.end();
1481
0
}
1482
1483
26.2k
// Returns the NGRAM_BF index (empty suffix) registered for the given column
// unique id as a non-owning pointer into _indexes, or nullptr if there is none.
const TabletIndex* TabletSchema::get_ngram_bf_index(int32_t col_unique_id) const {
    const IndexKey key(IndexType::NGRAM_BF, col_unique_id, "");
    const auto found = _col_id_suffix_to_index.find(key);
    return found == _col_id_suffix_to_index.end() ? nullptr : _indexes[found->second].get();
}
1492
1493
// Builds a block holding one freshly created column per entry of
// `return_columns`, in the given order. Each entry is used as a position in
// _cols. A column's data type is created as nullable when its position appears
// in `tablet_columns_need_convert_null`; that set may be nullptr, in which case
// the flag passed to the factory is false.
vectorized::Block TabletSchema::create_block(
        const std::vector<uint32_t>& return_columns,
        const std::unordered_set<uint32_t>* tablet_columns_need_convert_null) const {
    vectorized::Block block;
    for (const uint32_t cid : return_columns) {
        const auto& col = *_cols[cid];
        const bool force_nullable = tablet_columns_need_convert_null != nullptr &&
                                    tablet_columns_need_convert_null->count(cid) != 0;
        auto data_type =
                vectorized::DataTypeFactory::instance().create_data_type(col, force_nullable);
        block.insert({data_type->create_column(), data_type, col.name()});
    }
    return block;
}
1508
1509
3.90k
// Builds a block with one freshly created column per schema column, in schema
// order. When `ignore_dropped_col` is set, columns for which
// is_dropped_column() returns true are left out.
vectorized::Block TabletSchema::create_block(bool ignore_dropped_col) const {
    vectorized::Block block;
    for (const auto& column_ptr : _cols) {
        const bool skip = ignore_dropped_col && is_dropped_column(*column_ptr);
        if (!skip) {
            auto data_type =
                    vectorized::DataTypeFactory::instance().create_data_type(*column_ptr);
            block.insert({data_type->create_column(), data_type, column_ptr->name()});
        }
    }
    return block;
}
1520
1521
0
// Builds a block with one freshly created column per entry of `cids`, in the
// given order. Each entry is used as a position in _cols and is accessed with
// unchecked operator[].
vectorized::Block TabletSchema::create_block_by_cids(const std::vector<uint32_t>& cids) const {
    vectorized::Block result;
    for (const uint32_t position : cids) {
        const auto& tablet_column = *_cols[position];
        auto type = vectorized::DataTypeFactory::instance().create_data_type(tablet_column);
        result.insert({type->create_column(), type, tablet_column.name()});
    }
    return result;
}
1530
1531
0
// Field-wise equality for TabletColumn.
// - The default value is compared only when both sides have one.
// - Precision and fraction are compared only for decimal columns.
// - Column paths are equal when both are absent, or both are present and
//   compare equal by value.
bool operator==(const TabletColumn& a, const TabletColumn& b) {
    if (a._unique_id != b._unique_id) return false;
    if (a._col_name != b._col_name) return false;
    if (a._type != b._type) return false;
    if (a._is_key != b._is_key) return false;
    if (a._aggregation != b._aggregation) return false;
    if (a._is_nullable != b._is_nullable) return false;
    if (a._has_default_value != b._has_default_value) return false;
    if (a._has_default_value) {
        if (a._default_value != b._default_value) return false;
    }
    if (a._is_decimal != b._is_decimal) return false;
    if (a._is_decimal) {
        if (a._precision != b._precision) return false;
        if (a._frac != b._frac) return false;
    }
    if (a._length != b._length) return false;
    if (a._index_length != b._index_length) return false;
    if (a._is_bf_column != b._is_bf_column) return false;
    if (a._has_bitmap_index != b._has_bitmap_index) return false;

    // A path present on exactly one side makes the columns differ. The previous
    // code tested `a` against itself for one direction, so "a has no path, b has
    // one" was wrongly treated as equal and the comparison was not symmetric.
    const bool a_has_path = a._column_path != nullptr;
    const bool b_has_path = b._column_path != nullptr;
    if (a_has_path != b_has_path) return false;
    if (a_has_path && *a._column_path != *b._column_path) return false;
    return true;
}
1558
1559
0
// Inequality for TabletColumn, defined as the negation of operator==.
bool operator!=(const TabletColumn& a, const TabletColumn& b) {
    const bool same = (a == b);
    return !same;
}
1562
1563
2
// Equality for TabletSchema: same keys type, pairwise-equal columns, and equal
// scalar settings. The bloom-filter fpp values are compared only when both
// sides have one, using an absolute tolerance of 1e-6.
bool operator==(const TabletSchema& a, const TabletSchema& b) {
    if (a._keys_type != b._keys_type || a._cols.size() != b._cols.size()) {
        return false;
    }

    // Sizes already match, so the three-iterator form of std::equal is safe.
    const bool same_columns =
            std::equal(a._cols.begin(), a._cols.end(), b._cols.begin(),
                       [](const auto& lhs, const auto& rhs) { return !(*lhs != *rhs); });
    if (!same_columns) {
        return false;
    }

    // Written as a negated '>' so that a NaN difference is still treated as
    // "within tolerance".
    const bool fpp_within_tolerance =
            !a._has_bf_fpp || !(std::abs(a._bf_fpp - b._bf_fpp) > 1e-6);

    return a._num_columns == b._num_columns &&
           a._num_key_columns == b._num_key_columns &&
           a._num_null_columns == b._num_null_columns &&
           a._num_short_key_columns == b._num_short_key_columns &&
           a._num_rows_per_row_block == b._num_rows_per_row_block &&
           a._compress_kind == b._compress_kind &&
           a._next_column_unique_id == b._next_column_unique_id &&
           a._has_bf_fpp == b._has_bf_fpp && fpp_within_tolerance &&
           a._is_in_memory == b._is_in_memory &&
           a._delete_sign_idx == b._delete_sign_idx &&
           a._disable_auto_compaction == b._disable_auto_compaction &&
           a._enable_single_replica_compaction == b._enable_single_replica_compaction &&
           a._store_row_column == b._store_row_column &&
           a._row_store_page_size == b._row_store_page_size &&
           a._storage_page_size == b._storage_page_size &&
           a._skip_write_index_on_load == b._skip_write_index_on_load &&
           a._enable_variant_flatten_nested == b._enable_variant_flatten_nested;
}
1591
1592
2
// Inequality for TabletSchema, defined as the negation of operator==.
bool operator!=(const TabletSchema& a, const TabletSchema& b) {
    const bool same = (a == b);
    return !same;
}
1595
1596
} // namespace doris