Coverage Report

Created: 2025-07-28 16:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/olap/tablet_schema.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "olap/tablet_schema.h"
19
20
#include <gen_cpp/Descriptors_types.h>
21
#include <gen_cpp/olap_file.pb.h>
22
#include <glog/logging.h>
23
#include <google/protobuf/io/coded_stream.h>
24
#include <google/protobuf/io/zero_copy_stream.h>
25
#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
26
27
#include <algorithm>
28
#include <cctype>
29
// IWYU pragma: no_include <bits/std_abs.h>
30
#include <cmath> // IWYU pragma: keep
31
#include <memory>
32
#include <ostream>
33
#include <vector>
34
35
#include "common/compiler_util.h" // IWYU pragma: keep
36
#include "common/consts.h"
37
#include "common/status.h"
38
#include "exec/tablet_info.h"
39
#include "olap/inverted_index_parser.h"
40
#include "olap/olap_common.h"
41
#include "olap/olap_define.h"
42
#include "olap/tablet_column_object_pool.h"
43
#include "olap/types.h"
44
#include "olap/utils.h"
45
#include "tablet_meta.h"
46
#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
47
#include "vec/aggregate_functions/aggregate_function_state_union.h"
48
#include "vec/columns/column_nothing.h"
49
#include "vec/common/hex.h"
50
#include "vec/common/string_ref.h"
51
#include "vec/core/block.h"
52
#include "vec/data_types/data_type.h"
53
#include "vec/data_types/data_type_factory.hpp"
54
#include "vec/json/path_in_data.h"
55
56
namespace doris {
57
58
0
// Maps a PrimitiveType onto the FieldType used by the storage layer.
// Primitive types that have no mapping in this function (explicitly: invalid,
// binary, decimalv2 and lambda function; implicitly: anything not listed) yield
// OLAP_FIELD_TYPE_UNKNOWN.
FieldType TabletColumn::get_field_type_by_type(PrimitiveType primitiveType) {
    switch (primitiveType) {
    case PrimitiveType::TYPE_NULL:
        return FieldType::OLAP_FIELD_TYPE_NONE;

    // Boolean and integer types.
    case PrimitiveType::TYPE_BOOLEAN:
        return FieldType::OLAP_FIELD_TYPE_BOOL;
    case PrimitiveType::TYPE_TINYINT:
        return FieldType::OLAP_FIELD_TYPE_TINYINT;
    case PrimitiveType::TYPE_SMALLINT:
        return FieldType::OLAP_FIELD_TYPE_SMALLINT;
    case PrimitiveType::TYPE_INT:
        return FieldType::OLAP_FIELD_TYPE_INT;
    case PrimitiveType::TYPE_BIGINT:
        return FieldType::OLAP_FIELD_TYPE_BIGINT;
    case PrimitiveType::TYPE_LARGEINT:
        return FieldType::OLAP_FIELD_TYPE_LARGEINT;

    // Floating point and decimal types.
    case PrimitiveType::TYPE_FLOAT:
        return FieldType::OLAP_FIELD_TYPE_FLOAT;
    case PrimitiveType::TYPE_DOUBLE:
        return FieldType::OLAP_FIELD_TYPE_DOUBLE;
    case PrimitiveType::TYPE_DECIMAL32:
        return FieldType::OLAP_FIELD_TYPE_DECIMAL32;
    case PrimitiveType::TYPE_DECIMAL64:
        return FieldType::OLAP_FIELD_TYPE_DECIMAL64;
    case PrimitiveType::TYPE_DECIMAL128I:
        return FieldType::OLAP_FIELD_TYPE_DECIMAL128I;

    // Character types.
    case PrimitiveType::TYPE_CHAR:
        return FieldType::OLAP_FIELD_TYPE_CHAR;
    case PrimitiveType::TYPE_VARCHAR:
        return FieldType::OLAP_FIELD_TYPE_VARCHAR;
    case PrimitiveType::TYPE_STRING:
        return FieldType::OLAP_FIELD_TYPE_STRING;

    // Date and time types.
    case PrimitiveType::TYPE_DATE:
        return FieldType::OLAP_FIELD_TYPE_DATE;
    case PrimitiveType::TYPE_DATETIME:
        return FieldType::OLAP_FIELD_TYPE_DATETIME;
    case PrimitiveType::TYPE_DATEV2:
        return FieldType::OLAP_FIELD_TYPE_DATEV2;
    case PrimitiveType::TYPE_DATETIMEV2:
        return FieldType::OLAP_FIELD_TYPE_DATETIMEV2;
    case PrimitiveType::TYPE_TIMEV2:
        return FieldType::OLAP_FIELD_TYPE_TIMEV2;

    // Nested types.
    case PrimitiveType::TYPE_STRUCT:
        return FieldType::OLAP_FIELD_TYPE_STRUCT;
    case PrimitiveType::TYPE_ARRAY:
        return FieldType::OLAP_FIELD_TYPE_ARRAY;
    case PrimitiveType::TYPE_MAP:
        return FieldType::OLAP_FIELD_TYPE_MAP;

    // Semi-structured and aggregate-state types.
    case PrimitiveType::TYPE_JSONB:
        return FieldType::OLAP_FIELD_TYPE_JSONB;
    case PrimitiveType::TYPE_VARIANT:
        return FieldType::OLAP_FIELD_TYPE_VARIANT;
    case PrimitiveType::TYPE_HLL:
        return FieldType::OLAP_FIELD_TYPE_HLL;
    case PrimitiveType::TYPE_BITMAP:
        return FieldType::OLAP_FIELD_TYPE_BITMAP;
    case PrimitiveType::TYPE_QUANTILE_STATE:
        return FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE;
    case PrimitiveType::TYPE_AGG_STATE:
        return FieldType::OLAP_FIELD_TYPE_AGG_STATE;

    // Not implemented: these share the result of the catch-all branch.
    case PrimitiveType::INVALID_TYPE:
    case PrimitiveType::TYPE_BINARY:
    case PrimitiveType::TYPE_DECIMALV2:
    case PrimitiveType::TYPE_LAMBDA_FUNCTION:
    default:
        return FieldType::OLAP_FIELD_TYPE_UNKNOWN;
    }
}
130
131
12.2k
// Parses a type name (case-insensitive) into a FieldType.
//
// Most names must match exactly. Three are matched by prefix only, so any
// trailing text is ignored: "DECIMAL" (after the exact DECIMAL32 / DECIMAL64 /
// DECIMAL128I / DECIMAL256 names have been tried), "VARCHAR" and "HLL".
// "LIST" and "ARRAY" both map to ARRAY; "OBJECT" and "BITMAP" both map to BITMAP.
//
// Returns OLAP_FIELD_TYPE_UNKNOWN and logs a warning when nothing matches.
FieldType TabletColumn::get_field_type_by_string(const std::string& type_str) {
    std::string upper_type_str = type_str;
    // std::toupper has undefined behaviour for negative values other than EOF,
    // so each char must go through unsigned char first.
    std::transform(upper_type_str.begin(), upper_type_str.end(), upper_type_str.begin(),
                   [](unsigned char c) { return static_cast<char>(std::toupper(c)); });

    struct ExactName {
        const char* name;
        FieldType type;
    };
    // Names that must match the whole string. None of the prefix-matched names
    // below is a prefix of an entry here that it could shadow, so trying the
    // exact names first gives the same result as a single ordered chain.
    static constexpr ExactName kExactNames[] = {
            {"TINYINT", FieldType::OLAP_FIELD_TYPE_TINYINT},
            {"SMALLINT", FieldType::OLAP_FIELD_TYPE_SMALLINT},
            {"INT", FieldType::OLAP_FIELD_TYPE_INT},
            {"BIGINT", FieldType::OLAP_FIELD_TYPE_BIGINT},
            {"LARGEINT", FieldType::OLAP_FIELD_TYPE_LARGEINT},
            {"UNSIGNED_TINYINT", FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT},
            {"UNSIGNED_SMALLINT", FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT},
            {"UNSIGNED_INT", FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT},
            {"UNSIGNED_BIGINT", FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT},
            {"IPV4", FieldType::OLAP_FIELD_TYPE_IPV4},
            {"IPV6", FieldType::OLAP_FIELD_TYPE_IPV6},
            {"FLOAT", FieldType::OLAP_FIELD_TYPE_FLOAT},
            {"DISCRETE_DOUBLE", FieldType::OLAP_FIELD_TYPE_DISCRETE_DOUBLE},
            {"DOUBLE", FieldType::OLAP_FIELD_TYPE_DOUBLE},
            {"CHAR", FieldType::OLAP_FIELD_TYPE_CHAR},
            {"DATE", FieldType::OLAP_FIELD_TYPE_DATE},
            {"DATEV2", FieldType::OLAP_FIELD_TYPE_DATEV2},
            {"DATETIMEV2", FieldType::OLAP_FIELD_TYPE_DATETIMEV2},
            {"DATETIME", FieldType::OLAP_FIELD_TYPE_DATETIME},
            {"DECIMAL32", FieldType::OLAP_FIELD_TYPE_DECIMAL32},
            {"DECIMAL64", FieldType::OLAP_FIELD_TYPE_DECIMAL64},
            {"DECIMAL128I", FieldType::OLAP_FIELD_TYPE_DECIMAL128I},
            {"DECIMAL256", FieldType::OLAP_FIELD_TYPE_DECIMAL256},
            {"STRING", FieldType::OLAP_FIELD_TYPE_STRING},
            {"JSONB", FieldType::OLAP_FIELD_TYPE_JSONB},
            {"VARIANT", FieldType::OLAP_FIELD_TYPE_VARIANT},
            {"BOOLEAN", FieldType::OLAP_FIELD_TYPE_BOOL},
            {"STRUCT", FieldType::OLAP_FIELD_TYPE_STRUCT},
            {"LIST", FieldType::OLAP_FIELD_TYPE_ARRAY},
            {"MAP", FieldType::OLAP_FIELD_TYPE_MAP},
            {"OBJECT", FieldType::OLAP_FIELD_TYPE_BITMAP},
            {"BITMAP", FieldType::OLAP_FIELD_TYPE_BITMAP},
            {"ARRAY", FieldType::OLAP_FIELD_TYPE_ARRAY},
            {"QUANTILE_STATE", FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE},
            {"AGG_STATE", FieldType::OLAP_FIELD_TYPE_AGG_STATE},
    };
    for (const auto& entry : kExactNames) {
        if (upper_type_str == entry.name) {
            return entry.type;
        }
    }

    // Prefix matches: only the leading characters are compared.
    if (0 == upper_type_str.compare(0, 7, "DECIMAL")) {
        return FieldType::OLAP_FIELD_TYPE_DECIMAL;
    }
    if (0 == upper_type_str.compare(0, 7, "VARCHAR")) {
        return FieldType::OLAP_FIELD_TYPE_VARCHAR;
    }
    if (0 == upper_type_str.compare(0, 3, "HLL")) {
        return FieldType::OLAP_FIELD_TYPE_HLL;
    }

    LOG(WARNING) << "invalid type string. [type='" << type_str << "']";
    return FieldType::OLAP_FIELD_TYPE_UNKNOWN;
}
220
221
3.99k
// Parses an aggregation name (case-insensitive) into a FieldAggregationMethod.
//
// Known names map to their dedicated enumerator. Any other non-empty name is
// reported as OLAP_FIELD_AGGREGATION_GENERIC, and an empty string as
// OLAP_FIELD_AGGREGATION_UNKNOWN.
FieldAggregationMethod TabletColumn::get_aggregation_type_by_string(const std::string& str) {
    std::string upper_str = str;
    // std::toupper has undefined behaviour for negative values other than EOF,
    // so each char must go through unsigned char first.
    std::transform(upper_str.begin(), upper_str.end(), upper_str.begin(),
                   [](unsigned char c) { return static_cast<char>(std::toupper(c)); });

    struct KnownName {
        const char* name;
        FieldAggregationMethod method;
    };
    static constexpr KnownName kKnownNames[] = {
            {"NONE", FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE},
            {"SUM", FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM},
            {"MIN", FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MIN},
            {"MAX", FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MAX},
            {"REPLACE", FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE},
            {"REPLACE_IF_NOT_NULL",
             FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL},
            {"HLL_UNION", FieldAggregationMethod::OLAP_FIELD_AGGREGATION_HLL_UNION},
            {"BITMAP_UNION", FieldAggregationMethod::OLAP_FIELD_AGGREGATION_BITMAP_UNION},
            {"QUANTILE_UNION", FieldAggregationMethod::OLAP_FIELD_AGGREGATION_QUANTILE_UNION},
    };
    for (const auto& entry : kKnownNames) {
        if (upper_str == entry.name) {
            return entry.method;
        }
    }

    if (!upper_str.empty()) {
        return FieldAggregationMethod::OLAP_FIELD_AGGREGATION_GENERIC;
    }
    return FieldAggregationMethod::OLAP_FIELD_AGGREGATION_UNKNOWN;
}
253
254
31.1k
// Returns the textual name of a FieldType.
// Note that BITMAP is spelled "OBJECT", and any field type without an entry
// below is reported as "UNKNOWN".
std::string TabletColumn::get_string_by_field_type(FieldType type) {
    // clang-format off
    switch (type) {
    // Integer types.
    case FieldType::OLAP_FIELD_TYPE_TINYINT:           return "TINYINT";
    case FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT:  return "UNSIGNED_TINYINT";
    case FieldType::OLAP_FIELD_TYPE_SMALLINT:          return "SMALLINT";
    case FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT: return "UNSIGNED_SMALLINT";
    case FieldType::OLAP_FIELD_TYPE_INT:               return "INT";
    case FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT:      return "UNSIGNED_INT";
    case FieldType::OLAP_FIELD_TYPE_BIGINT:            return "BIGINT";
    case FieldType::OLAP_FIELD_TYPE_LARGEINT:          return "LARGEINT";
    case FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT:   return "UNSIGNED_BIGINT";

    // Network address types.
    case FieldType::OLAP_FIELD_TYPE_IPV4:              return "IPV4";
    case FieldType::OLAP_FIELD_TYPE_IPV6:              return "IPV6";

    // Floating point types.
    case FieldType::OLAP_FIELD_TYPE_FLOAT:             return "FLOAT";
    case FieldType::OLAP_FIELD_TYPE_DOUBLE:            return "DOUBLE";
    case FieldType::OLAP_FIELD_TYPE_DISCRETE_DOUBLE:   return "DISCRETE_DOUBLE";

    // Date and time types.
    case FieldType::OLAP_FIELD_TYPE_DATE:              return "DATE";
    case FieldType::OLAP_FIELD_TYPE_DATEV2:            return "DATEV2";
    case FieldType::OLAP_FIELD_TYPE_DATETIME:          return "DATETIME";
    case FieldType::OLAP_FIELD_TYPE_DATETIMEV2:        return "DATETIMEV2";

    // Decimal types.
    case FieldType::OLAP_FIELD_TYPE_DECIMAL:           return "DECIMAL";
    case FieldType::OLAP_FIELD_TYPE_DECIMAL32:         return "DECIMAL32";
    case FieldType::OLAP_FIELD_TYPE_DECIMAL64:         return "DECIMAL64";
    case FieldType::OLAP_FIELD_TYPE_DECIMAL128I:       return "DECIMAL128I";
    case FieldType::OLAP_FIELD_TYPE_DECIMAL256:        return "DECIMAL256";

    // Character and semi-structured types.
    case FieldType::OLAP_FIELD_TYPE_CHAR:              return "CHAR";
    case FieldType::OLAP_FIELD_TYPE_VARCHAR:           return "VARCHAR";
    case FieldType::OLAP_FIELD_TYPE_STRING:            return "STRING";
    case FieldType::OLAP_FIELD_TYPE_JSONB:             return "JSONB";
    case FieldType::OLAP_FIELD_TYPE_VARIANT:           return "VARIANT";

    case FieldType::OLAP_FIELD_TYPE_BOOL:              return "BOOLEAN";

    // Nested types.
    case FieldType::OLAP_FIELD_TYPE_STRUCT:            return "STRUCT";
    case FieldType::OLAP_FIELD_TYPE_ARRAY:             return "ARRAY";
    case FieldType::OLAP_FIELD_TYPE_MAP:               return "MAP";

    // Aggregate-state types.
    case FieldType::OLAP_FIELD_TYPE_HLL:               return "HLL";
    case FieldType::OLAP_FIELD_TYPE_BITMAP:            return "OBJECT";
    case FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE:    return "QUANTILE_STATE";
    case FieldType::OLAP_FIELD_TYPE_AGG_STATE:         return "AGG_STATE";

    default:                                           return "UNKNOWN";
    }
    // clang-format on
}
365
366
31
// Returns the textual name of an aggregation method.
// Methods without an entry below are reported as "UNKNOWN".
std::string TabletColumn::get_string_by_aggregation_type(FieldAggregationMethod type) {
    using Agg = FieldAggregationMethod;
    // clang-format off
    switch (type) {
    case Agg::OLAP_FIELD_AGGREGATION_NONE:                return "NONE";
    case Agg::OLAP_FIELD_AGGREGATION_SUM:                 return "SUM";
    case Agg::OLAP_FIELD_AGGREGATION_MIN:                 return "MIN";
    case Agg::OLAP_FIELD_AGGREGATION_MAX:                 return "MAX";
    case Agg::OLAP_FIELD_AGGREGATION_REPLACE:             return "REPLACE";
    case Agg::OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL: return "REPLACE_IF_NOT_NULL";
    case Agg::OLAP_FIELD_AGGREGATION_HLL_UNION:           return "HLL_UNION";
    case Agg::OLAP_FIELD_AGGREGATION_BITMAP_UNION:        return "BITMAP_UNION";
    case Agg::OLAP_FIELD_AGGREGATION_QUANTILE_UNION:      return "QUANTILE_UNION";
    default:                                              return "UNKNOWN";
    }
    // clang-format on
}
399
400
2.12k
// Returns the field length used for a column of thrift type `type`.
// `string_length` is only consulted for the character-like types; every other
// type has a fixed length. Unrecognised types log a warning and yield 0.
uint32_t TabletColumn::get_field_length_by_type(TPrimitiveType::type type, uint32_t string_length) {
    switch (type) {
    // Fixed-width types, grouped by the length they report.
    case TPrimitiveType::TINYINT:
    case TPrimitiveType::BOOLEAN:
        return 1;
    case TPrimitiveType::SMALLINT:
        return 2;
    case TPrimitiveType::DATE:
        return 3;
    case TPrimitiveType::INT:
    case TPrimitiveType::IPV4:
    case TPrimitiveType::DATEV2:
    case TPrimitiveType::FLOAT:
    case TPrimitiveType::DECIMAL32:
        return 4;
    case TPrimitiveType::BIGINT:
    case TPrimitiveType::DATETIME:
    case TPrimitiveType::DATETIMEV2:
    case TPrimitiveType::DOUBLE:
    case TPrimitiveType::DECIMAL64:
        return 8;
    case TPrimitiveType::DECIMALV2:
        return 12; // use 12 bytes in olap engine.
    case TPrimitiveType::LARGEINT:
    case TPrimitiveType::IPV6:
    case TPrimitiveType::QUANTILE_STATE:
    case TPrimitiveType::BITMAP:
    case TPrimitiveType::DECIMAL128I:
        return 16;
    case TPrimitiveType::DECIMAL256:
        return 32;

    // Character-like types: derived from the caller-supplied string length.
    case TPrimitiveType::CHAR:
        return string_length;
    case TPrimitiveType::VARCHAR:
    case TPrimitiveType::HLL:
    case TPrimitiveType::AGG_STATE:
        return string_length + sizeof(OLAP_VARCHAR_MAX_LENGTH);
    case TPrimitiveType::STRING:
    case TPrimitiveType::VARIANT:
        return string_length + sizeof(OLAP_STRING_MAX_LENGTH);
    case TPrimitiveType::JSONB:
        return string_length + sizeof(OLAP_JSONB_MAX_LENGTH);

    // Nested types.
    case TPrimitiveType::STRUCT:
        // Note that(xy): this is the length of struct type itself,
        // the length of its subtypes are not included.
        return OLAP_STRUCT_MAX_LENGTH;
    case TPrimitiveType::ARRAY:
        return OLAP_ARRAY_MAX_LENGTH;
    case TPrimitiveType::MAP:
        return OLAP_MAP_MAX_LENGTH;

    default:
        LOG(WARNING) << "unknown field type. [type=" << type << "]";
        return 0;
    }
}
466
467
9
bool TabletColumn::has_char_type() const {
468
9
    switch (_type) {
469
3
    case FieldType::OLAP_FIELD_TYPE_CHAR: {
470
3
        return true;
471
0
    }
472
4
    case FieldType::OLAP_FIELD_TYPE_ARRAY:
473
4
    case FieldType::OLAP_FIELD_TYPE_MAP:
474
4
    case FieldType::OLAP_FIELD_TYPE_STRUCT: {
475
4
        return std::any_of(_sub_columns.begin(), _sub_columns.end(),
476
4
                           [&](const auto& sub) -> bool { return sub->has_char_type(); });
477
4
    }
478
2
    default:
479
2
        return false;
480
9
    }
481
9
}
482
483
12.5k
// Default constructor: the column starts out with no aggregation.
TabletColumn::TabletColumn() {
    _aggregation = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE;
}
484
485
120
// Builds a column with the given aggregation method and field type.
// No other member is assigned by this constructor.
TabletColumn::TabletColumn(FieldAggregationMethod agg, FieldType type) {
    _type = type;
    _aggregation = agg;
}
489
490
17
// Builds a column with the given aggregation method, field type and
// nullability. The length is taken from the scalar type info of `filed_type`.
TabletColumn::TabletColumn(FieldAggregationMethod agg, FieldType filed_type, bool is_nullable) {
    _type = filed_type;
    _aggregation = agg;
    _is_nullable = is_nullable;
    // NOTE(review): the type info pointer is dereferenced without a null check;
    // presumably callers only pass scalar field types — confirm.
    const auto* scalar_info = get_scalar_type_info(filed_type);
    _length = scalar_info->size();
}
496
497
// Builds a column from an explicit aggregation method, field type, nullability,
// unique id and length. All five values are stored as given.
TabletColumn::TabletColumn(FieldAggregationMethod agg, FieldType filed_type, bool is_nullable,
                           int32_t unique_id, size_t length) {
    _unique_id = unique_id;
    _type = filed_type;
    _aggregation = agg;
    _is_nullable = is_nullable;
    _length = length;
}
505
506
0
// Builds a column from its protobuf description by delegating to init_from_pb().
TabletColumn::TabletColumn(const ColumnPB& column) {
    this->init_from_pb(column);
}
509
510
0
// Builds a column from its thrift description by delegating to init_from_thrift().
TabletColumn::TabletColumn(const TColumn& column) {
    this->init_from_thrift(column);
}
513
514
0
void TabletColumn::init_from_thrift(const TColumn& tcolumn) {
515
0
    ColumnPB column_pb;
516
0
    TabletMeta::init_column_from_tcolumn(tcolumn.col_unique_id, tcolumn, &column_pb);
517
0
    init_from_pb(column_pb);
518
0
}
519
520
12.1k
void TabletColumn::init_from_pb(const ColumnPB& column) {
521
12.1k
    _unique_id = column.unique_id();
522
12.1k
    _col_name = column.name();
523
12.1k
    _col_name_lower_case = to_lower(_col_name);
524
12.1k
    _type = TabletColumn::get_field_type_by_string(column.type());
525
12.1k
    _is_key = column.is_key();
526
12.1k
    _is_nullable = column.is_nullable();
527
12.1k
    _is_auto_increment = column.is_auto_increment();
528
12.1k
    if (column.has_is_on_update_current_timestamp()) {
529
11.2k
        _is_on_update_current_timestamp = column.is_on_update_current_timestamp();
530
11.2k
    }
531
532
12.1k
    _has_default_value = column.has_default_value();
533
12.1k
    if (_has_default_value) {
534
29
        _default_value = column.default_value();
535
29
    }
536
537
12.1k
    if (column.has_precision()) {
538
3.94k
        _is_decimal = true;
539
3.94k
        _precision = column.precision();
540
8.21k
    } else {
541
8.21k
        _is_decimal = false;
542
8.21k
    }
543
12.1k
    if (column.has_frac()) {
544
3.94k
        _frac = column.frac();
545
3.94k
    }
546
12.1k
    _length = column.length();
547
12.1k
    _index_length = column.index_length();
548
12.1k
    if (column.has_is_bf_column()) {
549
408
        _is_bf_column = column.is_bf_column();
550
11.7k
    } else {
551
11.7k
        _is_bf_column = false;
552
11.7k
    }
553
12.1k
    if (column.has_has_bitmap_index()) {
554
2.12k
        _has_bitmap_index = column.has_bitmap_index();
555
10.0k
    } else {
556
10.0k
        _has_bitmap_index = false;
557
10.0k
    }
558
12.1k
    if (column.has_aggregation()) {
559
3.99k
        _aggregation = get_aggregation_type_by_string(column.aggregation());
560
3.99k
        _aggregation_name = column.aggregation();
561
3.99k
    }
562
563
12.1k
    if (_type == FieldType::OLAP_FIELD_TYPE_AGG_STATE) {
564
0
        _result_is_nullable = column.result_is_nullable();
565
0
        _be_exec_version = column.be_exec_version();
566
0
    }
567
568
12.1k
    if (column.has_visible()) {
569
9.13k
        _visible = column.visible();
570
9.13k
    }
571
12.1k
    if (_type == FieldType::OLAP_FIELD_TYPE_ARRAY) {
572
2
        CHECK(column.children_columns_size() == 1)
573
0
                << "ARRAY type should has 1 children types, but got "
574
0
                << column.children_columns_size();
575
2
    }
576
12.1k
    if (_type == FieldType::OLAP_FIELD_TYPE_MAP) {
577
0
        DCHECK(column.children_columns_size() == 2)
578
0
                << "MAP type should has 2 children types, but got "
579
0
                << column.children_columns_size();
580
0
        if (UNLIKELY(column.children_columns_size() != 2)) {
581
0
            LOG(WARNING) << "MAP type should has 2 children types, but got "
582
0
                         << column.children_columns_size();
583
0
        }
584
0
    }
585
12.1k
    for (int i = 0; i < column.children_columns_size(); i++) {
586
2
        TabletColumn child_column;
587
2
        child_column.init_from_pb(column.children_columns(i));
588
2
        add_sub_column(child_column);
589
2
    }
590
12.1k
    if (column.has_column_path_info()) {
591
0
        _column_path = std::make_shared<vectorized::PathInData>();
592
0
        _column_path->from_protobuf(column.column_path_info());
593
0
        _parent_col_unique_id = column.column_path_info().parrent_column_unique_id();
594
0
    }
595
12.1k
    if (is_variant_type() && !column.has_column_path_info()) {
596
        // set path info for variant root column, to prevent from missing
597
6
        _column_path = std::make_shared<vectorized::PathInData>(_col_name_lower_case);
598
6
    }
599
12.1k
    for (const auto& column_pb : column.sparse_columns()) {
600
0
        TabletColumn new_column;
601
0
        new_column.init_from_pb(column_pb);
602
0
        _sparse_cols.emplace_back(std::make_shared<TabletColumn>(std::move(new_column)));
603
0
        _num_sparse_columns++;
604
0
    }
605
12.1k
}
606
607
// Creates a nullable VARIANT column for the path built from `root` and `paths`.
// The column gets unique id -1, `parent_unique_id` as its parent id, and is
// named after the full path string.
TabletColumn TabletColumn::create_materialized_variant_column(const std::string& root,
                                                              const std::vector<std::string>& paths,
                                                              int32_t parent_unique_id) {
    const vectorized::PathInData full_path(root, paths);

    TabletColumn variant_col;
    variant_col.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
    variant_col.set_is_nullable(true);
    variant_col.set_unique_id(-1);
    variant_col.set_parent_unique_id(parent_unique_id);
    variant_col.set_path_info(full_path);
    variant_col.set_name(full_path.get_path());
    return variant_col;
}
620
621
31.1k
// Serializes this column into the protobuf message `column`.
//
// Default value, precision/frac, bloom-filter flag, aggregation name and
// bitmap-index flag are only written when set on this column. Child columns
// and sparse columns are serialized recursively.
void TabletColumn::to_schema_pb(ColumnPB* column) const {
    // Attributes that are always written.
    column->set_unique_id(_unique_id);
    column->set_name(_col_name);
    column->set_type(get_string_by_field_type(_type));
    column->set_is_key(_is_key);
    column->set_is_nullable(_is_nullable);
    column->set_is_auto_increment(_is_auto_increment);
    column->set_is_on_update_current_timestamp(_is_on_update_current_timestamp);
    column->set_length(_length);
    column->set_index_length(_index_length);
    column->set_result_is_nullable(_result_is_nullable);
    column->set_be_exec_version(_be_exec_version);
    column->set_visible(_visible);

    // Attributes that are written only when present.
    if (_has_default_value) {
        column->set_default_value(_default_value);
    }
    if (_is_decimal) {
        column->set_precision(_precision);
        column->set_frac(_frac);
    }
    if (_is_bf_column) {
        column->set_is_bf_column(_is_bf_column);
    }
    if (!_aggregation_name.empty()) {
        column->set_aggregation(_aggregation_name);
    }
    if (_has_bitmap_index) {
        column->set_has_bitmap_index(_has_bitmap_index);
    }

    // Sanity checks on the number of children of nested types. A malformed
    // ARRAY aborts via CHECK; a malformed MAP only asserts in debug builds and
    // logs a warning otherwise.
    const size_t num_children = _sub_columns.size();
    if (_type == FieldType::OLAP_FIELD_TYPE_ARRAY) {
        CHECK(num_children == 1)
                << "ARRAY type should has 1 children types, but got " << num_children;
    }
    if (_type == FieldType::OLAP_FIELD_TYPE_MAP) {
        DCHECK(num_children == 2)
                << "MAP type should has 2 children types, but got " << num_children;
        if (UNLIKELY(num_children != 2)) {
            LOG(WARNING) << "MAP type should has 2 children types, but got " << num_children;
        }
    }

    for (const auto& child : _sub_columns) {
        child->to_schema_pb(column->add_children_columns());
    }

    // set parts info
    if (has_path_info()) {
        // CHECK_GT(_parent_col_unique_id, 0);
        _column_path->to_protobuf(column->mutable_column_path_info(), _parent_col_unique_id);
        // Update unstable information for variant columns. Some of the fields in the tablet schema
        // are irrelevant for variant sub-columns, but retaining them may lead to an excessive growth
        // in the number of tablet schema cache entries.
        // These overrides must come after the unconditional set_length /
        // set_index_length calls above.
        if (_type == FieldType::OLAP_FIELD_TYPE_STRING) {
            column->set_length(INT_MAX);
        }
        column->set_index_length(0);
    }

    for (const auto& sparse : _sparse_cols) {
        sparse->to_schema_pb(column->add_sparse_columns());
    }
}
685
686
38
// Appends a copy of `sub_column` to this column's children and bumps the
// sub-column counter. The argument's parent unique id is set to this column's
// unique id.
void TabletColumn::add_sub_column(TabletColumn& sub_column) {
    auto stored_copy = std::make_shared<TabletColumn>(sub_column);
    _sub_columns.push_back(stored_copy);
    // NOTE(review): the parent id is written to the caller's object after the
    // copy above was taken, so the stored child keeps its previous parent id —
    // confirm this is intended.
    sub_column._parent_col_unique_id = _unique_id;
    ++_sub_column_count;
}
691
692
22.5k
bool TabletColumn::is_row_store_column() const {
693
22.5k
    return _col_name == BeConsts::ROW_STORE_COL;
694
22.5k
}
695
696
// Builds the state-union aggregate function for an AGG_STATE data type.
// `type` is cast to DataTypeAggState; its nested function is checked for
// compatibility between `current_be_exec_version` and this column's stored
// version before the union function is created.
vectorized::AggregateFunctionPtr TabletColumn::get_aggregate_function_union(
        vectorized::DataTypePtr type, int current_be_exec_version) const {
    const auto* agg_state_type = assert_cast<const vectorized::DataTypeAggState*>(type.get());
    const auto nested_function = agg_state_type->get_nested_function();

    BeExecVersionManager::check_function_compatibility(current_be_exec_version, _be_exec_version,
                                                       nested_function->get_name());

    return vectorized::AggregateStateUnion::create(nested_function, {type}, type);
}
704
705
// Resolves the aggregate function used to merge values of this column.
//
// For AGG_STATE columns the state-union function is returned. Otherwise the
// function is looked up in the simple factory under the lower-cased name
// "<aggregation name><suffix>". On success the function's version is set to
// this column's stored be_exec_version.
//
// Returns nullptr (after logging a warning) when the column's data type cannot
// be created or no matching function exists.
vectorized::AggregateFunctionPtr TabletColumn::get_aggregate_function(
        std::string suffix, int current_be_exec_version) const {
    auto type = vectorized::DataTypeFactory::instance().create_data_type(*this);
    if (!type) {
        // Without this guard the non-AGG_STATE path below would dereference a
        // null data type.
        LOG(WARNING) << "get column aggregate function failed, aggregation_name="
                     << TabletColumn::get_string_by_aggregation_type(_aggregation)
                     << ", column data type could not be created";
        return nullptr;
    }

    vectorized::AggregateFunctionPtr function = nullptr;
    if (type->get_primitive_type() == PrimitiveType::TYPE_AGG_STATE) {
        function = get_aggregate_function_union(type, current_be_exec_version);
    } else {
        std::string origin_name = TabletColumn::get_string_by_aggregation_type(_aggregation);
        std::string agg_name = origin_name + suffix;
        // Factory names are lower-case; go through unsigned char because
        // std::tolower is undefined for negative values.
        std::transform(agg_name.begin(), agg_name.end(), agg_name.begin(),
                       [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
        function = vectorized::AggregateFunctionSimpleFactory::instance().get(
                agg_name, {type}, type->is_nullable(), BeExecVersionManager::get_newest_version());
        if (!function) {
            LOG(WARNING) << "get column aggregate function failed, aggregation_name=" << origin_name
                         << ", column_type=" << type->get_name();
        }
    }

    if (function) {
        function->set_version(_be_exec_version);
    }
    return function;
}
730
731
12
// Stores a shared copy of `path` as this column's path info.
void TabletColumn::set_path_info(const vectorized::PathInData& path) {
    auto path_copy = std::make_shared<vectorized::PathInData>(path);
    _column_path = std::move(path_copy);
}
734
735
0
// Returns the vectorized data type that DataTypeFactory creates for this column.
vectorized::DataTypePtr TabletColumn::get_vec_type() const {
    auto& factory = vectorized::DataTypeFactory::instance();
    return factory.create_data_type(*this);
}
738
739
// escape '.' and '_'
740
43.0k
std::string escape_for_path_name(const std::string& s) {
741
43.0k
    std::string res;
742
43.0k
    const char* pos = s.data();
743
43.0k
    const char* end = pos + s.size();
744
43.3k
    while (pos != end) {
745
261
        unsigned char c = *pos;
746
261
        if (c == '.' || c == '_') {
747
25
            res += '%';
748
25
            res += vectorized::hex_digit_uppercase(c / 16);
749
25
            res += vectorized::hex_digit_uppercase(c % 16);
750
236
        } else {
751
236
            res += c;
752
236
        }
753
261
        ++pos;
754
261
    }
755
43.0k
    return res;
756
43.0k
}
757
758
21
void TabletIndex::set_escaped_escaped_index_suffix_path(const std::string& path_name) {
759
21
    std::string escaped_path = escape_for_path_name(path_name);
760
21
    _escaped_index_suffix_path = escaped_path;
761
21
}
762
763
void TabletIndex::init_from_thrift(const TOlapTableIndex& index,
764
15
                                   const TabletSchema& tablet_schema) {
765
15
    _index_id = index.index_id;
766
15
    _index_name = index.index_name;
767
    // init col_unique_id in index at be side, since col_unique_id may be -1 at fe side
768
    // get column unique id by name
769
15
    std::vector<int32_t> col_unique_ids(index.columns.size());
770
30
    for (size_t i = 0; i < index.columns.size(); i++) {
771
15
        auto column_idx = tablet_schema.field_index(index.columns[i]);
772
15
        if (column_idx >= 0) {
773
11
            col_unique_ids[i] = tablet_schema.column(column_idx).unique_id();
774
11
        } else {
775
            // if column unique id not found by column name, find by column unique id
776
            // column unique id can not found means this column is a new column added by light schema change
777
4
            if (index.__isset.column_unique_ids && !index.column_unique_ids.empty() &&
778
4
                tablet_schema.has_column_unique_id(index.column_unique_ids[i])) {
779
1
                col_unique_ids[i] = index.column_unique_ids[i];
780
3
            } else {
781
3
                col_unique_ids[i] = -1;
782
3
            }
783
4
        }
784
15
    }
785
15
    _col_unique_ids = std::move(col_unique_ids);
786
787
15
    switch (index.index_type) {
788
0
    case TIndexType::BITMAP:
789
0
        _index_type = IndexType::BITMAP;
790
0
        break;
791
15
    case TIndexType::INVERTED:
792
15
        _index_type = IndexType::INVERTED;
793
15
        break;
794
0
    case TIndexType::BLOOMFILTER:
795
0
        _index_type = IndexType::BLOOMFILTER;
796
0
        break;
797
0
    case TIndexType::NGRAM_BF:
798
0
        _index_type = IndexType::NGRAM_BF;
799
0
        break;
800
15
    }
801
15
    if (index.__isset.properties) {
802
0
        for (auto kv : index.properties) {
803
0
            _properties[kv.first] = kv.second;
804
0
        }
805
0
    }
806
15
}
807
808
void TabletIndex::init_from_thrift(const TOlapTableIndex& index,
809
0
                                   const std::vector<int32_t>& column_uids) {
810
0
    _index_id = index.index_id;
811
0
    _index_name = index.index_name;
812
0
    _col_unique_ids = column_uids;
813
814
0
    switch (index.index_type) {
815
0
    case TIndexType::BITMAP:
816
0
        _index_type = IndexType::BITMAP;
817
0
        break;
818
0
    case TIndexType::INVERTED:
819
0
        _index_type = IndexType::INVERTED;
820
0
        break;
821
0
    case TIndexType::BLOOMFILTER:
822
0
        _index_type = IndexType::BLOOMFILTER;
823
0
        break;
824
0
    case TIndexType::NGRAM_BF:
825
0
        _index_type = IndexType::NGRAM_BF;
826
0
        break;
827
0
    }
828
0
    if (index.__isset.properties) {
829
0
        for (auto kv : index.properties) {
830
0
            _properties[kv.first] = kv.second;
831
0
        }
832
0
    }
833
0
}
834
835
7.37k
void TabletIndex::init_from_pb(const TabletIndexPB& index) {
836
7.37k
    _index_id = index.index_id();
837
7.37k
    _index_name = index.index_name();
838
7.37k
    _col_unique_ids.clear();
839
7.37k
    for (auto col_unique_id : index.col_unique_id()) {
840
7.34k
        _col_unique_ids.push_back(col_unique_id);
841
7.34k
    }
842
7.37k
    _index_type = index.index_type();
843
41.0k
    for (const auto& kv : index.properties()) {
844
41.0k
        _properties[kv.first] = kv.second;
845
41.0k
    }
846
7.37k
    _escaped_index_suffix_path = index.index_suffix_name();
847
7.37k
}
848
849
11.0k
// Serializes this index into `index`.
// Besides copying every stored property, a non-empty property set that lacks
// INVERTED_INDEX_PARSER_LOWERCASE_KEY gets that key written as INVERTED_INDEX_PARSER_TRUE.
// The "tablet_schema.to_schema_pb" debug point skips the lowercase key while copying and
// returns before the default is applied.
void TabletIndex::to_schema_pb(TabletIndexPB* index) const {
    index->set_index_id(_index_id);
    index->set_index_name(_index_name);

    index->clear_col_unique_id();
    for (auto uid : _col_unique_ids) {
        index->add_col_unique_id(uid);
    }

    index->set_index_type(_index_type);

    for (const auto& property : _properties) {
        DBUG_EXECUTE_IF("tablet_schema.to_schema_pb", {
            if (property.first == INVERTED_INDEX_PARSER_LOWERCASE_KEY) {
                continue;
            }
        })
        (*index->mutable_properties())[property.first] = property.second;
    }
    index->set_index_suffix_name(_escaped_index_suffix_path);

    DBUG_EXECUTE_IF("tablet_schema.to_schema_pb", { return; })

    // lowercase by default
    const bool needs_lowercase_default =
            !_properties.empty() && !_properties.contains(INVERTED_INDEX_PARSER_LOWERCASE_KEY);
    if (needs_lowercase_default) {
        (*index->mutable_properties())[INVERTED_INDEX_PARSER_LOWERCASE_KEY] =
                INVERTED_INDEX_PARSER_TRUE;
    }
}
877
878
2.70k
// Default constructor, defined here rather than in the header; it is compiler-generated.
TabletSchema::TabletSchema() = default;
879
880
2.65k
// Destructor has no custom logic; members clean themselves up.
TabletSchema::~TabletSchema() = default;
881
882
1.69k
int64_t TabletSchema::get_metadata_size() const {
883
1.69k
    return sizeof(TabletSchema);
884
1.69k
}
885
886
340
// Appends `column` at the end of the schema and updates the counters and lookup maps.
//
// * Key / nullable / variant counters are incremented as applicable; a variant column
//   without path info gets one built from its lower-case name.
// * Columns with the special names DELETE_SIGN, SEQUENCE_COL, VERSION_COL or
//   SKIP_BITMAP_COL record their position; columns whose name starts with the virtual
//   column prefix are recorded in `_vir_col_idx_to_unique_id`.
// * The unique-id map is always updated. The name map is updated for variant/extracted
//   columns and for ColumnType::NORMAL; the path map only for variant/extracted columns.
void TabletSchema::append_column(TabletColumn column, ColumnType col_type) {
    if (column.is_key()) {
        ++_num_key_columns;
    }
    if (column.is_nullable()) {
        ++_num_null_columns;
    }
    if (column.is_variant_type()) {
        ++_num_variant_columns;
        if (!column.has_path_info()) {
            const std::string& lower_name = column.name_lower_case();
            vectorized::PathInData root_path(lower_name);
            column.set_path_info(root_path);
        }
    }

    // Remember the position of well-known hidden columns.
    if (UNLIKELY(column.name() == DELETE_SIGN)) {
        _delete_sign_idx = _num_columns;
    } else if (UNLIKELY(column.name() == SEQUENCE_COL)) {
        _sequence_col_idx = _num_columns;
    } else if (UNLIKELY(column.name() == VERSION_COL)) {
        _version_col_idx = _num_columns;
    } else if (UNLIKELY(column.name() == SKIP_BITMAP_COL)) {
        _skip_bitmap_col_idx = _num_columns;
    } else if (UNLIKELY(column.name().starts_with(BeConsts::VIRTUAL_COLUMN_PREFIX))) {
        _vir_col_idx_to_unique_id[_num_columns] = column.unique_id();
    }

    _field_uniqueid_to_index[column.unique_id()] = _num_columns;
    _cols.push_back(std::make_shared<TabletColumn>(std::move(column)));
    const auto& appended = _cols.back();

    // A dropped column may have the same name as an existing column, so it is not added to
    // the name-to-index map; it is only reachable through the unique-id map.
    const bool is_path_addressed = col_type == ColumnType::VARIANT ||
                                   appended->is_variant_type() ||
                                   appended->is_extracted_column();
    if (is_path_addressed || col_type == ColumnType::NORMAL) {
        _field_name_to_index.emplace(StringRef(appended->name()), _num_columns);
    }
    if (is_path_addressed) {
        _field_path_to_index[appended->path_info_ptr().get()] = _num_columns;
    }

    _num_columns++;
    _num_virtual_columns = _vir_col_idx_to_unique_id.size();
}
926
927
0
// Appends `column` to this column's sparse sub-columns and bumps the sparse column counter.
void TabletColumn::append_sparse_column(TabletColumn column) {
    // `column` is already a private copy (by-value parameter), so move it into the shared
    // object instead of copying it a second time.
    _sparse_cols.push_back(std::make_shared<TabletColumn>(std::move(column)));
    _num_sparse_columns++;
}
931
932
39
// Appends `index` to the schema and registers one lookup entry per indexed column, keyed by
// (index type, column unique id, index suffix) and pointing at the new index's position.
void TabletSchema::append_index(TabletIndex&& index) {
    for (int32_t id : index.col_unique_ids()) {
        _col_id_suffix_to_index.emplace(
                std::make_tuple(index.index_type(), id, index.get_index_suffix()), _indexes.size());
    }
    // A named rvalue reference is an lvalue: without std::move the index would be copied.
    // All reads of `index` happen above, before it is moved from.
    _indexes.push_back(std::make_shared<TabletIndex>(std::move(index)));
}
939
940
void TabletSchema::update_index(const TabletColumn& col, const IndexType& index_type,
941
2
                                TabletIndex&& index) {
942
2
    int32_t col_unique_id = col.is_extracted_column() ? col.parent_unique_id() : col.unique_id();
943
2
    const std::string& suffix_path = escape_for_path_name(col.suffix_path());
944
2
    IndexKey key(index_type, col_unique_id, suffix_path);
945
2
    auto iter = _col_id_suffix_to_index.find(key);
946
2
    if (iter != _col_id_suffix_to_index.end()) {
947
1
        _indexes[iter->second] = std::make_shared<TabletIndex>(std::move(index));
948
1
        return;
949
1
    }
950
2
    LOG(WARNING) << " failed to update_index: " << index_type << " " << col_unique_id << " "
951
1
                 << suffix_path;
952
1
}
953
954
0
void TabletSchema::replace_column(size_t pos, TabletColumn new_col) {
955
0
    CHECK_LT(pos, num_columns()) << " outof range";
956
0
    _cols[pos] = std::make_shared<TabletColumn>(std::move(new_col));
957
0
}
958
959
1
void TabletSchema::clear_index() {
960
1
    _indexes.clear();
961
1
    _col_id_suffix_to_index.clear();
962
1
}
963
964
4
void TabletSchema::remove_index(int64_t index_id) {
965
4
    std::vector<TabletIndexPtr> indexes;
966
4
    std::unordered_map<IndexKey, int32_t, IndexKeyHash> col_id_suffix_to_index;
967
5
    for (auto index : _indexes) {
968
5
        if (index->index_id() == index_id) {
969
4
            continue;
970
4
        }
971
1
        for (int32_t col_uid : index->col_unique_ids()) {
972
1
            col_id_suffix_to_index.emplace(
973
1
                    std::make_tuple(index->index_type(), col_uid, index->get_index_suffix()),
974
1
                    indexes.size());
975
1
        }
976
1
        indexes.emplace_back(std::move(index));
977
1
    }
978
4
    _indexes = std::move(indexes);
979
4
    _col_id_suffix_to_index = std::move(col_id_suffix_to_index);
980
4
}
981
982
0
void TabletSchema::clear_columns() {
983
0
    _field_path_to_index.clear();
984
0
    _field_name_to_index.clear();
985
0
    _field_uniqueid_to_index.clear();
986
0
    _num_columns = 0;
987
0
    _num_variant_columns = 0;
988
0
    _num_null_columns = 0;
989
0
    _num_key_columns = 0;
990
0
    _cols.clear();
991
0
}
992
993
void TabletSchema::init_from_pb(const TabletSchemaPB& schema, bool ignore_extracted_columns,
994
1.64k
                                bool reuse_cache_column) {
995
1.64k
    _keys_type = schema.keys_type();
996
1.64k
    _num_columns = 0;
997
1.64k
    _num_variant_columns = 0;
998
1.64k
    _num_key_columns = 0;
999
1.64k
    _num_null_columns = 0;
1000
1.64k
    _cols.clear();
1001
1.64k
    _indexes.clear();
1002
1.64k
    _col_id_suffix_to_index.clear();
1003
1.64k
    _field_name_to_index.clear();
1004
1.64k
    _field_uniqueid_to_index.clear();
1005
1.64k
    _cluster_key_uids.clear();
1006
1.64k
    for (const auto& i : schema.cluster_key_uids()) {
1007
6
        _cluster_key_uids.push_back(i);
1008
6
    }
1009
12.3k
    for (auto& column_pb : schema.column()) {
1010
12.3k
        TabletColumnPtr column;
1011
12.3k
        if (reuse_cache_column) {
1012
258
            auto pair = TabletColumnObjectPool::instance()->insert(
1013
258
                    deterministic_string_serialize(column_pb));
1014
258
            column = pair.second;
1015
            // Release the handle quickly, because we use shared ptr to manage column.
1016
            // It often core during tablet schema copy to another schema because handle's
1017
            // reference count should be managed mannually.
1018
258
            TabletColumnObjectPool::instance()->release(pair.first);
1019
12.0k
        } else {
1020
12.0k
            column = std::make_shared<TabletColumn>();
1021
12.0k
            column->init_from_pb(column_pb);
1022
12.0k
        }
1023
12.3k
        if (ignore_extracted_columns && column->is_extracted_column()) {
1024
0
            continue;
1025
0
        }
1026
12.3k
        if (column->is_key()) {
1027
2.30k
            _num_key_columns++;
1028
2.30k
        }
1029
12.3k
        if (column->is_nullable()) {
1030
7.41k
            _num_null_columns++;
1031
7.41k
        }
1032
12.3k
        if (column->is_variant_type()) {
1033
6
            ++_num_variant_columns;
1034
6
        }
1035
1036
12.3k
        _cols.emplace_back(std::move(column));
1037
12.3k
        if (!_cols.back()->is_extracted_column()) {
1038
12.3k
            _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns);
1039
12.3k
            _field_uniqueid_to_index[_cols.back()->unique_id()] = _num_columns;
1040
12.3k
        }
1041
12.3k
        _num_columns++;
1042
12.3k
    }
1043
7.33k
    for (const auto& index_pb : schema.index()) {
1044
7.33k
        TabletIndexPtr index;
1045
7.33k
        if (reuse_cache_column) {
1046
112
            auto pair = TabletColumnObjectPool::instance()->insert_index(
1047
112
                    deterministic_string_serialize(index_pb));
1048
112
            index = pair.second;
1049
            //  Only need the value to be cached by the pool, release it quickly because the handle need
1050
            // record reference count mannually, or it will core during tablet schema copy method.
1051
112
            TabletColumnObjectPool::instance()->release(pair.first);
1052
7.22k
        } else {
1053
7.22k
            index = std::make_shared<TabletIndex>();
1054
7.22k
            index->init_from_pb(index_pb);
1055
7.22k
        }
1056
7.33k
        for (int32_t col_uid : index->col_unique_ids()) {
1057
7.33k
            _col_id_suffix_to_index.emplace(
1058
7.33k
                    std::make_tuple(index->index_type(), col_uid, index->get_index_suffix()),
1059
7.33k
                    _indexes.size());
1060
7.33k
        }
1061
7.33k
        _indexes.emplace_back(std::move(index));
1062
7.33k
    }
1063
1.64k
    _num_short_key_columns = schema.num_short_key_columns();
1064
1.64k
    _num_rows_per_row_block = schema.num_rows_per_row_block();
1065
1.64k
    _compress_kind = schema.compress_kind();
1066
1.64k
    _next_column_unique_id = schema.next_column_unique_id();
1067
1.64k
    if (schema.has_bf_fpp()) {
1068
3
        _has_bf_fpp = true;
1069
3
        _bf_fpp = schema.bf_fpp();
1070
1.63k
    } else {
1071
1.63k
        _has_bf_fpp = false;
1072
1.63k
        _bf_fpp = BLOOM_FILTER_DEFAULT_FPP;
1073
1.63k
    }
1074
1.64k
    _is_in_memory = schema.is_in_memory();
1075
1.64k
    _disable_auto_compaction = schema.disable_auto_compaction();
1076
1.64k
    _enable_single_replica_compaction = schema.enable_single_replica_compaction();
1077
1.64k
    _store_row_column = schema.store_row_column();
1078
1.64k
    _skip_write_index_on_load = schema.skip_write_index_on_load();
1079
1.64k
    _delete_sign_idx = schema.delete_sign_idx();
1080
1.64k
    _sequence_col_idx = schema.sequence_col_idx();
1081
1.64k
    _version_col_idx = schema.version_col_idx();
1082
1.64k
    _skip_bitmap_col_idx = schema.skip_bitmap_col_idx();
1083
1.64k
    _sort_type = schema.sort_type();
1084
1.64k
    _sort_col_num = schema.sort_col_num();
1085
1.64k
    _compression_type = schema.compression_type();
1086
1.64k
    _row_store_page_size = schema.row_store_page_size();
1087
1.64k
    _storage_page_size = schema.storage_page_size();
1088
1.64k
    _storage_dict_page_size = schema.storage_dict_page_size();
1089
1.64k
    _schema_version = schema.schema_version();
1090
    // Default to V1 inverted index storage format for backward compatibility if not specified in schema.
1091
1.64k
    if (!schema.has_inverted_index_storage_format()) {
1092
223
        _inverted_index_storage_format = InvertedIndexStorageFormatPB::V1;
1093
1.41k
    } else {
1094
1.41k
        _inverted_index_storage_format = schema.inverted_index_storage_format();
1095
1.41k
    }
1096
1097
1.64k
    _row_store_column_unique_ids.assign(schema.row_store_column_unique_ids().begin(),
1098
1.64k
                                        schema.row_store_column_unique_ids().end());
1099
1.64k
    _enable_variant_flatten_nested = schema.enable_variant_flatten_nested();
1100
1.64k
    update_metadata_size();
1101
1.64k
}
1102
1103
377
void TabletSchema::copy_from(const TabletSchema& tablet_schema) {
1104
377
    TabletSchemaPB tablet_schema_pb;
1105
377
    tablet_schema.to_schema_pb(&tablet_schema_pb);
1106
377
    init_from_pb(tablet_schema_pb);
1107
377
    _table_id = tablet_schema.table_id();
1108
377
}
1109
1110
3
void TabletSchema::shawdow_copy_without_columns(const TabletSchema& tablet_schema) {
1111
3
    *this = tablet_schema;
1112
3
    _field_path_to_index.clear();
1113
3
    _field_name_to_index.clear();
1114
3
    _field_uniqueid_to_index.clear();
1115
3
    _num_columns = 0;
1116
3
    _num_variant_columns = 0;
1117
3
    _num_null_columns = 0;
1118
3
    _num_key_columns = 0;
1119
3
    _cols.clear();
1120
3
}
1121
1122
0
void TabletSchema::update_index_info_from(const TabletSchema& tablet_schema) {
1123
0
    for (auto& col : _cols) {
1124
0
        if (col->unique_id() < 0) {
1125
0
            continue;
1126
0
        }
1127
0
        const auto iter = tablet_schema._field_uniqueid_to_index.find(col->unique_id());
1128
0
        if (iter == tablet_schema._field_uniqueid_to_index.end()) {
1129
0
            continue;
1130
0
        }
1131
0
        auto col_idx = iter->second;
1132
0
        if (col_idx < 0 || col_idx >= tablet_schema._cols.size()) {
1133
0
            continue;
1134
0
        }
1135
0
        col->set_is_bf_column(tablet_schema._cols[col_idx]->is_bf_column());
1136
0
        col->set_has_bitmap_index(tablet_schema._cols[col_idx]->has_bitmap_index());
1137
0
    }
1138
0
}
1139
1140
3.09k
std::string TabletSchema::to_key() const {
1141
3.09k
    TabletSchemaPB pb;
1142
3.09k
    to_schema_pb(&pb);
1143
3.09k
    return TabletSchema::deterministic_string_serialize(pb);
1144
3.09k
}
1145
1146
void TabletSchema::build_current_tablet_schema(int64_t index_id, int32_t version,
1147
                                               const OlapTableIndexSchema* index,
1148
0
                                               const TabletSchema& ori_tablet_schema) {
1149
    // copy from ori_tablet_schema
1150
0
    _keys_type = ori_tablet_schema.keys_type();
1151
0
    _num_short_key_columns = ori_tablet_schema.num_short_key_columns();
1152
0
    _num_rows_per_row_block = ori_tablet_schema.num_rows_per_row_block();
1153
0
    _compress_kind = ori_tablet_schema.compress_kind();
1154
1155
    // todo(yixiu): unique_id
1156
0
    _next_column_unique_id = ori_tablet_schema.next_column_unique_id();
1157
0
    _is_in_memory = ori_tablet_schema.is_in_memory();
1158
0
    _disable_auto_compaction = ori_tablet_schema.disable_auto_compaction();
1159
0
    _enable_single_replica_compaction = ori_tablet_schema.enable_single_replica_compaction();
1160
0
    _skip_write_index_on_load = ori_tablet_schema.skip_write_index_on_load();
1161
0
    _sort_type = ori_tablet_schema.sort_type();
1162
0
    _sort_col_num = ori_tablet_schema.sort_col_num();
1163
0
    _row_store_page_size = ori_tablet_schema.row_store_page_size();
1164
0
    _storage_page_size = ori_tablet_schema.storage_page_size();
1165
0
    _storage_dict_page_size = ori_tablet_schema.storage_dict_page_size();
1166
0
    _enable_variant_flatten_nested = ori_tablet_schema.variant_flatten_nested();
1167
1168
    // copy from table_schema_param
1169
0
    _schema_version = version;
1170
0
    _num_columns = 0;
1171
0
    _num_variant_columns = 0;
1172
0
    _num_key_columns = 0;
1173
0
    _num_null_columns = 0;
1174
0
    bool has_bf_columns = false;
1175
0
    _cols.clear();
1176
0
    _indexes.clear();
1177
0
    _col_id_suffix_to_index.clear();
1178
0
    _field_name_to_index.clear();
1179
0
    _field_uniqueid_to_index.clear();
1180
0
    _delete_sign_idx = -1;
1181
0
    _sequence_col_idx = -1;
1182
0
    _version_col_idx = -1;
1183
0
    _skip_bitmap_col_idx = -1;
1184
0
    _cluster_key_uids.clear();
1185
0
    for (const auto& i : ori_tablet_schema._cluster_key_uids) {
1186
0
        _cluster_key_uids.push_back(i);
1187
0
    }
1188
0
    for (auto& column : index->columns) {
1189
0
        if (column->is_key()) {
1190
0
            _num_key_columns++;
1191
0
        }
1192
0
        if (column->is_nullable()) {
1193
0
            _num_null_columns++;
1194
0
        }
1195
0
        if (column->is_bf_column()) {
1196
0
            has_bf_columns = true;
1197
0
        }
1198
0
        if (column->is_variant_type()) {
1199
0
            ++_num_variant_columns;
1200
0
        }
1201
0
        if (UNLIKELY(column->name() == DELETE_SIGN)) {
1202
0
            _delete_sign_idx = _num_columns;
1203
0
        } else if (UNLIKELY(column->name() == SEQUENCE_COL)) {
1204
0
            _sequence_col_idx = _num_columns;
1205
0
        } else if (UNLIKELY(column->name() == VERSION_COL)) {
1206
0
            _version_col_idx = _num_columns;
1207
0
        } else if (UNLIKELY(column->name() == SKIP_BITMAP_COL)) {
1208
0
            _skip_bitmap_col_idx = _num_columns;
1209
0
        }
1210
0
        _cols.emplace_back(std::make_shared<TabletColumn>(*column));
1211
0
        _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns);
1212
0
        _field_uniqueid_to_index[_cols.back()->unique_id()] = _num_columns;
1213
0
        _num_columns++;
1214
0
    }
1215
1216
0
    for (const auto& i : index->indexes) {
1217
0
        for (int32_t col_uid : i->col_unique_ids()) {
1218
0
            _col_id_suffix_to_index.emplace(
1219
0
                    std::make_tuple(i->index_type(), col_uid, i->get_index_suffix()),
1220
0
                    _indexes.size());
1221
0
        }
1222
0
        _indexes.emplace_back(std::make_shared<TabletIndex>(*i));
1223
0
    }
1224
1225
0
    if (has_bf_columns) {
1226
0
        _has_bf_fpp = true;
1227
0
        _bf_fpp = ori_tablet_schema.bloom_filter_fpp();
1228
0
    } else {
1229
0
        _has_bf_fpp = false;
1230
0
        _bf_fpp = BLOOM_FILTER_DEFAULT_FPP;
1231
0
    }
1232
0
}
1233
1234
149
void TabletSchema::merge_dropped_columns(const TabletSchema& src_schema) {
1235
    // If they are the same tablet schema object, then just return
1236
149
    if (this == &src_schema) {
1237
0
        return;
1238
0
    }
1239
2.90k
    for (const auto& src_col : src_schema.columns()) {
1240
2.90k
        if (_field_uniqueid_to_index.find(src_col->unique_id()) == _field_uniqueid_to_index.end()) {
1241
0
            CHECK(!src_col->is_key())
1242
0
                    << src_col->name() << " is key column, should not be dropped.";
1243
0
            ColumnPB src_col_pb;
1244
            // There are some pointer in tablet column, not sure the reference relation, so
1245
            // that deep copy it.
1246
0
            src_col->to_schema_pb(&src_col_pb);
1247
0
            TabletColumn new_col(src_col_pb);
1248
0
            append_column(new_col, TabletSchema::ColumnType::DROPPED);
1249
0
        }
1250
2.90k
    }
1251
149
}
1252
1253
0
// Returns a new schema built from this schema's protobuf form with extracted columns
// skipped (init_from_pb is called with ignore_extracted_columns = true).
TabletSchemaSPtr TabletSchema::copy_without_variant_extracted_columns() {
    TabletSchemaPB serialized;
    to_schema_pb(&serialized);

    TabletSchemaSPtr stripped = std::make_shared<TabletSchema>();
    stripped->init_from_pb(serialized, true /*ignore extracted_columns*/);
    return stripped;
}
1260
1261
// Dropped column is in _field_uniqueid_to_index but not in _field_name_to_index
1262
// Could refer to append_column method
1263
6.18k
bool TabletSchema::is_dropped_column(const TabletColumn& col) const {
1264
6.18k
    CHECK(_field_uniqueid_to_index.find(col.unique_id()) != _field_uniqueid_to_index.end())
1265
0
            << "could not find col with unique id = " << col.unique_id()
1266
0
            << " and name = " << col.name() << " table_id=" << _table_id;
1267
6.18k
    auto it = _field_name_to_index.find(StringRef {col.name()});
1268
6.18k
    return it == _field_name_to_index.end() || _cols[it->second]->unique_id() != col.unique_id();
1269
6.18k
}
1270
1271
0
void TabletSchema::copy_extracted_columns(const TabletSchema& src_schema) {
1272
0
    std::unordered_set<int32_t> variant_columns;
1273
0
    for (const auto& col : columns()) {
1274
0
        if (col->is_variant_type()) {
1275
0
            variant_columns.insert(col->unique_id());
1276
0
        }
1277
0
    }
1278
0
    for (const TabletColumnPtr& col : src_schema.columns()) {
1279
0
        if (col->is_extracted_column() && variant_columns.contains(col->parent_unique_id())) {
1280
0
            ColumnPB col_pb;
1281
0
            col->to_schema_pb(&col_pb);
1282
0
            TabletColumn new_col(col_pb);
1283
0
            append_column(new_col, ColumnType::VARIANT);
1284
0
        }
1285
0
    }
1286
0
}
1287
1288
0
void TabletSchema::reserve_extracted_columns() {
1289
0
    for (auto it = _cols.begin(); it != _cols.end();) {
1290
0
        if (!(*it)->is_extracted_column()) {
1291
0
            it = _cols.erase(it);
1292
0
        } else {
1293
0
            ++it;
1294
0
        }
1295
0
    }
1296
0
}
1297
1298
5.29k
// Serializes this schema into `tablet_schema_pb`: cluster key uids, keys type, every column
// and index, and the table-level settings. `bf_fpp` is written only when `_has_bf_fpp` is
// set.
void TabletSchema::to_schema_pb(TabletSchemaPB* tablet_schema_pb) const {
    auto& pb = *tablet_schema_pb;

    for (const auto& cluster_key_uid : _cluster_key_uids) {
        pb.add_cluster_key_uids(cluster_key_uid);
    }
    pb.set_keys_type(_keys_type);

    for (const auto& col : _cols) {
        col->to_schema_pb(pb.add_column());
    }
    for (const auto& index : _indexes) {
        index->to_schema_pb(pb.add_index());
    }

    pb.set_num_short_key_columns(_num_short_key_columns);
    pb.set_num_rows_per_row_block(_num_rows_per_row_block);
    pb.set_compress_kind(_compress_kind);
    if (_has_bf_fpp) {
        pb.set_bf_fpp(_bf_fpp);
    }
    pb.set_next_column_unique_id(_next_column_unique_id);
    pb.set_is_in_memory(_is_in_memory);
    pb.set_disable_auto_compaction(_disable_auto_compaction);
    pb.set_enable_single_replica_compaction(_enable_single_replica_compaction);
    pb.set_store_row_column(_store_row_column);
    pb.set_skip_write_index_on_load(_skip_write_index_on_load);
    pb.set_delete_sign_idx(_delete_sign_idx);
    pb.set_sequence_col_idx(_sequence_col_idx);
    pb.set_sort_type(_sort_type);
    pb.set_sort_col_num(_sort_col_num);
    pb.set_schema_version(_schema_version);
    pb.set_compression_type(_compression_type);
    pb.set_row_store_page_size(_row_store_page_size);
    pb.set_storage_page_size(_storage_page_size);
    pb.set_storage_dict_page_size(_storage_dict_page_size);
    pb.set_version_col_idx(_version_col_idx);
    pb.set_skip_bitmap_col_idx(_skip_bitmap_col_idx);
    pb.set_inverted_index_storage_format(_inverted_index_storage_format);
    pb.mutable_row_store_column_unique_ids()->Assign(_row_store_column_unique_ids.begin(),
                                                     _row_store_column_unique_ids.end());
    pb.set_enable_variant_flatten_nested(_enable_variant_flatten_nested);
}
1339
1340
0
size_t TabletSchema::row_size() const {
1341
0
    size_t size = 0;
1342
0
    for (const auto& column : _cols) {
1343
0
        size += column->length();
1344
0
    }
1345
0
    size += (_num_columns + 7) / 8;
1346
1347
0
    return size;
1348
0
}
1349
1350
1.21k
int32_t TabletSchema::field_index(const std::string& field_name) const {
1351
1.21k
    const auto& found = _field_name_to_index.find(StringRef(field_name));
1352
1.21k
    return (found == _field_name_to_index.end()) ? -1 : found->second;
1353
1.21k
}
1354
1355
0
// Returns the position of the column registered under `path`, or -1 if there is none.
int32_t TabletSchema::field_index(const vectorized::PathInData& path) const {
    const auto entry = _field_path_to_index.find(vectorized::PathInDataRef(&path));
    if (entry == _field_path_to_index.end()) {
        return -1;
    }
    return entry->second;
}
1359
1360
191
int32_t TabletSchema::field_index(int32_t col_unique_id) const {
1361
191
    const auto& found = _field_uniqueid_to_index.find(col_unique_id);
1362
191
    return (found == _field_uniqueid_to_index.end()) ? -1 : found->second;
1363
191
}
1364
1365
26.5k
const std::vector<TabletColumnPtr>& TabletSchema::columns() const {
1366
26.5k
    return _cols;
1367
26.5k
}
1368
1369
8
const std::vector<TabletColumnPtr>& TabletColumn::sparse_columns() const {
1370
8
    return _sparse_cols;
1371
8
}
1372
1373
147k
// Returns the column at position `ordinal`. The bound is only verified by a DCHECK.
const TabletColumn& TabletSchema::column(size_t ordinal) const {
    DCHECK(ordinal < _num_columns) << "ordinal:" << ordinal << ", _num_columns:" << _num_columns;
    const auto& col_ptr = _cols[ordinal];
    return *col_ptr;
}
1377
1378
0
// Returns the sparse sub-column at position `ordinal`. The bound is only verified by a
// DCHECK against the number of sparse sub-columns.
const TabletColumn& TabletColumn::sparse_column_at(size_t ordinal) const {
    DCHECK(ordinal < _sparse_cols.size())
            << "ordinal:" << ordinal << ", _num_columns:" << _sparse_cols.size();
    const auto& sparse_ptr = _sparse_cols[ordinal];
    return *sparse_ptr;
}
1383
1384
960
// Returns the column with unique id `col_unique_id`. Both lookups use at(), which throws
// std::out_of_range when the id or the recorded position is unknown.
const TabletColumn& TabletSchema::column_by_uid(int32_t col_unique_id) const {
    const auto position = _field_uniqueid_to_index.at(col_unique_id);
    return *_cols.at(position);
}
1387
1388
0
// Mutable counterpart of column_by_uid(). Both lookups use at(), which throws
// std::out_of_range when the id or the recorded position is unknown.
TabletColumn& TabletSchema::mutable_column_by_uid(int32_t col_unique_id) {
    const auto position = _field_uniqueid_to_index.at(col_unique_id);
    return *_cols.at(position);
}
1391
1392
14
// Returns a mutable reference to the column at position `ordinal`; at() throws
// std::out_of_range for an invalid position.
TabletColumn& TabletSchema::mutable_column(size_t ordinal) {
    auto& col_ptr = _cols.at(ordinal);
    return *col_ptr;
}
1395
1396
0
// Replaces all indexes with the ones described by `tindexes` (each initialized against this
// schema) and rebuilds the (type, column uid, suffix) lookup from scratch.
void TabletSchema::update_indexes_from_thrift(const std::vector<doris::TOlapTableIndex>& tindexes) {
    std::vector<TabletIndexPtr> rebuilt_indexes;
    for (const auto& tindex : tindexes) {
        TabletIndex parsed;
        parsed.init_from_thrift(tindex, *this);
        rebuilt_indexes.emplace_back(std::make_shared<TabletIndex>(std::move(parsed)));
    }
    _indexes = std::move(rebuilt_indexes);

    std::unordered_map<IndexKey, int32_t, IndexKeyHash> rebuilt_lookup;
    size_t position = 0;
    for (const auto& index : _indexes) {
        for (int32_t col_uid : index->col_unique_ids()) {
            rebuilt_lookup.emplace(
                    std::make_tuple(index->index_type(), col_uid, index->get_index_suffix()),
                    position);
        }
        ++position;
    }
    _col_id_suffix_to_index = std::move(rebuilt_lookup);
}
1414
1415
0
bool TabletSchema::exist_column(const std::string& field_name) const {
1416
0
    return _field_name_to_index.contains(StringRef {field_name});
1417
0
}
1418
1419
423
bool TabletSchema::has_column_unique_id(int32_t col_unique_id) const {
1420
423
    return _field_uniqueid_to_index.contains(col_unique_id);
1421
423
}
1422
1423
4.04k
// Checks that `field_name` is present in `_field_name_to_index`.
// Returns Status::OK() when it is, otherwise an INTERNAL_ERROR status whose
// message carries the requested name and the output of get_all_field_names().
Status TabletSchema::have_column(const std::string& field_name) const {
    if (_field_name_to_index.contains(StringRef(field_name))) {
        return Status::OK();
    }
    return Status::Error<ErrorCode::INTERNAL_ERROR>(
            "Not found field_name, field_name:{}, schema:{}", field_name,
            get_all_field_names());
}
1431
1432
172
// Looks up a column by name.
// Returns a pointer to the column when `field_name` is found. Otherwise the
// DCHECK fires and an InternalError naming the column, table id and schema
// version is returned.
Result<const TabletColumn*> TabletSchema::column(const std::string& field_name) const {
    const auto found = _field_name_to_index.find(StringRef {field_name});
    if (found != _field_name_to_index.end()) {
        return _cols[found->second].get();
    }

    // A missing name is treated as a programming error: fail the DCHECK first,
    // then still return an error to the caller.
    DCHECK(false) << "field_name=" << field_name << ", table_id=" << _table_id
                  << ", field_name_to_index=" << get_all_field_names();
    return ResultError(
            Status::InternalError("column not found, name={}, table_id={}, schema_version={}",
                                  field_name, _table_id, _schema_version));
}
1443
1444
void TabletSchema::update_tablet_columns(const TabletSchema& tablet_schema,
1445
0
                                         const std::vector<TColumn>& t_columns) {
1446
0
    copy_from(tablet_schema);
1447
0
    if (!t_columns.empty() && t_columns[0].col_unique_id >= 0) {
1448
0
        clear_columns();
1449
0
        for (const auto& column : t_columns) {
1450
0
            append_column(TabletColumn(column));
1451
0
        }
1452
0
    }
1453
0
}
1454
1455
55
bool TabletSchema::has_inverted_index_with_index_id(int64_t index_id) const {
1456
72
    for (size_t i = 0; i < _indexes.size(); i++) {
1457
37
        if (_indexes[i]->index_type() == IndexType::INVERTED &&
1458
37
            _indexes[i]->index_id() == index_id) {
1459
20
            return true;
1460
20
        }
1461
37
    }
1462
35
    return false;
1463
55
}
1464
1465
// Finds the inverted index registered for (`col_unique_id`, `suffix_path`).
// `suffix_path` is passed through escape_for_path_name() before the lookup.
// Returns nullptr when `_col_id_suffix_to_index` has no matching entry.
const TabletIndex* TabletSchema::inverted_index(int32_t col_unique_id,
                                                const std::string& suffix_path) const {
    const IndexKey key(IndexType::INVERTED, col_unique_id, escape_for_path_name(suffix_path));
    const auto found = _col_id_suffix_to_index.find(key);
    if (found == _col_id_suffix_to_index.end()) {
        return nullptr;
    }
    return _indexes[found->second].get();
}
1475
1476
14.0k
// Finds the inverted index that applies to `col`, or nullptr when there is none.
const TabletIndex* TabletSchema::inverted_index(const TabletColumn& col) const {
    // Some columns (Float, Double, JSONB ...) from the variant do not support inverted index.
    if (!segment_v2::InvertedIndexColumnWriter::check_support_inverted_index(col)) {
        return nullptr;
    }

    // TODO use more efficient impl
    // Use the parent id if the unique id is not assigned; this can happen when
    // accessing subcolumns of variants.
    int32_t lookup_uid;
    if (col.is_extracted_column()) {
        lookup_uid = col.parent_unique_id();
    } else {
        lookup_uid = col.unique_id();
    }

    // NOTE(review): the suffix is escaped here and then again inside the
    // two-argument overload -- confirm this matches how index suffixes are stored.
    return inverted_index(lookup_uid, escape_for_path_name(col.suffix_path()));
}
1486
1487
0
bool TabletSchema::has_ngram_bf_index(int32_t col_unique_id) const {
1488
0
    IndexKey index_key(IndexType::NGRAM_BF, col_unique_id, "");
1489
0
    auto it = _col_id_suffix_to_index.find(index_key);
1490
0
    return it != _col_id_suffix_to_index.end();
1491
0
}
1492
1493
13.2k
// Returns the NGRAM_BF index registered for `col_unique_id` (empty suffix), or
// nullptr when `_col_id_suffix_to_index` has no such entry.
const TabletIndex* TabletSchema::get_ngram_bf_index(int32_t col_unique_id) const {
    const IndexKey key(IndexType::NGRAM_BF, col_unique_id, "");
    const auto found = _col_id_suffix_to_index.find(key);
    if (found == _col_id_suffix_to_index.end()) {
        return nullptr;
    }
    return _indexes[found->second].get();
}
1502
1503
// Builds a Block with one entry per column id in `return_columns`, in that order.
//
// return_columns: positions into `_cols` of the columns to include.
// tablet_columns_need_convert_null: optional set of column ids; a column whose
//     id is in the set has `is_nullable = true` passed to create_data_type().
//     May be nullptr, in which case `false` is passed for every column.
//
// Column ids found in `_vir_col_idx_to_unique_id` are inserted with a
// ColumnNothing placeholder; all other columns get a column created from their
// data type.
vectorized::Block TabletSchema::create_block(
        const std::vector<uint32_t>& return_columns,
        const std::unordered_set<uint32_t>* tablet_columns_need_convert_null) const {
    vectorized::Block block;
    // Range-for avoids comparing a signed index against the unsigned size().
    for (const ColumnId cid : return_columns) {
        const auto& col = *_cols[cid];
        const bool is_nullable = (tablet_columns_need_convert_null != nullptr &&
                                  tablet_columns_need_convert_null->find(cid) !=
                                          tablet_columns_need_convert_null->end());
        auto data_type = vectorized::DataTypeFactory::instance().create_data_type(col, is_nullable);
        if (_vir_col_idx_to_unique_id.contains(cid)) {
            block.insert({vectorized::ColumnNothing::create(0), data_type, col.name()});
            VLOG_DEBUG << fmt::format(
                    "Create block from tablet schema, column cid {} is virtual column, col_name: "
                    "{}, col_unique_id: {}, type {}",
                    cid, col.name(), col.unique_id(), data_type->get_name());
        } else {
            block.insert({data_type->create_column(), data_type, col.name()});
        }
    }
    return block;
}
1526
1527
1.95k
// Builds a Block with one entry per column of this schema, in `_cols` order.
// When `ignore_dropped_col` is true, columns for which is_dropped_column()
// returns true are left out.
vectorized::Block TabletSchema::create_block(bool ignore_dropped_col) const {
    vectorized::Block result;
    for (const auto& column_ptr : _cols) {
        const bool skip = ignore_dropped_col && is_dropped_column(*column_ptr);
        if (!skip) {
            auto type = vectorized::DataTypeFactory::instance().create_data_type(*column_ptr);
            result.insert({type->create_column(), type, column_ptr->name()});
        }
    }
    return result;
}
1538
1539
0
// Builds a Block with one entry per column id in `cids`, in that order.
// Each id is used as a position into `_cols`.
vectorized::Block TabletSchema::create_block_by_cids(const std::vector<uint32_t>& cids) const {
    vectorized::Block result;
    for (const uint32_t cid : cids) {
        const auto& column = *_cols[cid];
        auto type = vectorized::DataTypeFactory::instance().create_data_type(column);
        result.insert({type->create_column(), type, column.name()});
    }
    return result;
}
1548
1549
0
// Field-by-field equality for TabletColumn.
// `_default_value` is compared only when both columns have a default value.
// `_precision` and `_frac` are compared only when both columns are decimal.
// `_column_path` is equal when both pointers are null, or when both are set and
// the pointed-to paths compare equal.
bool operator==(const TabletColumn& a, const TabletColumn& b) {
    if (a._unique_id != b._unique_id) return false;
    if (a._col_name != b._col_name) return false;
    if (a._type != b._type) return false;
    if (a._is_key != b._is_key) return false;
    if (a._aggregation != b._aggregation) return false;
    if (a._is_nullable != b._is_nullable) return false;
    if (a._has_default_value != b._has_default_value) return false;
    if (a._has_default_value) {
        if (a._default_value != b._default_value) return false;
    }
    if (a._is_decimal != b._is_decimal) return false;
    if (a._is_decimal) {
        if (a._precision != b._precision) return false;
        if (a._frac != b._frac) return false;
    }
    if (a._length != b._length) return false;
    if (a._index_length != b._index_length) return false;
    if (a._is_bf_column != b._is_bf_column) return false;
    if (a._has_bitmap_index != b._has_bitmap_index) return false;

    // A path present on only one side means the columns differ. The previous
    // check tested `a` against itself, so "a has no path, b has one" slipped
    // through as equal.
    const bool a_has_path = a._column_path != nullptr;
    const bool b_has_path = b._column_path != nullptr;
    if (a_has_path != b_has_path) return false;
    if (a_has_path && *a._column_path != *b._column_path) return false;
    return true;
}
1576
1577
0
// Inequality for TabletColumn, defined as the negation of operator==.
bool operator!=(const TabletColumn& a, const TabletColumn& b) {
    const bool same = (a == b);
    return !same;
}
1580
1581
1
// Equality for TabletSchema over the fields listed below.
// Columns are compared pairwise, in order, with TabletColumn's operator!=.
// `_bf_fpp` is compared (with a 1e-6 tolerance) only when both schemas have it set.
// Members not listed here do not take part in the comparison.
bool operator==(const TabletSchema& a, const TabletSchema& b) {
    if (a._keys_type != b._keys_type) return false;
    if (a._cols.size() != b._cols.size()) return false;
    // size_t index: avoids comparing a signed int against the unsigned size().
    for (size_t i = 0; i < a._cols.size(); ++i) {
        if (*a._cols[i] != *b._cols[i]) return false;
    }
    if (a._num_columns != b._num_columns) return false;
    if (a._num_key_columns != b._num_key_columns) return false;
    if (a._num_null_columns != b._num_null_columns) return false;
    if (a._num_short_key_columns != b._num_short_key_columns) return false;
    if (a._num_rows_per_row_block != b._num_rows_per_row_block) return false;
    if (a._compress_kind != b._compress_kind) return false;
    if (a._next_column_unique_id != b._next_column_unique_id) return false;
    if (a._has_bf_fpp != b._has_bf_fpp) return false;
    if (a._has_bf_fpp) {
        // Floating-point value: compare with a tolerance instead of ==.
        if (std::abs(a._bf_fpp - b._bf_fpp) > 1e-6) return false;
    }
    if (a._is_in_memory != b._is_in_memory) return false;
    if (a._delete_sign_idx != b._delete_sign_idx) return false;
    if (a._disable_auto_compaction != b._disable_auto_compaction) return false;
    if (a._enable_single_replica_compaction != b._enable_single_replica_compaction) return false;
    if (a._store_row_column != b._store_row_column) return false;
    if (a._row_store_page_size != b._row_store_page_size) return false;
    if (a._storage_page_size != b._storage_page_size) return false;
    if (a._storage_dict_page_size != b._storage_dict_page_size) return false;
    if (a._skip_write_index_on_load != b._skip_write_index_on_load) return false;
    if (a._enable_variant_flatten_nested != b._enable_variant_flatten_nested) return false;
    return true;
}
1610
1611
1
// Inequality for TabletSchema, defined as the negation of operator==.
bool operator!=(const TabletSchema& a, const TabletSchema& b) {
    const bool same = (a == b);
    return !same;
}
1614
1615
} // namespace doris