Coverage Report

Created: 2025-06-23 17:53

/root/doris/be/src/olap/tablet_schema.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "olap/tablet_schema.h"
19
20
#include <gen_cpp/Descriptors_types.h>
21
#include <gen_cpp/olap_file.pb.h>
22
#include <glog/logging.h>
23
#include <google/protobuf/io/coded_stream.h>
24
#include <google/protobuf/io/zero_copy_stream.h>
25
#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
26
27
#include <algorithm>
28
#include <cctype>
29
// IWYU pragma: no_include <bits/std_abs.h>
30
#include <cmath> // IWYU pragma: keep
31
#include <memory>
32
#include <ostream>
33
#include <vector>
34
35
#include "common/compiler_util.h" // IWYU pragma: keep
36
#include "common/consts.h"
37
#include "common/status.h"
38
#include "exec/tablet_info.h"
39
#include "olap/inverted_index_parser.h"
40
#include "olap/olap_common.h"
41
#include "olap/olap_define.h"
42
#include "olap/tablet_column_object_pool.h"
43
#include "olap/types.h"
44
#include "olap/utils.h"
45
#include "tablet_meta.h"
46
#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
47
#include "vec/aggregate_functions/aggregate_function_state_union.h"
48
#include "vec/common/hex.h"
49
#include "vec/common/string_ref.h"
50
#include "vec/core/block.h"
51
#include "vec/data_types/data_type.h"
52
#include "vec/data_types/data_type_factory.hpp"
53
#include "vec/data_types/data_type_map.h"
54
#include "vec/data_types/data_type_struct.h"
55
#include "vec/json/path_in_data.h"
56
57
namespace doris {
58
59
0
FieldType TabletColumn::get_field_type_by_type(PrimitiveType primitiveType) {
60
0
    switch (primitiveType) {
61
0
    case PrimitiveType::INVALID_TYPE:
62
0
        return FieldType::OLAP_FIELD_TYPE_UNKNOWN;
63
0
    case PrimitiveType::TYPE_NULL:
64
0
        return FieldType::OLAP_FIELD_TYPE_NONE;
65
0
    case PrimitiveType::TYPE_BOOLEAN:
66
0
        return FieldType::OLAP_FIELD_TYPE_BOOL;
67
0
    case PrimitiveType::TYPE_TINYINT:
68
0
        return FieldType::OLAP_FIELD_TYPE_TINYINT;
69
0
    case PrimitiveType::TYPE_SMALLINT:
70
0
        return FieldType::OLAP_FIELD_TYPE_SMALLINT;
71
0
    case PrimitiveType::TYPE_INT:
72
0
        return FieldType::OLAP_FIELD_TYPE_INT;
73
0
    case PrimitiveType::TYPE_BIGINT:
74
0
        return FieldType::OLAP_FIELD_TYPE_BIGINT;
75
0
    case PrimitiveType::TYPE_LARGEINT:
76
0
        return FieldType::OLAP_FIELD_TYPE_LARGEINT;
77
0
    case PrimitiveType::TYPE_FLOAT:
78
0
        return FieldType::OLAP_FIELD_TYPE_FLOAT;
79
0
    case PrimitiveType::TYPE_DOUBLE:
80
0
        return FieldType::OLAP_FIELD_TYPE_DOUBLE;
81
0
    case PrimitiveType::TYPE_VARCHAR:
82
0
        return FieldType::OLAP_FIELD_TYPE_VARCHAR;
83
0
    case PrimitiveType::TYPE_DATE:
84
0
        return FieldType::OLAP_FIELD_TYPE_DATE;
85
0
    case PrimitiveType::TYPE_DATETIME:
86
0
        return FieldType::OLAP_FIELD_TYPE_DATETIME;
87
0
    case PrimitiveType::TYPE_BINARY:
88
0
        return FieldType::OLAP_FIELD_TYPE_UNKNOWN; // Not implemented
89
0
    case PrimitiveType::TYPE_CHAR:
90
0
        return FieldType::OLAP_FIELD_TYPE_CHAR;
91
0
    case PrimitiveType::TYPE_STRUCT:
92
0
        return FieldType::OLAP_FIELD_TYPE_STRUCT;
93
0
    case PrimitiveType::TYPE_ARRAY:
94
0
        return FieldType::OLAP_FIELD_TYPE_ARRAY;
95
0
    case PrimitiveType::TYPE_MAP:
96
0
        return FieldType::OLAP_FIELD_TYPE_MAP;
97
0
    case PrimitiveType::TYPE_HLL:
98
0
        return FieldType::OLAP_FIELD_TYPE_HLL;
99
0
    case PrimitiveType::TYPE_DECIMALV2:
100
0
        return FieldType::OLAP_FIELD_TYPE_UNKNOWN; // Not implemented
101
0
    case PrimitiveType::TYPE_OBJECT:
102
0
        return FieldType::OLAP_FIELD_TYPE_OBJECT;
103
0
    case PrimitiveType::TYPE_STRING:
104
0
        return FieldType::OLAP_FIELD_TYPE_STRING;
105
0
    case PrimitiveType::TYPE_QUANTILE_STATE:
106
0
        return FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE;
107
0
    case PrimitiveType::TYPE_DATEV2:
108
0
        return FieldType::OLAP_FIELD_TYPE_DATEV2;
109
0
    case PrimitiveType::TYPE_DATETIMEV2:
110
0
        return FieldType::OLAP_FIELD_TYPE_DATETIMEV2;
111
0
    case PrimitiveType::TYPE_TIMEV2:
112
0
        return FieldType::OLAP_FIELD_TYPE_TIMEV2;
113
0
    case PrimitiveType::TYPE_DECIMAL32:
114
0
        return FieldType::OLAP_FIELD_TYPE_DECIMAL32;
115
0
    case PrimitiveType::TYPE_DECIMAL64:
116
0
        return FieldType::OLAP_FIELD_TYPE_DECIMAL64;
117
0
    case PrimitiveType::TYPE_DECIMAL128I:
118
0
        return FieldType::OLAP_FIELD_TYPE_DECIMAL128I;
119
0
    case PrimitiveType::TYPE_JSONB:
120
0
        return FieldType::OLAP_FIELD_TYPE_JSONB;
121
0
    case PrimitiveType::TYPE_VARIANT:
122
0
        return FieldType::OLAP_FIELD_TYPE_VARIANT;
123
0
    case PrimitiveType::TYPE_LAMBDA_FUNCTION:
124
0
        return FieldType::OLAP_FIELD_TYPE_UNKNOWN; // Not implemented
125
0
    case PrimitiveType::TYPE_AGG_STATE:
126
0
        return FieldType::OLAP_FIELD_TYPE_AGG_STATE;
127
0
    default:
128
0
        return FieldType::OLAP_FIELD_TYPE_UNKNOWN;
129
0
    }
130
0
}
131
132
12.1k
FieldType TabletColumn::get_field_type_by_string(const std::string& type_str) {
133
12.1k
    std::string upper_type_str = type_str;
134
12.1k
    std::transform(type_str.begin(), type_str.end(), upper_type_str.begin(),
135
68.8k
                   [](auto c) { return std::toupper(c); });
136
12.1k
    FieldType type;
137
138
12.1k
    if (0 == upper_type_str.compare("TINYINT")) {
139
424
        type = FieldType::OLAP_FIELD_TYPE_TINYINT;
140
11.7k
    } else if (0 == upper_type_str.compare("SMALLINT")) {
141
876
        type = FieldType::OLAP_FIELD_TYPE_SMALLINT;
142
10.8k
    } else if (0 == upper_type_str.compare("INT")) {
143
2.47k
        type = FieldType::OLAP_FIELD_TYPE_INT;
144
8.40k
    } else if (0 == upper_type_str.compare("BIGINT")) {
145
154
        type = FieldType::OLAP_FIELD_TYPE_BIGINT;
146
8.25k
    } else if (0 == upper_type_str.compare("LARGEINT")) {
147
137
        type = FieldType::OLAP_FIELD_TYPE_LARGEINT;
148
8.11k
    } else if (0 == upper_type_str.compare("UNSIGNED_TINYINT")) {
149
0
        type = FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT;
150
8.11k
    } else if (0 == upper_type_str.compare("UNSIGNED_SMALLINT")) {
151
0
        type = FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT;
152
8.11k
    } else if (0 == upper_type_str.compare("UNSIGNED_INT")) {
153
0
        type = FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT;
154
8.11k
    } else if (0 == upper_type_str.compare("UNSIGNED_BIGINT")) {
155
0
        type = FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT;
156
8.11k
    } else if (0 == upper_type_str.compare("IPV4")) {
157
0
        type = FieldType::OLAP_FIELD_TYPE_IPV4;
158
8.11k
    } else if (0 == upper_type_str.compare("IPV6")) {
159
0
        type = FieldType::OLAP_FIELD_TYPE_IPV6;
160
8.11k
    } else if (0 == upper_type_str.compare("FLOAT")) {
161
0
        type = FieldType::OLAP_FIELD_TYPE_FLOAT;
162
8.11k
    } else if (0 == upper_type_str.compare("DISCRETE_DOUBLE")) {
163
0
        type = FieldType::OLAP_FIELD_TYPE_DISCRETE_DOUBLE;
164
8.11k
    } else if (0 == upper_type_str.compare("DOUBLE")) {
165
0
        type = FieldType::OLAP_FIELD_TYPE_DOUBLE;
166
8.11k
    } else if (0 == upper_type_str.compare("CHAR")) {
167
138
        type = FieldType::OLAP_FIELD_TYPE_CHAR;
168
7.97k
    } else if (0 == upper_type_str.compare("DATE")) {
169
140
        type = FieldType::OLAP_FIELD_TYPE_DATE;
170
7.83k
    } else if (0 == upper_type_str.compare("DATEV2")) {
171
133
        type = FieldType::OLAP_FIELD_TYPE_DATEV2;
172
7.70k
    } else if (0 == upper_type_str.compare("DATETIMEV2")) {
173
0
        type = FieldType::OLAP_FIELD_TYPE_DATETIMEV2;
174
7.70k
    } else if (0 == upper_type_str.compare("DATETIME")) {
175
167
        type = FieldType::OLAP_FIELD_TYPE_DATETIME;
176
7.53k
    } else if (0 == upper_type_str.compare("DECIMAL32")) {
177
0
        type = FieldType::OLAP_FIELD_TYPE_DECIMAL32;
178
7.53k
    } else if (0 == upper_type_str.compare("DECIMAL64")) {
179
0
        type = FieldType::OLAP_FIELD_TYPE_DECIMAL64;
180
7.53k
    } else if (0 == upper_type_str.compare("DECIMAL128I")) {
181
0
        type = FieldType::OLAP_FIELD_TYPE_DECIMAL128I;
182
7.53k
    } else if (0 == upper_type_str.compare("DECIMAL256")) {
183
0
        type = FieldType::OLAP_FIELD_TYPE_DECIMAL256;
184
7.53k
    } else if (0 == upper_type_str.compare(0, 7, "DECIMAL")) {
185
141
        type = FieldType::OLAP_FIELD_TYPE_DECIMAL;
186
7.39k
    } else if (0 == upper_type_str.compare(0, 7, "VARCHAR")) {
187
162
        type = FieldType::OLAP_FIELD_TYPE_VARCHAR;
188
7.23k
    } else if (0 == upper_type_str.compare("STRING")) {
189
7.14k
        type = FieldType::OLAP_FIELD_TYPE_STRING;
190
7.14k
    } else if (0 == upper_type_str.compare("JSONB")) {
191
0
        type = FieldType::OLAP_FIELD_TYPE_JSONB;
192
95
    } else if (0 == upper_type_str.compare("VARIANT")) {
193
2
        type = FieldType::OLAP_FIELD_TYPE_VARIANT;
194
93
    } else if (0 == upper_type_str.compare("BOOLEAN")) {
195
0
        type = FieldType::OLAP_FIELD_TYPE_BOOL;
196
93
    } else if (0 == upper_type_str.compare(0, 3, "HLL")) {
197
7
        type = FieldType::OLAP_FIELD_TYPE_HLL;
198
86
    } else if (0 == upper_type_str.compare("STRUCT")) {
199
0
        type = FieldType::OLAP_FIELD_TYPE_STRUCT;
200
86
    } else if (0 == upper_type_str.compare("LIST")) {
201
0
        type = FieldType::OLAP_FIELD_TYPE_ARRAY;
202
86
    } else if (0 == upper_type_str.compare("MAP")) {
203
0
        type = FieldType::OLAP_FIELD_TYPE_MAP;
204
86
    } else if (0 == upper_type_str.compare("OBJECT")) {
205
0
        type = FieldType::OLAP_FIELD_TYPE_OBJECT;
206
86
    } else if (0 == upper_type_str.compare("ARRAY")) {
207
2
        type = FieldType::OLAP_FIELD_TYPE_ARRAY;
208
84
    } else if (0 == upper_type_str.compare("QUANTILE_STATE")) {
209
0
        type = FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE;
210
84
    } else if (0 == upper_type_str.compare("AGG_STATE")) {
211
0
        type = FieldType::OLAP_FIELD_TYPE_AGG_STATE;
212
84
    } else {
213
84
        LOG(WARNING) << "invalid type string. [type='" << type_str << "']";
214
84
        type = FieldType::OLAP_FIELD_TYPE_UNKNOWN;
215
84
    }
216
217
12.1k
    return type;
218
12.1k
}
219
220
3.99k
FieldAggregationMethod TabletColumn::get_aggregation_type_by_string(const std::string& str) {
221
3.99k
    std::string upper_str = str;
222
3.99k
    std::transform(str.begin(), str.end(), upper_str.begin(),
223
20.6k
                   [](auto c) { return std::toupper(c); });
224
3.99k
    FieldAggregationMethod aggregation_type;
225
226
3.99k
    if (0 == upper_str.compare("NONE")) {
227
1.68k
        aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE;
228
2.30k
    } else if (0 == upper_str.compare("SUM")) {
229
567
        aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM;
230
1.74k
    } else if (0 == upper_str.compare("MIN")) {
231
4
        aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MIN;
232
1.73k
    } else if (0 == upper_str.compare("MAX")) {
233
4
        aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MAX;
234
1.73k
    } else if (0 == upper_str.compare("REPLACE")) {
235
1.72k
        aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE;
236
1.72k
    } else if (0 == upper_str.compare("REPLACE_IF_NOT_NULL")) {
237
0
        aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL;
238
7
    } else if (0 == upper_str.compare("HLL_UNION")) {
239
7
        aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_HLL_UNION;
240
7
    } else if (0 == upper_str.compare("BITMAP_UNION")) {
241
0
        aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_BITMAP_UNION;
242
0
    } else if (0 == upper_str.compare("QUANTILE_UNION")) {
243
0
        aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_QUANTILE_UNION;
244
0
    } else if (!upper_str.empty()) {
245
0
        aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_GENERIC;
246
0
    } else {
247
0
        aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_UNKNOWN;
248
0
    }
249
250
3.99k
    return aggregation_type;
251
3.99k
}
252
253
31.0k
std::string TabletColumn::get_string_by_field_type(FieldType type) {
254
31.0k
    switch (type) {
255
1.64k
    case FieldType::OLAP_FIELD_TYPE_TINYINT:
256
1.64k
        return "TINYINT";
257
258
0
    case FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT:
259
0
        return "UNSIGNED_TINYINT";
260
261
3.62k
    case FieldType::OLAP_FIELD_TYPE_SMALLINT:
262
3.62k
        return "SMALLINT";
263
264
0
    case FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT:
265
0
        return "UNSIGNED_SMALLINT";
266
267
8.79k
    case FieldType::OLAP_FIELD_TYPE_INT:
268
8.79k
        return "INT";
269
270
0
    case FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT:
271
0
        return "UNSIGNED_INT";
272
273
836
    case FieldType::OLAP_FIELD_TYPE_BIGINT:
274
836
        return "BIGINT";
275
276
717
    case FieldType::OLAP_FIELD_TYPE_LARGEINT:
277
717
        return "LARGEINT";
278
279
0
    case FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT:
280
0
        return "UNSIGNED_BIGINT";
281
282
0
    case FieldType::OLAP_FIELD_TYPE_IPV4:
283
0
        return "IPV4";
284
285
0
    case FieldType::OLAP_FIELD_TYPE_IPV6:
286
0
        return "IPV6";
287
288
0
    case FieldType::OLAP_FIELD_TYPE_FLOAT:
289
0
        return "FLOAT";
290
291
0
    case FieldType::OLAP_FIELD_TYPE_DOUBLE:
292
0
        return "DOUBLE";
293
294
0
    case FieldType::OLAP_FIELD_TYPE_DISCRETE_DOUBLE:
295
0
        return "DISCRETE_DOUBLE";
296
297
717
    case FieldType::OLAP_FIELD_TYPE_CHAR:
298
717
        return "CHAR";
299
300
721
    case FieldType::OLAP_FIELD_TYPE_DATE:
301
721
        return "DATE";
302
303
670
    case FieldType::OLAP_FIELD_TYPE_DATEV2:
304
670
        return "DATEV2";
305
306
912
    case FieldType::OLAP_FIELD_TYPE_DATETIME:
307
912
        return "DATETIME";
308
309
0
    case FieldType::OLAP_FIELD_TYPE_DATETIMEV2:
310
0
        return "DATETIMEV2";
311
312
717
    case FieldType::OLAP_FIELD_TYPE_DECIMAL:
313
717
        return "DECIMAL";
314
315
0
    case FieldType::OLAP_FIELD_TYPE_DECIMAL32:
316
0
        return "DECIMAL32";
317
318
0
    case FieldType::OLAP_FIELD_TYPE_DECIMAL64:
319
0
        return "DECIMAL64";
320
321
0
    case FieldType::OLAP_FIELD_TYPE_DECIMAL128I:
322
0
        return "DECIMAL128I";
323
324
0
    case FieldType::OLAP_FIELD_TYPE_DECIMAL256:
325
0
        return "DECIMAL256";
326
327
973
    case FieldType::OLAP_FIELD_TYPE_VARCHAR:
328
973
        return "VARCHAR";
329
330
0
    case FieldType::OLAP_FIELD_TYPE_JSONB:
331
0
        return "JSONB";
332
333
0
    case FieldType::OLAP_FIELD_TYPE_VARIANT:
334
0
        return "VARIANT";
335
336
10.7k
    case FieldType::OLAP_FIELD_TYPE_STRING:
337
10.7k
        return "STRING";
338
339
0
    case FieldType::OLAP_FIELD_TYPE_BOOL:
340
0
        return "BOOLEAN";
341
342
6
    case FieldType::OLAP_FIELD_TYPE_HLL:
343
6
        return "HLL";
344
345
0
    case FieldType::OLAP_FIELD_TYPE_STRUCT:
346
0
        return "STRUCT";
347
348
4
    case FieldType::OLAP_FIELD_TYPE_ARRAY:
349
4
        return "ARRAY";
350
351
0
    case FieldType::OLAP_FIELD_TYPE_MAP:
352
0
        return "MAP";
353
354
0
    case FieldType::OLAP_FIELD_TYPE_OBJECT:
355
0
        return "OBJECT";
356
0
    case FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE:
357
0
        return "QUANTILE_STATE";
358
0
    case FieldType::OLAP_FIELD_TYPE_AGG_STATE:
359
0
        return "AGG_STATE";
360
0
    default:
361
0
        return "UNKNOWN";
362
31.0k
    }
363
31.0k
}
364
365
28
std::string TabletColumn::get_string_by_aggregation_type(FieldAggregationMethod type) {
366
28
    switch (type) {
367
4
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE:
368
4
        return "NONE";
369
370
9
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM:
371
9
        return "SUM";
372
373
0
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MIN:
374
0
        return "MIN";
375
376
0
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MAX:
377
0
        return "MAX";
378
379
15
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE:
380
15
        return "REPLACE";
381
382
0
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL:
383
0
        return "REPLACE_IF_NOT_NULL";
384
385
0
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_HLL_UNION:
386
0
        return "HLL_UNION";
387
388
0
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_BITMAP_UNION:
389
0
        return "BITMAP_UNION";
390
391
0
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_QUANTILE_UNION:
392
0
        return "QUANTILE_UNION";
393
394
0
    default:
395
0
        return "UNKNOWN";
396
28
    }
397
28
}
398
399
2.12k
uint32_t TabletColumn::get_field_length_by_type(TPrimitiveType::type type, uint32_t string_length) {
400
2.12k
    switch (type) {
401
110
    case TPrimitiveType::TINYINT:
402
110
    case TPrimitiveType::BOOLEAN:
403
110
        return 1;
404
357
    case TPrimitiveType::SMALLINT:
405
357
        return 2;
406
841
    case TPrimitiveType::INT:
407
841
        return 4;
408
114
    case TPrimitiveType::BIGINT:
409
114
        return 8;
410
101
    case TPrimitiveType::LARGEINT:
411
101
        return 16;
412
0
    case TPrimitiveType::IPV4:
413
0
        return 4;
414
0
    case TPrimitiveType::IPV6:
415
0
        return 16;
416
101
    case TPrimitiveType::DATE:
417
101
        return 3;
418
92
    case TPrimitiveType::DATEV2:
419
92
        return 4;
420
109
    case TPrimitiveType::DATETIME:
421
109
        return 8;
422
0
    case TPrimitiveType::DATETIMEV2:
423
0
        return 8;
424
0
    case TPrimitiveType::FLOAT:
425
0
        return 4;
426
0
    case TPrimitiveType::DOUBLE:
427
0
        return 8;
428
0
    case TPrimitiveType::QUANTILE_STATE:
429
0
    case TPrimitiveType::OBJECT:
430
0
        return 16;
431
101
    case TPrimitiveType::CHAR:
432
101
        return string_length;
433
101
    case TPrimitiveType::VARCHAR:
434
101
    case TPrimitiveType::HLL:
435
101
    case TPrimitiveType::AGG_STATE:
436
101
        return string_length + sizeof(OLAP_VARCHAR_MAX_LENGTH);
437
0
    case TPrimitiveType::STRING:
438
0
    case TPrimitiveType::VARIANT:
439
0
        return string_length + sizeof(OLAP_STRING_MAX_LENGTH);
440
0
    case TPrimitiveType::JSONB:
441
0
        return string_length + sizeof(OLAP_JSONB_MAX_LENGTH);
442
0
    case TPrimitiveType::STRUCT:
443
        // Note that(xy): this is the length of struct type itself,
444
        // the length of its subtypes are not included.
445
0
        return OLAP_STRUCT_MAX_LENGTH;
446
0
    case TPrimitiveType::ARRAY:
447
0
        return OLAP_ARRAY_MAX_LENGTH;
448
0
    case TPrimitiveType::MAP:
449
0
        return OLAP_MAP_MAX_LENGTH;
450
0
    case TPrimitiveType::DECIMAL32:
451
0
        return 4;
452
0
    case TPrimitiveType::DECIMAL64:
453
0
        return 8;
454
0
    case TPrimitiveType::DECIMAL128I:
455
0
        return 16;
456
0
    case TPrimitiveType::DECIMAL256:
457
0
        return 32;
458
101
    case TPrimitiveType::DECIMALV2:
459
101
        return 12; // use 12 bytes in olap engine.
460
0
    default:
461
0
        LOG(WARNING) << "unknown field type. [type=" << type << "]";
462
0
        return 0;
463
2.12k
    }
464
2.12k
}
465
466
9
bool TabletColumn::has_char_type() const {
467
9
    switch (_type) {
468
3
    case FieldType::OLAP_FIELD_TYPE_CHAR: {
469
3
        return true;
470
0
    }
471
4
    case FieldType::OLAP_FIELD_TYPE_ARRAY:
472
4
    case FieldType::OLAP_FIELD_TYPE_MAP:
473
4
    case FieldType::OLAP_FIELD_TYPE_STRUCT: {
474
4
        return std::any_of(_sub_columns.begin(), _sub_columns.end(),
475
4
                           [&](const auto& sub) -> bool { return sub->has_char_type(); });
476
4
    }
477
2
    default:
478
2
        return false;
479
9
    }
480
9
}
481
482
12.2k
TabletColumn::TabletColumn() : _aggregation(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE) {}
483
484
142
TabletColumn::TabletColumn(FieldAggregationMethod agg, FieldType type) {
485
142
    _aggregation = agg;
486
142
    _type = type;
487
142
}
488
489
17
TabletColumn::TabletColumn(FieldAggregationMethod agg, FieldType filed_type, bool is_nullable) {
490
17
    _aggregation = agg;
491
17
    _type = filed_type;
492
17
    _length = get_scalar_type_info(filed_type)->size();
493
17
    _is_nullable = is_nullable;
494
17
}
495
496
TabletColumn::TabletColumn(FieldAggregationMethod agg, FieldType filed_type, bool is_nullable,
497
16
                           int32_t unique_id, size_t length) {
498
16
    _aggregation = agg;
499
16
    _type = filed_type;
500
16
    _is_nullable = is_nullable;
501
16
    _unique_id = unique_id;
502
16
    _length = length;
503
16
}
504
505
0
TabletColumn::TabletColumn(const ColumnPB& column) {
506
0
    init_from_pb(column);
507
0
}
508
509
0
TabletColumn::TabletColumn(const TColumn& column) {
510
0
    init_from_thrift(column);
511
0
}
512
513
0
void TabletColumn::init_from_thrift(const TColumn& tcolumn) {
514
0
    ColumnPB column_pb;
515
0
    TabletMeta::init_column_from_tcolumn(tcolumn.col_unique_id, tcolumn, &column_pb);
516
0
    init_from_pb(column_pb);
517
0
}
518
519
12.0k
void TabletColumn::init_from_pb(const ColumnPB& column) {
520
12.0k
    _unique_id = column.unique_id();
521
12.0k
    _col_name = column.name();
522
12.0k
    _col_name_lower_case = to_lower(_col_name);
523
12.0k
    _type = TabletColumn::get_field_type_by_string(column.type());
524
12.0k
    _is_key = column.is_key();
525
12.0k
    _is_nullable = column.is_nullable();
526
12.0k
    _is_auto_increment = column.is_auto_increment();
527
528
12.0k
    _has_default_value = column.has_default_value();
529
12.0k
    if (_has_default_value) {
530
29
        _default_value = column.default_value();
531
29
    }
532
533
12.0k
    if (column.has_precision()) {
534
3.94k
        _is_decimal = true;
535
3.94k
        _precision = column.precision();
536
8.14k
    } else {
537
8.14k
        _is_decimal = false;
538
8.14k
    }
539
12.0k
    if (column.has_frac()) {
540
3.94k
        _frac = column.frac();
541
3.94k
    }
542
12.0k
    _length = column.length();
543
12.0k
    _index_length = column.index_length();
544
12.0k
    if (column.has_is_bf_column()) {
545
368
        _is_bf_column = column.is_bf_column();
546
11.7k
    } else {
547
11.7k
        _is_bf_column = false;
548
11.7k
    }
549
12.0k
    if (column.has_has_bitmap_index()) {
550
2.12k
        _has_bitmap_index = column.has_bitmap_index();
551
9.96k
    } else {
552
9.96k
        _has_bitmap_index = false;
553
9.96k
    }
554
12.0k
    if (column.has_aggregation()) {
555
3.99k
        _aggregation = get_aggregation_type_by_string(column.aggregation());
556
3.99k
        _aggregation_name = column.aggregation();
557
3.99k
    }
558
559
12.0k
    if (_type == FieldType::OLAP_FIELD_TYPE_AGG_STATE) {
560
0
        _result_is_nullable = column.result_is_nullable();
561
0
        _be_exec_version = column.be_exec_version();
562
0
    }
563
564
12.0k
    if (column.has_visible()) {
565
9.12k
        _visible = column.visible();
566
9.12k
    }
567
12.0k
    if (_type == FieldType::OLAP_FIELD_TYPE_ARRAY) {
568
2
        CHECK(column.children_columns_size() == 1)
569
0
                << "ARRAY type should has 1 children types, but got "
570
0
                << column.children_columns_size();
571
2
    }
572
12.0k
    if (_type == FieldType::OLAP_FIELD_TYPE_MAP) {
573
0
        DCHECK(column.children_columns_size() == 2)
574
0
                << "MAP type should has 2 children types, but got "
575
0
                << column.children_columns_size();
576
0
        if (UNLIKELY(column.children_columns_size() != 2)) {
577
0
            LOG(WARNING) << "MAP type should has 2 children types, but got "
578
0
                         << column.children_columns_size();
579
0
        }
580
0
    }
581
12.0k
    for (size_t i = 0; i < column.children_columns_size(); i++) {
582
2
        TabletColumn child_column;
583
2
        child_column.init_from_pb(column.children_columns(i));
584
2
        add_sub_column(child_column);
585
2
    }
586
12.0k
    if (column.has_column_path_info()) {
587
0
        _column_path = std::make_shared<vectorized::PathInData>();
588
0
        _column_path->from_protobuf(column.column_path_info());
589
0
        _parent_col_unique_id = column.column_path_info().parrent_column_unique_id();
590
0
    }
591
12.0k
    if (is_variant_type() && !column.has_column_path_info()) {
592
        // set path info for variant root column, to prevent from missing
593
2
        _column_path = std::make_shared<vectorized::PathInData>(_col_name_lower_case);
594
2
    }
595
12.0k
    for (auto& column_pb : column.sparse_columns()) {
596
0
        TabletColumn column;
597
0
        column.init_from_pb(column_pb);
598
0
        _sparse_cols.emplace_back(std::make_shared<TabletColumn>(std::move(column)));
599
0
        _num_sparse_columns++;
600
0
    }
601
12.0k
}
602
603
TabletColumn TabletColumn::create_materialized_variant_column(const std::string& root,
604
                                                              const std::vector<std::string>& paths,
605
0
                                                              int32_t parent_unique_id) {
606
0
    TabletColumn subcol;
607
0
    subcol.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
608
0
    subcol.set_is_nullable(true);
609
0
    subcol.set_unique_id(-1);
610
0
    subcol.set_parent_unique_id(parent_unique_id);
611
0
    vectorized::PathInData path(root, paths);
612
0
    subcol.set_path_info(path);
613
0
    subcol.set_name(path.get_path());
614
0
    return subcol;
615
0
}
616
617
31.0k
void TabletColumn::to_schema_pb(ColumnPB* column) const {
618
31.0k
    column->set_unique_id(_unique_id);
619
31.0k
    column->set_name(_col_name);
620
31.0k
    column->set_type(get_string_by_field_type(_type));
621
31.0k
    column->set_is_key(_is_key);
622
31.0k
    column->set_is_nullable(_is_nullable);
623
31.0k
    if (_has_default_value) {
624
156
        column->set_default_value(_default_value);
625
156
    }
626
31.0k
    if (_is_decimal) {
627
16.7k
        column->set_precision(_precision);
628
16.7k
        column->set_frac(_frac);
629
16.7k
    }
630
31.0k
    column->set_length(_length);
631
31.0k
    column->set_index_length(_index_length);
632
31.0k
    if (_is_bf_column) {
633
8
        column->set_is_bf_column(_is_bf_column);
634
8
    }
635
31.0k
    if (!_aggregation_name.empty()) {
636
16.9k
        column->set_aggregation(_aggregation_name);
637
16.9k
    }
638
31.0k
    column->set_result_is_nullable(_result_is_nullable);
639
31.0k
    column->set_be_exec_version(_be_exec_version);
640
31.0k
    if (_has_bitmap_index) {
641
0
        column->set_has_bitmap_index(_has_bitmap_index);
642
0
    }
643
31.0k
    column->set_visible(_visible);
644
645
31.0k
    if (_type == FieldType::OLAP_FIELD_TYPE_ARRAY) {
646
4
        CHECK(_sub_columns.size() == 1)
647
0
                << "ARRAY type should has 1 children types, but got " << _sub_columns.size();
648
4
    }
649
31.0k
    if (_type == FieldType::OLAP_FIELD_TYPE_MAP) {
650
0
        DCHECK(_sub_columns.size() == 2)
651
0
                << "MAP type should has 2 children types, but got " << _sub_columns.size();
652
0
        if (UNLIKELY(_sub_columns.size() != 2)) {
653
0
            LOG(WARNING) << "MAP type should has 2 children types, but got " << _sub_columns.size();
654
0
        }
655
0
    }
656
657
31.0k
    for (size_t i = 0; i < _sub_columns.size(); i++) {
658
4
        ColumnPB* child = column->add_children_columns();
659
4
        _sub_columns[i]->to_schema_pb(child);
660
4
    }
661
662
    // set parts info
663
31.0k
    if (has_path_info()) {
664
        // CHECK_GT(_parent_col_unique_id, 0);
665
0
        _column_path->to_protobuf(column->mutable_column_path_info(), _parent_col_unique_id);
666
        // Update unstable information for variant columns. Some of the fields in the tablet schema
667
        // are irrelevant for variant sub-columns, but retaining them may lead to an excessive growth
668
        // in the number of tablet schema cache entries.
669
0
        if (_type == FieldType::OLAP_FIELD_TYPE_STRING) {
670
0
            column->set_length(INT_MAX);
671
0
        }
672
0
        column->set_index_length(0);
673
0
    }
674
31.0k
    for (auto& col : _sparse_cols) {
675
0
        ColumnPB* sparse_column = column->add_sparse_columns();
676
0
        col->to_schema_pb(sparse_column);
677
0
    }
678
31.0k
}
679
680
36
void TabletColumn::add_sub_column(TabletColumn& sub_column) {
681
36
    _sub_columns.push_back(std::make_shared<TabletColumn>(sub_column));
682
36
    sub_column._parent_col_unique_id = this->_unique_id;
683
36
    _sub_column_count += 1;
684
36
}
685
686
22.4k
bool TabletColumn::is_row_store_column() const {
687
22.4k
    return _col_name == BeConsts::ROW_STORE_COL;
688
22.4k
}
689
690
vectorized::AggregateFunctionPtr TabletColumn::get_aggregate_function_union(
691
0
        vectorized::DataTypePtr type, int current_be_exec_version) const {
692
0
    const auto* state_type = assert_cast<const vectorized::DataTypeAggState*>(type.get());
693
0
    BeExecVersionManager::check_function_compatibility(
694
0
            current_be_exec_version, _be_exec_version,
695
0
            state_type->get_nested_function()->get_name());
696
0
    return vectorized::AggregateStateUnion::create(state_type->get_nested_function(), {type}, type);
697
0
}
698
699
vectorized::AggregateFunctionPtr TabletColumn::get_aggregate_function(
700
24
        std::string suffix, int current_be_exec_version) const {
701
24
    vectorized::AggregateFunctionPtr function = nullptr;
702
703
24
    auto type = vectorized::DataTypeFactory::instance().create_data_type(*this);
704
24
    if (type && type->get_type_as_type_descriptor().type == PrimitiveType::TYPE_AGG_STATE) {
705
0
        function = get_aggregate_function_union(type, current_be_exec_version);
706
24
    } else {
707
24
        std::string origin_name = TabletColumn::get_string_by_aggregation_type(_aggregation);
708
24
        std::string agg_name = origin_name + suffix;
709
24
        std::transform(agg_name.begin(), agg_name.end(), agg_name.begin(),
710
258
                       [](unsigned char c) { return std::tolower(c); });
711
24
        function = vectorized::AggregateFunctionSimpleFactory::instance().get(
712
24
                agg_name, {type}, type->is_nullable(), BeExecVersionManager::get_newest_version());
713
24
        if (!function) {
714
0
            LOG(WARNING) << "get column aggregate function failed, aggregation_name=" << origin_name
715
0
                         << ", column_type=" << type->get_name();
716
0
        }
717
24
    }
718
24
    if (function) {
719
24
        function->set_version(_be_exec_version);
720
24
        return function;
721
24
    }
722
0
    return nullptr;
723
24
}
724
725
4
void TabletColumn::set_path_info(const vectorized::PathInData& path) {
726
4
    _column_path = std::make_shared<vectorized::PathInData>(path);
727
4
}
728
729
0
vectorized::DataTypePtr TabletColumn::get_vec_type() const {
730
0
    return vectorized::DataTypeFactory::instance().create_data_type(*this);
731
0
}
732
733
// escape '.' and '_'
734
43.2k
std::string escape_for_path_name(const std::string& s) {
735
43.2k
    std::string res;
736
43.2k
    const char* pos = s.data();
737
43.2k
    const char* end = pos + s.size();
738
43.2k
    while (pos != end) {
739
56
        unsigned char c = *pos;
740
56
        if (c == '.' || c == '_') {
741
11
            res += '%';
742
11
            res += vectorized::hex_digit_uppercase(c / 16);
743
11
            res += vectorized::hex_digit_uppercase(c % 16);
744
45
        } else {
745
45
            res += c;
746
45
        }
747
56
        ++pos;
748
56
    }
749
43.2k
    return res;
750
43.2k
}
751
752
4
void TabletIndex::set_escaped_escaped_index_suffix_path(const std::string& path_name) {
753
4
    std::string escaped_path = escape_for_path_name(path_name);
754
4
    _escaped_index_suffix_path = escaped_path;
755
4
}
756
757
void TabletIndex::init_from_thrift(const TOlapTableIndex& index,
758
15
                                   const TabletSchema& tablet_schema) {
759
15
    _index_id = index.index_id;
760
15
    _index_name = index.index_name;
761
    // init col_unique_id in index at be side, since col_unique_id may be -1 at fe side
762
    // get column unique id by name
763
15
    std::vector<int32_t> col_unique_ids(index.columns.size());
764
30
    for (size_t i = 0; i < index.columns.size(); i++) {
765
15
        auto column_idx = tablet_schema.field_index(index.columns[i]);
766
15
        if (column_idx >= 0) {
767
11
            col_unique_ids[i] = tablet_schema.column(column_idx).unique_id();
768
11
        } else {
769
            // if column unique id not found by column name, find by column unique id
770
            // column unique id can not found means this column is a new column added by light schema change
771
4
            if (index.__isset.column_unique_ids && !index.column_unique_ids.empty() &&
772
4
                tablet_schema.has_column_unique_id(index.column_unique_ids[i])) {
773
1
                col_unique_ids[i] = index.column_unique_ids[i];
774
3
            } else {
775
3
                col_unique_ids[i] = -1;
776
3
            }
777
4
        }
778
15
    }
779
15
    _col_unique_ids = std::move(col_unique_ids);
780
781
15
    switch (index.index_type) {
782
0
    case TIndexType::BITMAP:
783
0
        _index_type = IndexType::BITMAP;
784
0
        break;
785
15
    case TIndexType::INVERTED:
786
15
        _index_type = IndexType::INVERTED;
787
15
        break;
788
0
    case TIndexType::BLOOMFILTER:
789
0
        _index_type = IndexType::BLOOMFILTER;
790
0
        break;
791
0
    case TIndexType::NGRAM_BF:
792
0
        _index_type = IndexType::NGRAM_BF;
793
0
        break;
794
15
    }
795
15
    if (index.__isset.properties) {
796
0
        for (auto kv : index.properties) {
797
0
            _properties[kv.first] = kv.second;
798
0
        }
799
0
    }
800
15
}
801
802
void TabletIndex::init_from_thrift(const TOlapTableIndex& index,
803
0
                                   const std::vector<int32_t>& column_uids) {
804
0
    _index_id = index.index_id;
805
0
    _index_name = index.index_name;
806
0
    _col_unique_ids = column_uids;
807
808
0
    switch (index.index_type) {
809
0
    case TIndexType::BITMAP:
810
0
        _index_type = IndexType::BITMAP;
811
0
        break;
812
0
    case TIndexType::INVERTED:
813
0
        _index_type = IndexType::INVERTED;
814
0
        break;
815
0
    case TIndexType::BLOOMFILTER:
816
0
        _index_type = IndexType::BLOOMFILTER;
817
0
        break;
818
0
    case TIndexType::NGRAM_BF:
819
0
        _index_type = IndexType::NGRAM_BF;
820
0
        break;
821
0
    }
822
0
    if (index.__isset.properties) {
823
0
        for (auto kv : index.properties) {
824
0
            _properties[kv.first] = kv.second;
825
0
        }
826
0
    }
827
0
}
828
829
7.33k
void TabletIndex::init_from_pb(const TabletIndexPB& index) {
830
7.33k
    _index_id = index.index_id();
831
7.33k
    _index_name = index.index_name();
832
7.33k
    _col_unique_ids.clear();
833
7.33k
    for (auto col_unique_id : index.col_unique_id()) {
834
7.32k
        _col_unique_ids.push_back(col_unique_id);
835
7.32k
    }
836
7.33k
    _index_type = index.index_type();
837
41.1k
    for (auto& kv : index.properties()) {
838
41.1k
        _properties[kv.first] = kv.second;
839
41.1k
    }
840
7.33k
    _escaped_index_suffix_path = index.index_suffix_name();
841
7.33k
}
842
843
10.9k
void TabletIndex::to_schema_pb(TabletIndexPB* index) const {
844
10.9k
    index->set_index_id(_index_id);
845
10.9k
    index->set_index_name(_index_name);
846
10.9k
    index->clear_col_unique_id();
847
10.9k
    for (auto col_unique_id : _col_unique_ids) {
848
10.9k
        index->add_col_unique_id(col_unique_id);
849
10.9k
    }
850
10.9k
    index->set_index_type(_index_type);
851
60.7k
    for (const auto& kv : _properties) {
852
60.7k
        DBUG_EXECUTE_IF("tablet_schema.to_schema_pb", {
853
60.7k
            if (kv.first == INVERTED_INDEX_PARSER_LOWERCASE_KEY) {
854
60.7k
                continue;
855
60.7k
            }
856
60.7k
        })
857
60.7k
        (*index->mutable_properties())[kv.first] = kv.second;
858
60.7k
    }
859
10.9k
    index->set_index_suffix_name(_escaped_index_suffix_path);
860
861
10.9k
    DBUG_EXECUTE_IF("tablet_schema.to_schema_pb", { return; })
862
863
    // lowercase by default
864
10.9k
    if (!_properties.empty()) {
865
9.74k
        if (!_properties.contains(INVERTED_INDEX_PARSER_LOWERCASE_KEY)) {
866
168
            (*index->mutable_properties())[INVERTED_INDEX_PARSER_LOWERCASE_KEY] =
867
168
                    INVERTED_INDEX_PARSER_TRUE;
868
168
        }
869
9.74k
    }
870
10.9k
}
871
872
2.62k
TabletSchema::TabletSchema() = default;
873
874
2.57k
TabletSchema::~TabletSchema() = default;
875
876
1.62k
int64_t TabletSchema::get_metadata_size() const {
877
1.62k
    return sizeof(TabletSchema) + _vl_field_mem_size;
878
1.62k
}
879
880
126
void TabletSchema::append_column(TabletColumn column, ColumnType col_type) {
881
126
    if (column.is_key()) {
882
52
        _num_key_columns++;
883
52
    }
884
126
    if (column.is_nullable()) {
885
43
        _num_null_columns++;
886
43
    }
887
126
    if (column.is_variant_type()) {
888
0
        ++_num_variant_columns;
889
0
        if (!column.has_path_info()) {
890
0
            const std::string& col_name = column.name_lower_case();
891
0
            vectorized::PathInData path(col_name);
892
0
            column.set_path_info(path);
893
0
        }
894
0
    }
895
126
    if (UNLIKELY(column.name() == DELETE_SIGN)) {
896
0
        _delete_sign_idx = _num_columns;
897
126
    } else if (UNLIKELY(column.name() == SEQUENCE_COL)) {
898
4
        _sequence_col_idx = _num_columns;
899
122
    } else if (UNLIKELY(column.name() == VERSION_COL)) {
900
0
        _version_col_idx = _num_columns;
901
0
    }
902
126
    _field_id_to_index[column.unique_id()] = _num_columns;
903
126
    _cols.push_back(std::make_shared<TabletColumn>(std::move(column)));
904
    // The dropped column may have same name with exsiting column, so that
905
    // not add to name to index map, only for uid to index map
906
126
    if (col_type == ColumnType::VARIANT || _cols.back()->is_variant_type() ||
907
126
        _cols.back()->is_extracted_column()) {
908
4
        _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns);
909
4
        _field_path_to_index[_cols.back()->path_info_ptr().get()] = _num_columns;
910
122
    } else if (col_type == ColumnType::NORMAL) {
911
122
        _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns);
912
122
    }
913
126
    _num_columns++;
914
126
}
915
916
0
void TabletColumn::append_sparse_column(TabletColumn column) {
917
0
    _sparse_cols.push_back(std::make_shared<TabletColumn>(column));
918
0
    _num_sparse_columns++;
919
0
}
920
921
24
void TabletSchema::append_index(TabletIndex&& index) {
922
24
    _indexes.push_back(std::move(index));
923
24
}
924
925
void TabletSchema::update_index(const TabletColumn& col, const IndexType& index_type,
926
0
                                TabletIndex&& index) {
927
0
    int32_t col_unique_id = col.is_extracted_column() ? col.parent_unique_id() : col.unique_id();
928
0
    const std::string& suffix_path = escape_for_path_name(col.suffix_path());
929
0
    for (size_t i = 0; i < _indexes.size(); i++) {
930
0
        for (int32_t id : _indexes[i].col_unique_ids()) {
931
0
            if (_indexes[i].index_type() == index_type && id == col_unique_id &&
932
0
                _indexes[i].get_index_suffix() == suffix_path) {
933
0
                _indexes[i] = std::move(index);
934
0
                break;
935
0
            }
936
0
        }
937
0
    }
938
0
}
939
940
0
void TabletSchema::replace_column(size_t pos, TabletColumn new_col) {
941
0
    CHECK_LT(pos, num_columns()) << " outof range";
942
0
    _cols[pos] = std::make_shared<TabletColumn>(std::move(new_col));
943
0
}
944
945
0
void TabletSchema::clear_index() {
946
0
    _indexes.clear();
947
0
}
948
949
2
void TabletSchema::remove_index(int64_t index_id) {
950
2
    std::vector<TabletIndex> indexes;
951
2
    for (auto index : _indexes) {
952
2
        if (index.index_id() == index_id) {
953
2
            continue;
954
2
        }
955
0
        indexes.emplace_back(std::move(index));
956
0
    }
957
2
    _indexes = std::move(indexes);
958
2
}
959
960
0
void TabletSchema::clear_columns() {
961
0
    _field_path_to_index.clear();
962
0
    _field_name_to_index.clear();
963
0
    _field_id_to_index.clear();
964
0
    _num_columns = 0;
965
0
    _num_variant_columns = 0;
966
0
    _num_null_columns = 0;
967
0
    _num_key_columns = 0;
968
0
    _cols.clear();
969
0
}
970
971
void TabletSchema::init_from_pb(const TabletSchemaPB& schema, bool ignore_extracted_columns,
972
1.57k
                                bool reuse_cache_column) {
973
1.57k
    _keys_type = schema.keys_type();
974
1.57k
    _num_columns = 0;
975
1.57k
    _num_variant_columns = 0;
976
1.57k
    _num_key_columns = 0;
977
1.57k
    _num_null_columns = 0;
978
1.57k
    _cols.clear();
979
1.57k
    _indexes.clear();
980
1.57k
    _field_name_to_index.clear();
981
1.57k
    _field_id_to_index.clear();
982
1.57k
    _cluster_key_idxes.clear();
983
1.57k
    for (const auto& i : schema.cluster_key_idxes()) {
984
6
        _cluster_key_idxes.push_back(i);
985
6
    }
986
12.2k
    for (auto& column_pb : schema.column()) {
987
12.2k
        TabletColumnPtr column;
988
12.2k
        if (reuse_cache_column) {
989
258
            auto pair = TabletColumnObjectPool::instance()->insert(
990
258
                    deterministic_string_serialize(column_pb));
991
258
            column = pair.second;
992
            // Release the handle quickly, because we use shared ptr to manage column.
993
            // It often core during tablet schema copy to another schema because handle's
994
            // reference count should be managed mannually.
995
258
            TabletColumnObjectPool::instance()->release(pair.first);
996
11.9k
        } else {
997
11.9k
            column = std::make_shared<TabletColumn>();
998
11.9k
            column->init_from_pb(column_pb);
999
11.9k
        }
1000
12.2k
        if (ignore_extracted_columns && column->is_extracted_column()) {
1001
0
            continue;
1002
0
        }
1003
12.2k
        if (column->is_key()) {
1004
2.28k
            _num_key_columns++;
1005
2.28k
        }
1006
12.2k
        if (column->is_nullable()) {
1007
7.38k
            _num_null_columns++;
1008
7.38k
        }
1009
12.2k
        if (column->is_variant_type()) {
1010
2
            ++_num_variant_columns;
1011
2
        }
1012
1013
12.2k
        _cols.emplace_back(std::move(column));
1014
12.2k
        if (!_cols.back()->is_extracted_column()) {
1015
12.2k
            _vl_field_mem_size += sizeof(StringRef) + sizeof(char) * _cols.back()->name().size() +
1016
12.2k
                                  sizeof(int32_t);
1017
12.2k
            _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns);
1018
12.2k
            _vl_field_mem_size += sizeof(int32_t) * 2;
1019
12.2k
            _field_id_to_index[_cols.back()->unique_id()] = _num_columns;
1020
12.2k
        }
1021
12.2k
        _num_columns++;
1022
12.2k
    }
1023
7.30k
    for (auto& index_pb : schema.index()) {
1024
7.30k
        TabletIndex index;
1025
7.30k
        index.init_from_pb(index_pb);
1026
7.30k
        _indexes.emplace_back(std::move(index));
1027
7.30k
    }
1028
1.57k
    _num_short_key_columns = schema.num_short_key_columns();
1029
1.57k
    _num_rows_per_row_block = schema.num_rows_per_row_block();
1030
1.57k
    _compress_kind = schema.compress_kind();
1031
1.57k
    _next_column_unique_id = schema.next_column_unique_id();
1032
1.57k
    if (schema.has_bf_fpp()) {
1033
0
        _has_bf_fpp = true;
1034
0
        _bf_fpp = schema.bf_fpp();
1035
1.57k
    } else {
1036
1.57k
        _has_bf_fpp = false;
1037
1.57k
        _bf_fpp = BLOOM_FILTER_DEFAULT_FPP;
1038
1.57k
    }
1039
1.57k
    _is_in_memory = schema.is_in_memory();
1040
1.57k
    _disable_auto_compaction = schema.disable_auto_compaction();
1041
1.57k
    _enable_single_replica_compaction = schema.enable_single_replica_compaction();
1042
1.57k
    _store_row_column = schema.store_row_column();
1043
1.57k
    _skip_write_index_on_load = schema.skip_write_index_on_load();
1044
1.57k
    _delete_sign_idx = schema.delete_sign_idx();
1045
1.57k
    _sequence_col_idx = schema.sequence_col_idx();
1046
1.57k
    _version_col_idx = schema.version_col_idx();
1047
1.57k
    _sort_type = schema.sort_type();
1048
1.57k
    _sort_col_num = schema.sort_col_num();
1049
1.57k
    _compression_type = schema.compression_type();
1050
1.57k
    _row_store_page_size = schema.row_store_page_size();
1051
1.57k
    _storage_page_size = schema.storage_page_size();
1052
1.57k
    _schema_version = schema.schema_version();
1053
    // Default to V1 inverted index storage format for backward compatibility if not specified in schema.
1054
1.57k
    if (!schema.has_inverted_index_storage_format()) {
1055
169
        _inverted_index_storage_format = InvertedIndexStorageFormatPB::V1;
1056
1.40k
    } else {
1057
1.40k
        _inverted_index_storage_format = schema.inverted_index_storage_format();
1058
1.40k
    }
1059
1060
1.57k
    _row_store_column_unique_ids.assign(schema.row_store_column_unique_ids().begin(),
1061
1.57k
                                        schema.row_store_column_unique_ids().end());
1062
1.57k
    _variant_enable_flatten_nested = schema.variant_enable_flatten_nested();
1063
1.57k
    _vl_field_mem_size += _row_store_column_unique_ids.capacity() * sizeof(int32_t);
1064
1.57k
    update_metadata_size();
1065
1.57k
}
1066
1067
374
void TabletSchema::copy_from(const TabletSchema& tablet_schema) {
1068
374
    TabletSchemaPB tablet_schema_pb;
1069
374
    tablet_schema.to_schema_pb(&tablet_schema_pb);
1070
374
    init_from_pb(tablet_schema_pb);
1071
374
    _table_id = tablet_schema.table_id();
1072
374
}
1073
1074
0
void TabletSchema::shawdow_copy_without_columns(const TabletSchema& tablet_schema) {
1075
0
    *this = tablet_schema;
1076
0
    _field_path_to_index.clear();
1077
0
    _field_name_to_index.clear();
1078
0
    _field_id_to_index.clear();
1079
0
    _num_columns = 0;
1080
0
    _num_variant_columns = 0;
1081
0
    _num_null_columns = 0;
1082
0
    _num_key_columns = 0;
1083
0
    _cols.clear();
1084
0
}
1085
1086
0
void TabletSchema::update_index_info_from(const TabletSchema& tablet_schema) {
1087
0
    for (auto& col : _cols) {
1088
0
        if (col->unique_id() < 0) {
1089
0
            continue;
1090
0
        }
1091
0
        const auto iter = tablet_schema._field_id_to_index.find(col->unique_id());
1092
0
        if (iter == tablet_schema._field_id_to_index.end()) {
1093
0
            continue;
1094
0
        }
1095
0
        auto col_idx = iter->second;
1096
0
        if (col_idx < 0 || col_idx >= tablet_schema._cols.size()) {
1097
0
            continue;
1098
0
        }
1099
0
        col->set_is_bf_column(tablet_schema._cols[col_idx]->is_bf_column());
1100
0
        col->set_has_bitmap_index(tablet_schema._cols[col_idx]->has_bitmap_index());
1101
0
    }
1102
0
}
1103
1104
3.07k
std::string TabletSchema::to_key() const {
1105
3.07k
    TabletSchemaPB pb;
1106
3.07k
    to_schema_pb(&pb);
1107
3.07k
    return TabletSchema::deterministic_string_serialize(pb);
1108
3.07k
}
1109
1110
void TabletSchema::build_current_tablet_schema(int64_t index_id, int32_t version,
1111
                                               const OlapTableIndexSchema* index,
1112
0
                                               const TabletSchema& ori_tablet_schema) {
1113
    // copy from ori_tablet_schema
1114
0
    _keys_type = ori_tablet_schema.keys_type();
1115
0
    _num_short_key_columns = ori_tablet_schema.num_short_key_columns();
1116
0
    _num_rows_per_row_block = ori_tablet_schema.num_rows_per_row_block();
1117
0
    _compress_kind = ori_tablet_schema.compress_kind();
1118
1119
    // todo(yixiu): unique_id
1120
0
    _next_column_unique_id = ori_tablet_schema.next_column_unique_id();
1121
0
    _is_in_memory = ori_tablet_schema.is_in_memory();
1122
0
    _disable_auto_compaction = ori_tablet_schema.disable_auto_compaction();
1123
0
    _enable_single_replica_compaction = ori_tablet_schema.enable_single_replica_compaction();
1124
0
    _skip_write_index_on_load = ori_tablet_schema.skip_write_index_on_load();
1125
0
    _sort_type = ori_tablet_schema.sort_type();
1126
0
    _sort_col_num = ori_tablet_schema.sort_col_num();
1127
0
    _row_store_page_size = ori_tablet_schema.row_store_page_size();
1128
0
    _storage_page_size = ori_tablet_schema.storage_page_size();
1129
0
    _variant_enable_flatten_nested = ori_tablet_schema.variant_flatten_nested();
1130
1131
    // copy from table_schema_param
1132
0
    _schema_version = version;
1133
0
    _num_columns = 0;
1134
0
    _num_variant_columns = 0;
1135
0
    _num_key_columns = 0;
1136
0
    _num_null_columns = 0;
1137
0
    bool has_bf_columns = false;
1138
0
    _cols.clear();
1139
0
    _indexes.clear();
1140
0
    _field_name_to_index.clear();
1141
0
    _field_id_to_index.clear();
1142
0
    _delete_sign_idx = -1;
1143
0
    _sequence_col_idx = -1;
1144
0
    _version_col_idx = -1;
1145
0
    _cluster_key_idxes.clear();
1146
0
    for (const auto& i : ori_tablet_schema._cluster_key_idxes) {
1147
0
        _cluster_key_idxes.push_back(i);
1148
0
    }
1149
0
    for (auto& column : index->columns) {
1150
0
        if (column->is_key()) {
1151
0
            _num_key_columns++;
1152
0
        }
1153
0
        if (column->is_nullable()) {
1154
0
            _num_null_columns++;
1155
0
        }
1156
0
        if (column->is_bf_column()) {
1157
0
            has_bf_columns = true;
1158
0
        }
1159
0
        if (column->is_variant_type()) {
1160
0
            ++_num_variant_columns;
1161
0
        }
1162
0
        if (UNLIKELY(column->name() == DELETE_SIGN)) {
1163
0
            _delete_sign_idx = _num_columns;
1164
0
        } else if (UNLIKELY(column->name() == SEQUENCE_COL)) {
1165
0
            _sequence_col_idx = _num_columns;
1166
0
        } else if (UNLIKELY(column->name() == VERSION_COL)) {
1167
0
            _version_col_idx = _num_columns;
1168
0
        }
1169
0
        _cols.emplace_back(std::make_shared<TabletColumn>(*column));
1170
0
        _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns);
1171
0
        _field_id_to_index[_cols.back()->unique_id()] = _num_columns;
1172
0
        _num_columns++;
1173
0
    }
1174
1175
0
    for (auto& i : index->indexes) {
1176
0
        _indexes.emplace_back(*i);
1177
0
    }
1178
1179
0
    if (has_bf_columns) {
1180
0
        _has_bf_fpp = true;
1181
0
        _bf_fpp = ori_tablet_schema.bloom_filter_fpp();
1182
0
    } else {
1183
0
        _has_bf_fpp = false;
1184
0
        _bf_fpp = BLOOM_FILTER_DEFAULT_FPP;
1185
0
    }
1186
0
}
1187
1188
149
void TabletSchema::merge_dropped_columns(const TabletSchema& src_schema) {
1189
    // If they are the same tablet schema object, then just return
1190
149
    if (this == &src_schema) {
1191
0
        return;
1192
0
    }
1193
2.90k
    for (const auto& src_col : src_schema.columns()) {
1194
2.90k
        if (_field_id_to_index.find(src_col->unique_id()) == _field_id_to_index.end()) {
1195
0
            CHECK(!src_col->is_key())
1196
0
                    << src_col->name() << " is key column, should not be dropped.";
1197
0
            ColumnPB src_col_pb;
1198
            // There are some pointer in tablet column, not sure the reference relation, so
1199
            // that deep copy it.
1200
0
            src_col->to_schema_pb(&src_col_pb);
1201
0
            TabletColumn new_col(src_col_pb);
1202
0
            append_column(new_col, TabletSchema::ColumnType::DROPPED);
1203
0
        }
1204
2.90k
    }
1205
149
}
1206
1207
0
TabletSchemaSPtr TabletSchema::copy_without_variant_extracted_columns() {
1208
0
    TabletSchemaSPtr copy = std::make_shared<TabletSchema>();
1209
0
    TabletSchemaPB tablet_schema_pb;
1210
0
    this->to_schema_pb(&tablet_schema_pb);
1211
0
    copy->init_from_pb(tablet_schema_pb, true /*ignore extracted_columns*/);
1212
0
    return copy;
1213
0
}
1214
1215
// Dropped column is in _field_id_to_index but not in _field_name_to_index
1216
// Could refer to append_column method
1217
6.16k
bool TabletSchema::is_dropped_column(const TabletColumn& col) const {
1218
6.16k
    CHECK(_field_id_to_index.find(col.unique_id()) != _field_id_to_index.end())
1219
0
            << "could not find col with unique id = " << col.unique_id()
1220
0
            << " and name = " << col.name() << " table_id=" << _table_id;
1221
6.16k
    auto it = _field_name_to_index.find(StringRef {col.name()});
1222
6.16k
    return it == _field_name_to_index.end() || _cols[it->second]->unique_id() != col.unique_id();
1223
6.16k
}
1224
1225
0
void TabletSchema::copy_extracted_columns(const TabletSchema& src_schema) {
1226
0
    std::unordered_set<int32_t> variant_columns;
1227
0
    for (const auto& col : columns()) {
1228
0
        if (col->is_variant_type()) {
1229
0
            variant_columns.insert(col->unique_id());
1230
0
        }
1231
0
    }
1232
0
    for (const TabletColumnPtr& col : src_schema.columns()) {
1233
0
        if (col->is_extracted_column() && variant_columns.contains(col->parent_unique_id())) {
1234
0
            ColumnPB col_pb;
1235
0
            col->to_schema_pb(&col_pb);
1236
0
            TabletColumn new_col(col_pb);
1237
0
            append_column(new_col, ColumnType::VARIANT);
1238
0
        }
1239
0
    }
1240
0
}
1241
1242
0
void TabletSchema::reserve_extracted_columns() {
1243
0
    for (auto it = _cols.begin(); it != _cols.end();) {
1244
0
        if (!(*it)->is_extracted_column()) {
1245
0
            it = _cols.erase(it);
1246
0
        } else {
1247
0
            ++it;
1248
0
        }
1249
0
    }
1250
0
}
1251
1252
5.28k
void TabletSchema::to_schema_pb(TabletSchemaPB* tablet_schema_pb) const {
1253
5.28k
    for (const auto& i : _cluster_key_idxes) {
1254
30
        tablet_schema_pb->add_cluster_key_idxes(i);
1255
30
    }
1256
5.28k
    tablet_schema_pb->set_keys_type(_keys_type);
1257
31.0k
    for (const auto& col : _cols) {
1258
31.0k
        ColumnPB* column = tablet_schema_pb->add_column();
1259
31.0k
        col->to_schema_pb(column);
1260
31.0k
    }
1261
10.9k
    for (const auto& index : _indexes) {
1262
10.9k
        auto* index_pb = tablet_schema_pb->add_index();
1263
10.9k
        index.to_schema_pb(index_pb);
1264
10.9k
    }
1265
5.28k
    tablet_schema_pb->set_num_short_key_columns(_num_short_key_columns);
1266
5.28k
    tablet_schema_pb->set_num_rows_per_row_block(_num_rows_per_row_block);
1267
5.28k
    tablet_schema_pb->set_compress_kind(_compress_kind);
1268
5.28k
    if (_has_bf_fpp) {
1269
0
        tablet_schema_pb->set_bf_fpp(_bf_fpp);
1270
0
    }
1271
5.28k
    tablet_schema_pb->set_next_column_unique_id(_next_column_unique_id);
1272
5.28k
    tablet_schema_pb->set_is_in_memory(_is_in_memory);
1273
5.28k
    tablet_schema_pb->set_disable_auto_compaction(_disable_auto_compaction);
1274
5.28k
    tablet_schema_pb->set_enable_single_replica_compaction(_enable_single_replica_compaction);
1275
5.28k
    tablet_schema_pb->set_store_row_column(_store_row_column);
1276
5.28k
    tablet_schema_pb->set_skip_write_index_on_load(_skip_write_index_on_load);
1277
5.28k
    tablet_schema_pb->set_delete_sign_idx(_delete_sign_idx);
1278
5.28k
    tablet_schema_pb->set_sequence_col_idx(_sequence_col_idx);
1279
5.28k
    tablet_schema_pb->set_sort_type(_sort_type);
1280
5.28k
    tablet_schema_pb->set_sort_col_num(_sort_col_num);
1281
5.28k
    tablet_schema_pb->set_schema_version(_schema_version);
1282
5.28k
    tablet_schema_pb->set_compression_type(_compression_type);
1283
5.28k
    tablet_schema_pb->set_row_store_page_size(_row_store_page_size);
1284
5.28k
    tablet_schema_pb->set_storage_page_size(_storage_page_size);
1285
5.28k
    tablet_schema_pb->set_version_col_idx(_version_col_idx);
1286
5.28k
    tablet_schema_pb->set_inverted_index_storage_format(_inverted_index_storage_format);
1287
5.28k
    tablet_schema_pb->mutable_row_store_column_unique_ids()->Assign(
1288
5.28k
            _row_store_column_unique_ids.begin(), _row_store_column_unique_ids.end());
1289
5.28k
    tablet_schema_pb->set_variant_enable_flatten_nested(_variant_enable_flatten_nested);
1290
5.28k
}
1291
1292
0
size_t TabletSchema::row_size() const {
1293
0
    size_t size = 0;
1294
0
    for (const auto& column : _cols) {
1295
0
        size += column->length();
1296
0
    }
1297
0
    size += (_num_columns + 7) / 8;
1298
1299
0
    return size;
1300
0
}
1301
1302
1.21k
int32_t TabletSchema::field_index(const std::string& field_name) const {
1303
1.21k
    const auto& found = _field_name_to_index.find(StringRef(field_name));
1304
1.21k
    return (found == _field_name_to_index.end()) ? -1 : found->second;
1305
1.21k
}
1306
1307
0
int32_t TabletSchema::field_index(const vectorized::PathInData& path) const {
1308
0
    const auto& found = _field_path_to_index.find(vectorized::PathInDataRef(&path));
1309
0
    return (found == _field_path_to_index.end()) ? -1 : found->second;
1310
0
}
1311
1312
180
int32_t TabletSchema::field_index(int32_t col_unique_id) const {
1313
180
    const auto& found = _field_id_to_index.find(col_unique_id);
1314
180
    return (found == _field_id_to_index.end()) ? -1 : found->second;
1315
180
}
1316
1317
22.4k
const std::vector<TabletColumnPtr>& TabletSchema::columns() const {
1318
22.4k
    return _cols;
1319
22.4k
}
1320
1321
0
const std::vector<TabletColumnPtr>& TabletColumn::sparse_columns() const {
1322
0
    return _sparse_cols;
1323
0
}
1324
1325
155k
const TabletColumn& TabletSchema::column(size_t ordinal) const {
1326
155k
    DCHECK(ordinal < _num_columns) << "ordinal:" << ordinal << ", _num_columns:" << _num_columns;
1327
155k
    return *_cols[ordinal];
1328
155k
}
1329
1330
0
const TabletColumn& TabletColumn::sparse_column_at(size_t ordinal) const {
1331
0
    DCHECK(ordinal < _sparse_cols.size())
1332
0
            << "ordinal:" << ordinal << ", _num_columns:" << _sparse_cols.size();
1333
0
    return *_sparse_cols[ordinal];
1334
0
}
1335
1336
928
const TabletColumn& TabletSchema::column_by_uid(int32_t col_unique_id) const {
1337
928
    return *_cols.at(_field_id_to_index.at(col_unique_id));
1338
928
}
1339
1340
0
TabletColumn& TabletSchema::mutable_column_by_uid(int32_t col_unique_id) {
1341
0
    return *_cols.at(_field_id_to_index.at(col_unique_id));
1342
0
}
1343
1344
8
TabletColumn& TabletSchema::mutable_column(size_t ordinal) {
1345
8
    return *_cols.at(ordinal);
1346
8
}
1347
1348
0
void TabletSchema::update_indexes_from_thrift(const std::vector<doris::TOlapTableIndex>& tindexes) {
1349
0
    std::vector<TabletIndex> indexes;
1350
0
    for (auto& tindex : tindexes) {
1351
0
        TabletIndex index;
1352
0
        index.init_from_thrift(tindex, *this);
1353
0
        indexes.emplace_back(std::move(index));
1354
0
    }
1355
0
    _indexes = std::move(indexes);
1356
0
}
1357
1358
0
bool TabletSchema::exist_column(const std::string& field_name) const {
1359
0
    return _field_name_to_index.contains(StringRef {field_name});
1360
0
}
1361
1362
414
bool TabletSchema::has_column_unique_id(int32_t col_unique_id) const {
1363
414
    return _field_id_to_index.contains(col_unique_id);
1364
414
}
1365
1366
0
Status TabletSchema::have_column(const std::string& field_name) const {
1367
0
    if (!_field_name_to_index.contains(StringRef(field_name))) {
1368
0
        return Status::Error<ErrorCode::INTERNAL_ERROR>(
1369
0
                "Not found field_name, field_name:{}, schema:{}", field_name,
1370
0
                get_all_field_names());
1371
0
    }
1372
0
    return Status::OK();
1373
0
}
1374
1375
172
Result<const TabletColumn*> TabletSchema::column(const std::string& field_name) const {
1376
172
    auto it = _field_name_to_index.find(StringRef {field_name});
1377
172
    if (it == _field_name_to_index.end()) {
1378
0
        DCHECK(false) << "field_name=" << field_name << ", table_id=" << _table_id
1379
0
                      << ", field_name_to_index=" << get_all_field_names();
1380
0
        return ResultError(
1381
0
                Status::InternalError("column not found, name={}, table_id={}, schema_version={}",
1382
0
                                      field_name, _table_id, _schema_version));
1383
0
    }
1384
172
    return _cols[it->second].get();
1385
172
}
1386
1387
void TabletSchema::update_tablet_columns(const TabletSchema& tablet_schema,
1388
0
                                         const std::vector<TColumn>& t_columns) {
1389
0
    copy_from(tablet_schema);
1390
0
    if (!t_columns.empty() && t_columns[0].col_unique_id >= 0) {
1391
0
        clear_columns();
1392
0
        for (const auto& column : t_columns) {
1393
0
            append_column(TabletColumn(column));
1394
0
        }
1395
0
    }
1396
0
}
1397
1398
55
bool TabletSchema::has_inverted_index_with_index_id(int64_t index_id) const {
1399
72
    for (size_t i = 0; i < _indexes.size(); i++) {
1400
37
        if (_indexes[i].index_type() == IndexType::INVERTED && _indexes[i].index_id() == index_id) {
1401
20
            return true;
1402
20
        }
1403
37
    }
1404
35
    return false;
1405
55
}
1406
1407
const TabletIndex* TabletSchema::inverted_index(int32_t col_unique_id,
1408
29.3k
                                                const std::string& suffix_path) const {
1409
29.3k
    const std::string escaped_suffix = escape_for_path_name(suffix_path);
1410
162k
    for (size_t i = 0; i < _indexes.size(); i++) {
1411
141k
        if (_indexes[i].index_type() == IndexType::INVERTED) {
1412
141k
            for (int32_t id : _indexes[i].col_unique_ids()) {
1413
141k
                if (id == col_unique_id && _indexes[i].get_index_suffix() == escaped_suffix) {
1414
8.50k
                    return &(_indexes[i]);
1415
8.50k
                }
1416
141k
            }
1417
141k
        }
1418
141k
    }
1419
20.8k
    return nullptr;
1420
29.3k
}
1421
1422
13.8k
const TabletIndex* TabletSchema::inverted_index(const TabletColumn& col) const {
1423
    // Some columns(Float, Double, JSONB ...) from the variant do not support inverted index
1424
13.8k
    if (!segment_v2::InvertedIndexColumnWriter::check_support_inverted_index(col)) {
1425
2
        return nullptr;
1426
2
    }
1427
    // TODO use more efficient impl
1428
    // Use parent id if unique not assigned, this could happend when accessing subcolumns of variants
1429
13.8k
    int32_t col_unique_id = col.is_extracted_column() ? col.parent_unique_id() : col.unique_id();
1430
13.8k
    return inverted_index(col_unique_id, escape_for_path_name(col.suffix_path()));
1431
13.8k
}
1432
1433
0
bool TabletSchema::has_ngram_bf_index(int32_t col_unique_id) const {
1434
    // TODO use more efficient impl
1435
0
    for (size_t i = 0; i < _indexes.size(); i++) {
1436
0
        if (_indexes[i].index_type() == IndexType::NGRAM_BF) {
1437
0
            for (int32_t id : _indexes[i].col_unique_ids()) {
1438
0
                if (id == col_unique_id) {
1439
0
                    return true;
1440
0
                }
1441
0
            }
1442
0
        }
1443
0
    }
1444
1445
0
    return false;
1446
0
}
1447
1448
13.1k
const TabletIndex* TabletSchema::get_ngram_bf_index(int32_t col_unique_id) const {
1449
    // TODO use more efficient impl
1450
87.5k
    for (size_t i = 0; i < _indexes.size(); i++) {
1451
74.4k
        if (_indexes[i].index_type() == IndexType::NGRAM_BF) {
1452
0
            for (int32_t id : _indexes[i].col_unique_ids()) {
1453
0
                if (id == col_unique_id) {
1454
0
                    return &(_indexes[i]);
1455
0
                }
1456
0
            }
1457
0
        }
1458
74.4k
    }
1459
13.1k
    return nullptr;
1460
13.1k
}
1461
1462
vectorized::Block TabletSchema::create_block(
1463
        const std::vector<uint32_t>& return_columns,
1464
716
        const std::unordered_set<uint32_t>* tablet_columns_need_convert_null) const {
1465
716
    vectorized::Block block;
1466
2.83k
    for (int i = 0; i < return_columns.size(); ++i) {
1467
2.11k
        const auto& col = *_cols[return_columns[i]];
1468
2.11k
        bool is_nullable = (tablet_columns_need_convert_null != nullptr &&
1469
2.11k
                            tablet_columns_need_convert_null->find(return_columns[i]) !=
1470
0
                                    tablet_columns_need_convert_null->end());
1471
2.11k
        auto data_type = vectorized::DataTypeFactory::instance().create_data_type(col, is_nullable);
1472
2.11k
        auto column = data_type->create_column();
1473
2.11k
        block.insert({std::move(column), data_type, col.name()});
1474
2.11k
    }
1475
716
    return block;
1476
716
}
1477
1478
1.94k
vectorized::Block TabletSchema::create_block(bool ignore_dropped_col) const {
1479
1.94k
    vectorized::Block block;
1480
6.16k
    for (const auto& col : _cols) {
1481
6.16k
        if (ignore_dropped_col && is_dropped_column(*col)) {
1482
0
            continue;
1483
0
        }
1484
6.16k
        auto data_type = vectorized::DataTypeFactory::instance().create_data_type(*col);
1485
6.16k
        block.insert({data_type->create_column(), data_type, col->name()});
1486
6.16k
    }
1487
1.94k
    return block;
1488
1.94k
}
1489
1490
0
vectorized::Block TabletSchema::create_block_by_cids(const std::vector<uint32_t>& cids) const {
1491
0
    vectorized::Block block;
1492
0
    for (const auto& cid : cids) {
1493
0
        const auto& col = *_cols[cid];
1494
0
        auto data_type = vectorized::DataTypeFactory::instance().create_data_type(col);
1495
0
        block.insert({data_type->create_column(), data_type, col.name()});
1496
0
    }
1497
0
    return block;
1498
0
}
1499
1500
0
bool operator==(const TabletColumn& a, const TabletColumn& b) {
1501
0
    if (a._unique_id != b._unique_id) return false;
1502
0
    if (a._col_name != b._col_name) return false;
1503
0
    if (a._type != b._type) return false;
1504
0
    if (a._is_key != b._is_key) return false;
1505
0
    if (a._aggregation != b._aggregation) return false;
1506
0
    if (a._is_nullable != b._is_nullable) return false;
1507
0
    if (a._has_default_value != b._has_default_value) return false;
1508
0
    if (a._has_default_value) {
1509
0
        if (a._default_value != b._default_value) return false;
1510
0
    }
1511
0
    if (a._is_decimal != b._is_decimal) return false;
1512
0
    if (a._is_decimal) {
1513
0
        if (a._precision != b._precision) return false;
1514
0
        if (a._frac != b._frac) return false;
1515
0
    }
1516
0
    if (a._length != b._length) return false;
1517
0
    if (a._index_length != b._index_length) return false;
1518
0
    if (a._is_bf_column != b._is_bf_column) return false;
1519
0
    if (a._has_bitmap_index != b._has_bitmap_index) return false;
1520
0
    if (a._column_path == nullptr && a._column_path != nullptr) return false;
1521
0
    if (b._column_path == nullptr && a._column_path != nullptr) return false;
1522
0
    if (b._column_path != nullptr && a._column_path != nullptr &&
1523
0
        *a._column_path != *b._column_path)
1524
0
        return false;
1525
0
    return true;
1526
0
}
1527
1528
0
bool operator!=(const TabletColumn& a, const TabletColumn& b) {
1529
0
    return !(a == b);
1530
0
}
1531
1532
1
bool operator==(const TabletSchema& a, const TabletSchema& b) {
1533
1
    if (a._keys_type != b._keys_type) return false;
1534
1
    if (a._cols.size() != b._cols.size()) return false;
1535
1
    for (int i = 0; i < a._cols.size(); ++i) {
1536
0
        if (*a._cols[i] != *b._cols[i]) return false;
1537
0
    }
1538
1
    if (a._num_columns != b._num_columns) return false;
1539
1
    if (a._num_key_columns != b._num_key_columns) return false;
1540
1
    if (a._num_null_columns != b._num_null_columns) return false;
1541
1
    if (a._num_short_key_columns != b._num_short_key_columns) return false;
1542
1
    if (a._num_rows_per_row_block != b._num_rows_per_row_block) return false;
1543
1
    if (a._compress_kind != b._compress_kind) return false;
1544
1
    if (a._next_column_unique_id != b._next_column_unique_id) return false;
1545
1
    if (a._has_bf_fpp != b._has_bf_fpp) return false;
1546
1
    if (a._has_bf_fpp) {
1547
0
        if (std::abs(a._bf_fpp - b._bf_fpp) > 1e-6) return false;
1548
0
    }
1549
1
    if (a._is_in_memory != b._is_in_memory) return false;
1550
1
    if (a._delete_sign_idx != b._delete_sign_idx) return false;
1551
1
    if (a._disable_auto_compaction != b._disable_auto_compaction) return false;
1552
1
    if (a._enable_single_replica_compaction != b._enable_single_replica_compaction) return false;
1553
1
    if (a._store_row_column != b._store_row_column) return false;
1554
1
    if (a._row_store_page_size != b._row_store_page_size) return false;
1555
1
    if (a._storage_page_size != b._storage_page_size) return false;
1556
1
    if (a._skip_write_index_on_load != b._skip_write_index_on_load) return false;
1557
1
    if (a._variant_enable_flatten_nested != b._variant_enable_flatten_nested) return false;
1558
1
    return true;
1559
1
}
1560
1561
1
bool operator!=(const TabletSchema& a, const TabletSchema& b) {
1562
1
    return !(a == b);
1563
1
}
1564
1565
} // namespace doris