Coverage Report

Created: 2026-01-05 14:05

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/olap/tablet_schema.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "olap/tablet_schema.h"
19
20
#include <gen_cpp/Descriptors_types.h>
21
#include <gen_cpp/olap_file.pb.h>
22
#include <glog/logging.h>
23
#include <google/protobuf/io/coded_stream.h>
24
#include <google/protobuf/io/zero_copy_stream.h>
25
#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
26
27
#include <algorithm>
28
#include <cctype>
29
// IWYU pragma: no_include <bits/std_abs.h>
30
#include <cmath> // IWYU pragma: keep
31
#include <memory>
32
#include <ostream>
33
#include <vector>
34
35
#include "common/compiler_util.h" // IWYU pragma: keep
36
#include "common/consts.h"
37
#include "common/status.h"
38
#include "exec/tablet_info.h"
39
#include "olap/inverted_index_parser.h"
40
#include "olap/olap_common.h"
41
#include "olap/olap_define.h"
42
#include "olap/tablet_column_object_pool.h"
43
#include "olap/types.h"
44
#include "olap/utils.h"
45
#include "tablet_meta.h"
46
#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
47
#include "vec/aggregate_functions/aggregate_function_state_union.h"
48
#include "vec/columns/column_nothing.h"
49
#include "vec/common/hex.h"
50
#include "vec/common/string_ref.h"
51
#include "vec/core/block.h"
52
#include "vec/data_types/data_type.h"
53
#include "vec/data_types/data_type_factory.hpp"
54
#include "vec/json/path_in_data.h"
55
56
namespace doris {
57
#include "common/compile_check_begin.h"
58
70.4k
// Translates a PrimitiveType into the corresponding storage FieldType.
//
// INVALID_TYPE, TYPE_BINARY, TYPE_DECIMALV2, TYPE_LAMBDA_FUNCTION and every
// value not listed below map to OLAP_FIELD_TYPE_UNKNOWN.
FieldType TabletColumn::get_field_type_by_type(PrimitiveType primitiveType) {
    switch (primitiveType) {
    // ---- null / boolean / integers ----
    case PrimitiveType::TYPE_NULL:
        return FieldType::OLAP_FIELD_TYPE_NONE;
    case PrimitiveType::TYPE_BOOLEAN:
        return FieldType::OLAP_FIELD_TYPE_BOOL;
    case PrimitiveType::TYPE_TINYINT:
        return FieldType::OLAP_FIELD_TYPE_TINYINT;
    case PrimitiveType::TYPE_SMALLINT:
        return FieldType::OLAP_FIELD_TYPE_SMALLINT;
    case PrimitiveType::TYPE_INT:
        return FieldType::OLAP_FIELD_TYPE_INT;
    case PrimitiveType::TYPE_BIGINT:
        return FieldType::OLAP_FIELD_TYPE_BIGINT;
    case PrimitiveType::TYPE_LARGEINT:
        return FieldType::OLAP_FIELD_TYPE_LARGEINT;

    // ---- floating point / decimals ----
    case PrimitiveType::TYPE_FLOAT:
        return FieldType::OLAP_FIELD_TYPE_FLOAT;
    case PrimitiveType::TYPE_DOUBLE:
        return FieldType::OLAP_FIELD_TYPE_DOUBLE;
    case PrimitiveType::TYPE_DECIMAL32:
        return FieldType::OLAP_FIELD_TYPE_DECIMAL32;
    case PrimitiveType::TYPE_DECIMAL64:
        return FieldType::OLAP_FIELD_TYPE_DECIMAL64;
    case PrimitiveType::TYPE_DECIMAL128I:
        return FieldType::OLAP_FIELD_TYPE_DECIMAL128I;
    case PrimitiveType::TYPE_DECIMAL256:
        return FieldType::OLAP_FIELD_TYPE_DECIMAL256;

    // ---- date / time ----
    case PrimitiveType::TYPE_DATE:
        return FieldType::OLAP_FIELD_TYPE_DATE;
    case PrimitiveType::TYPE_DATETIME:
        return FieldType::OLAP_FIELD_TYPE_DATETIME;
    case PrimitiveType::TYPE_DATEV2:
        return FieldType::OLAP_FIELD_TYPE_DATEV2;
    case PrimitiveType::TYPE_DATETIMEV2:
        return FieldType::OLAP_FIELD_TYPE_DATETIMEV2;
    case PrimitiveType::TYPE_TIMESTAMPTZ:
        return FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ;
    case PrimitiveType::TYPE_TIMEV2:
        return FieldType::OLAP_FIELD_TYPE_TIMEV2;

    // ---- strings / semi-structured ----
    case PrimitiveType::TYPE_CHAR:
        return FieldType::OLAP_FIELD_TYPE_CHAR;
    case PrimitiveType::TYPE_VARCHAR:
        return FieldType::OLAP_FIELD_TYPE_VARCHAR;
    case PrimitiveType::TYPE_STRING:
        return FieldType::OLAP_FIELD_TYPE_STRING;
    case PrimitiveType::TYPE_JSONB:
        return FieldType::OLAP_FIELD_TYPE_JSONB;
    case PrimitiveType::TYPE_VARIANT:
        return FieldType::OLAP_FIELD_TYPE_VARIANT;

    // ---- nested types ----
    case PrimitiveType::TYPE_STRUCT:
        return FieldType::OLAP_FIELD_TYPE_STRUCT;
    case PrimitiveType::TYPE_ARRAY:
        return FieldType::OLAP_FIELD_TYPE_ARRAY;
    case PrimitiveType::TYPE_MAP:
        return FieldType::OLAP_FIELD_TYPE_MAP;

    // ---- aggregate-state style types ----
    case PrimitiveType::TYPE_HLL:
        return FieldType::OLAP_FIELD_TYPE_HLL;
    case PrimitiveType::TYPE_BITMAP:
        return FieldType::OLAP_FIELD_TYPE_BITMAP;
    case PrimitiveType::TYPE_QUANTILE_STATE:
        return FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE;
    case PrimitiveType::TYPE_AGG_STATE:
        return FieldType::OLAP_FIELD_TYPE_AGG_STATE;

    // ---- network addresses ----
    case PrimitiveType::TYPE_IPV4:
        return FieldType::OLAP_FIELD_TYPE_IPV4;
    case PrimitiveType::TYPE_IPV6:
        return FieldType::OLAP_FIELD_TYPE_IPV6;

    // ---- no storage field type ----
    case PrimitiveType::INVALID_TYPE:
    case PrimitiveType::TYPE_BINARY:          // Not implemented
    case PrimitiveType::TYPE_DECIMALV2:       // Not implemented
    case PrimitiveType::TYPE_LAMBDA_FUNCTION: // Not implemented
    default:
        return FieldType::OLAP_FIELD_TYPE_UNKNOWN;
    }
}
138
139
30.5k
// Translates a storage FieldType into the corresponding PrimitiveType using a
// table indexed by the numeric value of the FieldType enumerator.
//
// Field types without a primitive counterpart in the table (the unsigned
// integer types, DISCRETE_DOUBLE, DECIMAL, UNKNOWN) yield INVALID_TYPE.
// A value whose numeric representation falls outside the table also yields
// INVALID_TYPE instead of reading past the end of the array.
PrimitiveType TabletColumn::get_primitive_type_by_field_type(FieldType type) {
    static const PrimitiveType mapping[] = {
            /*  0 */ PrimitiveType::INVALID_TYPE,
            /*  1 OLAP_FIELD_TYPE_TINYINT           */ PrimitiveType::TYPE_TINYINT,
            /*  2 OLAP_FIELD_TYPE_UNSIGNED_TINYINT  */ PrimitiveType::INVALID_TYPE,
            /*  3 OLAP_FIELD_TYPE_SMALLINT          */ PrimitiveType::TYPE_SMALLINT,
            /*  4 OLAP_FIELD_TYPE_UNSIGNED_SMALLINT */ PrimitiveType::INVALID_TYPE,
            /*  5 OLAP_FIELD_TYPE_INT               */ PrimitiveType::TYPE_INT,
            /*  6 OLAP_FIELD_TYPE_UNSIGNED_INT      */ PrimitiveType::INVALID_TYPE,
            /*  7 OLAP_FIELD_TYPE_BIGINT            */ PrimitiveType::TYPE_BIGINT,
            /*  8 OLAP_FIELD_TYPE_UNSIGNED_BIGINT   */ PrimitiveType::INVALID_TYPE,
            /*  9 OLAP_FIELD_TYPE_LARGEINT          */ PrimitiveType::TYPE_LARGEINT,
            /* 10 OLAP_FIELD_TYPE_FLOAT             */ PrimitiveType::TYPE_FLOAT,
            /* 11 OLAP_FIELD_TYPE_DOUBLE            */ PrimitiveType::TYPE_DOUBLE,
            /* 12 OLAP_FIELD_TYPE_DISCRETE_DOUBLE   */ PrimitiveType::INVALID_TYPE,
            /* 13 OLAP_FIELD_TYPE_CHAR              */ PrimitiveType::TYPE_CHAR,
            /* 14 OLAP_FIELD_TYPE_DATE              */ PrimitiveType::TYPE_DATE,
            /* 15 OLAP_FIELD_TYPE_DATETIME          */ PrimitiveType::TYPE_DATETIME,
            /* 16 OLAP_FIELD_TYPE_DECIMAL           */ PrimitiveType::INVALID_TYPE,
            /* 17 OLAP_FIELD_TYPE_VARCHAR           */ PrimitiveType::TYPE_VARCHAR,
            /* 18 OLAP_FIELD_TYPE_STRUCT            */ PrimitiveType::TYPE_STRUCT,
            /* 19 OLAP_FIELD_TYPE_ARRAY             */ PrimitiveType::TYPE_ARRAY,
            /* 20 OLAP_FIELD_TYPE_MAP               */ PrimitiveType::TYPE_MAP,
            /* 21 OLAP_FIELD_TYPE_UNKNOWN           */ PrimitiveType::INVALID_TYPE,
            /* 22 OLAP_FIELD_TYPE_NONE              */ PrimitiveType::TYPE_NULL,
            /* 23 OLAP_FIELD_TYPE_HLL               */ PrimitiveType::TYPE_HLL,
            /* 24 OLAP_FIELD_TYPE_BOOL              */ PrimitiveType::TYPE_BOOLEAN,
            /* 25 OLAP_FIELD_TYPE_BITMAP            */ PrimitiveType::TYPE_BITMAP,
            /* 26 OLAP_FIELD_TYPE_STRING            */ PrimitiveType::TYPE_STRING,
            /* 27 OLAP_FIELD_TYPE_QUANTILE_STATE    */ PrimitiveType::TYPE_QUANTILE_STATE,
            /* 28 OLAP_FIELD_TYPE_DATEV2            */ PrimitiveType::TYPE_DATEV2,
            /* 29 OLAP_FIELD_TYPE_DATETIMEV2        */ PrimitiveType::TYPE_DATETIMEV2,
            /* 30 OLAP_FIELD_TYPE_TIMEV2            */ PrimitiveType::TYPE_TIMEV2,
            /* 31 OLAP_FIELD_TYPE_DECIMAL32         */ PrimitiveType::TYPE_DECIMAL32,
            /* 32 OLAP_FIELD_TYPE_DECIMAL64         */ PrimitiveType::TYPE_DECIMAL64,
            /* 33 OLAP_FIELD_TYPE_DECIMAL128I       */ PrimitiveType::TYPE_DECIMAL128I,
            /* 34 OLAP_FIELD_TYPE_JSONB             */ PrimitiveType::TYPE_JSONB,
            /* 35 OLAP_FIELD_TYPE_VARIANT           */ PrimitiveType::TYPE_VARIANT,
            /* 36 OLAP_FIELD_TYPE_AGG_STATE         */ PrimitiveType::TYPE_AGG_STATE,
            /* 37 OLAP_FIELD_TYPE_DECIMAL256        */ PrimitiveType::TYPE_DECIMAL256,
            /* 38 OLAP_FIELD_TYPE_IPV4              */ PrimitiveType::TYPE_IPV4,
            /* 39 OLAP_FIELD_TYPE_IPV6              */ PrimitiveType::TYPE_IPV6,
            /* 40 OLAP_FIELD_TYPE_TIMESTAMPTZ       */ PrimitiveType::TYPE_TIMESTAMPTZ,
    };
    constexpr int mapping_size = static_cast<int>(sizeof(mapping) / sizeof(mapping[0]));

    const int idx = static_cast<int>(type);
    // A FieldType can come from deserialized or cast data, so guard the index
    // rather than trusting it to be one of the enumerators listed above.
    if (idx < 0 || idx >= mapping_size) {
        return PrimitiveType::INVALID_TYPE;
    }
    return mapping[idx];
}
187
188
12.4k
// Parses a type name (case-insensitive) into a FieldType.
//
// Most names must match exactly. Three families are matched by prefix only:
// anything starting with "DECIMAL" that is not one of DECIMAL32 / DECIMAL64 /
// DECIMAL128I / DECIMAL256 maps to OLAP_FIELD_TYPE_DECIMAL, anything starting
// with "VARCHAR" maps to OLAP_FIELD_TYPE_VARCHAR, and anything starting with
// "HLL" maps to OLAP_FIELD_TYPE_HLL. "LIST" and "ARRAY" both map to ARRAY;
// "OBJECT" and "BITMAP" both map to BITMAP.
//
// An unrecognized name logs a warning and returns OLAP_FIELD_TYPE_UNKNOWN.
FieldType TabletColumn::get_field_type_by_string(const std::string& type_str) {
    std::string upper_type_str = type_str;
    // std::toupper has undefined behaviour for negative arguments other than EOF,
    // so each character is routed through unsigned char first.
    std::transform(type_str.begin(), type_str.end(), upper_type_str.begin(),
                   [](unsigned char c) { return static_cast<char>(std::toupper(c)); });

    struct NamedFieldType {
        const char* name;
        FieldType type;
    };
    // Names that must match the whole (upper-cased) string.
    static constexpr NamedFieldType exact_names[] = {
            {"TINYINT", FieldType::OLAP_FIELD_TYPE_TINYINT},
            {"SMALLINT", FieldType::OLAP_FIELD_TYPE_SMALLINT},
            {"INT", FieldType::OLAP_FIELD_TYPE_INT},
            {"BIGINT", FieldType::OLAP_FIELD_TYPE_BIGINT},
            {"LARGEINT", FieldType::OLAP_FIELD_TYPE_LARGEINT},
            {"UNSIGNED_TINYINT", FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT},
            {"UNSIGNED_SMALLINT", FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT},
            {"UNSIGNED_INT", FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT},
            {"UNSIGNED_BIGINT", FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT},
            {"IPV4", FieldType::OLAP_FIELD_TYPE_IPV4},
            {"IPV6", FieldType::OLAP_FIELD_TYPE_IPV6},
            {"FLOAT", FieldType::OLAP_FIELD_TYPE_FLOAT},
            {"DISCRETE_DOUBLE", FieldType::OLAP_FIELD_TYPE_DISCRETE_DOUBLE},
            {"DOUBLE", FieldType::OLAP_FIELD_TYPE_DOUBLE},
            {"CHAR", FieldType::OLAP_FIELD_TYPE_CHAR},
            {"DATE", FieldType::OLAP_FIELD_TYPE_DATE},
            {"DATEV2", FieldType::OLAP_FIELD_TYPE_DATEV2},
            {"DATETIMEV2", FieldType::OLAP_FIELD_TYPE_DATETIMEV2},
            {"DATETIME", FieldType::OLAP_FIELD_TYPE_DATETIME},
            {"TIMESTAMPTZ", FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ},
            {"DECIMAL32", FieldType::OLAP_FIELD_TYPE_DECIMAL32},
            {"DECIMAL64", FieldType::OLAP_FIELD_TYPE_DECIMAL64},
            {"DECIMAL128I", FieldType::OLAP_FIELD_TYPE_DECIMAL128I},
            {"DECIMAL256", FieldType::OLAP_FIELD_TYPE_DECIMAL256},
            {"STRING", FieldType::OLAP_FIELD_TYPE_STRING},
            {"JSONB", FieldType::OLAP_FIELD_TYPE_JSONB},
            {"VARIANT", FieldType::OLAP_FIELD_TYPE_VARIANT},
            {"BOOLEAN", FieldType::OLAP_FIELD_TYPE_BOOL},
            {"STRUCT", FieldType::OLAP_FIELD_TYPE_STRUCT},
            {"LIST", FieldType::OLAP_FIELD_TYPE_ARRAY},
            {"MAP", FieldType::OLAP_FIELD_TYPE_MAP},
            {"OBJECT", FieldType::OLAP_FIELD_TYPE_BITMAP},
            {"BITMAP", FieldType::OLAP_FIELD_TYPE_BITMAP},
            {"ARRAY", FieldType::OLAP_FIELD_TYPE_ARRAY},
            {"QUANTILE_STATE", FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE},
            {"AGG_STATE", FieldType::OLAP_FIELD_TYPE_AGG_STATE},
    };
    for (const auto& entry : exact_names) {
        if (upper_type_str == entry.name) {
            return entry.type;
        }
    }

    // Prefix matches run after the exact table so that DECIMAL32/64/128I/256
    // are never swallowed by the generic "DECIMAL" prefix. No exact name above
    // starts with "VARCHAR" or "HLL", so the order among the rest is immaterial.
    if (0 == upper_type_str.compare(0, 7, "DECIMAL")) {
        return FieldType::OLAP_FIELD_TYPE_DECIMAL;
    }
    if (0 == upper_type_str.compare(0, 7, "VARCHAR")) {
        return FieldType::OLAP_FIELD_TYPE_VARCHAR;
    }
    if (0 == upper_type_str.compare(0, 3, "HLL")) {
        return FieldType::OLAP_FIELD_TYPE_HLL;
    }

    LOG(WARNING) << "invalid type string. [type='" << type_str << "']";
    return FieldType::OLAP_FIELD_TYPE_UNKNOWN;
}
279
280
4.11k
// Parses an aggregation name (case-insensitive) into a FieldAggregationMethod.
//
// The built-in names are matched exactly. Any other non-empty string is
// reported as OLAP_FIELD_AGGREGATION_GENERIC; an empty string is reported as
// OLAP_FIELD_AGGREGATION_UNKNOWN.
FieldAggregationMethod TabletColumn::get_aggregation_type_by_string(const std::string& str) {
    std::string upper_str = str;
    // std::toupper has undefined behaviour for negative arguments other than EOF,
    // so each character is routed through unsigned char first.
    std::transform(str.begin(), str.end(), upper_str.begin(),
                   [](unsigned char c) { return static_cast<char>(std::toupper(c)); });

    struct NamedAggregation {
        const char* name;
        FieldAggregationMethod method;
    };
    static constexpr NamedAggregation known_names[] = {
            {"NONE", FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE},
            {"SUM", FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM},
            {"MIN", FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MIN},
            {"MAX", FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MAX},
            {"REPLACE", FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE},
            {"REPLACE_IF_NOT_NULL",
             FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL},
            {"HLL_UNION", FieldAggregationMethod::OLAP_FIELD_AGGREGATION_HLL_UNION},
            {"BITMAP_UNION", FieldAggregationMethod::OLAP_FIELD_AGGREGATION_BITMAP_UNION},
            {"QUANTILE_UNION", FieldAggregationMethod::OLAP_FIELD_AGGREGATION_QUANTILE_UNION},
    };
    for (const auto& entry : known_names) {
        if (upper_str == entry.name) {
            return entry.method;
        }
    }

    return upper_str.empty() ? FieldAggregationMethod::OLAP_FIELD_AGGREGATION_UNKNOWN
                             : FieldAggregationMethod::OLAP_FIELD_AGGREGATION_GENERIC;
}
312
313
32.3k
// Returns the textual name of a FieldType.
//
// OLAP_FIELD_TYPE_BITMAP is spelled "OBJECT". Any field type without an entry
// below is spelled "UNKNOWN".
std::string TabletColumn::get_string_by_field_type(FieldType type) {
    const char* type_name = "UNKNOWN";
    switch (type) {
    case FieldType::OLAP_FIELD_TYPE_TINYINT:
        type_name = "TINYINT";
        break;
    case FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT:
        type_name = "UNSIGNED_TINYINT";
        break;
    case FieldType::OLAP_FIELD_TYPE_SMALLINT:
        type_name = "SMALLINT";
        break;
    case FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT:
        type_name = "UNSIGNED_SMALLINT";
        break;
    case FieldType::OLAP_FIELD_TYPE_INT:
        type_name = "INT";
        break;
    case FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT:
        type_name = "UNSIGNED_INT";
        break;
    case FieldType::OLAP_FIELD_TYPE_BIGINT:
        type_name = "BIGINT";
        break;
    case FieldType::OLAP_FIELD_TYPE_LARGEINT:
        type_name = "LARGEINT";
        break;
    case FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT:
        type_name = "UNSIGNED_BIGINT";
        break;
    case FieldType::OLAP_FIELD_TYPE_IPV4:
        type_name = "IPV4";
        break;
    case FieldType::OLAP_FIELD_TYPE_IPV6:
        type_name = "IPV6";
        break;
    case FieldType::OLAP_FIELD_TYPE_FLOAT:
        type_name = "FLOAT";
        break;
    case FieldType::OLAP_FIELD_TYPE_DOUBLE:
        type_name = "DOUBLE";
        break;
    case FieldType::OLAP_FIELD_TYPE_DISCRETE_DOUBLE:
        type_name = "DISCRETE_DOUBLE";
        break;
    case FieldType::OLAP_FIELD_TYPE_CHAR:
        type_name = "CHAR";
        break;
    case FieldType::OLAP_FIELD_TYPE_DATE:
        type_name = "DATE";
        break;
    case FieldType::OLAP_FIELD_TYPE_DATEV2:
        type_name = "DATEV2";
        break;
    case FieldType::OLAP_FIELD_TYPE_DATETIME:
        type_name = "DATETIME";
        break;
    case FieldType::OLAP_FIELD_TYPE_DATETIMEV2:
        type_name = "DATETIMEV2";
        break;
    case FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ:
        type_name = "TIMESTAMPTZ";
        break;
    case FieldType::OLAP_FIELD_TYPE_DECIMAL:
        type_name = "DECIMAL";
        break;
    case FieldType::OLAP_FIELD_TYPE_DECIMAL32:
        type_name = "DECIMAL32";
        break;
    case FieldType::OLAP_FIELD_TYPE_DECIMAL64:
        type_name = "DECIMAL64";
        break;
    case FieldType::OLAP_FIELD_TYPE_DECIMAL128I:
        type_name = "DECIMAL128I";
        break;
    case FieldType::OLAP_FIELD_TYPE_DECIMAL256:
        type_name = "DECIMAL256";
        break;
    case FieldType::OLAP_FIELD_TYPE_VARCHAR:
        type_name = "VARCHAR";
        break;
    case FieldType::OLAP_FIELD_TYPE_JSONB:
        type_name = "JSONB";
        break;
    case FieldType::OLAP_FIELD_TYPE_VARIANT:
        type_name = "VARIANT";
        break;
    case FieldType::OLAP_FIELD_TYPE_STRING:
        type_name = "STRING";
        break;
    case FieldType::OLAP_FIELD_TYPE_BOOL:
        type_name = "BOOLEAN";
        break;
    case FieldType::OLAP_FIELD_TYPE_HLL:
        type_name = "HLL";
        break;
    case FieldType::OLAP_FIELD_TYPE_STRUCT:
        type_name = "STRUCT";
        break;
    case FieldType::OLAP_FIELD_TYPE_ARRAY:
        type_name = "ARRAY";
        break;
    case FieldType::OLAP_FIELD_TYPE_MAP:
        type_name = "MAP";
        break;
    case FieldType::OLAP_FIELD_TYPE_BITMAP:
        type_name = "OBJECT";
        break;
    case FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE:
        type_name = "QUANTILE_STATE";
        break;
    case FieldType::OLAP_FIELD_TYPE_AGG_STATE:
        type_name = "AGG_STATE";
        break;
    default:
        break; // keeps "UNKNOWN"
    }
    return type_name;
}
427
428
1.75k
// Returns the textual name of a FieldAggregationMethod.
// Any method without an entry below is spelled "UNKNOWN".
std::string TabletColumn::get_string_by_aggregation_type(FieldAggregationMethod type) {
    const char* method_name = "UNKNOWN";
    switch (type) {
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE:
        method_name = "NONE";
        break;
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM:
        method_name = "SUM";
        break;
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MIN:
        method_name = "MIN";
        break;
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MAX:
        method_name = "MAX";
        break;
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE:
        method_name = "REPLACE";
        break;
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL:
        method_name = "REPLACE_IF_NOT_NULL";
        break;
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_HLL_UNION:
        method_name = "HLL_UNION";
        break;
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_BITMAP_UNION:
        method_name = "BITMAP_UNION";
        break;
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_QUANTILE_UNION:
        method_name = "QUANTILE_UNION";
        break;
    default:
        break; // keeps "UNKNOWN"
    }
    return method_name;
}
461
462
2.22k
// Returns the field length used for a thrift primitive type.
//
// `string_length` only contributes for CHAR, VARCHAR, HLL, AGG_STATE, STRING,
// VARIANT and JSONB; every other type has a fixed length. An unrecognized type
// logs a warning and returns 0.
uint32_t TabletColumn::get_field_length_by_type(TPrimitiveType::type type, uint32_t string_length) {
    switch (type) {
    // ---- fixed-width types, grouped by length ----
    case TPrimitiveType::TINYINT:
    case TPrimitiveType::BOOLEAN:
        return 1;

    case TPrimitiveType::SMALLINT:
        return 2;

    case TPrimitiveType::DATE:
        return 3;

    case TPrimitiveType::INT:
    case TPrimitiveType::IPV4:
    case TPrimitiveType::DATEV2:
    case TPrimitiveType::FLOAT:
    case TPrimitiveType::DECIMAL32:
        return 4;

    case TPrimitiveType::BIGINT:
    case TPrimitiveType::DATETIME:
    case TPrimitiveType::DATETIMEV2:
    case TPrimitiveType::TIMESTAMPTZ:
    case TPrimitiveType::DOUBLE:
    case TPrimitiveType::DECIMAL64:
        return 8;

    case TPrimitiveType::DECIMALV2:
        return 12; // use 12 bytes in olap engine.

    case TPrimitiveType::LARGEINT:
    case TPrimitiveType::IPV6:
    case TPrimitiveType::QUANTILE_STATE:
    case TPrimitiveType::BITMAP:
    case TPrimitiveType::DECIMAL128I:
        return 16;

    case TPrimitiveType::DECIMAL256:
        return 32;

    // ---- lengths derived from the caller-supplied string length ----
    case TPrimitiveType::CHAR:
        return string_length;

    case TPrimitiveType::VARCHAR:
    case TPrimitiveType::HLL:
    case TPrimitiveType::AGG_STATE:
        return string_length + sizeof(OLAP_VARCHAR_MAX_LENGTH);

    case TPrimitiveType::STRING:
    case TPrimitiveType::VARIANT:
        return string_length + sizeof(OLAP_STRING_MAX_LENGTH);

    case TPrimitiveType::JSONB:
        return string_length + sizeof(OLAP_JSONB_MAX_LENGTH);

    // ---- nested types ----
    case TPrimitiveType::STRUCT:
        // Note that(xy): this is the length of struct type itself,
        // the length of its subtypes are not included.
        return OLAP_STRUCT_MAX_LENGTH;

    case TPrimitiveType::ARRAY:
        return OLAP_ARRAY_MAX_LENGTH;

    case TPrimitiveType::MAP:
        return OLAP_MAP_MAX_LENGTH;

    default:
        LOG(WARNING) << "unknown field type. [type=" << type << "]";
        return 0;
    }
}
529
530
9
bool TabletColumn::has_char_type() const {
531
9
    switch (_type) {
532
3
    case FieldType::OLAP_FIELD_TYPE_CHAR: {
533
3
        return true;
534
0
    }
535
4
    case FieldType::OLAP_FIELD_TYPE_ARRAY:
536
4
    case FieldType::OLAP_FIELD_TYPE_MAP:
537
4
    case FieldType::OLAP_FIELD_TYPE_STRUCT: {
538
4
        return std::any_of(_sub_columns.begin(), _sub_columns.end(),
539
4
                           [&](const auto& sub) -> bool { return sub->has_char_type(); });
540
4
    }
541
2
    default:
542
2
        return false;
543
9
    }
544
9
}
545
546
18.4k
// Creates a column whose aggregation method is OLAP_FIELD_AGGREGATION_NONE.
TabletColumn::TabletColumn()
        : _aggregation(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE) {
}
547
548
37.6k
// Creates a column with the given aggregation method and field type.
TabletColumn::TabletColumn(FieldAggregationMethod agg, FieldType type) {
    _type = type;
    _aggregation = agg;
}
552
553
17
// Creates a column with the given aggregation method, field type and
// nullability. The length is taken from the size reported by the scalar type
// info of `field_type`.
TabletColumn::TabletColumn(FieldAggregationMethod agg, FieldType field_type, bool is_nullable) {
    _type = field_type;
    _aggregation = agg;
    _is_nullable = is_nullable;
    _length = cast_set<int32_t>(get_scalar_type_info(field_type)->size());
}
559
560
// Creates a column with an explicit unique id and length in addition to the
// aggregation method, field type and nullability.
TabletColumn::TabletColumn(FieldAggregationMethod agg, FieldType field_type, bool is_nullable,
                           int32_t unique_id, size_t length) {
    _unique_id = unique_id;
    _type = field_type;
    _aggregation = agg;
    _is_nullable = is_nullable;
    _length = cast_set<int32_t>(length);
}
568
569
0
// Creates a column populated from its protobuf description.
TabletColumn::TabletColumn(const ColumnPB& column) {
    this->init_from_pb(column);
}
572
573
4
// Creates a column populated from its thrift description.
TabletColumn::TabletColumn(const TColumn& column) {
    this->init_from_thrift(column);
}
576
577
5
void TabletColumn::init_from_thrift(const TColumn& tcolumn) {
578
5
    ColumnPB column_pb;
579
5
    TabletMeta::init_column_from_tcolumn(tcolumn.col_unique_id, tcolumn, &column_pb);
580
5
    init_from_pb(column_pb);
581
5
}
582
583
12.3k
void TabletColumn::init_from_pb(const ColumnPB& column) {
584
12.3k
    _unique_id = column.unique_id();
585
12.3k
    _col_name = column.name();
586
12.3k
    _col_name_lower_case = to_lower(_col_name);
587
12.3k
    _type = TabletColumn::get_field_type_by_string(column.type());
588
12.3k
    _is_key = column.is_key();
589
12.3k
    _is_nullable = column.is_nullable();
590
12.3k
    _is_auto_increment = column.is_auto_increment();
591
12.3k
    if (column.has_is_on_update_current_timestamp()) {
592
11.4k
        _is_on_update_current_timestamp = column.is_on_update_current_timestamp();
593
11.4k
    }
594
595
12.3k
    _has_default_value = column.has_default_value();
596
12.3k
    if (_has_default_value) {
597
30
        _default_value = column.default_value();
598
30
    }
599
600
12.3k
    if (column.has_precision()) {
601
4.06k
        _is_decimal = true;
602
4.06k
        _precision = column.precision();
603
8.33k
    } else {
604
8.33k
        _is_decimal = false;
605
8.33k
    }
606
12.3k
    if (column.has_frac()) {
607
4.06k
        _frac = column.frac();
608
4.06k
    }
609
12.3k
    _length = column.length();
610
12.3k
    _index_length = column.index_length();
611
12.3k
    if (column.has_is_bf_column()) {
612
419
        _is_bf_column = column.is_bf_column();
613
11.9k
    } else {
614
11.9k
        _is_bf_column = false;
615
11.9k
    }
616
12.3k
    if (column.has_aggregation()) {
617
4.11k
        _aggregation = get_aggregation_type_by_string(column.aggregation());
618
4.11k
        _aggregation_name = column.aggregation();
619
4.11k
    }
620
621
12.3k
    if (_type == FieldType::OLAP_FIELD_TYPE_AGG_STATE) {
622
0
        _result_is_nullable = column.result_is_nullable();
623
0
        _be_exec_version = column.be_exec_version();
624
0
    }
625
626
12.3k
    if (column.has_visible()) {
627
11.4k
        _visible = column.visible();
628
11.4k
    }
629
12.3k
    if (_type == FieldType::OLAP_FIELD_TYPE_ARRAY) {
630
6
        CHECK(column.children_columns_size() == 1)
631
0
                << "ARRAY type should has 1 children types, but got "
632
0
                << column.children_columns_size();
633
6
    }
634
12.3k
    if (_type == FieldType::OLAP_FIELD_TYPE_MAP) {
635
0
        DCHECK(column.children_columns_size() == 2)
636
0
                << "MAP type should has 2 children types, but got "
637
0
                << column.children_columns_size();
638
0
        if (UNLIKELY(column.children_columns_size() != 2)) {
639
0
            LOG(WARNING) << "MAP type should has 2 children types, but got "
640
0
                         << column.children_columns_size();
641
0
        }
642
0
    }
643
12.4k
    for (int i = 0; i < column.children_columns_size(); i++) {
644
10
        TabletColumn child_column;
645
10
        child_column.init_from_pb(column.children_columns(i));
646
10
        add_sub_column(child_column);
647
10
    }
648
12.3k
    if (column.has_column_path_info()) {
649
12
        _column_path = std::make_shared<vectorized::PathInData>();
650
12
        _column_path->from_protobuf(column.column_path_info());
651
12
        _parent_col_unique_id = column.column_path_info().parrent_column_unique_id();
652
12
    }
653
12.3k
    if (is_variant_type() && !column.has_column_path_info()) {
654
        // set path info for variant root column, to prevent from missing
655
47
        _column_path = std::make_shared<vectorized::PathInData>(_col_name_lower_case);
656
        // _parent_col_unique_id = _unique_id;
657
47
    }
658
12.3k
    if (column.has_variant_max_subcolumns_count()) {
659
11.4k
        _variant.max_subcolumns_count = column.variant_max_subcolumns_count();
660
11.4k
    }
661
12.3k
    if (column.has_variant_enable_typed_paths_to_sparse()) {
662
11.4k
        _variant.enable_typed_paths_to_sparse = column.variant_enable_typed_paths_to_sparse();
663
11.4k
    }
664
12.3k
    if (column.has_variant_max_sparse_column_statistics_size()) {
665
11.4k
        _variant.max_sparse_column_statistics_size =
666
11.4k
                column.variant_max_sparse_column_statistics_size();
667
11.4k
    }
668
12.3k
    if (column.has_variant_sparse_hash_shard_count()) {
669
9.21k
        _variant.sparse_hash_shard_count = column.variant_sparse_hash_shard_count();
670
9.21k
    }
671
12.3k
    if (column.has_variant_enable_doc_mode()) {
672
9.21k
        _variant.enable_doc_mode = column.variant_enable_doc_mode();
673
9.21k
    }
674
12.3k
    if (column.has_variant_doc_materialization_min_rows()) {
675
9.21k
        _variant.doc_materialization_min_rows = column.variant_doc_materialization_min_rows();
676
9.21k
    }
677
12.3k
    if (column.has_variant_doc_hash_shard_count()) {
678
9.19k
        _variant.doc_hash_shard_count = column.variant_doc_hash_shard_count();
679
9.19k
    }
680
12.3k
    if (column.has_pattern_type()) {
681
9.19k
        _pattern_type = column.pattern_type();
682
9.19k
    }
683
12.3k
}
684
685
// Builds a nullable VARIANT column for the path formed from `root` and `paths`.
//
// The result has unique id -1, records `parent_unique_id` as its parent, is
// named after the full path string, and carries `max_subcolumns_count` as its
// variant sub-column limit.
TabletColumn TabletColumn::create_materialized_variant_column(const std::string& root,
                                                              const std::vector<std::string>& paths,
                                                              int32_t parent_unique_id,
                                                              int32_t max_subcolumns_count) {
    const vectorized::PathInData full_path(root, paths);

    TabletColumn variant_col;
    variant_col.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
    variant_col.set_is_nullable(true);
    variant_col.set_unique_id(-1);
    variant_col.set_parent_unique_id(parent_unique_id);
    variant_col.set_path_info(full_path);
    variant_col.set_name(full_path.get_path());
    variant_col.set_variant_max_subcolumns_count(max_subcolumns_count);
    return variant_col;
}
700
701
32.3k
// Serializes this column, and recursively its sub-columns, into `column`.
// Optional attributes (default value, precision/frac, bloom-filter flag, aggregation
// name) are written only when they are set on this column; the variant settings and
// the pattern type are always written.
void TabletColumn::to_schema_pb(ColumnPB* column) const {
    // Basic identity and flags.
    column->set_unique_id(_unique_id);
    column->set_name(_col_name);
    column->set_type(get_string_by_field_type(_type));
    column->set_is_key(_is_key);
    column->set_is_nullable(_is_nullable);
    column->set_is_auto_increment(_is_auto_increment);
    column->set_is_on_update_current_timestamp(_is_on_update_current_timestamp);

    // Optional attributes.
    if (_has_default_value) {
        column->set_default_value(_default_value);
    }
    if (_is_decimal) {
        column->set_precision(_precision);
        column->set_frac(_frac);
    }
    column->set_length(_length);
    column->set_index_length(_index_length);
    if (_is_bf_column) {
        column->set_is_bf_column(_is_bf_column);
    }
    if (!_aggregation_name.empty()) {
        column->set_aggregation(_aggregation_name);
    }
    column->set_result_is_nullable(_result_is_nullable);
    column->set_be_exec_version(_be_exec_version);
    column->set_visible(_visible);

    // Sanity-check the number of children of complex types: ARRAY is a hard CHECK,
    // MAP is a debug-only DCHECK plus a warning.
    const size_t children_count = _sub_columns.size();
    if (_type == FieldType::OLAP_FIELD_TYPE_ARRAY) {
        CHECK(children_count == 1)
                << "ARRAY type should has 1 children types, but got " << _sub_columns.size();
    }
    if (_type == FieldType::OLAP_FIELD_TYPE_MAP) {
        DCHECK(children_count == 2)
                << "MAP type should has 2 children types, but got " << _sub_columns.size();
        if (UNLIKELY(children_count != 2)) {
            LOG(WARNING) << "MAP type should has 2 children types, but got " << _sub_columns.size();
        }
    }

    // Children are serialized in order, each into a freshly added child message.
    for (const auto& sub_column : _sub_columns) {
        sub_column->to_schema_pb(column->add_children_columns());
    }

    // Path info of variant (sub-)columns.
    if (has_path_info()) {
        // CHECK_GT(_parent_col_unique_id, 0);
        _column_path->to_protobuf(column->mutable_column_path_info(), _parent_col_unique_id);
        // Normalize unstable fields for variant columns. Some tablet schema fields are
        // irrelevant for variant sub-columns, and keeping them may cause excessive growth
        // in the number of tablet schema cache entries.
        if (_type == FieldType::OLAP_FIELD_TYPE_STRING) {
            column->set_length(INT_MAX);
        }
        column->set_index_length(0);
    }

    // Variant settings and pattern type are written unconditionally.
    column->set_variant_max_subcolumns_count(_variant.max_subcolumns_count);
    column->set_pattern_type(_pattern_type);
    column->set_variant_enable_typed_paths_to_sparse(_variant.enable_typed_paths_to_sparse);
    column->set_variant_max_sparse_column_statistics_size(
            _variant.max_sparse_column_statistics_size);
    column->set_variant_sparse_hash_shard_count(_variant.sparse_hash_shard_count);
    column->set_variant_enable_doc_mode(_variant.enable_doc_mode);
    column->set_variant_doc_materialization_min_rows(_variant.doc_materialization_min_rows);
    column->set_variant_doc_hash_shard_count(_variant.doc_hash_shard_count);
}
767
768
1.41k
// Appends a copy of `sub_column` to this column's children and bumps the child count.
// The parent unique id is written to the caller's `sub_column` object after the copy
// has been taken, so the stored copy keeps whatever parent id it had before the call.
void TabletColumn::add_sub_column(TabletColumn& sub_column) {
    auto stored_copy = std::make_shared<TabletColumn>(sub_column);
    _sub_columns.push_back(std::move(stored_copy));
    sub_column._parent_col_unique_id = _unique_id;
    ++_sub_column_count;
}
773
774
25.2k
bool TabletColumn::is_row_store_column() const {
775
25.2k
    return _col_name == BeConsts::ROW_STORE_COL;
776
25.2k
}
777
778
// Creates the "union" aggregate function for an agg-state column.
// `type` is assert_cast to DataTypeAggState. The nested function's name is passed to
// BeExecVersionManager::check_function_compatibility together with
// `current_be_exec_version` and this column's `_be_exec_version`.
vectorized::AggregateFunctionPtr TabletColumn::get_aggregate_function_union(
        vectorized::DataTypePtr type, int current_be_exec_version) const {
    const auto* agg_state_type = assert_cast<const vectorized::DataTypeAggState*>(type.get());
    const auto nested_function_name = agg_state_type->get_nested_function()->get_name();
    BeExecVersionManager::check_function_compatibility(current_be_exec_version, _be_exec_version,
                                                       nested_function_name);
    return vectorized::AggregateStateUnion::create(agg_state_type->get_nested_function(), {type},
                                                   type);
}
786
787
// Resolves the aggregate function used for this column.
//
// For agg-state typed columns the union function of the nested aggregate is returned.
// Otherwise the function is looked up in AggregateFunctionSimpleFactory by the
// lower-cased name "<aggregation type name><suffix>".
//
// @param suffix                  appended to the aggregation type name before lookup.
// @param current_be_exec_version forwarded to get_aggregate_function_union() for the
//                                agg-state case.
// @return the function with its version set to this column's `_be_exec_version`, or
//         nullptr when the data type cannot be created or no function is found.
vectorized::AggregateFunctionPtr TabletColumn::get_aggregate_function(
        std::string suffix, int current_be_exec_version) const {
    auto type = vectorized::DataTypeFactory::instance().create_data_type(*this);
    if (!type) {
        // Without a data type neither lookup path can work; the factory path would
        // dereference the null pointer (type->is_nullable()).
        LOG(WARNING) << "get column aggregate function failed, can not create data type"
                     << ", aggregation_name="
                     << TabletColumn::get_string_by_aggregation_type(_aggregation);
        return nullptr;
    }

    vectorized::AggregateFunctionPtr function = nullptr;
    if (type->get_primitive_type() == PrimitiveType::TYPE_AGG_STATE) {
        function = get_aggregate_function_union(type, current_be_exec_version);
    } else {
        std::string origin_name = TabletColumn::get_string_by_aggregation_type(_aggregation);
        std::string agg_name = origin_name + suffix;
        // The factory is queried with a lower-cased name.
        std::transform(agg_name.begin(), agg_name.end(), agg_name.begin(),
                       [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
        function = vectorized::AggregateFunctionSimpleFactory::instance().get(
                agg_name, {type}, type, type->is_nullable(),
                BeExecVersionManager::get_newest_version());
        if (!function) {
            LOG(WARNING) << "get column aggregate function failed, aggregation_name=" << origin_name
                         << ", column_type=" << type->get_name();
        }
    }

    if (!function) {
        return nullptr;
    }
    function->set_version(_be_exec_version);
    return function;
}
813
814
1.80k
// Replaces this column's path info with a newly allocated shared copy of `path`.
void TabletColumn::set_path_info(const vectorized::PathInData& path) {
    auto path_copy = std::make_shared<vectorized::PathInData>(path);
    _column_path = std::move(path_copy);
}
817
818
183
// Returns the data type that DataTypeFactory creates for this column.
vectorized::DataTypePtr TabletColumn::get_vec_type() const {
    auto& factory = vectorized::DataTypeFactory::instance();
    return factory.create_data_type(*this);
}
821
822
// escape '.' and '_'
823
61.3k
std::string escape_for_path_name(const std::string& s) {
824
61.3k
    std::string res;
825
61.3k
    const char* pos = s.data();
826
61.3k
    const char* end = pos + s.size();
827
61.9k
    while (pos != end) {
828
609
        unsigned char c = *pos;
829
609
        if (c == '.' || c == '_') {
830
55
            res += '%';
831
55
            res += vectorized::hex_digit_uppercase(c / 16);
832
55
            res += vectorized::hex_digit_uppercase(c % 16);
833
554
        } else {
834
554
            res += c;
835
554
        }
836
609
        ++pos;
837
609
    }
838
61.3k
    return res;
839
61.3k
}
840
841
26
void TabletIndex::set_escaped_escaped_index_suffix_path(const std::string& path_name) {
842
26
    std::string escaped_path = escape_for_path_name(path_name);
843
26
    _escaped_index_suffix_path = escaped_path;
844
26
}
845
846
void TabletIndex::init_from_thrift(const TOlapTableIndex& index,
847
18
                                   const TabletSchema& tablet_schema) {
848
18
    _index_id = index.index_id;
849
18
    _index_name = index.index_name;
850
    // init col_unique_id in index at be side, since col_unique_id may be -1 at fe side
851
    // get column unique id by name
852
18
    std::vector<int32_t> col_unique_ids(index.columns.size());
853
36
    for (size_t i = 0; i < index.columns.size(); i++) {
854
18
        auto column_idx = tablet_schema.field_index(index.columns[i]);
855
18
        if (column_idx >= 0) {
856
14
            col_unique_ids[i] = tablet_schema.column(column_idx).unique_id();
857
14
        } else {
858
            // if column unique id not found by column name, find by column unique id
859
            // column unique id can not found means this column is a new column added by light schema change
860
4
            if (index.__isset.column_unique_ids && !index.column_unique_ids.empty() &&
861
4
                tablet_schema.has_column_unique_id(index.column_unique_ids[i])) {
862
1
                col_unique_ids[i] = index.column_unique_ids[i];
863
3
            } else {
864
3
                col_unique_ids[i] = -1;
865
3
            }
866
4
        }
867
18
    }
868
18
    _col_unique_ids = std::move(col_unique_ids);
869
870
18
    switch (index.index_type) {
871
0
    case TIndexType::BITMAP:
872
0
        _index_type = IndexType::BITMAP;
873
0
        break;
874
17
    case TIndexType::INVERTED:
875
17
        _index_type = IndexType::INVERTED;
876
17
        break;
877
1
    case TIndexType::ANN:
878
1
        _index_type = IndexType::ANN;
879
1
        break;
880
0
    case TIndexType::BLOOMFILTER:
881
0
        _index_type = IndexType::BLOOMFILTER;
882
0
        break;
883
0
    case TIndexType::NGRAM_BF:
884
0
        _index_type = IndexType::NGRAM_BF;
885
0
        break;
886
18
    }
887
18
    if (index.__isset.properties) {
888
4
        for (auto kv : index.properties) {
889
4
            _properties[kv.first] = kv.second;
890
4
        }
891
1
    }
892
18
}
893
894
void TabletIndex::init_from_thrift(const TOlapTableIndex& index,
895
1
                                   const std::vector<int32_t>& column_uids) {
896
1
    _index_id = index.index_id;
897
1
    _index_name = index.index_name;
898
1
    _col_unique_ids = column_uids;
899
900
1
    switch (index.index_type) {
901
0
    case TIndexType::BITMAP:
902
0
        _index_type = IndexType::BITMAP;
903
0
        break;
904
1
    case TIndexType::INVERTED:
905
1
        _index_type = IndexType::INVERTED;
906
1
        break;
907
0
    case TIndexType::ANN:
908
0
        _index_type = IndexType::ANN;
909
0
        break;
910
0
    case TIndexType::BLOOMFILTER:
911
0
        _index_type = IndexType::BLOOMFILTER;
912
0
        break;
913
0
    case TIndexType::NGRAM_BF:
914
0
        _index_type = IndexType::NGRAM_BF;
915
0
        break;
916
1
    }
917
1
    if (index.__isset.properties) {
918
3
        for (auto kv : index.properties) {
919
3
            _properties[kv.first] = kv.second;
920
3
        }
921
1
    }
922
1
}
923
924
7.49k
void TabletIndex::init_from_pb(const TabletIndexPB& index) {
925
7.49k
    _index_id = index.index_id();
926
7.49k
    _index_name = index.index_name();
927
7.49k
    _col_unique_ids.clear();
928
7.49k
    for (auto col_unique_id : index.col_unique_id()) {
929
7.46k
        _col_unique_ids.push_back(col_unique_id);
930
7.46k
    }
931
7.49k
    _index_type = index.index_type();
932
41.0k
    for (const auto& kv : index.properties()) {
933
41.0k
        _properties[kv.first] = kv.second;
934
41.0k
    }
935
7.49k
    _escaped_index_suffix_path = index.index_suffix_name();
936
7.49k
}
937
938
11.0k
// Serializes this index into `index`.
// The column unique id list in `index` is rebuilt from scratch. All properties are
// copied, and when at least one property exists but the lowercase key is missing,
// the lowercase key is written with the value INVERTED_INDEX_PARSER_TRUE.
// The "tablet_schema.to_schema_pb" debug point skips the lowercase key and the
// default injection.
void TabletIndex::to_schema_pb(TabletIndexPB* index) const {
    index->set_index_id(_index_id);
    index->set_index_name(_index_name);

    index->clear_col_unique_id();
    for (const auto uid : _col_unique_ids) {
        index->add_col_unique_id(uid);
    }

    index->set_index_type(_index_type);
    for (const auto& kv : _properties) {
        DBUG_EXECUTE_IF("tablet_schema.to_schema_pb", {
            if (kv.first == INVERTED_INDEX_PARSER_LOWERCASE_KEY) {
                continue;
            }
        })
        (*index->mutable_properties())[kv.first] = kv.second;
    }
    index->set_index_suffix_name(_escaped_index_suffix_path);

    DBUG_EXECUTE_IF("tablet_schema.to_schema_pb", { return; })

    // lowercase by default
    const bool needs_default_lowercase =
            !_properties.empty() && !_properties.contains(INVERTED_INDEX_PARSER_LOWERCASE_KEY);
    if (needs_default_lowercase) {
        (*index->mutable_properties())[INVERTED_INDEX_PARSER_LOWERCASE_KEY] =
                INVERTED_INDEX_PARSER_TRUE;
    }
}
966
967
3.24k
// Both special members are compiler-generated; they are defined here, out of line.
TabletSchema::TabletSchema() = default;

TabletSchema::~TabletSchema() = default;
970
971
1.95k
int64_t TabletSchema::get_metadata_size() const {
972
1.95k
    return sizeof(TabletSchema);
973
1.95k
}
974
975
3.34k
// Appends `column` to the schema and updates every derived counter and lookup map.
//
// @param column   the column to add (taken by value and moved into the schema).
// @param col_type controls name indexing: VARIANT columns (and any variant-typed or
//                 extracted column) are indexed by name and by path, NORMAL columns
//                 by name only; other kinds are indexed by unique id only.
//
// Side effects: key/null/variant counters, the special column indexes (delete sign,
// sequence, version, skip bitmap, virtual columns), uid/name/path -> index maps and,
// when the column takes part in a sequence mapping, the index-based sequence maps.
void TabletSchema::append_column(TabletColumn column, ColumnType col_type) {
    // `column` is moved into `_cols` below, so capture the unique id up front instead
    // of reading it from the moved-from object afterwards.
    const auto col_uid = column.unique_id();

    if (column.is_key()) {
        _num_key_columns++;
    }
    if (column.is_nullable()) {
        _num_null_columns++;
    }
    if (column.is_variant_type()) {
        ++_num_variant_columns;
        if (!column.has_path_info()) {
            // A variant column without path info gets a path built from its
            // lower-cased name.
            const std::string& col_name = column.name_lower_case();
            vectorized::PathInData path(col_name);
            column.set_path_info(path);
        }
    }
    if (UNLIKELY(column.name() == DELETE_SIGN)) {
        _delete_sign_idx = _num_columns;
    } else if (UNLIKELY(column.name() == SEQUENCE_COL)) {
        _sequence_col_idx = _num_columns;
    } else if (UNLIKELY(column.name() == VERSION_COL)) {
        _version_col_idx = _num_columns;
    } else if (UNLIKELY(column.name() == SKIP_BITMAP_COL)) {
        _skip_bitmap_col_idx = _num_columns;
    } else if (UNLIKELY(column.name().starts_with(BeConsts::VIRTUAL_COLUMN_PREFIX))) {
        _vir_col_idx_to_unique_id[_num_columns] = col_uid;
    }
    _field_uniqueid_to_index[col_uid] = _num_columns;
    _cols.push_back(std::make_shared<TabletColumn>(std::move(column)));
    // A dropped column may have the same name as an existing column, so such columns
    // are not added to the name -> index map, only to the uid -> index map.
    if (col_type == ColumnType::VARIANT || _cols.back()->is_variant_type() ||
        _cols.back()->is_extracted_column()) {
        _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns);
        _field_path_to_index[_cols.back()->path_info_ptr().get()] = _num_columns;
    } else if (col_type == ColumnType::NORMAL) {
        _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns);
    }
    _num_columns++;
    _num_virtual_columns = _vir_col_idx_to_unique_id.size();
    // generate column index mapping for seq map
    if (_seq_col_uid_to_value_cols_uid.contains(col_uid)) {
        const auto seq_idx = _field_uniqueid_to_index[col_uid];
        if (!_seq_col_idx_to_value_cols_idx.contains(seq_idx)) {
            _seq_col_idx_to_value_cols_idx[seq_idx] = {};
        }
    }
    if (_value_col_uid_to_seq_col_uid.contains(col_uid)) {
        const auto seq_uid = _value_col_uid_to_seq_col_uid[col_uid];
        if (_field_uniqueid_to_index.contains(seq_uid)) {
            // The index-based maps are filled only once the sequence column and all of
            // its value columns are present in the schema.
            bool all_uid_index_found = true;
            std::vector<int32_t> value_cols_index;
            for (const auto value_col_uid : _seq_col_uid_to_value_cols_uid[seq_uid]) {
                if (!_field_uniqueid_to_index.contains(value_col_uid)) {
                    all_uid_index_found = false;
                    break;
                }
                value_cols_index.push_back(_field_uniqueid_to_index[value_col_uid]);
            }
            if (all_uid_index_found) {
                const auto seq_idx = _field_uniqueid_to_index[seq_uid];
                for (const auto col_idx : value_cols_index) {
                    _seq_col_idx_to_value_cols_idx[seq_idx].push_back(col_idx);
                    _value_col_idx_to_seq_col_idx[col_idx] = seq_idx;
                }
                _value_col_idx_to_seq_col_idx[seq_idx] = seq_idx;
            }
        }
    }
}
1044
1045
73
// Appends `index` to the schema and registers it in the lookup maps.
// For each indexed column unique id: an index with a non-empty field pattern is
// registered under (column uid, pattern); any other index is registered under the
// key (index type, column uid, index suffix) with its position in `_indexes`.
void TabletSchema::append_index(TabletIndex&& index) {
    const size_t index_pos = _indexes.size();
    // `index` is a named rvalue reference and therefore an lvalue here; it has to be
    // moved explicitly, otherwise the TabletIndex would be copied.
    _indexes.push_back(std::make_shared<TabletIndex>(std::move(index)));
    for (int32_t id : _indexes.back()->col_unique_ids()) {
        if (auto field_pattern = _indexes.back()->field_pattern(); !field_pattern.empty()) {
            auto& pattern_to_index_map = _index_by_unique_id_with_pattern[id];
            pattern_to_index_map[field_pattern].emplace_back(_indexes.back());
        } else {
            IndexKey key = std::make_tuple(_indexes.back()->index_type(), id,
                                           _indexes.back()->get_index_suffix());
            _col_id_suffix_to_index[key].push_back(index_pos);
        }
    }
}
1059
1060
0
void TabletSchema::replace_column(size_t pos, TabletColumn new_col) {
1061
0
    CHECK_LT(pos, num_columns()) << " outof range";
1062
0
    _cols[pos] = std::make_shared<TabletColumn>(std::move(new_col));
1063
0
}
1064
1065
3
void TabletSchema::clear_index() {
1066
3
    _indexes.clear();
1067
3
    _col_id_suffix_to_index.clear();
1068
3
    _index_by_unique_id_with_pattern.clear();
1069
3
}
1070
1071
7
void TabletSchema::remove_index(int64_t index_id) {
1072
7
    std::vector<TabletIndexPtr> new_indexes;
1073
11
    for (auto& index : _indexes) {
1074
11
        if (index->index_id() != index_id) {
1075
4
            new_indexes.emplace_back(std::move(index));
1076
4
        }
1077
11
    }
1078
7
    _indexes = std::move(new_indexes);
1079
7
    _col_id_suffix_to_index.clear();
1080
7
    _index_by_unique_id_with_pattern.clear();
1081
11
    for (size_t new_pos = 0; new_pos < _indexes.size(); ++new_pos) {
1082
4
        const auto& index = _indexes[new_pos];
1083
4
        for (int32_t col_uid : index->col_unique_ids()) {
1084
4
            if (auto field_pattern = index->field_pattern(); !field_pattern.empty()) {
1085
0
                auto& pattern_to_index_map = _index_by_unique_id_with_pattern[col_uid];
1086
0
                pattern_to_index_map[field_pattern].emplace_back(index);
1087
4
            } else {
1088
4
                IndexKey key = std::make_tuple(_indexes.back()->index_type(), col_uid,
1089
4
                                               _indexes.back()->get_index_suffix());
1090
4
                _col_id_suffix_to_index[key].push_back(new_pos);
1091
4
            }
1092
4
        }
1093
4
    }
1094
7
}
1095
1096
1
void TabletSchema::clear_columns() {
1097
1
    _field_path_to_index.clear();
1098
1
    _field_name_to_index.clear();
1099
1
    _field_uniqueid_to_index.clear();
1100
1
    _num_columns = 0;
1101
1
    _num_variant_columns = 0;
1102
1
    _num_null_columns = 0;
1103
1
    _num_key_columns = 0;
1104
1
    _seq_col_idx_to_value_cols_idx.clear();
1105
1
    _value_col_idx_to_seq_col_idx.clear();
1106
1
    _cols.clear();
1107
1
}
1108
1109
void TabletSchema::init_from_pb(const TabletSchemaPB& schema, bool ignore_extracted_columns,
1110
1.88k
                                bool reuse_cache_column) {
1111
1.88k
    _keys_type = schema.keys_type();
1112
1.88k
    _num_columns = 0;
1113
1.88k
    _num_variant_columns = 0;
1114
1.88k
    _num_key_columns = 0;
1115
1.88k
    _num_null_columns = 0;
1116
1.88k
    _cols.clear();
1117
1.88k
    _indexes.clear();
1118
1.88k
    _index_by_unique_id_with_pattern.clear();
1119
1.88k
    _col_id_suffix_to_index.clear();
1120
1.88k
    _field_name_to_index.clear();
1121
1.88k
    _field_uniqueid_to_index.clear();
1122
1.88k
    _cluster_key_uids.clear();
1123
1.88k
    for (const auto& i : schema.cluster_key_uids()) {
1124
6
        _cluster_key_uids.push_back(i);
1125
6
    }
1126
12.5k
    for (auto& column_pb : schema.column()) {
1127
12.5k
        TabletColumnPtr column;
1128
12.5k
        if (reuse_cache_column) {
1129
345
            auto pair = TabletColumnObjectPool::instance()->insert(
1130
345
                    deterministic_string_serialize(column_pb));
1131
345
            column = pair.second;
1132
            // Release the handle quickly, because we use shared ptr to manage column.
1133
            // It often core during tablet schema copy to another schema because handle's
1134
            // reference count should be managed mannually.
1135
345
            TabletColumnObjectPool::instance()->release(pair.first);
1136
12.2k
        } else {
1137
12.2k
            column = std::make_shared<TabletColumn>();
1138
12.2k
            column->init_from_pb(column_pb);
1139
12.2k
        }
1140
12.5k
        if (ignore_extracted_columns && column->is_extracted_column()) {
1141
0
            continue;
1142
0
        }
1143
12.5k
        if (column->is_key()) {
1144
2.42k
            _num_key_columns++;
1145
2.42k
        }
1146
12.5k
        if (column->is_nullable()) {
1147
7.47k
            _num_null_columns++;
1148
7.47k
        }
1149
12.5k
        if (column->is_variant_type()) {
1150
64
            ++_num_variant_columns;
1151
64
        }
1152
1153
12.5k
        _cols.emplace_back(std::move(column));
1154
12.5k
        if (!_cols.back()->is_extracted_column()) {
1155
12.5k
            _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns);
1156
12.5k
            _field_uniqueid_to_index[_cols.back()->unique_id()] = _num_columns;
1157
12.5k
        }
1158
12.5k
        _num_columns++;
1159
12.5k
    }
1160
7.40k
    for (const auto& index_pb : schema.index()) {
1161
7.40k
        TabletIndexPtr index;
1162
7.40k
        if (reuse_cache_column) {
1163
163
            auto pair = TabletColumnObjectPool::instance()->insert_index(
1164
163
                    deterministic_string_serialize(index_pb));
1165
163
            index = pair.second;
1166
            //  Only need the value to be cached by the pool, release it quickly because the handle need
1167
            // record reference count mannually, or it will core during tablet schema copy method.
1168
163
            TabletColumnObjectPool::instance()->release(pair.first);
1169
7.24k
        } else {
1170
7.24k
            index = std::make_shared<TabletIndex>();
1171
7.24k
            index->init_from_pb(index_pb);
1172
7.24k
        }
1173
7.40k
        size_t index_pos = _indexes.size();
1174
7.40k
        _indexes.emplace_back(std::move(index));
1175
7.40k
        for (int32_t col_uid : _indexes.back()->col_unique_ids()) {
1176
7.40k
            if (auto field_pattern = _indexes.back()->field_pattern(); !field_pattern.empty()) {
1177
0
                auto& pattern_to_index_map = _index_by_unique_id_with_pattern[col_uid];
1178
0
                pattern_to_index_map[field_pattern].emplace_back(_indexes.back());
1179
7.40k
            } else {
1180
7.40k
                IndexKey key = std::make_tuple(_indexes.back()->index_type(), col_uid,
1181
7.40k
                                               _indexes.back()->get_index_suffix());
1182
7.40k
                _col_id_suffix_to_index[key].push_back(index_pos);
1183
7.40k
            }
1184
7.40k
        }
1185
7.40k
    }
1186
1.88k
    _num_short_key_columns = schema.num_short_key_columns();
1187
1.88k
    _num_rows_per_row_block = schema.num_rows_per_row_block();
1188
1.88k
    _compress_kind = schema.compress_kind();
1189
1.88k
    _next_column_unique_id = schema.next_column_unique_id();
1190
1.88k
    if (schema.has_bf_fpp()) {
1191
3
        _has_bf_fpp = true;
1192
3
        _bf_fpp = schema.bf_fpp();
1193
1.87k
    } else {
1194
1.87k
        _has_bf_fpp = false;
1195
1.87k
        _bf_fpp = BLOOM_FILTER_DEFAULT_FPP;
1196
1.87k
    }
1197
1.88k
    _is_in_memory = schema.is_in_memory();
1198
1.88k
    _disable_auto_compaction = schema.disable_auto_compaction();
1199
1.88k
    _enable_single_replica_compaction = schema.enable_single_replica_compaction();
1200
1.88k
    _store_row_column = schema.store_row_column();
1201
1.88k
    _skip_write_index_on_load = schema.skip_write_index_on_load();
1202
1.88k
    _delete_sign_idx = schema.delete_sign_idx();
1203
1.88k
    _sequence_col_idx = schema.sequence_col_idx();
1204
1.88k
    _version_col_idx = schema.version_col_idx();
1205
1.88k
    _skip_bitmap_col_idx = schema.skip_bitmap_col_idx();
1206
1.88k
    _sort_type = schema.sort_type();
1207
1.88k
    _sort_col_num = schema.sort_col_num();
1208
1.88k
    _compression_type = schema.compression_type();
1209
1.88k
    _row_store_page_size = schema.row_store_page_size();
1210
1.88k
    _storage_page_size = schema.storage_page_size();
1211
1.88k
    _storage_dict_page_size = schema.storage_dict_page_size();
1212
1.88k
    _schema_version = schema.schema_version();
1213
1.88k
    if (schema.has_seq_map()) {
1214
1.55k
        auto column_groups_pb = schema.seq_map();
1215
1.55k
        _seq_col_uid_to_value_cols_uid.clear();
1216
1.55k
        _value_col_uid_to_seq_col_uid.clear();
1217
1.55k
        _seq_col_idx_to_value_cols_idx.clear();
1218
1.55k
        _value_col_idx_to_seq_col_idx.clear();
1219
        /*
1220
         * ColumnGroupsPB is a list of cg_pb, and
1221
         * ColumnGroupsPB do not have begin() or end() method.
1222
         * we must use for(i=0;i<xx;i++) loop
1223
         */
1224
1.55k
        for (int i = 0; i < column_groups_pb.cg_size(); i++) {
1225
0
            ColumnGroupPB cg_pb = column_groups_pb.cg(i);
1226
0
            uint32_t key_uid = cg_pb.sequence_column();
1227
0
            auto found = _field_uniqueid_to_index.find(key_uid);
1228
0
            DCHECK(found != _field_uniqueid_to_index.end())
1229
0
                    << "could not find sequence col with unique id = " << key_uid
1230
0
                    << " table_id=" << _table_id;
1231
0
            int32_t seq_index = found->second;
1232
0
            _seq_col_uid_to_value_cols_uid[key_uid] = {};
1233
0
            _seq_col_idx_to_value_cols_idx[seq_index] = {};
1234
0
            for (auto val_uid : cg_pb.columns_in_group()) {
1235
0
                _seq_col_uid_to_value_cols_uid[key_uid].push_back(val_uid);
1236
0
                found = _field_uniqueid_to_index.find(val_uid);
1237
0
                DCHECK(found != _field_uniqueid_to_index.end())
1238
0
                        << "could not find value col with unique id = " << key_uid
1239
0
                        << " table_id=" << _table_id;
1240
0
                int32_t val_index = found->second;
1241
0
                _seq_col_idx_to_value_cols_idx[seq_index].push_back(val_index);
1242
0
            }
1243
0
        }
1244
1245
1.55k
        if (!_seq_col_uid_to_value_cols_uid.empty()) {
1246
            /*
1247
                |** KEY **|        ** VALUE **     |
1248
                ------------------------------------
1249
                |** KEY **|  CDE is value| sequence|
1250
                |----|----|----|----|----|----|----|
1251
                A    B    C    D    E   S1   S2
1252
                0    1    2    3    4    5    6
1253
                for example: _seq_map is {5:{2,3}, 6:{4}}
1254
                then, _value_to_seq = {2:5,3:5,5:5,4:6,6:6}
1255
            */
1256
0
            for (auto& [seq_uid, cols_uid] : _seq_col_uid_to_value_cols_uid) {
1257
0
                for (auto col_uid : cols_uid) {
1258
0
                    _value_col_uid_to_seq_col_uid[col_uid] = seq_uid;
1259
0
                }
1260
0
                _value_col_uid_to_seq_col_uid[seq_uid] = seq_uid;
1261
0
            }
1262
1263
0
            for (auto& [seq_idx, value_cols_idx] : _seq_col_idx_to_value_cols_idx) {
1264
0
                for (auto col_idx : value_cols_idx) {
1265
0
                    _value_col_idx_to_seq_col_idx[col_idx] = seq_idx;
1266
0
                }
1267
0
                _value_col_idx_to_seq_col_idx[seq_idx] = seq_idx;
1268
0
            }
1269
0
        }
1270
1.55k
    }
1271
    // Default to V1 inverted index storage format for backward compatibility if not specified in schema.
1272
1.88k
    if (!schema.has_inverted_index_storage_format()) {
1273
282
        _inverted_index_storage_format = InvertedIndexStorageFormatPB::V1;
1274
1.59k
    } else {
1275
1.59k
        _inverted_index_storage_format = schema.inverted_index_storage_format();
1276
1.59k
    }
1277
1278
1.88k
    _row_store_column_unique_ids.assign(schema.row_store_column_unique_ids().begin(),
1279
1.88k
                                        schema.row_store_column_unique_ids().end());
1280
1.88k
    _enable_variant_flatten_nested = schema.enable_variant_flatten_nested();
1281
1.88k
    if (schema.has_is_external_segment_column_meta_used()) {
1282
941
        _is_external_segment_column_meta_used = schema.is_external_segment_column_meta_used();
1283
941
    } else {
1284
939
        _is_external_segment_column_meta_used = false;
1285
939
    }
1286
1.88k
    if (schema.has_integer_type_default_use_plain_encoding()) {
1287
941
        _integer_type_default_use_plain_encoding = schema.integer_type_default_use_plain_encoding();
1288
941
    }
1289
1.88k
    if (schema.has_binary_plain_encoding_default_impl()) {
1290
941
        _binary_plain_encoding_default_impl = schema.binary_plain_encoding_default_impl();
1291
941
    }
1292
1.88k
    update_metadata_size();
1293
1.88k
}
1294
1295
390
void TabletSchema::copy_from(const TabletSchema& tablet_schema) {
1296
390
    TabletSchemaPB tablet_schema_pb;
1297
390
    tablet_schema.to_schema_pb(&tablet_schema_pb);
1298
390
    init_from_pb(tablet_schema_pb);
1299
390
    _table_id = tablet_schema.table_id();
1300
390
    _path_set_info_map = tablet_schema._path_set_info_map;
1301
390
}
1302
1303
42
void TabletSchema::shawdow_copy_without_columns(const TabletSchema& tablet_schema) {
1304
42
    *this = tablet_schema;
1305
42
    _field_path_to_index.clear();
1306
42
    _field_name_to_index.clear();
1307
42
    _field_uniqueid_to_index.clear();
1308
42
    _num_columns = 0;
1309
42
    _num_variant_columns = 0;
1310
42
    _num_null_columns = 0;
1311
42
    _num_key_columns = 0;
1312
42
    _cols.clear();
1313
42
    _delete_sign_idx = -1;
1314
42
    _sequence_col_idx = -1;
1315
42
    _version_col_idx = -1;
1316
42
}
1317
1318
0
void TabletSchema::update_index_info_from(const TabletSchema& tablet_schema) {
1319
0
    for (auto& col : _cols) {
1320
0
        if (col->unique_id() < 0) {
1321
0
            continue;
1322
0
        }
1323
0
        const auto iter = tablet_schema._field_uniqueid_to_index.find(col->unique_id());
1324
0
        if (iter == tablet_schema._field_uniqueid_to_index.end()) {
1325
0
            continue;
1326
0
        }
1327
0
        auto col_idx = iter->second;
1328
0
        if (col_idx < 0 || col_idx >= tablet_schema._cols.size()) {
1329
0
            continue;
1330
0
        }
1331
0
        col->set_is_bf_column(tablet_schema._cols[col_idx]->is_bf_column());
1332
0
    }
1333
0
}
1334
1335
6.37k
std::string TabletSchema::to_key() const {
1336
6.37k
    TabletSchemaPB pb;
1337
6.37k
    to_schema_pb(&pb);
1338
6.37k
    return TabletSchema::deterministic_string_serialize(pb);
1339
6.37k
}
1340
1341
void TabletSchema::build_current_tablet_schema(int64_t index_id, int32_t version,
1342
                                               const OlapTableIndexSchema* index,
1343
0
                                               const TabletSchema& ori_tablet_schema) {
1344
    // copy from ori_tablet_schema
1345
0
    _keys_type = ori_tablet_schema.keys_type();
1346
0
    _num_short_key_columns = ori_tablet_schema.num_short_key_columns();
1347
0
    _num_rows_per_row_block = ori_tablet_schema.num_rows_per_row_block();
1348
0
    _compress_kind = ori_tablet_schema.compress_kind();
1349
1350
    // todo(yixiu): unique_id
1351
0
    _next_column_unique_id = ori_tablet_schema.next_column_unique_id();
1352
0
    _is_in_memory = ori_tablet_schema.is_in_memory();
1353
0
    _disable_auto_compaction = ori_tablet_schema.disable_auto_compaction();
1354
0
    _enable_single_replica_compaction = ori_tablet_schema.enable_single_replica_compaction();
1355
0
    _skip_write_index_on_load = ori_tablet_schema.skip_write_index_on_load();
1356
0
    _sort_type = ori_tablet_schema.sort_type();
1357
0
    _sort_col_num = ori_tablet_schema.sort_col_num();
1358
0
    _row_store_page_size = ori_tablet_schema.row_store_page_size();
1359
0
    _storage_page_size = ori_tablet_schema.storage_page_size();
1360
0
    _storage_dict_page_size = ori_tablet_schema.storage_dict_page_size();
1361
0
    _enable_variant_flatten_nested = ori_tablet_schema.variant_flatten_nested();
1362
1363
    // copy from table_schema_param
1364
0
    _schema_version = version;
1365
0
    _num_columns = 0;
1366
0
    _num_variant_columns = 0;
1367
0
    _num_key_columns = 0;
1368
0
    _num_null_columns = 0;
1369
0
    bool has_bf_columns = false;
1370
0
    _cols.clear();
1371
0
    _indexes.clear();
1372
0
    _col_id_suffix_to_index.clear();
1373
0
    _index_by_unique_id_with_pattern.clear();
1374
0
    _field_name_to_index.clear();
1375
0
    _field_uniqueid_to_index.clear();
1376
0
    _delete_sign_idx = -1;
1377
0
    _sequence_col_idx = -1;
1378
0
    _version_col_idx = -1;
1379
0
    _skip_bitmap_col_idx = -1;
1380
0
    _cluster_key_uids.clear();
1381
0
    for (const auto& i : ori_tablet_schema._cluster_key_uids) {
1382
0
        _cluster_key_uids.push_back(i);
1383
0
    }
1384
0
    for (auto& column : index->columns) {
1385
0
        if (column->is_key()) {
1386
0
            _num_key_columns++;
1387
0
        }
1388
0
        if (column->is_nullable()) {
1389
0
            _num_null_columns++;
1390
0
        }
1391
0
        if (column->is_bf_column()) {
1392
0
            has_bf_columns = true;
1393
0
        }
1394
0
        if (column->is_variant_type()) {
1395
0
            ++_num_variant_columns;
1396
0
        }
1397
0
        if (UNLIKELY(column->name() == DELETE_SIGN)) {
1398
0
            _delete_sign_idx = _num_columns;
1399
0
        } else if (UNLIKELY(column->name() == SEQUENCE_COL)) {
1400
0
            _sequence_col_idx = _num_columns;
1401
0
        } else if (UNLIKELY(column->name() == VERSION_COL)) {
1402
0
            _version_col_idx = _num_columns;
1403
0
        } else if (UNLIKELY(column->name() == SKIP_BITMAP_COL)) {
1404
0
            _skip_bitmap_col_idx = _num_columns;
1405
0
        }
1406
        // Reuse TabletColumn object from pool to reduce memory consumption
1407
0
        TabletColumnPtr new_column;
1408
0
        ColumnPB column_pb;
1409
0
        column->to_schema_pb(&column_pb);
1410
0
        auto pair = TabletColumnObjectPool::instance()->insert(
1411
0
                deterministic_string_serialize(column_pb));
1412
0
        new_column = pair.second;
1413
        // Release the handle quickly, because we use shared ptr to manage column
1414
0
        TabletColumnObjectPool::instance()->release(pair.first);
1415
0
        _cols.emplace_back(std::move(new_column));
1416
0
        _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns);
1417
0
        _field_uniqueid_to_index[_cols.back()->unique_id()] = _num_columns;
1418
0
        _num_columns++;
1419
0
    }
1420
1421
0
    for (const auto& i : index->indexes) {
1422
0
        size_t index_pos = _indexes.size();
1423
        // Reuse TabletIndex object from pool to reduce memory consumption
1424
0
        TabletIndexPtr new_index;
1425
0
        TabletIndexPB index_pb;
1426
0
        i->to_schema_pb(&index_pb);
1427
0
        auto pair = TabletColumnObjectPool::instance()->insert_index(
1428
0
                deterministic_string_serialize(index_pb));
1429
0
        new_index = pair.second;
1430
        // Release the handle quickly, because we use shared ptr to manage index
1431
0
        TabletColumnObjectPool::instance()->release(pair.first);
1432
0
        _indexes.emplace_back(std::move(new_index));
1433
0
        for (int32_t col_uid : _indexes.back()->col_unique_ids()) {
1434
0
            if (auto field_pattern = _indexes.back()->field_pattern(); !field_pattern.empty()) {
1435
0
                auto& pattern_to_index_map = _index_by_unique_id_with_pattern[col_uid];
1436
0
                pattern_to_index_map[field_pattern].emplace_back(_indexes.back());
1437
0
            } else {
1438
0
                IndexKey key = std::make_tuple(_indexes.back()->index_type(), col_uid,
1439
0
                                               _indexes.back()->get_index_suffix());
1440
0
                _col_id_suffix_to_index[key].push_back(index_pos);
1441
0
            }
1442
0
        }
1443
0
    }
1444
1445
0
    if (has_bf_columns) {
1446
0
        _has_bf_fpp = true;
1447
0
        _bf_fpp = ori_tablet_schema.bloom_filter_fpp();
1448
0
    } else {
1449
0
        _has_bf_fpp = false;
1450
0
        _bf_fpp = BLOOM_FILTER_DEFAULT_FPP;
1451
0
    }
1452
0
}
1453
1454
149
void TabletSchema::merge_dropped_columns(const TabletSchema& src_schema) {
1455
    // If they are the same tablet schema object, then just return
1456
149
    if (this == &src_schema) {
1457
0
        return;
1458
0
    }
1459
2.90k
    for (const auto& src_col : src_schema.columns()) {
1460
2.90k
        if (_field_uniqueid_to_index.find(src_col->unique_id()) == _field_uniqueid_to_index.end()) {
1461
0
            CHECK(!src_col->is_key())
1462
0
                    << src_col->name() << " is key column, should not be dropped.";
1463
0
            ColumnPB src_col_pb;
1464
            // There are some pointer in tablet column, not sure the reference relation, so
1465
            // that deep copy it.
1466
0
            src_col->to_schema_pb(&src_col_pb);
1467
0
            TabletColumn new_col(src_col_pb);
1468
0
            append_column(new_col, TabletSchema::ColumnType::DROPPED);
1469
0
        }
1470
2.90k
    }
1471
149
}
1472
1473
0
// Returns a new schema that carries this schema's properties and every column
// of this schema except the extracted columns.
TabletSchemaSPtr TabletSchema::copy_without_variant_extracted_columns() {
    TabletSchemaSPtr stripped = std::make_shared<TabletSchema>();
    stripped->shawdow_copy_without_columns(*this);
    for (auto& column : this->columns()) {
        if (!column->is_extracted_column()) {
            stripped->append_column(*column);
        }
    }
    return stripped;
}
1484
1485
// Dropped column is in _field_uniqueid_to_index but not in _field_name_to_index
1486
// Could refer to append_column method
1487
6.34k
bool TabletSchema::is_dropped_column(const TabletColumn& col) const {
1488
6.34k
    CHECK(_field_uniqueid_to_index.find(col.unique_id()) != _field_uniqueid_to_index.end())
1489
0
            << "could not find col with unique id = " << col.unique_id()
1490
0
            << " and name = " << col.name() << " table_id=" << _table_id;
1491
6.34k
    auto it = _field_name_to_index.find(StringRef {col.name()});
1492
6.34k
    return it == _field_name_to_index.end() || _cols[it->second]->unique_id() != col.unique_id();
1493
6.34k
}
1494
1495
0
void TabletSchema::copy_extracted_columns(const TabletSchema& src_schema) {
1496
0
    std::unordered_set<int32_t> variant_columns;
1497
0
    for (const auto& col : columns()) {
1498
0
        if (col->is_variant_type()) {
1499
0
            variant_columns.insert(col->unique_id());
1500
0
        }
1501
0
    }
1502
0
    for (const TabletColumnPtr& col : src_schema.columns()) {
1503
0
        if (col->is_extracted_column() && variant_columns.contains(col->parent_unique_id())) {
1504
0
            ColumnPB col_pb;
1505
0
            col->to_schema_pb(&col_pb);
1506
0
            TabletColumn new_col(col_pb);
1507
0
            append_column(new_col, ColumnType::VARIANT);
1508
0
        }
1509
0
    }
1510
0
}
1511
1512
0
void TabletSchema::reserve_extracted_columns() {
1513
0
    for (auto it = _cols.begin(); it != _cols.end();) {
1514
0
        if (!(*it)->is_extracted_column()) {
1515
0
            it = _cols.erase(it);
1516
0
        } else {
1517
0
            ++it;
1518
0
        }
1519
0
    }
1520
0
}
1521
1522
8.62k
// Writes this schema into `tablet_schema_pb`: cluster key uids, columns and
// indexes are appended to the corresponding repeated fields, scalar properties
// are set, and one column group is added per sequence-column mapping.
// bf_fpp is only set when _has_bf_fpp is true.
void TabletSchema::to_schema_pb(TabletSchemaPB* tablet_schema_pb) const {
    // Repeated fields (appended in container order).
    for (const auto& cluster_key_uid : _cluster_key_uids) {
        tablet_schema_pb->add_cluster_key_uids(cluster_key_uid);
    }
    tablet_schema_pb->set_keys_type(_keys_type);
    for (const auto& tablet_column : _cols) {
        ColumnPB* column_pb = tablet_schema_pb->add_column();
        tablet_column->to_schema_pb(column_pb);
    }
    for (const auto& tablet_index : _indexes) {
        auto* tablet_index_pb = tablet_schema_pb->add_index();
        tablet_index->to_schema_pb(tablet_index_pb);
    }

    // Scalar properties.
    tablet_schema_pb->set_num_short_key_columns(cast_set<int32_t>(_num_short_key_columns));
    tablet_schema_pb->set_num_rows_per_row_block(cast_set<int32_t>(_num_rows_per_row_block));
    tablet_schema_pb->set_compress_kind(_compress_kind);
    if (_has_bf_fpp) {
        tablet_schema_pb->set_bf_fpp(_bf_fpp);
    }
    tablet_schema_pb->set_next_column_unique_id(cast_set<uint32_t>(_next_column_unique_id));
    tablet_schema_pb->set_is_in_memory(_is_in_memory);
    tablet_schema_pb->set_disable_auto_compaction(_disable_auto_compaction);
    tablet_schema_pb->set_enable_single_replica_compaction(_enable_single_replica_compaction);
    tablet_schema_pb->set_store_row_column(_store_row_column);
    tablet_schema_pb->set_skip_write_index_on_load(_skip_write_index_on_load);
    tablet_schema_pb->set_delete_sign_idx(_delete_sign_idx);
    tablet_schema_pb->set_sequence_col_idx(_sequence_col_idx);
    tablet_schema_pb->set_sort_type(_sort_type);
    tablet_schema_pb->set_sort_col_num(cast_set<int32_t>(_sort_col_num));
    tablet_schema_pb->set_schema_version(_schema_version);
    tablet_schema_pb->set_compression_type(_compression_type);
    tablet_schema_pb->set_row_store_page_size(_row_store_page_size);
    tablet_schema_pb->set_storage_page_size(_storage_page_size);
    tablet_schema_pb->set_storage_dict_page_size(_storage_dict_page_size);
    tablet_schema_pb->set_version_col_idx(_version_col_idx);
    tablet_schema_pb->set_skip_bitmap_col_idx(_skip_bitmap_col_idx);
    tablet_schema_pb->set_inverted_index_storage_format(_inverted_index_storage_format);
    tablet_schema_pb->mutable_row_store_column_unique_ids()->Assign(
            _row_store_column_unique_ids.begin(), _row_store_column_unique_ids.end());
    tablet_schema_pb->set_enable_variant_flatten_nested(_enable_variant_flatten_nested);
    tablet_schema_pb->set_is_external_segment_column_meta_used(
            _is_external_segment_column_meta_used);
    tablet_schema_pb->set_integer_type_default_use_plain_encoding(
            _integer_type_default_use_plain_encoding);
    tablet_schema_pb->set_binary_plain_encoding_default_impl(_binary_plain_encoding_default_impl);

    // Sequence mapping: one ColumnGroupPB {sequence column: {v1, v2, v3}} per entry.
    auto seq_map_pb = tablet_schema_pb->mutable_seq_map();
    for (const auto& mapping : _seq_col_uid_to_value_cols_uid) {
        uint32_t seq_col_uid = mapping.first;
        ColumnGroupPB* group_pb = seq_map_pb->add_cg();
        group_pb->set_sequence_column(seq_col_uid);
        for (auto value_col_uid : mapping.second) {
            group_pb->add_columns_in_group(value_col_uid);
        }
    }
}
1577
1578
0
size_t TabletSchema::row_size() const {
1579
0
    size_t size = 0;
1580
0
    for (const auto& column : _cols) {
1581
0
        size += column->length();
1582
0
    }
1583
0
    size += (_num_columns + 7) / 8;
1584
1585
0
    return size;
1586
0
}
1587
1588
1.26k
int32_t TabletSchema::field_index(const std::string& field_name) const {
1589
1.26k
    const auto& found = _field_name_to_index.find(StringRef(field_name));
1590
1.26k
    return (found == _field_name_to_index.end()) ? -1 : found->second;
1591
1.26k
}
1592
1593
0
// Returns the position of the column registered under `path`, or -1 if there is none.
int32_t TabletSchema::field_index(const vectorized::PathInData& path) const {
    const auto it = _field_path_to_index.find(vectorized::PathInDataRef(&path));
    if (it == _field_path_to_index.end()) {
        return -1;
    }
    return it->second;
}
1597
1598
195
int32_t TabletSchema::field_index(int32_t col_unique_id) const {
1599
195
    const auto& found = _field_uniqueid_to_index.find(col_unique_id);
1600
195
    return (found == _field_uniqueid_to_index.end()) ? -1 : found->second;
1601
195
}
1602
1603
27.4k
const std::vector<TabletColumnPtr>& TabletSchema::columns() const {
1604
27.4k
    return _cols;
1605
27.4k
}
1606
1607
105k
// Returns the column at position `ordinal`. The range is only verified by a
// DCHECK; the element access itself is unchecked.
const TabletColumn& TabletSchema::column(size_t ordinal) const {
    DCHECK(ordinal < _num_columns) << "ordinal:" << ordinal << ", _num_columns:" << _num_columns;
    const auto& column_ptr = _cols[ordinal];
    return *column_ptr;
}
1611
1612
2.44k
// Returns the column with unique id `col_unique_id`. Both lookups use at(), so
// an unknown unique id or an out-of-range stored position throws.
const TabletColumn& TabletSchema::column_by_uid(int32_t col_unique_id) const {
    const auto position = _field_uniqueid_to_index.at(col_unique_id);
    return *_cols.at(position);
}
1615
1616
1
// Mutable counterpart of column_by_uid(): returns the column with unique id
// `col_unique_id`, throwing via at() when the id or stored position is invalid.
TabletColumn& TabletSchema::mutable_column_by_uid(int32_t col_unique_id) {
    const auto position = _field_uniqueid_to_index.at(col_unique_id);
    return *_cols.at(position);
}
1619
1620
18
// Returns a mutable reference to the column at position `ordinal`; at() throws
// when `ordinal` is out of range.
TabletColumn& TabletSchema::mutable_column(size_t ordinal) {
    auto& column_ptr = _cols.at(ordinal);
    return *column_ptr;
}
1623
1624
1
// Replaces the index list with indexes built from `tindexes` and rebuilds the
// two index lookup maps from scratch.
void TabletSchema::update_indexes_from_thrift(const std::vector<doris::TOlapTableIndex>& tindexes) {
    // Build the new list first; init_from_thrift() receives *this while the
    // previous index list is still in place.
    std::vector<TabletIndexPtr> rebuilt;
    for (const auto& tindex : tindexes) {
        TabletIndex parsed;
        parsed.init_from_thrift(tindex, *this);
        rebuilt.emplace_back(std::make_shared<TabletIndex>(std::move(parsed)));
    }
    _indexes = std::move(rebuilt);

    _col_id_suffix_to_index.clear();
    _index_by_unique_id_with_pattern.clear();

    // Indexes with a field pattern are registered per (column uid, pattern);
    // all others are registered under (index type, column uid, suffix).
    for (size_t index_pos = 0; index_pos < _indexes.size(); ++index_pos) {
        auto& index = _indexes[index_pos];
        for (int32_t col_uid : index->col_unique_ids()) {
            auto field_pattern = index->field_pattern();
            if (field_pattern.empty()) {
                IndexKey key =
                        std::make_tuple(index->index_type(), col_uid, index->get_index_suffix());
                _col_id_suffix_to_index[key].push_back(index_pos);
            } else {
                auto& pattern_to_index_map = _index_by_unique_id_with_pattern[col_uid];
                pattern_to_index_map[field_pattern].emplace_back(index);
            }
        }
    }
}
1649
1650
0
bool TabletSchema::exist_column(const std::string& field_name) const {
1651
0
    return _field_name_to_index.contains(StringRef {field_name});
1652
0
}
1653
1654
16.0k
bool TabletSchema::has_column_unique_id(int32_t col_unique_id) const {
1655
16.0k
    return _field_uniqueid_to_index.contains(col_unique_id);
1656
16.0k
}
1657
1658
4.04k
// Returns OK when a column named `field_name` exists; otherwise an
// INTERNAL_ERROR status whose message lists all field names of this schema.
Status TabletSchema::have_column(const std::string& field_name) const {
    if (_field_name_to_index.contains(StringRef(field_name))) {
        return Status::OK();
    }
    return Status::Error<ErrorCode::INTERNAL_ERROR>(
            "Not found field_name, field_name:{}, schema:{}", field_name, get_all_field_names());
}
1666
1667
183
// Looks up a column by name. On success returns a pointer to the column; when
// the name is unknown, trips a DCHECK and returns an InternalError result.
Result<const TabletColumn*> TabletSchema::column(const std::string& field_name) const {
    const auto found = _field_name_to_index.find(StringRef {field_name});
    if (found != _field_name_to_index.end()) {
        return _cols[found->second].get();
    }
    DCHECK(false) << "field_name=" << field_name << ", table_id=" << _table_id
                  << ", field_name_to_index=" << get_all_field_names();
    return ResultError(
            Status::InternalError("column not found, name={}, table_id={}, schema_version={}",
                                  field_name, _table_id, _schema_version));
}
1678
1679
void TabletSchema::update_tablet_columns(const TabletSchema& tablet_schema,
1680
0
                                         const std::vector<TColumn>& t_columns) {
1681
0
    copy_from(tablet_schema);
1682
0
    if (!t_columns.empty() && t_columns[0].col_unique_id >= 0) {
1683
0
        clear_columns();
1684
0
        for (const auto& column : t_columns) {
1685
0
            append_column(TabletColumn(column));
1686
0
        }
1687
0
    }
1688
0
}
1689
1690
67
bool TabletSchema::has_inverted_index_with_index_id(int64_t index_id) const {
1691
86
    for (size_t i = 0; i < _indexes.size(); i++) {
1692
48
        if ((_indexes[i]->index_type() == IndexType::INVERTED ||
1693
48
             _indexes[i]->index_type() == IndexType::ANN) &&
1694
48
            _indexes[i]->index_id() == index_id) {
1695
29
            return true;
1696
29
        }
1697
48
    }
1698
38
    return false;
1699
67
}
1700
1701
std::vector<const TabletIndex*> TabletSchema::inverted_indexs(
1702
32.7k
        int32_t col_unique_id, const std::string& suffix_path) const {
1703
32.7k
    std::vector<const TabletIndex*> result;
1704
32.7k
    const std::string escaped_suffix = escape_for_path_name(suffix_path);
1705
32.7k
    auto it = _col_id_suffix_to_index.find(
1706
32.7k
            std::make_tuple(IndexType::INVERTED, col_unique_id, escaped_suffix));
1707
32.7k
    if (it != _col_id_suffix_to_index.end()) {
1708
7.85k
        for (size_t pos : it->second) {
1709
7.85k
            if (pos < _indexes.size()) {
1710
7.85k
                result.push_back(_indexes[pos].get());
1711
7.85k
            }
1712
7.85k
        }
1713
7.82k
    }
1714
32.7k
    return result;
1715
32.7k
}
1716
1717
std::vector<TabletIndexPtr> TabletSchema::inverted_index_by_field_pattern(
1718
36
        int32_t col_unique_id, const std::string& field_pattern) const {
1719
36
    auto id_to_pattern_map = _index_by_unique_id_with_pattern.find(col_unique_id);
1720
36
    if (id_to_pattern_map == _index_by_unique_id_with_pattern.end()) {
1721
27
        return {};
1722
27
    }
1723
9
    auto pattern_to_index_map = id_to_pattern_map->second.find(field_pattern);
1724
9
    if (pattern_to_index_map == id_to_pattern_map->second.end()) {
1725
4
        return {};
1726
4
    }
1727
5
    return pattern_to_index_map->second;
1728
9
}
1729
1730
29.0k
std::vector<const TabletIndex*> TabletSchema::inverted_indexs(const TabletColumn& col) const {
1731
    // Some columns(Float, Double, JSONB ...) from the variant do not support inverted index
1732
29.0k
    if (!segment_v2::IndexColumnWriter::check_support_inverted_index(col)) {
1733
394
        return {};
1734
394
    }
1735
1736
    // TODO use more efficient impl
1737
    // Use parent id if unique not assigned, this could happend when accessing subcolumns of variants
1738
28.6k
    int32_t col_unique_id = col.is_extracted_column() ? col.parent_unique_id() : col.unique_id();
1739
28.6k
    std::vector<const TabletIndex*> result;
1740
28.6k
    if (result = inverted_indexs(col_unique_id, escape_for_path_name(col.suffix_path()));
1741
28.6k
        !result.empty()) {
1742
5.20k
        return result;
1743
5.20k
    }
1744
    // variant's typed column has it's own index
1745
23.4k
    else if (col.is_extracted_column() && col.path_info_ptr()->get_is_typed()) {
1746
2
        std::string relative_path = col.path_info_ptr()->copy_pop_front().get_path();
1747
2
        if (_path_set_info_map.find(col_unique_id) == _path_set_info_map.end()) {
1748
0
            return result;
1749
0
        }
1750
2
        const auto& path_set_info = _path_set_info_map.at(col_unique_id);
1751
2
        if (path_set_info.typed_path_set.find(relative_path) ==
1752
2
            path_set_info.typed_path_set.end()) {
1753
0
            return result;
1754
0
        }
1755
2
        for (const auto& index : path_set_info.typed_path_set.at(relative_path).indexes) {
1756
2
            result.push_back(index.get());
1757
2
        }
1758
2
        return result;
1759
2
    }
1760
    // variant's subcolumns has it's own index
1761
23.4k
    else if (col.is_extracted_column()) {
1762
3
        std::string relative_path = col.path_info_ptr()->copy_pop_front().get_path();
1763
3
        if (_path_set_info_map.find(col_unique_id) == _path_set_info_map.end()) {
1764
1
            return result;
1765
1
        }
1766
2
        const auto& path_set_info = _path_set_info_map.at(col_unique_id);
1767
2
        if (path_set_info.subcolumn_indexes.find(relative_path) ==
1768
2
            path_set_info.subcolumn_indexes.end()) {
1769
1
            return result;
1770
1
        }
1771
1
        for (const auto& index : path_set_info.subcolumn_indexes.at(relative_path)) {
1772
1
            result.push_back(index.get());
1773
1
        }
1774
1
    }
1775
23.4k
    return result;
1776
28.6k
}
1777
1778
// Returns the first ANN index that covers `col_unique_id` and whose index suffix
// equals the escaped `suffix_path`, or nullptr when there is none.
const TabletIndex* TabletSchema::ann_index(int32_t col_unique_id,
                                           const std::string& suffix_path) const {
    for (const auto& index : _indexes) {
        if (index->index_type() != IndexType::ANN) {
            continue;
        }
        for (int32_t id : index->col_unique_ids()) {
            if (id != col_unique_id) {
                continue;
            }
            if (index->get_index_suffix() == escape_for_path_name(suffix_path)) {
                return index.get();
            }
        }
    }
    return nullptr;
}
1792
1793
26.1k
// Returns the ANN index that applies to `col`, or nullptr when the column does
// not support ANN indexes or none is registered for it.
const TabletIndex* TabletSchema::ann_index(const TabletColumn& col) const {
    if (!segment_v2::IndexColumnWriter::check_support_ann_index(col)) {
        return nullptr;
    }
    // TODO use more efficient impl
    // Use parent id if unique not assigned, this could happen when accessing subcolumns of variants
    int32_t col_unique_id;
    if (col.is_extracted_column()) {
        col_unique_id = col.parent_unique_id();
    } else {
        col_unique_id = col.unique_id();
    }
    return ann_index(col_unique_id, escape_for_path_name(col.suffix_path()));
}
1802
1803
0
bool TabletSchema::has_ngram_bf_index(int32_t col_unique_id) const {
1804
0
    IndexKey index_key(IndexType::NGRAM_BF, col_unique_id, "");
1805
0
    auto it = _col_id_suffix_to_index.find(index_key);
1806
0
    return it != _col_id_suffix_to_index.end();
1807
0
}
1808
1809
15.9k
// Returns the first NGRAM_BF index (empty suffix) registered for
// `col_unique_id`, or nullptr when none is registered or the stored position is
// out of range.
const TabletIndex* TabletSchema::get_ngram_bf_index(int32_t col_unique_id) const {
    IndexKey ngram_key(IndexType::NGRAM_BF, col_unique_id, "");
    const auto found = _col_id_suffix_to_index.find(ngram_key);
    if (found == _col_id_suffix_to_index.end()) {
        return nullptr;
    }
    const auto& positions = found->second;
    if (positions.empty() || !(positions[0] < _indexes.size())) {
        return nullptr;
    }
    return _indexes[positions[0]].get();
}
1820
1821
// Returns the first index registered under (`index_type`, `col_unique_id`,
// `suffix_path`), or nullptr when none is registered or the stored position is
// out of range. `suffix_path` is used as given (it is not escaped here).
const TabletIndex* TabletSchema::get_index(int32_t col_unique_id, IndexType index_type,
                                           const std::string& suffix_path) const {
    IndexKey lookup_key(index_type, col_unique_id, suffix_path);
    const auto found = _col_id_suffix_to_index.find(lookup_key);
    if (found == _col_id_suffix_to_index.end()) {
        return nullptr;
    }
    const auto& positions = found->second;
    if (positions.empty() || !(positions[0] < _indexes.size())) {
        return nullptr;
    }
    return _indexes[positions[0]].get();
}
1832
1833
// Builds a block with one entry per id in `return_columns`, in that order.
//  - A column is created as nullable when its id is contained in
//    `tablet_columns_need_convert_null` (which may be null).
//  - For STRUCT / MAP / ARRAY columns, a data type registered in
//    _pruned_columns_data_type for the column's unique id replaces the default.
//  - Ids listed in _vir_col_idx_to_unique_id get a ColumnNothing placeholder
//    instead of a freshly created column.
// Column ids are used to index _cols without a bounds check.
vectorized::Block TabletSchema::create_block(
        const std::vector<uint32_t>& return_columns,
        const std::unordered_set<uint32_t>* tablet_columns_need_convert_null) const {
    vectorized::Block block;
    for (const auto& return_column : return_columns) {
        const ColumnId cid = return_column;
        const auto& col = *_cols[cid];

        bool is_nullable = false;
        if (tablet_columns_need_convert_null != nullptr) {
            is_nullable = tablet_columns_need_convert_null->find(cid) !=
                          tablet_columns_need_convert_null->end();
        }
        auto data_type = vectorized::DataTypeFactory::instance().create_data_type(col, is_nullable);

        const bool is_complex_type = col.type() == FieldType::OLAP_FIELD_TYPE_STRUCT ||
                                     col.type() == FieldType::OLAP_FIELD_TYPE_MAP ||
                                     col.type() == FieldType::OLAP_FIELD_TYPE_ARRAY;
        if (is_complex_type && _pruned_columns_data_type.contains(col.unique_id())) {
            data_type = _pruned_columns_data_type.at(col.unique_id());
        }

        if (!_vir_col_idx_to_unique_id.contains(cid)) {
            block.insert({data_type->create_column(), data_type, col.name()});
            continue;
        }
        block.insert({vectorized::ColumnNothing::create(0), data_type, col.name()});
        VLOG_DEBUG << fmt::format(
                "Create block from tablet schema, column cid {} is virtual column, col_name: "
                "{}, col_unique_id: {}, type {}",
                cid, col.name(), col.unique_id(), data_type->get_name());
    }
    return block;
}
1864
1865
2.00k
// Builds a block with one entry per column of this schema, in schema order.
// When `ignore_dropped_col` is true, columns for which is_dropped_column()
// holds are skipped. For STRUCT columns, a data type registered in
// _pruned_columns_data_type for the column's unique id replaces the default.
// NOTE(review): the overload taking `return_columns` applies the pruned type to
// MAP and ARRAY columns too; this one checks STRUCT only - confirm intended.
vectorized::Block TabletSchema::create_block(bool ignore_dropped_col) const {
    vectorized::Block block;
    for (const auto& column : _cols) {
        if (ignore_dropped_col && is_dropped_column(*column)) {
            continue;
        }

        auto data_type = vectorized::DataTypeFactory::instance().create_data_type(*column);
        const bool is_struct = column->type() == FieldType::OLAP_FIELD_TYPE_STRUCT;
        if (is_struct && _pruned_columns_data_type.contains(column->unique_id())) {
            data_type = _pruned_columns_data_type.at(column->unique_id());
        }
        block.insert({data_type->create_column(), data_type, column->name()});
    }
    return block;
}
1882
1883
0
// Builds a block with one entry per id in `cids`, in that order. For STRUCT
// columns, a data type registered in _pruned_columns_data_type for the column's
// unique id replaces the default. Ids index _cols without a bounds check.
// NOTE(review): the overload taking `return_columns` applies the pruned type to
// MAP and ARRAY columns too; this one checks STRUCT only - confirm intended.
vectorized::Block TabletSchema::create_block_by_cids(const std::vector<uint32_t>& cids) const {
    vectorized::Block block;
    for (const auto& cid : cids) {
        const auto& column = *_cols[cid];
        auto data_type = vectorized::DataTypeFactory::instance().create_data_type(column);
        const bool is_struct = column.type() == FieldType::OLAP_FIELD_TYPE_STRUCT;
        if (is_struct && _pruned_columns_data_type.contains(column.unique_id())) {
            data_type = _pruned_columns_data_type.at(column.unique_id());
        }
        block.insert({data_type->create_column(), data_type, column.name()});
    }
    return block;
}
1897
1898
0
// Field-wise equality of two tablet columns.
//  - The default value is compared only when both columns have one.
//  - Precision and scale are compared only for decimal columns.
//  - Column paths are equal when both are null, or both are set and the pointed-to
//    paths compare equal.
//
// Fix: the previous null check tested `a._column_path` against itself, so a
// column without a path compared equal to a column with one when `a` was the
// path-less side. The check is now symmetric.
bool operator==(const TabletColumn& a, const TabletColumn& b) {
    if (a._unique_id != b._unique_id) return false;
    if (a._col_name != b._col_name) return false;
    if (a._type != b._type) return false;
    if (a._is_key != b._is_key) return false;
    if (a._aggregation != b._aggregation) return false;
    if (a._is_nullable != b._is_nullable) return false;

    if (a._has_default_value != b._has_default_value) return false;
    if (a._has_default_value && a._default_value != b._default_value) return false;

    if (a._is_decimal != b._is_decimal) return false;
    if (a._is_decimal) {
        if (a._precision != b._precision) return false;
        if (a._frac != b._frac) return false;
    }

    if (a._length != b._length) return false;
    if (a._index_length != b._index_length) return false;
    if (a._is_bf_column != b._is_bf_column) return false;

    // Exactly one side having a path means the columns differ.
    const bool a_has_path = a._column_path != nullptr;
    const bool b_has_path = b._column_path != nullptr;
    if (a_has_path != b_has_path) return false;
    if (a_has_path && *a._column_path != *b._column_path) return false;

    return true;
}
1924
1925
0
// Negation of operator== for tablet columns.
bool operator!=(const TabletColumn& a, const TabletColumn& b) {
    const bool equal = (a == b);
    return !equal;
}
1928
1929
3
// Equality of two tablet schemas over the fields listed below: keys type, the
// column list (element-wise), column counters, storage properties and feature
// flags. The bloom-filter fpp is compared with a 1e-6 tolerance and only when
// both schemas have one.
bool operator==(const TabletSchema& a, const TabletSchema& b) {
    if (a._keys_type != b._keys_type || a._cols.size() != b._cols.size()) {
        return false;
    }
    for (size_t i = 0; i < a._cols.size(); ++i) {
        if (*a._cols[i] != *b._cols[i]) {
            return false;
        }
    }

    // Column counters.
    if (a._num_columns != b._num_columns || a._num_key_columns != b._num_key_columns ||
        a._num_null_columns != b._num_null_columns ||
        a._num_short_key_columns != b._num_short_key_columns) {
        return false;
    }

    // Storage layout and compression.
    if (a._num_rows_per_row_block != b._num_rows_per_row_block ||
        a._compress_kind != b._compress_kind ||
        a._next_column_unique_id != b._next_column_unique_id) {
        return false;
    }

    // Bloom filter settings.
    if (a._has_bf_fpp != b._has_bf_fpp) {
        return false;
    }
    if (a._has_bf_fpp && std::abs(a._bf_fpp - b._bf_fpp) > 1e-6) {
        return false;
    }

    // Table-level flags.
    if (a._is_in_memory != b._is_in_memory || a._delete_sign_idx != b._delete_sign_idx ||
        a._disable_auto_compaction != b._disable_auto_compaction ||
        a._enable_single_replica_compaction != b._enable_single_replica_compaction ||
        a._store_row_column != b._store_row_column) {
        return false;
    }

    // Page sizes.
    if (a._row_store_page_size != b._row_store_page_size ||
        a._storage_page_size != b._storage_page_size ||
        a._storage_dict_page_size != b._storage_dict_page_size) {
        return false;
    }

    // Remaining feature switches.
    if (a._skip_write_index_on_load != b._skip_write_index_on_load ||
        a._enable_variant_flatten_nested != b._enable_variant_flatten_nested ||
        a._is_external_segment_column_meta_used != b._is_external_segment_column_meta_used ||
        a._integer_type_default_use_plain_encoding != b._integer_type_default_use_plain_encoding ||
        a._binary_plain_encoding_default_impl != b._binary_plain_encoding_default_impl) {
        return false;
    }
    return true;
}
1964
1965
3
// Negation of operator== for tablet schemas.
bool operator!=(const TabletSchema& a, const TabletSchema& b) {
    const bool equal = (a == b);
    return !equal;
}
1968
#include "common/compile_check_end.h"
1969
} // namespace doris