Coverage Report

Created: 2026-02-23 23:33

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/olap/tablet_schema.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "olap/tablet_schema.h"
19
20
#include <gen_cpp/Descriptors_types.h>
21
#include <gen_cpp/olap_file.pb.h>
22
#include <glog/logging.h>
23
#include <google/protobuf/io/coded_stream.h>
24
#include <google/protobuf/io/zero_copy_stream.h>
25
#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
26
27
#include <algorithm>
28
#include <cctype>
29
// IWYU pragma: no_include <bits/std_abs.h>
30
#include <cmath> // IWYU pragma: keep
31
#include <memory>
32
#include <ostream>
33
#include <vector>
34
35
#include "common/compiler_util.h" // IWYU pragma: keep
36
#include "common/consts.h"
37
#include "common/status.h"
38
#include "exec/tablet_info.h"
39
#include "olap/inverted_index_parser.h"
40
#include "olap/olap_common.h"
41
#include "olap/olap_define.h"
42
#include "olap/rowset/segment_v2/inverted_index/analyzer/analyzer.h"
43
#include "olap/tablet_column_object_pool.h"
44
#include "olap/types.h"
45
#include "olap/utils.h"
46
#include "tablet_meta.h"
47
#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
48
#include "vec/aggregate_functions/aggregate_function_state_union.h"
49
#include "vec/columns/column_nothing.h"
50
#include "vec/common/hex.h"
51
#include "vec/common/string_ref.h"
52
#include "vec/core/block.h"
53
#include "vec/data_types/data_type.h"
54
#include "vec/data_types/data_type_factory.hpp"
55
#include "vec/json/path_in_data.h"
56
57
namespace doris {
58
#include "common/compile_check_begin.h"
59
143k
FieldType TabletColumn::get_field_type_by_type(PrimitiveType primitiveType) {
60
143k
    switch (primitiveType) {
61
0
    case PrimitiveType::INVALID_TYPE:
62
0
        return FieldType::OLAP_FIELD_TYPE_UNKNOWN;
63
0
    case PrimitiveType::TYPE_NULL:
64
0
        return FieldType::OLAP_FIELD_TYPE_NONE;
65
16
    case PrimitiveType::TYPE_BOOLEAN:
66
16
        return FieldType::OLAP_FIELD_TYPE_BOOL;
67
0
    case PrimitiveType::TYPE_TINYINT:
68
0
        return FieldType::OLAP_FIELD_TYPE_TINYINT;
69
10
    case PrimitiveType::TYPE_SMALLINT:
70
10
        return FieldType::OLAP_FIELD_TYPE_SMALLINT;
71
300
    case PrimitiveType::TYPE_INT:
72
300
        return FieldType::OLAP_FIELD_TYPE_INT;
73
143k
    case PrimitiveType::TYPE_BIGINT:
74
143k
        return FieldType::OLAP_FIELD_TYPE_BIGINT;
75
6
    case PrimitiveType::TYPE_LARGEINT:
76
6
        return FieldType::OLAP_FIELD_TYPE_LARGEINT;
77
0
    case PrimitiveType::TYPE_FLOAT:
78
0
        return FieldType::OLAP_FIELD_TYPE_FLOAT;
79
26
    case PrimitiveType::TYPE_DOUBLE:
80
26
        return FieldType::OLAP_FIELD_TYPE_DOUBLE;
81
0
    case PrimitiveType::TYPE_VARCHAR:
82
0
        return FieldType::OLAP_FIELD_TYPE_VARCHAR;
83
0
    case PrimitiveType::TYPE_DATE:
84
0
        return FieldType::OLAP_FIELD_TYPE_DATE;
85
0
    case PrimitiveType::TYPE_DATETIME:
86
0
        return FieldType::OLAP_FIELD_TYPE_DATETIME;
87
0
    case PrimitiveType::TYPE_BINARY:
88
0
        return FieldType::OLAP_FIELD_TYPE_UNKNOWN; // Not implemented
89
0
    case PrimitiveType::TYPE_CHAR:
90
0
        return FieldType::OLAP_FIELD_TYPE_CHAR;
91
0
    case PrimitiveType::TYPE_STRUCT:
92
0
        return FieldType::OLAP_FIELD_TYPE_STRUCT;
93
0
    case PrimitiveType::TYPE_ARRAY:
94
0
        return FieldType::OLAP_FIELD_TYPE_ARRAY;
95
0
    case PrimitiveType::TYPE_MAP:
96
0
        return FieldType::OLAP_FIELD_TYPE_MAP;
97
0
    case PrimitiveType::TYPE_HLL:
98
0
        return FieldType::OLAP_FIELD_TYPE_HLL;
99
0
    case PrimitiveType::TYPE_DECIMALV2:
100
0
        return FieldType::OLAP_FIELD_TYPE_UNKNOWN; // Not implemented
101
0
    case PrimitiveType::TYPE_BITMAP:
102
0
        return FieldType::OLAP_FIELD_TYPE_BITMAP;
103
0
    case PrimitiveType::TYPE_STRING:
104
0
        return FieldType::OLAP_FIELD_TYPE_STRING;
105
0
    case PrimitiveType::TYPE_QUANTILE_STATE:
106
0
        return FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE;
107
0
    case PrimitiveType::TYPE_DATEV2:
108
0
        return FieldType::OLAP_FIELD_TYPE_DATEV2;
109
0
    case PrimitiveType::TYPE_DATETIMEV2:
110
0
        return FieldType::OLAP_FIELD_TYPE_DATETIMEV2;
111
0
    case PrimitiveType::TYPE_TIMESTAMPTZ:
112
0
        return FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ;
113
0
    case PrimitiveType::TYPE_TIMEV2:
114
0
        return FieldType::OLAP_FIELD_TYPE_TIMEV2;
115
0
    case PrimitiveType::TYPE_DECIMAL32:
116
0
        return FieldType::OLAP_FIELD_TYPE_DECIMAL32;
117
0
    case PrimitiveType::TYPE_DECIMAL64:
118
0
        return FieldType::OLAP_FIELD_TYPE_DECIMAL64;
119
0
    case PrimitiveType::TYPE_DECIMAL128I:
120
0
        return FieldType::OLAP_FIELD_TYPE_DECIMAL128I;
121
0
    case PrimitiveType::TYPE_DECIMAL256:
122
0
        return FieldType::OLAP_FIELD_TYPE_DECIMAL256;
123
0
    case PrimitiveType::TYPE_JSONB:
124
0
        return FieldType::OLAP_FIELD_TYPE_JSONB;
125
0
    case PrimitiveType::TYPE_VARIANT:
126
0
        return FieldType::OLAP_FIELD_TYPE_VARIANT;
127
0
    case PrimitiveType::TYPE_IPV4:
128
0
        return FieldType::OLAP_FIELD_TYPE_IPV4;
129
0
    case PrimitiveType::TYPE_IPV6:
130
0
        return FieldType::OLAP_FIELD_TYPE_IPV6;
131
0
    case PrimitiveType::TYPE_LAMBDA_FUNCTION:
132
0
        return FieldType::OLAP_FIELD_TYPE_UNKNOWN; // Not implemented
133
0
    case PrimitiveType::TYPE_AGG_STATE:
134
0
        return FieldType::OLAP_FIELD_TYPE_AGG_STATE;
135
0
    default:
136
0
        return FieldType::OLAP_FIELD_TYPE_UNKNOWN;
137
143k
    }
138
143k
}
139
140
63.8k
PrimitiveType TabletColumn::get_primitive_type_by_field_type(FieldType type) {
141
63.8k
    static const PrimitiveType mapping[] = {
142
63.8k
            /*  0 */ PrimitiveType::INVALID_TYPE,
143
63.8k
            /*  1 OLAP_FIELD_TYPE_TINYINT           */ PrimitiveType::TYPE_TINYINT,
144
63.8k
            /*  2 OLAP_FIELD_TYPE_UNSIGNED_TINYINT  */ PrimitiveType::INVALID_TYPE,
145
63.8k
            /*  3 OLAP_FIELD_TYPE_SMALLINT          */ PrimitiveType::TYPE_SMALLINT,
146
63.8k
            /*  4 OLAP_FIELD_TYPE_UNSIGNED_SMALLINT */ PrimitiveType::INVALID_TYPE,
147
63.8k
            /*  5 OLAP_FIELD_TYPE_INT               */ PrimitiveType::TYPE_INT,
148
63.8k
            /*  6 OLAP_FIELD_TYPE_UNSIGNED_INT      */ PrimitiveType::INVALID_TYPE,
149
63.8k
            /*  7 OLAP_FIELD_TYPE_BIGINT            */ PrimitiveType::TYPE_BIGINT,
150
63.8k
            /*  8 OLAP_FIELD_TYPE_UNSIGNED_BIGINT   */ PrimitiveType::INVALID_TYPE,
151
63.8k
            /*  9 OLAP_FIELD_TYPE_LARGEINT          */ PrimitiveType::TYPE_LARGEINT,
152
63.8k
            /* 10 OLAP_FIELD_TYPE_FLOAT             */ PrimitiveType::TYPE_FLOAT,
153
63.8k
            /* 11 OLAP_FIELD_TYPE_DOUBLE            */ PrimitiveType::TYPE_DOUBLE,
154
63.8k
            /* 12 OLAP_FIELD_TYPE_DISCRETE_DOUBLE   */ PrimitiveType::INVALID_TYPE,
155
63.8k
            /* 13 OLAP_FIELD_TYPE_CHAR              */ PrimitiveType::TYPE_CHAR,
156
63.8k
            /* 14 OLAP_FIELD_TYPE_DATE              */ PrimitiveType::TYPE_DATE,
157
63.8k
            /* 15 OLAP_FIELD_TYPE_DATETIME          */ PrimitiveType::TYPE_DATETIME,
158
63.8k
            /* 16 OLAP_FIELD_TYPE_DECIMAL           */ PrimitiveType::INVALID_TYPE,
159
63.8k
            /* 17 OLAP_FIELD_TYPE_VARCHAR           */ PrimitiveType::TYPE_VARCHAR,
160
63.8k
            /* 18 OLAP_FIELD_TYPE_STRUCT            */ PrimitiveType::TYPE_STRUCT,
161
63.8k
            /* 19 OLAP_FIELD_TYPE_ARRAY             */ PrimitiveType::TYPE_ARRAY,
162
63.8k
            /* 20 OLAP_FIELD_TYPE_MAP               */ PrimitiveType::TYPE_MAP,
163
63.8k
            /* 21 OLAP_FIELD_TYPE_UNKNOWN           */ PrimitiveType::INVALID_TYPE,
164
63.8k
            /* 22 OLAP_FIELD_TYPE_NONE              */ PrimitiveType::TYPE_NULL,
165
63.8k
            /* 23 OLAP_FIELD_TYPE_HLL               */ PrimitiveType::TYPE_HLL,
166
63.8k
            /* 24 OLAP_FIELD_TYPE_BOOL              */ PrimitiveType::TYPE_BOOLEAN,
167
63.8k
            /* 25 OLAP_FIELD_TYPE_BITMAP            */ PrimitiveType::TYPE_BITMAP,
168
63.8k
            /* 26 OLAP_FIELD_TYPE_STRING            */ PrimitiveType::TYPE_STRING,
169
63.8k
            /* 27 OLAP_FIELD_TYPE_QUANTILE_STATE    */ PrimitiveType::TYPE_QUANTILE_STATE,
170
63.8k
            /* 28 OLAP_FIELD_TYPE_DATEV2            */ PrimitiveType::TYPE_DATEV2,
171
63.8k
            /* 29 OLAP_FIELD_TYPE_DATETIMEV2        */ PrimitiveType::TYPE_DATETIMEV2,
172
63.8k
            /* 30 OLAP_FIELD_TYPE_TIMEV2            */ PrimitiveType::TYPE_TIMEV2,
173
63.8k
            /* 31 OLAP_FIELD_TYPE_DECIMAL32         */ PrimitiveType::TYPE_DECIMAL32,
174
63.8k
            /* 32 OLAP_FIELD_TYPE_DECIMAL64         */ PrimitiveType::TYPE_DECIMAL64,
175
63.8k
            /* 33 OLAP_FIELD_TYPE_DECIMAL128I       */ PrimitiveType::TYPE_DECIMAL128I,
176
63.8k
            /* 34 OLAP_FIELD_TYPE_JSONB             */ PrimitiveType::TYPE_JSONB,
177
63.8k
            /* 35 OLAP_FIELD_TYPE_VARIANT           */ PrimitiveType::TYPE_VARIANT,
178
63.8k
            /* 36 OLAP_FIELD_TYPE_AGG_STATE         */ PrimitiveType::TYPE_AGG_STATE,
179
63.8k
            /* 37 OLAP_FIELD_TYPE_DECIMAL256        */ PrimitiveType::TYPE_DECIMAL256,
180
63.8k
            /* 38 OLAP_FIELD_TYPE_IPV4              */ PrimitiveType::TYPE_IPV4,
181
63.8k
            /* 39 OLAP_FIELD_TYPE_IPV6              */ PrimitiveType::TYPE_IPV6,
182
63.8k
            /* 40 OLAP_FIELD_TYPE_TIMESTAMPTZ       */ PrimitiveType::TYPE_TIMESTAMPTZ,
183
63.8k
    };
184
185
63.8k
    int idx = static_cast<int>(type);
186
63.8k
    return mapping[idx];
187
63.8k
}
188
189
25.0k
FieldType TabletColumn::get_field_type_by_string(const std::string& type_str) {
190
25.0k
    std::string upper_type_str = type_str;
191
25.0k
    std::transform(type_str.begin(), type_str.end(), upper_type_str.begin(),
192
140k
                   [](auto c) { return std::toupper(c); });
193
25.0k
    FieldType type;
194
195
25.0k
    if (0 == upper_type_str.compare("TINYINT")) {
196
876
        type = FieldType::OLAP_FIELD_TYPE_TINYINT;
197
24.1k
    } else if (0 == upper_type_str.compare("SMALLINT")) {
198
1.77k
        type = FieldType::OLAP_FIELD_TYPE_SMALLINT;
199
22.4k
    } else if (0 == upper_type_str.compare("INT")) {
200
5.24k
        type = FieldType::OLAP_FIELD_TYPE_INT;
201
17.1k
    } else if (0 == upper_type_str.compare("BIGINT")) {
202
376
        type = FieldType::OLAP_FIELD_TYPE_BIGINT;
203
16.7k
    } else if (0 == upper_type_str.compare("LARGEINT")) {
204
280
        type = FieldType::OLAP_FIELD_TYPE_LARGEINT;
205
16.5k
    } else if (0 == upper_type_str.compare("UNSIGNED_TINYINT")) {
206
0
        type = FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT;
207
16.5k
    } else if (0 == upper_type_str.compare("UNSIGNED_SMALLINT")) {
208
0
        type = FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT;
209
16.5k
    } else if (0 == upper_type_str.compare("UNSIGNED_INT")) {
210
0
        type = FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT;
211
16.5k
    } else if (0 == upper_type_str.compare("UNSIGNED_BIGINT")) {
212
0
        type = FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT;
213
16.5k
    } else if (0 == upper_type_str.compare("IPV4")) {
214
34
        type = FieldType::OLAP_FIELD_TYPE_IPV4;
215
16.4k
    } else if (0 == upper_type_str.compare("IPV6")) {
216
34
        type = FieldType::OLAP_FIELD_TYPE_IPV6;
217
16.4k
    } else if (0 == upper_type_str.compare("FLOAT")) {
218
8
        type = FieldType::OLAP_FIELD_TYPE_FLOAT;
219
16.4k
    } else if (0 == upper_type_str.compare("DISCRETE_DOUBLE")) {
220
0
        type = FieldType::OLAP_FIELD_TYPE_DISCRETE_DOUBLE;
221
16.4k
    } else if (0 == upper_type_str.compare("DOUBLE")) {
222
0
        type = FieldType::OLAP_FIELD_TYPE_DOUBLE;
223
16.4k
    } else if (0 == upper_type_str.compare("CHAR")) {
224
282
        type = FieldType::OLAP_FIELD_TYPE_CHAR;
225
16.1k
    } else if (0 == upper_type_str.compare("DATE")) {
226
286
        type = FieldType::OLAP_FIELD_TYPE_DATE;
227
15.8k
    } else if (0 == upper_type_str.compare("DATEV2")) {
228
266
        type = FieldType::OLAP_FIELD_TYPE_DATEV2;
229
15.5k
    } else if (0 == upper_type_str.compare("DATETIMEV2")) {
230
0
        type = FieldType::OLAP_FIELD_TYPE_DATETIMEV2;
231
15.5k
    } else if (0 == upper_type_str.compare("DATETIME")) {
232
344
        type = FieldType::OLAP_FIELD_TYPE_DATETIME;
233
15.2k
    } else if (0 == upper_type_str.compare("TIMESTAMPTZ")) {
234
32
        type = FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ;
235
15.2k
    } else if (0 == upper_type_str.compare("DECIMAL32")) {
236
0
        type = FieldType::OLAP_FIELD_TYPE_DECIMAL32;
237
15.2k
    } else if (0 == upper_type_str.compare("DECIMAL64")) {
238
0
        type = FieldType::OLAP_FIELD_TYPE_DECIMAL64;
239
15.2k
    } else if (0 == upper_type_str.compare("DECIMAL128I")) {
240
0
        type = FieldType::OLAP_FIELD_TYPE_DECIMAL128I;
241
15.2k
    } else if (0 == upper_type_str.compare("DECIMAL256")) {
242
0
        type = FieldType::OLAP_FIELD_TYPE_DECIMAL256;
243
15.2k
    } else if (0 == upper_type_str.compare(0, 7, "DECIMAL")) {
244
288
        type = FieldType::OLAP_FIELD_TYPE_DECIMAL;
245
14.9k
    } else if (0 == upper_type_str.compare(0, 7, "VARCHAR")) {
246
438
        type = FieldType::OLAP_FIELD_TYPE_VARCHAR;
247
14.4k
    } else if (0 == upper_type_str.compare("STRING")) {
248
14.3k
        type = FieldType::OLAP_FIELD_TYPE_STRING;
249
14.3k
    } else if (0 == upper_type_str.compare("JSONB")) {
250
0
        type = FieldType::OLAP_FIELD_TYPE_JSONB;
251
152
    } else if (0 == upper_type_str.compare("VARIANT")) {
252
126
        type = FieldType::OLAP_FIELD_TYPE_VARIANT;
253
126
    } else if (0 == upper_type_str.compare("BOOLEAN")) {
254
0
        type = FieldType::OLAP_FIELD_TYPE_BOOL;
255
26
    } else if (0 == upper_type_str.compare(0, 3, "HLL")) {
256
14
        type = FieldType::OLAP_FIELD_TYPE_HLL;
257
14
    } else if (0 == upper_type_str.compare("STRUCT")) {
258
0
        type = FieldType::OLAP_FIELD_TYPE_STRUCT;
259
12
    } else if (0 == upper_type_str.compare("LIST")) {
260
0
        type = FieldType::OLAP_FIELD_TYPE_ARRAY;
261
12
    } else if (0 == upper_type_str.compare("MAP")) {
262
0
        type = FieldType::OLAP_FIELD_TYPE_MAP;
263
12
    } else if (0 == upper_type_str.compare("OBJECT")) {
264
0
        type = FieldType::OLAP_FIELD_TYPE_BITMAP;
265
12
    } else if (0 == upper_type_str.compare("BITMAP")) {
266
0
        type = FieldType::OLAP_FIELD_TYPE_BITMAP;
267
12
    } else if (0 == upper_type_str.compare("ARRAY")) {
268
12
        type = FieldType::OLAP_FIELD_TYPE_ARRAY;
269
12
    } else if (0 == upper_type_str.compare("QUANTILE_STATE")) {
270
0
        type = FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE;
271
0
    } else if (0 == upper_type_str.compare("AGG_STATE")) {
272
0
        type = FieldType::OLAP_FIELD_TYPE_AGG_STATE;
273
0
    } else {
274
0
        LOG(WARNING) << "invalid type string. [type='" << type_str << "']";
275
0
        type = FieldType::OLAP_FIELD_TYPE_UNKNOWN;
276
0
    }
277
278
25.0k
    return type;
279
25.0k
}
280
281
8.28k
FieldAggregationMethod TabletColumn::get_aggregation_type_by_string(const std::string& str) {
282
8.28k
    std::string upper_str = str;
283
8.28k
    std::transform(str.begin(), str.end(), upper_str.begin(),
284
42.4k
                   [](auto c) { return std::toupper(c); });
285
8.28k
    FieldAggregationMethod aggregation_type;
286
287
8.28k
    if (0 == upper_str.compare("NONE")) {
288
3.57k
        aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE;
289
4.70k
    } else if (0 == upper_str.compare("SUM")) {
290
1.20k
        aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM;
291
3.50k
    } else if (0 == upper_str.compare("MIN")) {
292
8
        aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MIN;
293
3.50k
    } else if (0 == upper_str.compare("MAX")) {
294
8
        aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MAX;
295
3.49k
    } else if (0 == upper_str.compare("REPLACE")) {
296
3.47k
        aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE;
297
3.47k
    } else if (0 == upper_str.compare("REPLACE_IF_NOT_NULL")) {
298
0
        aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL;
299
14
    } else if (0 == upper_str.compare("HLL_UNION")) {
300
14
        aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_HLL_UNION;
301
14
    } else if (0 == upper_str.compare("BITMAP_UNION")) {
302
0
        aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_BITMAP_UNION;
303
0
    } else if (0 == upper_str.compare("QUANTILE_UNION")) {
304
0
        aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_QUANTILE_UNION;
305
0
    } else if (!upper_str.empty()) {
306
0
        aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_GENERIC;
307
0
    } else {
308
0
        aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_UNKNOWN;
309
0
    }
310
311
8.28k
    return aggregation_type;
312
8.28k
}
313
314
64.7k
std::string TabletColumn::get_string_by_field_type(FieldType type) {
315
64.7k
    switch (type) {
316
3.38k
    case FieldType::OLAP_FIELD_TYPE_TINYINT:
317
3.38k
        return "TINYINT";
318
319
0
    case FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT:
320
0
        return "UNSIGNED_TINYINT";
321
322
7.39k
    case FieldType::OLAP_FIELD_TYPE_SMALLINT:
323
7.39k
        return "SMALLINT";
324
325
0
    case FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT:
326
0
        return "UNSIGNED_SMALLINT";
327
328
18.1k
    case FieldType::OLAP_FIELD_TYPE_INT:
329
18.1k
        return "INT";
330
331
0
    case FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT:
332
0
        return "UNSIGNED_INT";
333
334
1.81k
    case FieldType::OLAP_FIELD_TYPE_BIGINT:
335
1.81k
        return "BIGINT";
336
337
1.50k
    case FieldType::OLAP_FIELD_TYPE_LARGEINT:
338
1.50k
        return "LARGEINT";
339
340
0
    case FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT:
341
0
        return "UNSIGNED_BIGINT";
342
343
294
    case FieldType::OLAP_FIELD_TYPE_IPV4:
344
294
        return "IPV4";
345
346
294
    case FieldType::OLAP_FIELD_TYPE_IPV6:
347
294
        return "IPV6";
348
349
20
    case FieldType::OLAP_FIELD_TYPE_FLOAT:
350
20
        return "FLOAT";
351
352
0
    case FieldType::OLAP_FIELD_TYPE_DOUBLE:
353
0
        return "DOUBLE";
354
355
0
    case FieldType::OLAP_FIELD_TYPE_DISCRETE_DOUBLE:
356
0
        return "DISCRETE_DOUBLE";
357
358
1.50k
    case FieldType::OLAP_FIELD_TYPE_CHAR:
359
1.50k
        return "CHAR";
360
361
1.51k
    case FieldType::OLAP_FIELD_TYPE_DATE:
362
1.51k
        return "DATE";
363
364
1.34k
    case FieldType::OLAP_FIELD_TYPE_DATEV2:
365
1.34k
        return "DATEV2";
366
367
1.89k
    case FieldType::OLAP_FIELD_TYPE_DATETIME:
368
1.89k
        return "DATETIME";
369
370
0
    case FieldType::OLAP_FIELD_TYPE_DATETIMEV2:
371
0
        return "DATETIMEV2";
372
373
284
    case FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ:
374
284
        return "TIMESTAMPTZ";
375
376
1.50k
    case FieldType::OLAP_FIELD_TYPE_DECIMAL:
377
1.50k
        return "DECIMAL";
378
379
0
    case FieldType::OLAP_FIELD_TYPE_DECIMAL32:
380
0
        return "DECIMAL32";
381
382
0
    case FieldType::OLAP_FIELD_TYPE_DECIMAL64:
383
0
        return "DECIMAL64";
384
385
0
    case FieldType::OLAP_FIELD_TYPE_DECIMAL128I:
386
0
        return "DECIMAL128I";
387
388
0
    case FieldType::OLAP_FIELD_TYPE_DECIMAL256:
389
0
        return "DECIMAL256";
390
391
2.02k
    case FieldType::OLAP_FIELD_TYPE_VARCHAR:
392
2.02k
        return "VARCHAR";
393
394
0
    case FieldType::OLAP_FIELD_TYPE_JSONB:
395
0
        return "JSONB";
396
397
258
    case FieldType::OLAP_FIELD_TYPE_VARIANT:
398
258
        return "VARIANT";
399
400
21.6k
    case FieldType::OLAP_FIELD_TYPE_STRING:
401
21.6k
        return "STRING";
402
403
0
    case FieldType::OLAP_FIELD_TYPE_BOOL:
404
0
        return "BOOLEAN";
405
406
12
    case FieldType::OLAP_FIELD_TYPE_HLL:
407
12
        return "HLL";
408
409
0
    case FieldType::OLAP_FIELD_TYPE_STRUCT:
410
0
        return "STRUCT";
411
412
28
    case FieldType::OLAP_FIELD_TYPE_ARRAY:
413
28
        return "ARRAY";
414
415
4
    case FieldType::OLAP_FIELD_TYPE_MAP:
416
4
        return "MAP";
417
418
0
    case FieldType::OLAP_FIELD_TYPE_BITMAP:
419
0
        return "OBJECT";
420
0
    case FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE:
421
0
        return "QUANTILE_STATE";
422
0
    case FieldType::OLAP_FIELD_TYPE_AGG_STATE:
423
0
        return "AGG_STATE";
424
0
    default:
425
0
        return "UNKNOWN";
426
64.7k
    }
427
64.7k
}
428
429
3.57k
std::string TabletColumn::get_string_by_aggregation_type(FieldAggregationMethod type) {
430
3.57k
    switch (type) {
431
3.51k
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE:
432
3.51k
        return "NONE";
433
434
18
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM:
435
18
        return "SUM";
436
437
0
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MIN:
438
0
        return "MIN";
439
440
0
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MAX:
441
0
        return "MAX";
442
443
30
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE:
444
30
        return "REPLACE";
445
446
0
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL:
447
0
        return "REPLACE_IF_NOT_NULL";
448
449
0
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_HLL_UNION:
450
0
        return "HLL_UNION";
451
452
0
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_BITMAP_UNION:
453
0
        return "BITMAP_UNION";
454
455
0
    case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_QUANTILE_UNION:
456
0
        return "QUANTILE_UNION";
457
458
4
    default:
459
4
        return "UNKNOWN";
460
3.57k
    }
461
3.57k
}
462
463
4.48k
uint32_t TabletColumn::get_field_length_by_type(TPrimitiveType::type type, uint32_t string_length) {
464
4.48k
    switch (type) {
465
226
    case TPrimitiveType::TINYINT:
466
226
    case TPrimitiveType::BOOLEAN:
467
226
        return 1;
468
726
    case TPrimitiveType::SMALLINT:
469
726
        return 2;
470
1.72k
    case TPrimitiveType::INT:
471
1.72k
        return 4;
472
240
    case TPrimitiveType::BIGINT:
473
240
        return 8;
474
208
    case TPrimitiveType::LARGEINT:
475
208
        return 16;
476
32
    case TPrimitiveType::IPV4:
477
32
        return 4;
478
32
    case TPrimitiveType::IPV6:
479
32
        return 16;
480
208
    case TPrimitiveType::DATE:
481
208
        return 3;
482
184
    case TPrimitiveType::DATEV2:
483
184
        return 4;
484
224
    case TPrimitiveType::DATETIME:
485
224
        return 8;
486
0
    case TPrimitiveType::DATETIMEV2:
487
30
    case TPrimitiveType::TIMESTAMPTZ:
488
30
        return 8;
489
0
    case TPrimitiveType::FLOAT:
490
0
        return 4;
491
0
    case TPrimitiveType::DOUBLE:
492
0
        return 8;
493
0
    case TPrimitiveType::QUANTILE_STATE:
494
0
    case TPrimitiveType::BITMAP:
495
0
        return 16;
496
208
    case TPrimitiveType::CHAR:
497
208
        return string_length;
498
208
    case TPrimitiveType::VARCHAR:
499
208
    case TPrimitiveType::HLL:
500
208
    case TPrimitiveType::AGG_STATE:
501
208
        return string_length + sizeof(OLAP_VARCHAR_MAX_LENGTH);
502
8
    case TPrimitiveType::STRING:
503
8
    case TPrimitiveType::VARIANT:
504
8
        return string_length + sizeof(OLAP_STRING_MAX_LENGTH);
505
0
    case TPrimitiveType::JSONB:
506
0
        return string_length + sizeof(OLAP_JSONB_MAX_LENGTH);
507
0
    case TPrimitiveType::STRUCT:
508
        // Note that(xy): this is the length of struct type itself,
509
        // the length of its subtypes are not included.
510
0
        return OLAP_STRUCT_MAX_LENGTH;
511
18
    case TPrimitiveType::ARRAY:
512
18
        return OLAP_ARRAY_MAX_LENGTH;
513
0
    case TPrimitiveType::MAP:
514
0
        return OLAP_MAP_MAX_LENGTH;
515
0
    case TPrimitiveType::DECIMAL32:
516
0
        return 4;
517
0
    case TPrimitiveType::DECIMAL64:
518
0
        return 8;
519
0
    case TPrimitiveType::DECIMAL128I:
520
0
        return 16;
521
0
    case TPrimitiveType::DECIMAL256:
522
0
        return 32;
523
208
    case TPrimitiveType::DECIMALV2:
524
208
        return 12; // use 12 bytes in olap engine.
525
0
    default:
526
0
        LOG(WARNING) << "unknown field type. [type=" << type << "]";
527
0
        return 0;
528
4.48k
    }
529
4.48k
}
530
531
18
bool TabletColumn::has_char_type() const {
532
18
    switch (_type) {
533
6
    case FieldType::OLAP_FIELD_TYPE_CHAR: {
534
6
        return true;
535
0
    }
536
8
    case FieldType::OLAP_FIELD_TYPE_ARRAY:
537
8
    case FieldType::OLAP_FIELD_TYPE_MAP:
538
8
    case FieldType::OLAP_FIELD_TYPE_STRUCT: {
539
8
        return std::any_of(_sub_columns.begin(), _sub_columns.end(),
540
8
                           [&](const auto& sub) -> bool { return sub->has_char_type(); });
541
8
    }
542
4
    default:
543
4
        return false;
544
18
    }
545
18
}
546
547
37.2k
TabletColumn::TabletColumn() : _aggregation(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE) {}
548
549
74
TabletColumn::TabletColumn(FieldAggregationMethod agg, FieldType type) {
550
74
    _aggregation = agg;
551
74
    _type = type;
552
74
}
553
554
34
TabletColumn::TabletColumn(FieldAggregationMethod agg, FieldType filed_type, bool is_nullable) {
555
34
    _aggregation = agg;
556
34
    _type = filed_type;
557
34
    _length = cast_set<int32_t>(get_scalar_type_info(filed_type)->size());
558
34
    _is_nullable = is_nullable;
559
34
}
560
561
TabletColumn::TabletColumn(FieldAggregationMethod agg, FieldType filed_type, bool is_nullable,
562
726
                           int32_t unique_id, size_t length) {
563
726
    _aggregation = agg;
564
726
    _type = filed_type;
565
726
    _is_nullable = is_nullable;
566
726
    _unique_id = unique_id;
567
726
    _length = cast_set<int32_t>(length);
568
726
}
569
570
0
TabletColumn::TabletColumn(const ColumnPB& column) {
571
0
    init_from_pb(column);
572
0
}
573
574
8
TabletColumn::TabletColumn(const TColumn& column) {
575
8
    init_from_thrift(column);
576
8
}
577
578
10
void TabletColumn::init_from_thrift(const TColumn& tcolumn) {
579
10
    ColumnPB column_pb;
580
10
    TabletMeta::init_column_from_tcolumn(tcolumn.col_unique_id, tcolumn, &column_pb);
581
10
    init_from_pb(column_pb);
582
10
}
583
584
24.8k
void TabletColumn::init_from_pb(const ColumnPB& column) {
585
24.8k
    _unique_id = column.unique_id();
586
24.8k
    _col_name = column.name();
587
24.8k
    _col_name_lower_case = to_lower(_col_name);
588
24.8k
    _type = TabletColumn::get_field_type_by_string(column.type());
589
24.8k
    _is_key = column.is_key();
590
24.8k
    _is_nullable = column.is_nullable();
591
24.8k
    _is_auto_increment = column.is_auto_increment();
592
24.8k
    if (column.has_is_on_update_current_timestamp()) {
593
22.8k
        _is_on_update_current_timestamp = column.is_on_update_current_timestamp();
594
22.8k
    }
595
596
24.8k
    _has_default_value = column.has_default_value();
597
24.8k
    if (_has_default_value) {
598
60
        _default_value = column.default_value();
599
60
    }
600
601
24.8k
    if (column.has_precision()) {
602
8.15k
        _is_decimal = true;
603
8.15k
        _precision = column.precision();
604
16.7k
    } else {
605
16.7k
        _is_decimal = false;
606
16.7k
    }
607
24.8k
    if (column.has_frac()) {
608
8.15k
        _frac = column.frac();
609
8.15k
    }
610
24.8k
    _length = column.length();
611
24.8k
    _index_length = column.index_length();
612
24.8k
    if (column.has_is_bf_column()) {
613
868
        _is_bf_column = column.is_bf_column();
614
24.0k
    } else {
615
24.0k
        _is_bf_column = false;
616
24.0k
    }
617
24.8k
    if (column.has_aggregation()) {
618
8.28k
        _aggregation = get_aggregation_type_by_string(column.aggregation());
619
8.28k
        _aggregation_name = column.aggregation();
620
8.28k
    }
621
622
24.8k
    if (_type == FieldType::OLAP_FIELD_TYPE_AGG_STATE) {
623
0
        _result_is_nullable = column.result_is_nullable();
624
0
        _be_exec_version = column.be_exec_version();
625
0
    }
626
627
24.8k
    if (column.has_visible()) {
628
22.8k
        _visible = column.visible();
629
22.8k
    }
630
24.8k
    if (_type == FieldType::OLAP_FIELD_TYPE_ARRAY) {
631
12
        CHECK(column.children_columns_size() == 1)
632
0
                << "ARRAY type should has 1 children types, but got "
633
0
                << column.children_columns_size();
634
12
    }
635
24.8k
    if (_type == FieldType::OLAP_FIELD_TYPE_MAP) {
636
0
        DCHECK(column.children_columns_size() == 2)
637
0
                << "MAP type should has 2 children types, but got "
638
0
                << column.children_columns_size();
639
0
        if (UNLIKELY(column.children_columns_size() != 2)) {
640
0
            LOG(WARNING) << "MAP type should has 2 children types, but got "
641
0
                         << column.children_columns_size();
642
0
        }
643
0
    }
644
24.9k
    for (int i = 0; i < column.children_columns_size(); i++) {
645
20
        TabletColumn child_column;
646
20
        child_column.init_from_pb(column.children_columns(i));
647
20
        add_sub_column(child_column);
648
20
    }
649
24.8k
    if (column.has_column_path_info()) {
650
24
        _column_path = std::make_shared<vectorized::PathInData>();
651
24
        _column_path->from_protobuf(column.column_path_info());
652
24
        _parent_col_unique_id = column.column_path_info().parrent_column_unique_id();
653
24
    }
654
24.8k
    if (is_variant_type() && !column.has_column_path_info()) {
655
        // set path info for variant root column, to prevent from missing
656
102
        _column_path = std::make_shared<vectorized::PathInData>(_col_name_lower_case);
657
        // _parent_col_unique_id = _unique_id;
658
102
    }
659
24.8k
    if (column.has_variant_max_subcolumns_count()) {
660
22.9k
        _variant.max_subcolumns_count = column.variant_max_subcolumns_count();
661
22.9k
    }
662
24.8k
    if (column.has_variant_enable_typed_paths_to_sparse()) {
663
22.8k
        _variant.enable_typed_paths_to_sparse = column.variant_enable_typed_paths_to_sparse();
664
22.8k
    }
665
24.8k
    if (column.has_variant_max_sparse_column_statistics_size()) {
666
22.9k
        _variant.max_sparse_column_statistics_size =
667
22.9k
                column.variant_max_sparse_column_statistics_size();
668
22.9k
    }
669
24.8k
    if (column.has_variant_sparse_hash_shard_count()) {
670
18.4k
        _variant.sparse_hash_shard_count = column.variant_sparse_hash_shard_count();
671
18.4k
    }
672
24.8k
    if (column.has_variant_enable_doc_mode()) {
673
18.4k
        _variant.enable_doc_mode = column.variant_enable_doc_mode();
674
18.4k
    }
675
24.8k
    if (column.has_variant_doc_materialization_min_rows()) {
676
18.4k
        _variant.doc_materialization_min_rows = column.variant_doc_materialization_min_rows();
677
18.4k
    }
678
24.8k
    if (column.has_variant_doc_hash_shard_count()) {
679
18.4k
        _variant.doc_hash_shard_count = column.variant_doc_hash_shard_count();
680
18.4k
    }
681
24.8k
    if (column.has_pattern_type()) {
682
18.4k
        _pattern_type = column.pattern_type();
683
18.4k
    }
684
24.8k
}
685
686
TabletColumn TabletColumn::create_materialized_variant_column(const std::string& root,
687
                                                              const std::vector<std::string>& paths,
688
                                                              int32_t parent_unique_id,
689
2
                                                              int32_t max_subcolumns_count) {
690
2
    TabletColumn subcol;
691
2
    subcol.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
692
2
    subcol.set_is_nullable(true);
693
2
    subcol.set_unique_id(-1);
694
2
    subcol.set_parent_unique_id(parent_unique_id);
695
2
    vectorized::PathInData path(root, paths);
696
2
    subcol.set_path_info(path);
697
2
    subcol.set_name(path.get_path());
698
2
    subcol.set_variant_max_subcolumns_count(max_subcolumns_count);
699
2
    return subcol;
700
2
}
701
702
64.7k
void TabletColumn::to_schema_pb(ColumnPB* column) const {
703
64.7k
    column->set_unique_id(_unique_id);
704
64.7k
    column->set_name(_col_name);
705
64.7k
    column->set_type(get_string_by_field_type(_type));
706
64.7k
    column->set_is_key(_is_key);
707
64.7k
    column->set_is_nullable(_is_nullable);
708
64.7k
    column->set_is_auto_increment(_is_auto_increment);
709
64.7k
    column->set_is_on_update_current_timestamp(_is_on_update_current_timestamp);
710
64.7k
    if (_has_default_value) {
711
312
        column->set_default_value(_default_value);
712
312
    }
713
64.7k
    if (_is_decimal) {
714
35.2k
        column->set_precision(_precision);
715
35.2k
        column->set_frac(_frac);
716
35.2k
    }
717
64.7k
    column->set_length(_length);
718
64.7k
    column->set_index_length(_index_length);
719
64.7k
    if (_is_bf_column) {
720
16
        column->set_is_bf_column(_is_bf_column);
721
16
    }
722
64.7k
    if (!_aggregation_name.empty()) {
723
35.5k
        column->set_aggregation(_aggregation_name);
724
35.5k
    }
725
64.7k
    column->set_result_is_nullable(_result_is_nullable);
726
64.7k
    column->set_be_exec_version(_be_exec_version);
727
64.7k
    column->set_visible(_visible);
728
729
64.7k
    if (_type == FieldType::OLAP_FIELD_TYPE_ARRAY) {
730
28
        CHECK(_sub_columns.size() == 1)
731
0
                << "ARRAY type should has 1 children types, but got " << _sub_columns.size();
732
28
    }
733
64.7k
    if (_type == FieldType::OLAP_FIELD_TYPE_MAP) {
734
0
        DCHECK(_sub_columns.size() == 2)
735
0
                << "MAP type should has 2 children types, but got " << _sub_columns.size();
736
0
        if (UNLIKELY(_sub_columns.size() != 2)) {
737
0
            LOG(WARNING) << "MAP type should has 2 children types, but got " << _sub_columns.size();
738
0
        }
739
0
    }
740
741
64.7k
    for (size_t i = 0; i < _sub_columns.size(); i++) {
742
36
        ColumnPB* child = column->add_children_columns();
743
36
        _sub_columns[i]->to_schema_pb(child);
744
36
    }
745
746
    // set parts info
747
64.7k
    if (has_path_info()) {
748
        // CHECK_GT(_parent_col_unique_id, 0);
749
248
        _column_path->to_protobuf(column->mutable_column_path_info(), _parent_col_unique_id);
750
        // Update unstable information for variant columns. Some of the fields in the tablet schema
751
        // are irrelevant for variant sub-columns, but retaining them may lead to an excessive growth
752
        // in the number of tablet schema cache entries.
753
248
        if (_type == FieldType::OLAP_FIELD_TYPE_STRING) {
754
0
            column->set_length(INT_MAX);
755
0
        }
756
248
        column->set_index_length(0);
757
248
    }
758
64.7k
    column->set_variant_max_subcolumns_count(_variant.max_subcolumns_count);
759
64.7k
    column->set_pattern_type(_pattern_type);
760
64.7k
    column->set_variant_enable_typed_paths_to_sparse(_variant.enable_typed_paths_to_sparse);
761
64.7k
    column->set_variant_max_sparse_column_statistics_size(
762
64.7k
            _variant.max_sparse_column_statistics_size);
763
64.7k
    column->set_variant_sparse_hash_shard_count(_variant.sparse_hash_shard_count);
764
64.7k
    column->set_variant_enable_doc_mode(_variant.enable_doc_mode);
765
64.7k
    column->set_variant_doc_materialization_min_rows(_variant.doc_materialization_min_rows);
766
64.7k
    column->set_variant_doc_hash_shard_count(_variant.doc_hash_shard_count);
767
64.7k
}
768
769
2.85k
void TabletColumn::add_sub_column(TabletColumn& sub_column) {
770
2.85k
    _sub_columns.push_back(std::make_shared<TabletColumn>(sub_column));
771
2.85k
    sub_column._parent_col_unique_id = this->_unique_id;
772
2.85k
    _sub_column_count += 1;
773
2.85k
}
774
775
50.5k
bool TabletColumn::is_row_store_column() const {
776
50.5k
    return _col_name == BeConsts::ROW_STORE_COL;
777
50.5k
}
778
779
vectorized::AggregateFunctionPtr TabletColumn::get_aggregate_function_union(
780
0
        vectorized::DataTypePtr type, int current_be_exec_version) const {
781
0
    const auto* state_type = assert_cast<const vectorized::DataTypeAggState*>(type.get());
782
0
    BeExecVersionManager::check_function_compatibility(
783
0
            current_be_exec_version, _be_exec_version,
784
0
            state_type->get_nested_function()->get_name());
785
0
    return vectorized::AggregateStateUnion::create(state_type->get_nested_function(), {type}, type);
786
0
}
787
788
vectorized::AggregateFunctionPtr TabletColumn::get_aggregate_function(
789
48
        std::string suffix, int current_be_exec_version) const {
790
48
    vectorized::AggregateFunctionPtr function = nullptr;
791
792
48
    auto type = vectorized::DataTypeFactory::instance().create_data_type(*this);
793
48
    if (type && type->get_primitive_type() == PrimitiveType::TYPE_AGG_STATE) {
794
0
        function = get_aggregate_function_union(type, current_be_exec_version);
795
48
    } else {
796
48
        std::string origin_name = TabletColumn::get_string_by_aggregation_type(_aggregation);
797
48
        std::string agg_name = origin_name + suffix;
798
48
        std::transform(agg_name.begin(), agg_name.end(), agg_name.begin(),
799
516
                       [](unsigned char c) { return std::tolower(c); });
800
48
        function = vectorized::AggregateFunctionSimpleFactory::instance().get(
801
48
                agg_name, {type}, type, type->is_nullable(),
802
48
                BeExecVersionManager::get_newest_version());
803
48
        if (!function) {
804
0
            LOG(WARNING) << "get column aggregate function failed, aggregation_name=" << origin_name
805
0
                         << ", column_type=" << type->get_name();
806
0
        }
807
48
    }
808
48
    if (function) {
809
48
        function->set_version(_be_exec_version);
810
48
        return function;
811
48
    }
812
0
    return nullptr;
813
48
}
814
815
3.67k
void TabletColumn::set_path_info(const vectorized::PathInData& path) {
816
3.67k
    _column_path = std::make_shared<vectorized::PathInData>(path);
817
3.67k
}
818
819
366
vectorized::DataTypePtr TabletColumn::get_vec_type() const {
820
366
    return vectorized::DataTypeFactory::instance().create_data_type(*this);
821
366
}
822
823
// escape '.' and '_'
824
122k
std::string escape_for_path_name(const std::string& s) {
825
122k
    std::string res;
826
122k
    const char* pos = s.data();
827
122k
    const char* end = pos + s.size();
828
124k
    while (pos != end) {
829
1.21k
        unsigned char c = *pos;
830
1.21k
        if (c == '.' || c == '_') {
831
110
            res += '%';
832
110
            res += vectorized::hex_digit_uppercase(c / 16);
833
110
            res += vectorized::hex_digit_uppercase(c % 16);
834
1.10k
        } else {
835
1.10k
            res += c;
836
1.10k
        }
837
1.21k
        ++pos;
838
1.21k
    }
839
122k
    return res;
840
122k
}
841
842
52
void TabletIndex::set_escaped_escaped_index_suffix_path(const std::string& path_name) {
843
52
    std::string escaped_path = escape_for_path_name(path_name);
844
52
    _escaped_index_suffix_path = escaped_path;
845
52
}
846
847
void TabletIndex::init_from_thrift(const TOlapTableIndex& index,
848
36
                                   const TabletSchema& tablet_schema) {
849
36
    _index_id = index.index_id;
850
36
    _index_name = index.index_name;
851
    // init col_unique_id in index at be side, since col_unique_id may be -1 at fe side
852
    // get column unique id by name
853
36
    std::vector<int32_t> col_unique_ids(index.columns.size());
854
72
    for (size_t i = 0; i < index.columns.size(); i++) {
855
36
        auto column_idx = tablet_schema.field_index(index.columns[i]);
856
36
        if (column_idx >= 0) {
857
28
            col_unique_ids[i] = tablet_schema.column(column_idx).unique_id();
858
28
        } else {
859
            // if column unique id not found by column name, find by column unique id
860
            // column unique id can not found means this column is a new column added by light schema change
861
8
            if (index.__isset.column_unique_ids && !index.column_unique_ids.empty() &&
862
8
                tablet_schema.has_column_unique_id(index.column_unique_ids[i])) {
863
2
                col_unique_ids[i] = index.column_unique_ids[i];
864
6
            } else {
865
6
                col_unique_ids[i] = -1;
866
6
            }
867
8
        }
868
36
    }
869
36
    _col_unique_ids = std::move(col_unique_ids);
870
871
36
    switch (index.index_type) {
872
0
    case TIndexType::BITMAP:
873
0
        _index_type = IndexType::BITMAP;
874
0
        break;
875
34
    case TIndexType::INVERTED:
876
34
        _index_type = IndexType::INVERTED;
877
34
        break;
878
2
    case TIndexType::ANN:
879
2
        _index_type = IndexType::ANN;
880
2
        break;
881
0
    case TIndexType::BLOOMFILTER:
882
0
        _index_type = IndexType::BLOOMFILTER;
883
0
        break;
884
0
    case TIndexType::NGRAM_BF:
885
0
        _index_type = IndexType::NGRAM_BF;
886
0
        break;
887
36
    }
888
36
    if (index.__isset.properties) {
889
8
        for (auto kv : index.properties) {
890
8
            _properties[kv.first] = kv.second;
891
8
        }
892
2
    }
893
36
}
894
895
void TabletIndex::init_from_thrift(const TOlapTableIndex& index,
896
2
                                   const std::vector<int32_t>& column_uids) {
897
2
    _index_id = index.index_id;
898
2
    _index_name = index.index_name;
899
2
    _col_unique_ids = column_uids;
900
901
2
    switch (index.index_type) {
902
0
    case TIndexType::BITMAP:
903
0
        _index_type = IndexType::BITMAP;
904
0
        break;
905
2
    case TIndexType::INVERTED:
906
2
        _index_type = IndexType::INVERTED;
907
2
        break;
908
0
    case TIndexType::ANN:
909
0
        _index_type = IndexType::ANN;
910
0
        break;
911
0
    case TIndexType::BLOOMFILTER:
912
0
        _index_type = IndexType::BLOOMFILTER;
913
0
        break;
914
0
    case TIndexType::NGRAM_BF:
915
0
        _index_type = IndexType::NGRAM_BF;
916
0
        break;
917
2
    }
918
2
    if (index.__isset.properties) {
919
6
        for (auto kv : index.properties) {
920
6
            _properties[kv.first] = kv.second;
921
6
        }
922
2
    }
923
2
}
924
925
15.2k
void TabletIndex::init_from_pb(const TabletIndexPB& index) {
926
15.2k
    _index_id = index.index_id();
927
15.2k
    _index_name = index.index_name();
928
15.2k
    _col_unique_ids.clear();
929
15.2k
    for (auto col_unique_id : index.col_unique_id()) {
930
14.9k
        _col_unique_ids.push_back(col_unique_id);
931
14.9k
    }
932
15.2k
    _index_type = index.index_type();
933
82.0k
    for (const auto& kv : index.properties()) {
934
82.0k
        _properties[kv.first] = kv.second;
935
82.0k
    }
936
15.2k
    _escaped_index_suffix_path = index.index_suffix_name();
937
15.2k
}
938
939
22.1k
void TabletIndex::to_schema_pb(TabletIndexPB* index) const {
940
22.1k
    index->set_index_id(_index_id);
941
22.1k
    index->set_index_name(_index_name);
942
22.1k
    index->clear_col_unique_id();
943
22.1k
    for (auto col_unique_id : _col_unique_ids) {
944
22.1k
        index->add_col_unique_id(col_unique_id);
945
22.1k
    }
946
22.1k
    index->set_index_type(_index_type);
947
121k
    for (const auto& kv : _properties) {
948
121k
        DBUG_EXECUTE_IF("tablet_schema.to_schema_pb", {
949
121k
            if (kv.first == INVERTED_INDEX_PARSER_LOWERCASE_KEY) {
950
121k
                continue;
951
121k
            }
952
121k
        })
953
121k
        (*index->mutable_properties())[kv.first] = kv.second;
954
121k
    }
955
22.1k
    index->set_index_suffix_name(_escaped_index_suffix_path);
956
957
22.1k
    DBUG_EXECUTE_IF("tablet_schema.to_schema_pb", { return; })
958
959
    // Only add lower_case=true default for built-in analyzers/parsers, NOT for custom analyzers
960
    // Custom analyzer: lower_case is determined by analyzer's internal token filter
961
22.1k
    if (!_properties.empty() && !_properties.contains(INVERTED_INDEX_PARSER_LOWERCASE_KEY)) {
962
358
        bool has_parser = _properties.contains(INVERTED_INDEX_PARSER_KEY) ||
963
358
                          _properties.contains(INVERTED_INDEX_PARSER_KEY_ALIAS);
964
358
        std::string analyzer_name = get_analyzer_name_from_properties(_properties);
965
358
        bool is_builtin = analyzer_name.empty() ||
966
358
                          segment_v2::inverted_index::InvertedIndexAnalyzer::is_builtin_analyzer(
967
0
                                  analyzer_name);
968
358
        if (has_parser || is_builtin) {
969
358
            (*index->mutable_properties())[INVERTED_INDEX_PARSER_LOWERCASE_KEY] =
970
358
                    INVERTED_INDEX_PARSER_TRUE;
971
358
        }
972
358
    }
973
22.1k
}
974
975
6.67k
TabletSchema::TabletSchema() = default;
976
977
6.51k
TabletSchema::~TabletSchema() {}
978
979
4.03k
int64_t TabletSchema::get_metadata_size() const {
980
4.03k
    return sizeof(TabletSchema);
981
4.03k
}
982
983
6.71k
void TabletSchema::append_column(TabletColumn column, ColumnType col_type) {
984
6.71k
    if (column.is_key()) {
985
246
        _num_key_columns++;
986
246
    }
987
6.71k
    if (column.is_nullable()) {
988
6.22k
        _num_null_columns++;
989
6.22k
    }
990
6.71k
    if (column.is_variant_type()) {
991
116
        ++_num_variant_columns;
992
116
        if (!column.has_path_info()) {
993
46
            const std::string& col_name = column.name_lower_case();
994
46
            vectorized::PathInData path(col_name);
995
46
            column.set_path_info(path);
996
46
        }
997
116
    }
998
6.71k
    if (UNLIKELY(column.name() == DELETE_SIGN)) {
999
42
        _delete_sign_idx = _num_columns;
1000
6.66k
    } else if (UNLIKELY(column.name() == SEQUENCE_COL)) {
1001
8
        _sequence_col_idx = _num_columns;
1002
6.66k
    } else if (UNLIKELY(column.name() == VERSION_COL)) {
1003
0
        _version_col_idx = _num_columns;
1004
6.66k
    } else if (UNLIKELY(column.name() == SKIP_BITMAP_COL)) {
1005
0
        _skip_bitmap_col_idx = _num_columns;
1006
6.66k
    } else if (UNLIKELY(column.name().starts_with(BeConsts::VIRTUAL_COLUMN_PREFIX))) {
1007
0
        _vir_col_idx_to_unique_id[_num_columns] = column.unique_id();
1008
0
    }
1009
6.71k
    _field_uniqueid_to_index[column.unique_id()] = _num_columns;
1010
6.71k
    _cols.push_back(std::make_shared<TabletColumn>(std::move(column)));
1011
    // The dropped column may have same name with exsiting column, so that
1012
    // not add to name to index map, only for uid to index map
1013
6.71k
    if (col_type == ColumnType::VARIANT || _cols.back()->is_variant_type() ||
1014
6.71k
        _cols.back()->is_extracted_column()) {
1015
228
        _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns);
1016
228
        _field_path_to_index[_cols.back()->path_info_ptr().get()] = _num_columns;
1017
6.48k
    } else if (col_type == ColumnType::NORMAL) {
1018
6.48k
        _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns);
1019
6.48k
    }
1020
6.71k
    _num_columns++;
1021
6.71k
    _num_virtual_columns = _vir_col_idx_to_unique_id.size();
1022
    // generate column index mapping for seq map
1023
6.71k
    if (_seq_col_uid_to_value_cols_uid.contains(column.unique_id())) {
1024
0
        const auto seq_idx = _field_uniqueid_to_index[column.unique_id()];
1025
0
        if (!_seq_col_idx_to_value_cols_idx.contains(seq_idx)) {
1026
0
            _seq_col_idx_to_value_cols_idx[seq_idx] = {};
1027
0
        }
1028
0
    }
1029
6.71k
    if (_value_col_uid_to_seq_col_uid.contains(column.unique_id())) {
1030
0
        const auto seq_uid = _value_col_uid_to_seq_col_uid[column.unique_id()];
1031
0
        if (_field_uniqueid_to_index.contains(seq_uid)) {
1032
0
            bool all_uid_index_found = true;
1033
0
            std::vector<int32_t> value_cols_index;
1034
0
            for (const auto value_col_uid : _seq_col_uid_to_value_cols_uid[seq_uid]) {
1035
0
                if (!_field_uniqueid_to_index.contains(value_col_uid)) {
1036
0
                    all_uid_index_found = false;
1037
0
                    break;
1038
0
                }
1039
0
                value_cols_index.push_back(_field_uniqueid_to_index[value_col_uid]);
1040
0
            }
1041
0
            if (all_uid_index_found) {
1042
0
                const auto seq_idx = _field_uniqueid_to_index[seq_uid];
1043
0
                for (const auto col_idx : value_cols_index) {
1044
0
                    _seq_col_idx_to_value_cols_idx[seq_idx].push_back(col_idx);
1045
0
                    _value_col_idx_to_seq_col_idx[col_idx] = seq_idx;
1046
0
                }
1047
0
                _value_col_idx_to_seq_col_idx[seq_idx] = seq_idx;
1048
0
            }
1049
0
        }
1050
0
    }
1051
6.71k
}
1052
1053
146
void TabletSchema::append_index(TabletIndex&& index) {
1054
146
    size_t index_pos = _indexes.size();
1055
146
    _indexes.push_back(std::make_shared<TabletIndex>(index));
1056
152
    for (int32_t id : _indexes.back()->col_unique_ids()) {
1057
152
        if (auto field_pattern = _indexes.back()->field_pattern(); !field_pattern.empty()) {
1058
12
            auto& pattern_to_index_map = _index_by_unique_id_with_pattern[id];
1059
12
            pattern_to_index_map[field_pattern].emplace_back(_indexes.back());
1060
140
        } else {
1061
140
            IndexKey key = std::make_tuple(_indexes.back()->index_type(), id,
1062
140
                                           _indexes.back()->get_index_suffix());
1063
140
            _col_id_suffix_to_index[key].push_back(index_pos);
1064
140
        }
1065
152
    }
1066
146
}
1067
1068
0
void TabletSchema::replace_column(size_t pos, TabletColumn new_col) {
1069
0
    CHECK_LT(pos, num_columns()) << " outof range";
1070
0
    _cols[pos] = std::make_shared<TabletColumn>(std::move(new_col));
1071
0
}
1072
1073
6
void TabletSchema::clear_index() {
1074
6
    _indexes.clear();
1075
6
    _col_id_suffix_to_index.clear();
1076
6
    _index_by_unique_id_with_pattern.clear();
1077
6
}
1078
1079
14
void TabletSchema::remove_index(int64_t index_id) {
1080
14
    std::vector<TabletIndexPtr> new_indexes;
1081
22
    for (auto& index : _indexes) {
1082
22
        if (index->index_id() != index_id) {
1083
8
            new_indexes.emplace_back(std::move(index));
1084
8
        }
1085
22
    }
1086
14
    _indexes = std::move(new_indexes);
1087
14
    _col_id_suffix_to_index.clear();
1088
14
    _index_by_unique_id_with_pattern.clear();
1089
22
    for (size_t new_pos = 0; new_pos < _indexes.size(); ++new_pos) {
1090
8
        const auto& index = _indexes[new_pos];
1091
8
        for (int32_t col_uid : index->col_unique_ids()) {
1092
8
            if (auto field_pattern = index->field_pattern(); !field_pattern.empty()) {
1093
0
                auto& pattern_to_index_map = _index_by_unique_id_with_pattern[col_uid];
1094
0
                pattern_to_index_map[field_pattern].emplace_back(index);
1095
8
            } else {
1096
8
                IndexKey key = std::make_tuple(_indexes.back()->index_type(), col_uid,
1097
8
                                               _indexes.back()->get_index_suffix());
1098
8
                _col_id_suffix_to_index[key].push_back(new_pos);
1099
8
            }
1100
8
        }
1101
8
    }
1102
14
}
1103
1104
2
void TabletSchema::clear_columns() {
1105
2
    _field_path_to_index.clear();
1106
2
    _field_name_to_index.clear();
1107
2
    _field_uniqueid_to_index.clear();
1108
2
    _num_columns = 0;
1109
2
    _num_variant_columns = 0;
1110
2
    _num_null_columns = 0;
1111
2
    _num_key_columns = 0;
1112
2
    _seq_col_idx_to_value_cols_idx.clear();
1113
2
    _value_col_idx_to_seq_col_idx.clear();
1114
2
    _cols.clear();
1115
2
}
1116
1117
void TabletSchema::init_from_pb(const TabletSchemaPB& schema, bool ignore_extracted_columns,
1118
3.87k
                                bool reuse_cache_column) {
1119
3.87k
    _keys_type = schema.keys_type();
1120
3.87k
    _num_columns = 0;
1121
3.87k
    _num_variant_columns = 0;
1122
3.87k
    _num_key_columns = 0;
1123
3.87k
    _num_null_columns = 0;
1124
3.87k
    _cols.clear();
1125
3.87k
    _indexes.clear();
1126
3.87k
    _index_by_unique_id_with_pattern.clear();
1127
3.87k
    _col_id_suffix_to_index.clear();
1128
3.87k
    _field_name_to_index.clear();
1129
3.87k
    _field_uniqueid_to_index.clear();
1130
3.87k
    _cluster_key_uids.clear();
1131
3.87k
    for (const auto& i : schema.cluster_key_uids()) {
1132
12
        _cluster_key_uids.push_back(i);
1133
12
    }
1134
25.2k
    for (auto& column_pb : schema.column()) {
1135
25.2k
        TabletColumnPtr column;
1136
25.2k
        if (reuse_cache_column) {
1137
714
            auto pair = TabletColumnObjectPool::instance()->insert(
1138
714
                    deterministic_string_serialize(column_pb));
1139
714
            column = pair.second;
1140
            // Release the handle quickly, because we use shared ptr to manage column.
1141
            // It often core during tablet schema copy to another schema because handle's
1142
            // reference count should be managed mannually.
1143
714
            TabletColumnObjectPool::instance()->release(pair.first);
1144
24.5k
        } else {
1145
24.5k
            column = std::make_shared<TabletColumn>();
1146
24.5k
            column->init_from_pb(column_pb);
1147
24.5k
        }
1148
25.2k
        if (ignore_extracted_columns && column->is_extracted_column()) {
1149
0
            continue;
1150
0
        }
1151
25.2k
        if (column->is_key()) {
1152
4.90k
            _num_key_columns++;
1153
4.90k
        }
1154
25.2k
        if (column->is_nullable()) {
1155
14.9k
            _num_null_columns++;
1156
14.9k
        }
1157
25.2k
        if (column->is_variant_type()) {
1158
136
            ++_num_variant_columns;
1159
136
        }
1160
1161
25.2k
        _cols.emplace_back(std::move(column));
1162
25.2k
        if (!_cols.back()->is_extracted_column()) {
1163
25.2k
            _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns);
1164
25.2k
            _field_uniqueid_to_index[_cols.back()->unique_id()] = _num_columns;
1165
25.2k
        }
1166
25.2k
        _num_columns++;
1167
25.2k
    }
1168
14.8k
    for (const auto& index_pb : schema.index()) {
1169
14.8k
        TabletIndexPtr index;
1170
14.8k
        if (reuse_cache_column) {
1171
326
            auto pair = TabletColumnObjectPool::instance()->insert_index(
1172
326
                    deterministic_string_serialize(index_pb));
1173
326
            index = pair.second;
1174
            //  Only need the value to be cached by the pool, release it quickly because the handle need
1175
            // record reference count mannually, or it will core during tablet schema copy method.
1176
326
            TabletColumnObjectPool::instance()->release(pair.first);
1177
14.4k
        } else {
1178
14.4k
            index = std::make_shared<TabletIndex>();
1179
14.4k
            index->init_from_pb(index_pb);
1180
14.4k
        }
1181
14.8k
        size_t index_pos = _indexes.size();
1182
14.8k
        _indexes.emplace_back(std::move(index));
1183
14.8k
        for (int32_t col_uid : _indexes.back()->col_unique_ids()) {
1184
14.8k
            if (auto field_pattern = _indexes.back()->field_pattern(); !field_pattern.empty()) {
1185
0
                auto& pattern_to_index_map = _index_by_unique_id_with_pattern[col_uid];
1186
0
                pattern_to_index_map[field_pattern].emplace_back(_indexes.back());
1187
14.8k
            } else {
1188
14.8k
                IndexKey key = std::make_tuple(_indexes.back()->index_type(), col_uid,
1189
14.8k
                                               _indexes.back()->get_index_suffix());
1190
14.8k
                _col_id_suffix_to_index[key].push_back(index_pos);
1191
14.8k
            }
1192
14.8k
        }
1193
14.8k
    }
1194
3.87k
    _num_short_key_columns = schema.num_short_key_columns();
1195
3.87k
    _num_rows_per_row_block = schema.num_rows_per_row_block();
1196
3.87k
    _compress_kind = schema.compress_kind();
1197
3.87k
    _next_column_unique_id = schema.next_column_unique_id();
1198
3.87k
    if (schema.has_bf_fpp()) {
1199
6
        _has_bf_fpp = true;
1200
6
        _bf_fpp = schema.bf_fpp();
1201
3.86k
    } else {
1202
3.86k
        _has_bf_fpp = false;
1203
3.86k
        _bf_fpp = BLOOM_FILTER_DEFAULT_FPP;
1204
3.86k
    }
1205
3.87k
    _is_in_memory = schema.is_in_memory();
1206
3.87k
    _disable_auto_compaction = schema.disable_auto_compaction();
1207
3.87k
    _enable_single_replica_compaction = schema.enable_single_replica_compaction();
1208
3.87k
    _store_row_column = schema.store_row_column();
1209
3.87k
    _skip_write_index_on_load = schema.skip_write_index_on_load();
1210
3.87k
    _delete_sign_idx = schema.delete_sign_idx();
1211
3.87k
    _sequence_col_idx = schema.sequence_col_idx();
1212
3.87k
    _version_col_idx = schema.version_col_idx();
1213
3.87k
    _skip_bitmap_col_idx = schema.skip_bitmap_col_idx();
1214
3.87k
    _sort_type = schema.sort_type();
1215
3.87k
    _sort_col_num = schema.sort_col_num();
1216
3.87k
    _compression_type = schema.compression_type();
1217
3.87k
    _row_store_page_size = schema.row_store_page_size();
1218
3.87k
    _storage_page_size = schema.storage_page_size();
1219
3.87k
    _storage_dict_page_size = schema.storage_dict_page_size();
1220
3.87k
    _schema_version = schema.schema_version();
1221
3.87k
    if (schema.has_seq_map()) {
1222
3.20k
        auto column_groups_pb = schema.seq_map();
1223
3.20k
        _seq_col_uid_to_value_cols_uid.clear();
1224
3.20k
        _value_col_uid_to_seq_col_uid.clear();
1225
3.20k
        _seq_col_idx_to_value_cols_idx.clear();
1226
3.20k
        _value_col_idx_to_seq_col_idx.clear();
1227
        /*
1228
         * ColumnGroupsPB is a list of cg_pb, and
1229
         * ColumnGroupsPB do not have begin() or end() method.
1230
         * we must use for(i=0;i<xx;i++) loop
1231
         */
1232
3.20k
        for (int i = 0; i < column_groups_pb.cg_size(); i++) {
1233
0
            ColumnGroupPB cg_pb = column_groups_pb.cg(i);
1234
0
            uint32_t key_uid = cg_pb.sequence_column();
1235
0
            auto found = _field_uniqueid_to_index.find(key_uid);
1236
0
            DCHECK(found != _field_uniqueid_to_index.end())
1237
0
                    << "could not find sequence col with unique id = " << key_uid
1238
0
                    << " table_id=" << _table_id;
1239
0
            int32_t seq_index = found->second;
1240
0
            _seq_col_uid_to_value_cols_uid[key_uid] = {};
1241
0
            _seq_col_idx_to_value_cols_idx[seq_index] = {};
1242
0
            for (auto val_uid : cg_pb.columns_in_group()) {
1243
0
                _seq_col_uid_to_value_cols_uid[key_uid].push_back(val_uid);
1244
0
                found = _field_uniqueid_to_index.find(val_uid);
1245
0
                DCHECK(found != _field_uniqueid_to_index.end())
1246
0
                        << "could not find value col with unique id = " << key_uid
1247
0
                        << " table_id=" << _table_id;
1248
0
                int32_t val_index = found->second;
1249
0
                _seq_col_idx_to_value_cols_idx[seq_index].push_back(val_index);
1250
0
            }
1251
0
        }
1252
1253
3.20k
        if (!_seq_col_uid_to_value_cols_uid.empty()) {
1254
            /*
1255
                |** KEY **|        ** VALUE **     |
1256
                ------------------------------------
1257
                |** KEY **|  CDE is value| sequence|
1258
                |----|----|----|----|----|----|----|
1259
                A    B    C    D    E   S1   S2
1260
                0    1    2    3    4    5    6
1261
                for example: _seq_map is {5:{2,3}, 6:{4}}
1262
                then, _value_to_seq = {2:5,3:5,5:5,4:6,6:6}
1263
            */
1264
0
            for (auto& [seq_uid, cols_uid] : _seq_col_uid_to_value_cols_uid) {
1265
0
                for (auto col_uid : cols_uid) {
1266
0
                    _value_col_uid_to_seq_col_uid[col_uid] = seq_uid;
1267
0
                }
1268
0
                _value_col_uid_to_seq_col_uid[seq_uid] = seq_uid;
1269
0
            }
1270
1271
0
            for (auto& [seq_idx, value_cols_idx] : _seq_col_idx_to_value_cols_idx) {
1272
0
                for (auto col_idx : value_cols_idx) {
1273
0
                    _value_col_idx_to_seq_col_idx[col_idx] = seq_idx;
1274
0
                }
1275
0
                _value_col_idx_to_seq_col_idx[seq_idx] = seq_idx;
1276
0
            }
1277
0
        }
1278
3.20k
    }
1279
    // Default to V1 inverted index storage format for backward compatibility if not specified in schema.
1280
3.87k
    if (!schema.has_inverted_index_storage_format()) {
1281
588
        _inverted_index_storage_format = InvertedIndexStorageFormatPB::V1;
1282
3.28k
    } else {
1283
3.28k
        _inverted_index_storage_format = schema.inverted_index_storage_format();
1284
3.28k
    }
1285
1286
3.87k
    _row_store_column_unique_ids.assign(schema.row_store_column_unique_ids().begin(),
1287
3.87k
                                        schema.row_store_column_unique_ids().end());
1288
3.87k
    _enable_variant_flatten_nested = schema.enable_variant_flatten_nested();
1289
3.87k
    if (schema.has_is_external_segment_column_meta_used()) {
1290
1.90k
        _is_external_segment_column_meta_used = schema.is_external_segment_column_meta_used();
1291
1.96k
    } else {
1292
1.96k
        _is_external_segment_column_meta_used = false;
1293
1.96k
    }
1294
3.87k
    if (schema.has_integer_type_default_use_plain_encoding()) {
1295
1.90k
        _integer_type_default_use_plain_encoding = schema.integer_type_default_use_plain_encoding();
1296
1.90k
    }
1297
3.87k
    if (schema.has_binary_plain_encoding_default_impl()) {
1298
1.90k
        _binary_plain_encoding_default_impl = schema.binary_plain_encoding_default_impl();
1299
1.90k
    }
1300
3.87k
    update_metadata_size();
1301
3.87k
}
1302
1303
784
void TabletSchema::copy_from(const TabletSchema& tablet_schema) {
1304
784
    TabletSchemaPB tablet_schema_pb;
1305
784
    tablet_schema.to_schema_pb(&tablet_schema_pb);
1306
784
    init_from_pb(tablet_schema_pb);
1307
784
    _table_id = tablet_schema.table_id();
1308
784
    _path_set_info_map = tablet_schema._path_set_info_map;
1309
784
}
1310
1311
84
void TabletSchema::shawdow_copy_without_columns(const TabletSchema& tablet_schema) {
1312
84
    *this = tablet_schema;
1313
84
    _field_path_to_index.clear();
1314
84
    _field_name_to_index.clear();
1315
84
    _field_uniqueid_to_index.clear();
1316
84
    _num_columns = 0;
1317
84
    _num_variant_columns = 0;
1318
84
    _num_null_columns = 0;
1319
84
    _num_key_columns = 0;
1320
84
    _cols.clear();
1321
84
    _delete_sign_idx = -1;
1322
84
    _sequence_col_idx = -1;
1323
84
    _version_col_idx = -1;
1324
84
}
1325
1326
0
void TabletSchema::update_index_info_from(const TabletSchema& tablet_schema) {
1327
0
    for (auto& col : _cols) {
1328
0
        if (col->unique_id() < 0) {
1329
0
            continue;
1330
0
        }
1331
0
        const auto iter = tablet_schema._field_uniqueid_to_index.find(col->unique_id());
1332
0
        if (iter == tablet_schema._field_uniqueid_to_index.end()) {
1333
0
            continue;
1334
0
        }
1335
0
        auto col_idx = iter->second;
1336
0
        if (col_idx < 0 || col_idx >= tablet_schema._cols.size()) {
1337
0
            continue;
1338
0
        }
1339
0
        col->set_is_bf_column(tablet_schema._cols[col_idx]->is_bf_column());
1340
0
    }
1341
0
}
1342
1343
12.8k
std::string TabletSchema::to_key() const {
1344
12.8k
    TabletSchemaPB pb;
1345
12.8k
    to_schema_pb(&pb);
1346
12.8k
    return TabletSchema::deterministic_string_serialize(pb);
1347
12.8k
}
1348
1349
void TabletSchema::build_current_tablet_schema(int64_t index_id, int32_t version,
1350
                                               const OlapTableIndexSchema* index,
1351
0
                                               const TabletSchema& ori_tablet_schema) {
1352
    // copy from ori_tablet_schema
1353
0
    _keys_type = ori_tablet_schema.keys_type();
1354
0
    _num_short_key_columns = ori_tablet_schema.num_short_key_columns();
1355
0
    _num_rows_per_row_block = ori_tablet_schema.num_rows_per_row_block();
1356
0
    _compress_kind = ori_tablet_schema.compress_kind();
1357
1358
    // todo(yixiu): unique_id
1359
0
    _next_column_unique_id = ori_tablet_schema.next_column_unique_id();
1360
0
    _is_in_memory = ori_tablet_schema.is_in_memory();
1361
0
    _disable_auto_compaction = ori_tablet_schema.disable_auto_compaction();
1362
0
    _enable_single_replica_compaction = ori_tablet_schema.enable_single_replica_compaction();
1363
0
    _skip_write_index_on_load = ori_tablet_schema.skip_write_index_on_load();
1364
0
    _sort_type = ori_tablet_schema.sort_type();
1365
0
    _sort_col_num = ori_tablet_schema.sort_col_num();
1366
0
    _row_store_page_size = ori_tablet_schema.row_store_page_size();
1367
0
    _storage_page_size = ori_tablet_schema.storage_page_size();
1368
0
    _storage_dict_page_size = ori_tablet_schema.storage_dict_page_size();
1369
0
    _enable_variant_flatten_nested = ori_tablet_schema.variant_flatten_nested();
1370
1371
    // copy from table_schema_param
1372
0
    _schema_version = version;
1373
0
    _num_columns = 0;
1374
0
    _num_variant_columns = 0;
1375
0
    _num_key_columns = 0;
1376
0
    _num_null_columns = 0;
1377
0
    bool has_bf_columns = false;
1378
0
    _cols.clear();
1379
0
    _indexes.clear();
1380
0
    _col_id_suffix_to_index.clear();
1381
0
    _index_by_unique_id_with_pattern.clear();
1382
0
    _field_name_to_index.clear();
1383
0
    _field_uniqueid_to_index.clear();
1384
0
    _delete_sign_idx = -1;
1385
0
    _sequence_col_idx = -1;
1386
0
    _version_col_idx = -1;
1387
0
    _skip_bitmap_col_idx = -1;
1388
0
    _cluster_key_uids.clear();
1389
0
    for (const auto& i : ori_tablet_schema._cluster_key_uids) {
1390
0
        _cluster_key_uids.push_back(i);
1391
0
    }
1392
0
    for (auto& column : index->columns) {
1393
0
        if (column->is_key()) {
1394
0
            _num_key_columns++;
1395
0
        }
1396
0
        if (column->is_nullable()) {
1397
0
            _num_null_columns++;
1398
0
        }
1399
0
        if (column->is_bf_column()) {
1400
0
            has_bf_columns = true;
1401
0
        }
1402
0
        if (column->is_variant_type()) {
1403
0
            ++_num_variant_columns;
1404
0
        }
1405
0
        if (UNLIKELY(column->name() == DELETE_SIGN)) {
1406
0
            _delete_sign_idx = _num_columns;
1407
0
        } else if (UNLIKELY(column->name() == SEQUENCE_COL)) {
1408
0
            _sequence_col_idx = _num_columns;
1409
0
        } else if (UNLIKELY(column->name() == VERSION_COL)) {
1410
0
            _version_col_idx = _num_columns;
1411
0
        } else if (UNLIKELY(column->name() == SKIP_BITMAP_COL)) {
1412
0
            _skip_bitmap_col_idx = _num_columns;
1413
0
        }
1414
        // Reuse TabletColumn object from pool to reduce memory consumption
1415
0
        TabletColumnPtr new_column;
1416
0
        ColumnPB column_pb;
1417
0
        column->to_schema_pb(&column_pb);
1418
0
        auto pair = TabletColumnObjectPool::instance()->insert(
1419
0
                deterministic_string_serialize(column_pb));
1420
0
        new_column = pair.second;
1421
        // Release the handle quickly, because we use shared ptr to manage column
1422
0
        TabletColumnObjectPool::instance()->release(pair.first);
1423
0
        _cols.emplace_back(std::move(new_column));
1424
0
        _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns);
1425
0
        _field_uniqueid_to_index[_cols.back()->unique_id()] = _num_columns;
1426
0
        _num_columns++;
1427
0
    }
1428
1429
0
    for (const auto& i : index->indexes) {
1430
0
        size_t index_pos = _indexes.size();
1431
        // Reuse TabletIndex object from pool to reduce memory consumption
1432
0
        TabletIndexPtr new_index;
1433
0
        TabletIndexPB index_pb;
1434
0
        i->to_schema_pb(&index_pb);
1435
0
        auto pair = TabletColumnObjectPool::instance()->insert_index(
1436
0
                deterministic_string_serialize(index_pb));
1437
0
        new_index = pair.second;
1438
        // Release the handle quickly, because we use shared ptr to manage index
1439
0
        TabletColumnObjectPool::instance()->release(pair.first);
1440
0
        _indexes.emplace_back(std::move(new_index));
1441
0
        for (int32_t col_uid : _indexes.back()->col_unique_ids()) {
1442
0
            if (auto field_pattern = _indexes.back()->field_pattern(); !field_pattern.empty()) {
1443
0
                auto& pattern_to_index_map = _index_by_unique_id_with_pattern[col_uid];
1444
0
                pattern_to_index_map[field_pattern].emplace_back(_indexes.back());
1445
0
            } else {
1446
0
                IndexKey key = std::make_tuple(_indexes.back()->index_type(), col_uid,
1447
0
                                               _indexes.back()->get_index_suffix());
1448
0
                _col_id_suffix_to_index[key].push_back(index_pos);
1449
0
            }
1450
0
        }
1451
0
    }
1452
1453
0
    if (has_bf_columns) {
1454
0
        _has_bf_fpp = true;
1455
0
        _bf_fpp = ori_tablet_schema.bloom_filter_fpp();
1456
0
    } else {
1457
0
        _has_bf_fpp = false;
1458
0
        _bf_fpp = BLOOM_FILTER_DEFAULT_FPP;
1459
0
    }
1460
0
}
1461
1462
298
void TabletSchema::merge_dropped_columns(const TabletSchema& src_schema) {
1463
    // If they are the same tablet schema object, then just return
1464
298
    if (this == &src_schema) {
1465
0
        return;
1466
0
    }
1467
5.81k
    for (const auto& src_col : src_schema.columns()) {
1468
5.81k
        if (_field_uniqueid_to_index.find(src_col->unique_id()) == _field_uniqueid_to_index.end()) {
1469
0
            CHECK(!src_col->is_key())
1470
0
                    << src_col->name() << " is key column, should not be dropped.";
1471
0
            ColumnPB src_col_pb;
1472
            // There are some pointer in tablet column, not sure the reference relation, so
1473
            // that deep copy it.
1474
0
            src_col->to_schema_pb(&src_col_pb);
1475
0
            TabletColumn new_col(src_col_pb);
1476
0
            append_column(new_col, TabletSchema::ColumnType::DROPPED);
1477
0
        }
1478
5.81k
    }
1479
298
}
1480
1481
0
TabletSchemaSPtr TabletSchema::copy_without_variant_extracted_columns() {
1482
0
    TabletSchemaSPtr copy = std::make_shared<TabletSchema>();
1483
0
    copy->shawdow_copy_without_columns(*this);
1484
0
    for (auto& col : this->columns()) {
1485
0
        if (col->is_extracted_column()) {
1486
0
            continue;
1487
0
        }
1488
0
        copy->append_column(*col);
1489
0
    }
1490
0
    return copy;
1491
0
}
1492
1493
// Dropped column is in _field_uniqueid_to_index but not in _field_name_to_index
1494
// Could refer to append_column method
1495
12.6k
bool TabletSchema::is_dropped_column(const TabletColumn& col) const {
1496
12.6k
    CHECK(_field_uniqueid_to_index.find(col.unique_id()) != _field_uniqueid_to_index.end())
1497
0
            << "could not find col with unique id = " << col.unique_id()
1498
0
            << " and name = " << col.name() << " table_id=" << _table_id;
1499
12.6k
    auto it = _field_name_to_index.find(StringRef {col.name()});
1500
12.6k
    return it == _field_name_to_index.end() || _cols[it->second]->unique_id() != col.unique_id();
1501
12.6k
}
1502
1503
0
void TabletSchema::copy_extracted_columns(const TabletSchema& src_schema) {
1504
0
    std::unordered_set<int32_t> variant_columns;
1505
0
    for (const auto& col : columns()) {
1506
0
        if (col->is_variant_type()) {
1507
0
            variant_columns.insert(col->unique_id());
1508
0
        }
1509
0
    }
1510
0
    for (const TabletColumnPtr& col : src_schema.columns()) {
1511
0
        if (col->is_extracted_column() && variant_columns.contains(col->parent_unique_id())) {
1512
0
            ColumnPB col_pb;
1513
0
            col->to_schema_pb(&col_pb);
1514
0
            TabletColumn new_col(col_pb);
1515
0
            append_column(new_col, ColumnType::VARIANT);
1516
0
        }
1517
0
    }
1518
0
}
1519
1520
0
void TabletSchema::reserve_extracted_columns() {
1521
0
    for (auto it = _cols.begin(); it != _cols.end();) {
1522
0
        if (!(*it)->is_extracted_column()) {
1523
0
            it = _cols.erase(it);
1524
0
        } else {
1525
0
            ++it;
1526
0
        }
1527
0
    }
1528
0
}
1529
1530
17.3k
void TabletSchema::to_schema_pb(TabletSchemaPB* tablet_schema_pb) const {
1531
17.3k
    for (const auto& i : _cluster_key_uids) {
1532
60
        tablet_schema_pb->add_cluster_key_uids(i);
1533
60
    }
1534
17.3k
    tablet_schema_pb->set_keys_type(_keys_type);
1535
64.7k
    for (const auto& col : _cols) {
1536
64.7k
        ColumnPB* column = tablet_schema_pb->add_column();
1537
64.7k
        col->to_schema_pb(column);
1538
64.7k
    }
1539
22.1k
    for (const auto& index : _indexes) {
1540
22.1k
        auto* index_pb = tablet_schema_pb->add_index();
1541
22.1k
        index->to_schema_pb(index_pb);
1542
22.1k
    }
1543
17.3k
    tablet_schema_pb->set_num_short_key_columns(cast_set<int32_t>(_num_short_key_columns));
1544
17.3k
    tablet_schema_pb->set_num_rows_per_row_block(cast_set<int32_t>(_num_rows_per_row_block));
1545
17.3k
    tablet_schema_pb->set_compress_kind(_compress_kind);
1546
17.3k
    if (_has_bf_fpp) {
1547
8
        tablet_schema_pb->set_bf_fpp(_bf_fpp);
1548
8
    }
1549
17.3k
    tablet_schema_pb->set_next_column_unique_id(cast_set<uint32_t>(_next_column_unique_id));
1550
17.3k
    tablet_schema_pb->set_is_in_memory(_is_in_memory);
1551
17.3k
    tablet_schema_pb->set_disable_auto_compaction(_disable_auto_compaction);
1552
17.3k
    tablet_schema_pb->set_enable_single_replica_compaction(_enable_single_replica_compaction);
1553
17.3k
    tablet_schema_pb->set_store_row_column(_store_row_column);
1554
17.3k
    tablet_schema_pb->set_skip_write_index_on_load(_skip_write_index_on_load);
1555
17.3k
    tablet_schema_pb->set_delete_sign_idx(_delete_sign_idx);
1556
17.3k
    tablet_schema_pb->set_sequence_col_idx(_sequence_col_idx);
1557
17.3k
    tablet_schema_pb->set_sort_type(_sort_type);
1558
17.3k
    tablet_schema_pb->set_sort_col_num(cast_set<int32_t>(_sort_col_num));
1559
17.3k
    tablet_schema_pb->set_schema_version(_schema_version);
1560
17.3k
    tablet_schema_pb->set_compression_type(_compression_type);
1561
17.3k
    tablet_schema_pb->set_row_store_page_size(_row_store_page_size);
1562
17.3k
    tablet_schema_pb->set_storage_page_size(_storage_page_size);
1563
17.3k
    tablet_schema_pb->set_storage_dict_page_size(_storage_dict_page_size);
1564
17.3k
    tablet_schema_pb->set_version_col_idx(_version_col_idx);
1565
17.3k
    tablet_schema_pb->set_skip_bitmap_col_idx(_skip_bitmap_col_idx);
1566
17.3k
    tablet_schema_pb->set_inverted_index_storage_format(_inverted_index_storage_format);
1567
17.3k
    tablet_schema_pb->mutable_row_store_column_unique_ids()->Assign(
1568
17.3k
            _row_store_column_unique_ids.begin(), _row_store_column_unique_ids.end());
1569
17.3k
    tablet_schema_pb->set_enable_variant_flatten_nested(_enable_variant_flatten_nested);
1570
17.3k
    tablet_schema_pb->set_is_external_segment_column_meta_used(
1571
17.3k
            _is_external_segment_column_meta_used);
1572
17.3k
    tablet_schema_pb->set_integer_type_default_use_plain_encoding(
1573
17.3k
            _integer_type_default_use_plain_encoding);
1574
17.3k
    tablet_schema_pb->set_binary_plain_encoding_default_impl(_binary_plain_encoding_default_impl);
1575
17.3k
    auto column_groups_pb = tablet_schema_pb->mutable_seq_map();
1576
17.3k
    for (const auto& it : _seq_col_uid_to_value_cols_uid) {
1577
0
        uint32_t key = it.first;
1578
0
        ColumnGroupPB* cg_pb = column_groups_pb->add_cg(); // ColumnGroupPB {key: {v1, v2, v3}}
1579
0
        cg_pb->set_sequence_column(key);
1580
0
        for (auto v : it.second) {
1581
0
            cg_pb->add_columns_in_group(v);
1582
0
        }
1583
0
    }
1584
17.3k
}
1585
1586
0
size_t TabletSchema::row_size() const {
1587
0
    size_t size = 0;
1588
0
    for (const auto& column : _cols) {
1589
0
        size += column->length();
1590
0
    }
1591
0
    size += (_num_columns + 7) / 8;
1592
1593
0
    return size;
1594
0
}
1595
1596
2.52k
int32_t TabletSchema::field_index(const std::string& field_name) const {
1597
2.52k
    const auto& found = _field_name_to_index.find(StringRef(field_name));
1598
2.52k
    return (found == _field_name_to_index.end()) ? -1 : found->second;
1599
2.52k
}
1600
1601
2
int32_t TabletSchema::field_index(const vectorized::PathInData& path) const {
1602
2
    const auto& found = _field_path_to_index.find(vectorized::PathInDataRef(&path));
1603
2
    return (found == _field_path_to_index.end()) ? -1 : found->second;
1604
2
}
1605
1606
390
int32_t TabletSchema::field_index(int32_t col_unique_id) const {
1607
390
    const auto& found = _field_uniqueid_to_index.find(col_unique_id);
1608
390
    return (found == _field_uniqueid_to_index.end()) ? -1 : found->second;
1609
390
}
1610
1611
55.0k
const std::vector<TabletColumnPtr>& TabletSchema::columns() const {
1612
55.0k
    return _cols;
1613
55.0k
}
1614
1615
210k
const TabletColumn& TabletSchema::column(size_t ordinal) const {
1616
210k
    DCHECK(ordinal < _num_columns) << "ordinal:" << ordinal << ", _num_columns:" << _num_columns;
1617
210k
    return *_cols[ordinal];
1618
210k
}
1619
1620
4.94k
const TabletColumn& TabletSchema::column_by_uid(int32_t col_unique_id) const {
1621
4.94k
    return *_cols.at(_field_uniqueid_to_index.at(col_unique_id));
1622
4.94k
}
1623
1624
2
TabletColumn& TabletSchema::mutable_column_by_uid(int32_t col_unique_id) {
1625
2
    return *_cols.at(_field_uniqueid_to_index.at(col_unique_id));
1626
2
}
1627
1628
36
TabletColumn& TabletSchema::mutable_column(size_t ordinal) {
1629
36
    return *_cols.at(ordinal);
1630
36
}
1631
1632
2
void TabletSchema::update_indexes_from_thrift(const std::vector<doris::TOlapTableIndex>& tindexes) {
1633
2
    std::vector<TabletIndexPtr> indexes;
1634
4
    for (const auto& tindex : tindexes) {
1635
4
        TabletIndex index;
1636
4
        index.init_from_thrift(tindex, *this);
1637
4
        indexes.emplace_back(std::make_shared<TabletIndex>(std::move(index)));
1638
4
    }
1639
2
    _indexes = std::move(indexes);
1640
2
    _col_id_suffix_to_index.clear();
1641
2
    _index_by_unique_id_with_pattern.clear();
1642
2
    size_t index_pos = 0;
1643
4
    for (auto& index : _indexes) {
1644
4
        for (int32_t col_uid : index->col_unique_ids()) {
1645
4
            if (auto field_pattern = index->field_pattern(); !field_pattern.empty()) {
1646
0
                auto& pattern_to_index_map = _index_by_unique_id_with_pattern[col_uid];
1647
0
                pattern_to_index_map[field_pattern].emplace_back(index);
1648
4
            } else {
1649
4
                IndexKey key =
1650
4
                        std::make_tuple(index->index_type(), col_uid, index->get_index_suffix());
1651
4
                _col_id_suffix_to_index[key].push_back(index_pos);
1652
4
            }
1653
4
        }
1654
4
        index_pos++;
1655
4
    }
1656
2
}
1657
1658
0
bool TabletSchema::exist_column(const std::string& field_name) const {
1659
0
    return _field_name_to_index.contains(StringRef {field_name});
1660
0
}
1661
1662
32.0k
bool TabletSchema::has_column_unique_id(int32_t col_unique_id) const {
1663
32.0k
    return _field_uniqueid_to_index.contains(col_unique_id);
1664
32.0k
}
1665
1666
8.09k
Status TabletSchema::have_column(const std::string& field_name) const {
1667
8.09k
    if (!_field_name_to_index.contains(StringRef(field_name))) {
1668
8.09k
        return Status::Error<ErrorCode::INTERNAL_ERROR>(
1669
8.09k
                "Not found field_name, field_name:{}, schema:{}", field_name,
1670
8.09k
                get_all_field_names());
1671
8.09k
    }
1672
0
    return Status::OK();
1673
8.09k
}
1674
1675
366
Result<const TabletColumn*> TabletSchema::column(const std::string& field_name) const {
1676
366
    auto it = _field_name_to_index.find(StringRef {field_name});
1677
366
    if (it == _field_name_to_index.end()) {
1678
0
        DCHECK(false) << "field_name=" << field_name << ", table_id=" << _table_id
1679
0
                      << ", field_name_to_index=" << get_all_field_names();
1680
0
        return ResultError(
1681
0
                Status::InternalError("column not found, name={}, table_id={}, schema_version={}",
1682
0
                                      field_name, _table_id, _schema_version));
1683
0
    }
1684
366
    return _cols[it->second].get();
1685
366
}
1686
1687
void TabletSchema::update_tablet_columns(const TabletSchema& tablet_schema,
1688
0
                                         const std::vector<TColumn>& t_columns) {
1689
0
    copy_from(tablet_schema);
1690
0
    if (!t_columns.empty() && t_columns[0].col_unique_id >= 0) {
1691
0
        clear_columns();
1692
0
        for (const auto& column : t_columns) {
1693
0
            append_column(TabletColumn(column));
1694
0
        }
1695
0
    }
1696
0
}
1697
1698
134
bool TabletSchema::has_inverted_index_with_index_id(int64_t index_id) const {
1699
172
    for (size_t i = 0; i < _indexes.size(); i++) {
1700
96
        if ((_indexes[i]->index_type() == IndexType::INVERTED ||
1701
96
             _indexes[i]->index_type() == IndexType::ANN) &&
1702
96
            _indexes[i]->index_id() == index_id) {
1703
58
            return true;
1704
58
        }
1705
96
    }
1706
76
    return false;
1707
134
}
1708
1709
std::vector<const TabletIndex*> TabletSchema::inverted_indexs(
1710
65.5k
        int32_t col_unique_id, const std::string& suffix_path) const {
1711
65.5k
    std::vector<const TabletIndex*> result;
1712
65.5k
    const std::string escaped_suffix = escape_for_path_name(suffix_path);
1713
65.5k
    auto it = _col_id_suffix_to_index.find(
1714
65.5k
            std::make_tuple(IndexType::INVERTED, col_unique_id, escaped_suffix));
1715
65.5k
    if (it != _col_id_suffix_to_index.end()) {
1716
15.7k
        for (size_t pos : it->second) {
1717
15.7k
            if (pos < _indexes.size()) {
1718
15.7k
                result.push_back(_indexes[pos].get());
1719
15.7k
            }
1720
15.7k
        }
1721
15.6k
    }
1722
65.5k
    return result;
1723
65.5k
}
1724
1725
std::vector<TabletIndexPtr> TabletSchema::inverted_index_by_field_pattern(
1726
72
        int32_t col_unique_id, const std::string& field_pattern) const {
1727
72
    auto id_to_pattern_map = _index_by_unique_id_with_pattern.find(col_unique_id);
1728
72
    if (id_to_pattern_map == _index_by_unique_id_with_pattern.end()) {
1729
54
        return {};
1730
54
    }
1731
18
    auto pattern_to_index_map = id_to_pattern_map->second.find(field_pattern);
1732
18
    if (pattern_to_index_map == id_to_pattern_map->second.end()) {
1733
8
        return {};
1734
8
    }
1735
10
    return pattern_to_index_map->second;
1736
18
}
1737
1738
58.0k
std::vector<const TabletIndex*> TabletSchema::inverted_indexs(const TabletColumn& col) const {
1739
    // Some columns(Float, Double, JSONB ...) from the variant do not support inverted index
1740
58.0k
    if (!segment_v2::IndexColumnWriter::check_support_inverted_index(col)) {
1741
788
        return {};
1742
788
    }
1743
1744
    // TODO use more efficient impl
1745
    // Use parent id if unique not assigned, this could happend when accessing subcolumns of variants
1746
57.2k
    int32_t col_unique_id = col.is_extracted_column() ? col.parent_unique_id() : col.unique_id();
1747
57.2k
    std::vector<const TabletIndex*> result;
1748
57.2k
    if (result = inverted_indexs(col_unique_id, escape_for_path_name(col.suffix_path()));
1749
57.2k
        !result.empty()) {
1750
10.4k
        return result;
1751
10.4k
    }
1752
    // variant's typed column has it's own index
1753
46.8k
    else if (col.is_extracted_column() && col.path_info_ptr()->get_is_typed()) {
1754
4
        std::string relative_path = col.path_info_ptr()->copy_pop_front().get_path();
1755
4
        if (_path_set_info_map.find(col_unique_id) == _path_set_info_map.end()) {
1756
0
            return result;
1757
0
        }
1758
4
        const auto& path_set_info = _path_set_info_map.at(col_unique_id);
1759
4
        if (path_set_info.typed_path_set.find(relative_path) ==
1760
4
            path_set_info.typed_path_set.end()) {
1761
0
            return result;
1762
0
        }
1763
4
        for (const auto& index : path_set_info.typed_path_set.at(relative_path).indexes) {
1764
4
            result.push_back(index.get());
1765
4
        }
1766
4
        return result;
1767
4
    }
1768
    // variant's subcolumns has it's own index
1769
46.8k
    else if (col.is_extracted_column()) {
1770
6
        std::string relative_path = col.path_info_ptr()->copy_pop_front().get_path();
1771
6
        if (_path_set_info_map.find(col_unique_id) == _path_set_info_map.end()) {
1772
2
            return result;
1773
2
        }
1774
4
        const auto& path_set_info = _path_set_info_map.at(col_unique_id);
1775
4
        if (path_set_info.subcolumn_indexes.find(relative_path) ==
1776
4
            path_set_info.subcolumn_indexes.end()) {
1777
2
            return result;
1778
2
        }
1779
2
        for (const auto& index : path_set_info.subcolumn_indexes.at(relative_path)) {
1780
2
            result.push_back(index.get());
1781
2
        }
1782
2
    }
1783
46.8k
    return result;
1784
57.2k
}
1785
1786
const TabletIndex* TabletSchema::ann_index(int32_t col_unique_id,
1787
16
                                           const std::string& suffix_path) const {
1788
16
    for (size_t i = 0; i < _indexes.size(); i++) {
1789
8
        if (_indexes[i]->index_type() == IndexType::ANN) {
1790
8
            for (int32_t id : _indexes[i]->col_unique_ids()) {
1791
8
                if (id == col_unique_id &&
1792
8
                    _indexes[i]->get_index_suffix() == escape_for_path_name(suffix_path)) {
1793
8
                    return _indexes[i].get();
1794
8
                }
1795
8
            }
1796
8
        }
1797
8
    }
1798
8
    return nullptr;
1799
16
}
1800
1801
52.2k
const TabletIndex* TabletSchema::ann_index(const TabletColumn& col) const {
1802
52.2k
    if (!segment_v2::IndexColumnWriter::check_support_ann_index(col)) {
1803
52.2k
        return nullptr;
1804
52.2k
    }
1805
    // TODO use more efficient impl
1806
    // Use parent id if unique not assigned, this could happend when accessing subcolumns of variants
1807
16
    int32_t col_unique_id = col.is_extracted_column() ? col.parent_unique_id() : col.unique_id();
1808
16
    return ann_index(col_unique_id, escape_for_path_name(col.suffix_path()));
1809
52.2k
}
1810
1811
0
bool TabletSchema::has_ngram_bf_index(int32_t col_unique_id) const {
1812
0
    IndexKey index_key(IndexType::NGRAM_BF, col_unique_id, "");
1813
0
    auto it = _col_id_suffix_to_index.find(index_key);
1814
0
    return it != _col_id_suffix_to_index.end();
1815
0
}
1816
1817
31.9k
const TabletIndex* TabletSchema::get_ngram_bf_index(int32_t col_unique_id) const {
1818
    // Get the ngram bf index for the given column unique id
1819
31.9k
    IndexKey index_key(IndexType::NGRAM_BF, col_unique_id, "");
1820
31.9k
    auto it = _col_id_suffix_to_index.find(index_key);
1821
31.9k
    if (it != _col_id_suffix_to_index.end()) {
1822
2
        if (!it->second.empty() && it->second[0] < _indexes.size()) {
1823
2
            return _indexes[it->second[0]].get();
1824
2
        }
1825
2
    }
1826
31.9k
    return nullptr;
1827
31.9k
}
1828
1829
const TabletIndex* TabletSchema::get_index(int32_t col_unique_id, IndexType index_type,
1830
28
                                           const std::string& suffix_path) const {
1831
28
    IndexKey index_key(index_type, col_unique_id, suffix_path);
1832
28
    auto it = _col_id_suffix_to_index.find(index_key);
1833
28
    if (it != _col_id_suffix_to_index.end()) {
1834
24
        if (!it->second.empty() && it->second[0] < _indexes.size()) {
1835
24
            return _indexes[it->second[0]].get();
1836
24
        }
1837
24
    }
1838
4
    return nullptr;
1839
28
}
1840
1841
vectorized::Block TabletSchema::create_block(
1842
        const std::vector<uint32_t>& return_columns,
1843
1.45k
        const std::unordered_set<uint32_t>* tablet_columns_need_convert_null) const {
1844
1.45k
    vectorized::Block block;
1845
5.73k
    for (int i = 0; i < return_columns.size(); ++i) {
1846
4.28k
        const ColumnId cid = return_columns[i];
1847
4.28k
        const auto& col = *_cols[cid];
1848
4.28k
        bool is_nullable = (tablet_columns_need_convert_null != nullptr &&
1849
4.28k
                            tablet_columns_need_convert_null->find(cid) !=
1850
0
                                    tablet_columns_need_convert_null->end());
1851
4.28k
        auto data_type = vectorized::DataTypeFactory::instance().create_data_type(col, is_nullable);
1852
4.28k
        if (col.type() == FieldType::OLAP_FIELD_TYPE_STRUCT ||
1853
4.28k
            col.type() == FieldType::OLAP_FIELD_TYPE_MAP ||
1854
4.28k
            col.type() == FieldType::OLAP_FIELD_TYPE_ARRAY) {
1855
4
            if (_pruned_columns_data_type.contains(col.unique_id())) {
1856
0
                data_type = _pruned_columns_data_type.at(col.unique_id());
1857
0
            }
1858
4
        }
1859
1860
4.28k
        if (_vir_col_idx_to_unique_id.contains(cid)) {
1861
0
            block.insert({vectorized::ColumnNothing::create(0), data_type, col.name()});
1862
0
            VLOG_DEBUG << fmt::format(
1863
0
                    "Create block from tablet schema, column cid {} is virtual column, col_name: "
1864
0
                    "{}, col_unique_id: {}, type {}",
1865
0
                    cid, col.name(), col.unique_id(), data_type->get_name());
1866
4.28k
        } else {
1867
4.28k
            block.insert({data_type->create_column(), data_type, col.name()});
1868
4.28k
        }
1869
4.28k
    }
1870
1.45k
    return block;
1871
1.45k
}
1872
1873
4.00k
vectorized::Block TabletSchema::create_block(bool ignore_dropped_col) const {
1874
4.00k
    vectorized::Block block;
1875
12.6k
    for (const auto& col : _cols) {
1876
12.6k
        if (ignore_dropped_col && is_dropped_column(*col)) {
1877
0
            continue;
1878
0
        }
1879
1880
12.6k
        auto data_type = vectorized::DataTypeFactory::instance().create_data_type(*col);
1881
12.6k
        if (col->type() == FieldType::OLAP_FIELD_TYPE_STRUCT) {
1882
0
            if (_pruned_columns_data_type.contains(col->unique_id())) {
1883
0
                data_type = _pruned_columns_data_type.at(col->unique_id());
1884
0
            }
1885
0
        }
1886
12.6k
        block.insert({data_type->create_column(), data_type, col->name()});
1887
12.6k
    }
1888
4.00k
    return block;
1889
4.00k
}
1890
1891
0
vectorized::Block TabletSchema::create_block_by_cids(const std::vector<uint32_t>& cids) const {
1892
0
    vectorized::Block block;
1893
0
    for (const auto& cid : cids) {
1894
0
        const auto& col = *_cols[cid];
1895
0
        auto data_type = vectorized::DataTypeFactory::instance().create_data_type(col);
1896
0
        if (col.type() == FieldType::OLAP_FIELD_TYPE_STRUCT) {
1897
0
            if (_pruned_columns_data_type.contains(col.unique_id())) {
1898
0
                data_type = _pruned_columns_data_type.at(col.unique_id());
1899
0
            }
1900
0
        }
1901
0
        block.insert({data_type->create_column(), data_type, col.name()});
1902
0
    }
1903
0
    return block;
1904
0
}
1905
1906
0
bool operator==(const TabletColumn& a, const TabletColumn& b) {
1907
0
    if (a._unique_id != b._unique_id) return false;
1908
0
    if (a._col_name != b._col_name) return false;
1909
0
    if (a._type != b._type) return false;
1910
0
    if (a._is_key != b._is_key) return false;
1911
0
    if (a._aggregation != b._aggregation) return false;
1912
0
    if (a._is_nullable != b._is_nullable) return false;
1913
0
    if (a._has_default_value != b._has_default_value) return false;
1914
0
    if (a._has_default_value) {
1915
0
        if (a._default_value != b._default_value) return false;
1916
0
    }
1917
0
    if (a._is_decimal != b._is_decimal) return false;
1918
0
    if (a._is_decimal) {
1919
0
        if (a._precision != b._precision) return false;
1920
0
        if (a._frac != b._frac) return false;
1921
0
    }
1922
0
    if (a._length != b._length) return false;
1923
0
    if (a._index_length != b._index_length) return false;
1924
0
    if (a._is_bf_column != b._is_bf_column) return false;
1925
0
    if (a._column_path == nullptr && a._column_path != nullptr) return false;
1926
0
    if (b._column_path == nullptr && a._column_path != nullptr) return false;
1927
0
    if (b._column_path != nullptr && a._column_path != nullptr &&
1928
0
        *a._column_path != *b._column_path)
1929
0
        return false;
1930
0
    return true;
1931
0
}
1932
1933
0
bool operator!=(const TabletColumn& a, const TabletColumn& b) {
1934
0
    return !(a == b);
1935
0
}
1936
1937
6
bool operator==(const TabletSchema& a, const TabletSchema& b) {
1938
6
    if (a._keys_type != b._keys_type) return false;
1939
6
    if (a._cols.size() != b._cols.size()) return false;
1940
6
    for (int i = 0; i < a._cols.size(); ++i) {
1941
0
        if (*a._cols[i] != *b._cols[i]) return false;
1942
0
    }
1943
6
    if (a._num_columns != b._num_columns) return false;
1944
6
    if (a._num_key_columns != b._num_key_columns) return false;
1945
6
    if (a._num_null_columns != b._num_null_columns) return false;
1946
6
    if (a._num_short_key_columns != b._num_short_key_columns) return false;
1947
6
    if (a._num_rows_per_row_block != b._num_rows_per_row_block) return false;
1948
6
    if (a._compress_kind != b._compress_kind) return false;
1949
6
    if (a._next_column_unique_id != b._next_column_unique_id) return false;
1950
6
    if (a._has_bf_fpp != b._has_bf_fpp) return false;
1951
6
    if (a._has_bf_fpp) {
1952
0
        if (std::abs(a._bf_fpp - b._bf_fpp) > 1e-6) return false;
1953
0
    }
1954
6
    if (a._is_in_memory != b._is_in_memory) return false;
1955
6
    if (a._delete_sign_idx != b._delete_sign_idx) return false;
1956
6
    if (a._disable_auto_compaction != b._disable_auto_compaction) return false;
1957
6
    if (a._enable_single_replica_compaction != b._enable_single_replica_compaction) return false;
1958
6
    if (a._store_row_column != b._store_row_column) return false;
1959
6
    if (a._row_store_page_size != b._row_store_page_size) return false;
1960
6
    if (a._storage_page_size != b._storage_page_size) return false;
1961
6
    if (a._storage_dict_page_size != b._storage_dict_page_size) return false;
1962
6
    if (a._skip_write_index_on_load != b._skip_write_index_on_load) return false;
1963
6
    if (a._enable_variant_flatten_nested != b._enable_variant_flatten_nested) return false;
1964
6
    if (a._is_external_segment_column_meta_used != b._is_external_segment_column_meta_used)
1965
0
        return false;
1966
6
    if (a._integer_type_default_use_plain_encoding != b._integer_type_default_use_plain_encoding)
1967
0
        return false;
1968
6
    if (a._binary_plain_encoding_default_impl != b._binary_plain_encoding_default_impl)
1969
0
        return false;
1970
6
    return true;
1971
6
}
1972
1973
6
bool operator!=(const TabletSchema& a, const TabletSchema& b) {
1974
6
    return !(a == b);
1975
6
}
1976
#include "common/compile_check_end.h"
1977
} // namespace doris