/root/doris/be/src/olap/tablet_schema.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "olap/tablet_schema.h" |
19 | | |
20 | | #include <gen_cpp/Descriptors_types.h> |
21 | | #include <gen_cpp/olap_file.pb.h> |
22 | | #include <glog/logging.h> |
23 | | #include <google/protobuf/io/coded_stream.h> |
24 | | #include <google/protobuf/io/zero_copy_stream.h> |
25 | | #include <google/protobuf/io/zero_copy_stream_impl_lite.h> |
26 | | |
27 | | #include <algorithm> |
28 | | #include <cctype> |
29 | | // IWYU pragma: no_include <bits/std_abs.h> |
30 | | #include <cmath> // IWYU pragma: keep |
31 | | #include <memory> |
32 | | #include <ostream> |
33 | | #include <vector> |
34 | | |
35 | | #include "common/compiler_util.h" // IWYU pragma: keep |
36 | | #include "common/consts.h" |
37 | | #include "common/status.h" |
38 | | #include "exec/tablet_info.h" |
39 | | #include "olap/inverted_index_parser.h" |
40 | | #include "olap/olap_define.h" |
41 | | #include "olap/tablet_column_object_pool.h" |
42 | | #include "olap/types.h" |
43 | | #include "olap/utils.h" |
44 | | #include "runtime/memory/lru_cache_policy.h" |
45 | | #include "runtime/thread_context.h" |
46 | | #include "tablet_meta.h" |
47 | | #include "vec/aggregate_functions/aggregate_function_simple_factory.h" |
48 | | #include "vec/aggregate_functions/aggregate_function_state_union.h" |
49 | | #include "vec/common/hex.h" |
50 | | #include "vec/common/string_ref.h" |
51 | | #include "vec/core/block.h" |
52 | | #include "vec/data_types/data_type.h" |
53 | | #include "vec/data_types/data_type_factory.hpp" |
54 | | #include "vec/json/path_in_data.h" |
55 | | |
56 | | namespace doris { |
57 | | |
58 | 0 | FieldType TabletColumn::get_field_type_by_type(PrimitiveType primitiveType) { |
59 | 0 | switch (primitiveType) { |
60 | 0 | case PrimitiveType::INVALID_TYPE: |
61 | 0 | return FieldType::OLAP_FIELD_TYPE_UNKNOWN; |
62 | 0 | case PrimitiveType::TYPE_NULL: |
63 | 0 | return FieldType::OLAP_FIELD_TYPE_NONE; |
64 | 0 | case PrimitiveType::TYPE_BOOLEAN: |
65 | 0 | return FieldType::OLAP_FIELD_TYPE_BOOL; |
66 | 0 | case PrimitiveType::TYPE_TINYINT: |
67 | 0 | return FieldType::OLAP_FIELD_TYPE_TINYINT; |
68 | 0 | case PrimitiveType::TYPE_SMALLINT: |
69 | 0 | return FieldType::OLAP_FIELD_TYPE_SMALLINT; |
70 | 0 | case PrimitiveType::TYPE_INT: |
71 | 0 | return FieldType::OLAP_FIELD_TYPE_INT; |
72 | 0 | case PrimitiveType::TYPE_BIGINT: |
73 | 0 | return FieldType::OLAP_FIELD_TYPE_BIGINT; |
74 | 0 | case PrimitiveType::TYPE_LARGEINT: |
75 | 0 | return FieldType::OLAP_FIELD_TYPE_LARGEINT; |
76 | 0 | case PrimitiveType::TYPE_FLOAT: |
77 | 0 | return FieldType::OLAP_FIELD_TYPE_FLOAT; |
78 | 0 | case PrimitiveType::TYPE_DOUBLE: |
79 | 0 | return FieldType::OLAP_FIELD_TYPE_DOUBLE; |
80 | 0 | case PrimitiveType::TYPE_VARCHAR: |
81 | 0 | return FieldType::OLAP_FIELD_TYPE_VARCHAR; |
82 | 0 | case PrimitiveType::TYPE_DATE: |
83 | 0 | return FieldType::OLAP_FIELD_TYPE_DATE; |
84 | 0 | case PrimitiveType::TYPE_DATETIME: |
85 | 0 | return FieldType::OLAP_FIELD_TYPE_DATETIME; |
86 | 0 | case PrimitiveType::TYPE_BINARY: |
87 | 0 | return FieldType::OLAP_FIELD_TYPE_UNKNOWN; // Not implemented |
88 | 0 | case PrimitiveType::TYPE_CHAR: |
89 | 0 | return FieldType::OLAP_FIELD_TYPE_CHAR; |
90 | 0 | case PrimitiveType::TYPE_STRUCT: |
91 | 0 | return FieldType::OLAP_FIELD_TYPE_STRUCT; |
92 | 0 | case PrimitiveType::TYPE_ARRAY: |
93 | 0 | return FieldType::OLAP_FIELD_TYPE_ARRAY; |
94 | 0 | case PrimitiveType::TYPE_MAP: |
95 | 0 | return FieldType::OLAP_FIELD_TYPE_MAP; |
96 | 0 | case PrimitiveType::TYPE_HLL: |
97 | 0 | return FieldType::OLAP_FIELD_TYPE_HLL; |
98 | 0 | case PrimitiveType::TYPE_DECIMALV2: |
99 | 0 | return FieldType::OLAP_FIELD_TYPE_UNKNOWN; // Not implemented |
100 | 0 | case PrimitiveType::TYPE_OBJECT: |
101 | 0 | return FieldType::OLAP_FIELD_TYPE_OBJECT; |
102 | 0 | case PrimitiveType::TYPE_STRING: |
103 | 0 | return FieldType::OLAP_FIELD_TYPE_STRING; |
104 | 0 | case PrimitiveType::TYPE_QUANTILE_STATE: |
105 | 0 | return FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE; |
106 | 0 | case PrimitiveType::TYPE_DATEV2: |
107 | 0 | return FieldType::OLAP_FIELD_TYPE_DATEV2; |
108 | 0 | case PrimitiveType::TYPE_DATETIMEV2: |
109 | 0 | return FieldType::OLAP_FIELD_TYPE_DATETIMEV2; |
110 | 0 | case PrimitiveType::TYPE_TIMEV2: |
111 | 0 | return FieldType::OLAP_FIELD_TYPE_TIMEV2; |
112 | 0 | case PrimitiveType::TYPE_DECIMAL32: |
113 | 0 | return FieldType::OLAP_FIELD_TYPE_DECIMAL32; |
114 | 0 | case PrimitiveType::TYPE_DECIMAL64: |
115 | 0 | return FieldType::OLAP_FIELD_TYPE_DECIMAL64; |
116 | 0 | case PrimitiveType::TYPE_DECIMAL128I: |
117 | 0 | return FieldType::OLAP_FIELD_TYPE_DECIMAL128I; |
118 | 0 | case PrimitiveType::TYPE_JSONB: |
119 | 0 | return FieldType::OLAP_FIELD_TYPE_JSONB; |
120 | 0 | case PrimitiveType::TYPE_VARIANT: |
121 | 0 | return FieldType::OLAP_FIELD_TYPE_VARIANT; |
122 | 0 | case PrimitiveType::TYPE_LAMBDA_FUNCTION: |
123 | 0 | return FieldType::OLAP_FIELD_TYPE_UNKNOWN; // Not implemented |
124 | 0 | case PrimitiveType::TYPE_AGG_STATE: |
125 | 0 | return FieldType::OLAP_FIELD_TYPE_AGG_STATE; |
126 | 0 | default: |
127 | 0 | return FieldType::OLAP_FIELD_TYPE_UNKNOWN; |
128 | 0 | } |
129 | 0 | } |
130 | | |
131 | 2.97k | FieldType TabletColumn::get_field_type_by_string(const std::string& type_str) { |
132 | 2.97k | std::string upper_type_str = type_str; |
133 | 2.97k | std::transform(type_str.begin(), type_str.end(), upper_type_str.begin(), |
134 | 16.3k | [](auto c) { return std::toupper(c); }); |
135 | 2.97k | FieldType type; |
136 | | |
137 | 2.97k | if (0 == upper_type_str.compare("TINYINT")) { |
138 | 330 | type = FieldType::OLAP_FIELD_TYPE_TINYINT; |
139 | 2.64k | } else if (0 == upper_type_str.compare("SMALLINT")) { |
140 | 251 | type = FieldType::OLAP_FIELD_TYPE_SMALLINT; |
141 | 2.39k | } else if (0 == upper_type_str.compare("INT")) { |
142 | 1.11k | type = FieldType::OLAP_FIELD_TYPE_INT; |
143 | 1.27k | } else if (0 == upper_type_str.compare("BIGINT")) { |
144 | 153 | type = FieldType::OLAP_FIELD_TYPE_BIGINT; |
145 | 1.12k | } else if (0 == upper_type_str.compare("LARGEINT")) { |
146 | 137 | type = FieldType::OLAP_FIELD_TYPE_LARGEINT; |
147 | 984 | } else if (0 == upper_type_str.compare("UNSIGNED_TINYINT")) { |
148 | 0 | type = FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT; |
149 | 984 | } else if (0 == upper_type_str.compare("UNSIGNED_SMALLINT")) { |
150 | 0 | type = FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT; |
151 | 984 | } else if (0 == upper_type_str.compare("UNSIGNED_INT")) { |
152 | 0 | type = FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT; |
153 | 984 | } else if (0 == upper_type_str.compare("UNSIGNED_BIGINT")) { |
154 | 0 | type = FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT; |
155 | 984 | } else if (0 == upper_type_str.compare("IPV4")) { |
156 | 0 | type = FieldType::OLAP_FIELD_TYPE_IPV4; |
157 | 984 | } else if (0 == upper_type_str.compare("IPV6")) { |
158 | 0 | type = FieldType::OLAP_FIELD_TYPE_IPV6; |
159 | 984 | } else if (0 == upper_type_str.compare("FLOAT")) { |
160 | 0 | type = FieldType::OLAP_FIELD_TYPE_FLOAT; |
161 | 984 | } else if (0 == upper_type_str.compare("DISCRETE_DOUBLE")) { |
162 | 0 | type = FieldType::OLAP_FIELD_TYPE_DISCRETE_DOUBLE; |
163 | 984 | } else if (0 == upper_type_str.compare("DOUBLE")) { |
164 | 0 | type = FieldType::OLAP_FIELD_TYPE_DOUBLE; |
165 | 984 | } else if (0 == upper_type_str.compare("CHAR")) { |
166 | 138 | type = FieldType::OLAP_FIELD_TYPE_CHAR; |
167 | 846 | } else if (0 == upper_type_str.compare("DATE")) { |
168 | 139 | type = FieldType::OLAP_FIELD_TYPE_DATE; |
169 | 707 | } else if (0 == upper_type_str.compare("DATEV2")) { |
170 | 133 | type = FieldType::OLAP_FIELD_TYPE_DATEV2; |
171 | 574 | } else if (0 == upper_type_str.compare("DATETIMEV2")) { |
172 | 0 | type = FieldType::OLAP_FIELD_TYPE_DATETIMEV2; |
173 | 574 | } else if (0 == upper_type_str.compare("DATETIME")) { |
174 | 168 | type = FieldType::OLAP_FIELD_TYPE_DATETIME; |
175 | 406 | } else if (0 == upper_type_str.compare("DECIMAL32")) { |
176 | 0 | type = FieldType::OLAP_FIELD_TYPE_DECIMAL32; |
177 | 406 | } else if (0 == upper_type_str.compare("DECIMAL64")) { |
178 | 0 | type = FieldType::OLAP_FIELD_TYPE_DECIMAL64; |
179 | 406 | } else if (0 == upper_type_str.compare("DECIMAL128I")) { |
180 | 0 | type = FieldType::OLAP_FIELD_TYPE_DECIMAL128I; |
181 | 406 | } else if (0 == upper_type_str.compare("DECIMAL256")) { |
182 | 0 | type = FieldType::OLAP_FIELD_TYPE_DECIMAL256; |
183 | 406 | } else if (0 == upper_type_str.compare(0, 7, "DECIMAL")) { |
184 | 141 | type = FieldType::OLAP_FIELD_TYPE_DECIMAL; |
185 | 265 | } else if (0 == upper_type_str.compare(0, 7, "VARCHAR")) { |
186 | 153 | type = FieldType::OLAP_FIELD_TYPE_VARCHAR; |
187 | 153 | } else if (0 == upper_type_str.compare("STRING")) { |
188 | 20 | type = FieldType::OLAP_FIELD_TYPE_STRING; |
189 | 92 | } else if (0 == upper_type_str.compare("JSONB")) { |
190 | 0 | type = FieldType::OLAP_FIELD_TYPE_JSONB; |
191 | 92 | } else if (0 == upper_type_str.compare("VARIANT")) { |
192 | 2 | type = FieldType::OLAP_FIELD_TYPE_VARIANT; |
193 | 90 | } else if (0 == upper_type_str.compare("BOOLEAN")) { |
194 | 0 | type = FieldType::OLAP_FIELD_TYPE_BOOL; |
195 | 90 | } else if (0 == upper_type_str.compare(0, 3, "HLL")) { |
196 | 6 | type = FieldType::OLAP_FIELD_TYPE_HLL; |
197 | 84 | } else if (0 == upper_type_str.compare("STRUCT")) { |
198 | 0 | type = FieldType::OLAP_FIELD_TYPE_STRUCT; |
199 | 84 | } else if (0 == upper_type_str.compare("LIST")) { |
200 | 0 | type = FieldType::OLAP_FIELD_TYPE_ARRAY; |
201 | 84 | } else if (0 == upper_type_str.compare("MAP")) { |
202 | 0 | type = FieldType::OLAP_FIELD_TYPE_MAP; |
203 | 84 | } else if (0 == upper_type_str.compare("OBJECT")) { |
204 | 0 | type = FieldType::OLAP_FIELD_TYPE_OBJECT; |
205 | 84 | } else if (0 == upper_type_str.compare("ARRAY")) { |
206 | 0 | type = FieldType::OLAP_FIELD_TYPE_ARRAY; |
207 | 84 | } else if (0 == upper_type_str.compare("QUANTILE_STATE")) { |
208 | 0 | type = FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE; |
209 | 84 | } else if (0 == upper_type_str.compare("AGG_STATE")) { |
210 | 0 | type = FieldType::OLAP_FIELD_TYPE_AGG_STATE; |
211 | 84 | } else { |
212 | 84 | LOG(WARNING) << "invalid type string. [type='" << type_str << "']"; |
213 | 84 | type = FieldType::OLAP_FIELD_TYPE_UNKNOWN; |
214 | 84 | } |
215 | | |
216 | 2.97k | return type; |
217 | 2.97k | } |
218 | | |
219 | 2.12k | FieldAggregationMethod TabletColumn::get_aggregation_type_by_string(const std::string& str) { |
220 | 2.12k | std::string upper_str = str; |
221 | 2.12k | std::transform(str.begin(), str.end(), upper_str.begin(), |
222 | 9.46k | [](auto c) { return std::toupper(c); }); |
223 | 2.12k | FieldAggregationMethod aggregation_type; |
224 | | |
225 | 2.12k | if (0 == upper_str.compare("NONE")) { |
226 | 1.05k | aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE; |
227 | 1.07k | } else if (0 == upper_str.compare("SUM")) { |
228 | 554 | aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM; |
229 | 554 | } else if (0 == upper_str.compare("MIN")) { |
230 | 4 | aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MIN; |
231 | 513 | } else if (0 == upper_str.compare("MAX")) { |
232 | 4 | aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MAX; |
233 | 509 | } else if (0 == upper_str.compare("REPLACE")) { |
234 | 503 | aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE; |
235 | 503 | } else if (0 == upper_str.compare("REPLACE_IF_NOT_NULL")) { |
236 | 0 | aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL; |
237 | 6 | } else if (0 == upper_str.compare("HLL_UNION")) { |
238 | 6 | aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_HLL_UNION; |
239 | 6 | } else if (0 == upper_str.compare("BITMAP_UNION")) { |
240 | 0 | aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_BITMAP_UNION; |
241 | 0 | } else if (0 == upper_str.compare("QUANTILE_UNION")) { |
242 | 0 | aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_QUANTILE_UNION; |
243 | 0 | } else if (!upper_str.empty()) { |
244 | 0 | aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_GENERIC; |
245 | 0 | } else { |
246 | 0 | aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_UNKNOWN; |
247 | 0 | } |
248 | | |
249 | 2.12k | return aggregation_type; |
250 | 2.12k | } |
251 | | |
252 | 10.9k | std::string TabletColumn::get_string_by_field_type(FieldType type) { |
253 | 10.9k | switch (type) { |
254 | 1.39k | case FieldType::OLAP_FIELD_TYPE_TINYINT: |
255 | 1.39k | return "TINYINT"; |
256 | | |
257 | 0 | case FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT: |
258 | 0 | return "UNSIGNED_TINYINT"; |
259 | | |
260 | 1.10k | case FieldType::OLAP_FIELD_TYPE_SMALLINT: |
261 | 1.10k | return "SMALLINT"; |
262 | | |
263 | 0 | case FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT: |
264 | 0 | return "UNSIGNED_SMALLINT"; |
265 | | |
266 | 3.20k | case FieldType::OLAP_FIELD_TYPE_INT: |
267 | 3.20k | return "INT"; |
268 | | |
269 | 0 | case FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT: |
270 | 0 | return "UNSIGNED_INT"; |
271 | | |
272 | 719 | case FieldType::OLAP_FIELD_TYPE_BIGINT: |
273 | 719 | return "BIGINT"; |
274 | | |
275 | 616 | case FieldType::OLAP_FIELD_TYPE_LARGEINT: |
276 | 616 | return "LARGEINT"; |
277 | | |
278 | 0 | case FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT: |
279 | 0 | return "UNSIGNED_BIGINT"; |
280 | | |
281 | 0 | case FieldType::OLAP_FIELD_TYPE_IPV4: |
282 | 0 | return "IPV4"; |
283 | | |
284 | 0 | case FieldType::OLAP_FIELD_TYPE_IPV6: |
285 | 0 | return "IPV6"; |
286 | | |
287 | 0 | case FieldType::OLAP_FIELD_TYPE_FLOAT: |
288 | 0 | return "FLOAT"; |
289 | | |
290 | 0 | case FieldType::OLAP_FIELD_TYPE_DOUBLE: |
291 | 0 | return "DOUBLE"; |
292 | | |
293 | 0 | case FieldType::OLAP_FIELD_TYPE_DISCRETE_DOUBLE: |
294 | 0 | return "DISCRETE_DOUBLE"; |
295 | | |
296 | 616 | case FieldType::OLAP_FIELD_TYPE_CHAR: |
297 | 616 | return "CHAR"; |
298 | | |
299 | 618 | case FieldType::OLAP_FIELD_TYPE_DATE: |
300 | 618 | return "DATE"; |
301 | | |
302 | 578 | case FieldType::OLAP_FIELD_TYPE_DATEV2: |
303 | 578 | return "DATEV2"; |
304 | | |
305 | 801 | case FieldType::OLAP_FIELD_TYPE_DATETIME: |
306 | 801 | return "DATETIME"; |
307 | | |
308 | 0 | case FieldType::OLAP_FIELD_TYPE_DATETIMEV2: |
309 | 0 | return "DATETIMEV2"; |
310 | | |
311 | 616 | case FieldType::OLAP_FIELD_TYPE_DECIMAL: |
312 | 616 | return "DECIMAL"; |
313 | | |
314 | 0 | case FieldType::OLAP_FIELD_TYPE_DECIMAL32: |
315 | 0 | return "DECIMAL32"; |
316 | | |
317 | 0 | case FieldType::OLAP_FIELD_TYPE_DECIMAL64: |
318 | 0 | return "DECIMAL64"; |
319 | | |
320 | 0 | case FieldType::OLAP_FIELD_TYPE_DECIMAL128I: |
321 | 0 | return "DECIMAL128I"; |
322 | | |
323 | 0 | case FieldType::OLAP_FIELD_TYPE_DECIMAL256: |
324 | 0 | return "DECIMAL256"; |
325 | | |
326 | 616 | case FieldType::OLAP_FIELD_TYPE_VARCHAR: |
327 | 616 | return "VARCHAR"; |
328 | | |
329 | 0 | case FieldType::OLAP_FIELD_TYPE_JSONB: |
330 | 0 | return "JSONB"; |
331 | | |
332 | 0 | case FieldType::OLAP_FIELD_TYPE_VARIANT: |
333 | 0 | return "VARIANT"; |
334 | | |
335 | 38 | case FieldType::OLAP_FIELD_TYPE_STRING: |
336 | 38 | return "STRING"; |
337 | | |
338 | 0 | case FieldType::OLAP_FIELD_TYPE_BOOL: |
339 | 0 | return "BOOLEAN"; |
340 | | |
341 | 3 | case FieldType::OLAP_FIELD_TYPE_HLL: |
342 | 3 | return "HLL"; |
343 | | |
344 | 0 | case FieldType::OLAP_FIELD_TYPE_STRUCT: |
345 | 0 | return "STRUCT"; |
346 | | |
347 | 0 | case FieldType::OLAP_FIELD_TYPE_ARRAY: |
348 | 0 | return "ARRAY"; |
349 | | |
350 | 0 | case FieldType::OLAP_FIELD_TYPE_MAP: |
351 | 0 | return "MAP"; |
352 | | |
353 | 0 | case FieldType::OLAP_FIELD_TYPE_OBJECT: |
354 | 0 | return "OBJECT"; |
355 | 0 | case FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE: |
356 | 0 | return "QUANTILE_STATE"; |
357 | 0 | case FieldType::OLAP_FIELD_TYPE_AGG_STATE: |
358 | 0 | return "AGG_STATE"; |
359 | 0 | default: |
360 | 0 | return "UNKNOWN"; |
361 | 10.9k | } |
362 | 10.9k | } |
363 | | |
364 | 28 | std::string TabletColumn::get_string_by_aggregation_type(FieldAggregationMethod type) { |
365 | 28 | switch (type) { |
366 | 4 | case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE: |
367 | 4 | return "NONE"; |
368 | | |
369 | 9 | case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM: |
370 | 9 | return "SUM"; |
371 | | |
372 | 0 | case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MIN: |
373 | 0 | return "MIN"; |
374 | | |
375 | 0 | case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MAX: |
376 | 0 | return "MAX"; |
377 | | |
378 | 15 | case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE: |
379 | 15 | return "REPLACE"; |
380 | | |
381 | 0 | case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL: |
382 | 0 | return "REPLACE_IF_NOT_NULL"; |
383 | | |
384 | 0 | case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_HLL_UNION: |
385 | 0 | return "HLL_UNION"; |
386 | | |
387 | 0 | case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_BITMAP_UNION: |
388 | 0 | return "BITMAP_UNION"; |
389 | | |
390 | 0 | case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_QUANTILE_UNION: |
391 | 0 | return "QUANTILE_UNION"; |
392 | | |
393 | 0 | default: |
394 | 0 | return "UNKNOWN"; |
395 | 28 | } |
396 | 28 | } |
397 | | |
398 | 1.48k | uint32_t TabletColumn::get_field_length_by_type(TPrimitiveType::type type, uint32_t string_length) { |
399 | 1.48k | switch (type) { |
400 | 110 | case TPrimitiveType::TINYINT: |
401 | 110 | case TPrimitiveType::BOOLEAN: |
402 | 110 | return 1; |
403 | 148 | case TPrimitiveType::SMALLINT: |
404 | 148 | return 2; |
405 | 409 | case TPrimitiveType::INT: |
406 | 409 | return 4; |
407 | 114 | case TPrimitiveType::BIGINT: |
408 | 114 | return 8; |
409 | 101 | case TPrimitiveType::LARGEINT: |
410 | 101 | return 16; |
411 | 0 | case TPrimitiveType::IPV4: |
412 | 0 | return 4; |
413 | 0 | case TPrimitiveType::IPV6: |
414 | 0 | return 16; |
415 | 101 | case TPrimitiveType::DATE: |
416 | 101 | return 3; |
417 | 92 | case TPrimitiveType::DATEV2: |
418 | 92 | return 4; |
419 | 109 | case TPrimitiveType::DATETIME: |
420 | 109 | return 8; |
421 | 0 | case TPrimitiveType::DATETIMEV2: |
422 | 0 | return 8; |
423 | 0 | case TPrimitiveType::FLOAT: |
424 | 0 | return 4; |
425 | 0 | case TPrimitiveType::DOUBLE: |
426 | 0 | return 8; |
427 | 0 | case TPrimitiveType::QUANTILE_STATE: |
428 | 0 | case TPrimitiveType::OBJECT: |
429 | 0 | return 16; |
430 | 101 | case TPrimitiveType::CHAR: |
431 | 101 | return string_length; |
432 | 101 | case TPrimitiveType::VARCHAR: |
433 | 101 | case TPrimitiveType::HLL: |
434 | 101 | case TPrimitiveType::AGG_STATE: |
435 | 101 | return string_length + sizeof(OLAP_VARCHAR_MAX_LENGTH); |
436 | 0 | case TPrimitiveType::STRING: |
437 | 0 | case TPrimitiveType::VARIANT: |
438 | 0 | return string_length + sizeof(OLAP_STRING_MAX_LENGTH); |
439 | 0 | case TPrimitiveType::JSONB: |
440 | 0 | return string_length + sizeof(OLAP_JSONB_MAX_LENGTH); |
441 | 0 | case TPrimitiveType::STRUCT: |
442 | | // Note that(xy): this is the length of struct type itself, |
443 | | // the length of its subtypes are not included. |
444 | 0 | return OLAP_STRUCT_MAX_LENGTH; |
445 | 0 | case TPrimitiveType::ARRAY: |
446 | 0 | return OLAP_ARRAY_MAX_LENGTH; |
447 | 0 | case TPrimitiveType::MAP: |
448 | 0 | return OLAP_MAP_MAX_LENGTH; |
449 | 0 | case TPrimitiveType::DECIMAL32: |
450 | 0 | return 4; |
451 | 0 | case TPrimitiveType::DECIMAL64: |
452 | 0 | return 8; |
453 | 0 | case TPrimitiveType::DECIMAL128I: |
454 | 0 | return 16; |
455 | 0 | case TPrimitiveType::DECIMAL256: |
456 | 0 | return 32; |
457 | 101 | case TPrimitiveType::DECIMALV2: |
458 | 101 | return 12; // use 12 bytes in olap engine. |
459 | 0 | default: |
460 | 0 | LOG(WARNING) << "unknown field type. [type=" << type << "]"; |
461 | 0 | return 0; |
462 | 1.48k | } |
463 | 1.48k | } |
464 | | |
465 | 2.94k | TabletColumn::TabletColumn() : _aggregation(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE) {} |
466 | | |
467 | 133 | TabletColumn::TabletColumn(FieldAggregationMethod agg, FieldType type) { |
468 | 133 | _aggregation = agg; |
469 | 133 | _type = type; |
470 | 133 | } |
471 | | |
472 | 15 | TabletColumn::TabletColumn(FieldAggregationMethod agg, FieldType filed_type, bool is_nullable) { |
473 | 15 | _aggregation = agg; |
474 | 15 | _type = filed_type; |
475 | 15 | _length = get_scalar_type_info(filed_type)->size(); |
476 | 15 | _is_nullable = is_nullable; |
477 | 15 | } |
478 | | |
479 | | TabletColumn::TabletColumn(FieldAggregationMethod agg, FieldType filed_type, bool is_nullable, |
480 | 15 | int32_t unique_id, size_t length) { |
481 | 15 | _aggregation = agg; |
482 | 15 | _type = filed_type; |
483 | 15 | _is_nullable = is_nullable; |
484 | 15 | _unique_id = unique_id; |
485 | 15 | _length = length; |
486 | 15 | } |
487 | | |
488 | 0 | TabletColumn::TabletColumn(const ColumnPB& column) { |
489 | 0 | init_from_pb(column); |
490 | 0 | } |
491 | | |
492 | 0 | TabletColumn::TabletColumn(const TColumn& column) { |
493 | 0 | init_from_thrift(column); |
494 | 0 | } |
495 | | |
496 | 0 | void TabletColumn::init_from_thrift(const TColumn& tcolumn) { |
497 | 0 | ColumnPB column_pb; |
498 | 0 | TabletMeta::init_column_from_tcolumn(tcolumn.col_unique_id, tcolumn, &column_pb); |
499 | 0 | init_from_pb(column_pb); |
500 | 0 | } |
501 | | |
502 | 2.88k | void TabletColumn::init_from_pb(const ColumnPB& column) { |
503 | 2.88k | _unique_id = column.unique_id(); |
504 | 2.88k | _col_name = column.name(); |
505 | 2.88k | _col_name_lower_case = to_lower(_col_name); |
506 | 2.88k | _type = TabletColumn::get_field_type_by_string(column.type()); |
507 | 2.88k | _is_key = column.is_key(); |
508 | 2.88k | _is_nullable = column.is_nullable(); |
509 | 2.88k | _is_auto_increment = column.is_auto_increment(); |
510 | | |
511 | 2.88k | _has_default_value = column.has_default_value(); |
512 | 2.88k | if (_has_default_value) { |
513 | 28 | _default_value = column.default_value(); |
514 | 28 | } |
515 | | |
516 | 2.88k | if (column.has_precision()) { |
517 | 2.07k | _is_decimal = true; |
518 | 2.07k | _precision = column.precision(); |
519 | 2.07k | } else { |
520 | 812 | _is_decimal = false; |
521 | 812 | } |
522 | 2.88k | if (column.has_frac()) { |
523 | 2.07k | _frac = column.frac(); |
524 | 2.07k | } |
525 | 2.88k | _length = column.length(); |
526 | 2.88k | _index_length = column.index_length(); |
527 | 2.88k | if (column.has_is_bf_column()) { |
528 | 351 | _is_bf_column = column.is_bf_column(); |
529 | 2.53k | } else { |
530 | 2.53k | _is_bf_column = false; |
531 | 2.53k | } |
532 | 2.88k | if (column.has_has_bitmap_index()) { |
533 | 1.48k | _has_bitmap_index = column.has_bitmap_index(); |
534 | 1.48k | } else { |
535 | 1.40k | _has_bitmap_index = false; |
536 | 1.40k | } |
537 | 2.88k | if (column.has_aggregation()) { |
538 | 2.12k | _aggregation = get_aggregation_type_by_string(column.aggregation()); |
539 | 2.12k | _aggregation_name = column.aggregation(); |
540 | 2.12k | } |
541 | | |
542 | 2.88k | if (_type == FieldType::OLAP_FIELD_TYPE_AGG_STATE) { |
543 | 0 | _result_is_nullable = column.result_is_nullable(); |
544 | 0 | _be_exec_version = column.be_exec_version(); |
545 | 0 | } |
546 | | |
547 | 2.88k | if (column.has_visible()) { |
548 | 998 | _visible = column.visible(); |
549 | 998 | } |
550 | 2.88k | if (_type == FieldType::OLAP_FIELD_TYPE_ARRAY) { |
551 | 0 | CHECK(column.children_columns_size() == 1) |
552 | 0 | << "ARRAY type should has 1 children types, but got " |
553 | 0 | << column.children_columns_size(); |
554 | 0 | } |
555 | 2.88k | if (_type == FieldType::OLAP_FIELD_TYPE_MAP) { |
556 | 0 | DCHECK(column.children_columns_size() == 2) |
557 | 0 | << "MAP type should has 2 children types, but got " |
558 | 0 | << column.children_columns_size(); |
559 | 0 | if (UNLIKELY(column.children_columns_size() != 2)) { |
560 | 0 | LOG(WARNING) << "MAP type should has 2 children types, but got " |
561 | 0 | << column.children_columns_size(); |
562 | 0 | } |
563 | 0 | } |
564 | 2.88k | for (size_t i = 0; i < column.children_columns_size(); i++) { |
565 | 0 | TabletColumn child_column; |
566 | 0 | child_column.init_from_pb(column.children_columns(i)); |
567 | 0 | add_sub_column(child_column); |
568 | 0 | } |
569 | 2.88k | if (column.has_column_path_info()) { |
570 | 0 | _column_path = std::make_shared<vectorized::PathInData>(); |
571 | 0 | _column_path->from_protobuf(column.column_path_info()); |
572 | 0 | _parent_col_unique_id = column.column_path_info().parrent_column_unique_id(); |
573 | 0 | } |
574 | 2.88k | if (is_variant_type() && !column.has_column_path_info()) { |
575 | | // set path info for variant root column, to prevent from missing |
576 | 2 | _column_path = std::make_shared<vectorized::PathInData>(_col_name_lower_case); |
577 | 2 | } |
578 | 2.88k | for (auto& column_pb : column.sparse_columns()) { |
579 | 0 | TabletColumn column; |
580 | 0 | column.init_from_pb(column_pb); |
581 | 0 | _sparse_cols.emplace_back(std::make_shared<TabletColumn>(std::move(column))); |
582 | 0 | _num_sparse_columns++; |
583 | 0 | } |
584 | 2.88k | } |
585 | | |
586 | | TabletColumn TabletColumn::create_materialized_variant_column(const std::string& root, |
587 | | const std::vector<std::string>& paths, |
588 | 0 | int32_t parent_unique_id) { |
589 | 0 | TabletColumn subcol; |
590 | 0 | subcol.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT); |
591 | 0 | subcol.set_is_nullable(true); |
592 | 0 | subcol.set_unique_id(-1); |
593 | 0 | subcol.set_parent_unique_id(parent_unique_id); |
594 | 0 | vectorized::PathInData path(root, paths); |
595 | 0 | subcol.set_path_info(path); |
596 | 0 | subcol.set_name(path.get_path()); |
597 | 0 | return subcol; |
598 | 0 | } |
599 | | |
600 | 10.9k | void TabletColumn::to_schema_pb(ColumnPB* column) const { |
601 | 10.9k | column->set_unique_id(_unique_id); |
602 | 10.9k | column->set_name(_col_name); |
603 | 10.9k | column->set_type(get_string_by_field_type(_type)); |
604 | 10.9k | column->set_is_key(_is_key); |
605 | 10.9k | column->set_is_nullable(_is_nullable); |
606 | 10.9k | if (_has_default_value) { |
607 | 153 | column->set_default_value(_default_value); |
608 | 153 | } |
609 | 10.9k | if (_is_decimal) { |
610 | 8.24k | column->set_precision(_precision); |
611 | 8.24k | column->set_frac(_frac); |
612 | 8.24k | } |
613 | 10.9k | column->set_length(_length); |
614 | 10.9k | column->set_index_length(_index_length); |
615 | 10.9k | if (_is_bf_column) { |
616 | 4 | column->set_is_bf_column(_is_bf_column); |
617 | 4 | } |
618 | 10.9k | if (!_aggregation_name.empty()) { |
619 | 8.41k | column->set_aggregation(_aggregation_name); |
620 | 8.41k | } |
621 | 10.9k | column->set_result_is_nullable(_result_is_nullable); |
622 | 10.9k | column->set_be_exec_version(_be_exec_version); |
623 | 10.9k | if (_has_bitmap_index) { |
624 | 0 | column->set_has_bitmap_index(_has_bitmap_index); |
625 | 0 | } |
626 | 10.9k | column->set_visible(_visible); |
627 | | |
628 | 10.9k | if (_type == FieldType::OLAP_FIELD_TYPE_ARRAY) { |
629 | 0 | CHECK(_sub_columns.size() == 1) |
630 | 0 | << "ARRAY type should has 1 children types, but got " << _sub_columns.size(); |
631 | 0 | } |
632 | 10.9k | if (_type == FieldType::OLAP_FIELD_TYPE_MAP) { |
633 | 0 | DCHECK(_sub_columns.size() == 2) |
634 | 0 | << "MAP type should has 2 children types, but got " << _sub_columns.size(); |
635 | 0 | if (UNLIKELY(_sub_columns.size() != 2)) { |
636 | 0 | LOG(WARNING) << "MAP type should has 2 children types, but got " << _sub_columns.size(); |
637 | 0 | } |
638 | 0 | } |
639 | | |
640 | 10.9k | for (size_t i = 0; i < _sub_columns.size(); i++) { |
641 | 0 | ColumnPB* child = column->add_children_columns(); |
642 | 0 | _sub_columns[i]->to_schema_pb(child); |
643 | 0 | } |
644 | | |
645 | | // set parts info |
646 | 10.9k | if (has_path_info()) { |
647 | | // CHECK_GT(_parent_col_unique_id, 0); |
648 | 0 | _column_path->to_protobuf(column->mutable_column_path_info(), _parent_col_unique_id); |
649 | | // Update unstable information for variant columns. Some of the fields in the tablet schema |
650 | | // are irrelevant for variant sub-columns, but retaining them may lead to an excessive growth |
651 | | // in the number of tablet schema cache entries. |
652 | 0 | if (_type == FieldType::OLAP_FIELD_TYPE_STRING) { |
653 | 0 | column->set_length(INT_MAX); |
654 | 0 | } |
655 | 0 | column->set_index_length(0); |
656 | 0 | } |
657 | 10.9k | for (auto& col : _sparse_cols) { |
658 | 0 | ColumnPB* sparse_column = column->add_sparse_columns(); |
659 | 0 | col->to_schema_pb(sparse_column); |
660 | 0 | } |
661 | 10.9k | } |
662 | | |
663 | 16 | void TabletColumn::add_sub_column(TabletColumn& sub_column) { |
664 | 16 | _sub_columns.push_back(std::make_shared<TabletColumn>(sub_column)); |
665 | 16 | sub_column._parent_col_unique_id = this->_unique_id; |
666 | 16 | _sub_column_count += 1; |
667 | 16 | } |
668 | | |
669 | 19.7k | bool TabletColumn::is_row_store_column() const { |
670 | 19.7k | return _col_name == BeConsts::ROW_STORE_COL; |
671 | 19.7k | } |
672 | | |
673 | | vectorized::AggregateFunctionPtr TabletColumn::get_aggregate_function_union( |
674 | 0 | vectorized::DataTypePtr type, int current_be_exec_version) const { |
675 | 0 | const auto* state_type = assert_cast<const vectorized::DataTypeAggState*>(type.get()); |
676 | 0 | BeExecVersionManager::check_function_compatibility( |
677 | 0 | current_be_exec_version, _be_exec_version, |
678 | 0 | state_type->get_nested_function()->get_name()); |
679 | 0 | return vectorized::AggregateStateUnion::create(state_type->get_nested_function(), {type}, type); |
680 | 0 | } |
681 | | |
682 | | vectorized::AggregateFunctionPtr TabletColumn::get_aggregate_function( |
683 | 24 | std::string suffix, int current_be_exec_version) const { |
684 | 24 | vectorized::AggregateFunctionPtr function = nullptr; |
685 | | |
686 | 24 | auto type = vectorized::DataTypeFactory::instance().create_data_type(*this); |
687 | 24 | if (type && type->get_type_as_type_descriptor().type == PrimitiveType::TYPE_AGG_STATE) { |
688 | 0 | function = get_aggregate_function_union(type, current_be_exec_version); |
689 | 24 | } else { |
690 | 24 | std::string origin_name = TabletColumn::get_string_by_aggregation_type(_aggregation); |
691 | 24 | std::string agg_name = origin_name + suffix; |
692 | 24 | std::transform(agg_name.begin(), agg_name.end(), agg_name.begin(), |
693 | 258 | [](unsigned char c) { return std::tolower(c); }); |
694 | 24 | function = vectorized::AggregateFunctionSimpleFactory::instance().get( |
695 | 24 | agg_name, {type}, type->is_nullable(), BeExecVersionManager::get_newest_version()); |
696 | 24 | if (!function) { |
697 | 0 | LOG(WARNING) << "get column aggregate function failed, aggregation_name=" << origin_name |
698 | 0 | << ", column_type=" << type->get_name(); |
699 | 0 | } |
700 | 24 | } |
701 | 24 | if (function) { |
702 | 24 | function->set_version(_be_exec_version); |
703 | 24 | return function; |
704 | 24 | } |
705 | 0 | return nullptr; |
706 | 24 | } |
707 | | |
708 | 4 | void TabletColumn::set_path_info(const vectorized::PathInData& path) { |
709 | 4 | _column_path = std::make_shared<vectorized::PathInData>(path); |
710 | 4 | } |
711 | | |
712 | 0 | vectorized::DataTypePtr TabletColumn::get_vec_type() const { |
713 | 0 | return vectorized::DataTypeFactory::instance().create_data_type(*this); |
714 | 0 | } |
715 | | |
716 | | // escape '.' and '_' |
717 | 11.4k | std::string escape_for_path_name(const std::string& s) { |
718 | 11.4k | std::string res; |
719 | 11.4k | const char* pos = s.data(); |
720 | 11.4k | const char* end = pos + s.size(); |
721 | 11.5k | while (pos != end) { |
722 | 78 | unsigned char c = *pos; |
723 | 78 | if (c == '.' || c == '_') { |
724 | 12 | res += '%'; |
725 | 12 | res += vectorized::hex_digit_uppercase(c / 16); |
726 | 12 | res += vectorized::hex_digit_uppercase(c % 16); |
727 | 66 | } else { |
728 | 66 | res += c; |
729 | 66 | } |
730 | 78 | ++pos; |
731 | 78 | } |
732 | 11.4k | return res; |
733 | 11.4k | } |
734 | | |
735 | 4 | void TabletIndex::set_escaped_escaped_index_suffix_path(const std::string& path_name) { |
736 | 4 | std::string escaped_path = escape_for_path_name(path_name); |
737 | 4 | _escaped_index_suffix_path = escaped_path; |
738 | 4 | } |
739 | | |
740 | | void TabletIndex::init_from_thrift(const TOlapTableIndex& index, |
741 | 0 | const TabletSchema& tablet_schema) { |
742 | 0 | _index_id = index.index_id; |
743 | 0 | _index_name = index.index_name; |
744 | | // init col_unique_id in index at be side, since col_unique_id may be -1 at fe side |
745 | | // get column unique id by name |
746 | 0 | std::vector<int32_t> col_unique_ids(index.columns.size()); |
747 | 0 | for (size_t i = 0; i < index.columns.size(); i++) { |
748 | 0 | auto column_idx = tablet_schema.field_index(index.columns[i]); |
749 | 0 | if (column_idx >= 0) { |
750 | 0 | col_unique_ids[i] = tablet_schema.column(column_idx).unique_id(); |
751 | 0 | } else { |
752 | | // if column unique id not found by column name, find by column unique id |
753 | | // column unique id can not bigger than tablet schema column size, if bigger than column size means |
754 | | // this column is a new column added by light schema change |
755 | 0 | if (index.__isset.column_unique_ids && |
756 | 0 | index.column_unique_ids[i] < tablet_schema.num_columns()) { |
757 | 0 | col_unique_ids[i] = index.column_unique_ids[i]; |
758 | 0 | } else { |
759 | 0 | col_unique_ids[i] = -1; |
760 | 0 | } |
761 | 0 | } |
762 | 0 | } |
763 | 0 | _col_unique_ids = std::move(col_unique_ids); |
764 | |
|
765 | 0 | switch (index.index_type) { |
766 | 0 | case TIndexType::BITMAP: |
767 | 0 | _index_type = IndexType::BITMAP; |
768 | 0 | break; |
769 | 0 | case TIndexType::INVERTED: |
770 | 0 | _index_type = IndexType::INVERTED; |
771 | 0 | break; |
772 | 0 | case TIndexType::BLOOMFILTER: |
773 | 0 | _index_type = IndexType::BLOOMFILTER; |
774 | 0 | break; |
775 | 0 | case TIndexType::NGRAM_BF: |
776 | 0 | _index_type = IndexType::NGRAM_BF; |
777 | 0 | break; |
778 | 0 | } |
779 | 0 | if (index.__isset.properties) { |
780 | 0 | for (auto kv : index.properties) { |
781 | 0 | _properties[kv.first] = kv.second; |
782 | 0 | } |
783 | 0 | } |
784 | 0 | } |
785 | | |
786 | | void TabletIndex::init_from_thrift(const TOlapTableIndex& index, |
787 | 0 | const std::vector<int32_t>& column_uids) { |
788 | 0 | _index_id = index.index_id; |
789 | 0 | _index_name = index.index_name; |
790 | 0 | _col_unique_ids = column_uids; |
791 | |
|
792 | 0 | switch (index.index_type) { |
793 | 0 | case TIndexType::BITMAP: |
794 | 0 | _index_type = IndexType::BITMAP; |
795 | 0 | break; |
796 | 0 | case TIndexType::INVERTED: |
797 | 0 | _index_type = IndexType::INVERTED; |
798 | 0 | break; |
799 | 0 | case TIndexType::BLOOMFILTER: |
800 | 0 | _index_type = IndexType::BLOOMFILTER; |
801 | 0 | break; |
802 | 0 | case TIndexType::NGRAM_BF: |
803 | 0 | _index_type = IndexType::NGRAM_BF; |
804 | 0 | break; |
805 | 0 | } |
806 | 0 | if (index.__isset.properties) { |
807 | 0 | for (auto kv : index.properties) { |
808 | 0 | _properties[kv.first] = kv.second; |
809 | 0 | } |
810 | 0 | } |
811 | 0 | } |
812 | | |
813 | 68 | void TabletIndex::init_from_pb(const TabletIndexPB& index) { |
814 | 68 | _index_id = index.index_id(); |
815 | 68 | _index_name = index.index_name(); |
816 | 68 | _col_unique_ids.clear(); |
817 | 68 | for (auto col_unique_id : index.col_unique_id()) { |
818 | 57 | _col_unique_ids.push_back(col_unique_id); |
819 | 57 | } |
820 | 68 | _index_type = index.index_type(); |
821 | 68 | for (auto& kv : index.properties()) { |
822 | 15 | _properties[kv.first] = kv.second; |
823 | 15 | } |
824 | 68 | _escaped_index_suffix_path = index.index_suffix_name(); |
825 | 68 | } |
826 | | |
827 | 96 | void TabletIndex::to_schema_pb(TabletIndexPB* index) const { |
828 | 96 | index->set_index_id(_index_id); |
829 | 96 | index->set_index_name(_index_name); |
830 | 96 | index->clear_col_unique_id(); |
831 | 96 | for (auto col_unique_id : _col_unique_ids) { |
832 | 96 | index->add_col_unique_id(col_unique_id); |
833 | 96 | } |
834 | 96 | index->set_index_type(_index_type); |
835 | 96 | for (const auto& kv : _properties) { |
836 | 28 | DBUG_EXECUTE_IF("tablet_schema.to_schema_pb", { |
837 | 28 | if (kv.first == INVERTED_INDEX_PARSER_LOWERCASE_KEY) { |
838 | 28 | continue; |
839 | 28 | } |
840 | 28 | }) |
841 | 28 | (*index->mutable_properties())[kv.first] = kv.second; |
842 | 28 | } |
843 | 96 | index->set_index_suffix_name(_escaped_index_suffix_path); |
844 | | |
845 | 96 | DBUG_EXECUTE_IF("tablet_schema.to_schema_pb", { return; }) |
846 | | |
847 | | // lowercase by default |
848 | 96 | if (!_properties.empty()) { |
849 | 19 | if (!_properties.contains(INVERTED_INDEX_PARSER_LOWERCASE_KEY)) { |
850 | 10 | (*index->mutable_properties())[INVERTED_INDEX_PARSER_LOWERCASE_KEY] = |
851 | 10 | INVERTED_INDEX_PARSER_TRUE; |
852 | 10 | } |
853 | 19 | } |
854 | 96 | } |
855 | | |
856 | 687 | TabletSchema::TabletSchema() = default; |
857 | | |
858 | 663 | TabletSchema::~TabletSchema() { |
859 | 663 | clear_column_cache_handlers(); |
860 | 663 | } |
861 | | |
862 | 616 | int64_t TabletSchema::get_metadata_size() const { |
863 | 616 | return sizeof(TabletSchema) + _vl_field_mem_size; |
864 | 616 | } |
865 | | |
866 | 45 | void TabletSchema::append_column(TabletColumn column, ColumnType col_type) { |
867 | 45 | if (column.is_key()) { |
868 | 16 | _num_key_columns++; |
869 | 16 | } |
870 | 45 | if (column.is_nullable()) { |
871 | 32 | _num_null_columns++; |
872 | 32 | } |
873 | 45 | if (column.is_variant_type()) { |
874 | 0 | ++_num_variant_columns; |
875 | 0 | if (!column.has_path_info()) { |
876 | 0 | const std::string& col_name = column.name_lower_case(); |
877 | 0 | vectorized::PathInData path(col_name); |
878 | 0 | column.set_path_info(path); |
879 | 0 | } |
880 | 0 | } |
881 | 45 | if (UNLIKELY(column.name() == DELETE_SIGN)) { |
882 | 0 | _delete_sign_idx = _num_columns; |
883 | 45 | } else if (UNLIKELY(column.name() == SEQUENCE_COL)) { |
884 | 4 | _sequence_col_idx = _num_columns; |
885 | 41 | } else if (UNLIKELY(column.name() == VERSION_COL)) { |
886 | 0 | _version_col_idx = _num_columns; |
887 | 41 | } else if (UNLIKELY(column.name() == SKIP_BITMAP_COL)) { |
888 | 0 | _skip_bitmap_col_idx = _num_columns; |
889 | 0 | } |
890 | 45 | _field_id_to_index[column.unique_id()] = _num_columns; |
891 | 45 | _cols.push_back(std::make_shared<TabletColumn>(std::move(column))); |
892 | | // The dropped column may have same name with exsiting column, so that |
893 | | // not add to name to index map, only for uid to index map |
894 | 45 | if (col_type == ColumnType::VARIANT || _cols.back()->is_variant_type() || |
895 | 45 | _cols.back()->is_extracted_column()) { |
896 | 4 | _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns); |
897 | 4 | _field_path_to_index[_cols.back()->path_info_ptr().get()] = _num_columns; |
898 | 41 | } else if (col_type == ColumnType::NORMAL) { |
899 | 41 | _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns); |
900 | 41 | } |
901 | 45 | _num_columns++; |
902 | 45 | } |
903 | | |
904 | 0 | void TabletColumn::append_sparse_column(TabletColumn column) { |
905 | 0 | _sparse_cols.push_back(std::make_shared<TabletColumn>(column)); |
906 | 0 | _num_sparse_columns++; |
907 | 0 | } |
908 | | |
909 | 4 | void TabletSchema::append_index(TabletIndex&& index) { |
910 | 4 | _indexes.push_back(std::move(index)); |
911 | 4 | } |
912 | | |
913 | | void TabletSchema::update_index(const TabletColumn& col, const IndexType& index_type, |
914 | 0 | TabletIndex&& index) { |
915 | 0 | int32_t col_unique_id = col.is_extracted_column() ? col.parent_unique_id() : col.unique_id(); |
916 | 0 | const std::string& suffix_path = escape_for_path_name(col.suffix_path()); |
917 | 0 | for (size_t i = 0; i < _indexes.size(); i++) { |
918 | 0 | for (int32_t id : _indexes[i].col_unique_ids()) { |
919 | 0 | if (_indexes[i].index_type() == index_type && id == col_unique_id && |
920 | 0 | _indexes[i].get_index_suffix() == suffix_path) { |
921 | 0 | _indexes[i] = std::move(index); |
922 | 0 | break; |
923 | 0 | } |
924 | 0 | } |
925 | 0 | } |
926 | 0 | } |
927 | | |
928 | 0 | void TabletSchema::replace_column(size_t pos, TabletColumn new_col) { |
929 | 0 | CHECK_LT(pos, num_columns()) << " outof range"; |
930 | 0 | _cols[pos] = std::make_shared<TabletColumn>(std::move(new_col)); |
931 | 0 | } |
932 | | |
933 | 0 | void TabletSchema::clear_index() { |
934 | 0 | _indexes.clear(); |
935 | 0 | } |
936 | | |
937 | 0 | void TabletSchema::remove_index(int64_t index_id) { |
938 | 0 | std::vector<TabletIndex> indexes; |
939 | 0 | for (auto index : _indexes) { |
940 | 0 | if (index.index_id() == index_id) { |
941 | 0 | continue; |
942 | 0 | } |
943 | 0 | indexes.emplace_back(std::move(index)); |
944 | 0 | } |
945 | 0 | _indexes = std::move(indexes); |
946 | 0 | } |
947 | | |
948 | 0 | void TabletSchema::clear_columns() { |
949 | 0 | _field_path_to_index.clear(); |
950 | 0 | _field_name_to_index.clear(); |
951 | 0 | _field_id_to_index.clear(); |
952 | 0 | _num_columns = 0; |
953 | 0 | _num_variant_columns = 0; |
954 | 0 | _num_null_columns = 0; |
955 | 0 | _num_key_columns = 0; |
956 | 0 | _cols.clear(); |
957 | 0 | clear_column_cache_handlers(); |
958 | 0 | } |
959 | | |
960 | 1.27k | void TabletSchema::clear_column_cache_handlers() { |
961 | 1.27k | for (auto* cache_handle : _column_cache_handlers) { |
962 | 0 | TabletColumnObjectPool::instance()->release(cache_handle); |
963 | 0 | } |
964 | 1.27k | _column_cache_handlers.clear(); |
965 | 1.27k | } |
966 | | |
967 | | void TabletSchema::init_from_pb(const TabletSchemaPB& schema, bool ignore_extracted_columns, |
968 | 616 | bool reuse_cache_column) { |
969 | 616 | _keys_type = schema.keys_type(); |
970 | 616 | _num_columns = 0; |
971 | 616 | _num_variant_columns = 0; |
972 | 616 | _num_key_columns = 0; |
973 | 616 | _num_null_columns = 0; |
974 | 616 | _cols.clear(); |
975 | 616 | _indexes.clear(); |
976 | 616 | _field_name_to_index.clear(); |
977 | 616 | _field_id_to_index.clear(); |
978 | 616 | _cluster_key_idxes.clear(); |
979 | 616 | clear_column_cache_handlers(); |
980 | 616 | for (const auto& i : schema.cluster_key_idxes()) { |
981 | 6 | _cluster_key_idxes.push_back(i); |
982 | 6 | } |
983 | 2.95k | for (auto& column_pb : schema.column()) { |
984 | 2.95k | TabletColumnPtr column; |
985 | 2.95k | if (reuse_cache_column) { |
986 | 124 | auto pair = TabletColumnObjectPool::instance()->insert( |
987 | 124 | deterministic_string_serialize(column_pb)); |
988 | 124 | column = pair.second; |
989 | 124 | _column_cache_handlers.push_back(pair.first); |
990 | 2.83k | } else { |
991 | 2.83k | column = std::make_shared<TabletColumn>(); |
992 | 2.83k | column->init_from_pb(column_pb); |
993 | 2.83k | } |
994 | 2.95k | if (ignore_extracted_columns && column->is_extracted_column()) { |
995 | 0 | continue; |
996 | 0 | } |
997 | 2.95k | if (column->is_key()) { |
998 | 1.41k | _num_key_columns++; |
999 | 1.41k | } |
1000 | 2.95k | if (column->is_nullable()) { |
1001 | 145 | _num_null_columns++; |
1002 | 145 | } |
1003 | 2.95k | if (column->is_variant_type()) { |
1004 | 2 | ++_num_variant_columns; |
1005 | 2 | } |
1006 | | |
1007 | 2.95k | _cols.emplace_back(std::move(column)); |
1008 | 2.95k | if (!_cols.back()->is_extracted_column()) { |
1009 | 2.95k | _vl_field_mem_size += |
1010 | 2.95k | sizeof(StringRef) + sizeof(char) * _cols.back()->name().size() + sizeof(size_t); |
1011 | 2.95k | _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns); |
1012 | 2.95k | _vl_field_mem_size += sizeof(int32_t) * 2; |
1013 | 2.95k | _field_id_to_index[_cols.back()->unique_id()] = _num_columns; |
1014 | 2.95k | } |
1015 | 2.95k | _num_columns++; |
1016 | 2.95k | } |
1017 | 616 | for (auto& index_pb : schema.index()) { |
1018 | 56 | TabletIndex index; |
1019 | 56 | index.init_from_pb(index_pb); |
1020 | 56 | _indexes.emplace_back(std::move(index)); |
1021 | 56 | } |
1022 | 616 | _num_short_key_columns = schema.num_short_key_columns(); |
1023 | 616 | _num_rows_per_row_block = schema.num_rows_per_row_block(); |
1024 | 616 | _compress_kind = schema.compress_kind(); |
1025 | 616 | _next_column_unique_id = schema.next_column_unique_id(); |
1026 | 616 | if (schema.has_bf_fpp()) { |
1027 | 0 | _has_bf_fpp = true; |
1028 | 0 | _bf_fpp = schema.bf_fpp(); |
1029 | 616 | } else { |
1030 | 616 | _has_bf_fpp = false; |
1031 | 616 | _bf_fpp = BLOOM_FILTER_DEFAULT_FPP; |
1032 | 616 | } |
1033 | 616 | _is_in_memory = schema.is_in_memory(); |
1034 | 616 | _disable_auto_compaction = schema.disable_auto_compaction(); |
1035 | 616 | _enable_single_replica_compaction = schema.enable_single_replica_compaction(); |
1036 | 616 | _store_row_column = schema.store_row_column(); |
1037 | 616 | _skip_write_index_on_load = schema.skip_write_index_on_load(); |
1038 | 616 | _delete_sign_idx = schema.delete_sign_idx(); |
1039 | 616 | _sequence_col_idx = schema.sequence_col_idx(); |
1040 | 616 | _version_col_idx = schema.version_col_idx(); |
1041 | 616 | _skip_bitmap_col_idx = schema.skip_bitmap_col_idx(); |
1042 | 616 | _sort_type = schema.sort_type(); |
1043 | 616 | _sort_col_num = schema.sort_col_num(); |
1044 | 616 | _compression_type = schema.compression_type(); |
1045 | 616 | _row_store_page_size = schema.row_store_page_size(); |
1046 | 616 | _storage_page_size = schema.storage_page_size(); |
1047 | 616 | _schema_version = schema.schema_version(); |
1048 | | // Default to V1 inverted index storage format for backward compatibility if not specified in schema. |
1049 | 616 | if (!schema.has_inverted_index_storage_format()) { |
1050 | 122 | _inverted_index_storage_format = InvertedIndexStorageFormatPB::V1; |
1051 | 494 | } else { |
1052 | 494 | _inverted_index_storage_format = schema.inverted_index_storage_format(); |
1053 | 494 | } |
1054 | | |
1055 | 616 | _row_store_column_unique_ids.assign(schema.row_store_column_unique_ids().begin(), |
1056 | 616 | schema.row_store_column_unique_ids().end()); |
1057 | 616 | _variant_enable_flatten_nested = schema.variant_enable_flatten_nested(); |
1058 | 616 | _vl_field_mem_size += _row_store_column_unique_ids.capacity() * sizeof(int32_t); |
1059 | 616 | update_metadata_size(); |
1060 | 616 | } |
1061 | | |
1062 | 166 | void TabletSchema::copy_from(const TabletSchema& tablet_schema) { |
1063 | 166 | TabletSchemaPB tablet_schema_pb; |
1064 | 166 | tablet_schema.to_schema_pb(&tablet_schema_pb); |
1065 | 166 | init_from_pb(tablet_schema_pb); |
1066 | 166 | _table_id = tablet_schema.table_id(); |
1067 | 166 | } |
1068 | | |
1069 | 0 | void TabletSchema::update_index_info_from(const TabletSchema& tablet_schema) { |
1070 | 0 | for (auto& col : _cols) { |
1071 | 0 | if (col->unique_id() < 0) { |
1072 | 0 | continue; |
1073 | 0 | } |
1074 | 0 | const auto iter = tablet_schema._field_id_to_index.find(col->unique_id()); |
1075 | 0 | if (iter == tablet_schema._field_id_to_index.end()) { |
1076 | 0 | continue; |
1077 | 0 | } |
1078 | 0 | auto col_idx = iter->second; |
1079 | 0 | if (col_idx < 0 || col_idx >= tablet_schema._cols.size()) { |
1080 | 0 | continue; |
1081 | 0 | } |
1082 | 0 | col->set_is_bf_column(tablet_schema._cols[col_idx]->is_bf_column()); |
1083 | 0 | col->set_has_bitmap_index(tablet_schema._cols[col_idx]->has_bitmap_index()); |
1084 | 0 | } |
1085 | 0 | } |
1086 | | |
1087 | 1.19k | std::string TabletSchema::to_key() const { |
1088 | 1.19k | TabletSchemaPB pb; |
1089 | 1.19k | to_schema_pb(&pb); |
1090 | 1.19k | return TabletSchema::deterministic_string_serialize(pb); |
1091 | 1.19k | } |
1092 | | |
1093 | | void TabletSchema::build_current_tablet_schema(int64_t index_id, int32_t version, |
1094 | | const OlapTableIndexSchema* index, |
1095 | 0 | const TabletSchema& ori_tablet_schema) { |
1096 | | // copy from ori_tablet_schema |
1097 | 0 | _keys_type = ori_tablet_schema.keys_type(); |
1098 | 0 | _num_short_key_columns = ori_tablet_schema.num_short_key_columns(); |
1099 | 0 | _num_rows_per_row_block = ori_tablet_schema.num_rows_per_row_block(); |
1100 | 0 | _compress_kind = ori_tablet_schema.compress_kind(); |
1101 | | |
1102 | | // todo(yixiu): unique_id |
1103 | 0 | _next_column_unique_id = ori_tablet_schema.next_column_unique_id(); |
1104 | 0 | _is_in_memory = ori_tablet_schema.is_in_memory(); |
1105 | 0 | _disable_auto_compaction = ori_tablet_schema.disable_auto_compaction(); |
1106 | 0 | _enable_single_replica_compaction = ori_tablet_schema.enable_single_replica_compaction(); |
1107 | 0 | _skip_write_index_on_load = ori_tablet_schema.skip_write_index_on_load(); |
1108 | 0 | _sort_type = ori_tablet_schema.sort_type(); |
1109 | 0 | _sort_col_num = ori_tablet_schema.sort_col_num(); |
1110 | 0 | _row_store_page_size = ori_tablet_schema.row_store_page_size(); |
1111 | 0 | _storage_page_size = ori_tablet_schema.storage_page_size(); |
1112 | 0 | _variant_enable_flatten_nested = ori_tablet_schema.variant_flatten_nested(); |
1113 | | |
1114 | | // copy from table_schema_param |
1115 | 0 | _schema_version = version; |
1116 | 0 | _num_columns = 0; |
1117 | 0 | _num_variant_columns = 0; |
1118 | 0 | _num_key_columns = 0; |
1119 | 0 | _num_null_columns = 0; |
1120 | 0 | bool has_bf_columns = false; |
1121 | 0 | _cols.clear(); |
1122 | 0 | _indexes.clear(); |
1123 | 0 | _field_name_to_index.clear(); |
1124 | 0 | _field_id_to_index.clear(); |
1125 | 0 | _delete_sign_idx = -1; |
1126 | 0 | _sequence_col_idx = -1; |
1127 | 0 | _version_col_idx = -1; |
1128 | 0 | _skip_bitmap_col_idx = -1; |
1129 | 0 | _cluster_key_idxes.clear(); |
1130 | 0 | clear_column_cache_handlers(); |
1131 | 0 | for (const auto& i : ori_tablet_schema._cluster_key_idxes) { |
1132 | 0 | _cluster_key_idxes.push_back(i); |
1133 | 0 | } |
1134 | 0 | for (auto& column : index->columns) { |
1135 | 0 | if (column->is_key()) { |
1136 | 0 | _num_key_columns++; |
1137 | 0 | } |
1138 | 0 | if (column->is_nullable()) { |
1139 | 0 | _num_null_columns++; |
1140 | 0 | } |
1141 | 0 | if (column->is_bf_column()) { |
1142 | 0 | has_bf_columns = true; |
1143 | 0 | } |
1144 | 0 | if (column->is_variant_type()) { |
1145 | 0 | ++_num_variant_columns; |
1146 | 0 | } |
1147 | 0 | if (UNLIKELY(column->name() == DELETE_SIGN)) { |
1148 | 0 | _delete_sign_idx = _num_columns; |
1149 | 0 | } else if (UNLIKELY(column->name() == SEQUENCE_COL)) { |
1150 | 0 | _sequence_col_idx = _num_columns; |
1151 | 0 | } else if (UNLIKELY(column->name() == VERSION_COL)) { |
1152 | 0 | _version_col_idx = _num_columns; |
1153 | 0 | } else if (UNLIKELY(column->name() == SKIP_BITMAP_COL)) { |
1154 | 0 | _skip_bitmap_col_idx = _num_columns; |
1155 | 0 | } |
1156 | 0 | _cols.emplace_back(std::make_shared<TabletColumn>(*column)); |
1157 | 0 | _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns); |
1158 | 0 | _field_id_to_index[_cols.back()->unique_id()] = _num_columns; |
1159 | 0 | _num_columns++; |
1160 | 0 | } |
1161 | |
|
1162 | 0 | for (auto& i : index->indexes) { |
1163 | 0 | _indexes.emplace_back(*i); |
1164 | 0 | } |
1165 | |
|
1166 | 0 | if (has_bf_columns) { |
1167 | 0 | _has_bf_fpp = true; |
1168 | 0 | _bf_fpp = ori_tablet_schema.bloom_filter_fpp(); |
1169 | 0 | } else { |
1170 | 0 | _has_bf_fpp = false; |
1171 | 0 | _bf_fpp = BLOOM_FILTER_DEFAULT_FPP; |
1172 | 0 | } |
1173 | 0 | } |
1174 | | |
1175 | 77 | void TabletSchema::merge_dropped_columns(const TabletSchema& src_schema) { |
1176 | | // If they are the same tablet schema object, then just return |
1177 | 77 | if (this == &src_schema) { |
1178 | 0 | return; |
1179 | 0 | } |
1180 | 206 | for (const auto& src_col : src_schema.columns()) { |
1181 | 206 | if (_field_id_to_index.find(src_col->unique_id()) == _field_id_to_index.end()) { |
1182 | 0 | CHECK(!src_col->is_key()) |
1183 | 0 | << src_col->name() << " is key column, should not be dropped."; |
1184 | 0 | ColumnPB src_col_pb; |
1185 | | // There are some pointer in tablet column, not sure the reference relation, so |
1186 | | // that deep copy it. |
1187 | 0 | src_col->to_schema_pb(&src_col_pb); |
1188 | 0 | TabletColumn new_col(src_col_pb); |
1189 | 0 | append_column(new_col, TabletSchema::ColumnType::DROPPED); |
1190 | 0 | } |
1191 | 206 | } |
1192 | 77 | } |
1193 | | |
1194 | 0 | TabletSchemaSPtr TabletSchema::copy_without_variant_extracted_columns() { |
1195 | 0 | TabletSchemaSPtr copy = std::make_shared<TabletSchema>(); |
1196 | 0 | TabletSchemaPB tablet_schema_pb; |
1197 | 0 | this->to_schema_pb(&tablet_schema_pb); |
1198 | 0 | copy->init_from_pb(tablet_schema_pb, true /*ignore extracted_columns*/); |
1199 | 0 | return copy; |
1200 | 0 | } |
1201 | | |
1202 | | // Dropped column is in _field_id_to_index but not in _field_name_to_index |
1203 | | // Could refer to append_column method |
1204 | 4.64k | bool TabletSchema::is_dropped_column(const TabletColumn& col) const { |
1205 | 4.64k | CHECK(_field_id_to_index.find(col.unique_id()) != _field_id_to_index.end()) |
1206 | 0 | << "could not find col with unique id = " << col.unique_id() |
1207 | 0 | << " and name = " << col.name() << " table_id=" << _table_id; |
1208 | 4.64k | auto it = _field_name_to_index.find(StringRef {col.name()}); |
1209 | 4.64k | return it == _field_name_to_index.end() || _cols[it->second]->unique_id() != col.unique_id(); |
1210 | 4.64k | } |
1211 | | |
1212 | 0 | void TabletSchema::copy_extracted_columns(const TabletSchema& src_schema) { |
1213 | 0 | std::unordered_set<int32_t> variant_columns; |
1214 | 0 | for (const auto& col : columns()) { |
1215 | 0 | if (col->is_variant_type()) { |
1216 | 0 | variant_columns.insert(col->unique_id()); |
1217 | 0 | } |
1218 | 0 | } |
1219 | 0 | for (const TabletColumnPtr& col : src_schema.columns()) { |
1220 | 0 | if (col->is_extracted_column() && variant_columns.contains(col->parent_unique_id())) { |
1221 | 0 | ColumnPB col_pb; |
1222 | 0 | col->to_schema_pb(&col_pb); |
1223 | 0 | TabletColumn new_col(col_pb); |
1224 | 0 | append_column(new_col, ColumnType::VARIANT); |
1225 | 0 | } |
1226 | 0 | } |
1227 | 0 | } |
1228 | | |
1229 | 0 | void TabletSchema::reserve_extracted_columns() { |
1230 | 0 | for (auto it = _cols.begin(); it != _cols.end();) { |
1231 | 0 | if (!(*it)->is_extracted_column()) { |
1232 | 0 | it = _cols.erase(it); |
1233 | 0 | } else { |
1234 | 0 | ++it; |
1235 | 0 | } |
1236 | 0 | } |
1237 | 0 | } |
1238 | | |
1239 | 1.90k | void TabletSchema::to_schema_pb(TabletSchemaPB* tablet_schema_pb) const { |
1240 | 1.90k | for (const auto& i : _cluster_key_idxes) { |
1241 | 28 | tablet_schema_pb->add_cluster_key_idxes(i); |
1242 | 28 | } |
1243 | 1.90k | tablet_schema_pb->set_keys_type(_keys_type); |
1244 | 10.9k | for (const auto& col : _cols) { |
1245 | 10.9k | ColumnPB* column = tablet_schema_pb->add_column(); |
1246 | 10.9k | col->to_schema_pb(column); |
1247 | 10.9k | } |
1248 | 1.90k | for (const auto& index : _indexes) { |
1249 | 96 | auto* index_pb = tablet_schema_pb->add_index(); |
1250 | 96 | index.to_schema_pb(index_pb); |
1251 | 96 | } |
1252 | 1.90k | tablet_schema_pb->set_num_short_key_columns(_num_short_key_columns); |
1253 | 1.90k | tablet_schema_pb->set_num_rows_per_row_block(_num_rows_per_row_block); |
1254 | 1.90k | tablet_schema_pb->set_compress_kind(_compress_kind); |
1255 | 1.90k | if (_has_bf_fpp) { |
1256 | 0 | tablet_schema_pb->set_bf_fpp(_bf_fpp); |
1257 | 0 | } |
1258 | 1.90k | tablet_schema_pb->set_next_column_unique_id(_next_column_unique_id); |
1259 | 1.90k | tablet_schema_pb->set_is_in_memory(_is_in_memory); |
1260 | 1.90k | tablet_schema_pb->set_disable_auto_compaction(_disable_auto_compaction); |
1261 | 1.90k | tablet_schema_pb->set_enable_single_replica_compaction(_enable_single_replica_compaction); |
1262 | 1.90k | tablet_schema_pb->set_store_row_column(_store_row_column); |
1263 | 1.90k | tablet_schema_pb->set_skip_write_index_on_load(_skip_write_index_on_load); |
1264 | 1.90k | tablet_schema_pb->set_delete_sign_idx(_delete_sign_idx); |
1265 | 1.90k | tablet_schema_pb->set_sequence_col_idx(_sequence_col_idx); |
1266 | 1.90k | tablet_schema_pb->set_sort_type(_sort_type); |
1267 | 1.90k | tablet_schema_pb->set_sort_col_num(_sort_col_num); |
1268 | 1.90k | tablet_schema_pb->set_schema_version(_schema_version); |
1269 | 1.90k | tablet_schema_pb->set_compression_type(_compression_type); |
1270 | 1.90k | tablet_schema_pb->set_row_store_page_size(_row_store_page_size); |
1271 | 1.90k | tablet_schema_pb->set_storage_page_size(_storage_page_size); |
1272 | 1.90k | tablet_schema_pb->set_version_col_idx(_version_col_idx); |
1273 | 1.90k | tablet_schema_pb->set_skip_bitmap_col_idx(_skip_bitmap_col_idx); |
1274 | 1.90k | tablet_schema_pb->set_inverted_index_storage_format(_inverted_index_storage_format); |
1275 | 1.90k | tablet_schema_pb->mutable_row_store_column_unique_ids()->Assign( |
1276 | 1.90k | _row_store_column_unique_ids.begin(), _row_store_column_unique_ids.end()); |
1277 | 1.90k | tablet_schema_pb->set_variant_enable_flatten_nested(_variant_enable_flatten_nested); |
1278 | 1.90k | } |
1279 | | |
1280 | 0 | size_t TabletSchema::row_size() const { |
1281 | 0 | size_t size = 0; |
1282 | 0 | for (const auto& column : _cols) { |
1283 | 0 | size += column->length(); |
1284 | 0 | } |
1285 | 0 | size += (_num_columns + 7) / 8; |
1286 | |
|
1287 | 0 | return size; |
1288 | 0 | } |
1289 | | |
1290 | 1.14k | int32_t TabletSchema::field_index(const std::string& field_name) const { |
1291 | 1.14k | const auto& found = _field_name_to_index.find(StringRef(field_name)); |
1292 | 1.14k | return (found == _field_name_to_index.end()) ? -1 : found->second; |
1293 | 1.14k | } |
1294 | | |
1295 | 0 | int32_t TabletSchema::field_index(const vectorized::PathInData& path) const { |
1296 | 0 | const auto& found = _field_path_to_index.find(vectorized::PathInDataRef(&path)); |
1297 | 0 | return (found == _field_path_to_index.end()) ? -1 : found->second; |
1298 | 0 | } |
1299 | | |
1300 | 106 | int32_t TabletSchema::field_index(int32_t col_unique_id) const { |
1301 | 106 | const auto& found = _field_id_to_index.find(col_unique_id); |
1302 | 106 | return (found == _field_id_to_index.end()) ? -1 : found->second; |
1303 | 106 | } |
1304 | | |
1305 | 21.2k | const std::vector<TabletColumnPtr>& TabletSchema::columns() const { |
1306 | 21.2k | return _cols; |
1307 | 21.2k | } |
1308 | | |
1309 | 0 | const std::vector<TabletColumnPtr>& TabletColumn::sparse_columns() const { |
1310 | 0 | return _sparse_cols; |
1311 | 0 | } |
1312 | | |
1313 | 97.9k | const TabletColumn& TabletSchema::column(size_t ordinal) const { |
1314 | 97.9k | DCHECK(ordinal < _num_columns) << "ordinal:" << ordinal << ", _num_columns:" << _num_columns; |
1315 | 97.9k | return *_cols[ordinal]; |
1316 | 97.9k | } |
1317 | | |
1318 | 0 | const TabletColumn& TabletColumn::sparse_column_at(size_t ordinal) const { |
1319 | 0 | DCHECK(ordinal < _sparse_cols.size()) |
1320 | 0 | << "ordinal:" << ordinal << ", _num_columns:" << _sparse_cols.size(); |
1321 | 0 | return *_sparse_cols[ordinal]; |
1322 | 0 | } |
1323 | | |
1324 | 133 | const TabletColumn& TabletSchema::column_by_uid(int32_t col_unique_id) const { |
1325 | 133 | return *_cols.at(_field_id_to_index.at(col_unique_id)); |
1326 | 133 | } |
1327 | | |
1328 | 0 | TabletColumn& TabletSchema::mutable_column_by_uid(int32_t col_unique_id) { |
1329 | 0 | return *_cols.at(_field_id_to_index.at(col_unique_id)); |
1330 | 0 | } |
1331 | | |
1332 | 7 | TabletColumn& TabletSchema::mutable_column(size_t ordinal) { |
1333 | 7 | return *_cols.at(ordinal); |
1334 | 7 | } |
1335 | | |
1336 | 0 | void TabletSchema::update_indexes_from_thrift(const std::vector<doris::TOlapTableIndex>& tindexes) { |
1337 | 0 | std::vector<TabletIndex> indexes; |
1338 | 0 | for (auto& tindex : tindexes) { |
1339 | 0 | TabletIndex index; |
1340 | 0 | index.init_from_thrift(tindex, *this); |
1341 | 0 | indexes.emplace_back(std::move(index)); |
1342 | 0 | } |
1343 | 0 | _indexes = std::move(indexes); |
1344 | 0 | } |
1345 | | |
1346 | 0 | bool TabletSchema::exist_column(const std::string& field_name) const { |
1347 | 0 | return _field_name_to_index.contains(StringRef {field_name}); |
1348 | 0 | } |
1349 | | |
1350 | 0 | Status TabletSchema::have_column(const std::string& field_name) const { |
1351 | 0 | if (!_field_name_to_index.contains(StringRef(field_name))) { |
1352 | 0 | return Status::Error<ErrorCode::INTERNAL_ERROR>( |
1353 | 0 | "Not found field_name, field_name:{}, schema:{}", field_name, |
1354 | 0 | get_all_field_names()); |
1355 | 0 | } |
1356 | 0 | return Status::OK(); |
1357 | 0 | } |
1358 | | |
1359 | 100 | Result<const TabletColumn*> TabletSchema::column(const std::string& field_name) const { |
1360 | 100 | auto it = _field_name_to_index.find(StringRef {field_name}); |
1361 | 100 | if (it == _field_name_to_index.end()) { |
1362 | 0 | DCHECK(false) << "field_name=" << field_name << ", table_id=" << _table_id |
1363 | 0 | << ", field_name_to_index=" << get_all_field_names(); |
1364 | 0 | return ResultError( |
1365 | 0 | Status::InternalError("column not found, name={}, table_id={}, schema_version={}", |
1366 | 0 | field_name, _table_id, _schema_version)); |
1367 | 0 | } |
1368 | 100 | return _cols[it->second].get(); |
1369 | 100 | } |
1370 | | |
1371 | | void TabletSchema::update_tablet_columns(const TabletSchema& tablet_schema, |
1372 | 0 | const std::vector<TColumn>& t_columns) { |
1373 | 0 | copy_from(tablet_schema); |
1374 | 0 | if (!t_columns.empty() && t_columns[0].col_unique_id >= 0) { |
1375 | 0 | clear_columns(); |
1376 | 0 | for (const auto& column : t_columns) { |
1377 | 0 | append_column(TabletColumn(column)); |
1378 | 0 | } |
1379 | 0 | } |
1380 | 0 | } |
1381 | | |
1382 | 0 | bool TabletSchema::has_inverted_index_with_index_id(int64_t index_id) const { |
1383 | 0 | for (size_t i = 0; i < _indexes.size(); i++) { |
1384 | 0 | if (_indexes[i].index_type() == IndexType::INVERTED && _indexes[i].index_id() == index_id) { |
1385 | 0 | return true; |
1386 | 0 | } |
1387 | 0 | } |
1388 | 0 | return false; |
1389 | 0 | } |
1390 | | |
1391 | | const TabletIndex* TabletSchema::inverted_index(int32_t col_unique_id, |
1392 | 20.9k | const std::string& suffix_path) const { |
1393 | 21.7k | for (size_t i = 0; i < _indexes.size(); i++) { |
1394 | 1.43k | if (_indexes[i].index_type() == IndexType::INVERTED) { |
1395 | 1.42k | for (int32_t id : _indexes[i].col_unique_ids()) { |
1396 | 1.42k | if (id == col_unique_id && |
1397 | 1.42k | _indexes[i].get_index_suffix() == escape_for_path_name(suffix_path)) { |
1398 | 561 | return &(_indexes[i]); |
1399 | 561 | } |
1400 | 1.42k | } |
1401 | 1.42k | } |
1402 | 1.43k | } |
1403 | 20.3k | return nullptr; |
1404 | 20.9k | } |
1405 | | |
1406 | 10.8k | const TabletIndex* TabletSchema::inverted_index(const TabletColumn& col) const { |
1407 | | // Some columns(Float, Double, JSONB ...) from the variant do not support inverted index |
1408 | 10.8k | if (!segment_v2::InvertedIndexColumnWriter::check_support_inverted_index(col)) { |
1409 | 2 | return nullptr; |
1410 | 2 | } |
1411 | | // TODO use more efficient impl |
1412 | | // Use parent id if unique not assigned, this could happend when accessing subcolumns of variants |
1413 | 10.8k | int32_t col_unique_id = col.is_extracted_column() ? col.parent_unique_id() : col.unique_id(); |
1414 | 10.8k | return inverted_index(col_unique_id, escape_for_path_name(col.suffix_path())); |
1415 | 10.8k | } |
1416 | | |
1417 | 0 | bool TabletSchema::has_ngram_bf_index(int32_t col_unique_id) const { |
1418 | | // TODO use more efficient impl |
1419 | 0 | for (size_t i = 0; i < _indexes.size(); i++) { |
1420 | 0 | if (_indexes[i].index_type() == IndexType::NGRAM_BF) { |
1421 | 0 | for (int32_t id : _indexes[i].col_unique_ids()) { |
1422 | 0 | if (id == col_unique_id) { |
1423 | 0 | return true; |
1424 | 0 | } |
1425 | 0 | } |
1426 | 0 | } |
1427 | 0 | } |
1428 | | |
1429 | 0 | return false; |
1430 | 0 | } |
1431 | | |
1432 | 10.4k | const TabletIndex* TabletSchema::get_ngram_bf_index(int32_t col_unique_id) const { |
1433 | | // TODO use more efficient impl |
1434 | 11.1k | for (size_t i = 0; i < _indexes.size(); i++) { |
1435 | 704 | if (_indexes[i].index_type() == IndexType::NGRAM_BF) { |
1436 | 0 | for (int32_t id : _indexes[i].col_unique_ids()) { |
1437 | 0 | if (id == col_unique_id) { |
1438 | 0 | return &(_indexes[i]); |
1439 | 0 | } |
1440 | 0 | } |
1441 | 0 | } |
1442 | 704 | } |
1443 | 10.4k | return nullptr; |
1444 | 10.4k | } |
1445 | | |
1446 | | vectorized::Block TabletSchema::create_block( |
1447 | | const std::vector<uint32_t>& return_columns, |
1448 | 516 | const std::unordered_set<uint32_t>* tablet_columns_need_convert_null) const { |
1449 | 516 | vectorized::Block block; |
1450 | 1.82k | for (int i = 0; i < return_columns.size(); ++i) { |
1451 | 1.31k | const auto& col = *_cols[return_columns[i]]; |
1452 | 1.31k | bool is_nullable = (tablet_columns_need_convert_null != nullptr && |
1453 | 1.31k | tablet_columns_need_convert_null->find(return_columns[i]) != |
1454 | 0 | tablet_columns_need_convert_null->end()); |
1455 | 1.31k | auto data_type = vectorized::DataTypeFactory::instance().create_data_type(col, is_nullable); |
1456 | 1.31k | auto column = data_type->create_column(); |
1457 | 1.31k | block.insert({std::move(column), data_type, col.name()}); |
1458 | 1.31k | } |
1459 | 516 | return block; |
1460 | 516 | } |
1461 | | |
1462 | 1.74k | vectorized::Block TabletSchema::create_block(bool ignore_dropped_col) const { |
1463 | 1.74k | vectorized::Block block; |
1464 | 4.64k | for (const auto& col : _cols) { |
1465 | 4.64k | if (ignore_dropped_col && is_dropped_column(*col)) { |
1466 | 0 | continue; |
1467 | 0 | } |
1468 | 4.64k | auto data_type = vectorized::DataTypeFactory::instance().create_data_type(*col); |
1469 | 4.64k | block.insert({data_type->create_column(), data_type, col->name()}); |
1470 | 4.64k | } |
1471 | 1.74k | return block; |
1472 | 1.74k | } |
1473 | | |
1474 | 0 | vectorized::Block TabletSchema::create_block_by_cids(const std::vector<uint32_t>& cids) const { |
1475 | 0 | vectorized::Block block; |
1476 | 0 | for (const auto& cid : cids) { |
1477 | 0 | const auto& col = *_cols[cid]; |
1478 | 0 | auto data_type = vectorized::DataTypeFactory::instance().create_data_type(col); |
1479 | 0 | block.insert({data_type->create_column(), data_type, col.name()}); |
1480 | 0 | } |
1481 | 0 | return block; |
1482 | 0 | } |
1483 | | |
1484 | 0 | bool operator==(const TabletColumn& a, const TabletColumn& b) { |
1485 | 0 | if (a._unique_id != b._unique_id) return false; |
1486 | 0 | if (a._col_name != b._col_name) return false; |
1487 | 0 | if (a._type != b._type) return false; |
1488 | 0 | if (a._is_key != b._is_key) return false; |
1489 | 0 | if (a._aggregation != b._aggregation) return false; |
1490 | 0 | if (a._is_nullable != b._is_nullable) return false; |
1491 | 0 | if (a._has_default_value != b._has_default_value) return false; |
1492 | 0 | if (a._has_default_value) { |
1493 | 0 | if (a._default_value != b._default_value) return false; |
1494 | 0 | } |
1495 | 0 | if (a._is_decimal != b._is_decimal) return false; |
1496 | 0 | if (a._is_decimal) { |
1497 | 0 | if (a._precision != b._precision) return false; |
1498 | 0 | if (a._frac != b._frac) return false; |
1499 | 0 | } |
1500 | 0 | if (a._length != b._length) return false; |
1501 | 0 | if (a._index_length != b._index_length) return false; |
1502 | 0 | if (a._is_bf_column != b._is_bf_column) return false; |
1503 | 0 | if (a._has_bitmap_index != b._has_bitmap_index) return false; |
1504 | 0 | if (a._column_path == nullptr && a._column_path != nullptr) return false; |
1505 | 0 | if (b._column_path == nullptr && a._column_path != nullptr) return false; |
1506 | 0 | if (b._column_path != nullptr && a._column_path != nullptr && |
1507 | 0 | *a._column_path != *b._column_path) |
1508 | 0 | return false; |
1509 | 0 | return true; |
1510 | 0 | } |
1511 | | |
1512 | 0 | bool operator!=(const TabletColumn& a, const TabletColumn& b) { |
1513 | 0 | return !(a == b); |
1514 | 0 | } |
1515 | | |
1516 | 1 | bool operator==(const TabletSchema& a, const TabletSchema& b) { |
1517 | 1 | if (a._keys_type != b._keys_type) return false; |
1518 | 1 | if (a._cols.size() != b._cols.size()) return false; |
1519 | 1 | for (int i = 0; i < a._cols.size(); ++i) { |
1520 | 0 | if (*a._cols[i] != *b._cols[i]) return false; |
1521 | 0 | } |
1522 | 1 | if (a._num_columns != b._num_columns) return false; |
1523 | 1 | if (a._num_key_columns != b._num_key_columns) return false; |
1524 | 1 | if (a._num_null_columns != b._num_null_columns) return false; |
1525 | 1 | if (a._num_short_key_columns != b._num_short_key_columns) return false; |
1526 | 1 | if (a._num_rows_per_row_block != b._num_rows_per_row_block) return false; |
1527 | 1 | if (a._compress_kind != b._compress_kind) return false; |
1528 | 1 | if (a._next_column_unique_id != b._next_column_unique_id) return false; |
1529 | 1 | if (a._has_bf_fpp != b._has_bf_fpp) return false; |
1530 | 1 | if (a._has_bf_fpp) { |
1531 | 0 | if (std::abs(a._bf_fpp - b._bf_fpp) > 1e-6) return false; |
1532 | 0 | } |
1533 | 1 | if (a._is_in_memory != b._is_in_memory) return false; |
1534 | 1 | if (a._delete_sign_idx != b._delete_sign_idx) return false; |
1535 | 1 | if (a._disable_auto_compaction != b._disable_auto_compaction) return false; |
1536 | 1 | if (a._enable_single_replica_compaction != b._enable_single_replica_compaction) return false; |
1537 | 1 | if (a._store_row_column != b._store_row_column) return false; |
1538 | 1 | if (a._row_store_page_size != b._row_store_page_size) return false; |
1539 | 1 | if (a._storage_page_size != b._storage_page_size) return false; |
1540 | 1 | if (a._skip_write_index_on_load != b._skip_write_index_on_load) return false; |
1541 | 1 | if (a._variant_enable_flatten_nested != b._variant_enable_flatten_nested) return false; |
1542 | 1 | return true; |
1543 | 1 | } |
1544 | | |
1545 | 1 | bool operator!=(const TabletSchema& a, const TabletSchema& b) { |
1546 | 1 | return !(a == b); |
1547 | 1 | } |
1548 | | |
1549 | | } // namespace doris |