be/src/storage/tablet/tablet_schema.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "storage/tablet/tablet_schema.h" |
19 | | |
20 | | #include <gen_cpp/Descriptors_types.h> |
21 | | #include <gen_cpp/olap_file.pb.h> |
22 | | #include <glog/logging.h> |
23 | | #include <google/protobuf/io/coded_stream.h> |
24 | | #include <google/protobuf/io/zero_copy_stream.h> |
25 | | #include <google/protobuf/io/zero_copy_stream_impl_lite.h> |
26 | | |
27 | | #include <algorithm> |
28 | | #include <cctype> |
29 | | // IWYU pragma: no_include <bits/std_abs.h> |
30 | | #include <cmath> // IWYU pragma: keep |
31 | | #include <memory> |
32 | | #include <ostream> |
33 | | #include <vector> |
34 | | |
35 | | #include "common/compiler_util.h" // IWYU pragma: keep |
36 | | #include "common/consts.h" |
37 | | #include "common/status.h" |
38 | | #include "core/block/block.h" |
39 | | #include "core/column/column_nothing.h" |
40 | | #include "core/data_type/data_type.h" |
41 | | #include "core/data_type/data_type_factory.hpp" |
42 | | #include "core/string_ref.h" |
43 | | #include "exec/common/hex.h" |
44 | | #include "exprs/aggregate/aggregate_function_simple_factory.h" |
45 | | #include "exprs/aggregate/aggregate_function_state_union.h" |
46 | | #include "storage/index/inverted/analyzer/analyzer.h" |
47 | | #include "storage/index/inverted/inverted_index_parser.h" |
48 | | #include "storage/olap_common.h" |
49 | | #include "storage/olap_define.h" |
50 | | #include "storage/tablet/tablet_column_object_pool.h" |
51 | | #include "storage/tablet/tablet_meta.h" |
52 | | #include "storage/tablet_info.h" |
53 | | #include "storage/types.h" |
54 | | #include "storage/utils.h" |
55 | | #include "util/json/path_in_data.h" |
56 | | |
57 | | namespace doris { |
58 | | #include "common/compile_check_begin.h" |
59 | 71.8k | FieldType TabletColumn::get_field_type_by_type(PrimitiveType primitiveType) { |
60 | 71.8k | switch (primitiveType) { |
61 | 0 | case PrimitiveType::INVALID_TYPE: |
62 | 0 | return FieldType::OLAP_FIELD_TYPE_UNKNOWN; |
63 | 0 | case PrimitiveType::TYPE_NULL: |
64 | 0 | return FieldType::OLAP_FIELD_TYPE_NONE; |
65 | 8 | case PrimitiveType::TYPE_BOOLEAN: |
66 | 8 | return FieldType::OLAP_FIELD_TYPE_BOOL; |
67 | 0 | case PrimitiveType::TYPE_TINYINT: |
68 | 0 | return FieldType::OLAP_FIELD_TYPE_TINYINT; |
69 | 5 | case PrimitiveType::TYPE_SMALLINT: |
70 | 5 | return FieldType::OLAP_FIELD_TYPE_SMALLINT; |
71 | 150 | case PrimitiveType::TYPE_INT: |
72 | 150 | return FieldType::OLAP_FIELD_TYPE_INT; |
73 | 71.7k | case PrimitiveType::TYPE_BIGINT: |
74 | 71.7k | return FieldType::OLAP_FIELD_TYPE_BIGINT; |
75 | 3 | case PrimitiveType::TYPE_LARGEINT: |
76 | 3 | return FieldType::OLAP_FIELD_TYPE_LARGEINT; |
77 | 0 | case PrimitiveType::TYPE_FLOAT: |
78 | 0 | return FieldType::OLAP_FIELD_TYPE_FLOAT; |
79 | 13 | case PrimitiveType::TYPE_DOUBLE: |
80 | 13 | return FieldType::OLAP_FIELD_TYPE_DOUBLE; |
81 | 0 | case PrimitiveType::TYPE_VARCHAR: |
82 | 0 | return FieldType::OLAP_FIELD_TYPE_VARCHAR; |
83 | 0 | case PrimitiveType::TYPE_DATE: |
84 | 0 | return FieldType::OLAP_FIELD_TYPE_DATE; |
85 | 0 | case PrimitiveType::TYPE_DATETIME: |
86 | 0 | return FieldType::OLAP_FIELD_TYPE_DATETIME; |
87 | 0 | case PrimitiveType::TYPE_BINARY: |
88 | 0 | return FieldType::OLAP_FIELD_TYPE_UNKNOWN; // Not implemented |
89 | 0 | case PrimitiveType::TYPE_CHAR: |
90 | 0 | return FieldType::OLAP_FIELD_TYPE_CHAR; |
91 | 0 | case PrimitiveType::TYPE_STRUCT: |
92 | 0 | return FieldType::OLAP_FIELD_TYPE_STRUCT; |
93 | 0 | case PrimitiveType::TYPE_ARRAY: |
94 | 0 | return FieldType::OLAP_FIELD_TYPE_ARRAY; |
95 | 0 | case PrimitiveType::TYPE_MAP: |
96 | 0 | return FieldType::OLAP_FIELD_TYPE_MAP; |
97 | 0 | case PrimitiveType::TYPE_HLL: |
98 | 0 | return FieldType::OLAP_FIELD_TYPE_HLL; |
99 | 0 | case PrimitiveType::TYPE_DECIMALV2: |
100 | 0 | return FieldType::OLAP_FIELD_TYPE_UNKNOWN; // Not implemented |
101 | 0 | case PrimitiveType::TYPE_BITMAP: |
102 | 0 | return FieldType::OLAP_FIELD_TYPE_BITMAP; |
103 | 0 | case PrimitiveType::TYPE_STRING: |
104 | 0 | return FieldType::OLAP_FIELD_TYPE_STRING; |
105 | 0 | case PrimitiveType::TYPE_QUANTILE_STATE: |
106 | 0 | return FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE; |
107 | 0 | case PrimitiveType::TYPE_DATEV2: |
108 | 0 | return FieldType::OLAP_FIELD_TYPE_DATEV2; |
109 | 0 | case PrimitiveType::TYPE_DATETIMEV2: |
110 | 0 | return FieldType::OLAP_FIELD_TYPE_DATETIMEV2; |
111 | 0 | case PrimitiveType::TYPE_TIMESTAMPTZ: |
112 | 0 | return FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ; |
113 | 0 | case PrimitiveType::TYPE_TIMEV2: |
114 | 0 | return FieldType::OLAP_FIELD_TYPE_TIMEV2; |
115 | 0 | case PrimitiveType::TYPE_DECIMAL32: |
116 | 0 | return FieldType::OLAP_FIELD_TYPE_DECIMAL32; |
117 | 0 | case PrimitiveType::TYPE_DECIMAL64: |
118 | 0 | return FieldType::OLAP_FIELD_TYPE_DECIMAL64; |
119 | 0 | case PrimitiveType::TYPE_DECIMAL128I: |
120 | 0 | return FieldType::OLAP_FIELD_TYPE_DECIMAL128I; |
121 | 0 | case PrimitiveType::TYPE_DECIMAL256: |
122 | 0 | return FieldType::OLAP_FIELD_TYPE_DECIMAL256; |
123 | 0 | case PrimitiveType::TYPE_JSONB: |
124 | 0 | return FieldType::OLAP_FIELD_TYPE_JSONB; |
125 | 0 | case PrimitiveType::TYPE_VARIANT: |
126 | 0 | return FieldType::OLAP_FIELD_TYPE_VARIANT; |
127 | 0 | case PrimitiveType::TYPE_IPV4: |
128 | 0 | return FieldType::OLAP_FIELD_TYPE_IPV4; |
129 | 0 | case PrimitiveType::TYPE_IPV6: |
130 | 0 | return FieldType::OLAP_FIELD_TYPE_IPV6; |
131 | 0 | case PrimitiveType::TYPE_LAMBDA_FUNCTION: |
132 | 0 | return FieldType::OLAP_FIELD_TYPE_UNKNOWN; // Not implemented |
133 | 0 | case PrimitiveType::TYPE_AGG_STATE: |
134 | 0 | return FieldType::OLAP_FIELD_TYPE_AGG_STATE; |
135 | 0 | default: |
136 | 0 | return FieldType::OLAP_FIELD_TYPE_UNKNOWN; |
137 | 71.8k | } |
138 | 71.8k | } |
139 | | |
140 | 31.9k | PrimitiveType TabletColumn::get_primitive_type_by_field_type(FieldType type) { |
141 | 31.9k | static const PrimitiveType mapping[] = { |
142 | 31.9k | /* 0 */ PrimitiveType::INVALID_TYPE, |
143 | 31.9k | /* 1 OLAP_FIELD_TYPE_TINYINT */ PrimitiveType::TYPE_TINYINT, |
144 | 31.9k | /* 2 OLAP_FIELD_TYPE_UNSIGNED_TINYINT */ PrimitiveType::INVALID_TYPE, |
145 | 31.9k | /* 3 OLAP_FIELD_TYPE_SMALLINT */ PrimitiveType::TYPE_SMALLINT, |
146 | 31.9k | /* 4 OLAP_FIELD_TYPE_UNSIGNED_SMALLINT */ PrimitiveType::INVALID_TYPE, |
147 | 31.9k | /* 5 OLAP_FIELD_TYPE_INT */ PrimitiveType::TYPE_INT, |
148 | 31.9k | /* 6 OLAP_FIELD_TYPE_UNSIGNED_INT */ PrimitiveType::INVALID_TYPE, |
149 | 31.9k | /* 7 OLAP_FIELD_TYPE_BIGINT */ PrimitiveType::TYPE_BIGINT, |
150 | 31.9k | /* 8 OLAP_FIELD_TYPE_UNSIGNED_BIGINT */ PrimitiveType::INVALID_TYPE, |
151 | 31.9k | /* 9 OLAP_FIELD_TYPE_LARGEINT */ PrimitiveType::TYPE_LARGEINT, |
152 | 31.9k | /* 10 OLAP_FIELD_TYPE_FLOAT */ PrimitiveType::TYPE_FLOAT, |
153 | 31.9k | /* 11 OLAP_FIELD_TYPE_DOUBLE */ PrimitiveType::TYPE_DOUBLE, |
154 | 31.9k | /* 12 OLAP_FIELD_TYPE_DISCRETE_DOUBLE */ PrimitiveType::INVALID_TYPE, |
155 | 31.9k | /* 13 OLAP_FIELD_TYPE_CHAR */ PrimitiveType::TYPE_CHAR, |
156 | 31.9k | /* 14 OLAP_FIELD_TYPE_DATE */ PrimitiveType::TYPE_DATE, |
157 | 31.9k | /* 15 OLAP_FIELD_TYPE_DATETIME */ PrimitiveType::TYPE_DATETIME, |
158 | 31.9k | /* 16 OLAP_FIELD_TYPE_DECIMAL */ PrimitiveType::INVALID_TYPE, |
159 | 31.9k | /* 17 OLAP_FIELD_TYPE_VARCHAR */ PrimitiveType::TYPE_VARCHAR, |
160 | 31.9k | /* 18 OLAP_FIELD_TYPE_STRUCT */ PrimitiveType::TYPE_STRUCT, |
161 | 31.9k | /* 19 OLAP_FIELD_TYPE_ARRAY */ PrimitiveType::TYPE_ARRAY, |
162 | 31.9k | /* 20 OLAP_FIELD_TYPE_MAP */ PrimitiveType::TYPE_MAP, |
163 | 31.9k | /* 21 OLAP_FIELD_TYPE_UNKNOWN */ PrimitiveType::INVALID_TYPE, |
164 | 31.9k | /* 22 OLAP_FIELD_TYPE_NONE */ PrimitiveType::TYPE_NULL, |
165 | 31.9k | /* 23 OLAP_FIELD_TYPE_HLL */ PrimitiveType::TYPE_HLL, |
166 | 31.9k | /* 24 OLAP_FIELD_TYPE_BOOL */ PrimitiveType::TYPE_BOOLEAN, |
167 | 31.9k | /* 25 OLAP_FIELD_TYPE_BITMAP */ PrimitiveType::TYPE_BITMAP, |
168 | 31.9k | /* 26 OLAP_FIELD_TYPE_STRING */ PrimitiveType::TYPE_STRING, |
169 | 31.9k | /* 27 OLAP_FIELD_TYPE_QUANTILE_STATE */ PrimitiveType::TYPE_QUANTILE_STATE, |
170 | 31.9k | /* 28 OLAP_FIELD_TYPE_DATEV2 */ PrimitiveType::TYPE_DATEV2, |
171 | 31.9k | /* 29 OLAP_FIELD_TYPE_DATETIMEV2 */ PrimitiveType::TYPE_DATETIMEV2, |
172 | 31.9k | /* 30 OLAP_FIELD_TYPE_TIMEV2 */ PrimitiveType::TYPE_TIMEV2, |
173 | 31.9k | /* 31 OLAP_FIELD_TYPE_DECIMAL32 */ PrimitiveType::TYPE_DECIMAL32, |
174 | 31.9k | /* 32 OLAP_FIELD_TYPE_DECIMAL64 */ PrimitiveType::TYPE_DECIMAL64, |
175 | 31.9k | /* 33 OLAP_FIELD_TYPE_DECIMAL128I */ PrimitiveType::TYPE_DECIMAL128I, |
176 | 31.9k | /* 34 OLAP_FIELD_TYPE_JSONB */ PrimitiveType::TYPE_JSONB, |
177 | 31.9k | /* 35 OLAP_FIELD_TYPE_VARIANT */ PrimitiveType::TYPE_VARIANT, |
178 | 31.9k | /* 36 OLAP_FIELD_TYPE_AGG_STATE */ PrimitiveType::TYPE_AGG_STATE, |
179 | 31.9k | /* 37 OLAP_FIELD_TYPE_DECIMAL256 */ PrimitiveType::TYPE_DECIMAL256, |
180 | 31.9k | /* 38 OLAP_FIELD_TYPE_IPV4 */ PrimitiveType::TYPE_IPV4, |
181 | 31.9k | /* 39 OLAP_FIELD_TYPE_IPV6 */ PrimitiveType::TYPE_IPV6, |
182 | 31.9k | /* 40 OLAP_FIELD_TYPE_TIMESTAMPTZ */ PrimitiveType::TYPE_TIMESTAMPTZ, |
183 | 31.9k | }; |
184 | | |
185 | 31.9k | int idx = static_cast<int>(type); |
186 | 31.9k | return mapping[idx]; |
187 | 31.9k | } |
188 | | |
189 | 5.64M | FieldType TabletColumn::get_field_type_by_string(const std::string& type_str) { |
190 | 5.64M | std::string upper_type_str = type_str; |
191 | 5.64M | std::transform(type_str.begin(), type_str.end(), upper_type_str.begin(), |
192 | 35.7M | [](auto c) { return std::toupper(c); }); |
193 | 5.64M | FieldType type; |
194 | | |
195 | 5.64M | if (0 == upper_type_str.compare("TINYINT")) { |
196 | 199k | type = FieldType::OLAP_FIELD_TYPE_TINYINT; |
197 | 5.44M | } else if (0 == upper_type_str.compare("SMALLINT")) { |
198 | 77.8k | type = FieldType::OLAP_FIELD_TYPE_SMALLINT; |
199 | 5.36M | } else if (0 == upper_type_str.compare("INT")) { |
200 | 889k | type = FieldType::OLAP_FIELD_TYPE_INT; |
201 | 4.47M | } else if (0 == upper_type_str.compare("BIGINT")) { |
202 | 700k | type = FieldType::OLAP_FIELD_TYPE_BIGINT; |
203 | 3.77M | } else if (0 == upper_type_str.compare("LARGEINT")) { |
204 | 69.9k | type = FieldType::OLAP_FIELD_TYPE_LARGEINT; |
205 | 3.70M | } else if (0 == upper_type_str.compare("UNSIGNED_TINYINT")) { |
206 | 0 | type = FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT; |
207 | 3.70M | } else if (0 == upper_type_str.compare("UNSIGNED_SMALLINT")) { |
208 | 0 | type = FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT; |
209 | 3.70M | } else if (0 == upper_type_str.compare("UNSIGNED_INT")) { |
210 | 0 | type = FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT; |
211 | 3.70M | } else if (0 == upper_type_str.compare("UNSIGNED_BIGINT")) { |
212 | 0 | type = FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT; |
213 | 3.70M | } else if (0 == upper_type_str.compare("IPV4")) { |
214 | 1.41k | type = FieldType::OLAP_FIELD_TYPE_IPV4; |
215 | 3.70M | } else if (0 == upper_type_str.compare("IPV6")) { |
216 | 1.48k | type = FieldType::OLAP_FIELD_TYPE_IPV6; |
217 | 3.70M | } else if (0 == upper_type_str.compare("FLOAT")) { |
218 | 64.4k | type = FieldType::OLAP_FIELD_TYPE_FLOAT; |
219 | 3.64M | } else if (0 == upper_type_str.compare("DISCRETE_DOUBLE")) { |
220 | 0 | type = FieldType::OLAP_FIELD_TYPE_DISCRETE_DOUBLE; |
221 | 3.64M | } else if (0 == upper_type_str.compare("DOUBLE")) { |
222 | 75.8k | type = FieldType::OLAP_FIELD_TYPE_DOUBLE; |
223 | 3.56M | } else if (0 == upper_type_str.compare("CHAR")) { |
224 | 97.2k | type = FieldType::OLAP_FIELD_TYPE_CHAR; |
225 | 3.46M | } else if (0 == upper_type_str.compare("DATE")) { |
226 | 764 | type = FieldType::OLAP_FIELD_TYPE_DATE; |
227 | 3.46M | } else if (0 == upper_type_str.compare("DATEV2")) { |
228 | 462k | type = FieldType::OLAP_FIELD_TYPE_DATEV2; |
229 | 3.00M | } else if (0 == upper_type_str.compare("DATETIMEV2")) { |
230 | 271k | type = FieldType::OLAP_FIELD_TYPE_DATETIMEV2; |
231 | 2.73M | } else if (0 == upper_type_str.compare("DATETIME")) { |
232 | 1.32k | type = FieldType::OLAP_FIELD_TYPE_DATETIME; |
233 | 2.73M | } else if (0 == upper_type_str.compare("TIMESTAMPTZ")) { |
234 | 30.6k | type = FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ; |
235 | 2.70M | } else if (0 == upper_type_str.compare("DECIMAL32")) { |
236 | 129k | type = FieldType::OLAP_FIELD_TYPE_DECIMAL32; |
237 | 2.57M | } else if (0 == upper_type_str.compare("DECIMAL64")) { |
238 | 298k | type = FieldType::OLAP_FIELD_TYPE_DECIMAL64; |
239 | 2.27M | } else if (0 == upper_type_str.compare("DECIMAL128I")) { |
240 | 107k | type = FieldType::OLAP_FIELD_TYPE_DECIMAL128I; |
241 | 2.16M | } else if (0 == upper_type_str.compare("DECIMAL256")) { |
242 | 13.0k | type = FieldType::OLAP_FIELD_TYPE_DECIMAL256; |
243 | 2.15M | } else if (0 == upper_type_str.compare(0, 7, "DECIMAL")) { |
244 | 2.26k | type = FieldType::OLAP_FIELD_TYPE_DECIMAL; |
245 | 2.14M | } else if (0 == upper_type_str.compare(0, 7, "VARCHAR")) { |
246 | 1.54M | type = FieldType::OLAP_FIELD_TYPE_VARCHAR; |
247 | 1.54M | } else if (0 == upper_type_str.compare("STRING")) { |
248 | 225k | type = FieldType::OLAP_FIELD_TYPE_STRING; |
249 | 380k | } else if (0 == upper_type_str.compare("JSONB")) { |
250 | 28.6k | type = FieldType::OLAP_FIELD_TYPE_JSONB; |
251 | 351k | } else if (0 == upper_type_str.compare("VARIANT")) { |
252 | 9.12k | type = FieldType::OLAP_FIELD_TYPE_VARIANT; |
253 | 342k | } else if (0 == upper_type_str.compare("BOOLEAN")) { |
254 | 62.5k | type = FieldType::OLAP_FIELD_TYPE_BOOL; |
255 | 280k | } else if (0 == upper_type_str.compare(0, 3, "HLL")) { |
256 | 6.40k | type = FieldType::OLAP_FIELD_TYPE_HLL; |
257 | 273k | } else if (0 == upper_type_str.compare("STRUCT")) { |
258 | 22.8k | type = FieldType::OLAP_FIELD_TYPE_STRUCT; |
259 | 250k | } else if (0 == upper_type_str.compare("LIST")) { |
260 | 0 | type = FieldType::OLAP_FIELD_TYPE_ARRAY; |
261 | 250k | } else if (0 == upper_type_str.compare("MAP")) { |
262 | 77.6k | type = FieldType::OLAP_FIELD_TYPE_MAP; |
263 | 173k | } else if (0 == upper_type_str.compare("OBJECT")) { |
264 | 7.82k | type = FieldType::OLAP_FIELD_TYPE_BITMAP; |
265 | 165k | } else if (0 == upper_type_str.compare("BITMAP")) { |
266 | 84 | type = FieldType::OLAP_FIELD_TYPE_BITMAP; |
267 | 165k | } else if (0 == upper_type_str.compare("ARRAY")) { |
268 | 161k | type = FieldType::OLAP_FIELD_TYPE_ARRAY; |
269 | 161k | } else if (0 == upper_type_str.compare("QUANTILE_STATE")) { |
270 | 5.33k | type = FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE; |
271 | 18.4E | } else if (0 == upper_type_str.compare("AGG_STATE")) { |
272 | 1.18k | type = FieldType::OLAP_FIELD_TYPE_AGG_STATE; |
273 | 18.4E | } else { |
274 | 18.4E | LOG(WARNING) << "invalid type string. [type='" << type_str << "']"; |
275 | 18.4E | type = FieldType::OLAP_FIELD_TYPE_UNKNOWN; |
276 | 18.4E | } |
277 | | |
278 | 5.64M | return type; |
279 | 5.64M | } |
280 | | |
281 | 5.59M | FieldAggregationMethod TabletColumn::get_aggregation_type_by_string(const std::string& str) { |
282 | 5.59M | std::string upper_str = str; |
283 | 5.59M | std::transform(str.begin(), str.end(), upper_str.begin(), |
284 | 22.8M | [](auto c) { return std::toupper(c); }); |
285 | 5.59M | FieldAggregationMethod aggregation_type; |
286 | | |
287 | 5.59M | if (0 == upper_str.compare("NONE")) { |
288 | 5.41M | aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE; |
289 | 5.41M | } else if (0 == upper_str.compare("SUM")) { |
290 | 65.1k | aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM; |
291 | 108k | } else if (0 == upper_str.compare("MIN")) { |
292 | 3.62k | aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MIN; |
293 | 104k | } else if (0 == upper_str.compare("MAX")) { |
294 | 5.88k | aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MAX; |
295 | 98.7k | } else if (0 == upper_str.compare("REPLACE")) { |
296 | 71.3k | aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE; |
297 | 71.3k | } else if (0 == upper_str.compare("REPLACE_IF_NOT_NULL")) { |
298 | 9.63k | aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL; |
299 | 17.7k | } else if (0 == upper_str.compare("HLL_UNION")) { |
300 | 6.12k | aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_HLL_UNION; |
301 | 11.5k | } else if (0 == upper_str.compare("BITMAP_UNION")) { |
302 | 7.28k | aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_BITMAP_UNION; |
303 | 7.28k | } else if (0 == upper_str.compare("QUANTILE_UNION")) { |
304 | 5.32k | aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_QUANTILE_UNION; |
305 | 18.4E | } else if (!upper_str.empty()) { |
306 | 992 | aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_GENERIC; |
307 | 18.4E | } else { |
308 | 18.4E | aggregation_type = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_UNKNOWN; |
309 | 18.4E | } |
310 | | |
311 | 5.59M | return aggregation_type; |
312 | 5.59M | } |
313 | | |
314 | 8.58M | std::string TabletColumn::get_string_by_field_type(FieldType type) { |
315 | 8.58M | switch (type) { |
316 | 352k | case FieldType::OLAP_FIELD_TYPE_TINYINT: |
317 | 352k | return "TINYINT"; |
318 | | |
319 | 0 | case FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT: |
320 | 0 | return "UNSIGNED_TINYINT"; |
321 | | |
322 | 136k | case FieldType::OLAP_FIELD_TYPE_SMALLINT: |
323 | 136k | return "SMALLINT"; |
324 | | |
325 | 0 | case FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT: |
326 | 0 | return "UNSIGNED_SMALLINT"; |
327 | | |
328 | 1.22M | case FieldType::OLAP_FIELD_TYPE_INT: |
329 | 1.22M | return "INT"; |
330 | | |
331 | 0 | case FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT: |
332 | 0 | return "UNSIGNED_INT"; |
333 | | |
334 | 1.13M | case FieldType::OLAP_FIELD_TYPE_BIGINT: |
335 | 1.13M | return "BIGINT"; |
336 | | |
337 | 122k | case FieldType::OLAP_FIELD_TYPE_LARGEINT: |
338 | 122k | return "LARGEINT"; |
339 | | |
340 | 0 | case FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT: |
341 | 0 | return "UNSIGNED_BIGINT"; |
342 | | |
343 | 2.43k | case FieldType::OLAP_FIELD_TYPE_IPV4: |
344 | 2.43k | return "IPV4"; |
345 | | |
346 | 3.02k | case FieldType::OLAP_FIELD_TYPE_IPV6: |
347 | 3.02k | return "IPV6"; |
348 | | |
349 | 114k | case FieldType::OLAP_FIELD_TYPE_FLOAT: |
350 | 114k | return "FLOAT"; |
351 | | |
352 | 132k | case FieldType::OLAP_FIELD_TYPE_DOUBLE: |
353 | 132k | return "DOUBLE"; |
354 | | |
355 | 0 | case FieldType::OLAP_FIELD_TYPE_DISCRETE_DOUBLE: |
356 | 0 | return "DISCRETE_DOUBLE"; |
357 | | |
358 | 147k | case FieldType::OLAP_FIELD_TYPE_CHAR: |
359 | 147k | return "CHAR"; |
360 | | |
361 | 1.81k | case FieldType::OLAP_FIELD_TYPE_DATE: |
362 | 1.81k | return "DATE"; |
363 | | |
364 | 564k | case FieldType::OLAP_FIELD_TYPE_DATEV2: |
365 | 564k | return "DATEV2"; |
366 | | |
367 | 2.52k | case FieldType::OLAP_FIELD_TYPE_DATETIME: |
368 | 2.52k | return "DATETIME"; |
369 | | |
370 | 446k | case FieldType::OLAP_FIELD_TYPE_DATETIMEV2: |
371 | 446k | return "DATETIMEV2"; |
372 | | |
373 | 115k | case FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ: |
374 | 115k | return "TIMESTAMPTZ"; |
375 | | |
376 | 6.35k | case FieldType::OLAP_FIELD_TYPE_DECIMAL: |
377 | 6.35k | return "DECIMAL"; |
378 | | |
379 | 171k | case FieldType::OLAP_FIELD_TYPE_DECIMAL32: |
380 | 171k | return "DECIMAL32"; |
381 | | |
382 | 320k | case FieldType::OLAP_FIELD_TYPE_DECIMAL64: |
383 | 320k | return "DECIMAL64"; |
384 | | |
385 | 169k | case FieldType::OLAP_FIELD_TYPE_DECIMAL128I: |
386 | 169k | return "DECIMAL128I"; |
387 | | |
388 | 14.6k | case FieldType::OLAP_FIELD_TYPE_DECIMAL256: |
389 | 14.6k | return "DECIMAL256"; |
390 | | |
391 | 2.21M | case FieldType::OLAP_FIELD_TYPE_VARCHAR: |
392 | 2.21M | return "VARCHAR"; |
393 | | |
394 | 60.3k | case FieldType::OLAP_FIELD_TYPE_JSONB: |
395 | 60.3k | return "JSONB"; |
396 | | |
397 | 9.02k | case FieldType::OLAP_FIELD_TYPE_VARIANT: |
398 | 9.02k | return "VARIANT"; |
399 | | |
400 | 499k | case FieldType::OLAP_FIELD_TYPE_STRING: |
401 | 499k | return "STRING"; |
402 | | |
403 | 108k | case FieldType::OLAP_FIELD_TYPE_BOOL: |
404 | 108k | return "BOOLEAN"; |
405 | | |
406 | 15.5k | case FieldType::OLAP_FIELD_TYPE_HLL: |
407 | 15.5k | return "HLL"; |
408 | | |
409 | 29.9k | case FieldType::OLAP_FIELD_TYPE_STRUCT: |
410 | 29.9k | return "STRUCT"; |
411 | | |
412 | 322k | case FieldType::OLAP_FIELD_TYPE_ARRAY: |
413 | 322k | return "ARRAY"; |
414 | | |
415 | 104k | case FieldType::OLAP_FIELD_TYPE_MAP: |
416 | 104k | return "MAP"; |
417 | | |
418 | 15.0k | case FieldType::OLAP_FIELD_TYPE_BITMAP: |
419 | 15.0k | return "OBJECT"; |
420 | 11.7k | case FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE: |
421 | 11.7k | return "QUANTILE_STATE"; |
422 | 1.45k | case FieldType::OLAP_FIELD_TYPE_AGG_STATE: |
423 | 1.45k | return "AGG_STATE"; |
424 | 0 | default: |
425 | 0 | return "UNKNOWN"; |
426 | 8.58M | } |
427 | 8.58M | } |
428 | | |
429 | 2.74k | std::string TabletColumn::get_string_by_aggregation_type(FieldAggregationMethod type) { |
430 | 2.74k | switch (type) { |
431 | 1.82k | case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE: |
432 | 1.82k | return "NONE"; |
433 | | |
434 | 807 | case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM: |
435 | 807 | return "SUM"; |
436 | | |
437 | 4 | case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MIN: |
438 | 4 | return "MIN"; |
439 | | |
440 | 4 | case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MAX: |
441 | 4 | return "MAX"; |
442 | | |
443 | 23 | case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE: |
444 | 23 | return "REPLACE"; |
445 | | |
446 | 14 | case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL: |
447 | 14 | return "REPLACE_IF_NOT_NULL"; |
448 | | |
449 | 48 | case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_HLL_UNION: |
450 | 48 | return "HLL_UNION"; |
451 | | |
452 | 20 | case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_BITMAP_UNION: |
453 | 20 | return "BITMAP_UNION"; |
454 | | |
455 | 0 | case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_QUANTILE_UNION: |
456 | 0 | return "QUANTILE_UNION"; |
457 | | |
458 | 2 | default: |
459 | 2 | return "UNKNOWN"; |
460 | 2.74k | } |
461 | 2.74k | } |
462 | | |
463 | 951k | uint32_t TabletColumn::get_field_length_by_type(TPrimitiveType::type type, uint32_t string_length) { |
464 | 951k | switch (type) { |
465 | 52.6k | case TPrimitiveType::TINYINT: |
466 | 57.9k | case TPrimitiveType::BOOLEAN: |
467 | 57.9k | return 1; |
468 | 8.77k | case TPrimitiveType::SMALLINT: |
469 | 8.77k | return 2; |
470 | 39.8k | case TPrimitiveType::INT: |
471 | 39.8k | return 4; |
472 | 234k | case TPrimitiveType::BIGINT: |
473 | 234k | return 8; |
474 | 6.10k | case TPrimitiveType::LARGEINT: |
475 | 6.10k | return 16; |
476 | 16 | case TPrimitiveType::IPV4: |
477 | 16 | return 4; |
478 | 16 | case TPrimitiveType::IPV6: |
479 | 16 | return 16; |
480 | 104 | case TPrimitiveType::DATE: |
481 | 104 | return 3; |
482 | 9.02k | case TPrimitiveType::DATEV2: |
483 | 9.02k | return 4; |
484 | 112 | case TPrimitiveType::DATETIME: |
485 | 112 | return 8; |
486 | 56.9k | case TPrimitiveType::DATETIMEV2: |
487 | 56.9k | case TPrimitiveType::TIMESTAMPTZ: |
488 | 56.9k | return 8; |
489 | 6.67k | case TPrimitiveType::FLOAT: |
490 | 6.67k | return 4; |
491 | 8.73k | case TPrimitiveType::DOUBLE: |
492 | 8.73k | return 8; |
493 | 0 | case TPrimitiveType::QUANTILE_STATE: |
494 | 66 | case TPrimitiveType::BITMAP: |
495 | 66 | return 16; |
496 | 6.54k | case TPrimitiveType::CHAR: |
497 | 6.54k | return string_length; |
498 | 407k | case TPrimitiveType::VARCHAR: |
499 | 407k | case TPrimitiveType::HLL: |
500 | 407k | case TPrimitiveType::AGG_STATE: |
501 | 407k | return string_length + sizeof(OLAP_VARCHAR_MAX_LENGTH); |
502 | 78.4k | case TPrimitiveType::STRING: |
503 | 78.5k | case TPrimitiveType::VARIANT: |
504 | 78.5k | return string_length + sizeof(OLAP_STRING_MAX_LENGTH); |
505 | 1.22k | case TPrimitiveType::JSONB: |
506 | 1.22k | return string_length + sizeof(OLAP_JSONB_MAX_LENGTH); |
507 | 836 | case TPrimitiveType::STRUCT: |
508 | | // Note that(xy): this is the length of struct type itself, |
509 | | // the length of its subtypes are not included. |
510 | 836 | return OLAP_STRUCT_MAX_LENGTH; |
511 | 11.2k | case TPrimitiveType::ARRAY: |
512 | 11.2k | return OLAP_ARRAY_MAX_LENGTH; |
513 | 1.16k | case TPrimitiveType::MAP: |
514 | 1.16k | return OLAP_MAP_MAX_LENGTH; |
515 | 4.43k | case TPrimitiveType::DECIMAL32: |
516 | 4.43k | return 4; |
517 | 7.85k | case TPrimitiveType::DECIMAL64: |
518 | 7.85k | return 8; |
519 | 4.72k | case TPrimitiveType::DECIMAL128I: |
520 | 4.72k | return 16; |
521 | 0 | case TPrimitiveType::DECIMAL256: |
522 | 0 | return 32; |
523 | 104 | case TPrimitiveType::DECIMALV2: |
524 | 104 | return 12; // use 12 bytes in olap engine. |
525 | 0 | default: |
526 | 0 | LOG(WARNING) << "unknown field type. [type=" << type << "]"; |
527 | 0 | return 0; |
528 | 951k | } |
529 | 951k | } |
530 | | |
531 | 9 | bool TabletColumn::has_char_type() const { |
532 | 9 | switch (_type) { |
533 | 3 | case FieldType::OLAP_FIELD_TYPE_CHAR: { |
534 | 3 | return true; |
535 | 0 | } |
536 | 4 | case FieldType::OLAP_FIELD_TYPE_ARRAY: |
537 | 4 | case FieldType::OLAP_FIELD_TYPE_MAP: |
538 | 4 | case FieldType::OLAP_FIELD_TYPE_STRUCT: { |
539 | 4 | return std::any_of(_sub_columns.begin(), _sub_columns.end(), |
540 | 4 | [&](const auto& sub) -> bool { return sub->has_char_type(); }); |
541 | 4 | } |
542 | 2 | default: |
543 | 2 | return false; |
544 | 9 | } |
545 | 9 | } |
546 | | |
547 | 4.77M | TabletColumn::TabletColumn() : _aggregation(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE) {} |
548 | | |
549 | 37 | TabletColumn::TabletColumn(FieldAggregationMethod agg, FieldType type) { |
550 | 37 | _aggregation = agg; |
551 | 37 | _type = type; |
552 | 37 | } |
553 | | |
554 | 17 | TabletColumn::TabletColumn(FieldAggregationMethod agg, FieldType filed_type, bool is_nullable) { |
555 | 17 | _aggregation = agg; |
556 | 17 | _type = filed_type; |
557 | 17 | _length = cast_set<int32_t>(get_scalar_type_info(filed_type)->size()); |
558 | 17 | _is_nullable = is_nullable; |
559 | 17 | } |
560 | | |
561 | | TabletColumn::TabletColumn(FieldAggregationMethod agg, FieldType filed_type, bool is_nullable, |
562 | 6.20k | int32_t unique_id, size_t length) { |
563 | 6.20k | _aggregation = agg; |
564 | 6.20k | _type = filed_type; |
565 | 6.20k | _is_nullable = is_nullable; |
566 | 6.20k | _unique_id = unique_id; |
567 | 6.20k | _length = cast_set<int32_t>(length); |
568 | 6.20k | } |
569 | | |
570 | 44 | TabletColumn::TabletColumn(const ColumnPB& column) { |
571 | 44 | init_from_pb(column); |
572 | 44 | } |
573 | | |
574 | 831k | TabletColumn::TabletColumn(const TColumn& column) { |
575 | 831k | init_from_thrift(column); |
576 | 831k | } |
577 | | |
578 | 892k | void TabletColumn::init_from_thrift(const TColumn& tcolumn) { |
579 | 892k | ColumnPB column_pb; |
580 | 892k | TabletMeta::init_column_from_tcolumn(tcolumn.col_unique_id, tcolumn, &column_pb); |
581 | 892k | init_from_pb(column_pb); |
582 | 892k | } |
583 | | |
584 | 5.60M | void TabletColumn::init_from_pb(const ColumnPB& column) { |
585 | 5.60M | _unique_id = column.unique_id(); |
586 | 5.60M | _col_name = column.name(); |
587 | 5.60M | _col_name_lower_case = to_lower(_col_name); |
588 | 5.60M | _type = TabletColumn::get_field_type_by_string(column.type()); |
589 | 5.60M | _is_key = column.is_key(); |
590 | 5.60M | _is_nullable = column.is_nullable(); |
591 | 5.60M | _is_auto_increment = column.is_auto_increment(); |
592 | 5.60M | if (column.has_is_on_update_current_timestamp()) { |
593 | 5.60M | _is_on_update_current_timestamp = column.is_on_update_current_timestamp(); |
594 | 5.60M | } |
595 | | |
596 | 5.60M | _has_default_value = column.has_default_value(); |
597 | 5.60M | if (_has_default_value) { |
598 | 673k | _default_value = column.default_value(); |
599 | 673k | } |
600 | | |
601 | 5.60M | if (column.has_precision()) { |
602 | 5.59M | _is_decimal = true; |
603 | 5.59M | _precision = column.precision(); |
604 | 5.59M | } else { |
605 | 5.72k | _is_decimal = false; |
606 | 5.72k | } |
607 | 5.60M | if (column.has_frac()) { |
608 | 5.59M | _frac = column.frac(); |
609 | 5.59M | } |
610 | 5.60M | _length = column.length(); |
611 | 5.60M | _index_length = column.index_length(); |
612 | 5.60M | if (column.has_is_bf_column()) { |
613 | 29.6k | _is_bf_column = column.is_bf_column(); |
614 | 5.57M | } else { |
615 | 5.57M | _is_bf_column = false; |
616 | 5.57M | } |
617 | 5.60M | if (column.has_aggregation()) { |
618 | 5.59M | _aggregation = get_aggregation_type_by_string(column.aggregation()); |
619 | 5.59M | _aggregation_name = column.aggregation(); |
620 | 5.59M | } |
621 | | |
622 | 5.60M | if (_type == FieldType::OLAP_FIELD_TYPE_AGG_STATE) { |
623 | 1.17k | _result_is_nullable = column.result_is_nullable(); |
624 | 1.17k | _be_exec_version = column.be_exec_version(); |
625 | 1.17k | } |
626 | | |
627 | 5.60M | if (column.has_visible()) { |
628 | 5.60M | _visible = column.visible(); |
629 | 5.60M | } |
630 | 5.60M | if (_type == FieldType::OLAP_FIELD_TYPE_ARRAY) { |
631 | 158k | CHECK(column.children_columns_size() == 1) |
632 | 0 | << "ARRAY type should has 1 children types, but got " |
633 | 0 | << column.children_columns_size(); |
634 | 158k | } |
635 | 5.60M | if (_type == FieldType::OLAP_FIELD_TYPE_MAP) { |
636 | 76.6k | DCHECK(column.children_columns_size() == 2) |
637 | 0 | << "MAP type should has 2 children types, but got " |
638 | 0 | << column.children_columns_size(); |
639 | 76.6k | if (UNLIKELY(column.children_columns_size() != 2)) { |
640 | 0 | LOG(WARNING) << "MAP type should has 2 children types, but got " |
641 | 0 | << column.children_columns_size(); |
642 | 0 | } |
643 | 76.6k | } |
644 | 6.10M | for (int i = 0; i < column.children_columns_size(); i++) { |
645 | 506k | TabletColumn child_column; |
646 | 506k | child_column.init_from_pb(column.children_columns(i)); |
647 | 506k | add_sub_column(child_column); |
648 | 506k | } |
649 | 5.60M | if (column.has_column_path_info()) { |
650 | 8.99k | _column_path = std::make_shared<PathInData>(); |
651 | 8.99k | _column_path->from_protobuf(column.column_path_info()); |
652 | 8.99k | _parent_col_unique_id = column.column_path_info().parrent_column_unique_id(); |
653 | 8.99k | } |
654 | 5.60M | if (is_variant_type() && !column.has_column_path_info()) { |
655 | | // set path info for variant root column, to prevent from missing |
656 | 121 | _column_path = std::make_shared<PathInData>(_col_name_lower_case); |
657 | | // _parent_col_unique_id = _unique_id; |
658 | 121 | } |
659 | 5.60M | if (column.has_variant_max_subcolumns_count()) { |
660 | 5.60M | _variant.max_subcolumns_count = column.variant_max_subcolumns_count(); |
661 | 5.60M | } |
662 | 5.60M | if (column.has_variant_enable_typed_paths_to_sparse()) { |
663 | 5.60M | _variant.enable_typed_paths_to_sparse = column.variant_enable_typed_paths_to_sparse(); |
664 | 5.60M | } |
665 | 5.60M | if (column.has_variant_max_sparse_column_statistics_size()) { |
666 | 5.60M | _variant.max_sparse_column_statistics_size = |
667 | 5.60M | column.variant_max_sparse_column_statistics_size(); |
668 | 5.60M | } |
669 | 5.60M | if (column.has_variant_sparse_hash_shard_count()) { |
670 | 5.58M | _variant.sparse_hash_shard_count = column.variant_sparse_hash_shard_count(); |
671 | 5.58M | } |
672 | 5.60M | if (column.has_variant_enable_doc_mode()) { |
673 | 5.58M | _variant.enable_doc_mode = column.variant_enable_doc_mode(); |
674 | 5.58M | } |
675 | 5.60M | if (column.has_variant_doc_materialization_min_rows()) { |
676 | 5.58M | _variant.doc_materialization_min_rows = column.variant_doc_materialization_min_rows(); |
677 | 5.58M | } |
678 | 5.60M | if (column.has_variant_doc_hash_shard_count()) { |
679 | 5.58M | _variant.doc_hash_shard_count = column.variant_doc_hash_shard_count(); |
680 | 5.58M | } |
681 | 5.60M | if (column.has_variant_enable_nested_group()) { |
682 | 5.58M | _variant.enable_nested_group = column.variant_enable_nested_group(); |
683 | 5.58M | } |
684 | 5.60M | if (column.has_pattern_type()) { |
685 | 4.64M | _pattern_type = column.pattern_type(); |
686 | 4.64M | } |
687 | 5.60M | } |
688 | | |
689 | | TabletColumn TabletColumn::create_materialized_variant_column(const std::string& root, |
690 | | const std::vector<std::string>& paths, |
691 | | int32_t parent_unique_id, |
692 | 31 | int32_t max_subcolumns_count) { |
693 | 31 | TabletColumn subcol; |
694 | 31 | subcol.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT); |
695 | 31 | subcol.set_is_nullable(true); |
696 | 31 | subcol.set_unique_id(-1); |
697 | 31 | subcol.set_parent_unique_id(parent_unique_id); |
698 | 31 | PathInData path(root, paths); |
699 | 31 | subcol.set_path_info(path); |
700 | 31 | subcol.set_name(path.get_path()); |
701 | 31 | subcol.set_variant_max_subcolumns_count(max_subcolumns_count); |
702 | 31 | return subcol; |
703 | 31 | } |
704 | | |
705 | 8.57M | void TabletColumn::to_schema_pb(ColumnPB* column) const { |
706 | 8.57M | column->set_unique_id(_unique_id); |
707 | 8.57M | column->set_name(_col_name); |
708 | 8.57M | column->set_type(get_string_by_field_type(_type)); |
709 | 8.57M | column->set_is_key(_is_key); |
710 | 8.57M | column->set_is_nullable(_is_nullable); |
711 | 8.57M | column->set_is_auto_increment(_is_auto_increment); |
712 | 8.57M | column->set_is_on_update_current_timestamp(_is_on_update_current_timestamp); |
713 | 8.57M | if (_has_default_value) { |
714 | 1.20M | column->set_default_value(_default_value); |
715 | 1.20M | } |
716 | 8.57M | if (_is_decimal) { |
717 | 8.56M | column->set_precision(_precision); |
718 | 8.56M | column->set_frac(_frac); |
719 | 8.56M | } |
720 | 8.57M | column->set_length(_length); |
721 | 8.57M | column->set_index_length(_index_length); |
722 | 8.57M | if (_is_bf_column) { |
723 | 56.7k | column->set_is_bf_column(_is_bf_column); |
724 | 56.7k | } |
725 | 8.57M | if (!_aggregation_name.empty()) { |
726 | 8.56M | column->set_aggregation(_aggregation_name); |
727 | 8.56M | } |
728 | 8.57M | column->set_result_is_nullable(_result_is_nullable); |
729 | 8.57M | column->set_be_exec_version(_be_exec_version); |
730 | 8.57M | column->set_visible(_visible); |
731 | | |
732 | 8.57M | if (_type == FieldType::OLAP_FIELD_TYPE_ARRAY) { |
733 | 18.4E | CHECK(_sub_columns.size() == 1) |
734 | 18.4E | << "ARRAY type should has 1 children types, but got " << _sub_columns.size(); |
735 | 322k | } |
736 | 8.57M | if (_type == FieldType::OLAP_FIELD_TYPE_MAP) { |
737 | 18.4E | DCHECK(_sub_columns.size() == 2) |
738 | 18.4E | << "MAP type should has 2 children types, but got " << _sub_columns.size(); |
739 | 104k | if (UNLIKELY(_sub_columns.size() != 2)) { |
740 | 0 | LOG(WARNING) << "MAP type should has 2 children types, but got " << _sub_columns.size(); |
741 | 0 | } |
742 | 104k | } |
743 | | |
744 | 9.32M | for (size_t i = 0; i < _sub_columns.size(); i++) { |
745 | 748k | ColumnPB* child = column->add_children_columns(); |
746 | 748k | _sub_columns[i]->to_schema_pb(child); |
747 | 748k | } |
748 | | |
749 | | // set parts info |
750 | 8.57M | if (has_path_info()) { |
751 | | // CHECK_GT(_parent_col_unique_id, 0); |
752 | 9.02k | _column_path->to_protobuf(column->mutable_column_path_info(), _parent_col_unique_id); |
753 | | // Update unstable information for variant columns. Some of the fields in the tablet schema |
754 | | // are irrelevant for variant sub-columns, but retaining them may lead to an excessive growth |
755 | | // in the number of tablet schema cache entries. |
756 | 9.02k | if (_type == FieldType::OLAP_FIELD_TYPE_STRING) { |
757 | 0 | column->set_length(INT_MAX); |
758 | 0 | } |
759 | 9.02k | column->set_index_length(0); |
760 | 9.02k | } |
761 | 8.57M | column->set_variant_max_subcolumns_count(_variant.max_subcolumns_count); |
762 | 8.57M | column->set_pattern_type(_pattern_type); |
763 | 8.57M | column->set_variant_enable_typed_paths_to_sparse(_variant.enable_typed_paths_to_sparse); |
764 | 8.57M | column->set_variant_max_sparse_column_statistics_size( |
765 | 8.57M | _variant.max_sparse_column_statistics_size); |
766 | 8.57M | column->set_variant_sparse_hash_shard_count(_variant.sparse_hash_shard_count); |
767 | 8.57M | column->set_variant_enable_doc_mode(_variant.enable_doc_mode); |
768 | 8.57M | column->set_variant_doc_materialization_min_rows(_variant.doc_materialization_min_rows); |
769 | 8.57M | column->set_variant_doc_hash_shard_count(_variant.doc_hash_shard_count); |
770 | 8.57M | column->set_variant_enable_nested_group(_variant.enable_nested_group); |
771 | 8.57M | } |
772 | | |
773 | 507k | void TabletColumn::add_sub_column(TabletColumn& sub_column) { |
774 | 507k | _sub_columns.push_back(std::make_shared<TabletColumn>(sub_column)); |
775 | 507k | sub_column._parent_col_unique_id = this->_unique_id; |
776 | 507k | _sub_column_count += 1; |
777 | 507k | } |
778 | | |
779 | 132k | bool TabletColumn::is_row_store_column() const { |
780 | 132k | return _col_name == BeConsts::ROW_STORE_COL; |
781 | 132k | } |
782 | | |
783 | | AggregateFunctionPtr TabletColumn::get_aggregate_function_union(DataTypePtr type, |
784 | 8 | int current_be_exec_version) const { |
785 | 8 | const auto* state_type = assert_cast<const DataTypeAggState*>(type.get()); |
786 | 8 | BeExecVersionManager::check_function_compatibility( |
787 | 8 | current_be_exec_version, _be_exec_version, |
788 | 8 | state_type->get_nested_function()->get_name()); |
789 | 8 | return AggregateStateUnion::create(state_type->get_nested_function(), {type}, type); |
790 | 8 | } |
791 | | |
792 | | AggregateFunctionPtr TabletColumn::get_aggregate_function(std::string suffix, |
793 | 928 | int current_be_exec_version) const { |
794 | 928 | AggregateFunctionPtr function = nullptr; |
795 | | |
796 | 928 | auto type = DataTypeFactory::instance().create_data_type(*this); |
797 | 928 | if (type && type->get_primitive_type() == PrimitiveType::TYPE_AGG_STATE) { |
798 | 8 | function = get_aggregate_function_union(type, current_be_exec_version); |
799 | 920 | } else { |
800 | 920 | std::string origin_name = TabletColumn::get_string_by_aggregation_type(_aggregation); |
801 | 920 | std::string agg_name = origin_name + suffix; |
802 | 920 | std::transform(agg_name.begin(), agg_name.end(), agg_name.begin(), |
803 | 8.93k | [](unsigned char c) { return std::tolower(c); }); |
804 | 920 | function = AggregateFunctionSimpleFactory::instance().get( |
805 | 920 | agg_name, {type}, type, type->is_nullable(), |
806 | 920 | BeExecVersionManager::get_newest_version()); |
807 | 920 | if (!function) { |
808 | 0 | LOG(WARNING) << "get column aggregate function failed, aggregation_name=" << origin_name |
809 | 0 | << ", column_type=" << type->get_name(); |
810 | 0 | } |
811 | 920 | } |
812 | 928 | if (function) { |
813 | 928 | function->set_version(_be_exec_version); |
814 | 928 | return function; |
815 | 928 | } |
816 | 0 | return nullptr; |
817 | 928 | } |
818 | | |
819 | 1.90k | void TabletColumn::set_path_info(const PathInData& path) { |
820 | 1.90k | _column_path = std::make_shared<PathInData>(path); |
821 | 1.90k | } |
822 | | |
823 | 791 | DataTypePtr TabletColumn::get_vec_type() const { |
824 | 791 | return DataTypeFactory::instance().create_data_type(*this); |
825 | 791 | } |
826 | | |
827 | | // escape '.' and '_' |
828 | 16.4M | std::string escape_for_path_name(const std::string& s) { |
829 | 16.4M | std::string res; |
830 | 16.4M | const char* pos = s.data(); |
831 | 16.4M | const char* end = pos + s.size(); |
832 | 16.4M | while (pos != end) { |
833 | 737 | unsigned char c = *pos; |
834 | 737 | if (c == '.' || c == '_') { |
835 | 83 | res += '%'; |
836 | 83 | res += hex_digit_uppercase(c / 16); |
837 | 83 | res += hex_digit_uppercase(c % 16); |
838 | 654 | } else { |
839 | 654 | res += c; |
840 | 654 | } |
841 | 737 | ++pos; |
842 | 737 | } |
843 | 16.4M | return res; |
844 | 16.4M | } |
845 | | |
846 | 26 | void TabletIndex::set_escaped_escaped_index_suffix_path(const std::string& path_name) { |
847 | 26 | std::string escaped_path = escape_for_path_name(path_name); |
848 | 26 | _escaped_index_suffix_path = escaped_path; |
849 | 26 | } |
850 | | |
851 | | void TabletIndex::init_from_thrift(const TOlapTableIndex& index, |
852 | 18 | const TabletSchema& tablet_schema) { |
853 | 18 | _index_id = index.index_id; |
854 | 18 | _index_name = index.index_name; |
855 | | // init col_unique_id in index at be side, since col_unique_id may be -1 at fe side |
856 | | // get column unique id by name |
857 | 18 | std::vector<int32_t> col_unique_ids(index.columns.size()); |
858 | 36 | for (size_t i = 0; i < index.columns.size(); i++) { |
859 | 18 | auto column_idx = tablet_schema.field_index(index.columns[i]); |
860 | 18 | if (column_idx >= 0) { |
861 | 14 | col_unique_ids[i] = tablet_schema.column(column_idx).unique_id(); |
862 | 14 | } else { |
863 | | // if column unique id not found by column name, find by column unique id |
864 | | // column unique id can not found means this column is a new column added by light schema change |
865 | 4 | if (index.__isset.column_unique_ids && !index.column_unique_ids.empty() && |
866 | 4 | tablet_schema.has_column_unique_id(index.column_unique_ids[i])) { |
867 | 1 | col_unique_ids[i] = index.column_unique_ids[i]; |
868 | 3 | } else { |
869 | 3 | col_unique_ids[i] = -1; |
870 | 3 | } |
871 | 4 | } |
872 | 18 | } |
873 | 18 | _col_unique_ids = std::move(col_unique_ids); |
874 | | |
875 | 18 | switch (index.index_type) { |
876 | 0 | case TIndexType::BITMAP: |
877 | 0 | _index_type = IndexType::BITMAP; |
878 | 0 | break; |
879 | 17 | case TIndexType::INVERTED: |
880 | 17 | _index_type = IndexType::INVERTED; |
881 | 17 | break; |
882 | 1 | case TIndexType::ANN: |
883 | 1 | _index_type = IndexType::ANN; |
884 | 1 | break; |
885 | 0 | case TIndexType::BLOOMFILTER: |
886 | 0 | _index_type = IndexType::BLOOMFILTER; |
887 | 0 | break; |
888 | 0 | case TIndexType::NGRAM_BF: |
889 | 0 | _index_type = IndexType::NGRAM_BF; |
890 | 0 | break; |
891 | 18 | } |
892 | 18 | if (index.__isset.properties) { |
893 | 4 | for (auto kv : index.properties) { |
894 | 4 | _properties[kv.first] = kv.second; |
895 | 4 | } |
896 | 1 | } |
897 | 18 | } |
898 | | |
899 | | void TabletIndex::init_from_thrift(const TOlapTableIndex& index, |
900 | 1 | const std::vector<int32_t>& column_uids) { |
901 | 1 | _index_id = index.index_id; |
902 | 1 | _index_name = index.index_name; |
903 | 1 | _col_unique_ids = column_uids; |
904 | | |
905 | 1 | switch (index.index_type) { |
906 | 0 | case TIndexType::BITMAP: |
907 | 0 | _index_type = IndexType::BITMAP; |
908 | 0 | break; |
909 | 1 | case TIndexType::INVERTED: |
910 | 1 | _index_type = IndexType::INVERTED; |
911 | 1 | break; |
912 | 0 | case TIndexType::ANN: |
913 | 0 | _index_type = IndexType::ANN; |
914 | 0 | break; |
915 | 0 | case TIndexType::BLOOMFILTER: |
916 | 0 | _index_type = IndexType::BLOOMFILTER; |
917 | 0 | break; |
918 | 0 | case TIndexType::NGRAM_BF: |
919 | 0 | _index_type = IndexType::NGRAM_BF; |
920 | 0 | break; |
921 | 1 | } |
922 | 1 | if (index.__isset.properties) { |
923 | 3 | for (auto kv : index.properties) { |
924 | 3 | _properties[kv.first] = kv.second; |
925 | 3 | } |
926 | 1 | } |
927 | 1 | } |
928 | | |
929 | 281k | void TabletIndex::init_from_pb(const TabletIndexPB& index) { |
930 | 281k | _index_id = index.index_id(); |
931 | 281k | _index_name = index.index_name(); |
932 | 281k | _col_unique_ids.clear(); |
933 | 281k | for (auto col_unique_id : index.col_unique_id()) { |
934 | 281k | _col_unique_ids.push_back(col_unique_id); |
935 | 281k | } |
936 | 281k | _index_type = index.index_type(); |
937 | 281k | for (const auto& kv : index.properties()) { |
938 | 151k | _properties[kv.first] = kv.second; |
939 | 151k | } |
940 | 281k | _escaped_index_suffix_path = index.index_suffix_name(); |
941 | 281k | } |
942 | | |
943 | 374k | void TabletIndex::to_schema_pb(TabletIndexPB* index) const { |
944 | 374k | index->set_index_id(_index_id); |
945 | 374k | index->set_index_name(_index_name); |
946 | 374k | index->clear_col_unique_id(); |
947 | 374k | for (auto col_unique_id : _col_unique_ids) { |
948 | 374k | index->add_col_unique_id(col_unique_id); |
949 | 374k | } |
950 | 374k | index->set_index_type(_index_type); |
951 | 374k | for (const auto& kv : _properties) { |
952 | 261k | DBUG_EXECUTE_IF("tablet_schema.to_schema_pb", { |
953 | 261k | if (kv.first == INVERTED_INDEX_PARSER_LOWERCASE_KEY) { |
954 | 261k | continue; |
955 | 261k | } |
956 | 261k | }) |
957 | 261k | (*index->mutable_properties())[kv.first] = kv.second; |
958 | 261k | } |
959 | 374k | index->set_index_suffix_name(_escaped_index_suffix_path); |
960 | | |
961 | 374k | DBUG_EXECUTE_IF("tablet_schema.to_schema_pb", { return; }) |
962 | | |
963 | | // Only add lower_case=true default for built-in analyzers/parsers, NOT for custom analyzers |
964 | | // Custom analyzer: lower_case is determined by analyzer's internal token filter |
965 | 374k | if (!_properties.empty() && !_properties.contains(INVERTED_INDEX_PARSER_LOWERCASE_KEY)) { |
966 | 243 | bool has_parser = _properties.contains(INVERTED_INDEX_PARSER_KEY) || |
967 | 243 | _properties.contains(INVERTED_INDEX_PARSER_KEY_ALIAS); |
968 | 243 | std::string analyzer_name = get_analyzer_name_from_properties(_properties); |
969 | 243 | bool is_builtin = analyzer_name.empty() || |
970 | 243 | segment_v2::inverted_index::InvertedIndexAnalyzer::is_builtin_analyzer( |
971 | 64 | analyzer_name); |
972 | 243 | if (has_parser || is_builtin) { |
973 | 179 | (*index->mutable_properties())[INVERTED_INDEX_PARSER_LOWERCASE_KEY] = |
974 | 179 | INVERTED_INDEX_PARSER_TRUE; |
975 | 179 | } |
976 | 243 | } |
977 | 374k | } |
978 | | |
979 | 854k | TabletSchema::TabletSchema() = default; |
980 | | |
981 | 847k | TabletSchema::~TabletSchema() {} |
982 | | |
983 | 422k | int64_t TabletSchema::get_metadata_size() const { |
984 | 422k | return sizeof(TabletSchema); |
985 | 422k | } |
986 | | |
987 | 839k | void TabletSchema::append_column(TabletColumn column, ColumnType col_type) { |
988 | 839k | if (column.is_key()) { |
989 | 316k | _num_key_columns++; |
990 | 316k | } |
991 | 839k | if (column.is_nullable()) { |
992 | 430k | _num_null_columns++; |
993 | 430k | } |
994 | 839k | if (column.is_variant_type()) { |
995 | 128 | ++_num_variant_columns; |
996 | 128 | if (!column.has_path_info()) { |
997 | 23 | const std::string& col_name = column.name_lower_case(); |
998 | 23 | PathInData path(col_name); |
999 | 23 | column.set_path_info(path); |
1000 | 23 | } |
1001 | 128 | } |
1002 | 839k | if (UNLIKELY(column.name() == DELETE_SIGN)) { |
1003 | 44.2k | _delete_sign_idx = _num_columns; |
1004 | 795k | } else if (UNLIKELY(column.name() == SEQUENCE_COL)) { |
1005 | 4 | _sequence_col_idx = _num_columns; |
1006 | 795k | } else if (UNLIKELY(column.name() == VERSION_COL)) { |
1007 | 44.2k | _version_col_idx = _num_columns; |
1008 | 751k | } else if (UNLIKELY(column.name() == SKIP_BITMAP_COL)) { |
1009 | 0 | _skip_bitmap_col_idx = _num_columns; |
1010 | 751k | } else if (UNLIKELY(column.name().starts_with(BeConsts::VIRTUAL_COLUMN_PREFIX))) { |
1011 | 0 | _vir_col_idx_to_unique_id[_num_columns] = column.unique_id(); |
1012 | 0 | } |
1013 | 839k | _field_uniqueid_to_index[column.unique_id()] = _num_columns; |
1014 | 839k | _cols.push_back(std::make_shared<TabletColumn>(std::move(column))); |
1015 | | // The dropped column may have same name with exsiting column, so that |
1016 | | // not add to name to index map, only for uid to index map |
1017 | 840k | if (col_type == ColumnType::VARIANT || _cols.back()->is_variant_type() || |
1018 | 840k | _cols.back()->is_extracted_column()) { |
1019 | 184 | _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns); |
1020 | 184 | _field_path_to_index[_cols.back()->path_info_ptr().get()] = _num_columns; |
1021 | 840k | } else if (col_type == ColumnType::NORMAL) { |
1022 | 840k | _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns); |
1023 | 840k | } |
1024 | 839k | _num_columns++; |
1025 | 839k | _num_virtual_columns = _vir_col_idx_to_unique_id.size(); |
1026 | | // generate column index mapping for seq map |
1027 | 839k | if (_seq_col_uid_to_value_cols_uid.contains(column.unique_id())) { |
1028 | 0 | const auto seq_idx = _field_uniqueid_to_index[column.unique_id()]; |
1029 | 0 | if (!_seq_col_idx_to_value_cols_idx.contains(seq_idx)) { |
1030 | 0 | _seq_col_idx_to_value_cols_idx[seq_idx] = {}; |
1031 | 0 | } |
1032 | 0 | } |
1033 | 839k | if (_value_col_uid_to_seq_col_uid.contains(column.unique_id())) { |
1034 | 0 | const auto seq_uid = _value_col_uid_to_seq_col_uid[column.unique_id()]; |
1035 | 0 | if (_field_uniqueid_to_index.contains(seq_uid)) { |
1036 | 0 | bool all_uid_index_found = true; |
1037 | 0 | std::vector<int32_t> value_cols_index; |
1038 | 0 | for (const auto value_col_uid : _seq_col_uid_to_value_cols_uid[seq_uid]) { |
1039 | 0 | if (!_field_uniqueid_to_index.contains(value_col_uid)) { |
1040 | 0 | all_uid_index_found = false; |
1041 | 0 | break; |
1042 | 0 | } |
1043 | 0 | value_cols_index.push_back(_field_uniqueid_to_index[value_col_uid]); |
1044 | 0 | } |
1045 | 0 | if (all_uid_index_found) { |
1046 | 0 | const auto seq_idx = _field_uniqueid_to_index[seq_uid]; |
1047 | 0 | for (const auto col_idx : value_cols_index) { |
1048 | 0 | _seq_col_idx_to_value_cols_idx[seq_idx].push_back(col_idx); |
1049 | 0 | _value_col_idx_to_seq_col_idx[col_idx] = seq_idx; |
1050 | 0 | } |
1051 | 0 | _value_col_idx_to_seq_col_idx[seq_idx] = seq_idx; |
1052 | 0 | } |
1053 | 0 | } |
1054 | 0 | } |
1055 | 839k | } |
1056 | | |
1057 | 73 | void TabletSchema::append_index(TabletIndex&& index) { |
1058 | 73 | size_t index_pos = _indexes.size(); |
1059 | 73 | _indexes.push_back(std::make_shared<TabletIndex>(index)); |
1060 | 76 | for (int32_t id : _indexes.back()->col_unique_ids()) { |
1061 | 76 | if (auto field_pattern = _indexes.back()->field_pattern(); !field_pattern.empty()) { |
1062 | 6 | auto& pattern_to_index_map = _index_by_unique_id_with_pattern[id]; |
1063 | 6 | pattern_to_index_map[field_pattern].emplace_back(_indexes.back()); |
1064 | 70 | } else { |
1065 | 70 | IndexKey key = std::make_tuple(_indexes.back()->index_type(), id, |
1066 | 70 | _indexes.back()->get_index_suffix()); |
1067 | 70 | _col_id_suffix_to_index[key].push_back(index_pos); |
1068 | 70 | } |
1069 | 76 | } |
1070 | 73 | } |
1071 | | |
1072 | 0 | void TabletSchema::replace_column(size_t pos, TabletColumn new_col) { |
1073 | 0 | CHECK_LT(pos, num_columns()) << " outof range"; |
1074 | 0 | _cols[pos] = std::make_shared<TabletColumn>(std::move(new_col)); |
1075 | 0 | } |
1076 | | |
1077 | 3 | void TabletSchema::clear_index() { |
1078 | 3 | _indexes.clear(); |
1079 | 3 | _col_id_suffix_to_index.clear(); |
1080 | 3 | _index_by_unique_id_with_pattern.clear(); |
1081 | 3 | } |
1082 | | |
1083 | 7 | void TabletSchema::remove_index(int64_t index_id) { |
1084 | 7 | std::vector<TabletIndexPtr> new_indexes; |
1085 | 11 | for (auto& index : _indexes) { |
1086 | 11 | if (index->index_id() != index_id) { |
1087 | 4 | new_indexes.emplace_back(std::move(index)); |
1088 | 4 | } |
1089 | 11 | } |
1090 | 7 | _indexes = std::move(new_indexes); |
1091 | 7 | _col_id_suffix_to_index.clear(); |
1092 | 7 | _index_by_unique_id_with_pattern.clear(); |
1093 | 11 | for (size_t new_pos = 0; new_pos < _indexes.size(); ++new_pos) { |
1094 | 4 | const auto& index = _indexes[new_pos]; |
1095 | 4 | for (int32_t col_uid : index->col_unique_ids()) { |
1096 | 4 | if (auto field_pattern = index->field_pattern(); !field_pattern.empty()) { |
1097 | 0 | auto& pattern_to_index_map = _index_by_unique_id_with_pattern[col_uid]; |
1098 | 0 | pattern_to_index_map[field_pattern].emplace_back(index); |
1099 | 4 | } else { |
1100 | 4 | IndexKey key = std::make_tuple(_indexes.back()->index_type(), col_uid, |
1101 | 4 | _indexes.back()->get_index_suffix()); |
1102 | 4 | _col_id_suffix_to_index[key].push_back(new_pos); |
1103 | 4 | } |
1104 | 4 | } |
1105 | 4 | } |
1106 | 7 | } |
1107 | | |
1108 | 51.2k | void TabletSchema::clear_columns() { |
1109 | 51.2k | _field_path_to_index.clear(); |
1110 | 51.2k | _field_name_to_index.clear(); |
1111 | 51.2k | _field_uniqueid_to_index.clear(); |
1112 | 51.2k | _num_columns = 0; |
1113 | 51.2k | _num_variant_columns = 0; |
1114 | 51.2k | _num_null_columns = 0; |
1115 | 51.2k | _num_key_columns = 0; |
1116 | 51.2k | _seq_col_idx_to_value_cols_idx.clear(); |
1117 | 51.2k | _value_col_idx_to_seq_col_idx.clear(); |
1118 | 51.2k | _cols.clear(); |
1119 | 51.2k | } |
1120 | | |
1121 | | void TabletSchema::init_from_pb(const TabletSchemaPB& schema, bool ignore_extracted_columns, |
1122 | 407k | bool reuse_cache_column) { |
1123 | 407k | _keys_type = schema.keys_type(); |
1124 | 407k | _num_columns = 0; |
1125 | 407k | _num_variant_columns = 0; |
1126 | 407k | _num_key_columns = 0; |
1127 | 407k | _num_null_columns = 0; |
1128 | 407k | _cols.clear(); |
1129 | 407k | _indexes.clear(); |
1130 | 407k | _index_by_unique_id_with_pattern.clear(); |
1131 | 407k | _col_id_suffix_to_index.clear(); |
1132 | 407k | _field_name_to_index.clear(); |
1133 | 407k | _field_uniqueid_to_index.clear(); |
1134 | 407k | _cluster_key_uids.clear(); |
1135 | 407k | for (const auto& i : schema.cluster_key_uids()) { |
1136 | 4.23k | _cluster_key_uids.push_back(i); |
1137 | 4.23k | } |
1138 | 4.23M | for (auto& column_pb : schema.column()) { |
1139 | 4.23M | TabletColumnPtr column; |
1140 | 4.23M | if (reuse_cache_column) { |
1141 | 80.5k | auto pair = TabletColumnObjectPool::instance()->insert( |
1142 | 80.5k | deterministic_string_serialize(column_pb)); |
1143 | 80.5k | column = pair.second; |
1144 | | // Release the handle quickly, because we use shared ptr to manage column. |
1145 | | // It often core during tablet schema copy to another schema because handle's |
1146 | | // reference count should be managed mannually. |
1147 | 80.5k | TabletColumnObjectPool::instance()->release(pair.first); |
1148 | 4.15M | } else { |
1149 | 4.15M | column = std::make_shared<TabletColumn>(); |
1150 | 4.15M | column->init_from_pb(column_pb); |
1151 | 4.15M | } |
1152 | 4.23M | if (ignore_extracted_columns && column->is_extracted_column()) { |
1153 | 0 | continue; |
1154 | 0 | } |
1155 | 4.23M | if (column->is_key()) { |
1156 | 848k | _num_key_columns++; |
1157 | 848k | } |
1158 | 4.23M | if (column->is_nullable()) { |
1159 | 2.85M | _num_null_columns++; |
1160 | 2.85M | } |
1161 | 4.23M | if (column->is_variant_type()) { |
1162 | 9.33k | ++_num_variant_columns; |
1163 | 9.33k | } |
1164 | | |
1165 | 4.23M | _cols.emplace_back(std::move(column)); |
1166 | 4.23M | if (!_cols.back()->is_extracted_column()) { |
1167 | 4.23M | _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns); |
1168 | 4.23M | _field_uniqueid_to_index[_cols.back()->unique_id()] = _num_columns; |
1169 | 4.23M | } |
1170 | 4.23M | _num_columns++; |
1171 | 4.23M | } |
1172 | 407k | for (const auto& index_pb : schema.index()) { |
1173 | 283k | TabletIndexPtr index; |
1174 | 283k | if (reuse_cache_column) { |
1175 | 6.00k | auto pair = TabletColumnObjectPool::instance()->insert_index( |
1176 | 6.00k | deterministic_string_serialize(index_pb)); |
1177 | 6.00k | index = pair.second; |
1178 | | // Only need the value to be cached by the pool, release it quickly because the handle need |
1179 | | // record reference count mannually, or it will core during tablet schema copy method. |
1180 | 6.00k | TabletColumnObjectPool::instance()->release(pair.first); |
1181 | 277k | } else { |
1182 | 277k | index = std::make_shared<TabletIndex>(); |
1183 | 277k | index->init_from_pb(index_pb); |
1184 | 277k | } |
1185 | 283k | size_t index_pos = _indexes.size(); |
1186 | 283k | _indexes.emplace_back(std::move(index)); |
1187 | 283k | for (int32_t col_uid : _indexes.back()->col_unique_ids()) { |
1188 | 283k | if (auto field_pattern = _indexes.back()->field_pattern(); !field_pattern.empty()) { |
1189 | 3.13k | auto& pattern_to_index_map = _index_by_unique_id_with_pattern[col_uid]; |
1190 | 3.13k | pattern_to_index_map[field_pattern].emplace_back(_indexes.back()); |
1191 | 279k | } else { |
1192 | 279k | IndexKey key = std::make_tuple(_indexes.back()->index_type(), col_uid, |
1193 | 279k | _indexes.back()->get_index_suffix()); |
1194 | 279k | _col_id_suffix_to_index[key].push_back(index_pos); |
1195 | 279k | } |
1196 | 283k | } |
1197 | 283k | } |
1198 | 407k | _num_short_key_columns = schema.num_short_key_columns(); |
1199 | 407k | _num_rows_per_row_block = schema.num_rows_per_row_block(); |
1200 | 407k | _compress_kind = schema.compress_kind(); |
1201 | 407k | _next_column_unique_id = schema.next_column_unique_id(); |
1202 | 407k | if (schema.has_bf_fpp()) { |
1203 | 7.88k | _has_bf_fpp = true; |
1204 | 7.88k | _bf_fpp = schema.bf_fpp(); |
1205 | 399k | } else { |
1206 | 399k | _has_bf_fpp = false; |
1207 | 399k | _bf_fpp = BLOOM_FILTER_DEFAULT_FPP; |
1208 | 399k | } |
1209 | 407k | _is_in_memory = schema.is_in_memory(); |
1210 | 407k | _disable_auto_compaction = schema.disable_auto_compaction(); |
1211 | 407k | _enable_single_replica_compaction = schema.enable_single_replica_compaction(); |
1212 | 407k | _store_row_column = schema.store_row_column(); |
1213 | 407k | _skip_write_index_on_load = schema.skip_write_index_on_load(); |
1214 | 407k | _delete_sign_idx = schema.delete_sign_idx(); |
1215 | 407k | _sequence_col_idx = schema.sequence_col_idx(); |
1216 | 407k | _version_col_idx = schema.version_col_idx(); |
1217 | 407k | _skip_bitmap_col_idx = schema.skip_bitmap_col_idx(); |
1218 | 407k | _sort_type = schema.sort_type(); |
1219 | 407k | _sort_col_num = schema.sort_col_num(); |
1220 | 407k | _compression_type = schema.compression_type(); |
1221 | 407k | _row_store_page_size = schema.row_store_page_size(); |
1222 | 407k | _storage_page_size = schema.storage_page_size(); |
1223 | 407k | _storage_dict_page_size = schema.storage_dict_page_size(); |
1224 | 407k | _schema_version = schema.schema_version(); |
1225 | 407k | if (schema.has_seq_map()) { |
1226 | 407k | auto column_groups_pb = schema.seq_map(); |
1227 | 407k | _seq_col_uid_to_value_cols_uid.clear(); |
1228 | 407k | _value_col_uid_to_seq_col_uid.clear(); |
1229 | 407k | _seq_col_idx_to_value_cols_idx.clear(); |
1230 | 407k | _value_col_idx_to_seq_col_idx.clear(); |
1231 | | /* |
1232 | | * ColumnGroupsPB is a list of cg_pb, and |
1233 | | * ColumnGroupsPB do not have begin() or end() method. |
1234 | | * we must use for(i=0;i<xx;i++) loop |
1235 | | */ |
1236 | 407k | for (int i = 0; i < column_groups_pb.cg_size(); i++) { |
1237 | 24 | ColumnGroupPB cg_pb = column_groups_pb.cg(i); |
1238 | 24 | uint32_t key_uid = cg_pb.sequence_column(); |
1239 | 24 | auto found = _field_uniqueid_to_index.find(key_uid); |
1240 | 24 | DCHECK(found != _field_uniqueid_to_index.end()) |
1241 | 0 | << "could not find sequence col with unique id = " << key_uid |
1242 | 0 | << " table_id=" << _table_id; |
1243 | 24 | int32_t seq_index = found->second; |
1244 | 24 | _seq_col_uid_to_value_cols_uid[key_uid] = {}; |
1245 | 24 | _seq_col_idx_to_value_cols_idx[seq_index] = {}; |
1246 | 38 | for (auto val_uid : cg_pb.columns_in_group()) { |
1247 | 38 | _seq_col_uid_to_value_cols_uid[key_uid].push_back(val_uid); |
1248 | 38 | found = _field_uniqueid_to_index.find(val_uid); |
1249 | 38 | DCHECK(found != _field_uniqueid_to_index.end()) |
1250 | 0 | << "could not find value col with unique id = " << key_uid |
1251 | 0 | << " table_id=" << _table_id; |
1252 | 38 | int32_t val_index = found->second; |
1253 | 38 | _seq_col_idx_to_value_cols_idx[seq_index].push_back(val_index); |
1254 | 38 | } |
1255 | 24 | } |
1256 | | |
1257 | 407k | if (!_seq_col_uid_to_value_cols_uid.empty()) { |
1258 | | /* |
1259 | | |** KEY **| ** VALUE ** | |
1260 | | ------------------------------------ |
1261 | | |** KEY **| CDE is value| sequence| |
1262 | | |----|----|----|----|----|----|----| |
1263 | | A B C D E S1 S2 |
1264 | | 0 1 2 3 4 5 6 |
1265 | | for example: _seq_map is {5:{2,3}, 6:{4}} |
1266 | | then, _value_to_seq = {2:5,3:5,5:5,4:6,6:6} |
1267 | | */ |
1268 | 24 | for (auto& [seq_uid, cols_uid] : _seq_col_uid_to_value_cols_uid) { |
1269 | 38 | for (auto col_uid : cols_uid) { |
1270 | 38 | _value_col_uid_to_seq_col_uid[col_uid] = seq_uid; |
1271 | 38 | } |
1272 | 24 | _value_col_uid_to_seq_col_uid[seq_uid] = seq_uid; |
1273 | 24 | } |
1274 | | |
1275 | 24 | for (auto& [seq_idx, value_cols_idx] : _seq_col_idx_to_value_cols_idx) { |
1276 | 38 | for (auto col_idx : value_cols_idx) { |
1277 | 38 | _value_col_idx_to_seq_col_idx[col_idx] = seq_idx; |
1278 | 38 | } |
1279 | 24 | _value_col_idx_to_seq_col_idx[seq_idx] = seq_idx; |
1280 | 24 | } |
1281 | 14 | } |
1282 | 407k | } |
1283 | | // Default to V1 inverted index storage format for backward compatibility if not specified in schema. |
1284 | 407k | if (!schema.has_inverted_index_storage_format()) { |
1285 | 291 | _inverted_index_storage_format = InvertedIndexStorageFormatPB::V1; |
1286 | 407k | } else { |
1287 | 407k | _inverted_index_storage_format = schema.inverted_index_storage_format(); |
1288 | 407k | } |
1289 | | |
1290 | 407k | _row_store_column_unique_ids.assign(schema.row_store_column_unique_ids().begin(), |
1291 | 407k | schema.row_store_column_unique_ids().end()); |
1292 | 407k | _enable_variant_flatten_nested = schema.enable_variant_flatten_nested(); |
1293 | 407k | if (schema.has_is_external_segment_column_meta_used()) { |
1294 | 400k | _is_external_segment_column_meta_used = schema.is_external_segment_column_meta_used(); |
1295 | 400k | } else { |
1296 | 7.03k | _is_external_segment_column_meta_used = false; |
1297 | 7.03k | } |
1298 | 407k | if (schema.has_integer_type_default_use_plain_encoding()) { |
1299 | 400k | _integer_type_default_use_plain_encoding = schema.integer_type_default_use_plain_encoding(); |
1300 | 400k | } |
1301 | 407k | if (schema.has_binary_plain_encoding_default_impl()) { |
1302 | 400k | _binary_plain_encoding_default_impl = schema.binary_plain_encoding_default_impl(); |
1303 | 400k | } |
1304 | 407k | update_metadata_size(); |
1305 | 407k | } |
1306 | | |
1307 | 11.0k | void TabletSchema::copy_from(const TabletSchema& tablet_schema) { |
1308 | 11.0k | TabletSchemaPB tablet_schema_pb; |
1309 | 11.0k | tablet_schema.to_schema_pb(&tablet_schema_pb); |
1310 | 11.0k | init_from_pb(tablet_schema_pb); |
1311 | 11.0k | _table_id = tablet_schema.table_id(); |
1312 | 11.0k | _path_set_info_map = tablet_schema._path_set_info_map; |
1313 | 11.0k | } |
1314 | | |
1315 | 15.3k | void TabletSchema::shawdow_copy_without_columns(const TabletSchema& tablet_schema) { |
1316 | 15.3k | *this = tablet_schema; |
1317 | 15.3k | _field_path_to_index.clear(); |
1318 | 15.3k | _field_name_to_index.clear(); |
1319 | 15.3k | _field_uniqueid_to_index.clear(); |
1320 | 15.3k | _num_columns = 0; |
1321 | 15.3k | _num_variant_columns = 0; |
1322 | 15.3k | _num_null_columns = 0; |
1323 | 15.3k | _num_key_columns = 0; |
1324 | 15.3k | _cols.clear(); |
1325 | 15.3k | _delete_sign_idx = -1; |
1326 | 15.3k | _sequence_col_idx = -1; |
1327 | 15.3k | _version_col_idx = -1; |
1328 | 15.3k | } |
1329 | | |
1330 | 0 | void TabletSchema::update_index_info_from(const TabletSchema& tablet_schema) { |
1331 | 0 | for (auto& col : _cols) { |
1332 | 0 | if (col->unique_id() < 0) { |
1333 | 0 | continue; |
1334 | 0 | } |
1335 | 0 | const auto iter = tablet_schema._field_uniqueid_to_index.find(col->unique_id()); |
1336 | 0 | if (iter == tablet_schema._field_uniqueid_to_index.end()) { |
1337 | 0 | continue; |
1338 | 0 | } |
1339 | 0 | auto col_idx = iter->second; |
1340 | 0 | if (col_idx < 0 || col_idx >= tablet_schema._cols.size()) { |
1341 | 0 | continue; |
1342 | 0 | } |
1343 | 0 | col->set_is_bf_column(tablet_schema._cols[col_idx]->is_bf_column()); |
1344 | 0 | } |
1345 | 0 | } |
1346 | | |
1347 | 432k | std::string TabletSchema::to_key() const { |
1348 | 432k | TabletSchemaPB pb; |
1349 | 432k | to_schema_pb(&pb); |
1350 | 432k | return TabletSchema::deterministic_string_serialize(pb); |
1351 | 432k | } |
1352 | | |
// Rebuilds this schema for a load: table-level properties are taken from
// `ori_tablet_schema`, the schema version from `version`, and the columns and
// indexes from `index`. All column/index state currently held by this object
// is discarded first.
//
// `index_id` is not read anywhere in this function body.
// `index` is dereferenced without a null check.
//
// NOTE(review): _field_name_to_index and _field_uniqueid_to_index are cleared and
// repopulated below, but _field_path_to_index is neither cleared nor filled here
// (shawdow_copy_without_columns clears all three) — confirm that is intended.
void TabletSchema::build_current_tablet_schema(int64_t index_id, int32_t version,
                                               const OlapTableIndexSchema* index,
                                               const TabletSchema& ori_tablet_schema) {
    // Table-level properties inherited from ori_tablet_schema.
    _keys_type = ori_tablet_schema.keys_type();
    _num_short_key_columns = ori_tablet_schema.num_short_key_columns();
    _num_rows_per_row_block = ori_tablet_schema.num_rows_per_row_block();
    _compress_kind = ori_tablet_schema.compress_kind();

    // todo(yixiu): unique_id
    _next_column_unique_id = ori_tablet_schema.next_column_unique_id();
    _is_in_memory = ori_tablet_schema.is_in_memory();
    _disable_auto_compaction = ori_tablet_schema.disable_auto_compaction();
    _enable_single_replica_compaction = ori_tablet_schema.enable_single_replica_compaction();
    _skip_write_index_on_load = ori_tablet_schema.skip_write_index_on_load();
    _sort_type = ori_tablet_schema.sort_type();
    _sort_col_num = ori_tablet_schema.sort_col_num();
    _row_store_page_size = ori_tablet_schema.row_store_page_size();
    _storage_page_size = ori_tablet_schema.storage_page_size();
    _storage_dict_page_size = ori_tablet_schema.storage_dict_page_size();
    _enable_variant_flatten_nested = ori_tablet_schema.variant_flatten_nested();

    // Everything below is derived from the table schema param (`version`, `index`).
    // Reset all per-column / per-index state before rebuilding it.
    _schema_version = version;
    _num_columns = 0;
    _num_variant_columns = 0;
    _num_key_columns = 0;
    _num_null_columns = 0;
    bool has_bf_columns = false;
    _cols.clear();
    _indexes.clear();
    _col_id_suffix_to_index.clear();
    _index_by_unique_id_with_pattern.clear();
    _field_name_to_index.clear();
    _field_uniqueid_to_index.clear();
    _delete_sign_idx = -1;
    _sequence_col_idx = -1;
    _version_col_idx = -1;
    _skip_bitmap_col_idx = -1;
    // Cluster key uids come from the original schema, not from `index`.
    _cluster_key_uids.clear();
    for (const auto& i : ori_tablet_schema._cluster_key_uids) {
        _cluster_key_uids.push_back(i);
    }
    for (auto& column : index->columns) {
        // Update the counters for the column about to be appended.
        if (column->is_key()) {
            _num_key_columns++;
        }
        if (column->is_nullable()) {
            _num_null_columns++;
        }
        if (column->is_bf_column()) {
            has_bf_columns = true;
        }
        if (column->is_variant_type()) {
            ++_num_variant_columns;
        }
        // _num_columns is still the position this column will occupy in _cols,
        // so it is recorded as the index of the special column when the name matches.
        if (UNLIKELY(column->name() == DELETE_SIGN)) {
            _delete_sign_idx = _num_columns;
        } else if (UNLIKELY(column->name() == SEQUENCE_COL)) {
            _sequence_col_idx = _num_columns;
        } else if (UNLIKELY(column->name() == VERSION_COL)) {
            _version_col_idx = _num_columns;
        } else if (UNLIKELY(column->name() == SKIP_BITMAP_COL)) {
            _skip_bitmap_col_idx = _num_columns;
        }
        // Reuse TabletColumn object from pool to reduce memory consumption.
        // The pool is keyed by the deterministic serialization of the column's ColumnPB.
        TabletColumnPtr new_column;
        ColumnPB column_pb;
        column->to_schema_pb(&column_pb);
        auto pair = TabletColumnObjectPool::instance()->insert(
                deterministic_string_serialize(column_pb));
        new_column = pair.second;
        // Release the handle quickly, because we use shared ptr to manage column
        TabletColumnObjectPool::instance()->release(pair.first);
        _cols.emplace_back(std::move(new_column));
        // Register the appended column under its name and unique id, then advance the position.
        _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns);
        _field_uniqueid_to_index[_cols.back()->unique_id()] = _num_columns;
        _num_columns++;
    }

    for (const auto& i : index->indexes) {
        // Position the index is about to take in _indexes.
        size_t index_pos = _indexes.size();
        // Reuse TabletIndex object from pool to reduce memory consumption
        TabletIndexPtr new_index;
        TabletIndexPB index_pb;
        i->to_schema_pb(&index_pb);
        auto pair = TabletColumnObjectPool::instance()->insert_index(
                deterministic_string_serialize(index_pb));
        new_index = pair.second;
        // Release the handle quickly, because we use shared ptr to manage index
        TabletColumnObjectPool::instance()->release(pair.first);
        _indexes.emplace_back(std::move(new_index));
        for (int32_t col_uid : _indexes.back()->col_unique_ids()) {
            // Indexes with a field pattern are registered per (column uid, pattern);
            // all others are registered by (index type, column uid, index suffix).
            if (auto field_pattern = _indexes.back()->field_pattern(); !field_pattern.empty()) {
                auto& pattern_to_index_map = _index_by_unique_id_with_pattern[col_uid];
                pattern_to_index_map[field_pattern].emplace_back(_indexes.back());
            } else {
                IndexKey key = std::make_tuple(_indexes.back()->index_type(), col_uid,
                                               _indexes.back()->get_index_suffix());
                _col_id_suffix_to_index[key].push_back(index_pos);
            }
        }
    }

    // The bloom filter fpp is only inherited when at least one column uses a bloom filter;
    // otherwise the default is stored and flagged as "not set".
    if (has_bf_columns) {
        _has_bf_fpp = true;
        _bf_fpp = ori_tablet_schema.bloom_filter_fpp();
    } else {
        _has_bf_fpp = false;
        _bf_fpp = BLOOM_FILTER_DEFAULT_FPP;
    }
}
1465 | | |
1466 | 179 | void TabletSchema::merge_dropped_columns(const TabletSchema& src_schema) { |
1467 | | // If they are the same tablet schema object, then just return |
1468 | 179 | if (this == &src_schema) { |
1469 | 0 | return; |
1470 | 0 | } |
1471 | 2.99k | for (const auto& src_col : src_schema.columns()) { |
1472 | 2.99k | if (_field_uniqueid_to_index.find(src_col->unique_id()) == _field_uniqueid_to_index.end()) { |
1473 | 0 | CHECK(!src_col->is_key()) |
1474 | 0 | << src_col->name() << " is key column, should not be dropped."; |
1475 | 0 | ColumnPB src_col_pb; |
1476 | | // There are some pointer in tablet column, not sure the reference relation, so |
1477 | | // that deep copy it. |
1478 | 0 | src_col->to_schema_pb(&src_col_pb); |
1479 | 0 | TabletColumn new_col(src_col_pb); |
1480 | 0 | append_column(new_col, TabletSchema::ColumnType::DROPPED); |
1481 | 0 | } |
1482 | 2.99k | } |
1483 | 179 | } |
1484 | | |
1485 | 232 | TabletSchemaSPtr TabletSchema::copy_without_variant_extracted_columns() { |
1486 | 232 | TabletSchemaSPtr copy = std::make_shared<TabletSchema>(); |
1487 | 232 | copy->shawdow_copy_without_columns(*this); |
1488 | 3.23k | for (auto& col : this->columns()) { |
1489 | 3.23k | if (col->is_extracted_column()) { |
1490 | 0 | continue; |
1491 | 0 | } |
1492 | 3.23k | copy->append_column(*col); |
1493 | 3.23k | } |
1494 | 232 | return copy; |
1495 | 232 | } |
1496 | | |
1497 | | // Dropped column is in _field_uniqueid_to_index but not in _field_name_to_index |
1498 | | // Could refer to append_column method |
1499 | 32.9k | bool TabletSchema::is_dropped_column(const TabletColumn& col) const { |
1500 | 18.4E | CHECK(_field_uniqueid_to_index.find(col.unique_id()) != _field_uniqueid_to_index.end()) |
1501 | 18.4E | << "could not find col with unique id = " << col.unique_id() |
1502 | 18.4E | << " and name = " << col.name() << " table_id=" << _table_id; |
1503 | 32.9k | auto it = _field_name_to_index.find(StringRef {col.name()}); |
1504 | 33.0k | return it == _field_name_to_index.end() || _cols[it->second]->unique_id() != col.unique_id(); |
1505 | 32.9k | } |
1506 | | |
1507 | 0 | void TabletSchema::copy_extracted_columns(const TabletSchema& src_schema) { |
1508 | 0 | std::unordered_set<int32_t> variant_columns; |
1509 | 0 | for (const auto& col : columns()) { |
1510 | 0 | if (col->is_variant_type()) { |
1511 | 0 | variant_columns.insert(col->unique_id()); |
1512 | 0 | } |
1513 | 0 | } |
1514 | 0 | for (const TabletColumnPtr& col : src_schema.columns()) { |
1515 | 0 | if (col->is_extracted_column() && variant_columns.contains(col->parent_unique_id())) { |
1516 | 0 | ColumnPB col_pb; |
1517 | 0 | col->to_schema_pb(&col_pb); |
1518 | 0 | TabletColumn new_col(col_pb); |
1519 | 0 | append_column(new_col, ColumnType::VARIANT); |
1520 | 0 | } |
1521 | 0 | } |
1522 | 0 | } |
1523 | | |
1524 | 0 | void TabletSchema::reserve_extracted_columns() { |
1525 | 0 | for (auto it = _cols.begin(); it != _cols.end();) { |
1526 | 0 | if (!(*it)->is_extracted_column()) { |
1527 | 0 | it = _cols.erase(it); |
1528 | 0 | } else { |
1529 | 0 | ++it; |
1530 | 0 | } |
1531 | 0 | } |
1532 | 0 | } |
1533 | | |
// Serializes this schema into `tablet_schema_pb`.
//
// Scalar fields are set on the message; cluster key uids, columns, indexes and
// sequence column groups are appended with add_*() and nothing is cleared
// first, so the message is expected to be empty on entry (otherwise the
// repeated entries accumulate).
void TabletSchema::to_schema_pb(TabletSchemaPB* tablet_schema_pb) const {
    for (const auto& i : _cluster_key_uids) {
        tablet_schema_pb->add_cluster_key_uids(i);
    }
    tablet_schema_pb->set_keys_type(_keys_type);
    // Columns and indexes serialize themselves into freshly added sub-messages.
    for (const auto& col : _cols) {
        ColumnPB* column = tablet_schema_pb->add_column();
        col->to_schema_pb(column);
    }
    for (const auto& index : _indexes) {
        auto* index_pb = tablet_schema_pb->add_index();
        index->to_schema_pb(index_pb);
    }
    tablet_schema_pb->set_num_short_key_columns(cast_set<int32_t>(_num_short_key_columns));
    tablet_schema_pb->set_num_rows_per_row_block(cast_set<int32_t>(_num_rows_per_row_block));
    tablet_schema_pb->set_compress_kind(_compress_kind);
    // bf_fpp is only written when this schema has one; otherwise the field stays unset.
    if (_has_bf_fpp) {
        tablet_schema_pb->set_bf_fpp(_bf_fpp);
    }
    tablet_schema_pb->set_next_column_unique_id(cast_set<uint32_t>(_next_column_unique_id));
    tablet_schema_pb->set_is_in_memory(_is_in_memory);
    tablet_schema_pb->set_disable_auto_compaction(_disable_auto_compaction);
    tablet_schema_pb->set_enable_single_replica_compaction(_enable_single_replica_compaction);
    tablet_schema_pb->set_store_row_column(_store_row_column);
    tablet_schema_pb->set_skip_write_index_on_load(_skip_write_index_on_load);
    tablet_schema_pb->set_delete_sign_idx(_delete_sign_idx);
    tablet_schema_pb->set_sequence_col_idx(_sequence_col_idx);
    tablet_schema_pb->set_sort_type(_sort_type);
    tablet_schema_pb->set_sort_col_num(cast_set<int32_t>(_sort_col_num));
    tablet_schema_pb->set_schema_version(_schema_version);
    tablet_schema_pb->set_compression_type(_compression_type);
    tablet_schema_pb->set_row_store_page_size(_row_store_page_size);
    tablet_schema_pb->set_storage_page_size(_storage_page_size);
    tablet_schema_pb->set_storage_dict_page_size(_storage_dict_page_size);
    tablet_schema_pb->set_version_col_idx(_version_col_idx);
    tablet_schema_pb->set_skip_bitmap_col_idx(_skip_bitmap_col_idx);
    tablet_schema_pb->set_inverted_index_storage_format(_inverted_index_storage_format);
    tablet_schema_pb->mutable_row_store_column_unique_ids()->Assign(
            _row_store_column_unique_ids.begin(), _row_store_column_unique_ids.end());
    tablet_schema_pb->set_enable_variant_flatten_nested(_enable_variant_flatten_nested);
    tablet_schema_pb->set_is_external_segment_column_meta_used(
            _is_external_segment_column_meta_used);
    tablet_schema_pb->set_integer_type_default_use_plain_encoding(
            _integer_type_default_use_plain_encoding);
    tablet_schema_pb->set_binary_plain_encoding_default_impl(_binary_plain_encoding_default_impl);
    // One ColumnGroupPB per sequence column: the map key is the sequence column,
    // the mapped values are the columns in its group. seq_map is obtained
    // unconditionally, even when there are no groups to write.
    auto column_groups_pb = tablet_schema_pb->mutable_seq_map();
    for (const auto& it : _seq_col_uid_to_value_cols_uid) {
        uint32_t key = it.first;
        ColumnGroupPB* cg_pb = column_groups_pb->add_cg(); // ColumnGroupPB {key: {v1, v2, v3}}
        cg_pb->set_sequence_column(key);
        for (auto v : it.second) {
            cg_pb->add_columns_in_group(v);
        }
    }
}
1589 | | |
1590 | 0 | size_t TabletSchema::row_size() const { |
1591 | 0 | size_t size = 0; |
1592 | 0 | for (const auto& column : _cols) { |
1593 | 0 | size += column->length(); |
1594 | 0 | } |
1595 | 0 | size += (_num_columns + 7) / 8; |
1596 | |
|
1597 | 0 | return size; |
1598 | 0 | } |
1599 | | |
1600 | 3.85M | int32_t TabletSchema::field_index(const std::string& field_name) const { |
1601 | 3.85M | const auto& found = _field_name_to_index.find(StringRef(field_name)); |
1602 | 3.85M | return (found == _field_name_to_index.end()) ? -1 : found->second; |
1603 | 3.85M | } |
1604 | | |
1605 | 61 | int32_t TabletSchema::field_index(const PathInData& path) const { |
1606 | 61 | const auto& found = _field_path_to_index.find(PathInDataRef(&path)); |
1607 | 61 | return (found == _field_path_to_index.end()) ? -1 : found->second; |
1608 | 61 | } |
1609 | | |
1610 | 5.21M | int32_t TabletSchema::field_index(int32_t col_unique_id) const { |
1611 | 5.21M | const auto& found = _field_uniqueid_to_index.find(col_unique_id); |
1612 | 5.21M | return (found == _field_uniqueid_to_index.end()) ? -1 : found->second; |
1613 | 5.21M | } |
1614 | | |
1615 | 1.51M | const std::vector<TabletColumnPtr>& TabletSchema::columns() const { |
1616 | 1.51M | return _cols; |
1617 | 1.51M | } |
1618 | | |
1619 | 44.0M | const TabletColumn& TabletSchema::column(size_t ordinal) const { |
1620 | 18.4E | DCHECK(ordinal < _num_columns) << "ordinal:" << ordinal << ", _num_columns:" << _num_columns; |
1621 | 44.0M | return *_cols[ordinal]; |
1622 | 44.0M | } |
1623 | | |
1624 | 493k | const TabletColumn& TabletSchema::column_by_uid(int32_t col_unique_id) const { |
1625 | 493k | return *_cols.at(_field_uniqueid_to_index.at(col_unique_id)); |
1626 | 493k | } |
1627 | | |
1628 | 1 | TabletColumn& TabletSchema::mutable_column_by_uid(int32_t col_unique_id) { |
1629 | 1 | return *_cols.at(_field_uniqueid_to_index.at(col_unique_id)); |
1630 | 1 | } |
1631 | | |
1632 | 96 | TabletColumn& TabletSchema::mutable_column(size_t ordinal) { |
1633 | 96 | return *_cols.at(ordinal); |
1634 | 96 | } |
1635 | | |
1636 | 51.2k | void TabletSchema::update_indexes_from_thrift(const std::vector<doris::TOlapTableIndex>& tindexes) { |
1637 | 51.2k | std::vector<TabletIndexPtr> indexes; |
1638 | 51.2k | for (const auto& tindex : tindexes) { |
1639 | 2 | TabletIndex index; |
1640 | 2 | index.init_from_thrift(tindex, *this); |
1641 | 2 | indexes.emplace_back(std::make_shared<TabletIndex>(std::move(index))); |
1642 | 2 | } |
1643 | 51.2k | _indexes = std::move(indexes); |
1644 | 51.2k | _col_id_suffix_to_index.clear(); |
1645 | 51.2k | _index_by_unique_id_with_pattern.clear(); |
1646 | 51.2k | size_t index_pos = 0; |
1647 | 51.2k | for (auto& index : _indexes) { |
1648 | 2 | for (int32_t col_uid : index->col_unique_ids()) { |
1649 | 2 | if (auto field_pattern = index->field_pattern(); !field_pattern.empty()) { |
1650 | 0 | auto& pattern_to_index_map = _index_by_unique_id_with_pattern[col_uid]; |
1651 | 0 | pattern_to_index_map[field_pattern].emplace_back(index); |
1652 | 2 | } else { |
1653 | 2 | IndexKey key = |
1654 | 2 | std::make_tuple(index->index_type(), col_uid, index->get_index_suffix()); |
1655 | 2 | _col_id_suffix_to_index[key].push_back(index_pos); |
1656 | 2 | } |
1657 | 2 | } |
1658 | 2 | index_pos++; |
1659 | 2 | } |
1660 | 51.2k | } |
1661 | | |
1662 | 0 | bool TabletSchema::exist_column(const std::string& field_name) const { |
1663 | 0 | return _field_name_to_index.contains(StringRef {field_name}); |
1664 | 0 | } |
1665 | | |
1666 | 8.50M | bool TabletSchema::has_column_unique_id(int32_t col_unique_id) const { |
1667 | 8.50M | return _field_uniqueid_to_index.contains(col_unique_id); |
1668 | 8.50M | } |
1669 | | |
1670 | 4.04k | Status TabletSchema::have_column(const std::string& field_name) const { |
1671 | 4.04k | if (!_field_name_to_index.contains(StringRef(field_name))) { |
1672 | 4.04k | return Status::Error<ErrorCode::INTERNAL_ERROR>( |
1673 | 4.04k | "Not found field_name, field_name:{}, schema:{}", field_name, |
1674 | 4.04k | get_all_field_names()); |
1675 | 4.04k | } |
1676 | 2 | return Status::OK(); |
1677 | 4.04k | } |
1678 | | |
1679 | 183 | Result<const TabletColumn*> TabletSchema::column(const std::string& field_name) const { |
1680 | 183 | auto it = _field_name_to_index.find(StringRef {field_name}); |
1681 | 183 | if (it == _field_name_to_index.end()) { |
1682 | 0 | DCHECK(false) << "field_name=" << field_name << ", table_id=" << _table_id |
1683 | 0 | << ", field_name_to_index=" << get_all_field_names(); |
1684 | 0 | return ResultError( |
1685 | 0 | Status::InternalError("column not found, name={}, table_id={}, schema_version={}", |
1686 | 0 | field_name, _table_id, _schema_version)); |
1687 | 0 | } |
1688 | 183 | return _cols[it->second].get(); |
1689 | 183 | } |
1690 | | |
1691 | | void TabletSchema::update_tablet_columns(const TabletSchema& tablet_schema, |
1692 | 0 | const std::vector<TColumn>& t_columns) { |
1693 | 0 | copy_from(tablet_schema); |
1694 | 0 | if (!t_columns.empty() && t_columns[0].col_unique_id >= 0) { |
1695 | 0 | clear_columns(); |
1696 | 0 | for (const auto& column : t_columns) { |
1697 | 0 | append_column(TabletColumn(column)); |
1698 | 0 | } |
1699 | 0 | } |
1700 | 0 | } |
1701 | | |
1702 | 67 | bool TabletSchema::has_inverted_index_with_index_id(int64_t index_id) const { |
1703 | 86 | for (size_t i = 0; i < _indexes.size(); i++) { |
1704 | 48 | if ((_indexes[i]->index_type() == IndexType::INVERTED || |
1705 | 48 | _indexes[i]->index_type() == IndexType::ANN) && |
1706 | 48 | _indexes[i]->index_id() == index_id) { |
1707 | 29 | return true; |
1708 | 29 | } |
1709 | 48 | } |
1710 | 38 | return false; |
1711 | 67 | } |
1712 | | |
1713 | | std::vector<const TabletIndex*> TabletSchema::inverted_indexs( |
1714 | 8.23M | int32_t col_unique_id, const std::string& suffix_path) const { |
1715 | 8.23M | std::vector<const TabletIndex*> result; |
1716 | 8.23M | const std::string escaped_suffix = escape_for_path_name(suffix_path); |
1717 | 8.23M | auto it = _col_id_suffix_to_index.find( |
1718 | 8.23M | std::make_tuple(IndexType::INVERTED, col_unique_id, escaped_suffix)); |
1719 | 8.23M | if (it != _col_id_suffix_to_index.end()) { |
1720 | 11.4k | for (size_t pos : it->second) { |
1721 | 11.4k | if (pos < _indexes.size()) { |
1722 | 11.4k | result.push_back(_indexes[pos].get()); |
1723 | 11.4k | } |
1724 | 11.4k | } |
1725 | 11.4k | } |
1726 | 8.23M | return result; |
1727 | 8.23M | } |
1728 | | |
1729 | | std::vector<TabletIndexPtr> TabletSchema::inverted_index_by_field_pattern( |
1730 | 52 | int32_t col_unique_id, const std::string& field_pattern) const { |
1731 | 52 | auto id_to_pattern_map = _index_by_unique_id_with_pattern.find(col_unique_id); |
1732 | 52 | if (id_to_pattern_map == _index_by_unique_id_with_pattern.end()) { |
1733 | 43 | return {}; |
1734 | 43 | } |
1735 | 9 | auto pattern_to_index_map = id_to_pattern_map->second.find(field_pattern); |
1736 | 9 | if (pattern_to_index_map == id_to_pattern_map->second.end()) { |
1737 | 4 | return {}; |
1738 | 4 | } |
1739 | 5 | return pattern_to_index_map->second; |
1740 | 9 | } |
1741 | | |
1742 | 8.23M | std::vector<const TabletIndex*> TabletSchema::inverted_indexs(const TabletColumn& col) const { |
1743 | | // Some columns(Float, Double, JSONB ...) from the variant do not support inverted index |
1744 | 8.23M | if (!segment_v2::IndexColumnWriter::check_support_inverted_index(col)) { |
1745 | 2.35k | return {}; |
1746 | 2.35k | } |
1747 | | |
1748 | | // TODO use more efficient impl |
1749 | | // Use parent id if unique not assigned, this could happend when accessing subcolumns of variants |
1750 | 8.23M | int32_t col_unique_id = col.is_extracted_column() ? col.parent_unique_id() : col.unique_id(); |
1751 | 8.23M | std::vector<const TabletIndex*> result; |
1752 | 8.23M | if (result = inverted_indexs(col_unique_id, escape_for_path_name(col.suffix_path())); |
1753 | 8.23M | !result.empty()) { |
1754 | 8.78k | return result; |
1755 | 8.78k | } |
1756 | | // variant's typed column has it's own index |
1757 | 8.22M | else if (col.is_extracted_column() && col.path_info_ptr()->get_is_typed()) { |
1758 | 2 | std::string relative_path = col.path_info_ptr()->copy_pop_front().get_path(); |
1759 | 2 | if (_path_set_info_map.find(col_unique_id) == _path_set_info_map.end()) { |
1760 | 0 | return result; |
1761 | 0 | } |
1762 | 2 | const auto& path_set_info = _path_set_info_map.at(col_unique_id); |
1763 | 2 | if (path_set_info.typed_path_set.find(relative_path) == |
1764 | 2 | path_set_info.typed_path_set.end()) { |
1765 | 0 | return result; |
1766 | 0 | } |
1767 | 2 | for (const auto& index : path_set_info.typed_path_set.at(relative_path).indexes) { |
1768 | 2 | result.push_back(index.get()); |
1769 | 2 | } |
1770 | 2 | return result; |
1771 | 2 | } |
1772 | | // variant's subcolumns has it's own index |
1773 | 8.22M | else if (col.is_extracted_column()) { |
1774 | 3 | std::string relative_path = col.path_info_ptr()->copy_pop_front().get_path(); |
1775 | 3 | if (_path_set_info_map.find(col_unique_id) == _path_set_info_map.end()) { |
1776 | 1 | return result; |
1777 | 1 | } |
1778 | 2 | const auto& path_set_info = _path_set_info_map.at(col_unique_id); |
1779 | 2 | if (path_set_info.subcolumn_indexes.find(relative_path) == |
1780 | 2 | path_set_info.subcolumn_indexes.end()) { |
1781 | 1 | return result; |
1782 | 1 | } |
1783 | 1 | for (const auto& index : path_set_info.subcolumn_indexes.at(relative_path)) { |
1784 | 1 | result.push_back(index.get()); |
1785 | 1 | } |
1786 | 1 | } |
1787 | 8.22M | return result; |
1788 | 8.23M | } |
1789 | | |
1790 | | const TabletIndex* TabletSchema::ann_index(int32_t col_unique_id, |
1791 | 3.73k | const std::string& suffix_path) const { |
1792 | 3.73k | for (size_t i = 0; i < _indexes.size(); i++) { |
1793 | 4 | if (_indexes[i]->index_type() == IndexType::ANN) { |
1794 | 4 | for (int32_t id : _indexes[i]->col_unique_ids()) { |
1795 | 4 | if (id == col_unique_id && |
1796 | 4 | _indexes[i]->get_index_suffix() == escape_for_path_name(suffix_path)) { |
1797 | 4 | return _indexes[i].get(); |
1798 | 4 | } |
1799 | 4 | } |
1800 | 4 | } |
1801 | 4 | } |
1802 | 3.73k | return nullptr; |
1803 | 3.73k | } |
1804 | | |
1805 | 8.07M | const TabletIndex* TabletSchema::ann_index(const TabletColumn& col) const { |
1806 | 8.07M | if (!segment_v2::IndexColumnWriter::check_support_ann_index(col)) { |
1807 | 8.06M | return nullptr; |
1808 | 8.06M | } |
1809 | | // TODO use more efficient impl |
1810 | | // Use parent id if unique not assigned, this could happend when accessing subcolumns of variants |
1811 | 4.54k | int32_t col_unique_id = col.is_extracted_column() ? col.parent_unique_id() : col.unique_id(); |
1812 | 4.54k | return ann_index(col_unique_id, escape_for_path_name(col.suffix_path())); |
1813 | 8.07M | } |
1814 | | |
1815 | 0 | bool TabletSchema::has_ngram_bf_index(int32_t col_unique_id) const { |
1816 | 0 | IndexKey index_key(IndexType::NGRAM_BF, col_unique_id, ""); |
1817 | 0 | auto it = _col_id_suffix_to_index.find(index_key); |
1818 | 0 | return it != _col_id_suffix_to_index.end(); |
1819 | 0 | } |
1820 | | |
1821 | 75.3k | const TabletIndex* TabletSchema::get_ngram_bf_index(int32_t col_unique_id) const { |
1822 | | // Get the ngram bf index for the given column unique id |
1823 | 75.3k | IndexKey index_key(IndexType::NGRAM_BF, col_unique_id, ""); |
1824 | 75.3k | auto it = _col_id_suffix_to_index.find(index_key); |
1825 | 75.3k | if (it != _col_id_suffix_to_index.end()) { |
1826 | 1 | if (!it->second.empty() && it->second[0] < _indexes.size()) { |
1827 | 1 | return _indexes[it->second[0]].get(); |
1828 | 1 | } |
1829 | 1 | } |
1830 | 75.3k | return nullptr; |
1831 | 75.3k | } |
1832 | | |
1833 | | const TabletIndex* TabletSchema::get_index(int32_t col_unique_id, IndexType index_type, |
1834 | 14 | const std::string& suffix_path) const { |
1835 | 14 | IndexKey index_key(index_type, col_unique_id, suffix_path); |
1836 | 14 | auto it = _col_id_suffix_to_index.find(index_key); |
1837 | 14 | if (it != _col_id_suffix_to_index.end()) { |
1838 | 12 | if (!it->second.empty() && it->second[0] < _indexes.size()) { |
1839 | 12 | return _indexes[it->second[0]].get(); |
1840 | 12 | } |
1841 | 12 | } |
1842 | 2 | return nullptr; |
1843 | 14 | } |
1844 | | |
1845 | | Block TabletSchema::create_block( |
1846 | | const std::vector<uint32_t>& return_columns, |
1847 | 564k | const std::unordered_set<uint32_t>* tablet_columns_need_convert_null) const { |
1848 | 564k | Block block; |
1849 | 8.88M | for (int i = 0; i < return_columns.size(); ++i) { |
1850 | 8.32M | const ColumnId cid = return_columns[i]; |
1851 | 8.32M | const auto& col = *_cols[cid]; |
1852 | 8.32M | bool is_nullable = (tablet_columns_need_convert_null != nullptr && |
1853 | 8.32M | tablet_columns_need_convert_null->find(cid) != |
1854 | 8.30M | tablet_columns_need_convert_null->end()); |
1855 | 8.32M | auto data_type = DataTypeFactory::instance().create_data_type(col, is_nullable); |
1856 | 8.32M | if (col.type() == FieldType::OLAP_FIELD_TYPE_STRUCT || |
1857 | 8.32M | col.type() == FieldType::OLAP_FIELD_TYPE_MAP || |
1858 | 8.32M | col.type() == FieldType::OLAP_FIELD_TYPE_ARRAY) { |
1859 | 1.59k | if (_pruned_columns_data_type.contains(col.unique_id())) { |
1860 | 1.26k | data_type = _pruned_columns_data_type.at(col.unique_id()); |
1861 | 1.26k | } |
1862 | 1.59k | } |
1863 | | |
1864 | 8.32M | if (_vir_col_idx_to_unique_id.contains(cid)) { |
1865 | 0 | block.insert({ColumnNothing::create(0), data_type, col.name()}); |
1866 | 0 | VLOG_DEBUG << fmt::format( |
1867 | 0 | "Create block from tablet schema, column cid {} is virtual column, col_name: " |
1868 | 0 | "{}, col_unique_id: {}, type {}", |
1869 | 0 | cid, col.name(), col.unique_id(), data_type->get_name()); |
1870 | 8.32M | } else { |
1871 | 8.32M | block.insert({data_type->create_column(), data_type, col.name()}); |
1872 | 8.32M | } |
1873 | 8.32M | } |
1874 | 564k | return block; |
1875 | 564k | } |
1876 | | |
1877 | 3.66k | Block TabletSchema::create_block() const { |
1878 | 3.66k | Block block; |
1879 | 33.0k | for (const auto& col : _cols) { |
1880 | 33.0k | if (is_dropped_column(*col)) { |
1881 | 0 | continue; |
1882 | 0 | } |
1883 | | |
1884 | 33.0k | auto data_type = DataTypeFactory::instance().create_data_type(*col); |
1885 | 33.0k | if (col->type() == FieldType::OLAP_FIELD_TYPE_STRUCT) { |
1886 | 6 | if (_pruned_columns_data_type.contains(col->unique_id())) { |
1887 | 0 | data_type = _pruned_columns_data_type.at(col->unique_id()); |
1888 | 0 | } |
1889 | 6 | } |
1890 | 33.0k | block.insert({data_type->create_column(), data_type, col->name()}); |
1891 | 33.0k | } |
1892 | 3.66k | return block; |
1893 | 3.66k | } |
1894 | | |
1895 | 268 | Block TabletSchema::create_block_by_cids(const std::vector<uint32_t>& cids) const { |
1896 | 268 | Block block; |
1897 | 2.16k | for (const auto& cid : cids) { |
1898 | 2.16k | const auto& col = *_cols[cid]; |
1899 | 2.16k | auto data_type = DataTypeFactory::instance().create_data_type(col); |
1900 | 2.16k | if (col.type() == FieldType::OLAP_FIELD_TYPE_STRUCT) { |
1901 | 0 | if (_pruned_columns_data_type.contains(col.unique_id())) { |
1902 | 0 | data_type = _pruned_columns_data_type.at(col.unique_id()); |
1903 | 0 | } |
1904 | 0 | } |
1905 | 2.16k | block.insert({data_type->create_column(), data_type, col.name()}); |
1906 | 2.16k | } |
1907 | 268 | return block; |
1908 | 268 | } |
1909 | | |
1910 | 0 | bool operator==(const TabletColumn& a, const TabletColumn& b) { |
1911 | 0 | if (a._unique_id != b._unique_id) return false; |
1912 | 0 | if (a._col_name != b._col_name) return false; |
1913 | 0 | if (a._type != b._type) return false; |
1914 | 0 | if (a._is_key != b._is_key) return false; |
1915 | 0 | if (a._aggregation != b._aggregation) return false; |
1916 | 0 | if (a._is_nullable != b._is_nullable) return false; |
1917 | 0 | if (a._has_default_value != b._has_default_value) return false; |
1918 | 0 | if (a._has_default_value) { |
1919 | 0 | if (a._default_value != b._default_value) return false; |
1920 | 0 | } |
1921 | 0 | if (a._is_decimal != b._is_decimal) return false; |
1922 | 0 | if (a._is_decimal) { |
1923 | 0 | if (a._precision != b._precision) return false; |
1924 | 0 | if (a._frac != b._frac) return false; |
1925 | 0 | } |
1926 | 0 | if (a._length != b._length) return false; |
1927 | 0 | if (a._index_length != b._index_length) return false; |
1928 | 0 | if (a._is_bf_column != b._is_bf_column) return false; |
1929 | 0 | if (a._column_path == nullptr && a._column_path != nullptr) return false; |
1930 | 0 | if (b._column_path == nullptr && a._column_path != nullptr) return false; |
1931 | 0 | if (b._column_path != nullptr && a._column_path != nullptr && |
1932 | 0 | *a._column_path != *b._column_path) |
1933 | 0 | return false; |
1934 | 0 | return true; |
1935 | 0 | } |
1936 | | |
1937 | 0 | bool operator!=(const TabletColumn& a, const TabletColumn& b) { |
1938 | 0 | return !(a == b); |
1939 | 0 | } |
1940 | | |
1941 | 3 | bool operator==(const TabletSchema& a, const TabletSchema& b) { |
1942 | 3 | if (a._keys_type != b._keys_type) return false; |
1943 | 3 | if (a._cols.size() != b._cols.size()) return false; |
1944 | 3 | for (int i = 0; i < a._cols.size(); ++i) { |
1945 | 0 | if (*a._cols[i] != *b._cols[i]) return false; |
1946 | 0 | } |
1947 | 3 | if (a._num_columns != b._num_columns) return false; |
1948 | 3 | if (a._num_key_columns != b._num_key_columns) return false; |
1949 | 3 | if (a._num_null_columns != b._num_null_columns) return false; |
1950 | 3 | if (a._num_short_key_columns != b._num_short_key_columns) return false; |
1951 | 3 | if (a._num_rows_per_row_block != b._num_rows_per_row_block) return false; |
1952 | 3 | if (a._compress_kind != b._compress_kind) return false; |
1953 | 3 | if (a._next_column_unique_id != b._next_column_unique_id) return false; |
1954 | 3 | if (a._has_bf_fpp != b._has_bf_fpp) return false; |
1955 | 3 | if (a._has_bf_fpp) { |
1956 | 0 | if (std::abs(a._bf_fpp - b._bf_fpp) > 1e-6) return false; |
1957 | 0 | } |
1958 | 3 | if (a._is_in_memory != b._is_in_memory) return false; |
1959 | 3 | if (a._delete_sign_idx != b._delete_sign_idx) return false; |
1960 | 3 | if (a._disable_auto_compaction != b._disable_auto_compaction) return false; |
1961 | 3 | if (a._enable_single_replica_compaction != b._enable_single_replica_compaction) return false; |
1962 | 3 | if (a._store_row_column != b._store_row_column) return false; |
1963 | 3 | if (a._row_store_page_size != b._row_store_page_size) return false; |
1964 | 3 | if (a._storage_page_size != b._storage_page_size) return false; |
1965 | 3 | if (a._storage_dict_page_size != b._storage_dict_page_size) return false; |
1966 | 3 | if (a._skip_write_index_on_load != b._skip_write_index_on_load) return false; |
1967 | 3 | if (a._enable_variant_flatten_nested != b._enable_variant_flatten_nested) return false; |
1968 | 3 | if (a._is_external_segment_column_meta_used != b._is_external_segment_column_meta_used) |
1969 | 0 | return false; |
1970 | 3 | if (a._integer_type_default_use_plain_encoding != b._integer_type_default_use_plain_encoding) |
1971 | 0 | return false; |
1972 | 3 | if (a._binary_plain_encoding_default_impl != b._binary_plain_encoding_default_impl) |
1973 | 0 | return false; |
1974 | 3 | return true; |
1975 | 3 | } |
1976 | | |
1977 | 3 | bool operator!=(const TabletSchema& a, const TabletSchema& b) { |
1978 | 3 | return !(a == b); |
1979 | 3 | } |
1980 | | #include "common/compile_check_end.h" |
1981 | | } // namespace doris |