be/src/format/parquet/parquet_column_convert.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <gen_cpp/parquet_types.h> |
21 | | |
22 | | #include "common/cast_set.h" |
23 | | #include "core/column/column_varbinary.h" |
24 | | #include "core/data_type/data_type_factory.hpp" |
25 | | #include "core/data_type/primitive_type.h" |
26 | | #include "core/extended_types.h" |
27 | | #include "core/field.h" |
28 | | #include "core/types.h" |
29 | | #include "format/column_type_convert.h" |
30 | | #include "format/format_common.h" |
31 | | #include "format/parquet/decoder.h" |
32 | | #include "format/parquet/parquet_common.h" |
33 | | #include "format/parquet/schema_desc.h" |
34 | | |
35 | | namespace doris::parquet { |
36 | | #include "common/compile_check_begin.h" |
37 | | struct ConvertParams { |
38 | | // schema.logicalType.TIMESTAMP.isAdjustedToUTC == false |
39 | | static const cctz::time_zone utc0; |
40 | | // schema.logicalType.TIMESTAMP.isAdjustedToUTC == true, we should set local time zone |
41 | | const cctz::time_zone* ctz = nullptr; |
42 | | int64_t second_mask = 1; |
43 | | int64_t scale_to_nano_factor = 1; |
44 | | const FieldSchema* field_schema = nullptr; |
45 | | |
46 | | //For UInt8 -> Int16,UInt16 -> Int32,UInt32 -> Int64,UInt64 -> Int128. |
47 | | bool is_type_compatibility = false; |
48 | | |
49 | | /** |
50 | | * Some frameworks like paimon maybe writes non-standard parquet files. Timestamp field doesn't have |
51 | | * logicalType or converted_type to indicates its precision. We have to reset the time mask. |
52 | | */ |
53 | 5 | void reset_time_scale_if_missing(int scale) { |
54 | 5 | const auto& schema = field_schema->parquet_schema; |
55 | 5 | if (!schema.__isset.logicalType && !schema.__isset.converted_type) { |
56 | 0 | int ts_scale = 9; |
57 | 0 | if (scale <= 3) { |
58 | 0 | ts_scale = 3; |
59 | 0 | } else if (scale <= 6) { |
60 | 0 | ts_scale = 6; |
61 | 0 | } |
62 | 0 | second_mask = common::exp10_i64(ts_scale); |
63 | 0 | scale_to_nano_factor = common::exp10_i64(9 - ts_scale); |
64 | | |
65 | | // The missing parque metadata makes it impossible for us to know the time zone information, |
66 | | // so we default to UTC here. |
67 | 0 | if (ctz == nullptr) { |
68 | 0 | ctz = &utc0; |
69 | 0 | } |
70 | 0 | } |
71 | 5 | } |
72 | | |
73 | 214 | void init(const FieldSchema* field_schema_, const cctz::time_zone* ctz_) { |
74 | 214 | field_schema = field_schema_; |
75 | 214 | if (ctz_ != nullptr) { |
76 | 214 | ctz = ctz_; |
77 | 214 | } |
78 | 214 | const auto& schema = field_schema->parquet_schema; |
79 | 214 | if (schema.__isset.logicalType && schema.logicalType.__isset.TIMESTAMP) { |
80 | 1 | const auto& timestamp_info = schema.logicalType.TIMESTAMP; |
81 | 1 | if (!timestamp_info.isAdjustedToUTC) { |
82 | | // should set timezone to utc+0 |
83 | | // Reference: https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#instant-semantics-timestamps-normalized-to-utc |
84 | | // If isAdjustedToUTC = false, the reader should display the same value no mater what local time zone is. For example: |
85 | | // When a timestamp is stored as `1970-01-03 12:00:00`, |
86 | | // if isAdjustedToUTC = true, UTC8 should read as `1970-01-03 20:00:00`, UTC6 should read as `1970-01-03 18:00:00` |
87 | | // if isAdjustedToUTC = false, UTC8 and UTC6 should read as `1970-01-03 12:00:00`, which is the same as `1970-01-03 12:00:00` in UTC0 |
88 | 1 | ctz = &utc0; |
89 | 1 | } |
90 | 1 | const auto& time_unit = timestamp_info.unit; |
91 | 1 | if (time_unit.__isset.MILLIS) { |
92 | 1 | second_mask = 1000; |
93 | 1 | scale_to_nano_factor = 1000000; |
94 | 1 | } else if (time_unit.__isset.MICROS) { |
95 | 0 | second_mask = 1000000; |
96 | 0 | scale_to_nano_factor = 1000; |
97 | 0 | } else if (time_unit.__isset.NANOS) { |
98 | 0 | second_mask = 1000000000; |
99 | 0 | scale_to_nano_factor = 1; |
100 | 0 | } |
101 | 213 | } else if (schema.__isset.converted_type) { |
102 | 55 | const auto& converted_type = schema.converted_type; |
103 | 55 | if (converted_type == tparquet::ConvertedType::TIMESTAMP_MILLIS) { |
104 | 0 | second_mask = 1000; |
105 | 0 | scale_to_nano_factor = 1000000; |
106 | 55 | } else if (converted_type == tparquet::ConvertedType::TIMESTAMP_MICROS) { |
107 | 4 | second_mask = 1000000; |
108 | 4 | scale_to_nano_factor = 1000; |
109 | 4 | } |
110 | 55 | } |
111 | | |
112 | 214 | is_type_compatibility = field_schema_->is_type_compatibility; |
113 | 214 | } |
114 | | }; |
115 | | |
116 | | /** |
117 | | * Convert parquet physical column to logical column |
118 | | * In parquet document(https://github.com/apache/parquet-format/blob/master/LogicalTypes.md), |
119 | | * Logical or converted type is the data type of column, physical type is the stored type of column chunk. |
120 | | * eg, decimal type can be stored as INT32, INT64, BYTE_ARRAY, FIXED_LENGTH_BYTE_ARRAY. |
121 | | * So there is a convert process from physical type to logical type. |
122 | | * In addition, Schema change will bring about a change in logical type. |
123 | | * |
124 | | * `PhysicalToLogicalConverter` strips away the conversion of logical type, and reuse `ColumnTypeConverter` |
125 | | * to resolve schema change, allowing parquet reader to only focus on the conversion of physical types. |
126 | | * |
127 | | * Therefore, two layers of converters are designed: |
128 | | * First, read parquet data with the physical type |
129 | | * Second, convert physical type to logical type |
130 | | * Third, convert logical type to the final type planned by FE(schema change) |
131 | | * |
132 | | * Ultimate performance optimization: |
133 | | * 1. If process of (First => Second) is consistent, eg. from BYTE_ARRAY to string, no additional copies and conversions will be introduced; |
134 | | * 2. If process of (Second => Third) is consistent, no additional copies and conversions will be introduced; |
135 | | * 3. Null map is shared among all processes, no additional copies and conversions will be introduced in null map; |
136 | | * 4. Only create one physical column in physical conversion, and reused in each loop; |
137 | | * 5. Only create one logical column in logical conversion, and reused in each loop; |
138 | | * 6. FIXED_LENGTH_BYTE_ARRAY is read as ColumnUInt8 instead of ColumnString, so the underlying decoder has no process to decode string |
139 | | * and use memory copy to read the data as a whole, and the conversion has no need to resolve the Offsets in ColumnString. |
140 | | */ |
141 | | class PhysicalToLogicalConverter { |
142 | | protected: |
143 | | ColumnPtr _cached_src_physical_column = nullptr; |
144 | | DataTypePtr _cached_src_physical_type = nullptr; |
145 | | std::unique_ptr<converter::ColumnTypeConverter> _logical_converter = nullptr; |
146 | | |
147 | | std::string _error_msg; |
148 | | |
149 | | std::unique_ptr<ConvertParams> _convert_params; |
150 | | |
151 | | public: |
152 | | static std::unique_ptr<PhysicalToLogicalConverter> get_converter( |
153 | | const FieldSchema* field_schema, DataTypePtr src_logical_type, |
154 | | const DataTypePtr& dst_logical_type, const cctz::time_zone* ctz, |
155 | | bool is_dict_filter = false); |
156 | | |
157 | | static bool is_parquet_native_type(PrimitiveType type); |
158 | | |
159 | | static bool is_decimal_type(PrimitiveType type); |
160 | | |
161 | 214 | PhysicalToLogicalConverter() = default; |
162 | 214 | virtual ~PhysicalToLogicalConverter() = default; |
163 | | |
164 | 4 | virtual Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) { |
165 | 4 | return Status::OK(); |
166 | 4 | } |
167 | | |
168 | | Status convert(ColumnPtr& src_physical_col, DataTypePtr src_logical_type, |
169 | | const DataTypePtr& dst_logical_type, ColumnPtr& dst_logical_col, |
170 | 224 | bool is_dict_filter) { |
171 | 224 | if (is_dict_filter) { |
172 | 0 | src_logical_type = DataTypeFactory::instance().create_data_type( |
173 | 0 | PrimitiveType::TYPE_INT, dst_logical_type->is_nullable()); |
174 | 0 | } |
175 | 224 | if (is_consistent() && _logical_converter->is_consistent()) { |
176 | 179 | return Status::OK(); |
177 | 179 | } |
178 | 45 | ColumnPtr src_logical_column; |
179 | 45 | if (is_consistent()) { |
180 | 4 | if (dst_logical_type->is_nullable()) { |
181 | 4 | auto doris_nullable_column = |
182 | 4 | assert_cast<const ColumnNullable*>(dst_logical_col.get()); |
183 | 4 | src_logical_column = |
184 | 4 | ColumnNullable::create(_cached_src_physical_column, |
185 | 4 | doris_nullable_column->get_null_map_column_ptr()); |
186 | 4 | } else { |
187 | 0 | src_logical_column = _cached_src_physical_column; |
188 | 0 | } |
189 | 41 | } else { |
190 | 41 | src_logical_column = _logical_converter->get_column(src_logical_type, dst_logical_col, |
191 | 41 | dst_logical_type); |
192 | 41 | } |
193 | 45 | RETURN_IF_ERROR(physical_convert(src_physical_col, src_logical_column)); |
194 | 45 | auto converted_column = dst_logical_col->assume_mutable(); |
195 | 45 | return _logical_converter->convert(src_logical_column, converted_column); |
196 | 45 | } |
197 | | |
198 | | virtual ColumnPtr get_physical_column(tparquet::Type::type src_physical_type, |
199 | | DataTypePtr src_logical_type, |
200 | | ColumnPtr& dst_logical_column, |
201 | | const DataTypePtr& dst_logical_type, bool is_dict_filter); |
202 | | |
203 | 224 | DataTypePtr& get_physical_type() { return _cached_src_physical_type; } |
204 | | |
205 | 127 | virtual bool is_consistent() { return false; } |
206 | | |
207 | 354 | virtual bool support() { return true; } |
208 | | |
209 | 0 | std::string get_error_msg() { return _error_msg; } |
210 | | }; |
211 | | |
212 | | class ConsistentPhysicalConverter : public PhysicalToLogicalConverter { |
213 | 441 | bool is_consistent() override { return true; } |
214 | | }; |
215 | | |
216 | | class UnsupportedConverter : public PhysicalToLogicalConverter { |
217 | | public: |
218 | 0 | UnsupportedConverter(std::string error_msg) { _error_msg = error_msg; } |
219 | | |
220 | | UnsupportedConverter(tparquet::Type::type src_physical_type, |
221 | 0 | const DataTypePtr& src_logical_type) { |
222 | 0 | std::string src_physical_str = tparquet::to_string(src_physical_type); |
223 | 0 | std::string src_logical_str = src_logical_type->get_name(); |
224 | 0 | _error_msg = src_physical_str + " => " + src_logical_str; |
225 | 0 | } |
226 | | |
227 | 0 | bool support() override { return false; } |
228 | | |
229 | 0 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
230 | 0 | return Status::InternalError("Unsupported physical to logical type: {}", _error_msg); |
231 | 0 | } |
232 | | }; |
233 | | |
234 | | // for tinyint, smallint |
235 | | template <PrimitiveType IntPrimitiveType> |
236 | | class LittleIntPhysicalConverter : public PhysicalToLogicalConverter { |
237 | 28 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
238 | 28 | using DstCppType = typename PrimitiveTypeTraits<IntPrimitiveType>::CppType; |
239 | 28 | using DstColumnType = typename PrimitiveTypeTraits<IntPrimitiveType>::ColumnType; |
240 | 28 | ColumnPtr from_col = remove_nullable(src_physical_col); |
241 | 28 | MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable(); |
242 | | |
243 | 28 | size_t rows = from_col->size(); |
244 | | // always comes from tparquet::Type::INT32 |
245 | 28 | auto& src_data = assert_cast<const ColumnInt32*>(from_col.get())->get_data(); |
246 | 28 | size_t start_idx = to_col->size(); |
247 | 28 | to_col->resize(start_idx + rows); |
248 | 28 | auto& data = assert_cast<DstColumnType&>(*to_col.get()).get_data(); |
249 | 108 | for (int i = 0; i < rows; ++i) { |
250 | 80 | data[start_idx + i] = static_cast<DstCppType>(src_data[i]); |
251 | 80 | } |
252 | | |
253 | 28 | return Status::OK(); |
254 | 28 | } _ZN5doris7parquet26LittleIntPhysicalConverterILNS_13PrimitiveTypeE3EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Line | Count | Source | 237 | 17 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { | 238 | 17 | using DstCppType = typename PrimitiveTypeTraits<IntPrimitiveType>::CppType; | 239 | 17 | using DstColumnType = typename PrimitiveTypeTraits<IntPrimitiveType>::ColumnType; | 240 | 17 | ColumnPtr from_col = remove_nullable(src_physical_col); | 241 | 17 | MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable(); | 242 | | | 243 | 17 | size_t rows = from_col->size(); | 244 | | // always comes from tparquet::Type::INT32 | 245 | 17 | auto& src_data = assert_cast<const ColumnInt32*>(from_col.get())->get_data(); | 246 | 17 | size_t start_idx = to_col->size(); | 247 | 17 | to_col->resize(start_idx + rows); | 248 | 17 | auto& data = assert_cast<DstColumnType&>(*to_col.get()).get_data(); | 249 | 59 | for (int i = 0; i < rows; ++i) { | 250 | 42 | data[start_idx + i] = static_cast<DstCppType>(src_data[i]); | 251 | 42 | } | 252 | | | 253 | 17 | return Status::OK(); | 254 | 17 | } |
_ZN5doris7parquet26LittleIntPhysicalConverterILNS_13PrimitiveTypeE4EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Line | Count | Source | 237 | 11 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { | 238 | 11 | using DstCppType = typename PrimitiveTypeTraits<IntPrimitiveType>::CppType; | 239 | 11 | using DstColumnType = typename PrimitiveTypeTraits<IntPrimitiveType>::ColumnType; | 240 | 11 | ColumnPtr from_col = remove_nullable(src_physical_col); | 241 | 11 | MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable(); | 242 | | | 243 | 11 | size_t rows = from_col->size(); | 244 | | // always comes from tparquet::Type::INT32 | 245 | 11 | auto& src_data = assert_cast<const ColumnInt32*>(from_col.get())->get_data(); | 246 | 11 | size_t start_idx = to_col->size(); | 247 | 11 | to_col->resize(start_idx + rows); | 248 | 11 | auto& data = assert_cast<DstColumnType&>(*to_col.get()).get_data(); | 249 | 49 | for (int i = 0; i < rows; ++i) { | 250 | 38 | data[start_idx + i] = static_cast<DstCppType>(src_data[i]); | 251 | 38 | } | 252 | | | 253 | 11 | return Status::OK(); | 254 | 11 | } |
|
255 | | }; |
256 | | |
257 | | template <PrimitiveType type> |
258 | | struct UnsignedTypeTraits; |
259 | | |
260 | | template <> |
261 | | struct UnsignedTypeTraits<TYPE_SMALLINT> { |
262 | | using UnsignedCppType = UInt8; |
263 | | //https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#unsigned-integers |
264 | | //INT(8, false), INT(16, false), and INT(32, false) must annotate an int32 primitive type and INT(64, false) |
265 | | //must annotate an int64 primitive type. |
266 | | using StorageCppType = Int32; |
267 | | using StorageColumnType = ColumnInt32; |
268 | | }; |
269 | | |
270 | | template <> |
271 | | struct UnsignedTypeTraits<TYPE_INT> { |
272 | | using UnsignedCppType = UInt16; |
273 | | using StorageCppType = Int32; |
274 | | using StorageColumnType = ColumnInt32; |
275 | | }; |
276 | | |
277 | | template <> |
278 | | struct UnsignedTypeTraits<TYPE_BIGINT> { |
279 | | using UnsignedCppType = UInt32; |
280 | | using StorageCppType = Int32; |
281 | | using StorageColumnType = ColumnInt32; |
282 | | }; |
283 | | |
284 | | template <> |
285 | | struct UnsignedTypeTraits<TYPE_LARGEINT> { |
286 | | using UnsignedCppType = UInt64; |
287 | | using StorageCppType = Int64; |
288 | | using StorageColumnType = ColumnInt64; |
289 | | }; |
290 | | |
291 | | template <PrimitiveType IntPrimitiveType> |
292 | | class UnsignedIntegerConverter : public PhysicalToLogicalConverter { |
293 | 0 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
294 | 0 | using UnsignedCppType = typename UnsignedTypeTraits<IntPrimitiveType>::UnsignedCppType; |
295 | 0 | using StorageCppType = typename UnsignedTypeTraits<IntPrimitiveType>::StorageCppType; |
296 | 0 | using StorageColumnType = typename UnsignedTypeTraits<IntPrimitiveType>::StorageColumnType; |
297 | 0 | using DstColumnType = typename PrimitiveTypeTraits<IntPrimitiveType>::ColumnType; |
298 | |
|
299 | 0 | ColumnPtr from_col = remove_nullable(src_physical_col); |
300 | 0 | MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable(); |
301 | 0 | auto& src_data = assert_cast<const StorageColumnType*>(from_col.get())->get_data(); |
302 | |
|
303 | 0 | size_t rows = src_data.size(); |
304 | 0 | size_t start_idx = to_col->size(); |
305 | 0 | to_col->resize(start_idx + rows); |
306 | 0 | auto& data = assert_cast<DstColumnType&>(*to_col.get()).get_data(); |
307 | |
|
308 | 0 | for (int i = 0; i < rows; i++) { |
309 | 0 | StorageCppType src_value = src_data[i]; |
310 | 0 | auto unsigned_value = static_cast<UnsignedCppType>(src_value); |
311 | 0 | data[start_idx + i] = unsigned_value; |
312 | 0 | } |
313 | |
|
314 | 0 | return Status::OK(); |
315 | 0 | } Unexecuted instantiation: _ZN5doris7parquet24UnsignedIntegerConverterILNS_13PrimitiveTypeE4EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet24UnsignedIntegerConverterILNS_13PrimitiveTypeE5EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet24UnsignedIntegerConverterILNS_13PrimitiveTypeE6EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet24UnsignedIntegerConverterILNS_13PrimitiveTypeE7EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ |
316 | | }; |
317 | | |
318 | | class FixedSizeBinaryConverter : public PhysicalToLogicalConverter { |
319 | | private: |
320 | | int _type_length; |
321 | | |
322 | | public: |
323 | 0 | FixedSizeBinaryConverter(int type_length) : _type_length(type_length) {} |
324 | | |
325 | 0 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
326 | 0 | ColumnPtr from_col = remove_nullable(src_physical_col); |
327 | 0 | MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable(); |
328 | |
|
329 | 0 | auto* src_data = assert_cast<const ColumnUInt8*>(from_col.get()); |
330 | 0 | size_t length = src_data->size(); |
331 | 0 | size_t num_values = length / _type_length; |
332 | 0 | auto& string_col = static_cast<ColumnString&>(*to_col.get()); |
333 | 0 | auto& offsets = string_col.get_offsets(); |
334 | 0 | auto& chars = string_col.get_chars(); |
335 | |
|
336 | 0 | size_t origin_size = chars.size(); |
337 | 0 | chars.resize(origin_size + length); |
338 | 0 | memcpy(chars.data() + origin_size, src_data->get_data().data(), length); |
339 | |
|
340 | 0 | origin_size = offsets.size(); |
341 | 0 | offsets.resize(origin_size + num_values); |
342 | 0 | auto end_offset = offsets[origin_size - 1]; |
343 | 0 | for (int i = 0; i < num_values; ++i) { |
344 | 0 | end_offset += _type_length; |
345 | 0 | offsets[origin_size + i] = end_offset; |
346 | 0 | } |
347 | |
|
348 | 0 | return Status::OK(); |
349 | 0 | } |
350 | | }; |
351 | | |
352 | | class Float16PhysicalConverter : public PhysicalToLogicalConverter { |
353 | | private: |
354 | | int _type_length; |
355 | | |
356 | | public: |
357 | 0 | Float16PhysicalConverter(int type_length) : _type_length(type_length) { |
358 | 0 | DCHECK_EQ(_type_length, 2); |
359 | 0 | } |
360 | | |
361 | 0 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
362 | 0 | ColumnPtr from_col = remove_nullable(src_physical_col); |
363 | 0 | MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable(); |
364 | |
|
365 | 0 | const auto* src_data = assert_cast<const ColumnUInt8*>(from_col.get()); |
366 | 0 | size_t length = src_data->size(); |
367 | 0 | size_t num_values = length / _type_length; |
368 | 0 | auto* to_float_column = assert_cast<ColumnFloat32*>(to_col.get()); |
369 | 0 | size_t start_idx = to_float_column->size(); |
370 | 0 | to_float_column->resize(start_idx + num_values); |
371 | 0 | auto& to_float_column_data = to_float_column->get_data(); |
372 | 0 | const auto* ptr = src_data->get_data().data(); |
373 | 0 | for (int i = 0; i < num_values; ++i) { |
374 | 0 | size_t offset = i * _type_length; |
375 | 0 | const auto* data_ptr = ptr + offset; |
376 | 0 | uint16_t raw; |
377 | 0 | memcpy(&raw, data_ptr, sizeof(uint16_t)); |
378 | 0 | float value = half_to_float(raw); |
379 | 0 | to_float_column_data[start_idx + i] = value; |
380 | 0 | } |
381 | |
|
382 | 0 | return Status::OK(); |
383 | 0 | } |
384 | | |
385 | 0 | float half_to_float(uint16_t h) { |
386 | | // uint16_t h: half precision floating point |
387 | | // bit 15: sign(1 bit) |
388 | | // bits 14..10 : exponent(5 bits) |
389 | | // bits 9..0 : mantissa(10 bits) |
390 | | |
391 | | // sign bit placed to float32 bit31 |
392 | 0 | uint32_t sign = (h & 0x8000U) << 16; // 0x8000 << 16 = 0x8000_0000 |
393 | | // exponent:(5 bits) |
394 | 0 | uint32_t exp = (h & 0x7C00U) >> 10; // 0x7C00 = 0111 1100 0000 (half exponent mask) |
395 | | // mantissa(10 bits) |
396 | 0 | uint32_t mant = (h & 0x03FFU); // 10-bit fraction |
397 | | |
398 | | // cases:Zero/Subnormal, Normal, Inf/NaN |
399 | 0 | if (exp == 0) { |
400 | | // exp==0: Zero or Subnormal ---------- |
401 | 0 | if (mant == 0) { |
402 | | // ±0.0 |
403 | | // sign = either 0x00000000 or 0x80000000 |
404 | 0 | return std::bit_cast<float>(sign); |
405 | 0 | } else { |
406 | | // ---------- Subnormal ---------- |
407 | | // half subnormal: |
408 | | // value = (-1)^sign * (mant / 2^10) * 2^(1 - bias) |
409 | | // half bias = 15 → exponent = 1 - 15 = -14 |
410 | 0 | float f = (static_cast<float>(mant) / 1024.0F) * std::powf(2.0F, -14.0F); |
411 | 0 | return sign ? -f : f; |
412 | 0 | } |
413 | 0 | } else if (exp == 0x1F) { |
414 | | // exp==31: Inf or NaN ---------- |
415 | | // float32: |
416 | | // exponent = 255 (0xFF) |
417 | | // mantissa = mant << 13 |
418 | 0 | uint32_t f = sign | 0x7F800000U | (mant << 13); |
419 | 0 | return std::bit_cast<float>(f); |
420 | 0 | } else { |
421 | | // Normalized ---------- |
422 | | // float32 exponent: |
423 | | // exp32 = exp16 - bias16 + bias32 |
424 | | // bias16 = 15 |
425 | | // bias32 = 127 |
426 | | // |
427 | | // so: exp32 = exp + (127 - 15) |
428 | 0 | uint32_t f = sign | ((exp + (127 - 15)) << 23) // place to float32 exponent |
429 | 0 | | (mant << 13); // mantissa align to 23 bits |
430 | 0 | return std::bit_cast<float>(f); |
431 | 0 | } |
432 | 0 | } |
433 | | }; |
434 | | |
435 | | class UUIDVarBinaryConverter : public PhysicalToLogicalConverter { |
436 | | public: |
437 | 1 | UUIDVarBinaryConverter(int type_length) : _type_length(type_length) {} |
438 | | |
439 | 1 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
440 | 1 | DCHECK(!is_column_const(*src_physical_col)) << src_physical_col->dump_structure(); |
441 | 1 | DCHECK(!is_column_const(*src_logical_column)) << src_logical_column->dump_structure(); |
442 | 1 | const ColumnUInt8* uint8_col = nullptr; |
443 | 1 | if (is_column_nullable(*src_physical_col)) { |
444 | 1 | const auto& nullable = assert_cast<const ColumnNullable*>(src_physical_col.get()); |
445 | 1 | uint8_col = &assert_cast<const ColumnUInt8&>(nullable->get_nested_column()); |
446 | 1 | } else { |
447 | 0 | uint8_col = &assert_cast<const ColumnUInt8&>(*src_physical_col); |
448 | 0 | } |
449 | | |
450 | 1 | MutableColumnPtr to_col = nullptr; |
451 | | // nullmap flag seems have been handled in upper level |
452 | 1 | if (src_logical_column->is_nullable()) { |
453 | 1 | const auto* nullable = assert_cast<const ColumnNullable*>(src_logical_column.get()); |
454 | 1 | to_col = nullable->get_nested_column_ptr()->assume_mutable(); |
455 | 1 | } else { |
456 | 0 | to_col = src_logical_column->assume_mutable(); |
457 | 0 | } |
458 | 1 | auto* to_varbinary_column = assert_cast<ColumnVarbinary*>(to_col.get()); |
459 | 1 | size_t length = uint8_col->size(); |
460 | 1 | size_t num_values = length / _type_length; |
461 | 1 | const auto* ptr = uint8_col->get_data().data(); |
462 | | |
463 | 4 | for (int i = 0; i < num_values; ++i) { |
464 | 3 | auto offset = i * _type_length; |
465 | 3 | const char* data_ptr = reinterpret_cast<const char*>(ptr + offset); |
466 | 3 | to_varbinary_column->insert_data(data_ptr, _type_length); |
467 | 3 | } |
468 | 1 | return Status::OK(); |
469 | 1 | } |
470 | | |
471 | | private: |
472 | | int _type_length; |
473 | | }; |
474 | | |
475 | | template <PrimitiveType DecimalPType> |
476 | | class FixedSizeToDecimal : public PhysicalToLogicalConverter { |
477 | | public: |
478 | | using DecimalType = typename PrimitiveTypeTraits<DecimalPType>::CppType; |
479 | 2 | FixedSizeToDecimal(int32_t type_length) : _type_length(type_length) {}Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EEC2Ei _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EEC2Ei Line | Count | Source | 479 | 2 | FixedSizeToDecimal(int32_t type_length) : _type_length(type_length) {} |
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EEC2Ei Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EEC2Ei |
480 | | |
/**
 * Converts the fixed-width byte column into the decimal column by dispatching on the
 * runtime `_type_length` to the matching `_convert_internal<width, copy type>` instantiation.
 * Nullable wrappers are removed from both columns first.
 *
 * Width to copy type: 1..8 bytes use int64_t, 9..16 bytes use int128_t, 17..32 bytes use
 * wide::Int256. Any other width throws an Exception.
 */
Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override {
    ColumnPtr src_col = remove_nullable(src_physical_col);
    MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable();

// M expands to one `case` label that returns the result of the matching instantiation.
#define M(FixedTypeLength, ValueCopyType) \
    case FixedTypeLength:                 \
        return _convert_internal<FixedTypeLength, ValueCopyType>(src_col, dst_col);

// One M entry per supported byte width.
#define APPLY_FOR_DECIMALS() \
    M(1, int64_t)            \
    M(2, int64_t)            \
    M(3, int64_t)            \
    M(4, int64_t)            \
    M(5, int64_t)            \
    M(6, int64_t)            \
    M(7, int64_t)            \
    M(8, int64_t)            \
    M(9, int128_t)           \
    M(10, int128_t)          \
    M(11, int128_t)          \
    M(12, int128_t)          \
    M(13, int128_t)          \
    M(14, int128_t)          \
    M(15, int128_t)          \
    M(16, int128_t)          \
    M(17, wide::Int256)      \
    M(18, wide::Int256)      \
    M(19, wide::Int256)      \
    M(20, wide::Int256)      \
    M(21, wide::Int256)      \
    M(22, wide::Int256)      \
    M(23, wide::Int256)      \
    M(24, wide::Int256)      \
    M(25, wide::Int256)      \
    M(26, wide::Int256)      \
    M(27, wide::Int256)      \
    M(28, wide::Int256)      \
    M(29, wide::Int256)      \
    M(30, wide::Int256)      \
    M(31, wide::Int256)      \
    M(32, wide::Int256)

    switch (_type_length) {
        APPLY_FOR_DECIMALS()
    default:
        // Widths outside 1..32 are not handled by any case above.
        throw Exception(Status::FatalError("__builtin_unreachable"));
    }
    // Not reached: every case returns and the default branch throws.
    return Status::OK();
#undef APPLY_FOR_DECIMALS
#undef M
}
532 | | |
533 | | template <int fixed_type_length, typename ValueCopyType> |
534 | 2 | Status _convert_internal(ColumnPtr& src_col, MutableColumnPtr& dst_col) { |
535 | 2 | size_t rows = src_col->size() / fixed_type_length; |
536 | 2 | auto* buf = static_cast<const ColumnUInt8*>(src_col.get())->get_data().data(); |
537 | 2 | size_t start_idx = dst_col->size(); |
538 | 2 | dst_col->resize(start_idx + rows); |
539 | | |
540 | 2 | auto& data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col.get())->get_data(); |
541 | 2 | size_t offset = 0; |
542 | 6 | for (int i = 0; i < rows; i++) { |
543 | | // When Decimal in parquet is stored in byte arrays, binary and fixed, |
544 | | // the unscaled number must be encoded as two's complement using big-endian byte order. |
545 | 4 | ValueCopyType value = 0; |
546 | 4 | memcpy(reinterpret_cast<char*>(&value), buf + offset, sizeof(value)); |
547 | 4 | offset += fixed_type_length; |
548 | 4 | value = to_endian<std::endian::big>(value); |
549 | 4 | value = value >> ((sizeof(value) - fixed_type_length) * 8); |
550 | 4 | auto& v = reinterpret_cast<DecimalType&>(data[start_idx + i]); |
551 | 4 | v = (DecimalType)value; |
552 | 4 | } |
553 | | |
554 | 2 | return Status::OK(); |
555 | 2 | } Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi1ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi2ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi3ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi4ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi5ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi6ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi7ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi8ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi9EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi10EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi11EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi12EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi13EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi14EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi15EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi16EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi17EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi18EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi19EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi20EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi21EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi22EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi23EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi24EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi25EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi26EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi27EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi28EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi29EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi30EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi31EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi32EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi1ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi2ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi3ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi4ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi5ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Line | Count | Source | 534 | 1 | Status _convert_internal(ColumnPtr& src_col, MutableColumnPtr& dst_col) { | 535 | 1 | size_t rows = src_col->size() / fixed_type_length; | 536 | 1 | auto* buf = 
static_cast<const ColumnUInt8*>(src_col.get())->get_data().data(); | 537 | 1 | size_t start_idx = dst_col->size(); | 538 | 1 | dst_col->resize(start_idx + rows); | 539 | | | 540 | 1 | auto& data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col.get())->get_data(); | 541 | 1 | size_t offset = 0; | 542 | 3 | for (int i = 0; i < rows; i++) { | 543 | | // When Decimal in parquet is stored in byte arrays, binary and fixed, | 544 | | // the unscaled number must be encoded as two's complement using big-endian byte order. | 545 | 2 | ValueCopyType value = 0; | 546 | 2 | memcpy(reinterpret_cast<char*>(&value), buf + offset, sizeof(value)); | 547 | 2 | offset += fixed_type_length; | 548 | 2 | value = to_endian<std::endian::big>(value); | 549 | 2 | value = value >> ((sizeof(value) - fixed_type_length) * 8); | 550 | 2 | auto& v = reinterpret_cast<DecimalType&>(data[start_idx + i]); | 551 | 2 | v = (DecimalType)value; | 552 | 2 | } | 553 | | | 554 | 1 | return Status::OK(); | 555 | 1 | } |
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi6ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi7ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi8ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Line | Count | Source | 534 | 1 | Status _convert_internal(ColumnPtr& src_col, MutableColumnPtr& dst_col) { | 535 | 1 | size_t rows = src_col->size() / fixed_type_length; | 536 | 1 | auto* buf = static_cast<const ColumnUInt8*>(src_col.get())->get_data().data(); | 537 | 1 | size_t start_idx = dst_col->size(); | 538 | 1 | dst_col->resize(start_idx + rows); | 539 | | | 540 | 1 | auto& data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col.get())->get_data(); | 541 | 1 | size_t offset = 0; | 542 | 3 | for (int i = 0; i < rows; i++) { | 543 | | // When Decimal in parquet is stored in byte arrays, binary and fixed, | 544 | | // the unscaled number must be encoded as two's complement using big-endian byte order. | 545 | 2 | ValueCopyType value = 0; | 546 | 2 | memcpy(reinterpret_cast<char*>(&value), buf + offset, sizeof(value)); | 547 | 2 | offset += fixed_type_length; | 548 | 2 | value = to_endian<std::endian::big>(value); | 549 | 2 | value = value >> ((sizeof(value) - fixed_type_length) * 8); | 550 | 2 | auto& v = reinterpret_cast<DecimalType&>(data[start_idx + i]); | 551 | 2 | v = (DecimalType)value; | 552 | 2 | } | 553 | | | 554 | 1 | return Status::OK(); | 555 | 1 | } |
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi9EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi10EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi11EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi12EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi13EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi14EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi15EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi16EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi17EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi18EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE 
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi19EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi20EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi21EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi22EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi23EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi24EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi25EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi26EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi27EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi28EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi29EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi30EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi31EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi32EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi1ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi2ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi3ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi4ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi5ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi6ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi7ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi8ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi9EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi10EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi11EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi12EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi13EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi14EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi15EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi16EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi17EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi18EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi19EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi20EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi21EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi22EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi23EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi24EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi25EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi26EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi27EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi28EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi29EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi30EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi31EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi32EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi1ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi2ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi3ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi4ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi5ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi6ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi7ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi8ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi9EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi10EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi11EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi12EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi13EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi14EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi15EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi16EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi17EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi18EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi19EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi20EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi21EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi22EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi23EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi24EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi25EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi26EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi27EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi28EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi29EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi30EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi31EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi32EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE |
556 | | |
557 | | private: |
558 | | int32_t _type_length; |
559 | | }; |
560 | | |
561 | | template <PrimitiveType DecimalPType> |
562 | | class StringToDecimal : public PhysicalToLogicalConverter { |
563 | | using DecimalType = typename PrimitiveTypeTraits<DecimalPType>::CppType; |
564 | 0 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
565 | 0 | using ValueCopyType = DecimalType::NativeType; |
566 | 0 | ColumnPtr src_col = remove_nullable(src_physical_col); |
567 | 0 | MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); |
568 | |
|
569 | 0 | size_t rows = src_col->size(); |
570 | 0 | auto buf = static_cast<const ColumnString*>(src_col.get())->get_chars().data(); |
571 | 0 | auto& offset = static_cast<const ColumnString*>(src_col.get())->get_offsets(); |
572 | 0 | size_t start_idx = dst_col->size(); |
573 | 0 | dst_col->resize(start_idx + rows); |
574 | |
|
575 | 0 | auto& data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col.get())->get_data(); |
576 | 0 | for (int i = 0; i < rows; i++) { |
577 | 0 | size_t len = offset[i] - offset[i - 1]; |
578 | | // When Decimal in parquet is stored in byte arrays, binary and fixed, |
579 | | // the unscaled number must be encoded as two's complement using big-endian byte order. |
580 | 0 | ValueCopyType value = 0; |
581 | 0 | if (len > 0) { |
582 | 0 | memcpy(reinterpret_cast<char*>(&value), buf + offset[i - 1], len); |
583 | 0 | value = to_endian<std::endian::big>(value); |
584 | 0 | value = value >> ((sizeof(value) - len) * 8); |
585 | 0 | } |
586 | 0 | auto& v = reinterpret_cast<DecimalType&>(data[start_idx + i]); |
587 | 0 | v = (DecimalType)value; |
588 | 0 | } |
589 | |
|
590 | 0 | return Status::OK(); |
591 | 0 | } Unexecuted instantiation: _ZN5doris7parquet15StringToDecimalILNS_13PrimitiveTypeE28EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet15StringToDecimalILNS_13PrimitiveTypeE29EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet15StringToDecimalILNS_13PrimitiveTypeE30EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet15StringToDecimalILNS_13PrimitiveTypeE35EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ |
592 | | }; |
593 | | |
594 | | template <PrimitiveType NumberType, PrimitiveType DecimalPType> |
595 | | class NumberToDecimal : public PhysicalToLogicalConverter { |
596 | | using DecimalType = typename PrimitiveTypeTraits<DecimalPType>::CppType; |
597 | 2 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
598 | 2 | using ValueCopyType = typename DecimalType::NativeType; |
599 | 2 | ColumnPtr src_col = remove_nullable(src_physical_col); |
600 | 2 | MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); |
601 | | |
602 | 2 | size_t rows = src_col->size(); |
603 | 2 | auto* src_data = |
604 | 2 | static_cast<const ColumnVector<NumberType>*>(src_col.get())->get_data().data(); |
605 | 2 | size_t start_idx = dst_col->size(); |
606 | 2 | dst_col->resize(start_idx + rows); |
607 | | |
608 | 2 | auto* data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col.get())->get_data().data(); |
609 | | |
610 | 24 | for (int i = 0; i < rows; i++) { |
611 | 22 | ValueCopyType value; |
612 | 22 | if constexpr (std::is_same_v<DecimalType, Decimal256>) { |
613 | 0 | value = src_data[i]; |
614 | 22 | } else { |
615 | 22 | value = cast_set<ValueCopyType, typename PrimitiveTypeTraits<NumberType>::CppType, |
616 | 22 | false>(src_data[i]); |
617 | 22 | } |
618 | | |
619 | 22 | data[start_idx + i] = (DecimalType)value; |
620 | 22 | } |
621 | 2 | return Status::OK(); |
622 | 2 | } Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE5ELS2_28EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE6ELS2_28EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE5ELS2_29EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE6ELS2_29EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Line | Count | Source | 597 | 2 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { | 598 | 2 | using ValueCopyType = typename DecimalType::NativeType; | 599 | 2 | ColumnPtr src_col = remove_nullable(src_physical_col); | 600 | 2 | MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); | 601 | | | 602 | 2 | size_t rows = src_col->size(); | 603 | 2 | auto* src_data = | 604 | 2 | static_cast<const ColumnVector<NumberType>*>(src_col.get())->get_data().data(); | 605 | 2 | size_t start_idx = dst_col->size(); | 606 | 2 | dst_col->resize(start_idx + rows); | 607 | | | 608 | 2 | auto* data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col.get())->get_data().data(); | 609 | | | 610 | 24 | for (int i = 0; i < rows; i++) { | 611 | 22 | ValueCopyType value; | 612 | | if constexpr (std::is_same_v<DecimalType, Decimal256>) { | 613 | | value = src_data[i]; | 614 | 22 | } else { | 615 | 22 | value = cast_set<ValueCopyType, typename PrimitiveTypeTraits<NumberType>::CppType, | 616 | 22 | false>(src_data[i]); | 617 | 22 | } | 618 | | | 619 | 22 | data[start_idx + i] = (DecimalType)value; | 620 | 22 | } | 621 | 2 | return Status::OK(); | 622 | 2 | } |
Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE5ELS2_30EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE6ELS2_30EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE5ELS2_35EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE6ELS2_35EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ |
623 | | }; |
624 | | |
625 | | class Int32ToDate : public PhysicalToLogicalConverter { |
626 | 7 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
627 | 7 | ColumnPtr src_col = remove_nullable(src_physical_col); |
628 | 7 | MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); |
629 | | |
630 | 7 | size_t rows = src_col->size(); |
631 | 7 | size_t start_idx = dst_col->size(); |
632 | 7 | dst_col->reserve(start_idx + rows); |
633 | | |
634 | 7 | auto& src_data = static_cast<const ColumnInt32*>(src_col.get())->get_data(); |
635 | 7 | auto& data = static_cast<ColumnDateV2*>(dst_col.get())->get_data(); |
636 | 7 | date_day_offset_dict& date_dict = date_day_offset_dict::get(); |
637 | | |
638 | 59 | for (int i = 0; i < rows; i++) { |
639 | 52 | data.push_back_without_reserve(date_dict[src_data[i]].to_date_int_val()); |
640 | 52 | } |
641 | | |
642 | 7 | return Status::OK(); |
643 | 7 | } |
644 | | }; |
645 | | |
646 | | struct Int64ToTimestamp : public PhysicalToLogicalConverter { |
647 | 5 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
648 | 5 | ColumnPtr src_col = remove_nullable(src_physical_col); |
649 | 5 | MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); |
650 | | |
651 | 5 | size_t rows = src_col->size(); |
652 | 5 | size_t start_idx = dst_col->size(); |
653 | 5 | dst_col->resize(start_idx + rows); |
654 | | |
655 | 5 | auto src_data = static_cast<const ColumnInt64*>(src_col.get())->get_data().data(); |
656 | 5 | auto& data = static_cast<ColumnDateTimeV2*>(dst_col.get())->get_data(); |
657 | | |
658 | 51 | for (int i = 0; i < rows; i++) { |
659 | 46 | int64_t x = src_data[i]; |
660 | 46 | auto& num = data[start_idx + i]; |
661 | 46 | auto& value = reinterpret_cast<DateV2Value<DateTimeV2ValueType>&>(num); |
662 | 46 | value.from_unixtime(x / _convert_params->second_mask, *_convert_params->ctz); |
663 | 46 | value.set_microsecond((x % _convert_params->second_mask) * |
664 | 46 | (_convert_params->scale_to_nano_factor / 1000)); |
665 | 46 | } |
666 | 5 | return Status::OK(); |
667 | 5 | } |
668 | | }; |
669 | | |
670 | | struct Int64ToTimestampTz : public PhysicalToLogicalConverter { |
671 | 0 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
672 | 0 | ColumnPtr src_col = remove_nullable(src_physical_col); |
673 | 0 | MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); |
674 | |
|
675 | 0 | size_t rows = src_col->size(); |
676 | 0 | size_t start_idx = dst_col->size(); |
677 | 0 | dst_col->resize(start_idx + rows); |
678 | |
|
679 | 0 | const auto& src_data = assert_cast<const ColumnInt64*>(src_col.get())->get_data(); |
680 | 0 | auto& dest_data = assert_cast<ColumnTimeStampTz*>(dst_col.get())->get_data(); |
681 | 0 | static const cctz::time_zone UTC = cctz::utc_time_zone(); |
682 | |
|
683 | 0 | for (int i = 0; i < rows; i++) { |
684 | 0 | int64_t x = src_data[i]; |
685 | 0 | auto& tz = dest_data[start_idx + i]; |
686 | 0 | tz.from_unixtime(x / _convert_params->second_mask, UTC); |
687 | 0 | tz.set_microsecond((x % _convert_params->second_mask) * |
688 | 0 | (_convert_params->scale_to_nano_factor / 1000)); |
689 | 0 | } |
690 | 0 | return Status::OK(); |
691 | 0 | } |
692 | | }; |
693 | | |
694 | | struct Int96toTimestamp : public PhysicalToLogicalConverter { |
695 | 0 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
696 | 0 | ColumnPtr src_col = remove_nullable(src_physical_col); |
697 | 0 | MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); |
698 | |
|
699 | 0 | size_t rows = src_col->size() / sizeof(ParquetInt96); |
700 | 0 | auto& src_data = static_cast<const ColumnInt8*>(src_col.get())->get_data(); |
701 | 0 | auto ParquetInt96_data = (ParquetInt96*)src_data.data(); |
702 | 0 | size_t start_idx = dst_col->size(); |
703 | 0 | dst_col->resize(start_idx + rows); |
704 | 0 | auto& data = static_cast<ColumnDateTimeV2*>(dst_col.get())->get_data(); |
705 | |
|
706 | 0 | for (int i = 0; i < rows; i++) { |
707 | 0 | ParquetInt96 src_cell_data = ParquetInt96_data[i]; |
708 | 0 | auto& dst_value = |
709 | 0 | reinterpret_cast<DateV2Value<DateTimeV2ValueType>&>(data[start_idx + i]); |
710 | |
|
711 | 0 | int64_t timestamp_with_micros = src_cell_data.to_timestamp_micros(); |
712 | 0 | dst_value.from_unixtime(timestamp_with_micros / 1000000, *_convert_params->ctz); |
713 | 0 | dst_value.set_microsecond(timestamp_with_micros % 1000000); |
714 | 0 | } |
715 | 0 | return Status::OK(); |
716 | 0 | } |
717 | | }; |
718 | | |
719 | | struct Int96toTimestampTz : public PhysicalToLogicalConverter { |
720 | 0 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
721 | 0 | ColumnPtr src_col = remove_nullable(src_physical_col); |
722 | 0 | MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); |
723 | |
|
724 | 0 | size_t rows = src_col->size() / sizeof(ParquetInt96); |
725 | 0 | const auto& src_data = assert_cast<const ColumnInt8*>(src_col.get())->get_data(); |
726 | 0 | auto* ParquetInt96_data = (ParquetInt96*)src_data.data(); |
727 | 0 | size_t start_idx = dst_col->size(); |
728 | 0 | dst_col->resize(start_idx + rows); |
729 | 0 | auto& data = assert_cast<ColumnTimeStampTz*>(dst_col.get())->get_data(); |
730 | 0 | static const cctz::time_zone UTC = cctz::utc_time_zone(); |
731 | |
|
732 | 0 | for (int i = 0; i < rows; i++) { |
733 | 0 | ParquetInt96 src_cell_data = ParquetInt96_data[i]; |
734 | 0 | int64_t timestamp_with_micros = src_cell_data.to_timestamp_micros(); |
735 | 0 | auto& tz = data[start_idx + i]; |
736 | 0 | tz.from_unixtime(timestamp_with_micros / 1000000, UTC); |
737 | 0 | tz.set_microsecond(timestamp_with_micros % 1000000); |
738 | 0 | } |
739 | 0 | return Status::OK(); |
740 | 0 | } |
741 | | }; |
742 | | #include "common/compile_check_end.h" |
743 | | |
744 | | } // namespace doris::parquet |