be/src/format/parquet/parquet_column_convert.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <gen_cpp/parquet_types.h> |
21 | | |
22 | | #include "common/cast_set.h" |
23 | | #include "core/column/column_varbinary.h" |
24 | | #include "core/data_type/data_type_factory.hpp" |
25 | | #include "core/data_type/primitive_type.h" |
26 | | #include "core/extended_types.h" |
27 | | #include "core/field.h" |
28 | | #include "core/types.h" |
29 | | #include "format/column_type_convert.h" |
30 | | #include "format/format_common.h" |
31 | | #include "format/parquet/decoder.h" |
32 | | #include "format/parquet/parquet_common.h" |
33 | | #include "format/parquet/schema_desc.h" |
34 | | |
35 | | namespace doris::parquet { |
36 | | struct ConvertParams { |
37 | | // schema.logicalType.TIMESTAMP.isAdjustedToUTC == false |
38 | | static const cctz::time_zone utc0; |
39 | | // schema.logicalType.TIMESTAMP.isAdjustedToUTC == true, we should set local time zone |
40 | | const cctz::time_zone* ctz = nullptr; |
41 | | int64_t second_mask = 1; |
42 | | int64_t scale_to_nano_factor = 1; |
43 | | const FieldSchema* field_schema = nullptr; |
44 | | |
45 | | //For UInt8 -> Int16,UInt16 -> Int32,UInt32 -> Int64,UInt64 -> Int128. |
46 | | bool is_type_compatibility = false; |
47 | | |
48 | | /** |
49 | | * Some frameworks like paimon maybe writes non-standard parquet files. Timestamp field doesn't have |
50 | | * logicalType or converted_type to indicates its precision. We have to reset the time mask. |
51 | | */ |
52 | 5 | void reset_time_scale_if_missing(int scale) { |
53 | 5 | const auto& schema = field_schema->parquet_schema; |
54 | 5 | if (!schema.__isset.logicalType && !schema.__isset.converted_type) { |
55 | 0 | int ts_scale = 9; |
56 | 0 | if (scale <= 3) { |
57 | 0 | ts_scale = 3; |
58 | 0 | } else if (scale <= 6) { |
59 | 0 | ts_scale = 6; |
60 | 0 | } |
61 | 0 | second_mask = common::exp10_i64(ts_scale); |
62 | 0 | scale_to_nano_factor = common::exp10_i64(9 - ts_scale); |
63 | | |
64 | | // The missing parque metadata makes it impossible for us to know the time zone information, |
65 | | // so we default to UTC here. |
66 | 0 | if (ctz == nullptr) { |
67 | 0 | ctz = &utc0; |
68 | 0 | } |
69 | 0 | } |
70 | 5 | } |
71 | | |
72 | 221 | void init(const FieldSchema* field_schema_, const cctz::time_zone* ctz_) { |
73 | 221 | field_schema = field_schema_; |
74 | 221 | if (ctz_ != nullptr) { |
75 | 221 | ctz = ctz_; |
76 | 221 | } |
77 | 221 | const auto& schema = field_schema->parquet_schema; |
78 | 221 | if (schema.__isset.logicalType && schema.logicalType.__isset.TIMESTAMP) { |
79 | 1 | const auto& timestamp_info = schema.logicalType.TIMESTAMP; |
80 | 1 | if (!timestamp_info.isAdjustedToUTC) { |
81 | | // should set timezone to utc+0 |
82 | | // Reference: https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#instant-semantics-timestamps-normalized-to-utc |
83 | | // If isAdjustedToUTC = false, the reader should display the same value no mater what local time zone is. For example: |
84 | | // When a timestamp is stored as `1970-01-03 12:00:00`, |
85 | | // if isAdjustedToUTC = true, UTC8 should read as `1970-01-03 20:00:00`, UTC6 should read as `1970-01-03 18:00:00` |
86 | | // if isAdjustedToUTC = false, UTC8 and UTC6 should read as `1970-01-03 12:00:00`, which is the same as `1970-01-03 12:00:00` in UTC0 |
87 | 1 | ctz = &utc0; |
88 | 1 | } |
89 | 1 | const auto& time_unit = timestamp_info.unit; |
90 | 1 | if (time_unit.__isset.MILLIS) { |
91 | 1 | second_mask = 1000; |
92 | 1 | scale_to_nano_factor = 1000000; |
93 | 1 | } else if (time_unit.__isset.MICROS) { |
94 | 0 | second_mask = 1000000; |
95 | 0 | scale_to_nano_factor = 1000; |
96 | 0 | } else if (time_unit.__isset.NANOS) { |
97 | 0 | second_mask = 1000000000; |
98 | 0 | scale_to_nano_factor = 1; |
99 | 0 | } |
100 | 220 | } else if (schema.__isset.converted_type) { |
101 | 61 | const auto& converted_type = schema.converted_type; |
102 | 61 | if (converted_type == tparquet::ConvertedType::TIMESTAMP_MILLIS) { |
103 | 0 | second_mask = 1000; |
104 | 0 | scale_to_nano_factor = 1000000; |
105 | 61 | } else if (converted_type == tparquet::ConvertedType::TIMESTAMP_MICROS) { |
106 | 4 | second_mask = 1000000; |
107 | 4 | scale_to_nano_factor = 1000; |
108 | 4 | } |
109 | 61 | } |
110 | | |
111 | 221 | is_type_compatibility = field_schema_->is_type_compatibility; |
112 | 221 | } |
113 | | }; |
114 | | |
115 | | /** |
116 | | * Convert parquet physical column to logical column |
117 | | * In parquet document(https://github.com/apache/parquet-format/blob/master/LogicalTypes.md), |
118 | | * Logical or converted type is the data type of column, physical type is the stored type of column chunk. |
119 | | * eg, decimal type can be stored as INT32, INT64, BYTE_ARRAY, FIXED_LENGTH_BYTE_ARRAY. |
120 | | * So there is a convert process from physical type to logical type. |
121 | | * In addition, Schema change will bring about a change in logical type. |
122 | | * |
123 | | * `PhysicalToLogicalConverter` strips away the conversion of logical type, and reuse `ColumnTypeConverter` |
124 | | * to resolve schema change, allowing parquet reader to only focus on the conversion of physical types. |
125 | | * |
126 | | * Therefore, two layers of converters are designed: |
127 | | * First, read parquet data with the physical type |
128 | | * Second, convert physical type to logical type |
129 | | * Third, convert logical type to the final type planned by FE(schema change) |
130 | | * |
131 | | * Ultimate performance optimization: |
132 | | * 1. If process of (First => Second) is consistent, eg. from BYTE_ARRAY to string, no additional copies and conversions will be introduced; |
133 | | * 2. If process of (Second => Third) is consistent, no additional copies and conversions will be introduced; |
134 | | * 3. Null map is shared among all processes, no additional copies and conversions will be introduced in null map; |
135 | | * 4. Only create one physical column in physical conversion, and reused in each loop; |
136 | | * 5. Only create one logical column in logical conversion, and reused in each loop; |
137 | | * 6. FIXED_LENGTH_BYTE_ARRAY is read as ColumnUInt8 instead of ColumnString, so the underlying decoder has no process to decode string |
138 | | * and use memory copy to read the data as a whole, and the conversion has no need to resolve the Offsets in ColumnString. |
139 | | */ |
140 | | class PhysicalToLogicalConverter { |
141 | | protected: |
142 | | ColumnPtr _cached_src_physical_column = nullptr; |
143 | | DataTypePtr _cached_src_physical_type = nullptr; |
144 | | std::unique_ptr<converter::ColumnTypeConverter> _logical_converter = nullptr; |
145 | | |
146 | | std::string _error_msg; |
147 | | |
148 | | std::unique_ptr<ConvertParams> _convert_params; |
149 | | |
150 | | public: |
151 | | static std::unique_ptr<PhysicalToLogicalConverter> get_converter( |
152 | | const FieldSchema* field_schema, DataTypePtr src_logical_type, |
153 | | const DataTypePtr& dst_logical_type, const cctz::time_zone* ctz, |
154 | | bool is_dict_filter = false); |
155 | | |
156 | | static bool is_parquet_native_type(PrimitiveType type); |
157 | | |
158 | | static bool is_decimal_type(PrimitiveType type); |
159 | | |
160 | 221 | PhysicalToLogicalConverter() = default; |
161 | 221 | virtual ~PhysicalToLogicalConverter() = default; |
162 | | |
163 | 4 | virtual Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) { |
164 | 4 | return Status::OK(); |
165 | 4 | } |
166 | | |
167 | | Status convert(ColumnPtr& src_physical_col, DataTypePtr src_logical_type, |
168 | | const DataTypePtr& dst_logical_type, ColumnPtr& dst_logical_col, |
169 | 231 | bool is_dict_filter) { |
170 | 231 | if (is_dict_filter) { |
171 | 0 | src_logical_type = DataTypeFactory::instance().create_data_type( |
172 | 0 | PrimitiveType::TYPE_INT, dst_logical_type->is_nullable()); |
173 | 0 | } |
174 | 231 | if (is_consistent() && _logical_converter->is_consistent()) { |
175 | 186 | return Status::OK(); |
176 | 186 | } |
177 | 45 | ColumnPtr src_logical_column; |
178 | 45 | if (is_consistent()) { |
179 | 4 | if (dst_logical_type->is_nullable()) { |
180 | 4 | auto doris_nullable_column = |
181 | 4 | assert_cast<const ColumnNullable*>(dst_logical_col.get()); |
182 | 4 | src_logical_column = |
183 | 4 | ColumnNullable::create(_cached_src_physical_column, |
184 | 4 | doris_nullable_column->get_null_map_column_ptr()); |
185 | 4 | } else { |
186 | 0 | src_logical_column = _cached_src_physical_column; |
187 | 0 | } |
188 | 41 | } else { |
189 | 41 | src_logical_column = _logical_converter->get_column(src_logical_type, dst_logical_col, |
190 | 41 | dst_logical_type); |
191 | 41 | } |
192 | 45 | RETURN_IF_ERROR(physical_convert(src_physical_col, src_logical_column)); |
193 | 45 | auto converted_column = dst_logical_col->assume_mutable(); |
194 | 45 | return _logical_converter->convert(src_logical_column, converted_column); |
195 | 45 | } |
196 | | |
197 | | virtual ColumnPtr get_physical_column(tparquet::Type::type src_physical_type, |
198 | | DataTypePtr src_logical_type, |
199 | | ColumnPtr& dst_logical_column, |
200 | | const DataTypePtr& dst_logical_type, bool is_dict_filter); |
201 | | |
202 | 231 | DataTypePtr& get_physical_type() { return _cached_src_physical_type; } |
203 | | |
204 | 127 | virtual bool is_consistent() { return false; } |
205 | | |
206 | 368 | virtual bool support() { return true; } |
207 | | |
208 | 0 | std::string get_error_msg() { return _error_msg; } |
209 | | }; |
210 | | |
211 | | class ConsistentPhysicalConverter : public PhysicalToLogicalConverter { |
212 | 455 | bool is_consistent() override { return true; } |
213 | | }; |
214 | | |
215 | | class UnsupportedConverter : public PhysicalToLogicalConverter { |
216 | | public: |
217 | 0 | UnsupportedConverter(std::string error_msg) { _error_msg = error_msg; } |
218 | | |
219 | | UnsupportedConverter(tparquet::Type::type src_physical_type, |
220 | 0 | const DataTypePtr& src_logical_type) { |
221 | 0 | std::string src_physical_str = tparquet::to_string(src_physical_type); |
222 | 0 | std::string src_logical_str = src_logical_type->get_name(); |
223 | 0 | _error_msg = src_physical_str + " => " + src_logical_str; |
224 | 0 | } |
225 | | |
226 | 0 | bool support() override { return false; } |
227 | | |
228 | 0 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
229 | 0 | return Status::InternalError("Unsupported physical to logical type: {}", _error_msg); |
230 | 0 | } |
231 | | }; |
232 | | |
233 | | // for tinyint, smallint |
234 | | template <PrimitiveType IntPrimitiveType> |
235 | | class LittleIntPhysicalConverter : public PhysicalToLogicalConverter { |
236 | 28 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
237 | 28 | using DstCppType = typename PrimitiveTypeTraits<IntPrimitiveType>::CppType; |
238 | 28 | using DstColumnType = typename PrimitiveTypeTraits<IntPrimitiveType>::ColumnType; |
239 | 28 | ColumnPtr from_col = remove_nullable(src_physical_col); |
240 | 28 | MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable(); |
241 | | |
242 | 28 | size_t rows = from_col->size(); |
243 | | // always comes from tparquet::Type::INT32 |
244 | 28 | auto& src_data = assert_cast<const ColumnInt32*>(from_col.get())->get_data(); |
245 | 28 | size_t start_idx = to_col->size(); |
246 | 28 | to_col->resize(start_idx + rows); |
247 | 28 | auto& data = assert_cast<DstColumnType&>(*to_col.get()).get_data(); |
248 | 108 | for (int i = 0; i < rows; ++i) { |
249 | 80 | data[start_idx + i] = static_cast<DstCppType>(src_data[i]); |
250 | 80 | } |
251 | | |
252 | 28 | return Status::OK(); |
253 | 28 | } _ZN5doris7parquet26LittleIntPhysicalConverterILNS_13PrimitiveTypeE3EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Line | Count | Source | 236 | 17 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { | 237 | 17 | using DstCppType = typename PrimitiveTypeTraits<IntPrimitiveType>::CppType; | 238 | 17 | using DstColumnType = typename PrimitiveTypeTraits<IntPrimitiveType>::ColumnType; | 239 | 17 | ColumnPtr from_col = remove_nullable(src_physical_col); | 240 | 17 | MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable(); | 241 | | | 242 | 17 | size_t rows = from_col->size(); | 243 | | // always comes from tparquet::Type::INT32 | 244 | 17 | auto& src_data = assert_cast<const ColumnInt32*>(from_col.get())->get_data(); | 245 | 17 | size_t start_idx = to_col->size(); | 246 | 17 | to_col->resize(start_idx + rows); | 247 | 17 | auto& data = assert_cast<DstColumnType&>(*to_col.get()).get_data(); | 248 | 59 | for (int i = 0; i < rows; ++i) { | 249 | 42 | data[start_idx + i] = static_cast<DstCppType>(src_data[i]); | 250 | 42 | } | 251 | | | 252 | 17 | return Status::OK(); | 253 | 17 | } |
_ZN5doris7parquet26LittleIntPhysicalConverterILNS_13PrimitiveTypeE4EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Line | Count | Source | 236 | 11 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { | 237 | 11 | using DstCppType = typename PrimitiveTypeTraits<IntPrimitiveType>::CppType; | 238 | 11 | using DstColumnType = typename PrimitiveTypeTraits<IntPrimitiveType>::ColumnType; | 239 | 11 | ColumnPtr from_col = remove_nullable(src_physical_col); | 240 | 11 | MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable(); | 241 | | | 242 | 11 | size_t rows = from_col->size(); | 243 | | // always comes from tparquet::Type::INT32 | 244 | 11 | auto& src_data = assert_cast<const ColumnInt32*>(from_col.get())->get_data(); | 245 | 11 | size_t start_idx = to_col->size(); | 246 | 11 | to_col->resize(start_idx + rows); | 247 | 11 | auto& data = assert_cast<DstColumnType&>(*to_col.get()).get_data(); | 248 | 49 | for (int i = 0; i < rows; ++i) { | 249 | 38 | data[start_idx + i] = static_cast<DstCppType>(src_data[i]); | 250 | 38 | } | 251 | | | 252 | 11 | return Status::OK(); | 253 | 11 | } |
|
254 | | }; |
255 | | |
256 | | template <PrimitiveType type> |
257 | | struct UnsignedTypeTraits; |
258 | | |
259 | | template <> |
260 | | struct UnsignedTypeTraits<TYPE_SMALLINT> { |
261 | | using UnsignedCppType = UInt8; |
262 | | //https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#unsigned-integers |
263 | | //INT(8, false), INT(16, false), and INT(32, false) must annotate an int32 primitive type and INT(64, false) |
264 | | //must annotate an int64 primitive type. |
265 | | using StorageCppType = Int32; |
266 | | using StorageColumnType = ColumnInt32; |
267 | | }; |
268 | | |
269 | | template <> |
270 | | struct UnsignedTypeTraits<TYPE_INT> { |
271 | | using UnsignedCppType = UInt16; |
272 | | using StorageCppType = Int32; |
273 | | using StorageColumnType = ColumnInt32; |
274 | | }; |
275 | | |
276 | | template <> |
277 | | struct UnsignedTypeTraits<TYPE_BIGINT> { |
278 | | using UnsignedCppType = UInt32; |
279 | | using StorageCppType = Int32; |
280 | | using StorageColumnType = ColumnInt32; |
281 | | }; |
282 | | |
283 | | template <> |
284 | | struct UnsignedTypeTraits<TYPE_LARGEINT> { |
285 | | using UnsignedCppType = UInt64; |
286 | | using StorageCppType = Int64; |
287 | | using StorageColumnType = ColumnInt64; |
288 | | }; |
289 | | |
290 | | template <PrimitiveType IntPrimitiveType> |
291 | | class UnsignedIntegerConverter : public PhysicalToLogicalConverter { |
292 | 0 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
293 | 0 | using UnsignedCppType = typename UnsignedTypeTraits<IntPrimitiveType>::UnsignedCppType; |
294 | 0 | using StorageCppType = typename UnsignedTypeTraits<IntPrimitiveType>::StorageCppType; |
295 | 0 | using StorageColumnType = typename UnsignedTypeTraits<IntPrimitiveType>::StorageColumnType; |
296 | 0 | using DstColumnType = typename PrimitiveTypeTraits<IntPrimitiveType>::ColumnType; |
297 | |
|
298 | 0 | ColumnPtr from_col = remove_nullable(src_physical_col); |
299 | 0 | MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable(); |
300 | 0 | auto& src_data = assert_cast<const StorageColumnType*>(from_col.get())->get_data(); |
301 | |
|
302 | 0 | size_t rows = src_data.size(); |
303 | 0 | size_t start_idx = to_col->size(); |
304 | 0 | to_col->resize(start_idx + rows); |
305 | 0 | auto& data = assert_cast<DstColumnType&>(*to_col.get()).get_data(); |
306 | |
|
307 | 0 | for (int i = 0; i < rows; i++) { |
308 | 0 | StorageCppType src_value = src_data[i]; |
309 | 0 | auto unsigned_value = static_cast<UnsignedCppType>(src_value); |
310 | 0 | data[start_idx + i] = unsigned_value; |
311 | 0 | } |
312 | |
|
313 | 0 | return Status::OK(); |
314 | 0 | } Unexecuted instantiation: _ZN5doris7parquet24UnsignedIntegerConverterILNS_13PrimitiveTypeE4EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet24UnsignedIntegerConverterILNS_13PrimitiveTypeE5EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet24UnsignedIntegerConverterILNS_13PrimitiveTypeE6EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet24UnsignedIntegerConverterILNS_13PrimitiveTypeE7EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ |
315 | | }; |
316 | | |
317 | | class FixedSizeBinaryConverter : public PhysicalToLogicalConverter { |
318 | | private: |
319 | | int _type_length; |
320 | | |
321 | | public: |
322 | 0 | FixedSizeBinaryConverter(int type_length) : _type_length(type_length) {} |
323 | | |
324 | 0 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
325 | 0 | ColumnPtr from_col = remove_nullable(src_physical_col); |
326 | 0 | MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable(); |
327 | |
|
328 | 0 | auto* src_data = assert_cast<const ColumnUInt8*>(from_col.get()); |
329 | 0 | size_t length = src_data->size(); |
330 | 0 | size_t num_values = length / _type_length; |
331 | 0 | auto& string_col = static_cast<ColumnString&>(*to_col.get()); |
332 | 0 | auto& offsets = string_col.get_offsets(); |
333 | 0 | auto& chars = string_col.get_chars(); |
334 | |
|
335 | 0 | size_t origin_size = chars.size(); |
336 | 0 | chars.resize(origin_size + length); |
337 | 0 | memcpy(chars.data() + origin_size, src_data->get_data().data(), length); |
338 | |
|
339 | 0 | origin_size = offsets.size(); |
340 | 0 | offsets.resize(origin_size + num_values); |
341 | 0 | auto end_offset = offsets[origin_size - 1]; |
342 | 0 | for (int i = 0; i < num_values; ++i) { |
343 | 0 | end_offset += _type_length; |
344 | 0 | offsets[origin_size + i] = end_offset; |
345 | 0 | } |
346 | |
|
347 | 0 | return Status::OK(); |
348 | 0 | } |
349 | | }; |
350 | | |
351 | | class Float16PhysicalConverter : public PhysicalToLogicalConverter { |
352 | | private: |
353 | | int _type_length; |
354 | | |
355 | | public: |
356 | 0 | Float16PhysicalConverter(int type_length) : _type_length(type_length) { |
357 | 0 | DCHECK_EQ(_type_length, 2); |
358 | 0 | } |
359 | | |
360 | 0 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
361 | 0 | ColumnPtr from_col = remove_nullable(src_physical_col); |
362 | 0 | MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable(); |
363 | |
|
364 | 0 | const auto* src_data = assert_cast<const ColumnUInt8*>(from_col.get()); |
365 | 0 | size_t length = src_data->size(); |
366 | 0 | size_t num_values = length / _type_length; |
367 | 0 | auto* to_float_column = assert_cast<ColumnFloat32*>(to_col.get()); |
368 | 0 | size_t start_idx = to_float_column->size(); |
369 | 0 | to_float_column->resize(start_idx + num_values); |
370 | 0 | auto& to_float_column_data = to_float_column->get_data(); |
371 | 0 | const auto* ptr = src_data->get_data().data(); |
372 | 0 | for (int i = 0; i < num_values; ++i) { |
373 | 0 | size_t offset = i * _type_length; |
374 | 0 | const auto* data_ptr = ptr + offset; |
375 | 0 | uint16_t raw; |
376 | 0 | memcpy(&raw, data_ptr, sizeof(uint16_t)); |
377 | 0 | float value = half_to_float(raw); |
378 | 0 | to_float_column_data[start_idx + i] = value; |
379 | 0 | } |
380 | |
|
381 | 0 | return Status::OK(); |
382 | 0 | } |
383 | | |
384 | 0 | float half_to_float(uint16_t h) { |
385 | | // uint16_t h: half precision floating point |
386 | | // bit 15: sign(1 bit) |
387 | | // bits 14..10 : exponent(5 bits) |
388 | | // bits 9..0 : mantissa(10 bits) |
389 | | |
390 | | // sign bit placed to float32 bit31 |
391 | 0 | uint32_t sign = (h & 0x8000U) << 16; // 0x8000 << 16 = 0x8000_0000 |
392 | | // exponent:(5 bits) |
393 | 0 | uint32_t exp = (h & 0x7C00U) >> 10; // 0x7C00 = 0111 1100 0000 (half exponent mask) |
394 | | // mantissa(10 bits) |
395 | 0 | uint32_t mant = (h & 0x03FFU); // 10-bit fraction |
396 | | |
397 | | // cases:Zero/Subnormal, Normal, Inf/NaN |
398 | 0 | if (exp == 0) { |
399 | | // exp==0: Zero or Subnormal ---------- |
400 | 0 | if (mant == 0) { |
401 | | // ±0.0 |
402 | | // sign = either 0x00000000 or 0x80000000 |
403 | 0 | return std::bit_cast<float>(sign); |
404 | 0 | } else { |
405 | | // ---------- Subnormal ---------- |
406 | | // half subnormal: |
407 | | // value = (-1)^sign * (mant / 2^10) * 2^(1 - bias) |
408 | | // half bias = 15 → exponent = 1 - 15 = -14 |
409 | 0 | float f = (static_cast<float>(mant) / 1024.0F) * std::powf(2.0F, -14.0F); |
410 | 0 | return sign ? -f : f; |
411 | 0 | } |
412 | 0 | } else if (exp == 0x1F) { |
413 | | // exp==31: Inf or NaN ---------- |
414 | | // float32: |
415 | | // exponent = 255 (0xFF) |
416 | | // mantissa = mant << 13 |
417 | 0 | uint32_t f = sign | 0x7F800000U | (mant << 13); |
418 | 0 | return std::bit_cast<float>(f); |
419 | 0 | } else { |
420 | | // Normalized ---------- |
421 | | // float32 exponent: |
422 | | // exp32 = exp16 - bias16 + bias32 |
423 | | // bias16 = 15 |
424 | | // bias32 = 127 |
425 | | // |
426 | | // so: exp32 = exp + (127 - 15) |
427 | 0 | uint32_t f = sign | ((exp + (127 - 15)) << 23) // place to float32 exponent |
428 | 0 | | (mant << 13); // mantissa align to 23 bits |
429 | 0 | return std::bit_cast<float>(f); |
430 | 0 | } |
431 | 0 | } |
432 | | }; |
433 | | |
434 | | class UUIDVarBinaryConverter : public PhysicalToLogicalConverter { |
435 | | public: |
436 | 1 | UUIDVarBinaryConverter(int type_length) : _type_length(type_length) {} |
437 | | |
438 | 1 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
439 | 1 | DCHECK(!is_column_const(*src_physical_col)) << src_physical_col->dump_structure(); |
440 | 1 | DCHECK(!is_column_const(*src_logical_column)) << src_logical_column->dump_structure(); |
441 | 1 | const ColumnUInt8* uint8_col = nullptr; |
442 | 1 | if (is_column_nullable(*src_physical_col)) { |
443 | 1 | const auto& nullable = assert_cast<const ColumnNullable*>(src_physical_col.get()); |
444 | 1 | uint8_col = &assert_cast<const ColumnUInt8&>(nullable->get_nested_column()); |
445 | 1 | } else { |
446 | 0 | uint8_col = &assert_cast<const ColumnUInt8&>(*src_physical_col); |
447 | 0 | } |
448 | | |
449 | 1 | MutableColumnPtr to_col = nullptr; |
450 | | // nullmap flag seems have been handled in upper level |
451 | 1 | if (src_logical_column->is_nullable()) { |
452 | 1 | const auto* nullable = assert_cast<const ColumnNullable*>(src_logical_column.get()); |
453 | 1 | to_col = nullable->get_nested_column_ptr()->assume_mutable(); |
454 | 1 | } else { |
455 | 0 | to_col = src_logical_column->assume_mutable(); |
456 | 0 | } |
457 | 1 | auto* to_varbinary_column = assert_cast<ColumnVarbinary*>(to_col.get()); |
458 | 1 | size_t length = uint8_col->size(); |
459 | 1 | size_t num_values = length / _type_length; |
460 | 1 | const auto* ptr = uint8_col->get_data().data(); |
461 | | |
462 | 4 | for (int i = 0; i < num_values; ++i) { |
463 | 3 | auto offset = i * _type_length; |
464 | 3 | const char* data_ptr = reinterpret_cast<const char*>(ptr + offset); |
465 | 3 | to_varbinary_column->insert_data(data_ptr, _type_length); |
466 | 3 | } |
467 | 1 | return Status::OK(); |
468 | 1 | } |
469 | | |
470 | | private: |
471 | | int _type_length; |
472 | | }; |
473 | | |
474 | | template <PrimitiveType DecimalPType> |
475 | | class FixedSizeToDecimal : public PhysicalToLogicalConverter { |
476 | | public: |
477 | | using DecimalType = typename PrimitiveTypeTraits<DecimalPType>::CppType; |
478 | 2 | FixedSizeToDecimal(int32_t type_length) : _type_length(type_length) {}Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EEC2Ei _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EEC2Ei Line | Count | Source | 478 | 2 | FixedSizeToDecimal(int32_t type_length) : _type_length(type_length) {} |
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EEC2Ei Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EEC2Ei |
479 | | |
480 | 2 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
481 | 2 | ColumnPtr src_col = remove_nullable(src_physical_col); |
482 | 2 | MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); |
483 | | |
484 | 2 | #define M(FixedTypeLength, ValueCopyType) \ |
485 | 2 | case FixedTypeLength: \ |
486 | 2 | return _convert_internal<FixedTypeLength, ValueCopyType>(src_col, dst_col); |
487 | | |
488 | 2 | #define APPLY_FOR_DECIMALS() \ |
489 | 2 | M(1, int64_t) \ |
490 | 0 | M(2, int64_t) \ |
491 | 0 | M(3, int64_t) \ |
492 | 0 | M(4, int64_t) \ |
493 | 1 | M(5, int64_t) \ |
494 | 1 | M(6, int64_t) \ |
495 | 0 | M(7, int64_t) \ |
496 | 1 | M(8, int64_t) \ |
497 | 1 | M(9, int128_t) \ |
498 | 0 | M(10, int128_t) \ |
499 | 0 | M(11, int128_t) \ |
500 | 0 | M(12, int128_t) \ |
501 | 0 | M(13, int128_t) \ |
502 | 0 | M(14, int128_t) \ |
503 | 0 | M(15, int128_t) \ |
504 | 0 | M(16, int128_t) \ |
505 | 0 | M(17, wide::Int256) \ |
506 | 0 | M(18, wide::Int256) \ |
507 | 0 | M(19, wide::Int256) \ |
508 | 0 | M(20, wide::Int256) \ |
509 | 0 | M(21, wide::Int256) \ |
510 | 0 | M(22, wide::Int256) \ |
511 | 0 | M(23, wide::Int256) \ |
512 | 0 | M(24, wide::Int256) \ |
513 | 0 | M(25, wide::Int256) \ |
514 | 0 | M(26, wide::Int256) \ |
515 | 0 | M(27, wide::Int256) \ |
516 | 0 | M(28, wide::Int256) \ |
517 | 0 | M(29, wide::Int256) \ |
518 | 0 | M(30, wide::Int256) \ |
519 | 0 | M(31, wide::Int256) \ |
520 | 0 | M(32, wide::Int256) |
521 | | |
522 | 2 | switch (_type_length) { |
523 | 0 | APPLY_FOR_DECIMALS() |
524 | 0 | default: |
525 | 0 | throw Exception(Status::FatalError("__builtin_unreachable")); |
526 | 2 | } |
527 | 0 | return Status::OK(); |
528 | 2 | #undef APPLY_FOR_DECIMALS |
529 | 2 | #undef M |
530 | 2 | } Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Line | Count | Source | 480 | 2 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { | 481 | 2 | ColumnPtr src_col = remove_nullable(src_physical_col); | 482 | 2 | MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); | 483 | | | 484 | 2 | #define M(FixedTypeLength, ValueCopyType) \ | 485 | 2 | case FixedTypeLength: \ | 486 | 2 | return _convert_internal<FixedTypeLength, ValueCopyType>(src_col, dst_col); | 487 | | | 488 | 2 | #define APPLY_FOR_DECIMALS() \ | 489 | 2 | M(1, int64_t) \ | 490 | 2 | M(2, int64_t) \ | 491 | 2 | M(3, int64_t) \ | 492 | 2 | M(4, int64_t) \ | 493 | 2 | M(5, int64_t) \ | 494 | 2 | M(6, int64_t) \ | 495 | 2 | M(7, int64_t) \ | 496 | 2 | M(8, int64_t) \ | 497 | 2 | M(9, int128_t) \ | 498 | 2 | M(10, int128_t) \ | 499 | 2 | M(11, int128_t) \ | 500 | 2 | M(12, int128_t) \ | 501 | 2 | M(13, int128_t) \ | 502 | 2 | M(14, int128_t) \ | 503 | 2 | M(15, int128_t) \ | 504 | 2 | M(16, int128_t) \ | 505 | 2 | M(17, wide::Int256) \ | 506 | 2 | M(18, wide::Int256) \ | 507 | 2 | M(19, wide::Int256) \ | 508 | 2 | M(20, wide::Int256) \ | 509 | 2 | M(21, wide::Int256) \ | 510 | 2 | M(22, wide::Int256) \ | 511 | 2 | M(23, wide::Int256) \ | 512 | 2 | M(24, wide::Int256) \ | 513 | 2 | M(25, wide::Int256) \ | 514 | 2 | M(26, wide::Int256) \ | 515 | 2 | M(27, wide::Int256) \ | 516 | 2 | M(28, wide::Int256) \ | 517 | 2 | M(29, wide::Int256) \ | 518 | 2 | M(30, wide::Int256) \ | 519 | 2 | M(31, wide::Int256) \ | 520 | 2 | M(32, wide::Int256) | 521 | | | 522 | 2 | switch (_type_length) { | 523 | 0 | APPLY_FOR_DECIMALS() | 524 | 0 | default: | 525 | 0 | throw 
Exception(Status::FatalError("__builtin_unreachable")); | 526 | 2 | } | 527 | 0 | return Status::OK(); | 528 | 2 | #undef APPLY_FOR_DECIMALS | 529 | 2 | #undef M | 530 | 2 | } |
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ |
531 | | |
532 | | template <int fixed_type_length, typename ValueCopyType> |
533 | 2 | Status _convert_internal(ColumnPtr& src_col, MutableColumnPtr& dst_col) { |
534 | 2 | size_t rows = src_col->size() / fixed_type_length; |
535 | 2 | auto* buf = static_cast<const ColumnUInt8*>(src_col.get())->get_data().data(); |
536 | 2 | size_t start_idx = dst_col->size(); |
537 | 2 | dst_col->resize(start_idx + rows); |
538 | | |
539 | 2 | auto& data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col.get())->get_data(); |
540 | 2 | size_t offset = 0; |
541 | 6 | for (int i = 0; i < rows; i++) { |
542 | | // When Decimal in parquet is stored in byte arrays, binary and fixed, |
543 | | // the unscaled number must be encoded as two's complement using big-endian byte order. |
544 | 4 | ValueCopyType value = 0; |
545 | 4 | memcpy(reinterpret_cast<char*>(&value), buf + offset, sizeof(value)); |
546 | 4 | offset += fixed_type_length; |
547 | 4 | value = to_endian<std::endian::big>(value); |
548 | 4 | value = value >> ((sizeof(value) - fixed_type_length) * 8); |
549 | 4 | auto& v = reinterpret_cast<DecimalType&>(data[start_idx + i]); |
550 | 4 | v = (DecimalType)value; |
551 | 4 | } |
552 | | |
553 | 2 | return Status::OK(); |
554 | 2 | } Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi1ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi2ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi3ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi4ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi5ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi6ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi7ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi8ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi9EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi10EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi11EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi12EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi13EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi14EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi15EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi16EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi17EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi18EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi19EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi20EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi21EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi22EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi23EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi24EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi25EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi26EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi27EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi28EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi29EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi30EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi31EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi32EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi1ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi2ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi3ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi4ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi5ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Line | Count | Source | 533 | 1 | Status _convert_internal(ColumnPtr& src_col, MutableColumnPtr& dst_col) { | 534 | 1 | size_t rows = src_col->size() / fixed_type_length; | 535 | 1 | auto* buf = 
static_cast<const ColumnUInt8*>(src_col.get())->get_data().data(); | 536 | 1 | size_t start_idx = dst_col->size(); | 537 | 1 | dst_col->resize(start_idx + rows); | 538 | | | 539 | 1 | auto& data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col.get())->get_data(); | 540 | 1 | size_t offset = 0; | 541 | 3 | for (int i = 0; i < rows; i++) { | 542 | | // When Decimal in parquet is stored in byte arrays, binary and fixed, | 543 | | // the unscaled number must be encoded as two's complement using big-endian byte order. | 544 | 2 | ValueCopyType value = 0; | 545 | 2 | memcpy(reinterpret_cast<char*>(&value), buf + offset, sizeof(value)); | 546 | 2 | offset += fixed_type_length; | 547 | 2 | value = to_endian<std::endian::big>(value); | 548 | 2 | value = value >> ((sizeof(value) - fixed_type_length) * 8); | 549 | 2 | auto& v = reinterpret_cast<DecimalType&>(data[start_idx + i]); | 550 | 2 | v = (DecimalType)value; | 551 | 2 | } | 552 | | | 553 | 1 | return Status::OK(); | 554 | 1 | } |
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi6ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi7ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi8ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Line | Count | Source | 533 | 1 | Status _convert_internal(ColumnPtr& src_col, MutableColumnPtr& dst_col) { | 534 | 1 | size_t rows = src_col->size() / fixed_type_length; | 535 | 1 | auto* buf = static_cast<const ColumnUInt8*>(src_col.get())->get_data().data(); | 536 | 1 | size_t start_idx = dst_col->size(); | 537 | 1 | dst_col->resize(start_idx + rows); | 538 | | | 539 | 1 | auto& data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col.get())->get_data(); | 540 | 1 | size_t offset = 0; | 541 | 3 | for (int i = 0; i < rows; i++) { | 542 | | // When Decimal in parquet is stored in byte arrays, binary and fixed, | 543 | | // the unscaled number must be encoded as two's complement using big-endian byte order. | 544 | 2 | ValueCopyType value = 0; | 545 | 2 | memcpy(reinterpret_cast<char*>(&value), buf + offset, sizeof(value)); | 546 | 2 | offset += fixed_type_length; | 547 | 2 | value = to_endian<std::endian::big>(value); | 548 | 2 | value = value >> ((sizeof(value) - fixed_type_length) * 8); | 549 | 2 | auto& v = reinterpret_cast<DecimalType&>(data[start_idx + i]); | 550 | 2 | v = (DecimalType)value; | 551 | 2 | } | 552 | | | 553 | 1 | return Status::OK(); | 554 | 1 | } |
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi9EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi10EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi11EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi12EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi13EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi14EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi15EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi16EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi17EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi18EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE 
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi19EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi20EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi21EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi22EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi23EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi24EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi25EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi26EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi27EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi28EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi29EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi30EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi31EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi32EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi1ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi2ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi3ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi4ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi5ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi6ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi7ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi8ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi9EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi10EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi11EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi12EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi13EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi14EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi15EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi16EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi17EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi18EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi19EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi20EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi21EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi22EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi23EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi24EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi25EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi26EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi27EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi28EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi29EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi30EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi31EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi32EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi1ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi2ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi3ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi4ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi5ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi6ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi7ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi8ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi9EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi10EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi11EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi12EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi13EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi14EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi15EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi16EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi17EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi18EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi19EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi20EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi21EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi22EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi23EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi24EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi25EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi26EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi27EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi28EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi29EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi30EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi31EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi32EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE |
555 | | |
556 | | private: |
557 | | int32_t _type_length; |
558 | | }; |
559 | | |
560 | | template <PrimitiveType DecimalPType> |
561 | | class StringToDecimal : public PhysicalToLogicalConverter { |
562 | | using DecimalType = typename PrimitiveTypeTraits<DecimalPType>::CppType; |
563 | 0 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
564 | 0 | using ValueCopyType = DecimalType::NativeType; |
565 | 0 | ColumnPtr src_col = remove_nullable(src_physical_col); |
566 | 0 | MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); |
567 | |
|
568 | 0 | size_t rows = src_col->size(); |
569 | 0 | auto buf = static_cast<const ColumnString*>(src_col.get())->get_chars().data(); |
570 | 0 | auto& offset = static_cast<const ColumnString*>(src_col.get())->get_offsets(); |
571 | 0 | size_t start_idx = dst_col->size(); |
572 | 0 | dst_col->resize(start_idx + rows); |
573 | |
|
574 | 0 | auto& data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col.get())->get_data(); |
575 | 0 | for (int i = 0; i < rows; i++) { |
576 | 0 | size_t len = offset[i] - offset[i - 1]; |
577 | | // When Decimal in parquet is stored in byte arrays, binary and fixed, |
578 | | // the unscaled number must be encoded as two's complement using big-endian byte order. |
579 | 0 | ValueCopyType value = 0; |
580 | 0 | if (len > 0) { |
581 | 0 | memcpy(reinterpret_cast<char*>(&value), buf + offset[i - 1], len); |
582 | 0 | value = to_endian<std::endian::big>(value); |
583 | 0 | value = value >> ((sizeof(value) - len) * 8); |
584 | 0 | } |
585 | 0 | auto& v = reinterpret_cast<DecimalType&>(data[start_idx + i]); |
586 | 0 | v = (DecimalType)value; |
587 | 0 | } |
588 | |
|
589 | 0 | return Status::OK(); |
590 | 0 | } Unexecuted instantiation: _ZN5doris7parquet15StringToDecimalILNS_13PrimitiveTypeE28EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet15StringToDecimalILNS_13PrimitiveTypeE29EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet15StringToDecimalILNS_13PrimitiveTypeE30EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet15StringToDecimalILNS_13PrimitiveTypeE35EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ |
591 | | }; |
592 | | |
593 | | template <PrimitiveType NumberType, PrimitiveType DecimalPType> |
594 | | class NumberToDecimal : public PhysicalToLogicalConverter { |
595 | | using DecimalType = typename PrimitiveTypeTraits<DecimalPType>::CppType; |
596 | 2 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
597 | 2 | using ValueCopyType = typename DecimalType::NativeType; |
598 | 2 | ColumnPtr src_col = remove_nullable(src_physical_col); |
599 | 2 | MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); |
600 | | |
601 | 2 | size_t rows = src_col->size(); |
602 | 2 | auto* src_data = |
603 | 2 | static_cast<const ColumnVector<NumberType>*>(src_col.get())->get_data().data(); |
604 | 2 | size_t start_idx = dst_col->size(); |
605 | 2 | dst_col->resize(start_idx + rows); |
606 | | |
607 | 2 | auto* data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col.get())->get_data().data(); |
608 | | |
609 | 24 | for (int i = 0; i < rows; i++) { |
610 | 22 | ValueCopyType value; |
611 | 22 | if constexpr (std::is_same_v<DecimalType, Decimal256>) { |
612 | 0 | value = src_data[i]; |
613 | 22 | } else { |
614 | 22 | value = cast_set<ValueCopyType, typename PrimitiveTypeTraits<NumberType>::CppType, |
615 | 22 | false>(src_data[i]); |
616 | 22 | } |
617 | | |
618 | 22 | data[start_idx + i] = (DecimalType)value; |
619 | 22 | } |
620 | 2 | return Status::OK(); |
621 | 2 | } Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE5ELS2_28EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE6ELS2_28EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE5ELS2_29EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE6ELS2_29EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Line | Count | Source | 596 | 2 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { | 597 | 2 | using ValueCopyType = typename DecimalType::NativeType; | 598 | 2 | ColumnPtr src_col = remove_nullable(src_physical_col); | 599 | 2 | MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); | 600 | | | 601 | 2 | size_t rows = src_col->size(); | 602 | 2 | auto* src_data = | 603 | 2 | static_cast<const ColumnVector<NumberType>*>(src_col.get())->get_data().data(); | 604 | 2 | size_t start_idx = dst_col->size(); | 605 | 2 | dst_col->resize(start_idx + rows); | 606 | | | 607 | 2 | auto* data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col.get())->get_data().data(); | 608 | | | 609 | 24 | for (int i = 0; i < rows; i++) { | 610 | 22 | ValueCopyType value; | 611 | | if constexpr (std::is_same_v<DecimalType, Decimal256>) { | 612 | | value = src_data[i]; | 613 | 22 | } else { | 614 | 22 | value = cast_set<ValueCopyType, typename PrimitiveTypeTraits<NumberType>::CppType, | 615 | 22 | false>(src_data[i]); | 616 | 22 | } | 617 | | | 618 | 22 | data[start_idx + i] = (DecimalType)value; | 619 | 22 | } | 620 | 2 | return Status::OK(); | 621 | 2 | } |
Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE5ELS2_30EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE6ELS2_30EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE5ELS2_35EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE6ELS2_35EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ |
622 | | }; |
623 | | |
624 | | class Int32ToDate : public PhysicalToLogicalConverter { |
625 | 7 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
626 | 7 | ColumnPtr src_col = remove_nullable(src_physical_col); |
627 | 7 | MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); |
628 | | |
629 | 7 | size_t rows = src_col->size(); |
630 | 7 | size_t start_idx = dst_col->size(); |
631 | 7 | dst_col->reserve(start_idx + rows); |
632 | | |
633 | 7 | auto& src_data = static_cast<const ColumnInt32*>(src_col.get())->get_data(); |
634 | 7 | auto& data = static_cast<ColumnDateV2*>(dst_col.get())->get_data(); |
635 | 7 | date_day_offset_dict& date_dict = date_day_offset_dict::get(); |
636 | | |
637 | 59 | for (int i = 0; i < rows; i++) { |
638 | 52 | data.push_back_without_reserve(date_dict[src_data[i]].to_date_int_val()); |
639 | 52 | } |
640 | | |
641 | 7 | return Status::OK(); |
642 | 7 | } |
643 | | }; |
644 | | |
645 | | struct Int64ToTimestamp : public PhysicalToLogicalConverter { |
646 | 5 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
647 | 5 | ColumnPtr src_col = remove_nullable(src_physical_col); |
648 | 5 | MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); |
649 | | |
650 | 5 | size_t rows = src_col->size(); |
651 | 5 | size_t start_idx = dst_col->size(); |
652 | 5 | dst_col->resize(start_idx + rows); |
653 | | |
654 | 5 | auto src_data = static_cast<const ColumnInt64*>(src_col.get())->get_data().data(); |
655 | 5 | auto& data = static_cast<ColumnDateTimeV2*>(dst_col.get())->get_data(); |
656 | | |
657 | 51 | for (int i = 0; i < rows; i++) { |
658 | 46 | int64_t x = src_data[i]; |
659 | 46 | auto& num = data[start_idx + i]; |
660 | 46 | auto& value = reinterpret_cast<DateV2Value<DateTimeV2ValueType>&>(num); |
661 | 46 | value.from_unixtime(x / _convert_params->second_mask, *_convert_params->ctz); |
662 | 46 | value.set_microsecond((x % _convert_params->second_mask) * |
663 | 46 | (_convert_params->scale_to_nano_factor / 1000)); |
664 | 46 | } |
665 | 5 | return Status::OK(); |
666 | 5 | } |
667 | | }; |
668 | | |
669 | | struct Int64ToTimestampTz : public PhysicalToLogicalConverter { |
670 | 0 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
671 | 0 | ColumnPtr src_col = remove_nullable(src_physical_col); |
672 | 0 | MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); |
673 | |
|
674 | 0 | size_t rows = src_col->size(); |
675 | 0 | size_t start_idx = dst_col->size(); |
676 | 0 | dst_col->resize(start_idx + rows); |
677 | |
|
678 | 0 | const auto& src_data = assert_cast<const ColumnInt64*>(src_col.get())->get_data(); |
679 | 0 | auto& dest_data = assert_cast<ColumnTimeStampTz*>(dst_col.get())->get_data(); |
680 | 0 | static const cctz::time_zone UTC = cctz::utc_time_zone(); |
681 | |
|
682 | 0 | for (int i = 0; i < rows; i++) { |
683 | 0 | int64_t x = src_data[i]; |
684 | 0 | auto& tz = dest_data[start_idx + i]; |
685 | 0 | tz.from_unixtime(x / _convert_params->second_mask, UTC); |
686 | 0 | tz.set_microsecond((x % _convert_params->second_mask) * |
687 | 0 | (_convert_params->scale_to_nano_factor / 1000)); |
688 | 0 | } |
689 | 0 | return Status::OK(); |
690 | 0 | } |
691 | | }; |
692 | | |
693 | | struct Int96toTimestamp : public PhysicalToLogicalConverter { |
694 | 0 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
695 | 0 | ColumnPtr src_col = remove_nullable(src_physical_col); |
696 | 0 | MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); |
697 | |
|
698 | 0 | size_t rows = src_col->size() / sizeof(ParquetInt96); |
699 | 0 | auto& src_data = static_cast<const ColumnInt8*>(src_col.get())->get_data(); |
700 | 0 | auto ParquetInt96_data = (ParquetInt96*)src_data.data(); |
701 | 0 | size_t start_idx = dst_col->size(); |
702 | 0 | dst_col->resize(start_idx + rows); |
703 | 0 | auto& data = static_cast<ColumnDateTimeV2*>(dst_col.get())->get_data(); |
704 | |
|
705 | 0 | for (int i = 0; i < rows; i++) { |
706 | 0 | ParquetInt96 src_cell_data = ParquetInt96_data[i]; |
707 | 0 | auto& dst_value = |
708 | 0 | reinterpret_cast<DateV2Value<DateTimeV2ValueType>&>(data[start_idx + i]); |
709 | |
|
710 | 0 | int64_t timestamp_with_micros = src_cell_data.to_timestamp_micros(); |
711 | 0 | dst_value.from_unixtime(timestamp_with_micros / 1000000, *_convert_params->ctz); |
712 | 0 | dst_value.set_microsecond(timestamp_with_micros % 1000000); |
713 | 0 | } |
714 | 0 | return Status::OK(); |
715 | 0 | } |
716 | | }; |
717 | | |
718 | | struct Int96toTimestampTz : public PhysicalToLogicalConverter { |
719 | 0 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
720 | 0 | ColumnPtr src_col = remove_nullable(src_physical_col); |
721 | 0 | MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); |
722 | |
|
723 | 0 | size_t rows = src_col->size() / sizeof(ParquetInt96); |
724 | 0 | const auto& src_data = assert_cast<const ColumnInt8*>(src_col.get())->get_data(); |
725 | 0 | auto* ParquetInt96_data = (ParquetInt96*)src_data.data(); |
726 | 0 | size_t start_idx = dst_col->size(); |
727 | 0 | dst_col->resize(start_idx + rows); |
728 | 0 | auto& data = assert_cast<ColumnTimeStampTz*>(dst_col.get())->get_data(); |
729 | 0 | static const cctz::time_zone UTC = cctz::utc_time_zone(); |
730 | |
|
731 | 0 | for (int i = 0; i < rows; i++) { |
732 | 0 | ParquetInt96 src_cell_data = ParquetInt96_data[i]; |
733 | 0 | int64_t timestamp_with_micros = src_cell_data.to_timestamp_micros(); |
734 | 0 | auto& tz = data[start_idx + i]; |
735 | 0 | tz.from_unixtime(timestamp_with_micros / 1000000, UTC); |
736 | 0 | tz.set_microsecond(timestamp_with_micros % 1000000); |
737 | 0 | } |
738 | 0 | return Status::OK(); |
739 | 0 | } |
740 | | }; |
741 | | |
742 | | } // namespace doris::parquet |