be/src/format/parquet/parquet_column_convert.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <gen_cpp/parquet_types.h> |
21 | | |
22 | | #include "common/cast_set.h" |
23 | | #include "core/column/column_varbinary.h" |
24 | | #include "core/data_type/data_type_factory.hpp" |
25 | | #include "core/data_type/primitive_type.h" |
26 | | #include "core/extended_types.h" |
27 | | #include "core/field.h" |
28 | | #include "core/types.h" |
29 | | #include "format/column_type_convert.h" |
30 | | #include "format/format_common.h" |
31 | | #include "format/parquet/decoder.h" |
32 | | #include "format/parquet/parquet_common.h" |
33 | | #include "format/parquet/schema_desc.h" |
34 | | |
35 | | namespace doris::parquet { |
36 | | #include "common/compile_check_begin.h" |
37 | | struct ConvertParams { |
38 | | // schema.logicalType.TIMESTAMP.isAdjustedToUTC == false |
39 | | static const cctz::time_zone utc0; |
40 | | // schema.logicalType.TIMESTAMP.isAdjustedToUTC == true, we should set local time zone |
41 | | const cctz::time_zone* ctz = nullptr; |
42 | | size_t offset_days = 0; |
43 | | int64_t second_mask = 1; |
44 | | int64_t scale_to_nano_factor = 1; |
45 | | const FieldSchema* field_schema = nullptr; |
46 | | |
47 | | // For UInt8 -> Int16, UInt16 -> Int32, UInt32 -> Int64, UInt64 -> Int128. |
48 | | bool is_type_compatibility = false; |
49 | | |
50 | | /** |
51 | | * Some frameworks like paimon may write non-standard parquet files. The timestamp field doesn't have |
52 | | * logicalType or converted_type to indicate its precision. We have to reset the time mask. |
53 | | */ |
54 | 5 | void reset_time_scale_if_missing(int scale) { |
55 | 5 | const auto& schema = field_schema->parquet_schema; |
56 | 5 | if (!schema.__isset.logicalType && !schema.__isset.converted_type) { |
57 | 0 | int ts_scale = 9; |
58 | 0 | if (scale <= 3) { |
59 | 0 | ts_scale = 3; |
60 | 0 | } else if (scale <= 6) { |
61 | 0 | ts_scale = 6; |
62 | 0 | } |
63 | 0 | second_mask = common::exp10_i64(ts_scale); |
64 | 0 | scale_to_nano_factor = common::exp10_i64(9 - ts_scale); |
65 | | |
66 | | // The missing parquet metadata makes it impossible for us to know the time zone information, |
67 | | // so we default to UTC here. |
68 | 0 | if (ctz == nullptr) { |
69 | 0 | ctz = &utc0; |
70 | 0 | } |
71 | 0 | } |
72 | 5 | } |
73 | | |
74 | 212 | void init(const FieldSchema* field_schema_, const cctz::time_zone* ctz_) { |
75 | 212 | field_schema = field_schema_; |
76 | 212 | if (ctz_ != nullptr) { |
77 | 212 | ctz = ctz_; |
78 | 212 | } |
79 | 212 | const auto& schema = field_schema->parquet_schema; |
80 | 212 | if (schema.__isset.logicalType && schema.logicalType.__isset.TIMESTAMP) { |
81 | 1 | const auto& timestamp_info = schema.logicalType.TIMESTAMP; |
82 | 1 | if (!timestamp_info.isAdjustedToUTC) { |
83 | | // should set timezone to utc+0 |
84 | | // Reference: https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#instant-semantics-timestamps-normalized-to-utc |
85 | | // If isAdjustedToUTC = false, the reader should display the same value no matter what the local time zone is. For example: |
86 | | // When a timestamp is stored as `1970-01-03 12:00:00`, |
87 | | // if isAdjustedToUTC = true, UTC8 should read as `1970-01-03 20:00:00`, UTC6 should read as `1970-01-03 18:00:00` |
88 | | // if isAdjustedToUTC = false, UTC8 and UTC6 should read as `1970-01-03 12:00:00`, which is the same as `1970-01-03 12:00:00` in UTC0 |
89 | 1 | ctz = &utc0; |
90 | 1 | } |
91 | 1 | const auto& time_unit = timestamp_info.unit; |
92 | 1 | if (time_unit.__isset.MILLIS) { |
93 | 1 | second_mask = 1000; |
94 | 1 | scale_to_nano_factor = 1000000; |
95 | 1 | } else if (time_unit.__isset.MICROS) { |
96 | 0 | second_mask = 1000000; |
97 | 0 | scale_to_nano_factor = 1000; |
98 | 0 | } else if (time_unit.__isset.NANOS) { |
99 | 0 | second_mask = 1000000000; |
100 | 0 | scale_to_nano_factor = 1; |
101 | 0 | } |
102 | 211 | } else if (schema.__isset.converted_type) { |
103 | 53 | const auto& converted_type = schema.converted_type; |
104 | 53 | if (converted_type == tparquet::ConvertedType::TIMESTAMP_MILLIS) { |
105 | 0 | second_mask = 1000; |
106 | 0 | scale_to_nano_factor = 1000000; |
107 | 53 | } else if (converted_type == tparquet::ConvertedType::TIMESTAMP_MICROS) { |
108 | 4 | second_mask = 1000000; |
109 | 4 | scale_to_nano_factor = 1000; |
110 | 4 | } |
111 | 53 | } |
112 | | |
113 | 212 | if (ctz) { |
114 | 212 | VecDateTimeValue t; |
115 | 212 | t.from_unixtime(0, *ctz); |
116 | 212 | offset_days = t.day() == 31 ? -1 : 0; |
117 | 212 | } |
118 | 212 | is_type_compatibility = field_schema_->is_type_compatibility; |
119 | 212 | } |
120 | | }; |
121 | | |
122 | | /** |
123 | | * Convert parquet physical column to logical column |
124 | | * In parquet document(https://github.com/apache/parquet-format/blob/master/LogicalTypes.md), |
125 | | * Logical or converted type is the data type of column, physical type is the stored type of column chunk. |
126 | | * eg, decimal type can be stored as INT32, INT64, BYTE_ARRAY, FIXED_LENGTH_BYTE_ARRAY. |
127 | | * So there is a convert process from physical type to logical type. |
128 | | * In addition, Schema change will bring about a change in logical type. |
129 | | * |
130 | | * `PhysicalToLogicalConverter` strips away the conversion of logical type, and reuses `ColumnTypeConverter` |
131 | | * to resolve schema change, allowing parquet reader to only focus on the conversion of physical types. |
132 | | * |
133 | | * Therefore, two layers of converters are designed: |
134 | | * First, read parquet data with the physical type |
135 | | * Second, convert physical type to logical type |
136 | | * Third, convert logical type to the final type planned by FE(schema change) |
137 | | * |
138 | | * Ultimate performance optimization: |
139 | | * 1. If process of (First => Second) is consistent, eg. from BYTE_ARRAY to string, no additional copies and conversions will be introduced; |
140 | | * 2. If process of (Second => Third) is consistent, no additional copies and conversions will be introduced; |
141 | | * 3. Null map is shared among all processes, no additional copies and conversions will be introduced in null map; |
142 | | * 4. Only create one physical column in physical conversion, and reused in each loop; |
143 | | * 5. Only create one logical column in logical conversion, and reused in each loop; |
144 | | * 6. FIXED_LENGTH_BYTE_ARRAY is read as ColumnUInt8 instead of ColumnString, so the underlying decoder has no process to decode string |
145 | | * and use memory copy to read the data as a whole, and the conversion has no need to resolve the Offsets in ColumnString. |
146 | | */ |
147 | | class PhysicalToLogicalConverter { |
148 | | protected: |
149 | | ColumnPtr _cached_src_physical_column = nullptr; |
150 | | DataTypePtr _cached_src_physical_type = nullptr; |
151 | | std::unique_ptr<converter::ColumnTypeConverter> _logical_converter = nullptr; |
152 | | |
153 | | std::string _error_msg; |
154 | | |
155 | | std::unique_ptr<ConvertParams> _convert_params; |
156 | | |
157 | | public: |
158 | | static std::unique_ptr<PhysicalToLogicalConverter> get_converter( |
159 | | const FieldSchema* field_schema, DataTypePtr src_logical_type, |
160 | | const DataTypePtr& dst_logical_type, const cctz::time_zone* ctz, |
161 | | bool is_dict_filter = false); |
162 | | |
163 | | static bool is_parquet_native_type(PrimitiveType type); |
164 | | |
165 | | static bool is_decimal_type(PrimitiveType type); |
166 | | |
167 | 212 | PhysicalToLogicalConverter() = default; |
168 | 212 | virtual ~PhysicalToLogicalConverter() = default; |
169 | | |
170 | 4 | virtual Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) { |
171 | 4 | return Status::OK(); |
172 | 4 | } |
173 | | |
174 | | Status convert(ColumnPtr& src_physical_col, DataTypePtr src_logical_type, |
175 | | const DataTypePtr& dst_logical_type, ColumnPtr& dst_logical_col, |
176 | 222 | bool is_dict_filter) { |
177 | 222 | if (is_dict_filter) { |
178 | 0 | src_logical_type = DataTypeFactory::instance().create_data_type( |
179 | 0 | PrimitiveType::TYPE_INT, dst_logical_type->is_nullable()); |
180 | 0 | } |
181 | 222 | if (is_consistent() && _logical_converter->is_consistent()) { |
182 | 179 | return Status::OK(); |
183 | 179 | } |
184 | 43 | ColumnPtr src_logical_column; |
185 | 43 | if (is_consistent()) { |
186 | 4 | if (dst_logical_type->is_nullable()) { |
187 | 4 | auto doris_nullable_column = |
188 | 4 | assert_cast<const ColumnNullable*>(dst_logical_col.get()); |
189 | 4 | src_logical_column = |
190 | 4 | ColumnNullable::create(_cached_src_physical_column, |
191 | 4 | doris_nullable_column->get_null_map_column_ptr()); |
192 | 4 | } else { |
193 | 0 | src_logical_column = _cached_src_physical_column; |
194 | 0 | } |
195 | 39 | } else { |
196 | 39 | src_logical_column = _logical_converter->get_column(src_logical_type, dst_logical_col, |
197 | 39 | dst_logical_type); |
198 | 39 | } |
199 | 43 | RETURN_IF_ERROR(physical_convert(src_physical_col, src_logical_column)); |
200 | 43 | auto converted_column = dst_logical_col->assume_mutable(); |
201 | 43 | return _logical_converter->convert(src_logical_column, converted_column); |
202 | 43 | } |
203 | | |
204 | | virtual ColumnPtr get_physical_column(tparquet::Type::type src_physical_type, |
205 | | DataTypePtr src_logical_type, |
206 | | ColumnPtr& dst_logical_column, |
207 | | const DataTypePtr& dst_logical_type, bool is_dict_filter); |
208 | | |
209 | 222 | DataTypePtr& get_physical_type() { return _cached_src_physical_type; } |
210 | | |
211 | 121 | virtual bool is_consistent() { return false; } |
212 | | |
213 | 350 | virtual bool support() { return true; } |
214 | | |
215 | 0 | std::string get_error_msg() { return _error_msg; } |
216 | | }; |
217 | | |
218 | | class ConsistentPhysicalConverter : public PhysicalToLogicalConverter { |
219 | 441 | bool is_consistent() override { return true; } |
220 | | }; |
221 | | |
222 | | class UnsupportedConverter : public PhysicalToLogicalConverter { |
223 | | public: |
224 | 0 | UnsupportedConverter(std::string error_msg) { _error_msg = error_msg; } |
225 | | |
226 | | UnsupportedConverter(tparquet::Type::type src_physical_type, |
227 | 0 | const DataTypePtr& src_logical_type) { |
228 | 0 | std::string src_physical_str = tparquet::to_string(src_physical_type); |
229 | 0 | std::string src_logical_str = src_logical_type->get_name(); |
230 | 0 | _error_msg = src_physical_str + " => " + src_logical_str; |
231 | 0 | } |
232 | | |
233 | 0 | bool support() override { return false; } |
234 | | |
235 | 0 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
236 | 0 | return Status::InternalError("Unsupported physical to logical type: {}", _error_msg); |
237 | 0 | } |
238 | | }; |
239 | | |
240 | | // for tinyint, smallint |
241 | | template <PrimitiveType IntPrimitiveType> |
242 | | class LittleIntPhysicalConverter : public PhysicalToLogicalConverter { |
243 | 28 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
244 | 28 | using DstCppType = typename PrimitiveTypeTraits<IntPrimitiveType>::CppType; |
245 | 28 | using DstColumnType = typename PrimitiveTypeTraits<IntPrimitiveType>::ColumnType; |
246 | 28 | ColumnPtr from_col = remove_nullable(src_physical_col); |
247 | 28 | MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable(); |
248 | | |
249 | 28 | size_t rows = from_col->size(); |
250 | | // always comes from tparquet::Type::INT32 |
251 | 28 | auto& src_data = assert_cast<const ColumnInt32*>(from_col.get())->get_data(); |
252 | 28 | size_t start_idx = to_col->size(); |
253 | 28 | to_col->resize(start_idx + rows); |
254 | 28 | auto& data = assert_cast<DstColumnType&>(*to_col.get()).get_data(); |
255 | 108 | for (int i = 0; i < rows; ++i) { |
256 | 80 | data[start_idx + i] = static_cast<DstCppType>(src_data[i]); |
257 | 80 | } |
258 | | |
259 | 28 | return Status::OK(); |
260 | 28 | } _ZN5doris7parquet26LittleIntPhysicalConverterILNS_13PrimitiveTypeE3EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Line | Count | Source | 243 | 17 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { | 244 | 17 | using DstCppType = typename PrimitiveTypeTraits<IntPrimitiveType>::CppType; | 245 | 17 | using DstColumnType = typename PrimitiveTypeTraits<IntPrimitiveType>::ColumnType; | 246 | 17 | ColumnPtr from_col = remove_nullable(src_physical_col); | 247 | 17 | MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable(); | 248 | | | 249 | 17 | size_t rows = from_col->size(); | 250 | | // always comes from tparquet::Type::INT32 | 251 | 17 | auto& src_data = assert_cast<const ColumnInt32*>(from_col.get())->get_data(); | 252 | 17 | size_t start_idx = to_col->size(); | 253 | 17 | to_col->resize(start_idx + rows); | 254 | 17 | auto& data = assert_cast<DstColumnType&>(*to_col.get()).get_data(); | 255 | 59 | for (int i = 0; i < rows; ++i) { | 256 | 42 | data[start_idx + i] = static_cast<DstCppType>(src_data[i]); | 257 | 42 | } | 258 | | | 259 | 17 | return Status::OK(); | 260 | 17 | } |
_ZN5doris7parquet26LittleIntPhysicalConverterILNS_13PrimitiveTypeE4EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Line | Count | Source | 243 | 11 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { | 244 | 11 | using DstCppType = typename PrimitiveTypeTraits<IntPrimitiveType>::CppType; | 245 | 11 | using DstColumnType = typename PrimitiveTypeTraits<IntPrimitiveType>::ColumnType; | 246 | 11 | ColumnPtr from_col = remove_nullable(src_physical_col); | 247 | 11 | MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable(); | 248 | | | 249 | 11 | size_t rows = from_col->size(); | 250 | | // always comes from tparquet::Type::INT32 | 251 | 11 | auto& src_data = assert_cast<const ColumnInt32*>(from_col.get())->get_data(); | 252 | 11 | size_t start_idx = to_col->size(); | 253 | 11 | to_col->resize(start_idx + rows); | 254 | 11 | auto& data = assert_cast<DstColumnType&>(*to_col.get()).get_data(); | 255 | 49 | for (int i = 0; i < rows; ++i) { | 256 | 38 | data[start_idx + i] = static_cast<DstCppType>(src_data[i]); | 257 | 38 | } | 258 | | | 259 | 11 | return Status::OK(); | 260 | 11 | } |
|
261 | | }; |
262 | | |
263 | | template <PrimitiveType type> |
264 | | struct UnsignedTypeTraits; |
265 | | |
266 | | template <> |
267 | | struct UnsignedTypeTraits<TYPE_SMALLINT> { |
268 | | using UnsignedCppType = UInt8; |
269 | | //https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#unsigned-integers |
270 | | //INT(8, false), INT(16, false), and INT(32, false) must annotate an int32 primitive type and INT(64, false) |
271 | | //must annotate an int64 primitive type. |
272 | | using StorageCppType = Int32; |
273 | | using StorageColumnType = ColumnInt32; |
274 | | }; |
275 | | |
276 | | template <> |
277 | | struct UnsignedTypeTraits<TYPE_INT> { |
278 | | using UnsignedCppType = UInt16; |
279 | | using StorageCppType = Int32; |
280 | | using StorageColumnType = ColumnInt32; |
281 | | }; |
282 | | |
283 | | template <> |
284 | | struct UnsignedTypeTraits<TYPE_BIGINT> { |
285 | | using UnsignedCppType = UInt32; |
286 | | using StorageCppType = Int32; |
287 | | using StorageColumnType = ColumnInt32; |
288 | | }; |
289 | | |
290 | | template <> |
291 | | struct UnsignedTypeTraits<TYPE_LARGEINT> { |
292 | | using UnsignedCppType = UInt64; |
293 | | using StorageCppType = Int64; |
294 | | using StorageColumnType = ColumnInt64; |
295 | | }; |
296 | | |
297 | | template <PrimitiveType IntPrimitiveType> |
298 | | class UnsignedIntegerConverter : public PhysicalToLogicalConverter { |
299 | 0 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
300 | 0 | using UnsignedCppType = typename UnsignedTypeTraits<IntPrimitiveType>::UnsignedCppType; |
301 | 0 | using StorageCppType = typename UnsignedTypeTraits<IntPrimitiveType>::StorageCppType; |
302 | 0 | using StorageColumnType = typename UnsignedTypeTraits<IntPrimitiveType>::StorageColumnType; |
303 | 0 | using DstColumnType = typename PrimitiveTypeTraits<IntPrimitiveType>::ColumnType; |
304 | |
|
305 | 0 | ColumnPtr from_col = remove_nullable(src_physical_col); |
306 | 0 | MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable(); |
307 | 0 | auto& src_data = assert_cast<const StorageColumnType*>(from_col.get())->get_data(); |
308 | |
|
309 | 0 | size_t rows = src_data.size(); |
310 | 0 | size_t start_idx = to_col->size(); |
311 | 0 | to_col->resize(start_idx + rows); |
312 | 0 | auto& data = assert_cast<DstColumnType&>(*to_col.get()).get_data(); |
313 | |
|
314 | 0 | for (int i = 0; i < rows; i++) { |
315 | 0 | StorageCppType src_value = src_data[i]; |
316 | 0 | auto unsigned_value = static_cast<UnsignedCppType>(src_value); |
317 | 0 | data[start_idx + i] = unsigned_value; |
318 | 0 | } |
319 | |
|
320 | 0 | return Status::OK(); |
321 | 0 | } Unexecuted instantiation: _ZN5doris7parquet24UnsignedIntegerConverterILNS_13PrimitiveTypeE4EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet24UnsignedIntegerConverterILNS_13PrimitiveTypeE5EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet24UnsignedIntegerConverterILNS_13PrimitiveTypeE6EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet24UnsignedIntegerConverterILNS_13PrimitiveTypeE7EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ |
322 | | }; |
323 | | |
324 | | class FixedSizeBinaryConverter : public PhysicalToLogicalConverter { |
325 | | private: |
326 | | int _type_length; |
327 | | |
328 | | public: |
329 | 0 | FixedSizeBinaryConverter(int type_length) : _type_length(type_length) {} |
330 | | |
331 | 0 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
332 | 0 | ColumnPtr from_col = remove_nullable(src_physical_col); |
333 | 0 | MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable(); |
334 | |
|
335 | 0 | auto* src_data = assert_cast<const ColumnUInt8*>(from_col.get()); |
336 | 0 | size_t length = src_data->size(); |
337 | 0 | size_t num_values = length / _type_length; |
338 | 0 | auto& string_col = static_cast<ColumnString&>(*to_col.get()); |
339 | 0 | auto& offsets = string_col.get_offsets(); |
340 | 0 | auto& chars = string_col.get_chars(); |
341 | |
|
342 | 0 | size_t origin_size = chars.size(); |
343 | 0 | chars.resize(origin_size + length); |
344 | 0 | memcpy(chars.data() + origin_size, src_data->get_data().data(), length); |
345 | |
|
346 | 0 | origin_size = offsets.size(); |
347 | 0 | offsets.resize(origin_size + num_values); |
348 | 0 | auto end_offset = offsets[origin_size - 1]; |
349 | 0 | for (int i = 0; i < num_values; ++i) { |
350 | 0 | end_offset += _type_length; |
351 | 0 | offsets[origin_size + i] = end_offset; |
352 | 0 | } |
353 | |
|
354 | 0 | return Status::OK(); |
355 | 0 | } |
356 | | }; |
357 | | |
358 | | class Float16PhysicalConverter : public PhysicalToLogicalConverter { |
359 | | private: |
360 | | int _type_length; |
361 | | |
362 | | public: |
363 | 0 | Float16PhysicalConverter(int type_length) : _type_length(type_length) { |
364 | 0 | DCHECK_EQ(_type_length, 2); |
365 | 0 | } |
366 | | |
367 | 0 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
368 | 0 | ColumnPtr from_col = remove_nullable(src_physical_col); |
369 | 0 | MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable(); |
370 | |
|
371 | 0 | const auto* src_data = assert_cast<const ColumnUInt8*>(from_col.get()); |
372 | 0 | size_t length = src_data->size(); |
373 | 0 | size_t num_values = length / _type_length; |
374 | 0 | auto* to_float_column = assert_cast<ColumnFloat32*>(to_col.get()); |
375 | 0 | size_t start_idx = to_float_column->size(); |
376 | 0 | to_float_column->resize(start_idx + num_values); |
377 | 0 | auto& to_float_column_data = to_float_column->get_data(); |
378 | 0 | const auto* ptr = src_data->get_data().data(); |
379 | 0 | for (int i = 0; i < num_values; ++i) { |
380 | 0 | size_t offset = i * _type_length; |
381 | 0 | const auto* data_ptr = ptr + offset; |
382 | 0 | uint16_t raw; |
383 | 0 | memcpy(&raw, data_ptr, sizeof(uint16_t)); |
384 | 0 | float value = half_to_float(raw); |
385 | 0 | to_float_column_data[start_idx + i] = value; |
386 | 0 | } |
387 | |
|
388 | 0 | return Status::OK(); |
389 | 0 | } |
390 | | |
391 | 0 | float half_to_float(uint16_t h) { |
392 | | // uint16_t h: half precision floating point |
393 | | // bit 15: sign(1 bit) |
394 | | // bits 14..10 : exponent(5 bits) |
395 | | // bits 9..0 : mantissa(10 bits) |
396 | | |
397 | | // sign bit placed to float32 bit31 |
398 | 0 | uint32_t sign = (h & 0x8000U) << 16; // 0x8000 << 16 = 0x8000_0000 |
399 | | // exponent:(5 bits) |
400 | 0 | uint32_t exp = (h & 0x7C00U) >> 10; // 0x7C00 = 0111 1100 0000 (half exponent mask) |
401 | | // mantissa(10 bits) |
402 | 0 | uint32_t mant = (h & 0x03FFU); // 10-bit fraction |
403 | | |
404 | | // cases:Zero/Subnormal, Normal, Inf/NaN |
405 | 0 | if (exp == 0) { |
406 | | // exp==0: Zero or Subnormal ---------- |
407 | 0 | if (mant == 0) { |
408 | | // ±0.0 |
409 | | // sign = either 0x00000000 or 0x80000000 |
410 | 0 | return std::bit_cast<float>(sign); |
411 | 0 | } else { |
412 | | // ---------- Subnormal ---------- |
413 | | // half subnormal: |
414 | | // value = (-1)^sign * (mant / 2^10) * 2^(1 - bias) |
415 | | // half bias = 15 → exponent = 1 - 15 = -14 |
416 | 0 | float f = (static_cast<float>(mant) / 1024.0F) * std::powf(2.0F, -14.0F); |
417 | 0 | return sign ? -f : f; |
418 | 0 | } |
419 | 0 | } else if (exp == 0x1F) { |
420 | | // exp==31: Inf or NaN ---------- |
421 | | // float32: |
422 | | // exponent = 255 (0xFF) |
423 | | // mantissa = mant << 13 |
424 | 0 | uint32_t f = sign | 0x7F800000U | (mant << 13); |
425 | 0 | return std::bit_cast<float>(f); |
426 | 0 | } else { |
427 | | // Normalized ---------- |
428 | | // float32 exponent: |
429 | | // exp32 = exp16 - bias16 + bias32 |
430 | | // bias16 = 15 |
431 | | // bias32 = 127 |
432 | | // |
433 | | // so: exp32 = exp + (127 - 15) |
434 | 0 | uint32_t f = sign | ((exp + (127 - 15)) << 23) // place to float32 exponent |
435 | 0 | | (mant << 13); // mantissa align to 23 bits |
436 | 0 | return std::bit_cast<float>(f); |
437 | 0 | } |
438 | 0 | } |
439 | | }; |
440 | | |
441 | | class UUIDVarBinaryConverter : public PhysicalToLogicalConverter { |
442 | | public: |
443 | 1 | UUIDVarBinaryConverter(int type_length) : _type_length(type_length) {} |
444 | | |
445 | 1 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
446 | 1 | DCHECK(!is_column_const(*src_physical_col)) << src_physical_col->dump_structure(); |
447 | 1 | DCHECK(!is_column_const(*src_logical_column)) << src_logical_column->dump_structure(); |
448 | 1 | const ColumnUInt8* uint8_col = nullptr; |
449 | 1 | if (is_column_nullable(*src_physical_col)) { |
450 | 1 | const auto& nullable = assert_cast<const ColumnNullable*>(src_physical_col.get()); |
451 | 1 | uint8_col = &assert_cast<const ColumnUInt8&>(nullable->get_nested_column()); |
452 | 1 | } else { |
453 | 0 | uint8_col = &assert_cast<const ColumnUInt8&>(*src_physical_col); |
454 | 0 | } |
455 | | |
456 | 1 | MutableColumnPtr to_col = nullptr; |
457 | | // nullmap flag seems to have been handled in the upper level |
458 | 1 | if (src_logical_column->is_nullable()) { |
459 | 1 | const auto* nullable = assert_cast<const ColumnNullable*>(src_logical_column.get()); |
460 | 1 | to_col = nullable->get_nested_column_ptr()->assume_mutable(); |
461 | 1 | } else { |
462 | 0 | to_col = src_logical_column->assume_mutable(); |
463 | 0 | } |
464 | 1 | auto* to_varbinary_column = assert_cast<ColumnVarbinary*>(to_col.get()); |
465 | 1 | size_t length = uint8_col->size(); |
466 | 1 | size_t num_values = length / _type_length; |
467 | 1 | const auto* ptr = uint8_col->get_data().data(); |
468 | | |
469 | 4 | for (int i = 0; i < num_values; ++i) { |
470 | 3 | auto offset = i * _type_length; |
471 | 3 | const char* data_ptr = reinterpret_cast<const char*>(ptr + offset); |
472 | 3 | to_varbinary_column->insert_data(data_ptr, _type_length); |
473 | 3 | } |
474 | 1 | return Status::OK(); |
475 | 1 | } |
476 | | |
477 | | private: |
478 | | int _type_length; |
479 | | }; |
480 | | |
481 | | template <PrimitiveType DecimalPType> |
482 | | class FixedSizeToDecimal : public PhysicalToLogicalConverter { |
483 | | public: |
484 | | using DecimalType = typename PrimitiveTypeTraits<DecimalPType>::CppType; |
485 | 2 | FixedSizeToDecimal(int32_t type_length) : _type_length(type_length) {}Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EEC2Ei _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EEC2Ei Line | Count | Source | 485 | 2 | FixedSizeToDecimal(int32_t type_length) : _type_length(type_length) {} |
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EEC2Ei Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EEC2Ei |
486 | | |
487 | 2 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
488 | 2 | ColumnPtr src_col = remove_nullable(src_physical_col); |
489 | 2 | MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); |
490 | | |
491 | 2 | #define M(FixedTypeLength, ValueCopyType) \ |
492 | 2 | case FixedTypeLength: \ |
493 | 2 | return _convert_internal<FixedTypeLength, ValueCopyType>(src_col, dst_col); |
494 | | |
495 | 2 | #define APPLY_FOR_DECIMALS() \ |
496 | 2 | M(1, int64_t) \ |
497 | 0 | M(2, int64_t) \ |
498 | 0 | M(3, int64_t) \ |
499 | 0 | M(4, int64_t) \ |
500 | 1 | M(5, int64_t) \ |
501 | 1 | M(6, int64_t) \ |
502 | 0 | M(7, int64_t) \ |
503 | 1 | M(8, int64_t) \ |
504 | 1 | M(9, int128_t) \ |
505 | 0 | M(10, int128_t) \ |
506 | 0 | M(11, int128_t) \ |
507 | 0 | M(12, int128_t) \ |
508 | 0 | M(13, int128_t) \ |
509 | 0 | M(14, int128_t) \ |
510 | 0 | M(15, int128_t) \ |
511 | 0 | M(16, int128_t) \ |
512 | 0 | M(17, wide::Int256) \ |
513 | 0 | M(18, wide::Int256) \ |
514 | 0 | M(19, wide::Int256) \ |
515 | 0 | M(20, wide::Int256) \ |
516 | 0 | M(21, wide::Int256) \ |
517 | 0 | M(22, wide::Int256) \ |
518 | 0 | M(23, wide::Int256) \ |
519 | 0 | M(24, wide::Int256) \ |
520 | 0 | M(25, wide::Int256) \ |
521 | 0 | M(26, wide::Int256) \ |
522 | 0 | M(27, wide::Int256) \ |
523 | 0 | M(28, wide::Int256) \ |
524 | 0 | M(29, wide::Int256) \ |
525 | 0 | M(30, wide::Int256) \ |
526 | 0 | M(31, wide::Int256) \ |
527 | 0 | M(32, wide::Int256) |
528 | | |
529 | 2 | switch (_type_length) { |
530 | 0 | APPLY_FOR_DECIMALS() |
531 | 0 | default: |
532 | 0 | throw Exception(Status::FatalError("__builtin_unreachable")); |
533 | 2 | } |
534 | 0 | return Status::OK(); |
535 | 2 | #undef APPLY_FOR_DECIMALS |
536 | 2 | #undef M |
537 | 2 | } Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Line | Count | Source | 487 | 2 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { | 488 | 2 | ColumnPtr src_col = remove_nullable(src_physical_col); | 489 | 2 | MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); | 490 | | | 491 | 2 | #define M(FixedTypeLength, ValueCopyType) \ | 492 | 2 | case FixedTypeLength: \ | 493 | 2 | return _convert_internal<FixedTypeLength, ValueCopyType>(src_col, dst_col); | 494 | | | 495 | 2 | #define APPLY_FOR_DECIMALS() \ | 496 | 2 | M(1, int64_t) \ | 497 | 2 | M(2, int64_t) \ | 498 | 2 | M(3, int64_t) \ | 499 | 2 | M(4, int64_t) \ | 500 | 2 | M(5, int64_t) \ | 501 | 2 | M(6, int64_t) \ | 502 | 2 | M(7, int64_t) \ | 503 | 2 | M(8, int64_t) \ | 504 | 2 | M(9, int128_t) \ | 505 | 2 | M(10, int128_t) \ | 506 | 2 | M(11, int128_t) \ | 507 | 2 | M(12, int128_t) \ | 508 | 2 | M(13, int128_t) \ | 509 | 2 | M(14, int128_t) \ | 510 | 2 | M(15, int128_t) \ | 511 | 2 | M(16, int128_t) \ | 512 | 2 | M(17, wide::Int256) \ | 513 | 2 | M(18, wide::Int256) \ | 514 | 2 | M(19, wide::Int256) \ | 515 | 2 | M(20, wide::Int256) \ | 516 | 2 | M(21, wide::Int256) \ | 517 | 2 | M(22, wide::Int256) \ | 518 | 2 | M(23, wide::Int256) \ | 519 | 2 | M(24, wide::Int256) \ | 520 | 2 | M(25, wide::Int256) \ | 521 | 2 | M(26, wide::Int256) \ | 522 | 2 | M(27, wide::Int256) \ | 523 | 2 | M(28, wide::Int256) \ | 524 | 2 | M(29, wide::Int256) \ | 525 | 2 | M(30, wide::Int256) \ | 526 | 2 | M(31, wide::Int256) \ | 527 | 2 | M(32, wide::Int256) | 528 | | | 529 | 2 | switch (_type_length) { | 530 | 0 | APPLY_FOR_DECIMALS() | 531 | 0 | default: | 532 | 0 | throw 
Exception(Status::FatalError("__builtin_unreachable")); | 533 | 2 | } | 534 | 0 | return Status::OK(); | 535 | 2 | #undef APPLY_FOR_DECIMALS | 536 | 2 | #undef M | 537 | 2 | } |
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ |
538 | | |
539 | | template <int fixed_type_length, typename ValueCopyType> |
540 | 2 | Status _convert_internal(ColumnPtr& src_col, MutableColumnPtr& dst_col) { |
541 | 2 | size_t rows = src_col->size() / fixed_type_length; |
542 | 2 | auto* buf = static_cast<const ColumnUInt8*>(src_col.get())->get_data().data(); |
543 | 2 | size_t start_idx = dst_col->size(); |
544 | 2 | dst_col->resize(start_idx + rows); |
545 | | |
546 | 2 | auto& data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col.get())->get_data(); |
547 | 2 | size_t offset = 0; |
548 | 6 | for (int i = 0; i < rows; i++) { |
549 | | // When Decimal in parquet is stored in byte arrays, binary and fixed, |
550 | | // the unscaled number must be encoded as two's complement using big-endian byte order. |
551 | 4 | ValueCopyType value = 0; |
552 | 4 | memcpy(reinterpret_cast<char*>(&value), buf + offset, sizeof(value)); |
553 | 4 | offset += fixed_type_length; |
554 | 4 | value = to_endian<std::endian::big>(value); |
555 | 4 | value = value >> ((sizeof(value) - fixed_type_length) * 8); |
556 | 4 | auto& v = reinterpret_cast<DecimalType&>(data[start_idx + i]); |
557 | 4 | v = (DecimalType)value; |
558 | 4 | } |
559 | | |
560 | 2 | return Status::OK(); |
561 | 2 | } Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi1ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi2ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi3ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi4ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi5ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi6ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi7ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi8ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi9EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi10EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi11EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi12EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi13EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi14EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi15EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi16EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi17EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi18EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi19EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi20EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi21EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi22EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi23EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi24EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi25EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi26EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi27EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi28EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi29EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi30EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi31EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi32EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi1ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi2ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi3ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi4ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi5ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Line | Count | Source | 540 | 1 | Status _convert_internal(ColumnPtr& src_col, MutableColumnPtr& dst_col) { | 541 | 1 | size_t rows = src_col->size() / fixed_type_length; | 542 | 1 | auto* buf = 
static_cast<const ColumnUInt8*>(src_col.get())->get_data().data(); | 543 | 1 | size_t start_idx = dst_col->size(); | 544 | 1 | dst_col->resize(start_idx + rows); | 545 | | | 546 | 1 | auto& data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col.get())->get_data(); | 547 | 1 | size_t offset = 0; | 548 | 3 | for (int i = 0; i < rows; i++) { | 549 | | // When Decimal in parquet is stored in byte arrays, binary and fixed, | 550 | | // the unscaled number must be encoded as two's complement using big-endian byte order. | 551 | 2 | ValueCopyType value = 0; | 552 | 2 | memcpy(reinterpret_cast<char*>(&value), buf + offset, sizeof(value)); | 553 | 2 | offset += fixed_type_length; | 554 | 2 | value = to_endian<std::endian::big>(value); | 555 | 2 | value = value >> ((sizeof(value) - fixed_type_length) * 8); | 556 | 2 | auto& v = reinterpret_cast<DecimalType&>(data[start_idx + i]); | 557 | 2 | v = (DecimalType)value; | 558 | 2 | } | 559 | | | 560 | 1 | return Status::OK(); | 561 | 1 | } |
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi6ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi7ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi8ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Line | Count | Source | 540 | 1 | Status _convert_internal(ColumnPtr& src_col, MutableColumnPtr& dst_col) { | 541 | 1 | size_t rows = src_col->size() / fixed_type_length; | 542 | 1 | auto* buf = static_cast<const ColumnUInt8*>(src_col.get())->get_data().data(); | 543 | 1 | size_t start_idx = dst_col->size(); | 544 | 1 | dst_col->resize(start_idx + rows); | 545 | | | 546 | 1 | auto& data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col.get())->get_data(); | 547 | 1 | size_t offset = 0; | 548 | 3 | for (int i = 0; i < rows; i++) { | 549 | | // When Decimal in parquet is stored in byte arrays, binary and fixed, | 550 | | // the unscaled number must be encoded as two's complement using big-endian byte order. | 551 | 2 | ValueCopyType value = 0; | 552 | 2 | memcpy(reinterpret_cast<char*>(&value), buf + offset, sizeof(value)); | 553 | 2 | offset += fixed_type_length; | 554 | 2 | value = to_endian<std::endian::big>(value); | 555 | 2 | value = value >> ((sizeof(value) - fixed_type_length) * 8); | 556 | 2 | auto& v = reinterpret_cast<DecimalType&>(data[start_idx + i]); | 557 | 2 | v = (DecimalType)value; | 558 | 2 | } | 559 | | | 560 | 1 | return Status::OK(); | 561 | 1 | } |
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi9EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi10EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi11EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi12EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi13EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi14EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi15EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi16EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi17EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi18EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE 
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi19EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi20EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi21EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi22EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi23EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi24EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi25EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi26EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi27EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi28EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi29EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi30EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi31EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi32EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi1ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi2ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi3ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi4ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi5ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi6ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi7ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi8ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi9EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi10EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi11EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi12EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi13EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi14EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi15EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi16EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi17EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi18EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi19EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi20EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi21EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi22EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi23EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi24EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi25EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi26EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi27EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi28EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi29EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi30EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi31EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi32EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi1ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi2ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi3ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi4ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi5ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi6ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi7ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi8ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi9EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi10EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi11EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi12EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi13EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi14EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi15EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi16EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi17EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi18EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi19EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi20EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi21EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi22EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi23EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi24EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi25EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi26EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi27EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi28EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi29EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi30EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi31EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi32EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE |
562 | | |
563 | | private: |
564 | | int32_t _type_length; |
565 | | }; |
566 | | |
567 | | template <PrimitiveType DecimalPType> |
568 | | class StringToDecimal : public PhysicalToLogicalConverter { |
569 | | using DecimalType = typename PrimitiveTypeTraits<DecimalPType>::CppType; |
570 | 0 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
571 | 0 | using ValueCopyType = DecimalType::NativeType; |
572 | 0 | ColumnPtr src_col = remove_nullable(src_physical_col); |
573 | 0 | MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); |
574 | |
|
575 | 0 | size_t rows = src_col->size(); |
576 | 0 | auto buf = static_cast<const ColumnString*>(src_col.get())->get_chars().data(); |
577 | 0 | auto& offset = static_cast<const ColumnString*>(src_col.get())->get_offsets(); |
578 | 0 | size_t start_idx = dst_col->size(); |
579 | 0 | dst_col->resize(start_idx + rows); |
580 | |
|
581 | 0 | auto& data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col.get())->get_data(); |
582 | 0 | for (int i = 0; i < rows; i++) { |
583 | 0 | size_t len = offset[i] - offset[i - 1]; |
584 | | // When Decimal in parquet is stored in byte arrays, binary and fixed, |
585 | | // the unscaled number must be encoded as two's complement using big-endian byte order. |
586 | 0 | ValueCopyType value = 0; |
587 | 0 | if (len > 0) { |
588 | 0 | memcpy(reinterpret_cast<char*>(&value), buf + offset[i - 1], len); |
589 | 0 | value = to_endian<std::endian::big>(value); |
590 | 0 | value = value >> ((sizeof(value) - len) * 8); |
591 | 0 | } |
592 | 0 | auto& v = reinterpret_cast<DecimalType&>(data[start_idx + i]); |
593 | 0 | v = (DecimalType)value; |
594 | 0 | } |
595 | |
|
596 | 0 | return Status::OK(); |
597 | 0 | } Unexecuted instantiation: _ZN5doris7parquet15StringToDecimalILNS_13PrimitiveTypeE28EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet15StringToDecimalILNS_13PrimitiveTypeE29EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet15StringToDecimalILNS_13PrimitiveTypeE30EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet15StringToDecimalILNS_13PrimitiveTypeE35EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ |
598 | | }; |
599 | | |
600 | | template <PrimitiveType NumberType, PrimitiveType DecimalPType> |
601 | | class NumberToDecimal : public PhysicalToLogicalConverter { |
602 | | using DecimalType = typename PrimitiveTypeTraits<DecimalPType>::CppType; |
603 | 2 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
604 | 2 | using ValueCopyType = typename DecimalType::NativeType; |
605 | 2 | ColumnPtr src_col = remove_nullable(src_physical_col); |
606 | 2 | MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); |
607 | | |
608 | 2 | size_t rows = src_col->size(); |
609 | 2 | auto* src_data = |
610 | 2 | static_cast<const ColumnVector<NumberType>*>(src_col.get())->get_data().data(); |
611 | 2 | size_t start_idx = dst_col->size(); |
612 | 2 | dst_col->resize(start_idx + rows); |
613 | | |
614 | 2 | auto* data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col.get())->get_data().data(); |
615 | | |
616 | 24 | for (int i = 0; i < rows; i++) { |
617 | 22 | ValueCopyType value; |
618 | 22 | if constexpr (std::is_same_v<DecimalType, Decimal256>) { |
619 | 0 | value = src_data[i]; |
620 | 22 | } else { |
621 | 22 | value = cast_set<ValueCopyType, typename PrimitiveTypeTraits<NumberType>::CppType, |
622 | 22 | false>(src_data[i]); |
623 | 22 | } |
624 | | |
625 | 22 | data[start_idx + i] = (DecimalType)value; |
626 | 22 | } |
627 | 2 | return Status::OK(); |
628 | 2 | } Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE5ELS2_28EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE6ELS2_28EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE5ELS2_29EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE6ELS2_29EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Line | Count | Source | 603 | 2 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { | 604 | 2 | using ValueCopyType = typename DecimalType::NativeType; | 605 | 2 | ColumnPtr src_col = remove_nullable(src_physical_col); | 606 | 2 | MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); | 607 | | | 608 | 2 | size_t rows = src_col->size(); | 609 | 2 | auto* src_data = | 610 | 2 | static_cast<const ColumnVector<NumberType>*>(src_col.get())->get_data().data(); | 611 | 2 | size_t start_idx = dst_col->size(); | 612 | 2 | dst_col->resize(start_idx + rows); | 613 | | | 614 | 2 | auto* data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col.get())->get_data().data(); | 615 | | | 616 | 24 | for (int i = 0; i < rows; i++) { | 617 | 22 | ValueCopyType value; | 618 | | if constexpr (std::is_same_v<DecimalType, Decimal256>) { | 619 | | value = src_data[i]; | 620 | 22 | } else { | 621 | 22 | value = cast_set<ValueCopyType, typename PrimitiveTypeTraits<NumberType>::CppType, | 622 | 22 | false>(src_data[i]); | 623 | 22 | } | 624 | | | 625 | 22 | data[start_idx + i] = (DecimalType)value; | 626 | 22 | } | 627 | 2 | return Status::OK(); | 628 | 2 | } |
Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE5ELS2_30EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE6ELS2_30EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE5ELS2_35EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE6ELS2_35EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ |
629 | | }; |
630 | | |
631 | | class Int32ToDate : public PhysicalToLogicalConverter { |
632 | 5 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
633 | 5 | ColumnPtr src_col = remove_nullable(src_physical_col); |
634 | 5 | MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); |
635 | | |
636 | 5 | size_t rows = src_col->size(); |
637 | 5 | size_t start_idx = dst_col->size(); |
638 | 5 | dst_col->reserve(start_idx + rows); |
639 | | |
640 | 5 | auto& src_data = static_cast<const ColumnInt32*>(src_col.get())->get_data(); |
641 | 5 | auto& data = static_cast<ColumnDateV2*>(dst_col.get())->get_data(); |
642 | 5 | date_day_offset_dict& date_dict = date_day_offset_dict::get(); |
643 | | |
644 | 51 | for (int i = 0; i < rows; i++) { |
645 | 46 | int64_t date_value = (int64_t)src_data[i] + _convert_params->offset_days; |
646 | 46 | data.push_back_without_reserve( |
647 | 46 | date_dict[cast_set<int32_t>(date_value)].to_date_int_val()); |
648 | 46 | } |
649 | | |
650 | 5 | return Status::OK(); |
651 | 5 | } |
652 | | }; |
653 | | |
654 | | struct Int64ToTimestamp : public PhysicalToLogicalConverter { |
655 | 5 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
656 | 5 | ColumnPtr src_col = remove_nullable(src_physical_col); |
657 | 5 | MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); |
658 | | |
659 | 5 | size_t rows = src_col->size(); |
660 | 5 | size_t start_idx = dst_col->size(); |
661 | 5 | dst_col->resize(start_idx + rows); |
662 | | |
663 | 5 | auto src_data = static_cast<const ColumnInt64*>(src_col.get())->get_data().data(); |
664 | 5 | auto& data = static_cast<ColumnDateTimeV2*>(dst_col.get())->get_data(); |
665 | | |
666 | 51 | for (int i = 0; i < rows; i++) { |
667 | 46 | int64_t x = src_data[i]; |
668 | 46 | auto& num = data[start_idx + i]; |
669 | 46 | auto& value = reinterpret_cast<DateV2Value<DateTimeV2ValueType>&>(num); |
670 | 46 | value.from_unixtime(x / _convert_params->second_mask, *_convert_params->ctz); |
671 | 46 | value.set_microsecond((x % _convert_params->second_mask) * |
672 | 46 | (_convert_params->scale_to_nano_factor / 1000)); |
673 | 46 | } |
674 | 5 | return Status::OK(); |
675 | 5 | } |
676 | | }; |
677 | | |
678 | | struct Int64ToTimestampTz : public PhysicalToLogicalConverter { |
679 | 0 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
680 | 0 | ColumnPtr src_col = remove_nullable(src_physical_col); |
681 | 0 | MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); |
682 | |
|
683 | 0 | size_t rows = src_col->size(); |
684 | 0 | size_t start_idx = dst_col->size(); |
685 | 0 | dst_col->resize(start_idx + rows); |
686 | |
|
687 | 0 | const auto& src_data = assert_cast<const ColumnInt64*>(src_col.get())->get_data(); |
688 | 0 | auto& dest_data = assert_cast<ColumnTimeStampTz*>(dst_col.get())->get_data(); |
689 | 0 | static const cctz::time_zone UTC = cctz::utc_time_zone(); |
690 | |
|
691 | 0 | for (int i = 0; i < rows; i++) { |
692 | 0 | int64_t x = src_data[i]; |
693 | 0 | auto& tz = dest_data[start_idx + i]; |
694 | 0 | tz.from_unixtime(x / _convert_params->second_mask, UTC); |
695 | 0 | tz.set_microsecond((x % _convert_params->second_mask) * |
696 | 0 | (_convert_params->scale_to_nano_factor / 1000)); |
697 | 0 | } |
698 | 0 | return Status::OK(); |
699 | 0 | } |
700 | | }; |
701 | | |
702 | | struct Int96toTimestamp : public PhysicalToLogicalConverter { |
703 | 0 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
704 | 0 | ColumnPtr src_col = remove_nullable(src_physical_col); |
705 | 0 | MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); |
706 | |
|
707 | 0 | size_t rows = src_col->size() / sizeof(ParquetInt96); |
708 | 0 | auto& src_data = static_cast<const ColumnInt8*>(src_col.get())->get_data(); |
709 | 0 | auto ParquetInt96_data = (ParquetInt96*)src_data.data(); |
710 | 0 | size_t start_idx = dst_col->size(); |
711 | 0 | dst_col->resize(start_idx + rows); |
712 | 0 | auto& data = static_cast<ColumnDateTimeV2*>(dst_col.get())->get_data(); |
713 | |
|
714 | 0 | for (int i = 0; i < rows; i++) { |
715 | 0 | ParquetInt96 src_cell_data = ParquetInt96_data[i]; |
716 | 0 | auto& dst_value = |
717 | 0 | reinterpret_cast<DateV2Value<DateTimeV2ValueType>&>(data[start_idx + i]); |
718 | |
|
719 | 0 | int64_t timestamp_with_micros = src_cell_data.to_timestamp_micros(); |
720 | 0 | dst_value.from_unixtime(timestamp_with_micros / 1000000, *_convert_params->ctz); |
721 | 0 | dst_value.set_microsecond(timestamp_with_micros % 1000000); |
722 | 0 | } |
723 | 0 | return Status::OK(); |
724 | 0 | } |
725 | | }; |
726 | | |
727 | | struct Int96toTimestampTz : public PhysicalToLogicalConverter { |
728 | 0 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
729 | 0 | ColumnPtr src_col = remove_nullable(src_physical_col); |
730 | 0 | MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); |
731 | |
|
732 | 0 | size_t rows = src_col->size() / sizeof(ParquetInt96); |
733 | 0 | const auto& src_data = assert_cast<const ColumnInt8*>(src_col.get())->get_data(); |
734 | 0 | auto* ParquetInt96_data = (ParquetInt96*)src_data.data(); |
735 | 0 | size_t start_idx = dst_col->size(); |
736 | 0 | dst_col->resize(start_idx + rows); |
737 | 0 | auto& data = assert_cast<ColumnTimeStampTz*>(dst_col.get())->get_data(); |
738 | 0 | static const cctz::time_zone UTC = cctz::utc_time_zone(); |
739 | |
|
740 | 0 | for (int i = 0; i < rows; i++) { |
741 | 0 | ParquetInt96 src_cell_data = ParquetInt96_data[i]; |
742 | 0 | int64_t timestamp_with_micros = src_cell_data.to_timestamp_micros(); |
743 | 0 | auto& tz = data[start_idx + i]; |
744 | 0 | tz.from_unixtime(timestamp_with_micros / 1000000, UTC); |
745 | 0 | tz.set_microsecond(timestamp_with_micros % 1000000); |
746 | 0 | } |
747 | 0 | return Status::OK(); |
748 | 0 | } |
749 | | }; |
750 | | #include "common/compile_check_end.h" |
751 | | |
752 | | } // namespace doris::parquet |