be/src/format/parquet/parquet_column_convert.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <cctz/time_zone.h> |
21 | | #include <gen_cpp/parquet_types.h> |
22 | | #include <libdivide.h> |
23 | | |
24 | | #include <chrono> |
25 | | #include <limits> |
26 | | |
27 | | #include "common/cast_set.h" |
28 | | #include "core/column/column_varbinary.h" |
29 | | #include "core/data_type/data_type_factory.hpp" |
30 | | #include "core/data_type/primitive_type.h" |
31 | | #include "core/extended_types.h" |
32 | | #include "core/field.h" |
33 | | #include "core/types.h" |
34 | | #include "format/column_type_convert.h" |
35 | | #include "format/format_common.h" |
36 | | #include "format/parquet/decoder.h" |
37 | | #include "format/parquet/parquet_common.h" |
38 | | #include "format/parquet/schema_desc.h" |
39 | | #include "util/timezone_utils.h" |
40 | | |
41 | | namespace doris::parquet { |
42 | | namespace detail { |
43 | | |
44 | | inline bool try_split_local_time(int64_t local_time, uint16_t* year, uint8_t* month, uint8_t* day, |
45 | 62 | uint8_t* hour, uint8_t* minute, uint8_t* second) { |
46 | 62 | static const libdivide::divider<int64_t> fast_div_86400(86400); |
47 | 62 | static const libdivide::divider<int64_t> fast_div_3600(3600); |
48 | 62 | static const libdivide::divider<int64_t> fast_div_60(60); |
49 | 62 | static constexpr int64_t kMinSupportedDays = -365LL * 10000; |
50 | 62 | static constexpr int64_t kMaxSupportedDays = 365LL * 10000; |
51 | | |
52 | 62 | int64_t days = local_time / fast_div_86400; |
53 | 62 | int64_t second_of_day = local_time - days * 86400; |
54 | 62 | if (second_of_day < 0) { |
55 | 3 | second_of_day += 86400; |
56 | 3 | --days; |
57 | 3 | } |
58 | 62 | if (days < kMinSupportedDays || days > kMaxSupportedDays) { |
59 | 0 | return false; |
60 | 0 | } |
61 | | |
62 | 62 | const auto ymd = std::chrono::year_month_day {std::chrono::sys_days {std::chrono::days {days}}}; |
63 | 62 | const int y = static_cast<int>(ymd.year()); |
64 | 62 | if (y < 0 || y > std::numeric_limits<uint16_t>::max()) { |
65 | 0 | return false; |
66 | 0 | } |
67 | | |
68 | 62 | const int64_t h = second_of_day / fast_div_3600; |
69 | 62 | const int64_t minute_second = second_of_day - h * 3600; |
70 | 62 | const int64_t m = minute_second / fast_div_60; |
71 | 62 | const int64_t s = minute_second - m * 60; |
72 | | |
73 | 62 | *year = static_cast<uint16_t>(y); |
74 | 62 | *month = static_cast<uint8_t>(static_cast<unsigned>(ymd.month())); |
75 | 62 | *day = static_cast<uint8_t>(static_cast<unsigned>(ymd.day())); |
76 | 62 | *hour = static_cast<uint8_t>(h); |
77 | 62 | *minute = static_cast<uint8_t>(m); |
78 | 62 | *second = static_cast<uint8_t>(s); |
79 | 62 | return true; |
80 | 62 | } |
81 | | |
82 | | template <typename DateType> |
83 | | inline bool try_convert_timestamp_with_fixed_offset(DateType& value, int64_t epoch_seconds, |
84 | 62 | int32_t offset_seconds) { |
85 | 62 | uint16_t year = 0; |
86 | 62 | uint8_t month = 0; |
87 | 62 | uint8_t day = 0; |
88 | 62 | uint8_t hour = 0; |
89 | 62 | uint8_t minute = 0; |
90 | 62 | uint8_t second = 0; |
91 | 62 | if (!try_split_local_time(epoch_seconds + offset_seconds, &year, &month, &day, &hour, &minute, |
92 | 62 | &second)) { |
93 | 0 | return false; |
94 | 0 | } |
95 | | // The caller sets sub-second precision immediately after this conversion. |
96 | 62 | value.unchecked_set_time(year, month, day, hour, minute, second, 0); |
97 | 62 | return true; |
98 | 62 | } |
99 | | |
100 | | template <typename DateType> |
101 | | inline bool try_convert_timestamp_with_lookup(DateType& value, int64_t epoch_seconds, |
102 | 10 | const cctz::time_zone& ctz) { |
103 | 10 | static const auto epoch = std::chrono::time_point_cast<cctz::sys_seconds>( |
104 | 10 | std::chrono::system_clock::from_time_t(0)); |
105 | 10 | cctz::time_point<cctz::sys_seconds> t = epoch + cctz::seconds(epoch_seconds); |
106 | 10 | const int32_t offset = ctz.lookup_offset(t).offset; |
107 | 10 | return try_convert_timestamp_with_fixed_offset(value, epoch_seconds, offset); |
108 | 10 | } |
109 | | |
110 | | } // namespace detail |
111 | | |
112 | | struct ConvertParams { |
113 | | // schema.logicalType.TIMESTAMP.isAdjustedToUTC == false |
114 | | static const cctz::time_zone utc0; |
115 | | // schema.logicalType.TIMESTAMP.isAdjustedToUTC == true, we should set local time zone |
116 | | const cctz::time_zone* ctz = nullptr; |
117 | | bool is_fixed_offset = false; |
118 | | int32_t fixed_offset_seconds = 0; |
119 | | int64_t second_mask = 1; |
120 | | int64_t scale_to_nano_factor = 1; |
121 | | const FieldSchema* field_schema = nullptr; |
122 | | |
123 | | //For UInt8 -> Int16,UInt16 -> Int32,UInt32 -> Int64,UInt64 -> Int128. |
124 | | bool is_type_compatibility = false; |
125 | | |
126 | | /** |
127 | | * Some frameworks like paimon maybe writes non-standard parquet files. Timestamp field doesn't have |
128 | | * logicalType or converted_type to indicates its precision. We have to reset the time mask. |
129 | | */ |
130 | 5 | void reset_time_scale_if_missing(int scale) { |
131 | 5 | const auto& schema = field_schema->parquet_schema; |
132 | 5 | if (!schema.__isset.logicalType && !schema.__isset.converted_type) { |
133 | 0 | int ts_scale = 9; |
134 | 0 | if (scale <= 3) { |
135 | 0 | ts_scale = 3; |
136 | 0 | } else if (scale <= 6) { |
137 | 0 | ts_scale = 6; |
138 | 0 | } |
139 | 0 | second_mask = common::exp10_i64(ts_scale); |
140 | 0 | scale_to_nano_factor = common::exp10_i64(9 - ts_scale); |
141 | | |
142 | | // The missing parque metadata makes it impossible for us to know the time zone information, |
143 | | // so we default to UTC here. |
144 | 0 | if (ctz == nullptr) { |
145 | 0 | ctz = &utc0; |
146 | 0 | } |
147 | 0 | } |
148 | 5 | } |
149 | | |
150 | 225 | void init(const FieldSchema* field_schema_, const cctz::time_zone* ctz_) { |
151 | 225 | field_schema = field_schema_; |
152 | 225 | if (ctz_ != nullptr) { |
153 | 225 | ctz = ctz_; |
154 | 225 | } |
155 | 225 | const auto& schema = field_schema->parquet_schema; |
156 | 225 | if (schema.__isset.logicalType && schema.logicalType.__isset.TIMESTAMP) { |
157 | 5 | const auto& timestamp_info = schema.logicalType.TIMESTAMP; |
158 | 5 | if (!timestamp_info.isAdjustedToUTC) { |
159 | | // should set timezone to utc+0 |
160 | | // Reference: https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#instant-semantics-timestamps-normalized-to-utc |
161 | | // If isAdjustedToUTC = false, the reader should display the same value no mater what local time zone is. For example: |
162 | | // When a timestamp is stored as `1970-01-03 12:00:00`, |
163 | | // if isAdjustedToUTC = true, UTC8 should read as `1970-01-03 20:00:00`, UTC6 should read as `1970-01-03 18:00:00` |
164 | | // if isAdjustedToUTC = false, UTC8 and UTC6 should read as `1970-01-03 12:00:00`, which is the same as `1970-01-03 12:00:00` in UTC0 |
165 | 2 | ctz = &utc0; |
166 | 2 | } |
167 | 5 | const auto& time_unit = timestamp_info.unit; |
168 | 5 | if (time_unit.__isset.MILLIS) { |
169 | 1 | second_mask = 1000; |
170 | 1 | scale_to_nano_factor = 1000000; |
171 | 4 | } else if (time_unit.__isset.MICROS) { |
172 | 4 | second_mask = 1000000; |
173 | 4 | scale_to_nano_factor = 1000; |
174 | 4 | } else if (time_unit.__isset.NANOS) { |
175 | 0 | second_mask = 1000000000; |
176 | 0 | scale_to_nano_factor = 1; |
177 | 0 | } |
178 | 220 | } else if (schema.__isset.converted_type) { |
179 | 61 | const auto& converted_type = schema.converted_type; |
180 | 61 | if (converted_type == tparquet::ConvertedType::TIMESTAMP_MILLIS) { |
181 | 0 | second_mask = 1000; |
182 | 0 | scale_to_nano_factor = 1000000; |
183 | 61 | } else if (converted_type == tparquet::ConvertedType::TIMESTAMP_MICROS) { |
184 | 4 | second_mask = 1000000; |
185 | 4 | scale_to_nano_factor = 1000; |
186 | 4 | } |
187 | 61 | } |
188 | | |
189 | 225 | if (ctz != nullptr) { |
190 | 225 | is_fixed_offset = |
191 | 225 | TimezoneUtils::try_get_fixed_offset_seconds(*ctz, &fixed_offset_seconds); |
192 | 225 | } |
193 | 225 | is_type_compatibility = field_schema_->is_type_compatibility; |
194 | 225 | } |
195 | | }; |
196 | | |
197 | | /** |
198 | | * Convert parquet physical column to logical column |
199 | | * In parquet document(https://github.com/apache/parquet-format/blob/master/LogicalTypes.md), |
200 | | * Logical or converted type is the data type of column, physical type is the stored type of column chunk. |
201 | | * eg, decimal type can be stored as INT32, INT64, BYTE_ARRAY, FIXED_LENGTH_BYTE_ARRAY. |
202 | | * So there is a convert process from physical type to logical type. |
203 | | * In addition, Schema change will bring about a change in logical type. |
204 | | * |
205 | | * `PhysicalToLogicalConverter` strips away the conversion of logical type, and reuse `ColumnTypeConverter` |
206 | | * to resolve schema change, allowing parquet reader to only focus on the conversion of physical types. |
207 | | * |
208 | | * Therefore, two layers of converters are designed: |
209 | | * First, read parquet data with the physical type |
210 | | * Second, convert physical type to logical type |
211 | | * Third, convert logical type to the final type planned by FE(schema change) |
212 | | * |
213 | | * Ultimate performance optimization: |
214 | | * 1. If process of (First => Second) is consistent, eg. from BYTE_ARRAY to string, no additional copies and conversions will be introduced; |
215 | | * 2. If process of (Second => Third) is consistent, no additional copies and conversions will be introduced; |
216 | | * 3. The null map is shared among all processes, no additional copies and conversions will be introduced in null map; |
217 | | * 4. Only create one physical column in physical conversion, and reused in each loop; |
218 | | * 5. Only create one logical column in logical conversion, and reused in each loop; |
219 | | * 6. FIXED_LENGTH_BYTE_ARRAY is read as ColumnUInt8 instead of ColumnString, so the underlying decoder has no process to decode string |
220 | | * and use memory copy to read the data as a whole, and the conversion has no need to resolve the Offsets in ColumnString. |
221 | | */ |
222 | | class PhysicalToLogicalConverter { |
223 | | protected: |
224 | | ColumnPtr _cached_src_physical_column = nullptr; |
225 | | DataTypePtr _cached_src_physical_type = nullptr; |
226 | | std::unique_ptr<converter::ColumnTypeConverter> _logical_converter = nullptr; |
227 | | |
228 | | std::string _error_msg; |
229 | | |
230 | | std::unique_ptr<ConvertParams> _convert_params; |
231 | | |
232 | | public: |
233 | | static std::unique_ptr<PhysicalToLogicalConverter> get_converter( |
234 | | const FieldSchema* field_schema, DataTypePtr src_logical_type, |
235 | | const DataTypePtr& dst_logical_type, const cctz::time_zone* ctz, |
236 | | bool is_dict_filter = false); |
237 | | |
238 | | static bool is_parquet_native_type(PrimitiveType type); |
239 | | |
240 | | static bool is_decimal_type(PrimitiveType type); |
241 | | |
242 | 221 | PhysicalToLogicalConverter() = default; |
243 | 221 | virtual ~PhysicalToLogicalConverter() = default; |
244 | | |
245 | 4 | virtual Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) { |
246 | 4 | return Status::OK(); |
247 | 4 | } |
248 | | |
249 | | Status convert(ColumnPtr& src_physical_col, DataTypePtr src_logical_type, |
250 | | const DataTypePtr& dst_logical_type, ColumnPtr& dst_logical_col, |
251 | 231 | bool is_dict_filter) { |
252 | 231 | if (is_dict_filter) { |
253 | 0 | src_logical_type = DataTypeFactory::instance().create_data_type( |
254 | 0 | PrimitiveType::TYPE_INT, dst_logical_type->is_nullable()); |
255 | 0 | } |
256 | 231 | if (is_consistent() && _logical_converter->is_consistent()) { |
257 | 186 | return Status::OK(); |
258 | 186 | } |
259 | 45 | ColumnPtr src_logical_column; |
260 | 45 | if (is_consistent()) { |
261 | 4 | if (dst_logical_type->is_nullable()) { |
262 | 4 | auto doris_nullable_column = |
263 | 4 | assert_cast<const ColumnNullable*>(dst_logical_col.get()); |
264 | 4 | src_logical_column = |
265 | 4 | ColumnNullable::create(_cached_src_physical_column, |
266 | 4 | doris_nullable_column->get_null_map_column_ptr()); |
267 | 4 | } else { |
268 | 0 | src_logical_column = _cached_src_physical_column; |
269 | 0 | } |
270 | 41 | } else { |
271 | 41 | src_logical_column = _logical_converter->get_column(src_logical_type, dst_logical_col, |
272 | 41 | dst_logical_type); |
273 | 41 | } |
274 | 45 | RETURN_IF_ERROR(physical_convert(src_physical_col, src_logical_column)); |
275 | 45 | auto converted_column = dst_logical_col->assume_mutable(); |
276 | 45 | return _logical_converter->convert(src_logical_column, converted_column); |
277 | 45 | } |
278 | | |
279 | | virtual ColumnPtr get_physical_column(tparquet::Type::type src_physical_type, |
280 | | DataTypePtr src_logical_type, |
281 | | ColumnPtr& dst_logical_column, |
282 | | const DataTypePtr& dst_logical_type, bool is_dict_filter); |
283 | | |
284 | 231 | DataTypePtr& get_physical_type() { return _cached_src_physical_type; } |
285 | | |
286 | 127 | virtual bool is_consistent() { return false; } |
287 | | |
288 | 368 | virtual bool support() { return true; } |
289 | | |
290 | 0 | std::string get_error_msg() { return _error_msg; } |
291 | | }; |
292 | | |
293 | | class ConsistentPhysicalConverter : public PhysicalToLogicalConverter { |
294 | 455 | bool is_consistent() override { return true; } |
295 | | }; |
296 | | |
297 | | class UnsupportedConverter : public PhysicalToLogicalConverter { |
298 | | public: |
299 | 0 | UnsupportedConverter(std::string error_msg) { _error_msg = error_msg; } |
300 | | |
301 | | UnsupportedConverter(tparquet::Type::type src_physical_type, |
302 | 0 | const DataTypePtr& src_logical_type) { |
303 | 0 | std::string src_physical_str = tparquet::to_string(src_physical_type); |
304 | 0 | std::string src_logical_str = src_logical_type->get_name(); |
305 | 0 | _error_msg = src_physical_str + " => " + src_logical_str; |
306 | 0 | } |
307 | | |
308 | 0 | bool support() override { return false; } |
309 | | |
310 | 0 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
311 | 0 | return Status::InternalError("Unsupported physical to logical type: {}", _error_msg); |
312 | 0 | } |
313 | | }; |
314 | | |
315 | | // for tinyint, smallint |
316 | | template <PrimitiveType IntPrimitiveType> |
317 | | class LittleIntPhysicalConverter : public PhysicalToLogicalConverter { |
318 | 28 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
319 | 28 | using DstCppType = typename PrimitiveTypeTraits<IntPrimitiveType>::CppType; |
320 | 28 | using DstColumnType = typename PrimitiveTypeTraits<IntPrimitiveType>::ColumnType; |
321 | 28 | ColumnPtr from_col = remove_nullable(src_physical_col); |
322 | 28 | MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable(); |
323 | | |
324 | 28 | size_t rows = from_col->size(); |
325 | | // always comes from tparquet::Type::INT32 |
326 | 28 | auto& src_data = assert_cast<const ColumnInt32*>(from_col.get())->get_data(); |
327 | 28 | size_t start_idx = to_col->size(); |
328 | 28 | to_col->resize(start_idx + rows); |
329 | 28 | auto& data = assert_cast<DstColumnType&>(*to_col.get()).get_data(); |
330 | 108 | for (int i = 0; i < rows; ++i) { |
331 | 80 | data[start_idx + i] = static_cast<DstCppType>(src_data[i]); |
332 | 80 | } |
333 | | |
334 | 28 | return Status::OK(); |
335 | 28 | } _ZN5doris7parquet26LittleIntPhysicalConverterILNS_13PrimitiveTypeE3EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Line | Count | Source | 318 | 17 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { | 319 | 17 | using DstCppType = typename PrimitiveTypeTraits<IntPrimitiveType>::CppType; | 320 | 17 | using DstColumnType = typename PrimitiveTypeTraits<IntPrimitiveType>::ColumnType; | 321 | 17 | ColumnPtr from_col = remove_nullable(src_physical_col); | 322 | 17 | MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable(); | 323 | | | 324 | 17 | size_t rows = from_col->size(); | 325 | | // always comes from tparquet::Type::INT32 | 326 | 17 | auto& src_data = assert_cast<const ColumnInt32*>(from_col.get())->get_data(); | 327 | 17 | size_t start_idx = to_col->size(); | 328 | 17 | to_col->resize(start_idx + rows); | 329 | 17 | auto& data = assert_cast<DstColumnType&>(*to_col.get()).get_data(); | 330 | 59 | for (int i = 0; i < rows; ++i) { | 331 | 42 | data[start_idx + i] = static_cast<DstCppType>(src_data[i]); | 332 | 42 | } | 333 | | | 334 | 17 | return Status::OK(); | 335 | 17 | } |
_ZN5doris7parquet26LittleIntPhysicalConverterILNS_13PrimitiveTypeE4EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Line | Count | Source | 318 | 11 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { | 319 | 11 | using DstCppType = typename PrimitiveTypeTraits<IntPrimitiveType>::CppType; | 320 | 11 | using DstColumnType = typename PrimitiveTypeTraits<IntPrimitiveType>::ColumnType; | 321 | 11 | ColumnPtr from_col = remove_nullable(src_physical_col); | 322 | 11 | MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable(); | 323 | | | 324 | 11 | size_t rows = from_col->size(); | 325 | | // always comes from tparquet::Type::INT32 | 326 | 11 | auto& src_data = assert_cast<const ColumnInt32*>(from_col.get())->get_data(); | 327 | 11 | size_t start_idx = to_col->size(); | 328 | 11 | to_col->resize(start_idx + rows); | 329 | 11 | auto& data = assert_cast<DstColumnType&>(*to_col.get()).get_data(); | 330 | 49 | for (int i = 0; i < rows; ++i) { | 331 | 38 | data[start_idx + i] = static_cast<DstCppType>(src_data[i]); | 332 | 38 | } | 333 | | | 334 | 11 | return Status::OK(); | 335 | 11 | } |
|
336 | | }; |
337 | | |
338 | | template <PrimitiveType type> |
339 | | struct UnsignedTypeTraits; |
340 | | |
341 | | template <> |
342 | | struct UnsignedTypeTraits<TYPE_SMALLINT> { |
343 | | using UnsignedCppType = UInt8; |
344 | | //https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#unsigned-integers |
345 | | //INT(8, false), INT(16, false), and INT(32, false) must annotate an int32 primitive type and INT(64, false) |
346 | | //must annotate an int64 primitive type. |
347 | | using StorageCppType = Int32; |
348 | | using StorageColumnType = ColumnInt32; |
349 | | }; |
350 | | |
351 | | template <> |
352 | | struct UnsignedTypeTraits<TYPE_INT> { |
353 | | using UnsignedCppType = UInt16; |
354 | | using StorageCppType = Int32; |
355 | | using StorageColumnType = ColumnInt32; |
356 | | }; |
357 | | |
358 | | template <> |
359 | | struct UnsignedTypeTraits<TYPE_BIGINT> { |
360 | | using UnsignedCppType = UInt32; |
361 | | using StorageCppType = Int32; |
362 | | using StorageColumnType = ColumnInt32; |
363 | | }; |
364 | | |
365 | | template <> |
366 | | struct UnsignedTypeTraits<TYPE_LARGEINT> { |
367 | | using UnsignedCppType = UInt64; |
368 | | using StorageCppType = Int64; |
369 | | using StorageColumnType = ColumnInt64; |
370 | | }; |
371 | | |
372 | | template <PrimitiveType IntPrimitiveType> |
373 | | class UnsignedIntegerConverter : public PhysicalToLogicalConverter { |
374 | 0 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
375 | 0 | using UnsignedCppType = typename UnsignedTypeTraits<IntPrimitiveType>::UnsignedCppType; |
376 | 0 | using StorageCppType = typename UnsignedTypeTraits<IntPrimitiveType>::StorageCppType; |
377 | 0 | using StorageColumnType = typename UnsignedTypeTraits<IntPrimitiveType>::StorageColumnType; |
378 | 0 | using DstColumnType = typename PrimitiveTypeTraits<IntPrimitiveType>::ColumnType; |
379 | |
|
380 | 0 | ColumnPtr from_col = remove_nullable(src_physical_col); |
381 | 0 | MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable(); |
382 | 0 | auto& src_data = assert_cast<const StorageColumnType*>(from_col.get())->get_data(); |
383 | |
|
384 | 0 | size_t rows = src_data.size(); |
385 | 0 | size_t start_idx = to_col->size(); |
386 | 0 | to_col->resize(start_idx + rows); |
387 | 0 | auto& data = assert_cast<DstColumnType&>(*to_col.get()).get_data(); |
388 | |
|
389 | 0 | for (int i = 0; i < rows; i++) { |
390 | 0 | StorageCppType src_value = src_data[i]; |
391 | 0 | auto unsigned_value = static_cast<UnsignedCppType>(src_value); |
392 | 0 | data[start_idx + i] = unsigned_value; |
393 | 0 | } |
394 | |
|
395 | 0 | return Status::OK(); |
396 | 0 | } Unexecuted instantiation: _ZN5doris7parquet24UnsignedIntegerConverterILNS_13PrimitiveTypeE4EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet24UnsignedIntegerConverterILNS_13PrimitiveTypeE5EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet24UnsignedIntegerConverterILNS_13PrimitiveTypeE6EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet24UnsignedIntegerConverterILNS_13PrimitiveTypeE7EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ |
397 | | }; |
398 | | |
399 | | class FixedSizeBinaryConverter : public PhysicalToLogicalConverter { |
400 | | private: |
401 | | int _type_length; |
402 | | |
403 | | public: |
404 | 0 | FixedSizeBinaryConverter(int type_length) : _type_length(type_length) {} |
405 | | |
406 | 0 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
407 | 0 | ColumnPtr from_col = remove_nullable(src_physical_col); |
408 | 0 | MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable(); |
409 | |
|
410 | 0 | auto* src_data = assert_cast<const ColumnUInt8*>(from_col.get()); |
411 | 0 | size_t length = src_data->size(); |
412 | 0 | size_t num_values = length / _type_length; |
413 | 0 | auto& string_col = static_cast<ColumnString&>(*to_col.get()); |
414 | 0 | auto& offsets = string_col.get_offsets(); |
415 | 0 | auto& chars = string_col.get_chars(); |
416 | |
|
417 | 0 | size_t origin_size = chars.size(); |
418 | 0 | chars.resize(origin_size + length); |
419 | 0 | memcpy(chars.data() + origin_size, src_data->get_data().data(), length); |
420 | |
|
421 | 0 | origin_size = offsets.size(); |
422 | 0 | offsets.resize(origin_size + num_values); |
423 | 0 | auto end_offset = offsets[origin_size - 1]; |
424 | 0 | for (int i = 0; i < num_values; ++i) { |
425 | 0 | end_offset += _type_length; |
426 | 0 | offsets[origin_size + i] = end_offset; |
427 | 0 | } |
428 | |
|
429 | 0 | return Status::OK(); |
430 | 0 | } |
431 | | }; |
432 | | |
433 | | class Float16PhysicalConverter : public PhysicalToLogicalConverter { |
434 | | private: |
435 | | int _type_length; |
436 | | |
437 | | public: |
438 | 0 | Float16PhysicalConverter(int type_length) : _type_length(type_length) { |
439 | 0 | DCHECK_EQ(_type_length, 2); |
440 | 0 | } |
441 | | |
442 | 0 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
443 | 0 | ColumnPtr from_col = remove_nullable(src_physical_col); |
444 | 0 | MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable(); |
445 | |
|
446 | 0 | const auto* src_data = assert_cast<const ColumnUInt8*>(from_col.get()); |
447 | 0 | size_t length = src_data->size(); |
448 | 0 | size_t num_values = length / _type_length; |
449 | 0 | auto* to_float_column = assert_cast<ColumnFloat32*>(to_col.get()); |
450 | 0 | size_t start_idx = to_float_column->size(); |
451 | 0 | to_float_column->resize(start_idx + num_values); |
452 | 0 | auto& to_float_column_data = to_float_column->get_data(); |
453 | 0 | const auto* ptr = src_data->get_data().data(); |
454 | 0 | for (int i = 0; i < num_values; ++i) { |
455 | 0 | size_t offset = i * _type_length; |
456 | 0 | const auto* data_ptr = ptr + offset; |
457 | 0 | uint16_t raw; |
458 | 0 | memcpy(&raw, data_ptr, sizeof(uint16_t)); |
459 | 0 | float value = half_to_float(raw); |
460 | 0 | to_float_column_data[start_idx + i] = value; |
461 | 0 | } |
462 | |
|
463 | 0 | return Status::OK(); |
464 | 0 | } |
465 | | |
466 | 0 | float half_to_float(uint16_t h) { |
467 | | // uint16_t h: half precision floating point |
468 | | // bit 15: sign(1 bit) |
469 | | // bits 14..10 : exponent(5 bits) |
470 | | // bits 9..0 : mantissa(10 bits) |
471 | | |
472 | | // sign bit placed to float32 bit31 |
473 | 0 | uint32_t sign = (h & 0x8000U) << 16; // 0x8000 << 16 = 0x8000_0000 |
474 | | // exponent:(5 bits) |
475 | 0 | uint32_t exp = (h & 0x7C00U) >> 10; // 0x7C00 = 0111 1100 0000 (half exponent mask) |
476 | | // mantissa(10 bits) |
477 | 0 | uint32_t mant = (h & 0x03FFU); // 10-bit fraction |
478 | | |
479 | | // cases:Zero/Subnormal, Normal, Inf/NaN |
480 | 0 | if (exp == 0) { |
481 | | // exp==0: Zero or Subnormal ---------- |
482 | 0 | if (mant == 0) { |
483 | | // ±0.0 |
484 | | // sign = either 0x00000000 or 0x80000000 |
485 | 0 | return std::bit_cast<float>(sign); |
486 | 0 | } else { |
487 | | // ---------- Subnormal ---------- |
488 | | // half subnormal: |
489 | | // value = (-1)^sign * (mant / 2^10) * 2^(1 - bias) |
490 | | // half bias = 15 → exponent = 1 - 15 = -14 |
491 | 0 | float f = (static_cast<float>(mant) / 1024.0F) * std::powf(2.0F, -14.0F); |
492 | 0 | return sign ? -f : f; |
493 | 0 | } |
494 | 0 | } else if (exp == 0x1F) { |
495 | | // exp==31: Inf or NaN ---------- |
496 | | // float32: |
497 | | // exponent = 255 (0xFF) |
498 | | // mantissa = mant << 13 |
499 | 0 | uint32_t f = sign | 0x7F800000U | (mant << 13); |
500 | 0 | return std::bit_cast<float>(f); |
501 | 0 | } else { |
502 | | // Normalized ---------- |
503 | | // float32 exponent: |
504 | | // exp32 = exp16 - bias16 + bias32 |
505 | | // bias16 = 15 |
506 | | // bias32 = 127 |
507 | | // |
508 | | // so: exp32 = exp + (127 - 15) |
509 | 0 | uint32_t f = sign | ((exp + (127 - 15)) << 23) // place to float32 exponent |
510 | 0 | | (mant << 13); // mantissa align to 23 bits |
511 | 0 | return std::bit_cast<float>(f); |
512 | 0 | } |
513 | 0 | } |
514 | | }; |
515 | | |
516 | | class UUIDVarBinaryConverter : public PhysicalToLogicalConverter { |
517 | | public: |
518 | 1 | UUIDVarBinaryConverter(int type_length) : _type_length(type_length) {} |
519 | | |
520 | 1 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
521 | 1 | DCHECK(!is_column_const(*src_physical_col)) << src_physical_col->dump_structure(); |
522 | 1 | DCHECK(!is_column_const(*src_logical_column)) << src_logical_column->dump_structure(); |
523 | 1 | const ColumnUInt8* uint8_col = nullptr; |
524 | 1 | if (is_column_nullable(*src_physical_col)) { |
525 | 1 | const auto& nullable = assert_cast<const ColumnNullable*>(src_physical_col.get()); |
526 | 1 | uint8_col = &assert_cast<const ColumnUInt8&>(nullable->get_nested_column()); |
527 | 1 | } else { |
528 | 0 | uint8_col = &assert_cast<const ColumnUInt8&>(*src_physical_col); |
529 | 0 | } |
530 | | |
531 | 1 | MutableColumnPtr to_col = nullptr; |
532 | | // nullmap flag seems have been handled in upper level |
533 | 1 | if (src_logical_column->is_nullable()) { |
534 | 1 | const auto* nullable = assert_cast<const ColumnNullable*>(src_logical_column.get()); |
535 | 1 | to_col = nullable->get_nested_column_ptr()->assume_mutable(); |
536 | 1 | } else { |
537 | 0 | to_col = src_logical_column->assume_mutable(); |
538 | 0 | } |
539 | 1 | auto* to_varbinary_column = assert_cast<ColumnVarbinary*>(to_col.get()); |
540 | 1 | size_t length = uint8_col->size(); |
541 | 1 | size_t num_values = length / _type_length; |
542 | 1 | const auto* ptr = uint8_col->get_data().data(); |
543 | | |
544 | 4 | for (int i = 0; i < num_values; ++i) { |
545 | 3 | auto offset = i * _type_length; |
546 | 3 | const char* data_ptr = reinterpret_cast<const char*>(ptr + offset); |
547 | 3 | to_varbinary_column->insert_data(data_ptr, _type_length); |
548 | 3 | } |
549 | 1 | return Status::OK(); |
550 | 1 | } |
551 | | |
552 | | private: |
553 | | int _type_length; |
554 | | }; |
555 | | |
556 | | template <PrimitiveType DecimalPType> |
557 | | class FixedSizeToDecimal : public PhysicalToLogicalConverter { |
558 | | public: |
559 | | using DecimalType = typename PrimitiveTypeTraits<DecimalPType>::CppType; |
560 | 2 | FixedSizeToDecimal(int32_t type_length) : _type_length(type_length) {}Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EEC2Ei _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EEC2Ei Line | Count | Source | 560 | 2 | FixedSizeToDecimal(int32_t type_length) : _type_length(type_length) {} |
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EEC2Ei Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EEC2Ei |
561 | | |
562 | 2 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
563 | 2 | ColumnPtr src_col = remove_nullable(src_physical_col); |
564 | 2 | MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); |
565 | | |
566 | 2 | #define M(FixedTypeLength, ValueCopyType) \ |
567 | 2 | case FixedTypeLength: \ |
568 | 2 | return _convert_internal<FixedTypeLength, ValueCopyType>(src_col, dst_col); |
569 | | |
570 | 2 | #define APPLY_FOR_DECIMALS() \ |
571 | 2 | M(1, int64_t) \ |
572 | 0 | M(2, int64_t) \ |
573 | 0 | M(3, int64_t) \ |
574 | 0 | M(4, int64_t) \ |
575 | 1 | M(5, int64_t) \ |
576 | 1 | M(6, int64_t) \ |
577 | 0 | M(7, int64_t) \ |
578 | 1 | M(8, int64_t) \ |
579 | 1 | M(9, int128_t) \ |
580 | 0 | M(10, int128_t) \ |
581 | 0 | M(11, int128_t) \ |
582 | 0 | M(12, int128_t) \ |
583 | 0 | M(13, int128_t) \ |
584 | 0 | M(14, int128_t) \ |
585 | 0 | M(15, int128_t) \ |
586 | 0 | M(16, int128_t) \ |
587 | 0 | M(17, wide::Int256) \ |
588 | 0 | M(18, wide::Int256) \ |
589 | 0 | M(19, wide::Int256) \ |
590 | 0 | M(20, wide::Int256) \ |
591 | 0 | M(21, wide::Int256) \ |
592 | 0 | M(22, wide::Int256) \ |
593 | 0 | M(23, wide::Int256) \ |
594 | 0 | M(24, wide::Int256) \ |
595 | 0 | M(25, wide::Int256) \ |
596 | 0 | M(26, wide::Int256) \ |
597 | 0 | M(27, wide::Int256) \ |
598 | 0 | M(28, wide::Int256) \ |
599 | 0 | M(29, wide::Int256) \ |
600 | 0 | M(30, wide::Int256) \ |
601 | 0 | M(31, wide::Int256) \ |
602 | 0 | M(32, wide::Int256) |
603 | | |
604 | 2 | switch (_type_length) { |
605 | 0 | APPLY_FOR_DECIMALS() |
606 | 0 | default: |
607 | 0 | throw Exception(Status::FatalError("__builtin_unreachable")); |
608 | 2 | } |
609 | 0 | return Status::OK(); |
610 | 2 | #undef APPLY_FOR_DECIMALS |
611 | 2 | #undef M |
612 | 2 | } Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Line | Count | Source | 562 | 2 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { | 563 | 2 | ColumnPtr src_col = remove_nullable(src_physical_col); | 564 | 2 | MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); | 565 | | | 566 | 2 | #define M(FixedTypeLength, ValueCopyType) \ | 567 | 2 | case FixedTypeLength: \ | 568 | 2 | return _convert_internal<FixedTypeLength, ValueCopyType>(src_col, dst_col); | 569 | | | 570 | 2 | #define APPLY_FOR_DECIMALS() \ | 571 | 2 | M(1, int64_t) \ | 572 | 2 | M(2, int64_t) \ | 573 | 2 | M(3, int64_t) \ | 574 | 2 | M(4, int64_t) \ | 575 | 2 | M(5, int64_t) \ | 576 | 2 | M(6, int64_t) \ | 577 | 2 | M(7, int64_t) \ | 578 | 2 | M(8, int64_t) \ | 579 | 2 | M(9, int128_t) \ | 580 | 2 | M(10, int128_t) \ | 581 | 2 | M(11, int128_t) \ | 582 | 2 | M(12, int128_t) \ | 583 | 2 | M(13, int128_t) \ | 584 | 2 | M(14, int128_t) \ | 585 | 2 | M(15, int128_t) \ | 586 | 2 | M(16, int128_t) \ | 587 | 2 | M(17, wide::Int256) \ | 588 | 2 | M(18, wide::Int256) \ | 589 | 2 | M(19, wide::Int256) \ | 590 | 2 | M(20, wide::Int256) \ | 591 | 2 | M(21, wide::Int256) \ | 592 | 2 | M(22, wide::Int256) \ | 593 | 2 | M(23, wide::Int256) \ | 594 | 2 | M(24, wide::Int256) \ | 595 | 2 | M(25, wide::Int256) \ | 596 | 2 | M(26, wide::Int256) \ | 597 | 2 | M(27, wide::Int256) \ | 598 | 2 | M(28, wide::Int256) \ | 599 | 2 | M(29, wide::Int256) \ | 600 | 2 | M(30, wide::Int256) \ | 601 | 2 | M(31, wide::Int256) \ | 602 | 2 | M(32, wide::Int256) | 603 | | | 604 | 2 | switch (_type_length) { | 605 | 0 | APPLY_FOR_DECIMALS() | 606 | 0 | default: | 607 | 0 | throw 
Exception(Status::FatalError("__builtin_unreachable")); | 608 | 2 | } | 609 | 0 | return Status::OK(); | 610 | 2 | #undef APPLY_FOR_DECIMALS | 611 | 2 | #undef M | 612 | 2 | } |
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ |
613 | | |
614 | | template <int fixed_type_length, typename ValueCopyType> |
615 | 2 | Status _convert_internal(ColumnPtr& src_col, MutableColumnPtr& dst_col) { |
616 | 2 | size_t rows = src_col->size() / fixed_type_length; |
617 | 2 | auto* buf = static_cast<const ColumnUInt8*>(src_col.get())->get_data().data(); |
618 | 2 | size_t start_idx = dst_col->size(); |
619 | 2 | dst_col->resize(start_idx + rows); |
620 | | |
621 | 2 | auto& data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col.get())->get_data(); |
622 | 2 | size_t offset = 0; |
623 | 6 | for (int i = 0; i < rows; i++) { |
624 | | // When Decimal in parquet is stored in byte arrays, binary and fixed, |
625 | | // the unscaled number must be encoded as two's complement using big-endian byte order. |
626 | 4 | ValueCopyType value = 0; |
627 | 4 | memcpy(reinterpret_cast<char*>(&value), buf + offset, sizeof(value)); |
628 | 4 | offset += fixed_type_length; |
629 | 4 | value = to_endian<std::endian::big>(value); |
630 | 4 | value = value >> ((sizeof(value) - fixed_type_length) * 8); |
631 | 4 | auto& v = reinterpret_cast<DecimalType&>(data[start_idx + i]); |
632 | 4 | v = (DecimalType)value; |
633 | 4 | } |
634 | | |
635 | 2 | return Status::OK(); |
636 | 2 | } Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi1ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi2ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi3ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi4ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi5ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi6ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi7ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi8ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi9EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi10EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi11EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi12EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi13EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi14EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi15EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi16EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi17EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi18EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi19EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi20EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi21EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi22EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi23EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi24EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi25EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi26EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi27EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi28EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi29EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi30EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi31EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi32EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi1ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi2ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi3ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi4ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi5ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Line | Count | Source | 615 | 1 | Status _convert_internal(ColumnPtr& src_col, MutableColumnPtr& dst_col) { | 616 | 1 | size_t rows = src_col->size() / fixed_type_length; | 617 | 1 | auto* buf = 
static_cast<const ColumnUInt8*>(src_col.get())->get_data().data(); | 618 | 1 | size_t start_idx = dst_col->size(); | 619 | 1 | dst_col->resize(start_idx + rows); | 620 | | | 621 | 1 | auto& data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col.get())->get_data(); | 622 | 1 | size_t offset = 0; | 623 | 3 | for (int i = 0; i < rows; i++) { | 624 | | // When Decimal in parquet is stored in byte arrays, binary and fixed, | 625 | | // the unscaled number must be encoded as two's complement using big-endian byte order. | 626 | 2 | ValueCopyType value = 0; | 627 | 2 | memcpy(reinterpret_cast<char*>(&value), buf + offset, sizeof(value)); | 628 | 2 | offset += fixed_type_length; | 629 | 2 | value = to_endian<std::endian::big>(value); | 630 | 2 | value = value >> ((sizeof(value) - fixed_type_length) * 8); | 631 | 2 | auto& v = reinterpret_cast<DecimalType&>(data[start_idx + i]); | 632 | 2 | v = (DecimalType)value; | 633 | 2 | } | 634 | | | 635 | 1 | return Status::OK(); | 636 | 1 | } |
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi6ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi7ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi8ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Line | Count | Source | 615 | 1 | Status _convert_internal(ColumnPtr& src_col, MutableColumnPtr& dst_col) { | 616 | 1 | size_t rows = src_col->size() / fixed_type_length; | 617 | 1 | auto* buf = static_cast<const ColumnUInt8*>(src_col.get())->get_data().data(); | 618 | 1 | size_t start_idx = dst_col->size(); | 619 | 1 | dst_col->resize(start_idx + rows); | 620 | | | 621 | 1 | auto& data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col.get())->get_data(); | 622 | 1 | size_t offset = 0; | 623 | 3 | for (int i = 0; i < rows; i++) { | 624 | | // When Decimal in parquet is stored in byte arrays, binary and fixed, | 625 | | // the unscaled number must be encoded as two's complement using big-endian byte order. | 626 | 2 | ValueCopyType value = 0; | 627 | 2 | memcpy(reinterpret_cast<char*>(&value), buf + offset, sizeof(value)); | 628 | 2 | offset += fixed_type_length; | 629 | 2 | value = to_endian<std::endian::big>(value); | 630 | 2 | value = value >> ((sizeof(value) - fixed_type_length) * 8); | 631 | 2 | auto& v = reinterpret_cast<DecimalType&>(data[start_idx + i]); | 632 | 2 | v = (DecimalType)value; | 633 | 2 | } | 634 | | | 635 | 1 | return Status::OK(); | 636 | 1 | } |
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi9EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi10EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi11EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi12EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi13EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi14EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi15EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi16EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi17EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi18EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE 
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi19EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi20EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi21EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi22EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi23EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi24EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi25EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi26EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi27EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi28EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi29EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi30EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi31EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi32EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi1ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi2ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi3ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi4ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi5ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi6ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi7ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi8ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi9EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi10EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi11EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi12EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi13EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi14EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi15EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi16EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi17EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi18EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi19EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi20EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi21EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi22EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi23EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi24EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi25EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi26EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi27EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi28EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi29EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi30EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi31EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi32EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi1ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi2ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi3ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi4ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi5ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi6ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi7ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi8ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi9EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi10EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi11EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi12EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi13EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi14EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi15EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi16EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi17EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi18EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi19EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi20EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi21EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi22EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi23EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi24EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi25EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi26EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi27EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi28EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi29EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi30EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi31EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi32EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE |
637 | | |
638 | | private: |
639 | | int32_t _type_length; |
640 | | }; |
641 | | |
642 | | template <PrimitiveType DecimalPType> |
643 | | class StringToDecimal : public PhysicalToLogicalConverter { |
644 | | using DecimalType = typename PrimitiveTypeTraits<DecimalPType>::CppType; |
645 | 0 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
646 | 0 | using ValueCopyType = DecimalType::NativeType; |
647 | 0 | ColumnPtr src_col = remove_nullable(src_physical_col); |
648 | 0 | MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); |
649 | |
|
650 | 0 | size_t rows = src_col->size(); |
651 | 0 | auto buf = static_cast<const ColumnString*>(src_col.get())->get_chars().data(); |
652 | 0 | auto& offset = static_cast<const ColumnString*>(src_col.get())->get_offsets(); |
653 | 0 | size_t start_idx = dst_col->size(); |
654 | 0 | dst_col->resize(start_idx + rows); |
655 | |
|
656 | 0 | auto& data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col.get())->get_data(); |
657 | 0 | for (int i = 0; i < rows; i++) { |
658 | 0 | size_t len = offset[i] - offset[i - 1]; |
659 | | // When Decimal in parquet is stored in byte arrays, binary and fixed, |
660 | | // the unscaled number must be encoded as two's complement using big-endian byte order. |
661 | 0 | ValueCopyType value = 0; |
662 | 0 | if (len > 0) { |
663 | 0 | memcpy(reinterpret_cast<char*>(&value), buf + offset[i - 1], len); |
664 | 0 | value = to_endian<std::endian::big>(value); |
665 | 0 | value = value >> ((sizeof(value) - len) * 8); |
666 | 0 | } |
667 | 0 | auto& v = reinterpret_cast<DecimalType&>(data[start_idx + i]); |
668 | 0 | v = (DecimalType)value; |
669 | 0 | } |
670 | |
|
671 | 0 | return Status::OK(); |
672 | 0 | } Unexecuted instantiation: _ZN5doris7parquet15StringToDecimalILNS_13PrimitiveTypeE28EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet15StringToDecimalILNS_13PrimitiveTypeE29EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet15StringToDecimalILNS_13PrimitiveTypeE30EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet15StringToDecimalILNS_13PrimitiveTypeE35EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ |
673 | | }; |
674 | | |
675 | | template <PrimitiveType NumberType, PrimitiveType DecimalPType> |
676 | | class NumberToDecimal : public PhysicalToLogicalConverter { |
677 | | using DecimalType = typename PrimitiveTypeTraits<DecimalPType>::CppType; |
678 | 2 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
679 | 2 | using ValueCopyType = typename DecimalType::NativeType; |
680 | 2 | ColumnPtr src_col = remove_nullable(src_physical_col); |
681 | 2 | MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); |
682 | | |
683 | 2 | size_t rows = src_col->size(); |
684 | 2 | auto* src_data = |
685 | 2 | static_cast<const ColumnVector<NumberType>*>(src_col.get())->get_data().data(); |
686 | 2 | size_t start_idx = dst_col->size(); |
687 | 2 | dst_col->resize(start_idx + rows); |
688 | | |
689 | 2 | auto* data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col.get())->get_data().data(); |
690 | | |
691 | 24 | for (int i = 0; i < rows; i++) { |
692 | 22 | ValueCopyType value; |
693 | 22 | if constexpr (std::is_same_v<DecimalType, Decimal256>) { |
694 | 0 | value = src_data[i]; |
695 | 22 | } else { |
696 | 22 | value = cast_set<ValueCopyType, typename PrimitiveTypeTraits<NumberType>::CppType, |
697 | 22 | false>(src_data[i]); |
698 | 22 | } |
699 | | |
700 | 22 | data[start_idx + i] = (DecimalType)value; |
701 | 22 | } |
702 | 2 | return Status::OK(); |
703 | 2 | } Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE5ELS2_28EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE6ELS2_28EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE5ELS2_29EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE6ELS2_29EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Line | Count | Source | 678 | 2 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { | 679 | 2 | using ValueCopyType = typename DecimalType::NativeType; | 680 | 2 | ColumnPtr src_col = remove_nullable(src_physical_col); | 681 | 2 | MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); | 682 | | | 683 | 2 | size_t rows = src_col->size(); | 684 | 2 | auto* src_data = | 685 | 2 | static_cast<const ColumnVector<NumberType>*>(src_col.get())->get_data().data(); | 686 | 2 | size_t start_idx = dst_col->size(); | 687 | 2 | dst_col->resize(start_idx + rows); | 688 | | | 689 | 2 | auto* data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col.get())->get_data().data(); | 690 | | | 691 | 24 | for (int i = 0; i < rows; i++) { | 692 | 22 | ValueCopyType value; | 693 | | if constexpr (std::is_same_v<DecimalType, Decimal256>) { | 694 | | value = src_data[i]; | 695 | 22 | } else { | 696 | 22 | value = cast_set<ValueCopyType, typename PrimitiveTypeTraits<NumberType>::CppType, | 697 | 22 | false>(src_data[i]); | 698 | 22 | } | 699 | | | 700 | 22 | data[start_idx + i] = (DecimalType)value; | 701 | 22 | } | 702 | 2 | return Status::OK(); | 703 | 2 | } |
Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE5ELS2_30EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE6ELS2_30EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE5ELS2_35EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE6ELS2_35EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ |
704 | | }; |
705 | | |
706 | | class Int32ToDate : public PhysicalToLogicalConverter { |
707 | 7 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
708 | 7 | ColumnPtr src_col = remove_nullable(src_physical_col); |
709 | 7 | MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); |
710 | | |
711 | 7 | size_t rows = src_col->size(); |
712 | 7 | size_t start_idx = dst_col->size(); |
713 | 7 | dst_col->reserve(start_idx + rows); |
714 | | |
715 | 7 | auto& src_data = static_cast<const ColumnInt32*>(src_col.get())->get_data(); |
716 | 7 | auto& data = static_cast<ColumnDateV2*>(dst_col.get())->get_data(); |
717 | 7 | date_day_offset_dict& date_dict = date_day_offset_dict::get(); |
718 | | |
719 | 59 | for (int i = 0; i < rows; i++) { |
720 | 52 | data.push_back_without_reserve(date_dict[src_data[i]].to_date_int_val()); |
721 | 52 | } |
722 | | |
723 | 7 | return Status::OK(); |
724 | 7 | } |
725 | | }; |
726 | | |
727 | | struct Int64ToTimestamp : public PhysicalToLogicalConverter { |
728 | 5 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
729 | 5 | ColumnPtr src_col = remove_nullable(src_physical_col); |
730 | 5 | MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); |
731 | | |
732 | 5 | size_t rows = src_col->size(); |
733 | 5 | size_t start_idx = dst_col->size(); |
734 | 5 | dst_col->resize(start_idx + rows); |
735 | | |
736 | 5 | auto src_data = static_cast<const ColumnInt64*>(src_col.get())->get_data().data(); |
737 | 5 | auto& data = static_cast<ColumnDateTimeV2*>(dst_col.get())->get_data(); |
738 | | |
739 | 51 | for (int i = 0; i < rows; i++) { |
740 | 46 | int64_t x = src_data[i]; |
741 | 46 | auto& num = data[start_idx + i]; |
742 | 46 | auto& value = reinterpret_cast<DateV2Value<DateTimeV2ValueType>&>(num); |
743 | 46 | const int64_t epoch_seconds = x / _convert_params->second_mask; |
744 | 46 | if (_convert_params->is_fixed_offset) { |
745 | 46 | if (!detail::try_convert_timestamp_with_fixed_offset( |
746 | 46 | value, epoch_seconds, _convert_params->fixed_offset_seconds)) { |
747 | 0 | value.from_unixtime(epoch_seconds, *_convert_params->ctz); |
748 | 0 | } |
749 | 46 | } else if (!detail::try_convert_timestamp_with_lookup(value, epoch_seconds, |
750 | 0 | *_convert_params->ctz)) { |
751 | 0 | value.from_unixtime(epoch_seconds, *_convert_params->ctz); |
752 | 0 | } |
753 | 46 | value.set_microsecond((x % _convert_params->second_mask) * |
754 | 46 | (_convert_params->scale_to_nano_factor / 1000)); |
755 | 46 | } |
756 | 5 | return Status::OK(); |
757 | 5 | } |
758 | | }; |
759 | | |
760 | | struct Int64ToTimestampTz : public PhysicalToLogicalConverter { |
761 | 0 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
762 | 0 | ColumnPtr src_col = remove_nullable(src_physical_col); |
763 | 0 | MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); |
764 | |
|
765 | 0 | size_t rows = src_col->size(); |
766 | 0 | size_t start_idx = dst_col->size(); |
767 | 0 | dst_col->resize(start_idx + rows); |
768 | |
|
769 | 0 | const auto& src_data = assert_cast<const ColumnInt64*>(src_col.get())->get_data(); |
770 | 0 | auto& dest_data = assert_cast<ColumnTimeStampTz*>(dst_col.get())->get_data(); |
771 | 0 | static const cctz::time_zone UTC = cctz::utc_time_zone(); |
772 | |
|
773 | 0 | for (int i = 0; i < rows; i++) { |
774 | 0 | int64_t x = src_data[i]; |
775 | 0 | auto& tz = dest_data[start_idx + i]; |
776 | 0 | tz.from_unixtime(x / _convert_params->second_mask, UTC); |
777 | 0 | tz.set_microsecond((x % _convert_params->second_mask) * |
778 | 0 | (_convert_params->scale_to_nano_factor / 1000)); |
779 | 0 | } |
780 | 0 | return Status::OK(); |
781 | 0 | } |
782 | | }; |
783 | | |
784 | | struct Int96toTimestamp : public PhysicalToLogicalConverter { |
785 | 0 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
786 | 0 | ColumnPtr src_col = remove_nullable(src_physical_col); |
787 | 0 | MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); |
788 | |
|
789 | 0 | size_t rows = src_col->size() / sizeof(ParquetInt96); |
790 | 0 | auto& src_data = static_cast<const ColumnInt8*>(src_col.get())->get_data(); |
791 | 0 | auto ParquetInt96_data = (ParquetInt96*)src_data.data(); |
792 | 0 | size_t start_idx = dst_col->size(); |
793 | 0 | dst_col->resize(start_idx + rows); |
794 | 0 | auto& data = static_cast<ColumnDateTimeV2*>(dst_col.get())->get_data(); |
795 | |
|
796 | 0 | for (int i = 0; i < rows; i++) { |
797 | 0 | ParquetInt96 src_cell_data = ParquetInt96_data[i]; |
798 | 0 | auto& dst_value = |
799 | 0 | reinterpret_cast<DateV2Value<DateTimeV2ValueType>&>(data[start_idx + i]); |
800 | |
|
801 | 0 | int64_t timestamp_with_micros = src_cell_data.to_timestamp_micros(); |
802 | 0 | const int64_t epoch_seconds = timestamp_with_micros / 1000000; |
803 | 0 | if (_convert_params->is_fixed_offset) { |
804 | 0 | if (!detail::try_convert_timestamp_with_fixed_offset( |
805 | 0 | dst_value, epoch_seconds, _convert_params->fixed_offset_seconds)) { |
806 | 0 | dst_value.from_unixtime(epoch_seconds, *_convert_params->ctz); |
807 | 0 | } |
808 | 0 | } else if (!detail::try_convert_timestamp_with_lookup(dst_value, epoch_seconds, |
809 | 0 | *_convert_params->ctz)) { |
810 | 0 | dst_value.from_unixtime(epoch_seconds, *_convert_params->ctz); |
811 | 0 | } |
812 | 0 | dst_value.set_microsecond(timestamp_with_micros % 1000000); |
813 | 0 | } |
814 | 0 | return Status::OK(); |
815 | 0 | } |
816 | | }; |
817 | | |
818 | | struct Int96toTimestampTz : public PhysicalToLogicalConverter { |
819 | 0 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
820 | 0 | ColumnPtr src_col = remove_nullable(src_physical_col); |
821 | 0 | MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); |
822 | |
|
823 | 0 | size_t rows = src_col->size() / sizeof(ParquetInt96); |
824 | 0 | const auto& src_data = assert_cast<const ColumnInt8*>(src_col.get())->get_data(); |
825 | 0 | auto* ParquetInt96_data = (ParquetInt96*)src_data.data(); |
826 | 0 | size_t start_idx = dst_col->size(); |
827 | 0 | dst_col->resize(start_idx + rows); |
828 | 0 | auto& data = assert_cast<ColumnTimeStampTz*>(dst_col.get())->get_data(); |
829 | 0 | static const cctz::time_zone UTC = cctz::utc_time_zone(); |
830 | |
|
831 | 0 | for (int i = 0; i < rows; i++) { |
832 | 0 | ParquetInt96 src_cell_data = ParquetInt96_data[i]; |
833 | 0 | int64_t timestamp_with_micros = src_cell_data.to_timestamp_micros(); |
834 | 0 | auto& tz = data[start_idx + i]; |
835 | 0 | tz.from_unixtime(timestamp_with_micros / 1000000, UTC); |
836 | 0 | tz.set_microsecond(timestamp_with_micros % 1000000); |
837 | 0 | } |
838 | 0 | return Status::OK(); |
839 | 0 | } |
840 | | }; |
841 | | |
842 | | } // namespace doris::parquet |