be/src/format/parquet/parquet_column_convert.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <cctz/time_zone.h> |
21 | | #include <gen_cpp/parquet_types.h> |
22 | | #include <libdivide.h> |
23 | | |
24 | | #include <chrono> |
25 | | #include <limits> |
26 | | |
27 | | #include "common/cast_set.h" |
28 | | #include "core/column/column_fixed_length_object.h" |
29 | | #include "core/column/column_varbinary.h" |
30 | | #include "core/column/column_vector.h" |
31 | | #include "core/data_type/data_type_factory.hpp" |
32 | | #include "core/data_type/primitive_type.h" |
33 | | #include "core/extended_types.h" |
34 | | #include "core/field.h" |
35 | | #include "core/types.h" |
36 | | #include "format/column_type_convert.h" |
37 | | #include "format/format_common.h" |
38 | | #include "format/parquet/decoder.h" |
39 | | #include "format/parquet/parquet_common.h" |
40 | | #include "format/parquet/schema_desc.h" |
41 | | #include "util/timezone_utils.h" |
42 | | |
43 | | namespace doris::parquet { |
44 | | namespace detail { |
45 | | |
46 | | inline bool try_split_local_time(int64_t local_time, uint16_t* year, uint8_t* month, uint8_t* day, |
47 | 62 | uint8_t* hour, uint8_t* minute, uint8_t* second) { |
48 | 62 | static const libdivide::divider<int64_t> fast_div_86400(86400); |
49 | 62 | static const libdivide::divider<int64_t> fast_div_3600(3600); |
50 | 62 | static const libdivide::divider<int64_t> fast_div_60(60); |
51 | 62 | static constexpr int64_t kMinSupportedDays = -365LL * 10000; |
52 | 62 | static constexpr int64_t kMaxSupportedDays = 365LL * 10000; |
53 | | |
54 | 62 | int64_t days = local_time / fast_div_86400; |
55 | 62 | int64_t second_of_day = local_time - days * 86400; |
56 | 62 | if (second_of_day < 0) { |
57 | 3 | second_of_day += 86400; |
58 | 3 | --days; |
59 | 3 | } |
60 | 62 | if (days < kMinSupportedDays || days > kMaxSupportedDays) { |
61 | 0 | return false; |
62 | 0 | } |
63 | | |
64 | 62 | const auto ymd = std::chrono::year_month_day {std::chrono::sys_days {std::chrono::days {days}}}; |
65 | 62 | const int y = static_cast<int>(ymd.year()); |
66 | 62 | if (y < 0 || y > std::numeric_limits<uint16_t>::max()) { |
67 | 0 | return false; |
68 | 0 | } |
69 | | |
70 | 62 | const int64_t h = second_of_day / fast_div_3600; |
71 | 62 | const int64_t minute_second = second_of_day - h * 3600; |
72 | 62 | const int64_t m = minute_second / fast_div_60; |
73 | 62 | const int64_t s = minute_second - m * 60; |
74 | | |
75 | 62 | *year = static_cast<uint16_t>(y); |
76 | 62 | *month = static_cast<uint8_t>(static_cast<unsigned>(ymd.month())); |
77 | 62 | *day = static_cast<uint8_t>(static_cast<unsigned>(ymd.day())); |
78 | 62 | *hour = static_cast<uint8_t>(h); |
79 | 62 | *minute = static_cast<uint8_t>(m); |
80 | 62 | *second = static_cast<uint8_t>(s); |
81 | 62 | return true; |
82 | 62 | } |
83 | | |
84 | | template <typename DateType> |
85 | | inline bool try_convert_timestamp_with_fixed_offset(DateType& value, int64_t epoch_seconds, |
86 | 62 | int32_t offset_seconds) { |
87 | 62 | uint16_t year = 0; |
88 | 62 | uint8_t month = 0; |
89 | 62 | uint8_t day = 0; |
90 | 62 | uint8_t hour = 0; |
91 | 62 | uint8_t minute = 0; |
92 | 62 | uint8_t second = 0; |
93 | 62 | if (!try_split_local_time(epoch_seconds + offset_seconds, &year, &month, &day, &hour, &minute, |
94 | 62 | &second)) { |
95 | 0 | return false; |
96 | 0 | } |
97 | | // The caller sets sub-second precision immediately after this conversion. |
98 | 62 | value.unchecked_set_time(year, month, day, hour, minute, second, 0); |
99 | 62 | return true; |
100 | 62 | } |
101 | | |
102 | | template <typename DateType> |
103 | | inline bool try_convert_timestamp_with_lookup(DateType& value, int64_t epoch_seconds, |
104 | 10 | const cctz::time_zone& ctz) { |
105 | 10 | static const auto epoch = std::chrono::time_point_cast<cctz::sys_seconds>( |
106 | 10 | std::chrono::system_clock::from_time_t(0)); |
107 | 10 | cctz::time_point<cctz::sys_seconds> t = epoch + cctz::seconds(epoch_seconds); |
108 | 10 | const int32_t offset = ctz.lookup_offset(t).offset; |
109 | 10 | return try_convert_timestamp_with_fixed_offset(value, epoch_seconds, offset); |
110 | 10 | } |
111 | | |
112 | | } // namespace detail |
113 | | |
114 | | struct ConvertParams { |
115 | | // schema.logicalType.TIMESTAMP.isAdjustedToUTC == false |
116 | | static const cctz::time_zone utc0; |
117 | | // schema.logicalType.TIMESTAMP.isAdjustedToUTC == true, we should set local time zone |
118 | | const cctz::time_zone* ctz = nullptr; |
119 | | bool is_fixed_offset = false; |
120 | | int32_t fixed_offset_seconds = 0; |
121 | | int64_t second_mask = 1; |
122 | | int64_t scale_to_nano_factor = 1; |
123 | | const FieldSchema* field_schema = nullptr; |
124 | | |
125 | | //For UInt8 -> Int16,UInt16 -> Int32,UInt32 -> Int64,UInt64 -> Int128. |
126 | | bool is_type_compatibility = false; |
127 | | |
128 | | /** |
129 | | * Some frameworks like paimon maybe writes non-standard parquet files. Timestamp field doesn't have |
130 | | * logicalType or converted_type to indicates its precision. We have to reset the time mask. |
131 | | */ |
132 | 5 | void reset_time_scale_if_missing(int scale) { |
133 | 5 | const auto& schema = field_schema->parquet_schema; |
134 | 5 | if (!schema.__isset.logicalType && !schema.__isset.converted_type) { |
135 | 0 | int ts_scale = 9; |
136 | 0 | if (scale <= 3) { |
137 | 0 | ts_scale = 3; |
138 | 0 | } else if (scale <= 6) { |
139 | 0 | ts_scale = 6; |
140 | 0 | } |
141 | 0 | second_mask = common::exp10_i64(ts_scale); |
142 | 0 | scale_to_nano_factor = common::exp10_i64(9 - ts_scale); |
143 | | |
144 | | // The missing parque metadata makes it impossible for us to know the time zone information, |
145 | | // so we default to UTC here. |
146 | 0 | if (ctz == nullptr) { |
147 | 0 | ctz = &utc0; |
148 | 0 | } |
149 | 0 | } |
150 | 5 | } |
151 | | |
152 | 227 | void init(const FieldSchema* field_schema_, const cctz::time_zone* ctz_) { |
153 | 227 | field_schema = field_schema_; |
154 | 227 | if (ctz_ != nullptr) { |
155 | 225 | ctz = ctz_; |
156 | 225 | } |
157 | 227 | const auto& schema = field_schema->parquet_schema; |
158 | 227 | if (schema.__isset.logicalType && schema.logicalType.__isset.TIMESTAMP) { |
159 | 5 | const auto& timestamp_info = schema.logicalType.TIMESTAMP; |
160 | 5 | if (!timestamp_info.isAdjustedToUTC) { |
161 | | // should set timezone to utc+0 |
162 | | // Reference: https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#instant-semantics-timestamps-normalized-to-utc |
163 | | // If isAdjustedToUTC = false, the reader should display the same value no mater what local time zone is. For example: |
164 | | // When a timestamp is stored as `1970-01-03 12:00:00`, |
165 | | // if isAdjustedToUTC = true, UTC8 should read as `1970-01-03 20:00:00`, UTC6 should read as `1970-01-03 18:00:00` |
166 | | // if isAdjustedToUTC = false, UTC8 and UTC6 should read as `1970-01-03 12:00:00`, which is the same as `1970-01-03 12:00:00` in UTC0 |
167 | 2 | ctz = &utc0; |
168 | 2 | } |
169 | 5 | const auto& time_unit = timestamp_info.unit; |
170 | 5 | if (time_unit.__isset.MILLIS) { |
171 | 1 | second_mask = 1000; |
172 | 1 | scale_to_nano_factor = 1000000; |
173 | 4 | } else if (time_unit.__isset.MICROS) { |
174 | 4 | second_mask = 1000000; |
175 | 4 | scale_to_nano_factor = 1000; |
176 | 4 | } else if (time_unit.__isset.NANOS) { |
177 | 0 | second_mask = 1000000000; |
178 | 0 | scale_to_nano_factor = 1; |
179 | 0 | } |
180 | 222 | } else if (schema.__isset.converted_type) { |
181 | 61 | const auto& converted_type = schema.converted_type; |
182 | 61 | if (converted_type == tparquet::ConvertedType::TIMESTAMP_MILLIS) { |
183 | 0 | second_mask = 1000; |
184 | 0 | scale_to_nano_factor = 1000000; |
185 | 61 | } else if (converted_type == tparquet::ConvertedType::TIMESTAMP_MICROS) { |
186 | 4 | second_mask = 1000000; |
187 | 4 | scale_to_nano_factor = 1000; |
188 | 4 | } |
189 | 61 | } |
190 | | |
191 | 227 | if (ctz != nullptr) { |
192 | 225 | is_fixed_offset = |
193 | 225 | TimezoneUtils::try_get_fixed_offset_seconds(*ctz, &fixed_offset_seconds); |
194 | 225 | } |
195 | 227 | is_type_compatibility = field_schema_->is_type_compatibility; |
196 | 227 | } |
197 | | }; |
198 | | |
199 | 46 | inline IColumn* get_mutable_inner_column(ColumnPtr& column) { |
200 | 46 | column = IColumn::mutate(std::move(column)); |
201 | 46 | auto mutable_column = column->assert_mutable(); |
202 | 46 | if (mutable_column->is_nullable()) { |
203 | 42 | return &assert_cast<ColumnNullable*>(mutable_column.get())->get_nested_column(); |
204 | 42 | } |
205 | 4 | return mutable_column.get(); |
206 | 46 | } |
207 | | |
208 | 110 | inline size_t get_mutable_inner_column_size(const ColumnPtr& column) { |
209 | 110 | if (column->is_nullable()) { |
210 | 110 | const auto* nullable = assert_cast<const ColumnNullable*>(column.get()); |
211 | 110 | return nullable->get_nested_column().size(); |
212 | 110 | } |
213 | 0 | return column->size(); |
214 | 110 | } |
215 | | |
216 | 56 | inline size_t get_null_map_size_or_inner_column_size(const ColumnPtr& column) { |
217 | 56 | if (column->is_nullable()) { |
218 | 56 | const auto* nullable = assert_cast<const ColumnNullable*>(column.get()); |
219 | 56 | return nullable->get_null_map_column().size(); |
220 | 56 | } |
221 | 0 | return column->size(); |
222 | 56 | } |
223 | | |
224 | 56 | inline size_t get_appended_null_map_start(const ColumnPtr& column, size_t new_rows) { |
225 | 56 | if (!column->is_nullable()) { |
226 | 0 | return 0; |
227 | 0 | } |
228 | 56 | const auto* nullable = assert_cast<const ColumnNullable*>(column.get()); |
229 | 56 | const size_t null_map_size = nullable->get_null_map_column().size(); |
230 | 56 | DCHECK_GE(null_map_size, new_rows); |
231 | 56 | return null_map_size - new_rows; |
232 | 56 | } |
233 | | |
234 | | inline void align_null_map(ColumnPtr& src_column, ColumnPtr& dst_column, size_t old_null_map_size, |
235 | 57 | size_t new_rows, size_t src_null_map_start = 0) { |
236 | 57 | if (!dst_column->is_nullable()) { |
237 | 0 | return; |
238 | 0 | } |
239 | | |
240 | 57 | dst_column = IColumn::mutate(std::move(dst_column)); |
241 | 57 | auto* dst_nullable = assert_cast<ColumnNullable*>(dst_column->assert_mutable().get()); |
242 | 57 | auto& dst_null_map = dst_nullable->get_null_map_column(); |
243 | 57 | const size_t expected_rows = old_null_map_size + new_rows; |
244 | 57 | if (dst_null_map.size() == expected_rows) { |
245 | 16 | return; |
246 | 16 | } |
247 | 57 | DCHECK_EQ(dst_null_map.size(), old_null_map_size); |
248 | 41 | if (src_column->is_nullable()) { |
249 | 41 | const auto* src_nullable = assert_cast<const ColumnNullable*>(src_column.get()); |
250 | 41 | DCHECK_GE(src_nullable->get_null_map_column().size(), src_null_map_start + new_rows); |
251 | 41 | dst_null_map.insert_range_from(src_nullable->get_null_map_column(), src_null_map_start, |
252 | 41 | new_rows); |
253 | 41 | } else { |
254 | 0 | dst_null_map.insert_many_vals(0, new_rows); |
255 | 0 | } |
256 | 41 | } |
257 | | |
// Non-owning view over the raw bytes of a column of fixed-length values,
// as produced by get_fixed_length_physical_data.
struct FixedLengthPhysicalData {
    // First byte of the packed values.
    const uint8_t* data = nullptr;
    // Total number of bytes reachable through `data`.
    size_t byte_size = 0;
    // Number of fixed-length values stored in those bytes.
    size_t rows = 0;
};
263 | | |
264 | | inline FixedLengthPhysicalData get_fixed_length_physical_data(const IColumn& column, |
265 | 4 | size_t type_length) { |
266 | 4 | if (const auto* fixed_length_column = check_and_get_column<ColumnFixedLengthObject>(column)) { |
267 | 2 | DCHECK_EQ(fixed_length_column->item_size(), type_length); |
268 | 2 | return {fixed_length_column->get_data().data(), fixed_length_column->byte_size(), |
269 | 2 | fixed_length_column->size()}; |
270 | 2 | } |
271 | | |
272 | 2 | const auto& uint8_column = assert_cast<const ColumnUInt8&>(column); |
273 | 2 | DCHECK_EQ(uint8_column.size() % type_length, 0); |
274 | 2 | return {uint8_column.get_data().data(), uint8_column.size(), uint8_column.size() / type_length}; |
275 | 4 | } |
276 | | |
277 | | /** |
278 | | * Convert parquet physical column to logical column |
279 | | * In parquet document(https://github.com/apache/parquet-format/blob/master/LogicalTypes.md), |
280 | | * Logical or converted type is the data type of column, physical type is the stored type of column chunk. |
281 | | * eg, decimal type can be stored as INT32, INT64, BYTE_ARRAY, FIXED_LENGTH_BYTE_ARRAY. |
282 | | * So there is a convert process from physical type to logical type. |
283 | | * In addition, Schema change will bring about a change in logical type. |
284 | | * |
285 | | * `PhysicalToLogicalConverter` strips away the conversion of logical type, and reuse `ColumnTypeConverter` |
286 | | * to resolve schema change, allowing parquet reader to only focus on the conversion of physical types. |
287 | | * |
288 | | * Therefore, two layers of converters are designed, applied in three steps: |
289 | | * First, read parquet data with the physical type |
290 | | * Second, convert physical type to logical type |
291 | | * Third, convert logical type to the final type planned by FE(schema change) |
292 | | * |
293 | | * Ultimate performance optimization: |
294 | | * 1. If process of (First => Second) is consistent, eg. from BYTE_ARRAY to string, no additional copies and conversions will be introduced; |
295 | | * 2. If process of (Second => Third) is consistent, no additional copies and conversions will be introduced; |
296 | | * 3. Null maps are owned by each temporary nullable column, and only appended null slices are |
297 | | * copied between conversion stages; |
298 | | * 4. Only create one physical column in physical conversion, and reused in each loop; |
299 | | * 5. Only create one logical column in logical conversion, and reused in each loop; |
300 | | * 6. FIXED_LENGTH_BYTE_ARRAY is read as ColumnFixedLengthObject instead of ColumnString, so |
301 | | * the decoder can copy fixed-size values as a whole while keeping nullable row counts valid. |
302 | | */ |
303 | | class PhysicalToLogicalConverter { |
304 | | protected: |
305 | | ColumnPtr _cached_src_physical_column = nullptr; |
306 | | DataTypePtr _cached_src_physical_type = nullptr; |
307 | | std::unique_ptr<converter::ColumnTypeConverter> _logical_converter = nullptr; |
308 | | |
309 | | std::string _error_msg; |
310 | | |
311 | | std::unique_ptr<ConvertParams> _convert_params; |
312 | | |
313 | | public: |
314 | | static std::unique_ptr<PhysicalToLogicalConverter> get_converter( |
315 | | const FieldSchema* field_schema, DataTypePtr src_logical_type, |
316 | | const DataTypePtr& dst_logical_type, const cctz::time_zone* ctz, |
317 | | bool is_dict_filter = false); |
318 | | |
319 | | static bool is_parquet_native_type(PrimitiveType type); |
320 | | |
321 | | static bool is_decimal_type(PrimitiveType type); |
322 | | |
323 | 223 | PhysicalToLogicalConverter() = default; |
324 | 223 | virtual ~PhysicalToLogicalConverter() = default; |
325 | | |
326 | 5 | virtual Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) { |
327 | 5 | return Status::OK(); |
328 | 5 | } |
329 | | |
330 | | Status convert(ColumnPtr& src_physical_col, DataTypePtr src_logical_type, |
331 | | const DataTypePtr& dst_logical_type, ColumnPtr& dst_logical_col, |
332 | 321 | bool is_dict_filter) { |
333 | 321 | if (is_dict_filter) { |
334 | 0 | src_logical_type = DataTypeFactory::instance().create_data_type( |
335 | 0 | PrimitiveType::TYPE_INT, dst_logical_type->is_nullable()); |
336 | 0 | } |
337 | 321 | if (is_consistent() && _logical_converter->is_consistent()) { |
338 | 274 | dst_logical_col = std::move(src_physical_col); |
339 | 274 | return Status::OK(); |
340 | 274 | } |
341 | 47 | if (_logical_converter->is_consistent()) { |
342 | 39 | const size_t old_rows = get_mutable_inner_column_size(dst_logical_col); |
343 | 39 | const size_t old_null_map_size = |
344 | 39 | get_null_map_size_or_inner_column_size(dst_logical_col); |
345 | 39 | RETURN_IF_ERROR(physical_convert(src_physical_col, dst_logical_col)); |
346 | 39 | const size_t new_rows = get_mutable_inner_column_size(dst_logical_col) - old_rows; |
347 | 39 | align_null_map(src_physical_col, dst_logical_col, old_null_map_size, new_rows, |
348 | 39 | get_appended_null_map_start(src_physical_col, new_rows)); |
349 | 39 | return Status::OK(); |
350 | 39 | } |
351 | | |
352 | 8 | ColumnPtr src_logical_column; |
353 | 8 | if (is_consistent()) { |
354 | 5 | src_logical_column = src_physical_col; |
355 | 5 | } else { |
356 | 3 | src_logical_column = _logical_converter->get_column(src_logical_type, dst_logical_col, |
357 | 3 | dst_logical_type); |
358 | 3 | } |
359 | 8 | const size_t src_old_rows = get_mutable_inner_column_size(src_logical_column); |
360 | 8 | const size_t src_old_null_map_size = |
361 | 8 | get_null_map_size_or_inner_column_size(src_logical_column); |
362 | 8 | RETURN_IF_ERROR(physical_convert(src_physical_col, src_logical_column)); |
363 | 8 | const size_t src_new_rows = |
364 | 8 | get_mutable_inner_column_size(src_logical_column) - src_old_rows; |
365 | 8 | align_null_map(src_physical_col, src_logical_column, src_old_null_map_size, src_new_rows, |
366 | 8 | get_appended_null_map_start(src_physical_col, src_new_rows)); |
367 | | |
368 | 8 | dst_logical_col = IColumn::mutate(std::move(dst_logical_col)); |
369 | 8 | const size_t dst_old_rows = get_mutable_inner_column_size(dst_logical_col); |
370 | 8 | const size_t dst_old_null_map_size = |
371 | 8 | get_null_map_size_or_inner_column_size(dst_logical_col); |
372 | 8 | auto converted_column = dst_logical_col->assert_mutable(); |
373 | 8 | RETURN_IF_ERROR(_logical_converter->convert(src_logical_column, converted_column)); |
374 | 8 | const size_t dst_new_rows = get_mutable_inner_column_size(dst_logical_col) - dst_old_rows; |
375 | 8 | align_null_map(src_logical_column, dst_logical_col, dst_old_null_map_size, dst_new_rows, |
376 | 8 | get_appended_null_map_start(src_logical_column, dst_new_rows)); |
377 | 8 | return Status::OK(); |
378 | 8 | } |
379 | | |
380 | | virtual ColumnPtr get_physical_column(tparquet::Type::type src_physical_type, |
381 | | DataTypePtr src_logical_type, |
382 | | ColumnPtr& dst_logical_column, |
383 | | const DataTypePtr& dst_logical_type, bool is_dict_filter); |
384 | | |
385 | 319 | DataTypePtr& get_physical_type() { return _cached_src_physical_type; } |
386 | | |
387 | 319 | bool read_directly_into_dst_logical_column() { |
388 | 319 | return !_convert_params->is_type_compatibility && is_consistent() && |
389 | 319 | _logical_converter->is_consistent(); |
390 | 319 | } |
391 | | |
392 | 131 | virtual bool is_consistent() { return false; } |
393 | | |
394 | 372 | virtual bool support() { return true; } |
395 | | |
396 | 0 | std::string get_error_msg() { return _error_msg; } |
397 | | }; |
398 | | |
399 | | class ConsistentPhysicalConverter : public PhysicalToLogicalConverter { |
400 | 911 | bool is_consistent() override { return true; } |
401 | | }; |
402 | | |
403 | | class UnsupportedConverter : public PhysicalToLogicalConverter { |
404 | | public: |
405 | 0 | UnsupportedConverter(std::string error_msg) { _error_msg = error_msg; } |
406 | | |
407 | | UnsupportedConverter(tparquet::Type::type src_physical_type, |
408 | 0 | const DataTypePtr& src_logical_type) { |
409 | 0 | std::string src_physical_str = tparquet::to_string(src_physical_type); |
410 | 0 | std::string src_logical_str = src_logical_type->get_name(); |
411 | 0 | _error_msg = src_physical_str + " => " + src_logical_str; |
412 | 0 | } |
413 | | |
414 | 0 | bool support() override { return false; } |
415 | | |
416 | 0 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
417 | 0 | return Status::InternalError("Unsupported physical to logical type: {}", _error_msg); |
418 | 0 | } |
419 | | }; |
420 | | |
421 | | // for tinyint, smallint |
422 | | template <PrimitiveType IntPrimitiveType> |
423 | | class LittleIntPhysicalConverter : public PhysicalToLogicalConverter { |
424 | 28 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
425 | 28 | using DstCppType = typename PrimitiveTypeTraits<IntPrimitiveType>::CppType; |
426 | 28 | using DstColumnType = typename PrimitiveTypeTraits<IntPrimitiveType>::ColumnType; |
427 | 28 | ColumnPtr from_col = remove_nullable(src_physical_col); |
428 | 28 | IColumn* to_col = get_mutable_inner_column(src_logical_column); |
429 | | |
430 | 28 | size_t rows = from_col->size(); |
431 | | // always comes from tparquet::Type::INT32 |
432 | 28 | auto& src_data = assert_cast<const ColumnInt32*>(from_col.get())->get_data(); |
433 | 28 | size_t start_idx = to_col->size(); |
434 | 28 | to_col->resize(start_idx + rows); |
435 | 28 | auto& data = assert_cast<DstColumnType&>(*to_col).get_data(); |
436 | 108 | for (int i = 0; i < rows; ++i) { |
437 | 80 | data[start_idx + i] = static_cast<DstCppType>(src_data[i]); |
438 | 80 | } |
439 | | |
440 | 28 | return Status::OK(); |
441 | 28 | } _ZN5doris7parquet26LittleIntPhysicalConverterILNS_13PrimitiveTypeE3EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Line | Count | Source | 424 | 17 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { | 425 | 17 | using DstCppType = typename PrimitiveTypeTraits<IntPrimitiveType>::CppType; | 426 | 17 | using DstColumnType = typename PrimitiveTypeTraits<IntPrimitiveType>::ColumnType; | 427 | 17 | ColumnPtr from_col = remove_nullable(src_physical_col); | 428 | 17 | IColumn* to_col = get_mutable_inner_column(src_logical_column); | 429 | | | 430 | 17 | size_t rows = from_col->size(); | 431 | | // always comes from tparquet::Type::INT32 | 432 | 17 | auto& src_data = assert_cast<const ColumnInt32*>(from_col.get())->get_data(); | 433 | 17 | size_t start_idx = to_col->size(); | 434 | 17 | to_col->resize(start_idx + rows); | 435 | 17 | auto& data = assert_cast<DstColumnType&>(*to_col).get_data(); | 436 | 59 | for (int i = 0; i < rows; ++i) { | 437 | 42 | data[start_idx + i] = static_cast<DstCppType>(src_data[i]); | 438 | 42 | } | 439 | | | 440 | 17 | return Status::OK(); | 441 | 17 | } |
_ZN5doris7parquet26LittleIntPhysicalConverterILNS_13PrimitiveTypeE4EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Line | Count | Source | 424 | 11 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { | 425 | 11 | using DstCppType = typename PrimitiveTypeTraits<IntPrimitiveType>::CppType; | 426 | 11 | using DstColumnType = typename PrimitiveTypeTraits<IntPrimitiveType>::ColumnType; | 427 | 11 | ColumnPtr from_col = remove_nullable(src_physical_col); | 428 | 11 | IColumn* to_col = get_mutable_inner_column(src_logical_column); | 429 | | | 430 | 11 | size_t rows = from_col->size(); | 431 | | // always comes from tparquet::Type::INT32 | 432 | 11 | auto& src_data = assert_cast<const ColumnInt32*>(from_col.get())->get_data(); | 433 | 11 | size_t start_idx = to_col->size(); | 434 | 11 | to_col->resize(start_idx + rows); | 435 | 11 | auto& data = assert_cast<DstColumnType&>(*to_col).get_data(); | 436 | 49 | for (int i = 0; i < rows; ++i) { | 437 | 38 | data[start_idx + i] = static_cast<DstCppType>(src_data[i]); | 438 | 38 | } | 439 | | | 440 | 11 | return Status::OK(); | 441 | 11 | } |
|
442 | | }; |
443 | | |
444 | | template <PrimitiveType type> |
445 | | struct UnsignedTypeTraits; |
446 | | |
447 | | template <> |
448 | | struct UnsignedTypeTraits<TYPE_SMALLINT> { |
449 | | using UnsignedCppType = UInt8; |
450 | | //https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#unsigned-integers |
451 | | //INT(8, false), INT(16, false), and INT(32, false) must annotate an int32 primitive type and INT(64, false) |
452 | | //must annotate an int64 primitive type. |
453 | | using StorageCppType = Int32; |
454 | | using StorageColumnType = ColumnInt32; |
455 | | }; |
456 | | |
457 | | template <> |
458 | | struct UnsignedTypeTraits<TYPE_INT> { |
459 | | using UnsignedCppType = UInt16; |
460 | | using StorageCppType = Int32; |
461 | | using StorageColumnType = ColumnInt32; |
462 | | }; |
463 | | |
464 | | template <> |
465 | | struct UnsignedTypeTraits<TYPE_BIGINT> { |
466 | | using UnsignedCppType = UInt32; |
467 | | using StorageCppType = Int32; |
468 | | using StorageColumnType = ColumnInt32; |
469 | | }; |
470 | | |
471 | | template <> |
472 | | struct UnsignedTypeTraits<TYPE_LARGEINT> { |
473 | | using UnsignedCppType = UInt64; |
474 | | using StorageCppType = Int64; |
475 | | using StorageColumnType = ColumnInt64; |
476 | | }; |
477 | | |
478 | | template <PrimitiveType IntPrimitiveType> |
479 | | class UnsignedIntegerConverter : public PhysicalToLogicalConverter { |
480 | 0 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
481 | 0 | using UnsignedCppType = typename UnsignedTypeTraits<IntPrimitiveType>::UnsignedCppType; |
482 | 0 | using StorageCppType = typename UnsignedTypeTraits<IntPrimitiveType>::StorageCppType; |
483 | 0 | using StorageColumnType = typename UnsignedTypeTraits<IntPrimitiveType>::StorageColumnType; |
484 | 0 | using DstColumnType = typename PrimitiveTypeTraits<IntPrimitiveType>::ColumnType; |
485 | |
|
486 | 0 | ColumnPtr from_col = remove_nullable(src_physical_col); |
487 | 0 | IColumn* to_col = get_mutable_inner_column(src_logical_column); |
488 | 0 | auto& src_data = assert_cast<const StorageColumnType*>(from_col.get())->get_data(); |
489 | |
|
490 | 0 | size_t rows = src_data.size(); |
491 | 0 | size_t start_idx = to_col->size(); |
492 | 0 | to_col->resize(start_idx + rows); |
493 | 0 | auto& data = assert_cast<DstColumnType&>(*to_col).get_data(); |
494 | |
|
495 | 0 | for (int i = 0; i < rows; i++) { |
496 | 0 | StorageCppType src_value = src_data[i]; |
497 | 0 | auto unsigned_value = static_cast<UnsignedCppType>(src_value); |
498 | 0 | data[start_idx + i] = unsigned_value; |
499 | 0 | } |
500 | |
|
501 | 0 | return Status::OK(); |
502 | 0 | } Unexecuted instantiation: _ZN5doris7parquet24UnsignedIntegerConverterILNS_13PrimitiveTypeE4EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet24UnsignedIntegerConverterILNS_13PrimitiveTypeE5EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet24UnsignedIntegerConverterILNS_13PrimitiveTypeE6EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet24UnsignedIntegerConverterILNS_13PrimitiveTypeE7EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ |
503 | | }; |
504 | | |
505 | | class FixedSizeBinaryConverter : public PhysicalToLogicalConverter { |
506 | | private: |
507 | | int _type_length; |
508 | | |
509 | | public: |
510 | 1 | FixedSizeBinaryConverter(int type_length) : _type_length(type_length) {} |
511 | | |
512 | 1 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
513 | 1 | ColumnPtr from_col = remove_nullable(src_physical_col); |
514 | 1 | IColumn* to_col = get_mutable_inner_column(src_logical_column); |
515 | | |
516 | 1 | const auto src_data = get_fixed_length_physical_data(*from_col, _type_length); |
517 | 1 | size_t length = src_data.byte_size; |
518 | 1 | size_t num_values = src_data.rows; |
519 | 1 | auto& string_col = static_cast<ColumnString&>(*to_col); |
520 | 1 | auto& offsets = string_col.get_offsets(); |
521 | 1 | auto& chars = string_col.get_chars(); |
522 | | |
523 | 1 | size_t origin_size = chars.size(); |
524 | 1 | chars.resize(origin_size + length); |
525 | 1 | memcpy(chars.data() + origin_size, src_data.data, length); |
526 | | |
527 | 1 | origin_size = offsets.size(); |
528 | 1 | offsets.resize(origin_size + num_values); |
529 | 1 | auto end_offset = offsets[origin_size - 1]; |
530 | 4 | for (int i = 0; i < num_values; ++i) { |
531 | 3 | end_offset += _type_length; |
532 | 3 | offsets[origin_size + i] = end_offset; |
533 | 3 | } |
534 | | |
535 | 1 | return Status::OK(); |
536 | 1 | } |
537 | | }; |
538 | | |
539 | | class Float16PhysicalConverter : public PhysicalToLogicalConverter { |
540 | | private: |
541 | | int _type_length; |
542 | | |
543 | | public: |
544 | 0 | Float16PhysicalConverter(int type_length) : _type_length(type_length) { |
545 | 0 | DCHECK_EQ(_type_length, 2); |
546 | 0 | } |
547 | | |
548 | 0 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
549 | 0 | ColumnPtr from_col = remove_nullable(src_physical_col); |
550 | 0 | IColumn* to_col = get_mutable_inner_column(src_logical_column); |
551 | |
|
552 | 0 | const auto src_data = get_fixed_length_physical_data(*from_col, _type_length); |
553 | 0 | size_t num_values = src_data.rows; |
554 | 0 | auto* to_float_column = assert_cast<ColumnFloat32*>(to_col); |
555 | 0 | size_t start_idx = to_float_column->size(); |
556 | 0 | to_float_column->resize(start_idx + num_values); |
557 | 0 | auto& to_float_column_data = to_float_column->get_data(); |
558 | 0 | const auto* ptr = src_data.data; |
559 | 0 | for (int i = 0; i < num_values; ++i) { |
560 | 0 | size_t offset = i * _type_length; |
561 | 0 | const auto* data_ptr = ptr + offset; |
562 | 0 | uint16_t raw; |
563 | 0 | memcpy(&raw, data_ptr, sizeof(uint16_t)); |
564 | 0 | float value = half_to_float(raw); |
565 | 0 | to_float_column_data[start_idx + i] = value; |
566 | 0 | } |
567 | |
|
568 | 0 | return Status::OK(); |
569 | 0 | } |
570 | | |
571 | 0 | float half_to_float(uint16_t h) { |
572 | | // uint16_t h: half precision floating point |
573 | | // bit 15: sign(1 bit) |
574 | | // bits 14..10 : exponent(5 bits) |
575 | | // bits 9..0 : mantissa(10 bits) |
576 | | |
577 | | // sign bit placed to float32 bit31 |
578 | 0 | uint32_t sign = (h & 0x8000U) << 16; // 0x8000 << 16 = 0x8000_0000 |
579 | | // exponent:(5 bits) |
580 | 0 | uint32_t exp = (h & 0x7C00U) >> 10; // 0x7C00 = 0111 1100 0000 (half exponent mask) |
581 | | // mantissa(10 bits) |
582 | 0 | uint32_t mant = (h & 0x03FFU); // 10-bit fraction |
583 | | |
584 | | // cases:Zero/Subnormal, Normal, Inf/NaN |
585 | 0 | if (exp == 0) { |
586 | | // exp==0: Zero or Subnormal ---------- |
587 | 0 | if (mant == 0) { |
588 | | // ±0.0 |
589 | | // sign = either 0x00000000 or 0x80000000 |
590 | 0 | return std::bit_cast<float>(sign); |
591 | 0 | } else { |
592 | | // ---------- Subnormal ---------- |
593 | | // half subnormal: |
594 | | // value = (-1)^sign * (mant / 2^10) * 2^(1 - bias) |
595 | | // half bias = 15 → exponent = 1 - 15 = -14 |
596 | 0 | float f = (static_cast<float>(mant) / 1024.0F) * std::powf(2.0F, -14.0F); |
597 | 0 | return sign ? -f : f; |
598 | 0 | } |
599 | 0 | } else if (exp == 0x1F) { |
600 | | // exp==31: Inf or NaN ---------- |
601 | | // float32: |
602 | | // exponent = 255 (0xFF) |
603 | | // mantissa = mant << 13 |
604 | 0 | uint32_t f = sign | 0x7F800000U | (mant << 13); |
605 | 0 | return std::bit_cast<float>(f); |
606 | 0 | } else { |
607 | | // Normalized ---------- |
608 | | // float32 exponent: |
609 | | // exp32 = exp16 - bias16 + bias32 |
610 | | // bias16 = 15 |
611 | | // bias32 = 127 |
612 | | // |
613 | | // so: exp32 = exp + (127 - 15) |
614 | 0 | uint32_t f = sign | ((exp + (127 - 15)) << 23) // place to float32 exponent |
615 | 0 | | (mant << 13); // mantissa align to 23 bits |
616 | 0 | return std::bit_cast<float>(f); |
617 | 0 | } |
618 | 0 | } |
619 | | }; |
620 | | |
621 | | class UUIDVarBinaryConverter : public PhysicalToLogicalConverter { |
622 | | public: |
623 | 1 | UUIDVarBinaryConverter(int type_length) : _type_length(type_length) {} |
624 | | |
625 | 1 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
626 | 1 | DCHECK(!is_column_const(*src_physical_col)) << src_physical_col->dump_structure(); |
627 | 1 | DCHECK(!is_column_const(*src_logical_column)) << src_logical_column->dump_structure(); |
628 | 1 | const ColumnPtr from_col = remove_nullable(src_physical_col); |
629 | 1 | const auto src_data = get_fixed_length_physical_data(*from_col, _type_length); |
630 | | |
631 | 1 | IColumn* to_col = get_mutable_inner_column(src_logical_column); |
632 | 1 | auto* to_varbinary_column = assert_cast<ColumnVarbinary*>(to_col); |
633 | 1 | size_t num_values = src_data.rows; |
634 | 1 | const auto* ptr = src_data.data; |
635 | | |
636 | 4 | for (int i = 0; i < num_values; ++i) { |
637 | 3 | auto offset = i * _type_length; |
638 | 3 | const char* data_ptr = reinterpret_cast<const char*>(ptr + offset); |
639 | 3 | to_varbinary_column->insert_data(data_ptr, _type_length); |
640 | 3 | } |
641 | 1 | return Status::OK(); |
642 | 1 | } |
643 | | |
644 | | private: |
645 | | int _type_length; |
646 | | }; |
647 | | |
648 | | template <PrimitiveType DecimalPType> |
649 | | class FixedSizeToDecimal : public PhysicalToLogicalConverter { |
650 | | public: |
651 | | using DecimalType = typename PrimitiveTypeTraits<DecimalPType>::CppType; |
652 | 2 | FixedSizeToDecimal(int32_t type_length) : _type_length(type_length) {}Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EEC2Ei _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EEC2Ei Line | Count | Source | 652 | 2 | FixedSizeToDecimal(int32_t type_length) : _type_length(type_length) {} |
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EEC2Ei Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EEC2Ei |
653 | | |
654 | 2 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
655 | 2 | ColumnPtr src_col = remove_nullable(src_physical_col); |
656 | 2 | IColumn* dst_col = get_mutable_inner_column(src_logical_column); |
657 | | |
658 | 2 | #define M(FixedTypeLength, ValueCopyType) \ |
659 | 2 | case FixedTypeLength: \ |
660 | 2 | return _convert_internal<FixedTypeLength, ValueCopyType>(src_col, dst_col); |
661 | | |
662 | 2 | #define APPLY_FOR_DECIMALS() \ |
663 | 2 | M(1, int64_t) \ |
664 | 0 | M(2, int64_t) \ |
665 | 0 | M(3, int64_t) \ |
666 | 0 | M(4, int64_t) \ |
667 | 1 | M(5, int64_t) \ |
668 | 1 | M(6, int64_t) \ |
669 | 0 | M(7, int64_t) \ |
670 | 1 | M(8, int64_t) \ |
671 | 1 | M(9, int128_t) \ |
672 | 0 | M(10, int128_t) \ |
673 | 0 | M(11, int128_t) \ |
674 | 0 | M(12, int128_t) \ |
675 | 0 | M(13, int128_t) \ |
676 | 0 | M(14, int128_t) \ |
677 | 0 | M(15, int128_t) \ |
678 | 0 | M(16, int128_t) \ |
679 | 0 | M(17, wide::Int256) \ |
680 | 0 | M(18, wide::Int256) \ |
681 | 0 | M(19, wide::Int256) \ |
682 | 0 | M(20, wide::Int256) \ |
683 | 0 | M(21, wide::Int256) \ |
684 | 0 | M(22, wide::Int256) \ |
685 | 0 | M(23, wide::Int256) \ |
686 | 0 | M(24, wide::Int256) \ |
687 | 0 | M(25, wide::Int256) \ |
688 | 0 | M(26, wide::Int256) \ |
689 | 0 | M(27, wide::Int256) \ |
690 | 0 | M(28, wide::Int256) \ |
691 | 0 | M(29, wide::Int256) \ |
692 | 0 | M(30, wide::Int256) \ |
693 | 0 | M(31, wide::Int256) \ |
694 | 0 | M(32, wide::Int256) |
695 | | |
696 | 2 | switch (_type_length) { |
697 | 0 | APPLY_FOR_DECIMALS() |
698 | 0 | default: |
699 | 0 | throw Exception(Status::FatalError("__builtin_unreachable")); |
700 | 2 | } |
701 | 0 | return Status::OK(); |
702 | 2 | #undef APPLY_FOR_DECIMALS |
703 | 2 | #undef M |
704 | 2 | } Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Line | Count | Source | 654 | 2 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { | 655 | 2 | ColumnPtr src_col = remove_nullable(src_physical_col); | 656 | 2 | IColumn* dst_col = get_mutable_inner_column(src_logical_column); | 657 | | | 658 | 2 | #define M(FixedTypeLength, ValueCopyType) \ | 659 | 2 | case FixedTypeLength: \ | 660 | 2 | return _convert_internal<FixedTypeLength, ValueCopyType>(src_col, dst_col); | 661 | | | 662 | 2 | #define APPLY_FOR_DECIMALS() \ | 663 | 2 | M(1, int64_t) \ | 664 | 2 | M(2, int64_t) \ | 665 | 2 | M(3, int64_t) \ | 666 | 2 | M(4, int64_t) \ | 667 | 2 | M(5, int64_t) \ | 668 | 2 | M(6, int64_t) \ | 669 | 2 | M(7, int64_t) \ | 670 | 2 | M(8, int64_t) \ | 671 | 2 | M(9, int128_t) \ | 672 | 2 | M(10, int128_t) \ | 673 | 2 | M(11, int128_t) \ | 674 | 2 | M(12, int128_t) \ | 675 | 2 | M(13, int128_t) \ | 676 | 2 | M(14, int128_t) \ | 677 | 2 | M(15, int128_t) \ | 678 | 2 | M(16, int128_t) \ | 679 | 2 | M(17, wide::Int256) \ | 680 | 2 | M(18, wide::Int256) \ | 681 | 2 | M(19, wide::Int256) \ | 682 | 2 | M(20, wide::Int256) \ | 683 | 2 | M(21, wide::Int256) \ | 684 | 2 | M(22, wide::Int256) \ | 685 | 2 | M(23, wide::Int256) \ | 686 | 2 | M(24, wide::Int256) \ | 687 | 2 | M(25, wide::Int256) \ | 688 | 2 | M(26, wide::Int256) \ | 689 | 2 | M(27, wide::Int256) \ | 690 | 2 | M(28, wide::Int256) \ | 691 | 2 | M(29, wide::Int256) \ | 692 | 2 | M(30, wide::Int256) \ | 693 | 2 | M(31, wide::Int256) \ | 694 | 2 | M(32, wide::Int256) | 695 | | | 696 | 2 | switch (_type_length) { | 697 | 0 | APPLY_FOR_DECIMALS() | 698 | 0 | default: | 699 | 0 | throw Exception(Status::FatalError("__builtin_unreachable")); | 700 | 2 
| } | 701 | 0 | return Status::OK(); | 702 | 2 | #undef APPLY_FOR_DECIMALS | 703 | 2 | #undef M | 704 | 2 | } |
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ |
705 | | |
706 | | template <int fixed_type_length, typename ValueCopyType> |
707 | 2 | Status _convert_internal(ColumnPtr& src_col, IColumn* dst_col) { |
708 | 2 | const auto src_data = get_fixed_length_physical_data(*src_col, fixed_type_length); |
709 | 2 | size_t rows = src_data.rows; |
710 | 2 | const auto* buf = src_data.data; |
711 | 2 | size_t start_idx = dst_col->size(); |
712 | 2 | dst_col->resize(start_idx + rows); |
713 | | |
714 | 2 | auto& data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col)->get_data(); |
715 | 2 | size_t offset = 0; |
716 | 6 | for (int i = 0; i < rows; i++) { |
717 | | // When Decimal in parquet is stored in byte arrays, binary and fixed, |
718 | | // the unscaled number must be encoded as two's complement using big-endian byte order. |
719 | 4 | ValueCopyType value = 0; |
720 | 4 | memcpy(reinterpret_cast<char*>(&value), buf + offset, sizeof(value)); |
721 | 4 | offset += fixed_type_length; |
722 | 4 | value = to_endian<std::endian::big>(value); |
723 | 4 | value = value >> ((sizeof(value) - fixed_type_length) * 8); |
724 | 4 | auto& v = reinterpret_cast<DecimalType&>(data[start_idx + i]); |
725 | 4 | v = (DecimalType)value; |
726 | 4 | } |
727 | | |
728 | 2 | return Status::OK(); |
729 | 2 | } Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi1ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi2ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi3ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi4ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi5ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi6ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi7ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi8ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi9EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi10EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi11EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi12EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi13EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi14EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi15EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi16EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi17EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi18EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi19EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi20EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi21EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi22EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi23EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi24EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi25EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi26EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi27EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi28EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi29EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi30EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi31EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi32EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi1ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi2ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi3ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi4ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi5ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Line | Count | Source | 707 | 1 | Status _convert_internal(ColumnPtr& src_col, IColumn* dst_col) { | 708 | 1 | const auto src_data = get_fixed_length_physical_data(*src_col, fixed_type_length); | 709 | 1 | size_t rows = src_data.rows; | 710 | 1 | const auto* buf = src_data.data; | 711 | 1 | size_t start_idx = dst_col->size(); | 712 | 1 | dst_col->resize(start_idx + rows); | 713 | | | 714 | 1 | auto& data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col)->get_data(); | 715 | 1 | size_t offset = 0; | 716 | 3 | for (int i = 0; i < rows; i++) { | 717 | | // When Decimal in parquet is stored in byte arrays, binary and fixed, | 718 | | // the unscaled number must be encoded as two's complement using big-endian byte order. 
| 719 | 2 | ValueCopyType value = 0; | 720 | 2 | memcpy(reinterpret_cast<char*>(&value), buf + offset, sizeof(value)); | 721 | 2 | offset += fixed_type_length; | 722 | 2 | value = to_endian<std::endian::big>(value); | 723 | 2 | value = value >> ((sizeof(value) - fixed_type_length) * 8); | 724 | 2 | auto& v = reinterpret_cast<DecimalType&>(data[start_idx + i]); | 725 | 2 | v = (DecimalType)value; | 726 | 2 | } | 727 | | | 728 | 1 | return Status::OK(); | 729 | 1 | } |
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi6ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi7ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi8ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Line | Count | Source | 707 | 1 | Status _convert_internal(ColumnPtr& src_col, IColumn* dst_col) { | 708 | 1 | const auto src_data = get_fixed_length_physical_data(*src_col, fixed_type_length); | 709 | 1 | size_t rows = src_data.rows; | 710 | 1 | const auto* buf = src_data.data; | 711 | 1 | size_t start_idx = dst_col->size(); | 712 | 1 | dst_col->resize(start_idx + rows); | 713 | | | 714 | 1 | auto& data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col)->get_data(); | 715 | 1 | size_t offset = 0; | 716 | 3 | for (int i = 0; i < rows; i++) { | 717 | | // When Decimal in parquet is stored in byte arrays, binary and fixed, | 718 | | // the unscaled number must be encoded as two's complement using big-endian byte order. | 719 | 2 | ValueCopyType value = 0; | 720 | 2 | memcpy(reinterpret_cast<char*>(&value), buf + offset, sizeof(value)); | 721 | 2 | offset += fixed_type_length; | 722 | 2 | value = to_endian<std::endian::big>(value); | 723 | 2 | value = value >> ((sizeof(value) - fixed_type_length) * 8); | 724 | 2 | auto& v = reinterpret_cast<DecimalType&>(data[start_idx + i]); | 725 | 2 | v = (DecimalType)value; | 726 | 2 | } | 727 | | | 728 | 1 | return Status::OK(); | 729 | 1 | } |
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi9EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi10EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi11EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi12EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi13EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi14EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi15EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi16EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi17EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi18EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi19EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ 
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi20EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi21EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi22EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi23EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi24EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi25EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi26EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi27EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi28EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi29EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted 
instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi30EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi31EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi32EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi1ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi2ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi3ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi4ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi5ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi6ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi7ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi8ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted 
instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi9EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi10EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi11EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi12EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi13EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi14EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi15EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi16EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi17EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi18EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi19EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted 
instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi20EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi21EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi22EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi23EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi24EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi25EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi26EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi27EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi28EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi29EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi30EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi31EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi32EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi1ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi2ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi3ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi4ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi5ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi6ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi7ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi8ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi9EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi10EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi11EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi12EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi13EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi14EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi15EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi16EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi17EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi18EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi19EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi20EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi21EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi22EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi23EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi24EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi25EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi26EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi27EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi28EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi29EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: 
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi30EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi31EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi32EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_ |
730 | | |
731 | | private: |
732 | | int32_t _type_length; |
733 | | }; |
734 | | |
735 | | template <PrimitiveType DecimalPType> |
736 | | class StringToDecimal : public PhysicalToLogicalConverter { |
737 | | using DecimalType = typename PrimitiveTypeTraits<DecimalPType>::CppType; |
738 | 0 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
739 | 0 | using ValueCopyType = DecimalType::NativeType; |
740 | 0 | ColumnPtr src_col = remove_nullable(src_physical_col); |
741 | 0 | IColumn* dst_col = get_mutable_inner_column(src_logical_column); |
742 | |
|
743 | 0 | size_t rows = src_col->size(); |
744 | 0 | auto buf = static_cast<const ColumnString*>(src_col.get())->get_chars().data(); |
745 | 0 | auto& offset = static_cast<const ColumnString*>(src_col.get())->get_offsets(); |
746 | 0 | size_t start_idx = dst_col->size(); |
747 | 0 | dst_col->resize(start_idx + rows); |
748 | |
|
749 | 0 | auto& data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col)->get_data(); |
750 | 0 | for (int i = 0; i < rows; i++) { |
751 | 0 | size_t len = offset[i] - offset[i - 1]; |
752 | | // When Decimal in parquet is stored in byte arrays, binary and fixed, |
753 | | // the unscaled number must be encoded as two's complement using big-endian byte order. |
754 | 0 | ValueCopyType value = 0; |
755 | 0 | if (len > 0) { |
756 | 0 | memcpy(reinterpret_cast<char*>(&value), buf + offset[i - 1], len); |
757 | 0 | value = to_endian<std::endian::big>(value); |
758 | 0 | value = value >> ((sizeof(value) - len) * 8); |
759 | 0 | } |
760 | 0 | auto& v = reinterpret_cast<DecimalType&>(data[start_idx + i]); |
761 | 0 | v = (DecimalType)value; |
762 | 0 | } |
763 | |
|
764 | 0 | return Status::OK(); |
765 | 0 | } Unexecuted instantiation: _ZN5doris7parquet15StringToDecimalILNS_13PrimitiveTypeE28EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet15StringToDecimalILNS_13PrimitiveTypeE29EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet15StringToDecimalILNS_13PrimitiveTypeE30EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet15StringToDecimalILNS_13PrimitiveTypeE35EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ |
766 | | }; |
767 | | |
768 | | template <PrimitiveType NumberType, PrimitiveType DecimalPType> |
769 | | class NumberToDecimal : public PhysicalToLogicalConverter { |
770 | | using DecimalType = typename PrimitiveTypeTraits<DecimalPType>::CppType; |
771 | 2 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
772 | 2 | using ValueCopyType = typename DecimalType::NativeType; |
773 | 2 | ColumnPtr src_col = remove_nullable(src_physical_col); |
774 | 2 | IColumn* dst_col = get_mutable_inner_column(src_logical_column); |
775 | | |
776 | 2 | size_t rows = src_col->size(); |
777 | 2 | auto* src_data = |
778 | 2 | static_cast<const ColumnVector<NumberType>*>(src_col.get())->get_data().data(); |
779 | 2 | size_t start_idx = dst_col->size(); |
780 | 2 | dst_col->resize(start_idx + rows); |
781 | | |
782 | 2 | auto* data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col)->get_data().data(); |
783 | | |
784 | 24 | for (int i = 0; i < rows; i++) { |
785 | 22 | ValueCopyType value; |
786 | 22 | if constexpr (std::is_same_v<DecimalType, Decimal256>) { |
787 | 0 | value = src_data[i]; |
788 | 22 | } else { |
789 | 22 | value = cast_set<ValueCopyType, typename PrimitiveTypeTraits<NumberType>::CppType, |
790 | 22 | false>(src_data[i]); |
791 | 22 | } |
792 | | |
793 | 22 | data[start_idx + i] = (DecimalType)value; |
794 | 22 | } |
795 | 2 | return Status::OK(); |
796 | 2 | } Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE5ELS2_28EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE6ELS2_28EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE5ELS2_29EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE6ELS2_29EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Line | Count | Source | 771 | 2 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { | 772 | 2 | using ValueCopyType = typename DecimalType::NativeType; | 773 | 2 | ColumnPtr src_col = remove_nullable(src_physical_col); | 774 | 2 | IColumn* dst_col = get_mutable_inner_column(src_logical_column); | 775 | | | 776 | 2 | size_t rows = src_col->size(); | 777 | 2 | auto* src_data = | 778 | 2 | static_cast<const ColumnVector<NumberType>*>(src_col.get())->get_data().data(); | 779 | 2 | size_t start_idx = dst_col->size(); | 780 | 2 | dst_col->resize(start_idx + rows); | 781 | | | 782 | 2 | auto* data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col)->get_data().data(); | 783 | | | 784 | 24 | for (int i = 0; i < rows; i++) { | 785 | 22 | ValueCopyType value; | 786 | | if constexpr (std::is_same_v<DecimalType, Decimal256>) { | 787 | | value = src_data[i]; | 788 | 22 | } else { | 789 | 22 | value = cast_set<ValueCopyType, typename PrimitiveTypeTraits<NumberType>::CppType, | 790 | 22 | false>(src_data[i]); | 791 | 22 | } | 792 | | | 793 | 22 | data[start_idx + i] = (DecimalType)value; | 794 | 22 | } | 795 | 2 | return Status::OK(); | 796 | 2 | } |
Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE5ELS2_30EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE6ELS2_30EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE5ELS2_35EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE6ELS2_35EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_ |
797 | | }; |
798 | | |
799 | | class Int32ToDate : public PhysicalToLogicalConverter { |
800 | 7 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
801 | 7 | ColumnPtr src_col = remove_nullable(src_physical_col); |
802 | 7 | IColumn* dst_col = get_mutable_inner_column(src_logical_column); |
803 | | |
804 | 7 | size_t rows = src_col->size(); |
805 | 7 | size_t start_idx = dst_col->size(); |
806 | 7 | dst_col->reserve(start_idx + rows); |
807 | | |
808 | 7 | auto& src_data = static_cast<const ColumnInt32*>(src_col.get())->get_data(); |
809 | 7 | auto& data = static_cast<ColumnDateV2*>(dst_col)->get_data(); |
810 | 7 | date_day_offset_dict& date_dict = date_day_offset_dict::get(); |
811 | | |
812 | 59 | for (int i = 0; i < rows; i++) { |
813 | 52 | data.push_back_without_reserve(date_dict[src_data[i]].to_date_int_val()); |
814 | 52 | } |
815 | | |
816 | 7 | return Status::OK(); |
817 | 7 | } |
818 | | }; |
819 | | |
820 | | struct Int64ToTimestamp : public PhysicalToLogicalConverter { |
821 | 5 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
822 | 5 | ColumnPtr src_col = remove_nullable(src_physical_col); |
823 | 5 | IColumn* dst_col = get_mutable_inner_column(src_logical_column); |
824 | | |
825 | 5 | size_t rows = src_col->size(); |
826 | 5 | size_t start_idx = dst_col->size(); |
827 | 5 | dst_col->resize(start_idx + rows); |
828 | | |
829 | 5 | auto src_data = static_cast<const ColumnInt64*>(src_col.get())->get_data().data(); |
830 | 5 | auto& data = static_cast<ColumnDateTimeV2*>(dst_col)->get_data(); |
831 | | |
832 | 51 | for (int i = 0; i < rows; i++) { |
833 | 46 | int64_t x = src_data[i]; |
834 | 46 | auto& num = data[start_idx + i]; |
835 | 46 | auto& value = reinterpret_cast<DateV2Value<DateTimeV2ValueType>&>(num); |
836 | 46 | const int64_t epoch_seconds = x / _convert_params->second_mask; |
837 | 46 | if (_convert_params->is_fixed_offset) { |
838 | 46 | if (!detail::try_convert_timestamp_with_fixed_offset( |
839 | 46 | value, epoch_seconds, _convert_params->fixed_offset_seconds)) { |
840 | 0 | value.from_unixtime(epoch_seconds, *_convert_params->ctz); |
841 | 0 | } |
842 | 46 | } else if (!detail::try_convert_timestamp_with_lookup(value, epoch_seconds, |
843 | 0 | *_convert_params->ctz)) { |
844 | 0 | value.from_unixtime(epoch_seconds, *_convert_params->ctz); |
845 | 0 | } |
846 | 46 | value.set_microsecond((x % _convert_params->second_mask) * |
847 | 46 | (_convert_params->scale_to_nano_factor / 1000)); |
848 | 46 | } |
849 | 5 | return Status::OK(); |
850 | 5 | } |
851 | | }; |
852 | | |
853 | | struct Int64ToTimestampTz : public PhysicalToLogicalConverter { |
854 | 0 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
855 | 0 | ColumnPtr src_col = remove_nullable(src_physical_col); |
856 | 0 | IColumn* dst_col = get_mutable_inner_column(src_logical_column); |
857 | |
|
858 | 0 | size_t rows = src_col->size(); |
859 | 0 | size_t start_idx = dst_col->size(); |
860 | 0 | dst_col->resize(start_idx + rows); |
861 | |
|
862 | 0 | const auto& src_data = assert_cast<const ColumnInt64*>(src_col.get())->get_data(); |
863 | 0 | auto& dest_data = assert_cast<ColumnTimeStampTz*>(dst_col)->get_data(); |
864 | 0 | static const cctz::time_zone UTC = cctz::utc_time_zone(); |
865 | |
|
866 | 0 | for (int i = 0; i < rows; i++) { |
867 | 0 | int64_t x = src_data[i]; |
868 | 0 | auto& tz = dest_data[start_idx + i]; |
869 | 0 | tz.from_unixtime(x / _convert_params->second_mask, UTC); |
870 | 0 | tz.set_microsecond((x % _convert_params->second_mask) * |
871 | 0 | (_convert_params->scale_to_nano_factor / 1000)); |
872 | 0 | } |
873 | 0 | return Status::OK(); |
874 | 0 | } |
875 | | }; |
876 | | |
877 | | struct Int96toTimestamp : public PhysicalToLogicalConverter { |
878 | 0 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
879 | 0 | ColumnPtr src_col = remove_nullable(src_physical_col); |
880 | 0 | IColumn* dst_col = get_mutable_inner_column(src_logical_column); |
881 | |
|
882 | 0 | size_t rows = src_col->size() / sizeof(ParquetInt96); |
883 | 0 | auto& src_data = static_cast<const ColumnInt8*>(src_col.get())->get_data(); |
884 | 0 | auto ParquetInt96_data = (ParquetInt96*)src_data.data(); |
885 | 0 | size_t start_idx = dst_col->size(); |
886 | 0 | dst_col->resize(start_idx + rows); |
887 | 0 | auto& data = static_cast<ColumnDateTimeV2*>(dst_col)->get_data(); |
888 | |
|
889 | 0 | for (int i = 0; i < rows; i++) { |
890 | 0 | ParquetInt96 src_cell_data = ParquetInt96_data[i]; |
891 | 0 | auto& dst_value = |
892 | 0 | reinterpret_cast<DateV2Value<DateTimeV2ValueType>&>(data[start_idx + i]); |
893 | |
|
894 | 0 | int64_t timestamp_with_micros = src_cell_data.to_timestamp_micros(); |
895 | 0 | const int64_t epoch_seconds = timestamp_with_micros / 1000000; |
896 | 0 | if (_convert_params->is_fixed_offset) { |
897 | 0 | if (!detail::try_convert_timestamp_with_fixed_offset( |
898 | 0 | dst_value, epoch_seconds, _convert_params->fixed_offset_seconds)) { |
899 | 0 | dst_value.from_unixtime(epoch_seconds, *_convert_params->ctz); |
900 | 0 | } |
901 | 0 | } else if (!detail::try_convert_timestamp_with_lookup(dst_value, epoch_seconds, |
902 | 0 | *_convert_params->ctz)) { |
903 | 0 | dst_value.from_unixtime(epoch_seconds, *_convert_params->ctz); |
904 | 0 | } |
905 | 0 | dst_value.set_microsecond(timestamp_with_micros % 1000000); |
906 | 0 | } |
907 | 0 | return Status::OK(); |
908 | 0 | } |
909 | | }; |
910 | | |
911 | | struct Int96toTimestampTz : public PhysicalToLogicalConverter { |
912 | 0 | Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { |
913 | 0 | ColumnPtr src_col = remove_nullable(src_physical_col); |
914 | 0 | IColumn* dst_col = get_mutable_inner_column(src_logical_column); |
915 | |
|
916 | 0 | size_t rows = src_col->size() / sizeof(ParquetInt96); |
917 | 0 | const auto& src_data = assert_cast<const ColumnInt8*>(src_col.get())->get_data(); |
918 | 0 | auto* ParquetInt96_data = (ParquetInt96*)src_data.data(); |
919 | 0 | size_t start_idx = dst_col->size(); |
920 | 0 | dst_col->resize(start_idx + rows); |
921 | 0 | auto& data = assert_cast<ColumnTimeStampTz*>(dst_col)->get_data(); |
922 | 0 | static const cctz::time_zone UTC = cctz::utc_time_zone(); |
923 | |
|
924 | 0 | for (int i = 0; i < rows; i++) { |
925 | 0 | ParquetInt96 src_cell_data = ParquetInt96_data[i]; |
926 | 0 | int64_t timestamp_with_micros = src_cell_data.to_timestamp_micros(); |
927 | 0 | auto& tz = data[start_idx + i]; |
928 | 0 | tz.from_unixtime(timestamp_with_micros / 1000000, UTC); |
929 | 0 | tz.set_microsecond(timestamp_with_micros % 1000000); |
930 | 0 | } |
931 | 0 | return Status::OK(); |
932 | 0 | } |
933 | | }; |
934 | | |
935 | | } // namespace doris::parquet |