Coverage Report

Created: 2026-04-27 23:54

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/format/parquet/parquet_column_convert.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <cctz/time_zone.h>
21
#include <gen_cpp/parquet_types.h>
22
#include <libdivide.h>
23
24
#include <chrono>
25
#include <limits>
26
27
#include "common/cast_set.h"
28
#include "core/column/column_varbinary.h"
29
#include "core/data_type/data_type_factory.hpp"
30
#include "core/data_type/primitive_type.h"
31
#include "core/extended_types.h"
32
#include "core/field.h"
33
#include "core/types.h"
34
#include "format/column_type_convert.h"
35
#include "format/format_common.h"
36
#include "format/parquet/decoder.h"
37
#include "format/parquet/parquet_common.h"
38
#include "format/parquet/schema_desc.h"
39
#include "util/timezone_utils.h"
40
41
namespace doris::parquet {
42
namespace detail {
43
44
inline bool try_split_local_time(int64_t local_time, uint16_t* year, uint8_t* month, uint8_t* day,
45
62
                                 uint8_t* hour, uint8_t* minute, uint8_t* second) {
46
62
    static const libdivide::divider<int64_t> fast_div_86400(86400);
47
62
    static const libdivide::divider<int64_t> fast_div_3600(3600);
48
62
    static const libdivide::divider<int64_t> fast_div_60(60);
49
62
    static constexpr int64_t kMinSupportedDays = -365LL * 10000;
50
62
    static constexpr int64_t kMaxSupportedDays = 365LL * 10000;
51
52
62
    int64_t days = local_time / fast_div_86400;
53
62
    int64_t second_of_day = local_time - days * 86400;
54
62
    if (second_of_day < 0) {
55
3
        second_of_day += 86400;
56
3
        --days;
57
3
    }
58
62
    if (days < kMinSupportedDays || days > kMaxSupportedDays) {
59
0
        return false;
60
0
    }
61
62
62
    const auto ymd = std::chrono::year_month_day {std::chrono::sys_days {std::chrono::days {days}}};
63
62
    const int y = static_cast<int>(ymd.year());
64
62
    if (y < 0 || y > std::numeric_limits<uint16_t>::max()) {
65
0
        return false;
66
0
    }
67
68
62
    const int64_t h = second_of_day / fast_div_3600;
69
62
    const int64_t minute_second = second_of_day - h * 3600;
70
62
    const int64_t m = minute_second / fast_div_60;
71
62
    const int64_t s = minute_second - m * 60;
72
73
62
    *year = static_cast<uint16_t>(y);
74
62
    *month = static_cast<uint8_t>(static_cast<unsigned>(ymd.month()));
75
62
    *day = static_cast<uint8_t>(static_cast<unsigned>(ymd.day()));
76
62
    *hour = static_cast<uint8_t>(h);
77
62
    *minute = static_cast<uint8_t>(m);
78
62
    *second = static_cast<uint8_t>(s);
79
62
    return true;
80
62
}
81
82
template <typename DateType>
83
inline bool try_convert_timestamp_with_fixed_offset(DateType& value, int64_t epoch_seconds,
84
62
                                                    int32_t offset_seconds) {
85
62
    uint16_t year = 0;
86
62
    uint8_t month = 0;
87
62
    uint8_t day = 0;
88
62
    uint8_t hour = 0;
89
62
    uint8_t minute = 0;
90
62
    uint8_t second = 0;
91
62
    if (!try_split_local_time(epoch_seconds + offset_seconds, &year, &month, &day, &hour, &minute,
92
62
                              &second)) {
93
0
        return false;
94
0
    }
95
    // The caller sets sub-second precision immediately after this conversion.
96
62
    value.unchecked_set_time(year, month, day, hour, minute, second, 0);
97
62
    return true;
98
62
}
99
100
template <typename DateType>
101
inline bool try_convert_timestamp_with_lookup(DateType& value, int64_t epoch_seconds,
102
10
                                              const cctz::time_zone& ctz) {
103
10
    static const auto epoch = std::chrono::time_point_cast<cctz::sys_seconds>(
104
10
            std::chrono::system_clock::from_time_t(0));
105
10
    cctz::time_point<cctz::sys_seconds> t = epoch + cctz::seconds(epoch_seconds);
106
10
    const int32_t offset = ctz.lookup_offset(t).offset;
107
10
    return try_convert_timestamp_with_fixed_offset(value, epoch_seconds, offset);
108
10
}
109
110
} // namespace detail
111
112
struct ConvertParams {
113
    // schema.logicalType.TIMESTAMP.isAdjustedToUTC == false
114
    static const cctz::time_zone utc0;
115
    // schema.logicalType.TIMESTAMP.isAdjustedToUTC == true, we should set local time zone
116
    const cctz::time_zone* ctz = nullptr;
117
    bool is_fixed_offset = false;
118
    int32_t fixed_offset_seconds = 0;
119
    int64_t second_mask = 1;
120
    int64_t scale_to_nano_factor = 1;
121
    const FieldSchema* field_schema = nullptr;
122
123
    //For UInt8 -> Int16,UInt16 -> Int32,UInt32 -> Int64,UInt64 -> Int128.
124
    bool is_type_compatibility = false;
125
126
    /**
127
     * Some frameworks like Paimon may write non-standard parquet files. The timestamp field doesn't have
128
     * logicalType or converted_type to indicate its precision, so we have to reset the time mask.
129
     */
130
5
    void reset_time_scale_if_missing(int scale) {
131
5
        const auto& schema = field_schema->parquet_schema;
132
5
        if (!schema.__isset.logicalType && !schema.__isset.converted_type) {
133
0
            int ts_scale = 9;
134
0
            if (scale <= 3) {
135
0
                ts_scale = 3;
136
0
            } else if (scale <= 6) {
137
0
                ts_scale = 6;
138
0
            }
139
0
            second_mask = common::exp10_i64(ts_scale);
140
0
            scale_to_nano_factor = common::exp10_i64(9 - ts_scale);
141
142
            // The missing parquet metadata makes it impossible for us to know the time zone information,
143
            // so we default to UTC here.
144
0
            if (ctz == nullptr) {
145
0
                ctz = &utc0;
146
0
            }
147
0
        }
148
5
    }
149
150
225
    void init(const FieldSchema* field_schema_, const cctz::time_zone* ctz_) {
151
225
        field_schema = field_schema_;
152
225
        if (ctz_ != nullptr) {
153
225
            ctz = ctz_;
154
225
        }
155
225
        const auto& schema = field_schema->parquet_schema;
156
225
        if (schema.__isset.logicalType && schema.logicalType.__isset.TIMESTAMP) {
157
5
            const auto& timestamp_info = schema.logicalType.TIMESTAMP;
158
5
            if (!timestamp_info.isAdjustedToUTC) {
159
                // should set timezone to utc+0
160
                // Reference: https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#instant-semantics-timestamps-normalized-to-utc
161
                // If isAdjustedToUTC = false, the reader should display the same value no matter what the local time zone is. For example:
162
                // When a timestamp is stored as `1970-01-03 12:00:00`,
163
                // if isAdjustedToUTC = true, UTC8 should read as `1970-01-03 20:00:00`, UTC6 should read as `1970-01-03 18:00:00`
164
                // if isAdjustedToUTC = false, UTC8 and UTC6 should read as `1970-01-03 12:00:00`, which is the same as `1970-01-03 12:00:00` in UTC0
165
2
                ctz = &utc0;
166
2
            }
167
5
            const auto& time_unit = timestamp_info.unit;
168
5
            if (time_unit.__isset.MILLIS) {
169
1
                second_mask = 1000;
170
1
                scale_to_nano_factor = 1000000;
171
4
            } else if (time_unit.__isset.MICROS) {
172
4
                second_mask = 1000000;
173
4
                scale_to_nano_factor = 1000;
174
4
            } else if (time_unit.__isset.NANOS) {
175
0
                second_mask = 1000000000;
176
0
                scale_to_nano_factor = 1;
177
0
            }
178
220
        } else if (schema.__isset.converted_type) {
179
61
            const auto& converted_type = schema.converted_type;
180
61
            if (converted_type == tparquet::ConvertedType::TIMESTAMP_MILLIS) {
181
0
                second_mask = 1000;
182
0
                scale_to_nano_factor = 1000000;
183
61
            } else if (converted_type == tparquet::ConvertedType::TIMESTAMP_MICROS) {
184
4
                second_mask = 1000000;
185
4
                scale_to_nano_factor = 1000;
186
4
            }
187
61
        }
188
189
225
        if (ctz != nullptr) {
190
225
            is_fixed_offset =
191
225
                    TimezoneUtils::try_get_fixed_offset_seconds(*ctz, &fixed_offset_seconds);
192
225
        }
193
225
        is_type_compatibility = field_schema_->is_type_compatibility;
194
225
    }
195
};
196
197
/**
198
 * Convert parquet physical column to logical column
199
 * In parquet document(https://github.com/apache/parquet-format/blob/master/LogicalTypes.md),
200
 * Logical or converted type is the data type of column, physical type is the stored type of column chunk.
201
 * eg, decimal type can be stored as INT32, INT64, BYTE_ARRAY, FIXED_LENGTH_BYTE_ARRAY.
202
 * So there is a convert process from physical type to logical type.
203
 * In addition, Schema change will bring about a change in logical type.
204
 *
205
 * `PhysicalToLogicalConverter` strips away the conversion of logical type, and reuses `ColumnTypeConverter`
206
 * to resolve schema change, allowing parquet reader to only focus on the conversion of physical types.
207
 *
208
 * Therefore, two layers of converters are designed:
209
 * First, read parquet data with the physical type
210
 * Second, convert physical type to logical type
211
 * Third, convert logical type to the final type planned by FE(schema change)
212
 *
213
 * Ultimate performance optimization:
214
 * 1. If process of (First => Second) is consistent, eg. from BYTE_ARRAY to string, no additional copies and conversions will be introduced;
215
 * 2. If process of (Second => Third) is consistent, no additional copies and conversions will be introduced;
216
 * 3. The null map is shared among all processes, no additional copies and conversions will be introduced in null map;
217
 * 4. Only create one physical column in physical conversion, and reused in each loop;
218
 * 5. Only create one logical column in logical conversion, and reused in each loop;
219
 * 6. FIXED_LENGTH_BYTE_ARRAY is read as ColumnUInt8 instead of ColumnString, so the underlying decoder has no process to decode string
220
 *    and use memory copy to read the data as a whole, and the conversion has no need to resolve the Offsets in ColumnString.
221
 */
222
class PhysicalToLogicalConverter {
223
protected:
224
    ColumnPtr _cached_src_physical_column = nullptr;
225
    DataTypePtr _cached_src_physical_type = nullptr;
226
    std::unique_ptr<converter::ColumnTypeConverter> _logical_converter = nullptr;
227
228
    std::string _error_msg;
229
230
    std::unique_ptr<ConvertParams> _convert_params;
231
232
public:
233
    static std::unique_ptr<PhysicalToLogicalConverter> get_converter(
234
            const FieldSchema* field_schema, DataTypePtr src_logical_type,
235
            const DataTypePtr& dst_logical_type, const cctz::time_zone* ctz,
236
            bool is_dict_filter = false);
237
238
    static bool is_parquet_native_type(PrimitiveType type);
239
240
    static bool is_decimal_type(PrimitiveType type);
241
242
221
    PhysicalToLogicalConverter() = default;
243
221
    virtual ~PhysicalToLogicalConverter() = default;
244
245
4
    virtual Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) {
246
4
        return Status::OK();
247
4
    }
248
249
    Status convert(ColumnPtr& src_physical_col, DataTypePtr src_logical_type,
250
                   const DataTypePtr& dst_logical_type, ColumnPtr& dst_logical_col,
251
231
                   bool is_dict_filter) {
252
231
        if (is_dict_filter) {
253
0
            src_logical_type = DataTypeFactory::instance().create_data_type(
254
0
                    PrimitiveType::TYPE_INT, dst_logical_type->is_nullable());
255
0
        }
256
231
        if (is_consistent() && _logical_converter->is_consistent()) {
257
186
            return Status::OK();
258
186
        }
259
45
        ColumnPtr src_logical_column;
260
45
        if (is_consistent()) {
261
4
            if (dst_logical_type->is_nullable()) {
262
4
                auto doris_nullable_column =
263
4
                        assert_cast<const ColumnNullable*>(dst_logical_col.get());
264
4
                src_logical_column =
265
4
                        ColumnNullable::create(_cached_src_physical_column,
266
4
                                               doris_nullable_column->get_null_map_column_ptr());
267
4
            } else {
268
0
                src_logical_column = _cached_src_physical_column;
269
0
            }
270
41
        } else {
271
41
            src_logical_column = _logical_converter->get_column(src_logical_type, dst_logical_col,
272
41
                                                                dst_logical_type);
273
41
        }
274
45
        RETURN_IF_ERROR(physical_convert(src_physical_col, src_logical_column));
275
45
        auto converted_column = dst_logical_col->assume_mutable();
276
45
        return _logical_converter->convert(src_logical_column, converted_column);
277
45
    }
278
279
    virtual ColumnPtr get_physical_column(tparquet::Type::type src_physical_type,
280
                                          DataTypePtr src_logical_type,
281
                                          ColumnPtr& dst_logical_column,
282
                                          const DataTypePtr& dst_logical_type, bool is_dict_filter);
283
284
231
    DataTypePtr& get_physical_type() { return _cached_src_physical_type; }
285
286
127
    virtual bool is_consistent() { return false; }
287
288
368
    virtual bool support() { return true; }
289
290
0
    std::string get_error_msg() { return _error_msg; }
291
};
292
293
class ConsistentPhysicalConverter : public PhysicalToLogicalConverter {
294
455
    bool is_consistent() override { return true; }
295
};
296
297
class UnsupportedConverter : public PhysicalToLogicalConverter {
298
public:
299
0
    UnsupportedConverter(std::string error_msg) { _error_msg = error_msg; }
300
301
    UnsupportedConverter(tparquet::Type::type src_physical_type,
302
0
                         const DataTypePtr& src_logical_type) {
303
0
        std::string src_physical_str = tparquet::to_string(src_physical_type);
304
0
        std::string src_logical_str = src_logical_type->get_name();
305
0
        _error_msg = src_physical_str + " => " + src_logical_str;
306
0
    }
307
308
0
    bool support() override { return false; }
309
310
0
    Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override {
311
0
        return Status::InternalError("Unsupported physical to logical type: {}", _error_msg);
312
0
    }
313
};
314
315
// for tinyint, smallint
316
template <PrimitiveType IntPrimitiveType>
317
class LittleIntPhysicalConverter : public PhysicalToLogicalConverter {
318
28
    Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override {
319
28
        using DstCppType = typename PrimitiveTypeTraits<IntPrimitiveType>::CppType;
320
28
        using DstColumnType = typename PrimitiveTypeTraits<IntPrimitiveType>::ColumnType;
321
28
        ColumnPtr from_col = remove_nullable(src_physical_col);
322
28
        MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable();
323
324
28
        size_t rows = from_col->size();
325
        // always comes from tparquet::Type::INT32
326
28
        auto& src_data = assert_cast<const ColumnInt32*>(from_col.get())->get_data();
327
28
        size_t start_idx = to_col->size();
328
28
        to_col->resize(start_idx + rows);
329
28
        auto& data = assert_cast<DstColumnType&>(*to_col.get()).get_data();
330
108
        for (int i = 0; i < rows; ++i) {
331
80
            data[start_idx + i] = static_cast<DstCppType>(src_data[i]);
332
80
        }
333
334
28
        return Status::OK();
335
28
    }
_ZN5doris7parquet26LittleIntPhysicalConverterILNS_13PrimitiveTypeE3EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
Line
Count
Source
318
17
    Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override {
319
17
        using DstCppType = typename PrimitiveTypeTraits<IntPrimitiveType>::CppType;
320
17
        using DstColumnType = typename PrimitiveTypeTraits<IntPrimitiveType>::ColumnType;
321
17
        ColumnPtr from_col = remove_nullable(src_physical_col);
322
17
        MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable();
323
324
17
        size_t rows = from_col->size();
325
        // always comes from tparquet::Type::INT32
326
17
        auto& src_data = assert_cast<const ColumnInt32*>(from_col.get())->get_data();
327
17
        size_t start_idx = to_col->size();
328
17
        to_col->resize(start_idx + rows);
329
17
        auto& data = assert_cast<DstColumnType&>(*to_col.get()).get_data();
330
59
        for (int i = 0; i < rows; ++i) {
331
42
            data[start_idx + i] = static_cast<DstCppType>(src_data[i]);
332
42
        }
333
334
17
        return Status::OK();
335
17
    }
_ZN5doris7parquet26LittleIntPhysicalConverterILNS_13PrimitiveTypeE4EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
Line
Count
Source
318
11
    Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override {
319
11
        using DstCppType = typename PrimitiveTypeTraits<IntPrimitiveType>::CppType;
320
11
        using DstColumnType = typename PrimitiveTypeTraits<IntPrimitiveType>::ColumnType;
321
11
        ColumnPtr from_col = remove_nullable(src_physical_col);
322
11
        MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable();
323
324
11
        size_t rows = from_col->size();
325
        // always comes from tparquet::Type::INT32
326
11
        auto& src_data = assert_cast<const ColumnInt32*>(from_col.get())->get_data();
327
11
        size_t start_idx = to_col->size();
328
11
        to_col->resize(start_idx + rows);
329
11
        auto& data = assert_cast<DstColumnType&>(*to_col.get()).get_data();
330
49
        for (int i = 0; i < rows; ++i) {
331
38
            data[start_idx + i] = static_cast<DstCppType>(src_data[i]);
332
38
        }
333
334
11
        return Status::OK();
335
11
    }
336
};
337
338
template <PrimitiveType type>
339
struct UnsignedTypeTraits;
340
341
template <>
342
struct UnsignedTypeTraits<TYPE_SMALLINT> {
343
    using UnsignedCppType = UInt8;
344
    //https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#unsigned-integers
345
    //INT(8, false), INT(16, false), and INT(32, false) must annotate an int32 primitive type and INT(64, false)
346
    //must annotate an int64 primitive type.
347
    using StorageCppType = Int32;
348
    using StorageColumnType = ColumnInt32;
349
};
350
351
template <>
352
struct UnsignedTypeTraits<TYPE_INT> {
353
    using UnsignedCppType = UInt16;
354
    using StorageCppType = Int32;
355
    using StorageColumnType = ColumnInt32;
356
};
357
358
template <>
359
struct UnsignedTypeTraits<TYPE_BIGINT> {
360
    using UnsignedCppType = UInt32;
361
    using StorageCppType = Int32;
362
    using StorageColumnType = ColumnInt32;
363
};
364
365
template <>
366
struct UnsignedTypeTraits<TYPE_LARGEINT> {
367
    using UnsignedCppType = UInt64;
368
    using StorageCppType = Int64;
369
    using StorageColumnType = ColumnInt64;
370
};
371
372
template <PrimitiveType IntPrimitiveType>
373
class UnsignedIntegerConverter : public PhysicalToLogicalConverter {
374
0
    Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override {
375
0
        using UnsignedCppType = typename UnsignedTypeTraits<IntPrimitiveType>::UnsignedCppType;
376
0
        using StorageCppType = typename UnsignedTypeTraits<IntPrimitiveType>::StorageCppType;
377
0
        using StorageColumnType = typename UnsignedTypeTraits<IntPrimitiveType>::StorageColumnType;
378
0
        using DstColumnType = typename PrimitiveTypeTraits<IntPrimitiveType>::ColumnType;
379
380
0
        ColumnPtr from_col = remove_nullable(src_physical_col);
381
0
        MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable();
382
0
        auto& src_data = assert_cast<const StorageColumnType*>(from_col.get())->get_data();
383
384
0
        size_t rows = src_data.size();
385
0
        size_t start_idx = to_col->size();
386
0
        to_col->resize(start_idx + rows);
387
0
        auto& data = assert_cast<DstColumnType&>(*to_col.get()).get_data();
388
389
0
        for (int i = 0; i < rows; i++) {
390
0
            StorageCppType src_value = src_data[i];
391
0
            auto unsigned_value = static_cast<UnsignedCppType>(src_value);
392
0
            data[start_idx + i] = unsigned_value;
393
0
        }
394
395
0
        return Status::OK();
396
0
    }
Unexecuted instantiation: _ZN5doris7parquet24UnsignedIntegerConverterILNS_13PrimitiveTypeE4EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
Unexecuted instantiation: _ZN5doris7parquet24UnsignedIntegerConverterILNS_13PrimitiveTypeE5EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
Unexecuted instantiation: _ZN5doris7parquet24UnsignedIntegerConverterILNS_13PrimitiveTypeE6EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
Unexecuted instantiation: _ZN5doris7parquet24UnsignedIntegerConverterILNS_13PrimitiveTypeE7EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
397
};
398
399
class FixedSizeBinaryConverter : public PhysicalToLogicalConverter {
400
private:
401
    int _type_length;
402
403
public:
404
0
    FixedSizeBinaryConverter(int type_length) : _type_length(type_length) {}
405
406
0
    Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override {
407
0
        ColumnPtr from_col = remove_nullable(src_physical_col);
408
0
        MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable();
409
410
0
        auto* src_data = assert_cast<const ColumnUInt8*>(from_col.get());
411
0
        size_t length = src_data->size();
412
0
        size_t num_values = length / _type_length;
413
0
        auto& string_col = static_cast<ColumnString&>(*to_col.get());
414
0
        auto& offsets = string_col.get_offsets();
415
0
        auto& chars = string_col.get_chars();
416
417
0
        size_t origin_size = chars.size();
418
0
        chars.resize(origin_size + length);
419
0
        memcpy(chars.data() + origin_size, src_data->get_data().data(), length);
420
421
0
        origin_size = offsets.size();
422
0
        offsets.resize(origin_size + num_values);
423
0
        auto end_offset = offsets[origin_size - 1];
424
0
        for (int i = 0; i < num_values; ++i) {
425
0
            end_offset += _type_length;
426
0
            offsets[origin_size + i] = end_offset;
427
0
        }
428
429
0
        return Status::OK();
430
0
    }
431
};
432
433
class Float16PhysicalConverter : public PhysicalToLogicalConverter {
434
private:
435
    int _type_length;
436
437
public:
438
0
    Float16PhysicalConverter(int type_length) : _type_length(type_length) {
439
0
        DCHECK_EQ(_type_length, 2);
440
0
    }
441
442
0
    Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override {
443
0
        ColumnPtr from_col = remove_nullable(src_physical_col);
444
0
        MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable();
445
446
0
        const auto* src_data = assert_cast<const ColumnUInt8*>(from_col.get());
447
0
        size_t length = src_data->size();
448
0
        size_t num_values = length / _type_length;
449
0
        auto* to_float_column = assert_cast<ColumnFloat32*>(to_col.get());
450
0
        size_t start_idx = to_float_column->size();
451
0
        to_float_column->resize(start_idx + num_values);
452
0
        auto& to_float_column_data = to_float_column->get_data();
453
0
        const auto* ptr = src_data->get_data().data();
454
0
        for (int i = 0; i < num_values; ++i) {
455
0
            size_t offset = i * _type_length;
456
0
            const auto* data_ptr = ptr + offset;
457
0
            uint16_t raw;
458
0
            memcpy(&raw, data_ptr, sizeof(uint16_t));
459
0
            float value = half_to_float(raw);
460
0
            to_float_column_data[start_idx + i] = value;
461
0
        }
462
463
0
        return Status::OK();
464
0
    }
465
466
0
    float half_to_float(uint16_t h) {
467
        // uint16_t h: half precision floating point
468
        // bit 15:       sign(1 bit)
469
        // bits 14..10 : exponent(5 bits)
470
        // bits 9..0   : mantissa(10 bits)
471
472
        // sign bit placed to float32 bit31
473
0
        uint32_t sign = (h & 0x8000U) << 16; // 0x8000 << 16 = 0x8000_0000
474
        // exponent:(5 bits)
475
0
        uint32_t exp = (h & 0x7C00U) >> 10; // 0x7C00 = 0111 1100 0000 (half exponent mask)
476
        // mantissa(10 bits)
477
0
        uint32_t mant = (h & 0x03FFU); // 10-bit fraction
478
479
        // cases:Zero/Subnormal, Normal, Inf/NaN
480
0
        if (exp == 0) {
481
            // exp==0: Zero or Subnormal ----------
482
0
            if (mant == 0) {
483
                // ±0.0
484
                // sign = either 0x00000000 or 0x80000000
485
0
                return std::bit_cast<float>(sign);
486
0
            } else {
487
                // ---------- Subnormal ----------
488
                // half subnormal:
489
                //    value = (-1)^sign * (mant / 2^10) * 2^(1 - bias)
490
                // half bias = 15 → exponent = 1 - 15 = -14
491
0
                float f = (static_cast<float>(mant) / 1024.0F) * std::powf(2.0F, -14.0F);
492
0
                return sign ? -f : f;
493
0
            }
494
0
        } else if (exp == 0x1F) {
495
            // exp==31: Inf or NaN ----------
496
            // float32:
497
            //    exponent = 255 (0xFF)
498
            //    mantissa = mant << 13
499
0
            uint32_t f = sign | 0x7F800000U | (mant << 13);
500
0
            return std::bit_cast<float>(f);
501
0
        } else {
502
            // Normalized ----------
503
            // float32 exponent:
504
            //   exp32 = exp16 - bias16 + bias32
505
            //   bias16 = 15
506
            //   bias32 = 127
507
            //
508
            // so: exp32 = exp + (127 - 15)
509
0
            uint32_t f = sign | ((exp + (127 - 15)) << 23) // place to float32 exponent
510
0
                         | (mant << 13);                   // mantissa align to 23 bits
511
0
            return std::bit_cast<float>(f);
512
0
        }
513
0
    }
514
};
515
516
class UUIDVarBinaryConverter : public PhysicalToLogicalConverter {
517
public:
518
1
    UUIDVarBinaryConverter(int type_length) : _type_length(type_length) {}
519
520
1
    Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override {
521
1
        DCHECK(!is_column_const(*src_physical_col)) << src_physical_col->dump_structure();
522
1
        DCHECK(!is_column_const(*src_logical_column)) << src_logical_column->dump_structure();
523
1
        const ColumnUInt8* uint8_col = nullptr;
524
1
        if (is_column_nullable(*src_physical_col)) {
525
1
            const auto& nullable = assert_cast<const ColumnNullable*>(src_physical_col.get());
526
1
            uint8_col = &assert_cast<const ColumnUInt8&>(nullable->get_nested_column());
527
1
        } else {
528
0
            uint8_col = &assert_cast<const ColumnUInt8&>(*src_physical_col);
529
0
        }
530
531
1
        MutableColumnPtr to_col = nullptr;
532
        // the null map flag seems to have been handled at the upper level
533
1
        if (src_logical_column->is_nullable()) {
534
1
            const auto* nullable = assert_cast<const ColumnNullable*>(src_logical_column.get());
535
1
            to_col = nullable->get_nested_column_ptr()->assume_mutable();
536
1
        } else {
537
0
            to_col = src_logical_column->assume_mutable();
538
0
        }
539
1
        auto* to_varbinary_column = assert_cast<ColumnVarbinary*>(to_col.get());
540
1
        size_t length = uint8_col->size();
541
1
        size_t num_values = length / _type_length;
542
1
        const auto* ptr = uint8_col->get_data().data();
543
544
4
        for (int i = 0; i < num_values; ++i) {
545
3
            auto offset = i * _type_length;
546
3
            const char* data_ptr = reinterpret_cast<const char*>(ptr + offset);
547
3
            to_varbinary_column->insert_data(data_ptr, _type_length);
548
3
        }
549
1
        return Status::OK();
550
1
    }
551
552
private:
553
    int _type_length;
554
};
555
556
template <PrimitiveType DecimalPType>
557
class FixedSizeToDecimal : public PhysicalToLogicalConverter {
558
public:
559
    using DecimalType = typename PrimitiveTypeTraits<DecimalPType>::CppType;
560
2
    FixedSizeToDecimal(int32_t type_length) : _type_length(type_length) {}
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EEC2Ei
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EEC2Ei
Line
Count
Source
560
2
    FixedSizeToDecimal(int32_t type_length) : _type_length(type_length) {}
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EEC2Ei
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EEC2Ei
561
562
2
    // Converts a fixed-length physical column into the decimal logical column.
    //
    // Both arguments are passed through remove_nullable(); the destination is then obtained
    // as a mutable column via assume_mutable(). The work is dispatched on the member
    // _type_length to _convert_internal<FixedTypeLength, ValueCopyType>:
    //   - lengths 1..8   are staged in int64_t,
    //   - lengths 9..16  are staged in int128_t,
    //   - lengths 17..32 are staged in wide::Int256.
    // The helper macros M and APPLY_FOR_DECIMALS only generate the `case` labels of that
    // switch and are #undef'd before the function ends.
    //
    // Returns whatever _convert_internal returns. Any _type_length outside 1..32 throws an
    // Exception wrapping Status::FatalError. Every case returns and the default throws, so
    // the trailing `return Status::OK();` is never reached.
    Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override {
563
2
        ColumnPtr src_col = remove_nullable(src_physical_col);
564
2
        MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable();
565
566
2
#define M(FixedTypeLength, ValueCopyType) \
567
2
    case FixedTypeLength:                 \
568
2
        return _convert_internal<FixedTypeLength, ValueCopyType>(src_col, dst_col);
569
570
2
#define APPLY_FOR_DECIMALS() \
571
2
    M(1, int64_t)            \
572
0
    M(2, int64_t)            \
573
0
    M(3, int64_t)            \
574
0
    M(4, int64_t)            \
575
1
    M(5, int64_t)            \
576
1
    M(6, int64_t)            \
577
0
    M(7, int64_t)            \
578
1
    M(8, int64_t)            \
579
1
    M(9, int128_t)           \
580
0
    M(10, int128_t)          \
581
0
    M(11, int128_t)          \
582
0
    M(12, int128_t)          \
583
0
    M(13, int128_t)          \
584
0
    M(14, int128_t)          \
585
0
    M(15, int128_t)          \
586
0
    M(16, int128_t)          \
587
0
    M(17, wide::Int256)      \
588
0
    M(18, wide::Int256)      \
589
0
    M(19, wide::Int256)      \
590
0
    M(20, wide::Int256)      \
591
0
    M(21, wide::Int256)      \
592
0
    M(22, wide::Int256)      \
593
0
    M(23, wide::Int256)      \
594
0
    M(24, wide::Int256)      \
595
0
    M(25, wide::Int256)      \
596
0
    M(26, wide::Int256)      \
597
0
    M(27, wide::Int256)      \
598
0
    M(28, wide::Int256)      \
599
0
    M(29, wide::Int256)      \
600
0
    M(30, wide::Int256)      \
601
0
    M(31, wide::Int256)      \
602
0
    M(32, wide::Int256)
603
604
2
        switch (_type_length) {
605
0
            APPLY_FOR_DECIMALS()
606
0
        default:
607
0
            throw Exception(Status::FatalError("__builtin_unreachable"));
608
2
        }
609
0
        return Status::OK();
610
2
#undef APPLY_FOR_DECIMALS
611
2
#undef M
612
2
    }
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
Line
Count
Source
562
2
    // Converts a fixed-length physical column into the decimal logical column.
    //
    // Both arguments are passed through remove_nullable(); the destination is then obtained
    // as a mutable column via assume_mutable(). The work is dispatched on the member
    // _type_length to _convert_internal<FixedTypeLength, ValueCopyType>:
    //   - lengths 1..8   are staged in int64_t,
    //   - lengths 9..16  are staged in int128_t,
    //   - lengths 17..32 are staged in wide::Int256.
    // The helper macros M and APPLY_FOR_DECIMALS only generate the `case` labels of that
    // switch and are #undef'd before the function ends.
    //
    // Returns whatever _convert_internal returns. Any _type_length outside 1..32 throws an
    // Exception wrapping Status::FatalError. Every case returns and the default throws, so
    // the trailing `return Status::OK();` is never reached.
    Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override {
563
2
        ColumnPtr src_col = remove_nullable(src_physical_col);
564
2
        MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable();
565
566
2
#define M(FixedTypeLength, ValueCopyType) \
567
2
    case FixedTypeLength:                 \
568
2
        return _convert_internal<FixedTypeLength, ValueCopyType>(src_col, dst_col);
569
570
2
#define APPLY_FOR_DECIMALS() \
571
2
    M(1, int64_t)            \
572
2
    M(2, int64_t)            \
573
2
    M(3, int64_t)            \
574
2
    M(4, int64_t)            \
575
2
    M(5, int64_t)            \
576
2
    M(6, int64_t)            \
577
2
    M(7, int64_t)            \
578
2
    M(8, int64_t)            \
579
2
    M(9, int128_t)           \
580
2
    M(10, int128_t)          \
581
2
    M(11, int128_t)          \
582
2
    M(12, int128_t)          \
583
2
    M(13, int128_t)          \
584
2
    M(14, int128_t)          \
585
2
    M(15, int128_t)          \
586
2
    M(16, int128_t)          \
587
2
    M(17, wide::Int256)      \
588
2
    M(18, wide::Int256)      \
589
2
    M(19, wide::Int256)      \
590
2
    M(20, wide::Int256)      \
591
2
    M(21, wide::Int256)      \
592
2
    M(22, wide::Int256)      \
593
2
    M(23, wide::Int256)      \
594
2
    M(24, wide::Int256)      \
595
2
    M(25, wide::Int256)      \
596
2
    M(26, wide::Int256)      \
597
2
    M(27, wide::Int256)      \
598
2
    M(28, wide::Int256)      \
599
2
    M(29, wide::Int256)      \
600
2
    M(30, wide::Int256)      \
601
2
    M(31, wide::Int256)      \
602
2
    M(32, wide::Int256)
603
604
2
        switch (_type_length) {
605
0
            APPLY_FOR_DECIMALS()
606
0
        default:
607
0
            throw Exception(Status::FatalError("__builtin_unreachable"));
608
2
        }
609
0
        return Status::OK();
610
2
#undef APPLY_FOR_DECIMALS
611
2
#undef M
612
2
    }
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
613
614
    // Decodes fixed-width decimal values from src_col and appends them to dst_col.
    //
    // src_col is accessed as a ColumnUInt8 byte buffer and is treated as
    // rows = src_col->size() / fixed_type_length consecutive values of fixed_type_length
    // bytes each. dst_col is accessed as ColumnDecimal<DecimalPType> and is grown by `rows`
    // entries; existing entries are left in place.
    // ValueCopyType is the integer type each value is staged in before it is cast to
    // DecimalType. Always returns Status::OK().
    template <int fixed_type_length, typename ValueCopyType>
615
2
    Status _convert_internal(ColumnPtr& src_col, MutableColumnPtr& dst_col) {
616
2
        size_t rows = src_col->size() / fixed_type_length;
617
2
        auto* buf = static_cast<const ColumnUInt8*>(src_col.get())->get_data().data();
618
2
        size_t start_idx = dst_col->size();
619
2
        dst_col->resize(start_idx + rows);
620
621
2
        auto& data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col.get())->get_data();
622
2
        size_t offset = 0;
        // NOTE(review): `i` is int while `rows` is size_t (signed/unsigned comparison).
623
6
        for (int i = 0; i < rows; i++) {
624
            // When Decimal in parquet is stored in byte arrays, binary and fixed,
625
            // the unscaled number must be encoded as two's complement using big-endian byte order.
626
4
            ValueCopyType value = 0;
            // NOTE(review): copies sizeof(value) bytes rather than fixed_type_length, so when
            // fixed_type_length < sizeof(value) this reads bytes beyond the current element;
            // presumably the source buffer is padded to make that safe -- confirm.
627
4
            memcpy(reinterpret_cast<char*>(&value), buf + offset, sizeof(value));
628
4
            offset += fixed_type_length;
629
4
            value = to_endian<std::endian::big>(value);
            // Shifts right by the number of surplus bytes (sizeof(value) - fixed_type_length);
            // presumably, after the byte-order conversion above, this discards the extra bytes
            // copied by memcpy and sign-extends the result -- confirm for every ValueCopyType.
630
4
            value = value >> ((sizeof(value) - fixed_type_length) * 8);
631
4
            auto& v = reinterpret_cast<DecimalType&>(data[start_idx + i]);
632
4
            v = (DecimalType)value;
633
4
        }
634
635
2
        return Status::OK();
636
2
    }
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi1ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi2ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi3ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi4ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi5ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi6ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi7ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi8ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi9EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi10EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi11EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi12EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi13EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi14EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi15EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi16EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi17EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi18EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi19EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi20EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi21EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi22EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi23EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi24EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi25EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi26EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi27EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi28EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi29EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi30EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi31EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi32EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi1ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi2ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi3ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi4ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi5ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Line
Count
Source
615
1
    // Decodes fixed-width decimal values from src_col and appends them to dst_col.
    //
    // src_col is accessed as a ColumnUInt8 byte buffer and is treated as
    // rows = src_col->size() / fixed_type_length consecutive values of fixed_type_length
    // bytes each. dst_col is accessed as ColumnDecimal<DecimalPType> and is grown by `rows`
    // entries; existing entries are left in place.
    // ValueCopyType is the integer type each value is staged in before it is cast to
    // DecimalType. Always returns Status::OK().
    Status _convert_internal(ColumnPtr& src_col, MutableColumnPtr& dst_col) {
616
1
        size_t rows = src_col->size() / fixed_type_length;
617
1
        auto* buf = static_cast<const ColumnUInt8*>(src_col.get())->get_data().data();
618
1
        size_t start_idx = dst_col->size();
619
1
        dst_col->resize(start_idx + rows);
620
621
1
        auto& data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col.get())->get_data();
622
1
        size_t offset = 0;
        // NOTE(review): `i` is int while `rows` is size_t (signed/unsigned comparison).
623
3
        for (int i = 0; i < rows; i++) {
624
            // When Decimal in parquet is stored in byte arrays, binary and fixed,
625
            // the unscaled number must be encoded as two's complement using big-endian byte order.
626
2
            ValueCopyType value = 0;
            // NOTE(review): copies sizeof(value) bytes rather than fixed_type_length, so when
            // fixed_type_length < sizeof(value) this reads bytes beyond the current element;
            // presumably the source buffer is padded to make that safe -- confirm.
627
2
            memcpy(reinterpret_cast<char*>(&value), buf + offset, sizeof(value));
628
2
            offset += fixed_type_length;
629
2
            value = to_endian<std::endian::big>(value);
            // Shifts right by the number of surplus bytes (sizeof(value) - fixed_type_length);
            // presumably, after the byte-order conversion above, this discards the extra bytes
            // copied by memcpy and sign-extends the result -- confirm for every ValueCopyType.
630
2
            value = value >> ((sizeof(value) - fixed_type_length) * 8);
631
2
            auto& v = reinterpret_cast<DecimalType&>(data[start_idx + i]);
632
2
            v = (DecimalType)value;
633
2
        }
634
635
1
        return Status::OK();
636
1
    }
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi6ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi7ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi8ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Line
Count
Source
615
1
    // Decodes fixed-width decimal values from src_col and appends them to dst_col.
    //
    // src_col is accessed as a ColumnUInt8 byte buffer and is treated as
    // rows = src_col->size() / fixed_type_length consecutive values of fixed_type_length
    // bytes each. dst_col is accessed as ColumnDecimal<DecimalPType> and is grown by `rows`
    // entries; existing entries are left in place.
    // ValueCopyType is the integer type each value is staged in before it is cast to
    // DecimalType. Always returns Status::OK().
    Status _convert_internal(ColumnPtr& src_col, MutableColumnPtr& dst_col) {
616
1
        size_t rows = src_col->size() / fixed_type_length;
617
1
        auto* buf = static_cast<const ColumnUInt8*>(src_col.get())->get_data().data();
618
1
        size_t start_idx = dst_col->size();
619
1
        dst_col->resize(start_idx + rows);
620
621
1
        auto& data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col.get())->get_data();
622
1
        size_t offset = 0;
        // NOTE(review): `i` is int while `rows` is size_t (signed/unsigned comparison).
623
3
        for (int i = 0; i < rows; i++) {
624
            // When Decimal in parquet is stored in byte arrays, binary and fixed,
625
            // the unscaled number must be encoded as two's complement using big-endian byte order.
626
2
            ValueCopyType value = 0;
            // NOTE(review): copies sizeof(value) bytes rather than fixed_type_length; for this
            // listing's length-8 / int64_t instantiation the two are presumably equal, but in
            // general this reads beyond the current element -- confirm the buffer is padded.
627
2
            memcpy(reinterpret_cast<char*>(&value), buf + offset, sizeof(value));
628
2
            offset += fixed_type_length;
629
2
            value = to_endian<std::endian::big>(value);
            // Shifts right by the number of surplus bytes (sizeof(value) - fixed_type_length);
            // presumably, after the byte-order conversion above, this discards the extra bytes
            // copied by memcpy and sign-extends the result -- confirm for every ValueCopyType.
630
2
            value = value >> ((sizeof(value) - fixed_type_length) * 8);
631
2
            auto& v = reinterpret_cast<DecimalType&>(data[start_idx + i]);
632
2
            v = (DecimalType)value;
633
2
        }
634
635
1
        return Status::OK();
636
1
    }
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi9EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi10EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi11EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi12EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi13EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi14EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi15EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi16EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi17EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi18EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi19EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi20EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi21EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi22EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi23EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi24EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi25EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi26EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi27EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi28EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi29EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi30EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi31EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi32EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi1ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi2ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi3ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi4ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi5ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi6ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi7ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi8ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi9EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi10EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi11EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi12EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi13EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi14EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi15EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi16EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi17EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi18EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi19EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi20EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi21EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi22EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi23EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi24EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi25EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi26EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi27EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi28EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi29EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi30EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi31EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi32EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi1ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi2ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi3ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi4ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi5ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi6ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi7ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi8ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi9EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi10EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi11EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi12EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi13EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi14EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi15EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi16EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EERNS8_11mutable_ptrIS7_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi17EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi18EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi19EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi20EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi21EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi22EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi23EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi24EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi25EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi26EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi27EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi28EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi29EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi30EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi31EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi32EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EERNSB_11mutable_ptrISA_EE
637
638
private:
639
    int32_t _type_length;
640
};
641
642
template <PrimitiveType DecimalPType>
643
class StringToDecimal : public PhysicalToLogicalConverter {
644
    using DecimalType = typename PrimitiveTypeTraits<DecimalPType>::CppType;
645
0
    Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override {
646
0
        using ValueCopyType = DecimalType::NativeType;
647
0
        ColumnPtr src_col = remove_nullable(src_physical_col);
648
0
        MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable();
649
650
0
        size_t rows = src_col->size();
651
0
        auto buf = static_cast<const ColumnString*>(src_col.get())->get_chars().data();
652
0
        auto& offset = static_cast<const ColumnString*>(src_col.get())->get_offsets();
653
0
        size_t start_idx = dst_col->size();
654
0
        dst_col->resize(start_idx + rows);
655
656
0
        auto& data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col.get())->get_data();
657
0
        for (int i = 0; i < rows; i++) {
658
0
            size_t len = offset[i] - offset[i - 1];
659
            // When Decimal in parquet is stored in byte arrays, binary and fixed,
660
            // the unscaled number must be encoded as two's complement using big-endian byte order.
661
0
            ValueCopyType value = 0;
662
0
            if (len > 0) {
663
0
                memcpy(reinterpret_cast<char*>(&value), buf + offset[i - 1], len);
664
0
                value = to_endian<std::endian::big>(value);
665
0
                value = value >> ((sizeof(value) - len) * 8);
666
0
            }
667
0
            auto& v = reinterpret_cast<DecimalType&>(data[start_idx + i]);
668
0
            v = (DecimalType)value;
669
0
        }
670
671
0
        return Status::OK();
672
0
    }
Unexecuted instantiation: _ZN5doris7parquet15StringToDecimalILNS_13PrimitiveTypeE28EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
Unexecuted instantiation: _ZN5doris7parquet15StringToDecimalILNS_13PrimitiveTypeE29EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
Unexecuted instantiation: _ZN5doris7parquet15StringToDecimalILNS_13PrimitiveTypeE30EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
Unexecuted instantiation: _ZN5doris7parquet15StringToDecimalILNS_13PrimitiveTypeE35EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
673
};
674
675
template <PrimitiveType NumberType, PrimitiveType DecimalPType>
676
class NumberToDecimal : public PhysicalToLogicalConverter {
677
    using DecimalType = typename PrimitiveTypeTraits<DecimalPType>::CppType;
678
2
    Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override {
679
2
        using ValueCopyType = typename DecimalType::NativeType;
680
2
        ColumnPtr src_col = remove_nullable(src_physical_col);
681
2
        MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable();
682
683
2
        size_t rows = src_col->size();
684
2
        auto* src_data =
685
2
                static_cast<const ColumnVector<NumberType>*>(src_col.get())->get_data().data();
686
2
        size_t start_idx = dst_col->size();
687
2
        dst_col->resize(start_idx + rows);
688
689
2
        auto* data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col.get())->get_data().data();
690
691
24
        for (int i = 0; i < rows; i++) {
692
22
            ValueCopyType value;
693
22
            if constexpr (std::is_same_v<DecimalType, Decimal256>) {
694
0
                value = src_data[i];
695
22
            } else {
696
22
                value = cast_set<ValueCopyType, typename PrimitiveTypeTraits<NumberType>::CppType,
697
22
                                 false>(src_data[i]);
698
22
            }
699
700
22
            data[start_idx + i] = (DecimalType)value;
701
22
        }
702
2
        return Status::OK();
703
2
    }
Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE5ELS2_28EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE6ELS2_28EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE5ELS2_29EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
_ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE6ELS2_29EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
Line
Count
Source
678
2
    Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override {
679
2
        using ValueCopyType = typename DecimalType::NativeType;
680
2
        ColumnPtr src_col = remove_nullable(src_physical_col);
681
2
        MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable();
682
683
2
        size_t rows = src_col->size();
684
2
        auto* src_data =
685
2
                static_cast<const ColumnVector<NumberType>*>(src_col.get())->get_data().data();
686
2
        size_t start_idx = dst_col->size();
687
2
        dst_col->resize(start_idx + rows);
688
689
2
        auto* data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col.get())->get_data().data();
690
691
24
        for (int i = 0; i < rows; i++) {
692
22
            ValueCopyType value;
693
            if constexpr (std::is_same_v<DecimalType, Decimal256>) {
694
                value = src_data[i];
695
22
            } else {
696
22
                value = cast_set<ValueCopyType, typename PrimitiveTypeTraits<NumberType>::CppType,
697
22
                                 false>(src_data[i]);
698
22
            }
699
700
22
            data[start_idx + i] = (DecimalType)value;
701
22
        }
702
2
        return Status::OK();
703
2
    }
Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE5ELS2_30EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE6ELS2_30EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE5ELS2_35EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE6ELS2_35EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
704
};
705
706
class Int32ToDate : public PhysicalToLogicalConverter {
707
7
    Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override {
708
7
        ColumnPtr src_col = remove_nullable(src_physical_col);
709
7
        MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable();
710
711
7
        size_t rows = src_col->size();
712
7
        size_t start_idx = dst_col->size();
713
7
        dst_col->reserve(start_idx + rows);
714
715
7
        auto& src_data = static_cast<const ColumnInt32*>(src_col.get())->get_data();
716
7
        auto& data = static_cast<ColumnDateV2*>(dst_col.get())->get_data();
717
7
        date_day_offset_dict& date_dict = date_day_offset_dict::get();
718
719
59
        for (int i = 0; i < rows; i++) {
720
52
            data.push_back_without_reserve(date_dict[src_data[i]].to_date_int_val());
721
52
        }
722
723
7
        return Status::OK();
724
7
    }
725
};
726
727
struct Int64ToTimestamp : public PhysicalToLogicalConverter {
728
5
    Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override {
729
5
        ColumnPtr src_col = remove_nullable(src_physical_col);
730
5
        MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable();
731
732
5
        size_t rows = src_col->size();
733
5
        size_t start_idx = dst_col->size();
734
5
        dst_col->resize(start_idx + rows);
735
736
5
        auto src_data = static_cast<const ColumnInt64*>(src_col.get())->get_data().data();
737
5
        auto& data = static_cast<ColumnDateTimeV2*>(dst_col.get())->get_data();
738
739
51
        for (int i = 0; i < rows; i++) {
740
46
            int64_t x = src_data[i];
741
46
            auto& num = data[start_idx + i];
742
46
            auto& value = reinterpret_cast<DateV2Value<DateTimeV2ValueType>&>(num);
743
46
            const int64_t epoch_seconds = x / _convert_params->second_mask;
744
46
            if (_convert_params->is_fixed_offset) {
745
46
                if (!detail::try_convert_timestamp_with_fixed_offset(
746
46
                            value, epoch_seconds, _convert_params->fixed_offset_seconds)) {
747
0
                    value.from_unixtime(epoch_seconds, *_convert_params->ctz);
748
0
                }
749
46
            } else if (!detail::try_convert_timestamp_with_lookup(value, epoch_seconds,
750
0
                                                                  *_convert_params->ctz)) {
751
0
                value.from_unixtime(epoch_seconds, *_convert_params->ctz);
752
0
            }
753
46
            value.set_microsecond((x % _convert_params->second_mask) *
754
46
                                  (_convert_params->scale_to_nano_factor / 1000));
755
46
        }
756
5
        return Status::OK();
757
5
    }
758
};
759
760
struct Int64ToTimestampTz : public PhysicalToLogicalConverter {
761
0
    Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override {
762
0
        ColumnPtr src_col = remove_nullable(src_physical_col);
763
0
        MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable();
764
765
0
        size_t rows = src_col->size();
766
0
        size_t start_idx = dst_col->size();
767
0
        dst_col->resize(start_idx + rows);
768
769
0
        const auto& src_data = assert_cast<const ColumnInt64*>(src_col.get())->get_data();
770
0
        auto& dest_data = assert_cast<ColumnTimeStampTz*>(dst_col.get())->get_data();
771
0
        static const cctz::time_zone UTC = cctz::utc_time_zone();
772
773
0
        for (int i = 0; i < rows; i++) {
774
0
            int64_t x = src_data[i];
775
0
            auto& tz = dest_data[start_idx + i];
776
0
            tz.from_unixtime(x / _convert_params->second_mask, UTC);
777
0
            tz.set_microsecond((x % _convert_params->second_mask) *
778
0
                               (_convert_params->scale_to_nano_factor / 1000));
779
0
        }
780
0
        return Status::OK();
781
0
    }
782
};
783
784
struct Int96toTimestamp : public PhysicalToLogicalConverter {
785
0
    Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override {
786
0
        ColumnPtr src_col = remove_nullable(src_physical_col);
787
0
        MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable();
788
789
0
        size_t rows = src_col->size() / sizeof(ParquetInt96);
790
0
        auto& src_data = static_cast<const ColumnInt8*>(src_col.get())->get_data();
791
0
        auto ParquetInt96_data = (ParquetInt96*)src_data.data();
792
0
        size_t start_idx = dst_col->size();
793
0
        dst_col->resize(start_idx + rows);
794
0
        auto& data = static_cast<ColumnDateTimeV2*>(dst_col.get())->get_data();
795
796
0
        for (int i = 0; i < rows; i++) {
797
0
            ParquetInt96 src_cell_data = ParquetInt96_data[i];
798
0
            auto& dst_value =
799
0
                    reinterpret_cast<DateV2Value<DateTimeV2ValueType>&>(data[start_idx + i]);
800
801
0
            int64_t timestamp_with_micros = src_cell_data.to_timestamp_micros();
802
0
            const int64_t epoch_seconds = timestamp_with_micros / 1000000;
803
0
            if (_convert_params->is_fixed_offset) {
804
0
                if (!detail::try_convert_timestamp_with_fixed_offset(
805
0
                            dst_value, epoch_seconds, _convert_params->fixed_offset_seconds)) {
806
0
                    dst_value.from_unixtime(epoch_seconds, *_convert_params->ctz);
807
0
                }
808
0
            } else if (!detail::try_convert_timestamp_with_lookup(dst_value, epoch_seconds,
809
0
                                                                  *_convert_params->ctz)) {
810
0
                dst_value.from_unixtime(epoch_seconds, *_convert_params->ctz);
811
0
            }
812
0
            dst_value.set_microsecond(timestamp_with_micros % 1000000);
813
0
        }
814
0
        return Status::OK();
815
0
    }
816
};
817
818
struct Int96toTimestampTz : public PhysicalToLogicalConverter {
819
0
    Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override {
820
0
        ColumnPtr src_col = remove_nullable(src_physical_col);
821
0
        MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable();
822
823
0
        size_t rows = src_col->size() / sizeof(ParquetInt96);
824
0
        const auto& src_data = assert_cast<const ColumnInt8*>(src_col.get())->get_data();
825
0
        auto* ParquetInt96_data = (ParquetInt96*)src_data.data();
826
0
        size_t start_idx = dst_col->size();
827
0
        dst_col->resize(start_idx + rows);
828
0
        auto& data = assert_cast<ColumnTimeStampTz*>(dst_col.get())->get_data();
829
0
        static const cctz::time_zone UTC = cctz::utc_time_zone();
830
831
0
        for (int i = 0; i < rows; i++) {
832
0
            ParquetInt96 src_cell_data = ParquetInt96_data[i];
833
0
            int64_t timestamp_with_micros = src_cell_data.to_timestamp_micros();
834
0
            auto& tz = data[start_idx + i];
835
0
            tz.from_unixtime(timestamp_with_micros / 1000000, UTC);
836
0
            tz.set_microsecond(timestamp_with_micros % 1000000);
837
0
        }
838
0
        return Status::OK();
839
0
    }
840
};
841
842
} // namespace doris::parquet