Coverage Report

Created: 2026-05-25 21:24

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/format/parquet/parquet_column_convert.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <cctz/time_zone.h>
21
#include <gen_cpp/parquet_types.h>
22
#include <libdivide.h>
23
24
#include <chrono>
25
#include <limits>
26
27
#include "common/cast_set.h"
28
#include "core/column/column_fixed_length_object.h"
29
#include "core/column/column_varbinary.h"
30
#include "core/column/column_vector.h"
31
#include "core/data_type/data_type_factory.hpp"
32
#include "core/data_type/primitive_type.h"
33
#include "core/extended_types.h"
34
#include "core/field.h"
35
#include "core/types.h"
36
#include "format/column_type_convert.h"
37
#include "format/format_common.h"
38
#include "format/parquet/decoder.h"
39
#include "format/parquet/parquet_common.h"
40
#include "format/parquet/schema_desc.h"
41
#include "util/timezone_utils.h"
42
43
namespace doris::parquet {
44
namespace detail {
45
46
inline bool try_split_local_time(int64_t local_time, uint16_t* year, uint8_t* month, uint8_t* day,
47
62
                                 uint8_t* hour, uint8_t* minute, uint8_t* second) {
48
62
    static const libdivide::divider<int64_t> fast_div_86400(86400);
49
62
    static const libdivide::divider<int64_t> fast_div_3600(3600);
50
62
    static const libdivide::divider<int64_t> fast_div_60(60);
51
62
    static constexpr int64_t kMinSupportedDays = -365LL * 10000;
52
62
    static constexpr int64_t kMaxSupportedDays = 365LL * 10000;
53
54
62
    int64_t days = local_time / fast_div_86400;
55
62
    int64_t second_of_day = local_time - days * 86400;
56
62
    if (second_of_day < 0) {
57
3
        second_of_day += 86400;
58
3
        --days;
59
3
    }
60
62
    if (days < kMinSupportedDays || days > kMaxSupportedDays) {
61
0
        return false;
62
0
    }
63
64
62
    const auto ymd = std::chrono::year_month_day {std::chrono::sys_days {std::chrono::days {days}}};
65
62
    const int y = static_cast<int>(ymd.year());
66
62
    if (y < 0 || y > std::numeric_limits<uint16_t>::max()) {
67
0
        return false;
68
0
    }
69
70
62
    const int64_t h = second_of_day / fast_div_3600;
71
62
    const int64_t minute_second = second_of_day - h * 3600;
72
62
    const int64_t m = minute_second / fast_div_60;
73
62
    const int64_t s = minute_second - m * 60;
74
75
62
    *year = static_cast<uint16_t>(y);
76
62
    *month = static_cast<uint8_t>(static_cast<unsigned>(ymd.month()));
77
62
    *day = static_cast<uint8_t>(static_cast<unsigned>(ymd.day()));
78
62
    *hour = static_cast<uint8_t>(h);
79
62
    *minute = static_cast<uint8_t>(m);
80
62
    *second = static_cast<uint8_t>(s);
81
62
    return true;
82
62
}
83
84
template <typename DateType>
85
inline bool try_convert_timestamp_with_fixed_offset(DateType& value, int64_t epoch_seconds,
86
62
                                                    int32_t offset_seconds) {
87
62
    uint16_t year = 0;
88
62
    uint8_t month = 0;
89
62
    uint8_t day = 0;
90
62
    uint8_t hour = 0;
91
62
    uint8_t minute = 0;
92
62
    uint8_t second = 0;
93
62
    if (!try_split_local_time(epoch_seconds + offset_seconds, &year, &month, &day, &hour, &minute,
94
62
                              &second)) {
95
0
        return false;
96
0
    }
97
    // The caller sets sub-second precision immediately after this conversion.
98
62
    value.unchecked_set_time(year, month, day, hour, minute, second, 0);
99
62
    return true;
100
62
}
101
102
template <typename DateType>
103
inline bool try_convert_timestamp_with_lookup(DateType& value, int64_t epoch_seconds,
104
10
                                              const cctz::time_zone& ctz) {
105
10
    static const auto epoch = std::chrono::time_point_cast<cctz::sys_seconds>(
106
10
            std::chrono::system_clock::from_time_t(0));
107
10
    cctz::time_point<cctz::sys_seconds> t = epoch + cctz::seconds(epoch_seconds);
108
10
    const int32_t offset = ctz.lookup_offset(t).offset;
109
10
    return try_convert_timestamp_with_fixed_offset(value, epoch_seconds, offset);
110
10
}
111
112
} // namespace detail
113
114
struct ConvertParams {
115
    // schema.logicalType.TIMESTAMP.isAdjustedToUTC == false
116
    static const cctz::time_zone utc0;
117
    // schema.logicalType.TIMESTAMP.isAdjustedToUTC == true, we should set local time zone
118
    const cctz::time_zone* ctz = nullptr;
119
    bool is_fixed_offset = false;
120
    int32_t fixed_offset_seconds = 0;
121
    int64_t second_mask = 1;
122
    int64_t scale_to_nano_factor = 1;
123
    const FieldSchema* field_schema = nullptr;
124
125
    //For UInt8 -> Int16,UInt16 -> Int32,UInt32 -> Int64,UInt64 -> Int128.
126
    bool is_type_compatibility = false;
127
128
    /**
129
     * Some frameworks like paimon maybe writes non-standard parquet files. Timestamp field doesn't have
130
     * logicalType or converted_type to indicates its precision. We have to reset the time mask.
131
     */
132
5
    void reset_time_scale_if_missing(int scale) {
133
5
        const auto& schema = field_schema->parquet_schema;
134
5
        if (!schema.__isset.logicalType && !schema.__isset.converted_type) {
135
0
            int ts_scale = 9;
136
0
            if (scale <= 3) {
137
0
                ts_scale = 3;
138
0
            } else if (scale <= 6) {
139
0
                ts_scale = 6;
140
0
            }
141
0
            second_mask = common::exp10_i64(ts_scale);
142
0
            scale_to_nano_factor = common::exp10_i64(9 - ts_scale);
143
144
            // The missing parque metadata makes it impossible for us to know the time zone information,
145
            // so we default to UTC here.
146
0
            if (ctz == nullptr) {
147
0
                ctz = &utc0;
148
0
            }
149
0
        }
150
5
    }
151
152
227
    void init(const FieldSchema* field_schema_, const cctz::time_zone* ctz_) {
153
227
        field_schema = field_schema_;
154
227
        if (ctz_ != nullptr) {
155
225
            ctz = ctz_;
156
225
        }
157
227
        const auto& schema = field_schema->parquet_schema;
158
227
        if (schema.__isset.logicalType && schema.logicalType.__isset.TIMESTAMP) {
159
5
            const auto& timestamp_info = schema.logicalType.TIMESTAMP;
160
5
            if (!timestamp_info.isAdjustedToUTC) {
161
                // should set timezone to utc+0
162
                // Reference: https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#instant-semantics-timestamps-normalized-to-utc
163
                // If isAdjustedToUTC = false, the reader should display the same value no mater what local time zone is. For example:
164
                // When a timestamp is stored as `1970-01-03 12:00:00`,
165
                // if isAdjustedToUTC = true, UTC8 should read as `1970-01-03 20:00:00`, UTC6 should read as `1970-01-03 18:00:00`
166
                // if isAdjustedToUTC = false, UTC8 and UTC6 should read as `1970-01-03 12:00:00`, which is the same as `1970-01-03 12:00:00` in UTC0
167
2
                ctz = &utc0;
168
2
            }
169
5
            const auto& time_unit = timestamp_info.unit;
170
5
            if (time_unit.__isset.MILLIS) {
171
1
                second_mask = 1000;
172
1
                scale_to_nano_factor = 1000000;
173
4
            } else if (time_unit.__isset.MICROS) {
174
4
                second_mask = 1000000;
175
4
                scale_to_nano_factor = 1000;
176
4
            } else if (time_unit.__isset.NANOS) {
177
0
                second_mask = 1000000000;
178
0
                scale_to_nano_factor = 1;
179
0
            }
180
222
        } else if (schema.__isset.converted_type) {
181
61
            const auto& converted_type = schema.converted_type;
182
61
            if (converted_type == tparquet::ConvertedType::TIMESTAMP_MILLIS) {
183
0
                second_mask = 1000;
184
0
                scale_to_nano_factor = 1000000;
185
61
            } else if (converted_type == tparquet::ConvertedType::TIMESTAMP_MICROS) {
186
4
                second_mask = 1000000;
187
4
                scale_to_nano_factor = 1000;
188
4
            }
189
61
        }
190
191
227
        if (ctz != nullptr) {
192
225
            is_fixed_offset =
193
225
                    TimezoneUtils::try_get_fixed_offset_seconds(*ctz, &fixed_offset_seconds);
194
225
        }
195
227
        is_type_compatibility = field_schema_->is_type_compatibility;
196
227
    }
197
};
198
199
46
inline IColumn* get_mutable_inner_column(ColumnPtr& column) {
200
46
    column = IColumn::mutate(std::move(column));
201
46
    auto mutable_column = column->assert_mutable();
202
46
    if (mutable_column->is_nullable()) {
203
42
        return &assert_cast<ColumnNullable*>(mutable_column.get())->get_nested_column();
204
42
    }
205
4
    return mutable_column.get();
206
46
}
207
208
110
inline size_t get_mutable_inner_column_size(const ColumnPtr& column) {
209
110
    if (column->is_nullable()) {
210
110
        const auto* nullable = assert_cast<const ColumnNullable*>(column.get());
211
110
        return nullable->get_nested_column().size();
212
110
    }
213
0
    return column->size();
214
110
}
215
216
56
inline size_t get_null_map_size_or_inner_column_size(const ColumnPtr& column) {
217
56
    if (column->is_nullable()) {
218
56
        const auto* nullable = assert_cast<const ColumnNullable*>(column.get());
219
56
        return nullable->get_null_map_column().size();
220
56
    }
221
0
    return column->size();
222
56
}
223
224
56
inline size_t get_appended_null_map_start(const ColumnPtr& column, size_t new_rows) {
225
56
    if (!column->is_nullable()) {
226
0
        return 0;
227
0
    }
228
56
    const auto* nullable = assert_cast<const ColumnNullable*>(column.get());
229
56
    const size_t null_map_size = nullable->get_null_map_column().size();
230
56
    DCHECK_GE(null_map_size, new_rows);
231
56
    return null_map_size - new_rows;
232
56
}
233
234
inline void align_null_map(ColumnPtr& src_column, ColumnPtr& dst_column, size_t old_null_map_size,
235
57
                           size_t new_rows, size_t src_null_map_start = 0) {
236
57
    if (!dst_column->is_nullable()) {
237
0
        return;
238
0
    }
239
240
57
    dst_column = IColumn::mutate(std::move(dst_column));
241
57
    auto* dst_nullable = assert_cast<ColumnNullable*>(dst_column->assert_mutable().get());
242
57
    auto& dst_null_map = dst_nullable->get_null_map_column();
243
57
    const size_t expected_rows = old_null_map_size + new_rows;
244
57
    if (dst_null_map.size() == expected_rows) {
245
16
        return;
246
16
    }
247
57
    DCHECK_EQ(dst_null_map.size(), old_null_map_size);
248
41
    if (src_column->is_nullable()) {
249
41
        const auto* src_nullable = assert_cast<const ColumnNullable*>(src_column.get());
250
41
        DCHECK_GE(src_nullable->get_null_map_column().size(), src_null_map_start + new_rows);
251
41
        dst_null_map.insert_range_from(src_nullable->get_null_map_column(), src_null_map_start,
252
41
                                       new_rows);
253
41
    } else {
254
0
        dst_null_map.insert_many_vals(0, new_rows);
255
0
    }
256
41
}
257
258
struct FixedLengthPhysicalData {
259
    const uint8_t* data = nullptr;
260
    size_t byte_size = 0;
261
    size_t rows = 0;
262
};
263
264
inline FixedLengthPhysicalData get_fixed_length_physical_data(const IColumn& column,
265
4
                                                              size_t type_length) {
266
4
    if (const auto* fixed_length_column = check_and_get_column<ColumnFixedLengthObject>(column)) {
267
2
        DCHECK_EQ(fixed_length_column->item_size(), type_length);
268
2
        return {fixed_length_column->get_data().data(), fixed_length_column->byte_size(),
269
2
                fixed_length_column->size()};
270
2
    }
271
272
2
    const auto& uint8_column = assert_cast<const ColumnUInt8&>(column);
273
2
    DCHECK_EQ(uint8_column.size() % type_length, 0);
274
2
    return {uint8_column.get_data().data(), uint8_column.size(), uint8_column.size() / type_length};
275
4
}
276
277
/**
278
 * Convert parquet physical column to logical column
279
 * In parquet document(https://github.com/apache/parquet-format/blob/master/LogicalTypes.md),
280
 * Logical or converted type is the data type of column, physical type is the stored type of column chunk.
281
 * eg, decimal type can be stored as INT32, INT64, BYTE_ARRAY, FIXED_LENGTH_BYTE_ARRAY.
282
 * So there is a convert process from physical type to logical type.
283
 * In addition, Schema change will bring about a change in logical type.
284
 *
285
 * `PhysicalToLogicalConverter` strips away the conversion of logical type, and reuse `ColumnTypeConverter`
286
 * to resolve schema change, allowing parquet reader to only focus on the conversion of physical types.
287
 *
288
 * Therefore, tow layers converters are designed:
289
 * First, read parquet data with the physical type
290
 * Second, convert physical type to logical type
291
 * Third, convert logical type to the final type planned by FE(schema change)
292
 *
293
 * Ultimate performance optimization:
294
 * 1. If process of (First => Second) is consistent, eg. from BYTE_ARRAY to string, no additional copies and conversions will be introduced;
295
 * 2. If process of (Second => Third) is consistent, no additional copies and conversions will be introduced;
296
 * 3. Null maps are owned by each temporary nullable column, and only appended null slices are
297
 *    copied between conversion stages;
298
 * 4. Only create one physical column in physical conversion, and reused in each loop;
299
 * 5. Only create one logical column in logical conversion, and reused in each loop;
300
 * 6. FIXED_LENGTH_BYTE_ARRAY is read as ColumnFixedLengthObject instead of ColumnString, so
301
 *    the decoder can copy fixed-size values as a whole while keeping nullable row counts valid.
302
 */
303
class PhysicalToLogicalConverter {
304
protected:
305
    ColumnPtr _cached_src_physical_column = nullptr;
306
    DataTypePtr _cached_src_physical_type = nullptr;
307
    std::unique_ptr<converter::ColumnTypeConverter> _logical_converter = nullptr;
308
309
    std::string _error_msg;
310
311
    std::unique_ptr<ConvertParams> _convert_params;
312
313
public:
314
    static std::unique_ptr<PhysicalToLogicalConverter> get_converter(
315
            const FieldSchema* field_schema, DataTypePtr src_logical_type,
316
            const DataTypePtr& dst_logical_type, const cctz::time_zone* ctz,
317
            bool is_dict_filter = false);
318
319
    static bool is_parquet_native_type(PrimitiveType type);
320
321
    static bool is_decimal_type(PrimitiveType type);
322
323
223
    PhysicalToLogicalConverter() = default;
324
223
    virtual ~PhysicalToLogicalConverter() = default;
325
326
5
    virtual Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) {
327
5
        return Status::OK();
328
5
    }
329
330
    Status convert(ColumnPtr& src_physical_col, DataTypePtr src_logical_type,
331
                   const DataTypePtr& dst_logical_type, ColumnPtr& dst_logical_col,
332
321
                   bool is_dict_filter) {
333
321
        if (is_dict_filter) {
334
0
            src_logical_type = DataTypeFactory::instance().create_data_type(
335
0
                    PrimitiveType::TYPE_INT, dst_logical_type->is_nullable());
336
0
        }
337
321
        if (is_consistent() && _logical_converter->is_consistent()) {
338
274
            dst_logical_col = std::move(src_physical_col);
339
274
            return Status::OK();
340
274
        }
341
47
        if (_logical_converter->is_consistent()) {
342
39
            const size_t old_rows = get_mutable_inner_column_size(dst_logical_col);
343
39
            const size_t old_null_map_size =
344
39
                    get_null_map_size_or_inner_column_size(dst_logical_col);
345
39
            RETURN_IF_ERROR(physical_convert(src_physical_col, dst_logical_col));
346
39
            const size_t new_rows = get_mutable_inner_column_size(dst_logical_col) - old_rows;
347
39
            align_null_map(src_physical_col, dst_logical_col, old_null_map_size, new_rows,
348
39
                           get_appended_null_map_start(src_physical_col, new_rows));
349
39
            return Status::OK();
350
39
        }
351
352
8
        ColumnPtr src_logical_column;
353
8
        if (is_consistent()) {
354
5
            src_logical_column = src_physical_col;
355
5
        } else {
356
3
            src_logical_column = _logical_converter->get_column(src_logical_type, dst_logical_col,
357
3
                                                                dst_logical_type);
358
3
        }
359
8
        const size_t src_old_rows = get_mutable_inner_column_size(src_logical_column);
360
8
        const size_t src_old_null_map_size =
361
8
                get_null_map_size_or_inner_column_size(src_logical_column);
362
8
        RETURN_IF_ERROR(physical_convert(src_physical_col, src_logical_column));
363
8
        const size_t src_new_rows =
364
8
                get_mutable_inner_column_size(src_logical_column) - src_old_rows;
365
8
        align_null_map(src_physical_col, src_logical_column, src_old_null_map_size, src_new_rows,
366
8
                       get_appended_null_map_start(src_physical_col, src_new_rows));
367
368
8
        dst_logical_col = IColumn::mutate(std::move(dst_logical_col));
369
8
        const size_t dst_old_rows = get_mutable_inner_column_size(dst_logical_col);
370
8
        const size_t dst_old_null_map_size =
371
8
                get_null_map_size_or_inner_column_size(dst_logical_col);
372
8
        auto converted_column = dst_logical_col->assert_mutable();
373
8
        RETURN_IF_ERROR(_logical_converter->convert(src_logical_column, converted_column));
374
8
        const size_t dst_new_rows = get_mutable_inner_column_size(dst_logical_col) - dst_old_rows;
375
8
        align_null_map(src_logical_column, dst_logical_col, dst_old_null_map_size, dst_new_rows,
376
8
                       get_appended_null_map_start(src_logical_column, dst_new_rows));
377
8
        return Status::OK();
378
8
    }
379
380
    virtual ColumnPtr get_physical_column(tparquet::Type::type src_physical_type,
381
                                          DataTypePtr src_logical_type,
382
                                          ColumnPtr& dst_logical_column,
383
                                          const DataTypePtr& dst_logical_type, bool is_dict_filter);
384
385
319
    DataTypePtr& get_physical_type() { return _cached_src_physical_type; }
386
387
319
    bool read_directly_into_dst_logical_column() {
388
319
        return !_convert_params->is_type_compatibility && is_consistent() &&
389
319
               _logical_converter->is_consistent();
390
319
    }
391
392
131
    virtual bool is_consistent() { return false; }
393
394
372
    virtual bool support() { return true; }
395
396
0
    std::string get_error_msg() { return _error_msg; }
397
};
398
399
class ConsistentPhysicalConverter : public PhysicalToLogicalConverter {
400
911
    bool is_consistent() override { return true; }
401
};
402
403
class UnsupportedConverter : public PhysicalToLogicalConverter {
404
public:
405
0
    UnsupportedConverter(std::string error_msg) { _error_msg = error_msg; }
406
407
    UnsupportedConverter(tparquet::Type::type src_physical_type,
408
0
                         const DataTypePtr& src_logical_type) {
409
0
        std::string src_physical_str = tparquet::to_string(src_physical_type);
410
0
        std::string src_logical_str = src_logical_type->get_name();
411
0
        _error_msg = src_physical_str + " => " + src_logical_str;
412
0
    }
413
414
0
    bool support() override { return false; }
415
416
0
    Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override {
417
0
        return Status::InternalError("Unsupported physical to logical type: {}", _error_msg);
418
0
    }
419
};
420
421
// for tinyint, smallint
422
template <PrimitiveType IntPrimitiveType>
423
class LittleIntPhysicalConverter : public PhysicalToLogicalConverter {
424
28
    Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override {
425
28
        using DstCppType = typename PrimitiveTypeTraits<IntPrimitiveType>::CppType;
426
28
        using DstColumnType = typename PrimitiveTypeTraits<IntPrimitiveType>::ColumnType;
427
28
        ColumnPtr from_col = remove_nullable(src_physical_col);
428
28
        IColumn* to_col = get_mutable_inner_column(src_logical_column);
429
430
28
        size_t rows = from_col->size();
431
        // always comes from tparquet::Type::INT32
432
28
        auto& src_data = assert_cast<const ColumnInt32*>(from_col.get())->get_data();
433
28
        size_t start_idx = to_col->size();
434
28
        to_col->resize(start_idx + rows);
435
28
        auto& data = assert_cast<DstColumnType&>(*to_col).get_data();
436
108
        for (int i = 0; i < rows; ++i) {
437
80
            data[start_idx + i] = static_cast<DstCppType>(src_data[i]);
438
80
        }
439
440
28
        return Status::OK();
441
28
    }
_ZN5doris7parquet26LittleIntPhysicalConverterILNS_13PrimitiveTypeE3EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
Line
Count
Source
424
17
    Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override {
425
17
        using DstCppType = typename PrimitiveTypeTraits<IntPrimitiveType>::CppType;
426
17
        using DstColumnType = typename PrimitiveTypeTraits<IntPrimitiveType>::ColumnType;
427
17
        ColumnPtr from_col = remove_nullable(src_physical_col);
428
17
        IColumn* to_col = get_mutable_inner_column(src_logical_column);
429
430
17
        size_t rows = from_col->size();
431
        // always comes from tparquet::Type::INT32
432
17
        auto& src_data = assert_cast<const ColumnInt32*>(from_col.get())->get_data();
433
17
        size_t start_idx = to_col->size();
434
17
        to_col->resize(start_idx + rows);
435
17
        auto& data = assert_cast<DstColumnType&>(*to_col).get_data();
436
59
        for (int i = 0; i < rows; ++i) {
437
42
            data[start_idx + i] = static_cast<DstCppType>(src_data[i]);
438
42
        }
439
440
17
        return Status::OK();
441
17
    }
_ZN5doris7parquet26LittleIntPhysicalConverterILNS_13PrimitiveTypeE4EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
Line
Count
Source
424
11
    Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override {
425
11
        using DstCppType = typename PrimitiveTypeTraits<IntPrimitiveType>::CppType;
426
11
        using DstColumnType = typename PrimitiveTypeTraits<IntPrimitiveType>::ColumnType;
427
11
        ColumnPtr from_col = remove_nullable(src_physical_col);
428
11
        IColumn* to_col = get_mutable_inner_column(src_logical_column);
429
430
11
        size_t rows = from_col->size();
431
        // always comes from tparquet::Type::INT32
432
11
        auto& src_data = assert_cast<const ColumnInt32*>(from_col.get())->get_data();
433
11
        size_t start_idx = to_col->size();
434
11
        to_col->resize(start_idx + rows);
435
11
        auto& data = assert_cast<DstColumnType&>(*to_col).get_data();
436
49
        for (int i = 0; i < rows; ++i) {
437
38
            data[start_idx + i] = static_cast<DstCppType>(src_data[i]);
438
38
        }
439
440
11
        return Status::OK();
441
11
    }
442
};
443
444
template <PrimitiveType type>
445
struct UnsignedTypeTraits;
446
447
template <>
448
struct UnsignedTypeTraits<TYPE_SMALLINT> {
449
    using UnsignedCppType = UInt8;
450
    //https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#unsigned-integers
451
    //INT(8, false), INT(16, false), and INT(32, false) must annotate an int32 primitive type and INT(64, false)
452
    //must annotate an int64 primitive type.
453
    using StorageCppType = Int32;
454
    using StorageColumnType = ColumnInt32;
455
};
456
457
template <>
458
struct UnsignedTypeTraits<TYPE_INT> {
459
    using UnsignedCppType = UInt16;
460
    using StorageCppType = Int32;
461
    using StorageColumnType = ColumnInt32;
462
};
463
464
template <>
465
struct UnsignedTypeTraits<TYPE_BIGINT> {
466
    using UnsignedCppType = UInt32;
467
    using StorageCppType = Int32;
468
    using StorageColumnType = ColumnInt32;
469
};
470
471
template <>
472
struct UnsignedTypeTraits<TYPE_LARGEINT> {
473
    using UnsignedCppType = UInt64;
474
    using StorageCppType = Int64;
475
    using StorageColumnType = ColumnInt64;
476
};
477
478
template <PrimitiveType IntPrimitiveType>
479
class UnsignedIntegerConverter : public PhysicalToLogicalConverter {
480
0
    Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override {
481
0
        using UnsignedCppType = typename UnsignedTypeTraits<IntPrimitiveType>::UnsignedCppType;
482
0
        using StorageCppType = typename UnsignedTypeTraits<IntPrimitiveType>::StorageCppType;
483
0
        using StorageColumnType = typename UnsignedTypeTraits<IntPrimitiveType>::StorageColumnType;
484
0
        using DstColumnType = typename PrimitiveTypeTraits<IntPrimitiveType>::ColumnType;
485
486
0
        ColumnPtr from_col = remove_nullable(src_physical_col);
487
0
        IColumn* to_col = get_mutable_inner_column(src_logical_column);
488
0
        auto& src_data = assert_cast<const StorageColumnType*>(from_col.get())->get_data();
489
490
0
        size_t rows = src_data.size();
491
0
        size_t start_idx = to_col->size();
492
0
        to_col->resize(start_idx + rows);
493
0
        auto& data = assert_cast<DstColumnType&>(*to_col).get_data();
494
495
0
        for (int i = 0; i < rows; i++) {
496
0
            StorageCppType src_value = src_data[i];
497
0
            auto unsigned_value = static_cast<UnsignedCppType>(src_value);
498
0
            data[start_idx + i] = unsigned_value;
499
0
        }
500
501
0
        return Status::OK();
502
0
    }
Unexecuted instantiation: _ZN5doris7parquet24UnsignedIntegerConverterILNS_13PrimitiveTypeE4EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
Unexecuted instantiation: _ZN5doris7parquet24UnsignedIntegerConverterILNS_13PrimitiveTypeE5EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
Unexecuted instantiation: _ZN5doris7parquet24UnsignedIntegerConverterILNS_13PrimitiveTypeE6EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
Unexecuted instantiation: _ZN5doris7parquet24UnsignedIntegerConverterILNS_13PrimitiveTypeE7EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
503
};
504
505
class FixedSizeBinaryConverter : public PhysicalToLogicalConverter {
506
private:
507
    int _type_length;
508
509
public:
510
1
    FixedSizeBinaryConverter(int type_length) : _type_length(type_length) {}
511
512
1
    Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override {
513
1
        ColumnPtr from_col = remove_nullable(src_physical_col);
514
1
        IColumn* to_col = get_mutable_inner_column(src_logical_column);
515
516
1
        const auto src_data = get_fixed_length_physical_data(*from_col, _type_length);
517
1
        size_t length = src_data.byte_size;
518
1
        size_t num_values = src_data.rows;
519
1
        auto& string_col = static_cast<ColumnString&>(*to_col);
520
1
        auto& offsets = string_col.get_offsets();
521
1
        auto& chars = string_col.get_chars();
522
523
1
        size_t origin_size = chars.size();
524
1
        chars.resize(origin_size + length);
525
1
        memcpy(chars.data() + origin_size, src_data.data, length);
526
527
1
        origin_size = offsets.size();
528
1
        offsets.resize(origin_size + num_values);
529
1
        auto end_offset = offsets[origin_size - 1];
530
4
        for (int i = 0; i < num_values; ++i) {
531
3
            end_offset += _type_length;
532
3
            offsets[origin_size + i] = end_offset;
533
3
        }
534
535
1
        return Status::OK();
536
1
    }
537
};
538
539
class Float16PhysicalConverter : public PhysicalToLogicalConverter {
540
private:
541
    int _type_length;
542
543
public:
544
0
    Float16PhysicalConverter(int type_length) : _type_length(type_length) {
545
0
        DCHECK_EQ(_type_length, 2);
546
0
    }
547
548
0
    Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override {
549
0
        ColumnPtr from_col = remove_nullable(src_physical_col);
550
0
        IColumn* to_col = get_mutable_inner_column(src_logical_column);
551
552
0
        const auto src_data = get_fixed_length_physical_data(*from_col, _type_length);
553
0
        size_t num_values = src_data.rows;
554
0
        auto* to_float_column = assert_cast<ColumnFloat32*>(to_col);
555
0
        size_t start_idx = to_float_column->size();
556
0
        to_float_column->resize(start_idx + num_values);
557
0
        auto& to_float_column_data = to_float_column->get_data();
558
0
        const auto* ptr = src_data.data;
559
0
        for (int i = 0; i < num_values; ++i) {
560
0
            size_t offset = i * _type_length;
561
0
            const auto* data_ptr = ptr + offset;
562
0
            uint16_t raw;
563
0
            memcpy(&raw, data_ptr, sizeof(uint16_t));
564
0
            float value = half_to_float(raw);
565
0
            to_float_column_data[start_idx + i] = value;
566
0
        }
567
568
0
        return Status::OK();
569
0
    }
570
571
0
    float half_to_float(uint16_t h) {
572
        // uint16_t h: half precision floating point
573
        // bit 15:       sign(1 bit)
574
        // bits 14..10 : exponent(5 bits)
575
        // bits 9..0   : mantissa(10 bits)
576
577
        // sign bit placed to float32 bit31
578
0
        uint32_t sign = (h & 0x8000U) << 16; // 0x8000 << 16 = 0x8000_0000
579
        // exponent:(5 bits)
580
0
        uint32_t exp = (h & 0x7C00U) >> 10; // 0x7C00 = 0111 1100 0000 (half exponent mask)
581
        // mantissa(10 bits)
582
0
        uint32_t mant = (h & 0x03FFU); // 10-bit fraction
583
584
        // cases:Zero/Subnormal, Normal, Inf/NaN
585
0
        if (exp == 0) {
586
            // exp==0: Zero or Subnormal ----------
587
0
            if (mant == 0) {
588
                // ±0.0
589
                // sign = either 0x00000000 or 0x80000000
590
0
                return std::bit_cast<float>(sign);
591
0
            } else {
592
                // ---------- Subnormal ----------
593
                // half subnormal:
594
                //    value = (-1)^sign * (mant / 2^10) * 2^(1 - bias)
595
                // half bias = 15 → exponent = 1 - 15 = -14
596
0
                float f = (static_cast<float>(mant) / 1024.0F) * std::powf(2.0F, -14.0F);
597
0
                return sign ? -f : f;
598
0
            }
599
0
        } else if (exp == 0x1F) {
600
            // exp==31: Inf or NaN ----------
601
            // float32:
602
            //    exponent = 255 (0xFF)
603
            //    mantissa = mant << 13
604
0
            uint32_t f = sign | 0x7F800000U | (mant << 13);
605
0
            return std::bit_cast<float>(f);
606
0
        } else {
607
            // Normalized ----------
608
            // float32 exponent:
609
            //   exp32 = exp16 - bias16 + bias32
610
            //   bias16 = 15
611
            //   bias32 = 127
612
            //
613
            // so: exp32 = exp + (127 - 15)
614
0
            uint32_t f = sign | ((exp + (127 - 15)) << 23) // place to float32 exponent
615
0
                         | (mant << 13);                   // mantissa align to 23 bits
616
0
            return std::bit_cast<float>(f);
617
0
        }
618
0
    }
619
};
620
621
class UUIDVarBinaryConverter : public PhysicalToLogicalConverter {
622
public:
623
1
    UUIDVarBinaryConverter(int type_length) : _type_length(type_length) {}
624
625
1
    Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override {
626
1
        DCHECK(!is_column_const(*src_physical_col)) << src_physical_col->dump_structure();
627
1
        DCHECK(!is_column_const(*src_logical_column)) << src_logical_column->dump_structure();
628
1
        const ColumnPtr from_col = remove_nullable(src_physical_col);
629
1
        const auto src_data = get_fixed_length_physical_data(*from_col, _type_length);
630
631
1
        IColumn* to_col = get_mutable_inner_column(src_logical_column);
632
1
        auto* to_varbinary_column = assert_cast<ColumnVarbinary*>(to_col);
633
1
        size_t num_values = src_data.rows;
634
1
        const auto* ptr = src_data.data;
635
636
4
        for (int i = 0; i < num_values; ++i) {
637
3
            auto offset = i * _type_length;
638
3
            const char* data_ptr = reinterpret_cast<const char*>(ptr + offset);
639
3
            to_varbinary_column->insert_data(data_ptr, _type_length);
640
3
        }
641
1
        return Status::OK();
642
1
    }
643
644
private:
645
    int _type_length;
646
};
647
648
template <PrimitiveType DecimalPType>
649
class FixedSizeToDecimal : public PhysicalToLogicalConverter {
650
public:
651
    using DecimalType = typename PrimitiveTypeTraits<DecimalPType>::CppType;
652
2
    FixedSizeToDecimal(int32_t type_length) : _type_length(type_length) {}
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EEC2Ei
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EEC2Ei
Line
Count
Source
652
2
    FixedSizeToDecimal(int32_t type_length) : _type_length(type_length) {}
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EEC2Ei
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EEC2Ei
653
654
2
    Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override {
655
2
        ColumnPtr src_col = remove_nullable(src_physical_col);
656
2
        IColumn* dst_col = get_mutable_inner_column(src_logical_column);
657
658
2
#define M(FixedTypeLength, ValueCopyType) \
659
2
    case FixedTypeLength:                 \
660
2
        return _convert_internal<FixedTypeLength, ValueCopyType>(src_col, dst_col);
661
662
2
#define APPLY_FOR_DECIMALS() \
663
2
    M(1, int64_t)            \
664
0
    M(2, int64_t)            \
665
0
    M(3, int64_t)            \
666
0
    M(4, int64_t)            \
667
1
    M(5, int64_t)            \
668
1
    M(6, int64_t)            \
669
0
    M(7, int64_t)            \
670
1
    M(8, int64_t)            \
671
1
    M(9, int128_t)           \
672
0
    M(10, int128_t)          \
673
0
    M(11, int128_t)          \
674
0
    M(12, int128_t)          \
675
0
    M(13, int128_t)          \
676
0
    M(14, int128_t)          \
677
0
    M(15, int128_t)          \
678
0
    M(16, int128_t)          \
679
0
    M(17, wide::Int256)      \
680
0
    M(18, wide::Int256)      \
681
0
    M(19, wide::Int256)      \
682
0
    M(20, wide::Int256)      \
683
0
    M(21, wide::Int256)      \
684
0
    M(22, wide::Int256)      \
685
0
    M(23, wide::Int256)      \
686
0
    M(24, wide::Int256)      \
687
0
    M(25, wide::Int256)      \
688
0
    M(26, wide::Int256)      \
689
0
    M(27, wide::Int256)      \
690
0
    M(28, wide::Int256)      \
691
0
    M(29, wide::Int256)      \
692
0
    M(30, wide::Int256)      \
693
0
    M(31, wide::Int256)      \
694
0
    M(32, wide::Int256)
695
696
2
        switch (_type_length) {
697
0
            APPLY_FOR_DECIMALS()
698
0
        default:
699
0
            throw Exception(Status::FatalError("__builtin_unreachable"));
700
2
        }
701
0
        return Status::OK();
702
2
#undef APPLY_FOR_DECIMALS
703
2
#undef M
704
2
    }
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
Line
Count
Source
654
2
    Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override {
655
2
        ColumnPtr src_col = remove_nullable(src_physical_col);
656
2
        IColumn* dst_col = get_mutable_inner_column(src_logical_column);
657
658
2
#define M(FixedTypeLength, ValueCopyType) \
659
2
    case FixedTypeLength:                 \
660
2
        return _convert_internal<FixedTypeLength, ValueCopyType>(src_col, dst_col);
661
662
2
#define APPLY_FOR_DECIMALS() \
663
2
    M(1, int64_t)            \
664
2
    M(2, int64_t)            \
665
2
    M(3, int64_t)            \
666
2
    M(4, int64_t)            \
667
2
    M(5, int64_t)            \
668
2
    M(6, int64_t)            \
669
2
    M(7, int64_t)            \
670
2
    M(8, int64_t)            \
671
2
    M(9, int128_t)           \
672
2
    M(10, int128_t)          \
673
2
    M(11, int128_t)          \
674
2
    M(12, int128_t)          \
675
2
    M(13, int128_t)          \
676
2
    M(14, int128_t)          \
677
2
    M(15, int128_t)          \
678
2
    M(16, int128_t)          \
679
2
    M(17, wide::Int256)      \
680
2
    M(18, wide::Int256)      \
681
2
    M(19, wide::Int256)      \
682
2
    M(20, wide::Int256)      \
683
2
    M(21, wide::Int256)      \
684
2
    M(22, wide::Int256)      \
685
2
    M(23, wide::Int256)      \
686
2
    M(24, wide::Int256)      \
687
2
    M(25, wide::Int256)      \
688
2
    M(26, wide::Int256)      \
689
2
    M(27, wide::Int256)      \
690
2
    M(28, wide::Int256)      \
691
2
    M(29, wide::Int256)      \
692
2
    M(30, wide::Int256)      \
693
2
    M(31, wide::Int256)      \
694
2
    M(32, wide::Int256)
695
696
2
        switch (_type_length) {
697
0
            APPLY_FOR_DECIMALS()
698
0
        default:
699
0
            throw Exception(Status::FatalError("__builtin_unreachable"));
700
2
        }
701
0
        return Status::OK();
702
2
#undef APPLY_FOR_DECIMALS
703
2
#undef M
704
2
    }
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
705
706
    template <int fixed_type_length, typename ValueCopyType>
707
2
    Status _convert_internal(ColumnPtr& src_col, IColumn* dst_col) {
708
2
        const auto src_data = get_fixed_length_physical_data(*src_col, fixed_type_length);
709
2
        size_t rows = src_data.rows;
710
2
        const auto* buf = src_data.data;
711
2
        size_t start_idx = dst_col->size();
712
2
        dst_col->resize(start_idx + rows);
713
714
2
        auto& data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col)->get_data();
715
2
        size_t offset = 0;
716
6
        for (int i = 0; i < rows; i++) {
717
            // When Decimal in parquet is stored in byte arrays, binary and fixed,
718
            // the unscaled number must be encoded as two's complement using big-endian byte order.
719
4
            ValueCopyType value = 0;
720
4
            memcpy(reinterpret_cast<char*>(&value), buf + offset, sizeof(value));
721
4
            offset += fixed_type_length;
722
4
            value = to_endian<std::endian::big>(value);
723
4
            value = value >> ((sizeof(value) - fixed_type_length) * 8);
724
4
            auto& v = reinterpret_cast<DecimalType&>(data[start_idx + i]);
725
4
            v = (DecimalType)value;
726
4
        }
727
728
2
        return Status::OK();
729
2
    }
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi1ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi2ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi3ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi4ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi5ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi6ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi7ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi8ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi9EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi10EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi11EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi12EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi13EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi14EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi15EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi16EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi17EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi18EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi19EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi20EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi21EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi22EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi23EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi24EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi25EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi26EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi27EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi28EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi29EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi30EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi31EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE28EE17_convert_internalILi32EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi1ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi2ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi3ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi4ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi5ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Line
Count
Source
707
1
    Status _convert_internal(ColumnPtr& src_col, IColumn* dst_col) {
708
1
        const auto src_data = get_fixed_length_physical_data(*src_col, fixed_type_length);
709
1
        size_t rows = src_data.rows;
710
1
        const auto* buf = src_data.data;
711
1
        size_t start_idx = dst_col->size();
712
1
        dst_col->resize(start_idx + rows);
713
714
1
        auto& data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col)->get_data();
715
1
        size_t offset = 0;
716
3
        for (int i = 0; i < rows; i++) {
717
            // When Decimal in parquet is stored in byte arrays, binary and fixed,
718
            // the unscaled number must be encoded as two's complement using big-endian byte order.
719
2
            ValueCopyType value = 0;
720
2
            memcpy(reinterpret_cast<char*>(&value), buf + offset, sizeof(value));
721
2
            offset += fixed_type_length;
722
2
            value = to_endian<std::endian::big>(value);
723
2
            value = value >> ((sizeof(value) - fixed_type_length) * 8);
724
2
            auto& v = reinterpret_cast<DecimalType&>(data[start_idx + i]);
725
2
            v = (DecimalType)value;
726
2
        }
727
728
1
        return Status::OK();
729
1
    }
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi6ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi7ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
_ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi8ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Line
Count
Source
707
1
    Status _convert_internal(ColumnPtr& src_col, IColumn* dst_col) {
708
1
        const auto src_data = get_fixed_length_physical_data(*src_col, fixed_type_length);
709
1
        size_t rows = src_data.rows;
710
1
        const auto* buf = src_data.data;
711
1
        size_t start_idx = dst_col->size();
712
1
        dst_col->resize(start_idx + rows);
713
714
1
        auto& data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col)->get_data();
715
1
        size_t offset = 0;
716
3
        for (int i = 0; i < rows; i++) {
717
            // When Decimal in parquet is stored in byte arrays, binary and fixed,
718
            // the unscaled number must be encoded as two's complement using big-endian byte order.
719
2
            ValueCopyType value = 0;
720
2
            memcpy(reinterpret_cast<char*>(&value), buf + offset, sizeof(value));
721
2
            offset += fixed_type_length;
722
2
            value = to_endian<std::endian::big>(value);
723
2
            value = value >> ((sizeof(value) - fixed_type_length) * 8);
724
2
            auto& v = reinterpret_cast<DecimalType&>(data[start_idx + i]);
725
2
            v = (DecimalType)value;
726
2
        }
727
728
1
        return Status::OK();
729
1
    }
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi9EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi10EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi11EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi12EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi13EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi14EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi15EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi16EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi17EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi18EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi19EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi20EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi21EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi22EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi23EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi24EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi25EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi26EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi27EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi28EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi29EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi30EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi31EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE29EE17_convert_internalILi32EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi1ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi2ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi3ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi4ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi5ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi6ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi7ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi8ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi9EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi10EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi11EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi12EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi13EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi14EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi15EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi16EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi17EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi18EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi19EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi20EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi21EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi22EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi23EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi24EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi25EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi26EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi27EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi28EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi29EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi30EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi31EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE30EE17_convert_internalILi32EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi1ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi2ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi3ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi4ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi5ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi6ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi7ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi8ElEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi9EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi10EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi11EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi12EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi13EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi14EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi15EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi16EnEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrIS7_EEPS7_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi17EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi18EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi19EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi20EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi21EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi22EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi23EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi24EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi25EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi26EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi27EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi28EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi29EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi30EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi31EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
Unexecuted instantiation: _ZN5doris7parquet18FixedSizeToDecimalILNS_13PrimitiveTypeE35EE17_convert_internalILi32EN4wide7integerILm256EiEEEENS_6StatusERNS_3COWINS_7IColumnEE13immutable_ptrISA_EEPSA_
730
731
private:
732
    int32_t _type_length;
733
};
734
735
template <PrimitiveType DecimalPType>
736
class StringToDecimal : public PhysicalToLogicalConverter {
737
    using DecimalType = typename PrimitiveTypeTraits<DecimalPType>::CppType;
738
0
    Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override {
739
0
        using ValueCopyType = DecimalType::NativeType;
740
0
        ColumnPtr src_col = remove_nullable(src_physical_col);
741
0
        IColumn* dst_col = get_mutable_inner_column(src_logical_column);
742
743
0
        size_t rows = src_col->size();
744
0
        auto buf = static_cast<const ColumnString*>(src_col.get())->get_chars().data();
745
0
        auto& offset = static_cast<const ColumnString*>(src_col.get())->get_offsets();
746
0
        size_t start_idx = dst_col->size();
747
0
        dst_col->resize(start_idx + rows);
748
749
0
        auto& data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col)->get_data();
750
0
        for (int i = 0; i < rows; i++) {
751
0
            size_t len = offset[i] - offset[i - 1];
752
            // When Decimal in parquet is stored in byte arrays, binary and fixed,
753
            // the unscaled number must be encoded as two's complement using big-endian byte order.
754
0
            ValueCopyType value = 0;
755
0
            if (len > 0) {
756
0
                memcpy(reinterpret_cast<char*>(&value), buf + offset[i - 1], len);
757
0
                value = to_endian<std::endian::big>(value);
758
0
                value = value >> ((sizeof(value) - len) * 8);
759
0
            }
760
0
            auto& v = reinterpret_cast<DecimalType&>(data[start_idx + i]);
761
0
            v = (DecimalType)value;
762
0
        }
763
764
0
        return Status::OK();
765
0
    }
Unexecuted instantiation: _ZN5doris7parquet15StringToDecimalILNS_13PrimitiveTypeE28EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
Unexecuted instantiation: _ZN5doris7parquet15StringToDecimalILNS_13PrimitiveTypeE29EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
Unexecuted instantiation: _ZN5doris7parquet15StringToDecimalILNS_13PrimitiveTypeE30EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
Unexecuted instantiation: _ZN5doris7parquet15StringToDecimalILNS_13PrimitiveTypeE35EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
766
};
767
768
template <PrimitiveType NumberType, PrimitiveType DecimalPType>
769
class NumberToDecimal : public PhysicalToLogicalConverter {
770
    using DecimalType = typename PrimitiveTypeTraits<DecimalPType>::CppType;
771
2
    Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override {
772
2
        using ValueCopyType = typename DecimalType::NativeType;
773
2
        ColumnPtr src_col = remove_nullable(src_physical_col);
774
2
        IColumn* dst_col = get_mutable_inner_column(src_logical_column);
775
776
2
        size_t rows = src_col->size();
777
2
        auto* src_data =
778
2
                static_cast<const ColumnVector<NumberType>*>(src_col.get())->get_data().data();
779
2
        size_t start_idx = dst_col->size();
780
2
        dst_col->resize(start_idx + rows);
781
782
2
        auto* data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col)->get_data().data();
783
784
24
        for (int i = 0; i < rows; i++) {
785
22
            ValueCopyType value;
786
22
            if constexpr (std::is_same_v<DecimalType, Decimal256>) {
787
0
                value = src_data[i];
788
22
            } else {
789
22
                value = cast_set<ValueCopyType, typename PrimitiveTypeTraits<NumberType>::CppType,
790
22
                                 false>(src_data[i]);
791
22
            }
792
793
22
            data[start_idx + i] = (DecimalType)value;
794
22
        }
795
2
        return Status::OK();
796
2
    }
Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE5ELS2_28EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE6ELS2_28EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE5ELS2_29EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
_ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE6ELS2_29EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
Line
Count
Source
771
2
    Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override {
772
2
        using ValueCopyType = typename DecimalType::NativeType;
773
2
        ColumnPtr src_col = remove_nullable(src_physical_col);
774
2
        IColumn* dst_col = get_mutable_inner_column(src_logical_column);
775
776
2
        size_t rows = src_col->size();
777
2
        auto* src_data =
778
2
                static_cast<const ColumnVector<NumberType>*>(src_col.get())->get_data().data();
779
2
        size_t start_idx = dst_col->size();
780
2
        dst_col->resize(start_idx + rows);
781
782
2
        auto* data = static_cast<ColumnDecimal<DecimalPType>*>(dst_col)->get_data().data();
783
784
24
        for (int i = 0; i < rows; i++) {
785
22
            ValueCopyType value;
786
            if constexpr (std::is_same_v<DecimalType, Decimal256>) {
787
                value = src_data[i];
788
22
            } else {
789
22
                value = cast_set<ValueCopyType, typename PrimitiveTypeTraits<NumberType>::CppType,
790
22
                                 false>(src_data[i]);
791
22
            }
792
793
22
            data[start_idx + i] = (DecimalType)value;
794
22
        }
795
2
        return Status::OK();
796
2
    }
Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE5ELS2_30EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE6ELS2_30EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE5ELS2_35EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
Unexecuted instantiation: _ZN5doris7parquet15NumberToDecimalILNS_13PrimitiveTypeE6ELS2_35EE16physical_convertERNS_3COWINS_7IColumnEE13immutable_ptrIS5_EES9_
797
};
798
799
class Int32ToDate : public PhysicalToLogicalConverter {
800
7
    Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override {
801
7
        ColumnPtr src_col = remove_nullable(src_physical_col);
802
7
        IColumn* dst_col = get_mutable_inner_column(src_logical_column);
803
804
7
        size_t rows = src_col->size();
805
7
        size_t start_idx = dst_col->size();
806
7
        dst_col->reserve(start_idx + rows);
807
808
7
        auto& src_data = static_cast<const ColumnInt32*>(src_col.get())->get_data();
809
7
        auto& data = static_cast<ColumnDateV2*>(dst_col)->get_data();
810
7
        date_day_offset_dict& date_dict = date_day_offset_dict::get();
811
812
59
        for (int i = 0; i < rows; i++) {
813
52
            data.push_back_without_reserve(date_dict[src_data[i]].to_date_int_val());
814
52
        }
815
816
7
        return Status::OK();
817
7
    }
818
};
819
820
struct Int64ToTimestamp : public PhysicalToLogicalConverter {
821
5
    Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override {
822
5
        ColumnPtr src_col = remove_nullable(src_physical_col);
823
5
        IColumn* dst_col = get_mutable_inner_column(src_logical_column);
824
825
5
        size_t rows = src_col->size();
826
5
        size_t start_idx = dst_col->size();
827
5
        dst_col->resize(start_idx + rows);
828
829
5
        auto src_data = static_cast<const ColumnInt64*>(src_col.get())->get_data().data();
830
5
        auto& data = static_cast<ColumnDateTimeV2*>(dst_col)->get_data();
831
832
51
        for (int i = 0; i < rows; i++) {
833
46
            int64_t x = src_data[i];
834
46
            auto& num = data[start_idx + i];
835
46
            auto& value = reinterpret_cast<DateV2Value<DateTimeV2ValueType>&>(num);
836
46
            const int64_t epoch_seconds = x / _convert_params->second_mask;
837
46
            if (_convert_params->is_fixed_offset) {
838
46
                if (!detail::try_convert_timestamp_with_fixed_offset(
839
46
                            value, epoch_seconds, _convert_params->fixed_offset_seconds)) {
840
0
                    value.from_unixtime(epoch_seconds, *_convert_params->ctz);
841
0
                }
842
46
            } else if (!detail::try_convert_timestamp_with_lookup(value, epoch_seconds,
843
0
                                                                  *_convert_params->ctz)) {
844
0
                value.from_unixtime(epoch_seconds, *_convert_params->ctz);
845
0
            }
846
46
            value.set_microsecond((x % _convert_params->second_mask) *
847
46
                                  (_convert_params->scale_to_nano_factor / 1000));
848
46
        }
849
5
        return Status::OK();
850
5
    }
851
};
852
853
struct Int64ToTimestampTz : public PhysicalToLogicalConverter {
854
0
    Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override {
855
0
        ColumnPtr src_col = remove_nullable(src_physical_col);
856
0
        IColumn* dst_col = get_mutable_inner_column(src_logical_column);
857
858
0
        size_t rows = src_col->size();
859
0
        size_t start_idx = dst_col->size();
860
0
        dst_col->resize(start_idx + rows);
861
862
0
        const auto& src_data = assert_cast<const ColumnInt64*>(src_col.get())->get_data();
863
0
        auto& dest_data = assert_cast<ColumnTimeStampTz*>(dst_col)->get_data();
864
0
        static const cctz::time_zone UTC = cctz::utc_time_zone();
865
866
0
        for (int i = 0; i < rows; i++) {
867
0
            int64_t x = src_data[i];
868
0
            auto& tz = dest_data[start_idx + i];
869
0
            tz.from_unixtime(x / _convert_params->second_mask, UTC);
870
0
            tz.set_microsecond((x % _convert_params->second_mask) *
871
0
                               (_convert_params->scale_to_nano_factor / 1000));
872
0
        }
873
0
        return Status::OK();
874
0
    }
875
};
876
877
struct Int96toTimestamp : public PhysicalToLogicalConverter {
878
0
    Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override {
879
0
        ColumnPtr src_col = remove_nullable(src_physical_col);
880
0
        IColumn* dst_col = get_mutable_inner_column(src_logical_column);
881
882
0
        size_t rows = src_col->size() / sizeof(ParquetInt96);
883
0
        auto& src_data = static_cast<const ColumnInt8*>(src_col.get())->get_data();
884
0
        auto ParquetInt96_data = (ParquetInt96*)src_data.data();
885
0
        size_t start_idx = dst_col->size();
886
0
        dst_col->resize(start_idx + rows);
887
0
        auto& data = static_cast<ColumnDateTimeV2*>(dst_col)->get_data();
888
889
0
        for (int i = 0; i < rows; i++) {
890
0
            ParquetInt96 src_cell_data = ParquetInt96_data[i];
891
0
            auto& dst_value =
892
0
                    reinterpret_cast<DateV2Value<DateTimeV2ValueType>&>(data[start_idx + i]);
893
894
0
            int64_t timestamp_with_micros = src_cell_data.to_timestamp_micros();
895
0
            const int64_t epoch_seconds = timestamp_with_micros / 1000000;
896
0
            if (_convert_params->is_fixed_offset) {
897
0
                if (!detail::try_convert_timestamp_with_fixed_offset(
898
0
                            dst_value, epoch_seconds, _convert_params->fixed_offset_seconds)) {
899
0
                    dst_value.from_unixtime(epoch_seconds, *_convert_params->ctz);
900
0
                }
901
0
            } else if (!detail::try_convert_timestamp_with_lookup(dst_value, epoch_seconds,
902
0
                                                                  *_convert_params->ctz)) {
903
0
                dst_value.from_unixtime(epoch_seconds, *_convert_params->ctz);
904
0
            }
905
0
            dst_value.set_microsecond(timestamp_with_micros % 1000000);
906
0
        }
907
0
        return Status::OK();
908
0
    }
909
};
910
911
struct Int96toTimestampTz : public PhysicalToLogicalConverter {
912
0
    Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override {
913
0
        ColumnPtr src_col = remove_nullable(src_physical_col);
914
0
        IColumn* dst_col = get_mutable_inner_column(src_logical_column);
915
916
0
        size_t rows = src_col->size() / sizeof(ParquetInt96);
917
0
        const auto& src_data = assert_cast<const ColumnInt8*>(src_col.get())->get_data();
918
0
        auto* ParquetInt96_data = (ParquetInt96*)src_data.data();
919
0
        size_t start_idx = dst_col->size();
920
0
        dst_col->resize(start_idx + rows);
921
0
        auto& data = assert_cast<ColumnTimeStampTz*>(dst_col)->get_data();
922
0
        static const cctz::time_zone UTC = cctz::utc_time_zone();
923
924
0
        for (int i = 0; i < rows; i++) {
925
0
            ParquetInt96 src_cell_data = ParquetInt96_data[i];
926
0
            int64_t timestamp_with_micros = src_cell_data.to_timestamp_micros();
927
0
            auto& tz = data[start_idx + i];
928
0
            tz.from_unixtime(timestamp_with_micros / 1000000, UTC);
929
0
            tz.set_microsecond(timestamp_with_micros % 1000000);
930
0
        }
931
0
        return Status::OK();
932
0
    }
933
};
934
935
} // namespace doris::parquet