Coverage Report

Created: 2026-03-13 09:58

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/format/parquet/decoder.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "format/parquet/decoder.h"
19
20
#include <cctz/time_zone.h>
21
#include <gen_cpp/parquet_types.h>
22
23
#include "format/parquet/bool_plain_decoder.h"
24
#include "format/parquet/bool_rle_decoder.h"
25
#include "format/parquet/byte_array_dict_decoder.h"
26
#include "format/parquet/byte_array_plain_decoder.h"
27
#include "format/parquet/byte_stream_split_decoder.h"
28
#include "format/parquet/delta_bit_pack_decoder.h"
29
#include "format/parquet/fix_length_dict_decoder.hpp"
30
#include "format/parquet/fix_length_plain_decoder.h"
31
32
namespace doris {
33
#include "common/compile_check_begin.h"
34
Status Decoder::get_decoder(tparquet::Type::type type, tparquet::Encoding::type encoding,
35
169k
                            std::unique_ptr<Decoder>& decoder) {
36
169k
    switch (encoding) {
37
75.4k
    case tparquet::Encoding::PLAIN:
38
75.4k
        switch (type) {
39
5.48k
        case tparquet::Type::BOOLEAN:
40
5.48k
            decoder.reset(new BoolPlainDecoder());
41
5.48k
            break;
42
24.5k
        case tparquet::Type::BYTE_ARRAY:
43
24.5k
            decoder.reset(new ByteArrayPlainDecoder());
44
24.5k
            break;
45
22.5k
        case tparquet::Type::INT32:
46
33.2k
        case tparquet::Type::INT64:
47
33.3k
        case tparquet::Type::INT96:
48
35.7k
        case tparquet::Type::FLOAT:
49
39.8k
        case tparquet::Type::DOUBLE:
50
45.4k
        case tparquet::Type::FIXED_LEN_BYTE_ARRAY:
51
45.4k
            decoder.reset(new FixLengthPlainDecoder());
52
45.4k
            break;
53
0
        default:
54
0
            return Status::InternalError("Unsupported type {}(encoding={}) in parquet decoder",
55
0
                                         tparquet::to_string(type), tparquet::to_string(encoding));
56
75.4k
        }
57
75.4k
        break;
58
91.3k
    case tparquet::Encoding::RLE_DICTIONARY:
59
91.3k
        switch (type) {
60
0
        case tparquet::Type::BOOLEAN:
61
0
            return Status::InternalError("Bool type can't has dictionary page");
62
28.6k
        case tparquet::Type::BYTE_ARRAY:
63
28.6k
            decoder.reset(new ByteArrayDictDecoder());
64
28.6k
            break;
65
26.8k
        case tparquet::Type::INT32:
66
26.8k
            decoder.reset(new FixLengthDictDecoder<tparquet::Type::INT32>());
67
26.8k
            break;
68
13.0k
        case tparquet::Type::INT64:
69
13.0k
            decoder.reset(new FixLengthDictDecoder<tparquet::Type::INT64>());
70
13.0k
            break;
71
1.46k
        case tparquet::Type::INT96:
72
1.46k
            decoder.reset(new FixLengthDictDecoder<tparquet::Type::INT96>());
73
1.46k
            break;
74
3.45k
        case tparquet::Type::FLOAT:
75
3.45k
            decoder.reset(new FixLengthDictDecoder<tparquet::Type::FLOAT>());
76
3.45k
            break;
77
8.15k
        case tparquet::Type::DOUBLE:
78
8.15k
            decoder.reset(new FixLengthDictDecoder<tparquet::Type::DOUBLE>());
79
8.15k
            break;
80
9.66k
        case tparquet::Type::FIXED_LEN_BYTE_ARRAY:
81
9.66k
            decoder.reset(new FixLengthDictDecoder<tparquet::Type::FIXED_LEN_BYTE_ARRAY>());
82
9.66k
            break;
83
0
        default:
84
0
            return Status::InternalError("Unsupported type {}(encoding={}) in parquet decoder",
85
0
                                         tparquet::to_string(type), tparquet::to_string(encoding));
86
91.3k
        }
87
91.3k
        break;
88
91.3k
    case tparquet::Encoding::RLE:
89
766
        switch (type) {
90
766
        case tparquet::Type::BOOLEAN:
91
766
            decoder.reset(new BoolRLEDecoder());
92
766
            break;
93
0
        default:
94
0
            return Status::InternalError("Unsupported type {}(encoding={}) in parquet decoder",
95
0
                                         tparquet::to_string(type), tparquet::to_string(encoding));
96
766
        }
97
766
        break;
98
1.76k
    case tparquet::Encoding::DELTA_BINARY_PACKED:
99
        // Supports only INT32 and INT64.
100
1.76k
        switch (type) {
101
100
        case tparquet::Type::INT32:
102
100
            decoder.reset(new DeltaBitPackDecoder<int32_t>());
103
100
            break;
104
1.66k
        case tparquet::Type::INT64:
105
1.66k
            decoder.reset(new DeltaBitPackDecoder<int64_t>());
106
1.66k
            break;
107
0
        default:
108
0
            return Status::InternalError("DELTA_BINARY_PACKED only supports INT32 and INT64");
109
1.76k
        }
110
1.76k
        break;
111
1.76k
    case tparquet::Encoding::DELTA_BYTE_ARRAY:
112
260
        switch (type) {
113
260
        case tparquet::Type::BYTE_ARRAY:
114
260
        case tparquet::Type::FIXED_LEN_BYTE_ARRAY:
115
260
            decoder.reset(new DeltaByteArrayDecoder());
116
260
            break;
117
0
        default:
118
0
            return Status::InternalError(
119
0
                    "DELTA_BYTE_ARRAY only supports BYTE_ARRAY, FIXED_LEN_BYTE_ARRAY.");
120
260
        }
121
260
        break;
122
260
    case tparquet::Encoding::DELTA_LENGTH_BYTE_ARRAY:
123
28
        switch (type) {
124
28
        case tparquet::Type::BYTE_ARRAY:
125
28
            decoder.reset(new DeltaLengthByteArrayDecoder());
126
28
            break;
127
0
        default:
128
0
            return Status::InternalError("DELTA_LENGTH_BYTE_ARRAY only supports BYTE_ARRAY.");
129
28
        }
130
28
        break;
131
28
    case tparquet::Encoding::BYTE_STREAM_SPLIT:
132
22
        switch (type) {
133
2
        case tparquet::Type::INT32:
134
4
        case tparquet::Type::INT64:
135
4
        case tparquet::Type::INT96:
136
10
        case tparquet::Type::FLOAT:
137
16
        case tparquet::Type::DOUBLE:
138
22
        case tparquet::Type::FIXED_LEN_BYTE_ARRAY:
139
22
            decoder.reset(new ByteStreamSplitDecoder());
140
22
            break;
141
0
        default:
142
0
            return Status::InternalError("Unsupported type {}(encoding={}) in parquet decoder",
143
0
                                         tparquet::to_string(type), tparquet::to_string(encoding));
144
22
        }
145
22
        break;
146
22
    default:
147
0
        return Status::InternalError("Unsupported encoding {}(type={}) in parquet decoder",
148
0
                                     tparquet::to_string(encoding), tparquet::to_string(type));
149
169k
    }
150
169k
    return Status::OK();
151
169k
}
152
#include "common/compile_check_end.h"
153
154
} // namespace doris