Coverage Report

Created: 2026-04-22 07:12

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/format/parquet/decoder.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "format/parquet/decoder.h"
19
20
#include <cctz/time_zone.h>
21
#include <gen_cpp/parquet_types.h>
22
23
#include "format/parquet/bool_plain_decoder.h"
24
#include "format/parquet/bool_rle_decoder.h"
25
#include "format/parquet/byte_array_dict_decoder.h"
26
#include "format/parquet/byte_array_plain_decoder.h"
27
#include "format/parquet/byte_stream_split_decoder.h"
28
#include "format/parquet/delta_bit_pack_decoder.h"
29
#include "format/parquet/fix_length_dict_decoder.hpp"
30
#include "format/parquet/fix_length_plain_decoder.h"
31
32
namespace doris {
33
Status Decoder::get_decoder(tparquet::Type::type type, tparquet::Encoding::type encoding,
34
163k
                            std::unique_ptr<Decoder>& decoder) {
35
163k
    switch (encoding) {
36
70.5k
    case tparquet::Encoding::PLAIN:
37
70.5k
        switch (type) {
38
5.35k
        case tparquet::Type::BOOLEAN:
39
5.35k
            decoder.reset(new BoolPlainDecoder());
40
5.35k
            break;
41
22.7k
        case tparquet::Type::BYTE_ARRAY:
42
22.7k
            decoder.reset(new ByteArrayPlainDecoder());
43
22.7k
            break;
44
21.2k
        case tparquet::Type::INT32:
45
31.6k
        case tparquet::Type::INT64:
46
31.7k
        case tparquet::Type::INT96:
47
34.0k
        case tparquet::Type::FLOAT:
48
38.1k
        case tparquet::Type::DOUBLE:
49
42.5k
        case tparquet::Type::FIXED_LEN_BYTE_ARRAY:
50
42.5k
            decoder.reset(new FixLengthPlainDecoder());
51
42.5k
            break;
52
0
        default:
53
0
            return Status::InternalError("Unsupported type {}(encoding={}) in parquet decoder",
54
0
                                         tparquet::to_string(type), tparquet::to_string(encoding));
55
70.5k
        }
56
70.5k
        break;
57
91.2k
    case tparquet::Encoding::RLE_DICTIONARY:
58
91.2k
        switch (type) {
59
0
        case tparquet::Type::BOOLEAN:
60
0
            return Status::InternalError("Bool type can't has dictionary page");
61
28.3k
        case tparquet::Type::BYTE_ARRAY:
62
28.3k
            decoder.reset(new ByteArrayDictDecoder());
63
28.3k
            break;
64
27.0k
        case tparquet::Type::INT32:
65
27.0k
            decoder.reset(new FixLengthDictDecoder<tparquet::Type::INT32>());
66
27.0k
            break;
67
12.5k
        case tparquet::Type::INT64:
68
12.5k
            decoder.reset(new FixLengthDictDecoder<tparquet::Type::INT64>());
69
12.5k
            break;
70
1.64k
        case tparquet::Type::INT96:
71
1.64k
            decoder.reset(new FixLengthDictDecoder<tparquet::Type::INT96>());
72
1.64k
            break;
73
3.38k
        case tparquet::Type::FLOAT:
74
3.38k
            decoder.reset(new FixLengthDictDecoder<tparquet::Type::FLOAT>());
75
3.38k
            break;
76
8.07k
        case tparquet::Type::DOUBLE:
77
8.07k
            decoder.reset(new FixLengthDictDecoder<tparquet::Type::DOUBLE>());
78
8.07k
            break;
79
10.1k
        case tparquet::Type::FIXED_LEN_BYTE_ARRAY:
80
10.1k
            decoder.reset(new FixLengthDictDecoder<tparquet::Type::FIXED_LEN_BYTE_ARRAY>());
81
10.1k
            break;
82
0
        default:
83
0
            return Status::InternalError("Unsupported type {}(encoding={}) in parquet decoder",
84
0
                                         tparquet::to_string(type), tparquet::to_string(encoding));
85
91.2k
        }
86
91.2k
        break;
87
91.2k
    case tparquet::Encoding::RLE:
88
398
        switch (type) {
89
398
        case tparquet::Type::BOOLEAN:
90
398
            decoder.reset(new BoolRLEDecoder());
91
398
            break;
92
0
        default:
93
0
            return Status::InternalError("Unsupported type {}(encoding={}) in parquet decoder",
94
0
                                         tparquet::to_string(type), tparquet::to_string(encoding));
95
398
        }
96
398
        break;
97
1.05k
    case tparquet::Encoding::DELTA_BINARY_PACKED:
98
        // Supports only INT32 and INT64.
99
1.05k
        switch (type) {
100
72
        case tparquet::Type::INT32:
101
72
            decoder.reset(new DeltaBitPackDecoder<int32_t>());
102
72
            break;
103
978
        case tparquet::Type::INT64:
104
978
            decoder.reset(new DeltaBitPackDecoder<int64_t>());
105
978
            break;
106
0
        default:
107
0
            return Status::InternalError("DELTA_BINARY_PACKED only supports INT32 and INT64");
108
1.05k
        }
109
1.05k
        break;
110
1.05k
    case tparquet::Encoding::DELTA_BYTE_ARRAY:
111
180
        switch (type) {
112
180
        case tparquet::Type::BYTE_ARRAY:
113
180
        case tparquet::Type::FIXED_LEN_BYTE_ARRAY:
114
180
            decoder.reset(new DeltaByteArrayDecoder());
115
180
            break;
116
0
        default:
117
0
            return Status::InternalError(
118
0
                    "DELTA_BYTE_ARRAY only supports BYTE_ARRAY, FIXED_LEN_BYTE_ARRAY.");
119
180
        }
120
180
        break;
121
180
    case tparquet::Encoding::DELTA_LENGTH_BYTE_ARRAY:
122
28
        switch (type) {
123
28
        case tparquet::Type::BYTE_ARRAY:
124
28
            decoder.reset(new DeltaLengthByteArrayDecoder());
125
28
            break;
126
0
        default:
127
0
            return Status::InternalError("DELTA_LENGTH_BYTE_ARRAY only supports BYTE_ARRAY.");
128
28
        }
129
28
        break;
130
28
    case tparquet::Encoding::BYTE_STREAM_SPLIT:
131
22
        switch (type) {
132
2
        case tparquet::Type::INT32:
133
4
        case tparquet::Type::INT64:
134
4
        case tparquet::Type::INT96:
135
10
        case tparquet::Type::FLOAT:
136
16
        case tparquet::Type::DOUBLE:
137
22
        case tparquet::Type::FIXED_LEN_BYTE_ARRAY:
138
22
            decoder.reset(new ByteStreamSplitDecoder());
139
22
            break;
140
0
        default:
141
0
            return Status::InternalError("Unsupported type {}(encoding={}) in parquet decoder",
142
0
                                         tparquet::to_string(type), tparquet::to_string(encoding));
143
22
        }
144
22
        break;
145
22
    default:
146
0
        return Status::InternalError("Unsupported encoding {}(type={}) in parquet decoder",
147
0
                                     tparquet::to_string(encoding), tparquet::to_string(type));
148
163k
    }
149
163k
    return Status::OK();
150
163k
}
151
152
} // namespace doris