be/src/format/parquet/decoder.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "format/parquet/decoder.h" |
19 | | |
20 | | #include <cctz/time_zone.h> |
21 | | #include <gen_cpp/parquet_types.h> |
22 | | |
23 | | #include "format/parquet/bool_plain_decoder.h" |
24 | | #include "format/parquet/bool_rle_decoder.h" |
25 | | #include "format/parquet/byte_array_dict_decoder.h" |
26 | | #include "format/parquet/byte_array_plain_decoder.h" |
27 | | #include "format/parquet/byte_stream_split_decoder.h" |
28 | | #include "format/parquet/delta_bit_pack_decoder.h" |
29 | | #include "format/parquet/fix_length_dict_decoder.hpp" |
30 | | #include "format/parquet/fix_length_plain_decoder.h" |
31 | | |
32 | | namespace doris { |
33 | | #include "common/compile_check_begin.h" |
34 | | Status Decoder::get_decoder(tparquet::Type::type type, tparquet::Encoding::type encoding, |
35 | 169k | std::unique_ptr<Decoder>& decoder) { |
36 | 169k | switch (encoding) { |
37 | 75.4k | case tparquet::Encoding::PLAIN: |
38 | 75.4k | switch (type) { |
39 | 5.48k | case tparquet::Type::BOOLEAN: |
40 | 5.48k | decoder.reset(new BoolPlainDecoder()); |
41 | 5.48k | break; |
42 | 24.5k | case tparquet::Type::BYTE_ARRAY: |
43 | 24.5k | decoder.reset(new ByteArrayPlainDecoder()); |
44 | 24.5k | break; |
45 | 22.5k | case tparquet::Type::INT32: |
46 | 33.2k | case tparquet::Type::INT64: |
47 | 33.3k | case tparquet::Type::INT96: |
48 | 35.7k | case tparquet::Type::FLOAT: |
49 | 39.8k | case tparquet::Type::DOUBLE: |
50 | 45.4k | case tparquet::Type::FIXED_LEN_BYTE_ARRAY: |
51 | 45.4k | decoder.reset(new FixLengthPlainDecoder()); |
52 | 45.4k | break; |
53 | 0 | default: |
54 | 0 | return Status::InternalError("Unsupported type {}(encoding={}) in parquet decoder", |
55 | 0 | tparquet::to_string(type), tparquet::to_string(encoding)); |
56 | 75.4k | } |
57 | 75.4k | break; |
58 | 91.3k | case tparquet::Encoding::RLE_DICTIONARY: |
59 | 91.3k | switch (type) { |
60 | 0 | case tparquet::Type::BOOLEAN: |
61 | 0 | return Status::InternalError("Bool type can't has dictionary page"); |
62 | 28.6k | case tparquet::Type::BYTE_ARRAY: |
63 | 28.6k | decoder.reset(new ByteArrayDictDecoder()); |
64 | 28.6k | break; |
65 | 26.8k | case tparquet::Type::INT32: |
66 | 26.8k | decoder.reset(new FixLengthDictDecoder<tparquet::Type::INT32>()); |
67 | 26.8k | break; |
68 | 13.0k | case tparquet::Type::INT64: |
69 | 13.0k | decoder.reset(new FixLengthDictDecoder<tparquet::Type::INT64>()); |
70 | 13.0k | break; |
71 | 1.46k | case tparquet::Type::INT96: |
72 | 1.46k | decoder.reset(new FixLengthDictDecoder<tparquet::Type::INT96>()); |
73 | 1.46k | break; |
74 | 3.45k | case tparquet::Type::FLOAT: |
75 | 3.45k | decoder.reset(new FixLengthDictDecoder<tparquet::Type::FLOAT>()); |
76 | 3.45k | break; |
77 | 8.15k | case tparquet::Type::DOUBLE: |
78 | 8.15k | decoder.reset(new FixLengthDictDecoder<tparquet::Type::DOUBLE>()); |
79 | 8.15k | break; |
80 | 9.66k | case tparquet::Type::FIXED_LEN_BYTE_ARRAY: |
81 | 9.66k | decoder.reset(new FixLengthDictDecoder<tparquet::Type::FIXED_LEN_BYTE_ARRAY>()); |
82 | 9.66k | break; |
83 | 0 | default: |
84 | 0 | return Status::InternalError("Unsupported type {}(encoding={}) in parquet decoder", |
85 | 0 | tparquet::to_string(type), tparquet::to_string(encoding)); |
86 | 91.3k | } |
87 | 91.3k | break; |
88 | 91.3k | case tparquet::Encoding::RLE: |
89 | 766 | switch (type) { |
90 | 766 | case tparquet::Type::BOOLEAN: |
91 | 766 | decoder.reset(new BoolRLEDecoder()); |
92 | 766 | break; |
93 | 0 | default: |
94 | 0 | return Status::InternalError("Unsupported type {}(encoding={}) in parquet decoder", |
95 | 0 | tparquet::to_string(type), tparquet::to_string(encoding)); |
96 | 766 | } |
97 | 766 | break; |
98 | 1.76k | case tparquet::Encoding::DELTA_BINARY_PACKED: |
99 | | // Supports only INT32 and INT64. |
100 | 1.76k | switch (type) { |
101 | 100 | case tparquet::Type::INT32: |
102 | 100 | decoder.reset(new DeltaBitPackDecoder<int32_t>()); |
103 | 100 | break; |
104 | 1.66k | case tparquet::Type::INT64: |
105 | 1.66k | decoder.reset(new DeltaBitPackDecoder<int64_t>()); |
106 | 1.66k | break; |
107 | 0 | default: |
108 | 0 | return Status::InternalError("DELTA_BINARY_PACKED only supports INT32 and INT64"); |
109 | 1.76k | } |
110 | 1.76k | break; |
111 | 1.76k | case tparquet::Encoding::DELTA_BYTE_ARRAY: |
112 | 260 | switch (type) { |
113 | 260 | case tparquet::Type::BYTE_ARRAY: |
114 | 260 | case tparquet::Type::FIXED_LEN_BYTE_ARRAY: |
115 | 260 | decoder.reset(new DeltaByteArrayDecoder()); |
116 | 260 | break; |
117 | 0 | default: |
118 | 0 | return Status::InternalError( |
119 | 0 | "DELTA_BYTE_ARRAY only supports BYTE_ARRAY, FIXED_LEN_BYTE_ARRAY."); |
120 | 260 | } |
121 | 260 | break; |
122 | 260 | case tparquet::Encoding::DELTA_LENGTH_BYTE_ARRAY: |
123 | 28 | switch (type) { |
124 | 28 | case tparquet::Type::BYTE_ARRAY: |
125 | 28 | decoder.reset(new DeltaLengthByteArrayDecoder()); |
126 | 28 | break; |
127 | 0 | default: |
128 | 0 | return Status::InternalError("DELTA_LENGTH_BYTE_ARRAY only supports BYTE_ARRAY."); |
129 | 28 | } |
130 | 28 | break; |
131 | 28 | case tparquet::Encoding::BYTE_STREAM_SPLIT: |
132 | 22 | switch (type) { |
133 | 2 | case tparquet::Type::INT32: |
134 | 4 | case tparquet::Type::INT64: |
135 | 4 | case tparquet::Type::INT96: |
136 | 10 | case tparquet::Type::FLOAT: |
137 | 16 | case tparquet::Type::DOUBLE: |
138 | 22 | case tparquet::Type::FIXED_LEN_BYTE_ARRAY: |
139 | 22 | decoder.reset(new ByteStreamSplitDecoder()); |
140 | 22 | break; |
141 | 0 | default: |
142 | 0 | return Status::InternalError("Unsupported type {}(encoding={}) in parquet decoder", |
143 | 0 | tparquet::to_string(type), tparquet::to_string(encoding)); |
144 | 22 | } |
145 | 22 | break; |
146 | 22 | default: |
147 | 0 | return Status::InternalError("Unsupported encoding {}(type={}) in parquet decoder", |
148 | 0 | tparquet::to_string(encoding), tparquet::to_string(type)); |
149 | 169k | } |
150 | 169k | return Status::OK(); |
151 | 169k | } |
152 | | #include "common/compile_check_end.h" |
153 | | |
154 | | } // namespace doris |