be/src/format/parquet/bool_plain_decoder.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "format/parquet/bool_plain_decoder.h" |
19 | | |
20 | | #include <glog/logging.h> |
21 | | |
22 | | #include <algorithm> |
23 | | |
24 | | #include "core/column/column_vector.h" |
25 | | #include "core/types.h" |
26 | | #include "format/parquet/parquet_common.h" |
27 | | #include "util/bit_util.h" |
28 | | |
29 | | namespace doris { |
30 | | #include "common/compile_check_begin.h" |
31 | 11 | Status BoolPlainDecoder::skip_values(size_t num_values) { |
32 | 11 | int skip_cached = |
33 | 11 | std::min(num_unpacked_values_ - unpacked_value_idx_, cast_set<int>(num_values)); |
34 | 11 | unpacked_value_idx_ += skip_cached; |
35 | 11 | if (skip_cached == num_values) { |
36 | 3 | return Status::OK(); |
37 | 3 | } |
38 | 8 | int num_remaining = cast_set<int>(num_values - skip_cached); |
39 | 8 | int num_to_skip = BitUtil::RoundDownToPowerOf2(num_remaining, 32); |
40 | 8 | if (num_to_skip > 0) { |
41 | 0 | bool_values_.SkipBatch(1, num_to_skip); |
42 | 0 | } |
43 | 8 | num_remaining -= num_to_skip; |
44 | 8 | if (num_remaining > 0) { |
45 | 8 | DCHECK_LE(num_remaining, UNPACKED_BUFFER_LEN); |
46 | 8 | num_unpacked_values_ = |
47 | 8 | bool_values_.UnpackBatch(1, UNPACKED_BUFFER_LEN, &unpacked_values_[0]); |
48 | 8 | if (UNLIKELY(num_unpacked_values_ < num_remaining)) { |
49 | 0 | return Status::IOError("Can't skip enough booleans in plain decoder"); |
50 | 0 | } |
51 | 8 | unpacked_value_idx_ = num_remaining; |
52 | 8 | } |
53 | 8 | return Status::OK(); |
54 | 8 | } |
55 | | |
56 | | Status BoolPlainDecoder::decode_values(MutableColumnPtr& doris_column, DataTypePtr& data_type, |
57 | 18 | ColumnSelectVector& select_vector, bool is_dict_filter) { |
58 | 18 | if (select_vector.has_filter()) { |
59 | 2 | return _decode_values<true>(doris_column, data_type, select_vector, is_dict_filter); |
60 | 16 | } else { |
61 | 16 | return _decode_values<false>(doris_column, data_type, select_vector, is_dict_filter); |
62 | 16 | } |
63 | 18 | } |
64 | | |
65 | | template <bool has_filter> |
66 | | Status BoolPlainDecoder::_decode_values(MutableColumnPtr& doris_column, DataTypePtr& data_type, |
67 | 18 | ColumnSelectVector& select_vector, bool is_dict_filter) { |
68 | 18 | auto& column_data = assert_cast<ColumnUInt8*>(doris_column.get())->get_data(); |
69 | 18 | size_t data_index = column_data.size(); |
70 | 18 | column_data.resize(data_index + select_vector.num_values() - select_vector.num_filtered()); |
71 | | |
72 | 18 | ColumnSelectVector::DataReadType read_type; |
73 | 49 | while (size_t run_length = select_vector.get_next_run<has_filter>(&read_type)) { |
74 | 31 | switch (read_type) { |
75 | 23 | case ColumnSelectVector::CONTENT: { |
76 | 23 | bool value; |
77 | 93 | for (size_t i = 0; i < run_length; ++i) { |
78 | 70 | if (UNLIKELY(!_decode_value(&value))) { |
79 | 0 | return Status::IOError("Can't read enough booleans in plain decoder"); |
80 | 0 | } |
81 | 70 | column_data[data_index++] = (UInt8)value; |
82 | 70 | } |
83 | 23 | break; |
84 | 23 | } |
85 | 23 | case ColumnSelectVector::NULL_DATA: { |
86 | 2 | data_index += run_length; |
87 | 2 | break; |
88 | 23 | } |
89 | 6 | case ColumnSelectVector::FILTERED_CONTENT: { |
90 | 6 | bool value; |
91 | 12 | for (int i = 0; i < run_length; ++i) { |
92 | 6 | if (UNLIKELY(!_decode_value(&value))) { |
93 | 0 | return Status::IOError("Can't read enough booleans in plain decoder"); |
94 | 0 | } |
95 | 6 | } |
96 | 6 | break; |
97 | 6 | } |
98 | 6 | case ColumnSelectVector::FILTERED_NULL: { |
99 | | // do nothing |
100 | 0 | break; |
101 | 6 | } |
102 | 31 | } |
103 | 31 | } |
104 | 18 | return Status::OK(); |
105 | 18 | } _ZN5doris16BoolPlainDecoder14_decode_valuesILb1EEENS_6StatusERNS_3COWINS_7IColumnEE11mutable_ptrIS4_EERSt10shared_ptrIKNS_9IDataTypeEERNS_18ColumnSelectVectorEb Line | Count | Source | 67 | 2 | ColumnSelectVector& select_vector, bool is_dict_filter) { | 68 | 2 | auto& column_data = assert_cast<ColumnUInt8*>(doris_column.get())->get_data(); | 69 | 2 | size_t data_index = column_data.size(); | 70 | 2 | column_data.resize(data_index + select_vector.num_values() - select_vector.num_filtered()); | 71 | | | 72 | 2 | ColumnSelectVector::DataReadType read_type; | 73 | 17 | while (size_t run_length = select_vector.get_next_run<has_filter>(&read_type)) { | 74 | 15 | switch (read_type) { | 75 | 7 | case ColumnSelectVector::CONTENT: { | 76 | 7 | bool value; | 77 | 14 | for (size_t i = 0; i < run_length; ++i) { | 78 | 7 | if (UNLIKELY(!_decode_value(&value))) { | 79 | 0 | return Status::IOError("Can't read enough booleans in plain decoder"); | 80 | 0 | } | 81 | 7 | column_data[data_index++] = (UInt8)value; | 82 | 7 | } | 83 | 7 | break; | 84 | 7 | } | 85 | 7 | case ColumnSelectVector::NULL_DATA: { | 86 | 2 | data_index += run_length; | 87 | 2 | break; | 88 | 7 | } | 89 | 6 | case ColumnSelectVector::FILTERED_CONTENT: { | 90 | 6 | bool value; | 91 | 12 | for (int i = 0; i < run_length; ++i) { | 92 | 6 | if (UNLIKELY(!_decode_value(&value))) { | 93 | 0 | return Status::IOError("Can't read enough booleans in plain decoder"); | 94 | 0 | } | 95 | 6 | } | 96 | 6 | break; | 97 | 6 | } | 98 | 6 | case ColumnSelectVector::FILTERED_NULL: { | 99 | | // do nothing | 100 | 0 | break; | 101 | 6 | } | 102 | 15 | } | 103 | 15 | } | 104 | 2 | return Status::OK(); | 105 | 2 | } |
_ZN5doris16BoolPlainDecoder14_decode_valuesILb0EEENS_6StatusERNS_3COWINS_7IColumnEE11mutable_ptrIS4_EERSt10shared_ptrIKNS_9IDataTypeEERNS_18ColumnSelectVectorEb Line | Count | Source | 67 | 16 | ColumnSelectVector& select_vector, bool is_dict_filter) { | 68 | 16 | auto& column_data = assert_cast<ColumnUInt8*>(doris_column.get())->get_data(); | 69 | 16 | size_t data_index = column_data.size(); | 70 | 16 | column_data.resize(data_index + select_vector.num_values() - select_vector.num_filtered()); | 71 | | | 72 | 16 | ColumnSelectVector::DataReadType read_type; | 73 | 32 | while (size_t run_length = select_vector.get_next_run<has_filter>(&read_type)) { | 74 | 16 | switch (read_type) { | 75 | 16 | case ColumnSelectVector::CONTENT: { | 76 | 16 | bool value; | 77 | 79 | for (size_t i = 0; i < run_length; ++i) { | 78 | 63 | if (UNLIKELY(!_decode_value(&value))) { | 79 | 0 | return Status::IOError("Can't read enough booleans in plain decoder"); | 80 | 0 | } | 81 | 63 | column_data[data_index++] = (UInt8)value; | 82 | 63 | } | 83 | 16 | break; | 84 | 16 | } | 85 | 16 | case ColumnSelectVector::NULL_DATA: { | 86 | 0 | data_index += run_length; | 87 | 0 | break; | 88 | 16 | } | 89 | 0 | case ColumnSelectVector::FILTERED_CONTENT: { | 90 | 0 | bool value; | 91 | 0 | for (int i = 0; i < run_length; ++i) { | 92 | 0 | if (UNLIKELY(!_decode_value(&value))) { | 93 | 0 | return Status::IOError("Can't read enough booleans in plain decoder"); | 94 | 0 | } | 95 | 0 | } | 96 | 0 | break; | 97 | 0 | } | 98 | 0 | case ColumnSelectVector::FILTERED_NULL: { | 99 | | // do nothing | 100 | 0 | break; | 101 | 0 | } | 102 | 16 | } | 103 | 16 | } | 104 | 16 | return Status::OK(); | 105 | 16 | } |
|
106 | | #include "common/compile_check_end.h" |
107 | | |
108 | | } // namespace doris |