Coverage Report

Created: 2026-04-14 13:42

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/format/parquet/bool_plain_decoder.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <stddef.h>
21
#include <stdint.h>
22
23
#include "common/compiler_util.h" // IWYU pragma: keep
24
#include "common/status.h"
25
#include "core/data_type/data_type.h"
26
#include "format/parquet/decoder.h"
27
#include "util/bit_stream_utils.h"
28
#include "util/bit_stream_utils.inline.h"
29
#include "util/slice.h"
30
31
namespace doris {
32
class ColumnSelectVector;
33
} // namespace doris
34
35
namespace doris {
36
/// Decoder for bit-packed boolean-encoded values.
37
/// Implementation from https://github.com/apache/impala/blob/master/be/src/exec/parquet/parquet-bool-decoder.h
38
// bit-packed-run-len and rle-run-len must be in the range [1, 2^31 - 1].
39
// This means that a Parquet implementation can always store the run length in a signed 32-bit integer.
40
class BoolPlainDecoder final : public Decoder {
41
public:
42
36
    BoolPlainDecoder() = default;
43
36
    ~BoolPlainDecoder() override = default;
44
45
    // Set the data to be decoded.
    // Points the bit reader at the bytes of `data` (pointer + size) and zeroes both
    // unpacked-buffer counters, so values cached from a previous buffer are discarded.
    // Always returns Status::OK().
    // NOTE(review): `_offset` is not declared in this class - presumably inherited
    // from Decoder; confirm.
46
36
    Status set_data(Slice* data) override {
47
36
        bool_values_.Reset((const uint8_t*)data->data, data->size);
48
36
        num_unpacked_values_ = 0;
49
36
        unpacked_value_idx_ = 0;
50
36
        _offset = 0;
51
36
        return Status::OK();
52
36
    }
53
54
    // The three functions below are only declared here; their definitions are not
    // part of this header.
    Status decode_values(MutableColumnPtr& doris_column, DataTypePtr& data_type,
55
                         ColumnSelectVector& select_vector, bool is_dict_filter) override;
56
57
    template <bool has_filter>
58
    Status _decode_values(MutableColumnPtr& doris_column, DataTypePtr& data_type,
59
                          ColumnSelectVector& select_vector, bool is_dict_filter);
60
61
    Status skip_values(size_t num_values) override;
62
63
protected:
64
76
    /// Fetches the next boolean into '*value'.
    ///
    /// Fast path: hand out the next entry already sitting in 'unpacked_values_'.
    /// Slow path: the cache is exhausted, so ask 'bool_values_' to unpack up to
    /// UNPACKED_BUFFER_LEN more values into it, then return the first of them.
    ///
    /// Returns true when a value was written to '*value'. Returns false, leaving
    /// '*value' untouched, when the refill produced zero values.
    /// NOTE(review): the first UnpackBatch argument (1) is presumably the bit width
    /// of each packed value - confirm against BatchedBitReader.
    inline bool _decode_value(bool* value) {
65
76
        if (LIKELY(unpacked_value_idx_ < num_unpacked_values_)) {
66
69
            *value = unpacked_values_[unpacked_value_idx_++];
67
69
        } else {
68
7
            num_unpacked_values_ =
69
7
                    bool_values_.UnpackBatch(1, UNPACKED_BUFFER_LEN, &unpacked_values_[0]);
70
7
            if (UNLIKELY(num_unpacked_values_ == 0)) {
71
0
                return false;
72
0
            }
73
7
            *value = unpacked_values_[0];
74
7
            // Slot 0 was just consumed, so the next read starts at index 1.
            unpacked_value_idx_ = 1;
75
7
        }
76
76
        return true;
77
76
    }
78
79
    /// A buffer to store unpacked values. Its length must be a multiple of 32 to use the
80
    /// batch-oriented interface of BatchedBitReader. We use uint8_t instead of bool because
81
    /// bit unpacking is only supported for unsigned integers. The values are converted to
82
    /// bool when returned to the user.
83
    static const int UNPACKED_BUFFER_LEN = 128;
84
    uint8_t unpacked_values_[UNPACKED_BUFFER_LEN];
85
86
    /// The number of valid values in 'unpacked_values_'.
87
    int num_unpacked_values_ = 0;
88
89
    /// The next value to return from 'unpacked_values_'.
90
    int unpacked_value_idx_ = 0;
91
92
    /// Bit-packed reader over the buffer supplied to set_data().
    /// NOTE(review): the original comment read "used if 'encoding_' is PLAIN", but no
    /// 'encoding_' member is declared in this class - presumably wording carried over
    /// from the Impala implementation; confirm.
93
    BatchedBitReader bool_values_;
94
};
95
96
} // namespace doris