Coverage Report

Created: 2026-04-11 14:25

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/format/parquet/level_decoder.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "format/parquet/level_decoder.h"
19
20
#include <gen_cpp/parquet_types.h>
21
22
#include <algorithm>
23
24
#include "format/parquet/parquet_common.h"
25
#include "util/bit_stream_utils.inline.h"
26
#include "util/bit_util.h"
27
#include "util/coding.h"
28
29
// Byte length of the fixed32 little-endian length prefix that LevelDecoder::init reads
// ahead of RLE-encoded level data; the RLE payload starts this many bytes into the slice.
static constexpr size_t V1_LEVEL_SIZE = 4;
30
#include "common/cast_set.h"
31
32
/// Prepares this decoder to read levels from the front of `slice`, using the
/// caller-supplied `encoding`.
///
/// Supported encodings:
///  - RLE: the slice starts with a fixed32 little-endian byte count, followed by that
///    many bytes of RLE data.
///  - BIT_PACKED: the slice starts with ceil(num_levels * bit_width / 8) bytes of
///    bit-packed values.
///
/// On success, `slice` is advanced past the bytes claimed by the level data
/// (`data` moved forward, `size` reduced).
///
/// @param slice      in/out view of the remaining bytes; must not be null.
/// @param encoding   encoding of the level data.
/// @param max_level  largest level value; the bit width is derived from it as
///                   log2(max_level + 1).
/// @param num_levels number of levels expected to be available.
/// @return Status::OK() on success; Status::Corruption if the slice is too short for
///         the declared/required level bytes; Status::IOError for any other encoding.
///         The member fields (_encoding, _bit_width, _max_level, _num_levels) are
///         assigned before validation, so they are updated even on failure.
doris::Status doris::LevelDecoder::init(doris::Slice* slice, tparquet::Encoding::type encoding,
                                        doris::level_t max_level, uint32_t num_levels) {
    _encoding = encoding;
    _bit_width = cast_set<level_t>(BitUtil::log2(max_level + 1));
    _max_level = max_level;
    _num_levels = num_levels;
    switch (encoding) {
    case tparquet::Encoding::RLE: {
        // Need at least the length prefix itself.
        if (slice->size < V1_LEVEL_SIZE) {
            return Status::Corruption("Wrong parquet level format");
        }

        uint8_t* data = (uint8_t*)slice->data;
        uint32_t num_bytes = decode_fixed32_le(data);
        // The subtraction cannot underflow: slice->size >= V1_LEVEL_SIZE was checked above.
        if (num_bytes > slice->size - V1_LEVEL_SIZE) {
            return Status::Corruption("Wrong parquet level format");
        }
        _rle_decoder = RleDecoder<level_t>(data + V1_LEVEL_SIZE, num_bytes, _bit_width);

        slice->data += V1_LEVEL_SIZE + num_bytes;
        slice->size -= V1_LEVEL_SIZE + num_bytes;
        break;
    }
    case tparquet::Encoding::BIT_PACKED: {
        // Do the size math in 64 bits. `num_levels * _bit_width` can exceed 2^32 for a
        // corrupt page header; with 32-bit arithmetic the product would wrap to a small
        // value, slip past the bounds check below, and leave the decoder set up over
        // far fewer bytes than the levels actually need.
        const uint64_t num_bits =
                static_cast<uint64_t>(num_levels) * static_cast<uint64_t>(_bit_width);
        const uint64_t num_bytes = (num_bits + 7) / 8;
        // The decoder is handed a 32-bit length (as before); reject anything that does
        // not round-trip through uint32_t as well as anything larger than the slice.
        const auto decoder_len = static_cast<uint32_t>(num_bytes);
        if (num_bytes > slice->size || decoder_len != num_bytes) {
            return Status::Corruption("Wrong parquet level format");
        }
        _bit_packed_decoder = BitReader((uint8_t*)slice->data, decoder_len);

        slice->data += num_bytes;
        slice->size -= num_bytes;
        break;
    }
    default:
        return Status::IOError("Unsupported encoding for parquet level");
    }
    return Status::OK();
}
72
73
/// Prepares this decoder to read RLE-encoded levels that occupy the whole of `levels`.
///
/// Unlike init(), no length prefix is parsed and the encoding is always set to RLE;
/// every byte of `levels` is handed to the RLE decoder.
///
/// @param levels     bytes holding the RLE level data.
/// @param max_level  largest level value; the bit width is derived from it as
///                   log2(max_level + 1).
/// @param num_levels number of levels expected to be available.
/// @return always Status::OK().
doris::Status doris::LevelDecoder::init_v2(const doris::Slice& levels, doris::level_t max_level,
                                           uint32_t num_levels) {
    _encoding = tparquet::Encoding::RLE;
    _bit_width = cast_set<level_t>(BitUtil::log2(max_level + 1));
    _max_level = max_level;
    _num_levels = num_levels;

    // The RLE decoder takes an int length, so narrow the slice size through cast_set.
    auto* const rle_begin = (uint8_t*)levels.data;
    const int rle_length = cast_set<int>(levels.size);
    _rle_decoder = RleDecoder<level_t>(rle_begin, rle_length, _bit_width);

    return Status::OK();
}
84
85
130k
/// Decodes up to `n` levels into `levels` and returns how many were written.
///
/// The request is clamped to the number of levels still outstanding (_num_levels),
/// and _num_levels is reduced by the number returned.
///
/// @param levels destination buffer; must have room for at least `n` entries.
/// @param n      maximum number of levels to decode.
/// @return the number of levels decoded; 0 if the current encoding is neither RLE
///         nor BIT_PACKED.
/// @throws doris::Exception (INTERNAL_ERROR) if a BIT_PACKED value cannot be read;
///         in that case _num_levels is left unchanged.
size_t doris::LevelDecoder::get_levels(doris::level_t* levels, size_t n) {
    // TODO: template.
    // Never hand out more levels than remain.
    const size_t limit = std::min(static_cast<size_t>(_num_levels), n);

    switch (_encoding) {
    case tparquet::Encoding::RLE: {
        // The RLE decoder may produce fewer values than requested; trust its count.
        auto num_decoded = _rle_decoder.get_values(levels, limit);
        _num_levels -= num_decoded;
        return num_decoded;
    }
    case tparquet::Encoding::BIT_PACKED: {
        for (size_t idx = 0; idx != limit; ++idx) {
            const bool ok = _bit_packed_decoder.GetValue(_bit_width, &levels[idx]);
            if (!ok) {
                throw doris::Exception(ErrorCode::INTERNAL_ERROR,
                                       "Failed to decode BIT_PACKED levels");
            }
        }
        _num_levels -= limit;
        return limit;
    }
    default:
        return 0;
    }
}