Coverage Report

Created: 2026-07-01 22:22

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/format/parquet/vparquet_page_index.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "format/parquet/vparquet_page_index.h"
19
20
#include <gen_cpp/parquet_types.h>
21
22
#include <algorithm>
23
#include <limits>
24
#include <ostream>
25
#include <string>
26
27
#include "common/logging.h"
28
#include "common/status.h"
29
#include "format/parquet/parquet_common.h"
30
#include "format/parquet/parquet_predicate.h"
31
#include "util/thrift_util.h"
32
33
namespace cctz {
34
class time_zone;
35
} // namespace cctz
36
namespace doris {
37
struct FieldSchema;
38
} // namespace doris
39
40
namespace doris {
41
42
2
// Computes the smallest byte ranges that enclose every column index and every
// offset index referenced by `columns`.
//
// A chunk contributes to a range only when both its offset and its length for
// that index kind are flagged in `__isset`. Returns true when at least one chunk
// contributed to the column-index range and at least one contributed to the
// offset-index range; only in that case are `_column_index_start/_size` and
// `_offset_index_start/_size` updated. Otherwise the members are left untouched
// and false is returned.
bool PageIndex::check_and_get_page_index_ranges(const std::vector<tparquet::ColumnChunk>& columns) {
    // Sentinels meaning "no chunk has contributed yet".
    constexpr int64_t kUnsetBegin = std::numeric_limits<int64_t>::max();
    constexpr int64_t kUnsetEnd = -1;

    int64_t column_index_begin = kUnsetBegin;
    int64_t column_index_end = kUnsetEnd;
    int64_t offset_index_begin = kUnsetBegin;
    int64_t offset_index_end = kUnsetEnd;

    for (const tparquet::ColumnChunk& chunk : columns) {
        if (chunk.__isset.column_index_offset && chunk.__isset.column_index_length) {
            const int64_t chunk_end = chunk.column_index_offset + chunk.column_index_length;
            column_index_begin = std::min(column_index_begin, chunk.column_index_offset);
            column_index_end = std::max(column_index_end, chunk_end);
        }
        if (chunk.__isset.offset_index_offset && chunk.__isset.offset_index_length) {
            const int64_t chunk_end = chunk.offset_index_offset + chunk.offset_index_length;
            offset_index_begin = std::min(offset_index_begin, chunk.offset_index_offset);
            offset_index_end = std::max(offset_index_end, chunk_end);
        }
    }

    // Both kinds of index must be present for the page index to be usable.
    if (offset_index_end == kUnsetEnd || column_index_end == kUnsetEnd) {
        return false;
    }

    _column_index_start = column_index_begin;
    _column_index_size = column_index_end - column_index_begin;
    _offset_index_start = offset_index_begin;
    _offset_index_size = offset_index_end - offset_index_begin;
    return true;
}
68
69
// Deserializes the ColumnIndex of `chunk` out of `buff` into `column_index`.
//
// The position inside `buff` is `chunk.column_index_offset - _column_index_start`,
// i.e. `buff` is expected to begin at `_column_index_start` and to hold
// `_column_index_size` bytes (presumably the range computed by
// check_and_get_page_index_ranges -- confirm against the caller).
//
// Returns a Corruption status when the chunk's offset/length do not fit inside
// that range, otherwise whatever deserialize_thrift_msg returns.
Status PageIndex::parse_column_index(const tparquet::ColumnChunk& chunk, const uint8_t* buff,
                                     tparquet::ColumnIndex* column_index) const {
    const int64_t buffer_offset = chunk.column_index_offset - _column_index_start;
    // Keep the length signed until it is validated: a negative value would
    // otherwise wrap to a huge uint32_t.
    const int64_t index_length = chunk.column_index_length;

    // These values come from file metadata, so they must be validated at runtime.
    // DCHECKs are compiled out of release builds and would let a corrupt footer
    // drive an out-of-bounds read. The subtraction form avoids overflowing
    // `buffer_offset + index_length`.
    // NOTE(review): assumes _column_index_size is a signed 64-bit value -- confirm.
    if (buffer_offset < 0 || index_length < 0 ||
        index_length > _column_index_size - buffer_offset) {
        return Status::Corruption(
                "Invalid parquet column index range: offset={}, length={}, index_start={}, "
                "index_size={}",
                chunk.column_index_offset, chunk.column_index_length, _column_index_start,
                _column_index_size);
    }

    // deserialize_thrift_msg takes the length by pointer (in/out parameter).
    uint32_t length = static_cast<uint32_t>(index_length);
    RETURN_IF_ERROR(deserialize_thrift_msg(buff + buffer_offset, &length, true, column_index));
    return Status::OK();
}
78
79
// Deserializes the OffsetIndex of `chunk` out of `buff` into `offset_index`.
//
// The position inside `buff` is `chunk.offset_index_offset - _offset_index_start`,
// i.e. `buff` is expected to begin at `_offset_index_start` and to hold
// `_offset_index_size` bytes (presumably the range computed by
// check_and_get_page_index_ranges -- confirm against the caller).
//
// Returns a Corruption status when the chunk's offset/length do not fit inside
// that range, otherwise whatever deserialize_thrift_msg returns.
Status PageIndex::parse_offset_index(const tparquet::ColumnChunk& chunk, const uint8_t* buff,
                                     tparquet::OffsetIndex* offset_index) const {
    const int64_t buffer_offset = chunk.offset_index_offset - _offset_index_start;
    // Keep the length signed until it is validated: a negative value would
    // otherwise wrap to a huge uint32_t.
    const int64_t index_length = chunk.offset_index_length;

    // These values come from file metadata, so they must be validated at runtime.
    // DCHECKs are compiled out of release builds and would let a corrupt footer
    // drive an out-of-bounds read. The subtraction form avoids overflowing
    // `buffer_offset + index_length`.
    // NOTE(review): assumes _offset_index_size is a signed 64-bit value -- confirm.
    if (buffer_offset < 0 || index_length < 0 ||
        index_length > _offset_index_size - buffer_offset) {
        return Status::Corruption(
                "Invalid parquet offset index range: offset={}, length={}, index_start={}, "
                "index_size={}",
                chunk.offset_index_offset, chunk.offset_index_length, _offset_index_start,
                _offset_index_size);
    }

    // deserialize_thrift_msg takes the length by pointer (in/out parameter).
    uint32_t length = static_cast<uint32_t>(index_length);
    RETURN_IF_ERROR(deserialize_thrift_msg(buff + buffer_offset, &length, true, offset_index));
    return Status::OK();
}
88
89
} // namespace doris