Coverage Report

Created: 2026-03-17 00:16

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/format/parquet/vparquet_page_index.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "format/parquet/vparquet_page_index.h"
19
20
#include <gen_cpp/parquet_types.h>
21
22
#include <algorithm>
23
#include <limits>
24
#include <ostream>
25
#include <string>
26
27
#include "common/logging.h"
28
#include "common/status.h"
29
#include "format/parquet/parquet_common.h"
30
#include "format/parquet/parquet_predicate.h"
31
#include "util/thrift_util.h"
32
33
namespace cctz {
34
class time_zone;
35
} // namespace cctz
36
namespace doris {
37
struct FieldSchema;
38
} // namespace doris
39
40
namespace doris {
41
#include "common/compile_check_begin.h"
42
43
0
// Determines the byte range spanned by the column indexes and the byte range spanned by
// the offset indexes of the given column chunks.
//
// A chunk contributes to a range only when both the offset and the length of the
// corresponding index are marked as set. When at least one column index and at least one
// offset index were seen, the member start/size fields are updated and true is returned.
// Otherwise the members are left untouched and false is returned.
bool PageIndex::check_and_get_page_index_ranges(const std::vector<tparquet::ColumnChunk>& columns) {
    // Sentinel meaning "no index of this kind has been seen yet".
    constexpr int64_t kUnset = -1;

    int64_t column_index_begin = std::numeric_limits<int64_t>::max();
    int64_t column_index_finish = kUnset;
    int64_t offset_index_begin = std::numeric_limits<int64_t>::max();
    int64_t offset_index_finish = kUnset;

    for (const auto& chunk : columns) {
        if (chunk.__isset.column_index_offset && chunk.__isset.column_index_length) {
            const int64_t chunk_finish = chunk.column_index_offset + chunk.column_index_length;
            column_index_begin = std::min(column_index_begin, chunk.column_index_offset);
            column_index_finish = std::max(column_index_finish, chunk_finish);
        }
        if (chunk.__isset.offset_index_offset && chunk.__isset.offset_index_length) {
            const int64_t chunk_finish = chunk.offset_index_offset + chunk.offset_index_length;
            offset_index_begin = std::min(offset_index_begin, chunk.offset_index_offset);
            offset_index_finish = std::max(offset_index_finish, chunk_finish);
        }
    }

    // Both kinds of index must be present; otherwise report "no page index" without
    // modifying any member.
    if (offset_index_finish == kUnset || column_index_finish == kUnset) {
        return false;
    }

    _column_index_start = column_index_begin;
    _column_index_size = column_index_finish - column_index_begin;
    _offset_index_start = offset_index_begin;
    _offset_index_size = offset_index_finish - offset_index_begin;
    return true;
}
69
70
// Deserializes the thrift ColumnIndex of `chunk` from `buff` into `column_index`.
//
// The position inside `buff` is `chunk.column_index_offset - _column_index_start`, so
// `buff` is expected to hold the bytes of the column-index region starting at
// `_column_index_start` (presumably `_column_index_size` bytes long, as computed by
// check_and_get_page_index_ranges() -- confirm against the caller).
//
// Returns Status::Corruption when the chunk's index range does not lie inside that
// region, the error produced by deserialize_thrift_msg() if decoding fails, and
// Status::OK() otherwise.
Status PageIndex::parse_column_index(const tparquet::ColumnChunk& chunk, const uint8_t* buff,
                                     tparquet::ColumnIndex* column_index) const {
    const int64_t buffer_offset = chunk.column_index_offset - _column_index_start;
    const int64_t index_length = chunk.column_index_length;
    // Offsets and lengths come from file metadata, so they must be validated at runtime:
    // DCHECKs vanish in release builds and a bad range would read outside of `buff`.
    // The comparison is written as a subtraction so it cannot overflow once
    // buffer_offset is known to be non-negative.
    if (buffer_offset < 0 || index_length < 0 ||
        index_length > _column_index_size - buffer_offset) {
        return Status::Corruption(
                "Invalid column index range in parquet file: offset={}, length={}, "
                "column index region start={}, size={}",
                chunk.column_index_offset, chunk.column_index_length, _column_index_start,
                _column_index_size);
    }
    uint32_t length = static_cast<uint32_t>(index_length);
    RETURN_IF_ERROR(deserialize_thrift_msg(buff + buffer_offset, &length, true, column_index));
    return Status::OK();
}
79
80
// Deserializes the thrift OffsetIndex of `chunk` from `buff` into `offset_index`.
//
// The position inside `buff` is `chunk.offset_index_offset - _offset_index_start`, so
// `buff` is expected to hold the bytes of the offset-index region starting at
// `_offset_index_start` (presumably `_offset_index_size` bytes long, as computed by
// check_and_get_page_index_ranges() -- confirm against the caller).
//
// Returns Status::Corruption when the chunk's index range does not lie inside that
// region, the error produced by deserialize_thrift_msg() if decoding fails, and
// Status::OK() otherwise.
Status PageIndex::parse_offset_index(const tparquet::ColumnChunk& chunk, const uint8_t* buff,
                                     tparquet::OffsetIndex* offset_index) const {
    const int64_t buffer_offset = chunk.offset_index_offset - _offset_index_start;
    const int64_t index_length = chunk.offset_index_length;
    // Offsets and lengths come from file metadata, so they must be validated at runtime:
    // DCHECKs vanish in release builds and a bad range would read outside of `buff`.
    // The comparison is written as a subtraction so it cannot overflow once
    // buffer_offset is known to be non-negative.
    if (buffer_offset < 0 || index_length < 0 ||
        index_length > _offset_index_size - buffer_offset) {
        return Status::Corruption(
                "Invalid offset index range in parquet file: offset={}, length={}, "
                "offset index region start={}, size={}",
                chunk.offset_index_offset, chunk.offset_index_length, _offset_index_start,
                _offset_index_size);
    }
    uint32_t length = static_cast<uint32_t>(index_length);
    RETURN_IF_ERROR(deserialize_thrift_msg(buff + buffer_offset, &length, true, offset_index));
    return Status::OK();
}
89
#include "common/compile_check_end.h"
90
91
} // namespace doris