Coverage Report

Created: 2026-07-02 13:50

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/format_v2/parquet/parquet_profile.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "format_v2/parquet/parquet_profile.h"
19
20
#include "format_v2/parquet/parquet_statistics.h"
21
22
namespace doris::format::parquet {
23
24
108
// Registers every Parquet reader counter and timer on `profile` and stores the
// returned handles in this object's members.
//
// When `profile` is null the function returns immediately and no member is
// assigned. Otherwise a level-1 "ParquetReader" timer is added first, and its
// name is then passed as the parent for all child counters/timers below.
// Registration order is kept stable on purpose.
void ParquetProfile::init(RuntimeProfile* profile) {
    if (profile == nullptr) {
        return;
    }

    static const char* const kReaderGroup = "ParquetReader";
    ADD_TIMER_WITH_LEVEL(profile, kReaderGroup, 1);

    // Small adapters so that each registration below reads as "member = kind(name)".
    // Child counter measured in plain units, parented under kReaderGroup.
    auto child_count = [profile](const char* counter_name) {
        return ADD_CHILD_COUNTER_WITH_LEVEL(profile, counter_name, TUnit::UNIT, kReaderGroup, 1);
    };
    // Child counter measured in bytes, parented under kReaderGroup.
    auto child_bytes = [profile](const char* counter_name) {
        return ADD_CHILD_COUNTER_WITH_LEVEL(profile, counter_name, TUnit::BYTES, kReaderGroup, 1);
    };
    // Child timer parented under kReaderGroup.
    auto child_timer = [profile](const char* counter_name) {
        return ADD_CHILD_TIMER_WITH_LEVEL(profile, counter_name, kReaderGroup, 1);
    };
    // Counter registered without a parent name (not nested under kReaderGroup).
    auto root_count = [profile](const char* counter_name) {
        return ADD_COUNTER_WITH_LEVEL(profile, counter_name, TUnit::UNIT, 1);
    };

    // Row-group pruning.
    filtered_row_groups = child_count("RowGroupsFiltered");
    filtered_row_groups_by_min_max = child_count("RowGroupsFilteredByMinMax");
    filtered_row_groups_by_dictionary = child_count("RowGroupsFilteredByDictionary");
    filtered_row_groups_by_bloom_filter = child_count("RowGroupsFilteredByBloomFilter");
    to_read_row_groups = child_count("RowGroupsReadNum");
    total_row_groups = child_count("RowGroupsTotalNum");
    selected_row_ranges = child_count("SelectedRowRanges");
    filtered_group_rows = child_count("FilteredRowsByGroup");
    filtered_page_rows = child_count("FilteredRowsByPage");
    pages_skipped_by_data_page_filter = child_count("PagesSkippedByDataPageFilter");
    data_page_filter_skip_bytes = child_bytes("DataPageFilterSkipBytes");

    // Scan / batch level row accounting.
    selected_rows = child_count("SelectedRows");
    rows_filtered_by_conjunct = child_count("RowsFilteredByConjunct");
    total_batches = child_count("TotalBatches");
    empty_selection_batches = child_count("EmptySelectionBatches");
    range_gap_skipped_rows = child_count("RangeGapSkippedRows");

    // Column reader.
    reader_read_rows = child_count("ReaderReadRows");
    reader_skip_rows = child_count("ReaderSkipRows");
    reader_select_rows = child_count("ReaderSelectRows");
    arrow_read_records_time = child_timer("ArrowReadRecordsTime");
    materialization_time = child_timer("MaterializationTime");

    lazy_read_filtered_rows = child_count("FilteredRowsByLazyRead");
    filtered_bytes = child_bytes("FilteredBytes");
    raw_rows_read = child_count("RawRowsRead");
    column_read_time = child_timer("ColumnReadTime");

    // File open / metadata.
    parse_meta_time = child_timer("ParseMetaTime");
    parse_footer_time = child_timer("ParseFooterTime");
    file_reader_create_time = child_timer("FileReaderCreateTime");
    open_file_num = child_count("FileNum");

    // Page index and row-group filtering.
    page_index_read_calls = root_count("PageIndexReadCalls");
    page_index_filter_time = child_timer("PageIndexFilterTime");
    read_page_index_time = child_timer("PageIndexReadTime");
    parse_page_index_time = child_timer("PageIndexParseTime");
    row_group_filter_time = child_timer("RowGroupFilterTime");

    // Footer reads.
    file_footer_read_calls = root_count("FileFooterReadCalls");
    file_footer_hit_cache = root_count("FileFooterHitCache");

    // Decompression and page cache.
    decompress_time = child_timer("DecompressTime");
    decompress_cnt = child_count("DecompressCount");
    page_read_counter = child_count("PageReadCount");
    page_cache_write_counter = child_count("PageCacheWriteCount");
    page_cache_compressed_write_counter = child_count("PageCacheCompressedWriteCount");
    page_cache_decompressed_write_counter = child_count("PageCacheDecompressedWriteCount");
    page_cache_hit_counter = child_count("PageCacheHitCount");
    page_cache_missing_counter = child_count("PageCacheMissingCount");
    page_cache_compressed_hit_counter = child_count("PageCacheCompressedHitCount");
    page_cache_decompressed_hit_counter = child_count("PageCacheDecompressedHitCount");

    // Page header handling and value decoding.
    decode_header_time = child_timer("PageHeaderDecodeTime");
    read_page_header_time = child_timer("PageHeaderReadTime");
    decode_value_time = child_timer("DecodeValueTime");
    decode_dict_time = child_timer("DecodeDictTime");
    decode_level_time = child_timer("DecodeLevelTime");
    decode_null_map_time = child_timer("DecodeNullMapTime");
    skip_page_header_num = child_count("SkipPageHeaderNum");
    parse_page_header_num = child_count("ParsePageHeaderNum");

    // Predicate evaluation, conversion and bloom filter I/O.
    predicate_filter_time = child_timer("PredicateFilterTime");
    dict_filter_rewrite_time = child_timer("DictFilterRewriteTime");
    convert_time = child_timer("ConvertTime");
    bloom_filter_read_time = child_timer("BloomFilterReadTime");
}
138
139
28
void ParquetProfile::update_pruning_stats(const ParquetPruningStats& pruning_stats) const {
140
28
    COUNTER_UPDATE(filtered_row_groups,
141
28
                   pruning_stats.total_row_groups - pruning_stats.selected_row_groups);
142
28
    COUNTER_UPDATE(filtered_row_groups_by_min_max, pruning_stats.filtered_row_groups_by_statistics);
143
28
    COUNTER_UPDATE(filtered_row_groups_by_dictionary,
144
28
                   pruning_stats.filtered_row_groups_by_dictionary);
145
28
    COUNTER_UPDATE(filtered_row_groups_by_bloom_filter,
146
28
                   pruning_stats.filtered_row_groups_by_bloom_filter);
147
28
    COUNTER_UPDATE(to_read_row_groups, pruning_stats.selected_row_groups);
148
28
    COUNTER_UPDATE(total_row_groups, pruning_stats.total_row_groups);
149
28
    COUNTER_UPDATE(selected_row_ranges, pruning_stats.selected_row_ranges);
150
28
    COUNTER_UPDATE(filtered_group_rows, pruning_stats.filtered_group_rows);
151
28
    COUNTER_UPDATE(filtered_page_rows, pruning_stats.filtered_page_rows);
152
28
    COUNTER_UPDATE(page_index_read_calls, pruning_stats.page_index_read_calls);
153
28
    COUNTER_UPDATE(bloom_filter_read_time, pruning_stats.bloom_filter_read_time);
154
28
    COUNTER_UPDATE(row_group_filter_time, pruning_stats.row_group_filter_time);
155
28
    COUNTER_UPDATE(page_index_filter_time, pruning_stats.page_index_filter_time);
156
28
    COUNTER_UPDATE(read_page_index_time, pruning_stats.read_page_index_time);
157
28
}
158
159
107
// Builds a ParquetPageSkipProfile that carries this profile's two
// data-page-filter counters (skipped pages and skipped bytes).
ParquetPageSkipProfile ParquetProfile::page_skip_profile() const {
    ParquetPageSkipProfile skip_view {
            .skipped_pages = pages_skipped_by_data_page_filter,
            .skipped_bytes = data_page_filter_skip_bytes,
    };
    return skip_view;
}
165
166
107
// Builds a ParquetColumnReaderProfile from this profile's column-reader
// members: the read/skip/select row counters and the two reader timers.
ParquetColumnReaderProfile ParquetProfile::column_reader_profile() const {
    ParquetColumnReaderProfile reader_view {
            .reader_read_rows = reader_read_rows,
            .reader_skip_rows = reader_skip_rows,
            .reader_select_rows = reader_select_rows,
            .arrow_read_records_time = arrow_read_records_time,
            .materialization_time = materialization_time,
    };
    return reader_view;
}
175
176
107
// Builds a ParquetScanProfile from this profile's scan-level members and
// embeds the result of column_reader_profile() as its nested reader profile.
ParquetScanProfile ParquetProfile::scan_profile() const {
    ParquetScanProfile scan_view {
            // Row and batch counters.
            .raw_rows_read = raw_rows_read,
            .selected_rows = selected_rows,
            .rows_filtered_by_conjunct = rows_filtered_by_conjunct,
            .lazy_read_filtered_rows = lazy_read_filtered_rows,
            .total_batches = total_batches,
            .empty_selection_batches = empty_selection_batches,
            .range_gap_skipped_rows = range_gap_skipped_rows,
            // Timers.
            .column_read_time = column_read_time,
            .predicate_filter_time = predicate_filter_time,
            // Nested column-reader profile.
            .column_reader_profile = column_reader_profile(),
    };
    return scan_view;
}
190
191
} // namespace doris::format::parquet