be/src/format_v2/parquet/parquet_profile.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "format_v2/parquet/parquet_profile.h" |
19 | | |
20 | | #include "format_v2/parquet/parquet_statistics.h" |
21 | | |
22 | | namespace doris::format::parquet { |
23 | | |
24 | 108 | void ParquetProfile::init(RuntimeProfile* profile) { |
25 | 108 | if (profile == nullptr) { |
26 | 80 | return; |
27 | 80 | } |
28 | | |
29 | 28 | static const char* parquet_profile = "ParquetReader"; |
30 | 28 | ADD_TIMER_WITH_LEVEL(profile, parquet_profile, 1); |
31 | | |
32 | 28 | filtered_row_groups = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "RowGroupsFiltered", TUnit::UNIT, |
33 | 28 | parquet_profile, 1); |
34 | 28 | filtered_row_groups_by_min_max = ADD_CHILD_COUNTER_WITH_LEVEL( |
35 | 28 | profile, "RowGroupsFilteredByMinMax", TUnit::UNIT, parquet_profile, 1); |
36 | 28 | filtered_row_groups_by_dictionary = ADD_CHILD_COUNTER_WITH_LEVEL( |
37 | 28 | profile, "RowGroupsFilteredByDictionary", TUnit::UNIT, parquet_profile, 1); |
38 | 28 | filtered_row_groups_by_bloom_filter = ADD_CHILD_COUNTER_WITH_LEVEL( |
39 | 28 | profile, "RowGroupsFilteredByBloomFilter", TUnit::UNIT, parquet_profile, 1); |
40 | 28 | to_read_row_groups = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "RowGroupsReadNum", TUnit::UNIT, |
41 | 28 | parquet_profile, 1); |
42 | 28 | total_row_groups = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "RowGroupsTotalNum", TUnit::UNIT, |
43 | 28 | parquet_profile, 1); |
44 | 28 | selected_row_ranges = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "SelectedRowRanges", TUnit::UNIT, |
45 | 28 | parquet_profile, 1); |
46 | 28 | filtered_group_rows = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "FilteredRowsByGroup", TUnit::UNIT, |
47 | 28 | parquet_profile, 1); |
48 | 28 | filtered_page_rows = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "FilteredRowsByPage", TUnit::UNIT, |
49 | 28 | parquet_profile, 1); |
50 | 28 | pages_skipped_by_data_page_filter = ADD_CHILD_COUNTER_WITH_LEVEL( |
51 | 28 | profile, "PagesSkippedByDataPageFilter", TUnit::UNIT, parquet_profile, 1); |
52 | 28 | data_page_filter_skip_bytes = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "DataPageFilterSkipBytes", |
53 | 28 | TUnit::BYTES, parquet_profile, 1); |
54 | 28 | selected_rows = |
55 | 28 | ADD_CHILD_COUNTER_WITH_LEVEL(profile, "SelectedRows", TUnit::UNIT, parquet_profile, 1); |
56 | 28 | rows_filtered_by_conjunct = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "RowsFilteredByConjunct", |
57 | 28 | TUnit::UNIT, parquet_profile, 1); |
58 | 28 | total_batches = |
59 | 28 | ADD_CHILD_COUNTER_WITH_LEVEL(profile, "TotalBatches", TUnit::UNIT, parquet_profile, 1); |
60 | 28 | empty_selection_batches = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "EmptySelectionBatches", |
61 | 28 | TUnit::UNIT, parquet_profile, 1); |
62 | 28 | range_gap_skipped_rows = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "RangeGapSkippedRows", |
63 | 28 | TUnit::UNIT, parquet_profile, 1); |
64 | 28 | reader_read_rows = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "ReaderReadRows", TUnit::UNIT, |
65 | 28 | parquet_profile, 1); |
66 | 28 | reader_skip_rows = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "ReaderSkipRows", TUnit::UNIT, |
67 | 28 | parquet_profile, 1); |
68 | 28 | reader_select_rows = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "ReaderSelectRows", TUnit::UNIT, |
69 | 28 | parquet_profile, 1); |
70 | 28 | arrow_read_records_time = |
71 | 28 | ADD_CHILD_TIMER_WITH_LEVEL(profile, "ArrowReadRecordsTime", parquet_profile, 1); |
72 | 28 | materialization_time = |
73 | 28 | ADD_CHILD_TIMER_WITH_LEVEL(profile, "MaterializationTime", parquet_profile, 1); |
74 | 28 | lazy_read_filtered_rows = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "FilteredRowsByLazyRead", |
75 | 28 | TUnit::UNIT, parquet_profile, 1); |
76 | 28 | filtered_bytes = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "FilteredBytes", TUnit::BYTES, |
77 | 28 | parquet_profile, 1); |
78 | 28 | raw_rows_read = |
79 | 28 | ADD_CHILD_COUNTER_WITH_LEVEL(profile, "RawRowsRead", TUnit::UNIT, parquet_profile, 1); |
80 | 28 | column_read_time = ADD_CHILD_TIMER_WITH_LEVEL(profile, "ColumnReadTime", parquet_profile, 1); |
81 | 28 | parse_meta_time = ADD_CHILD_TIMER_WITH_LEVEL(profile, "ParseMetaTime", parquet_profile, 1); |
82 | 28 | parse_footer_time = ADD_CHILD_TIMER_WITH_LEVEL(profile, "ParseFooterTime", parquet_profile, 1); |
83 | 28 | file_reader_create_time = |
84 | 28 | ADD_CHILD_TIMER_WITH_LEVEL(profile, "FileReaderCreateTime", parquet_profile, 1); |
85 | 28 | open_file_num = |
86 | 28 | ADD_CHILD_COUNTER_WITH_LEVEL(profile, "FileNum", TUnit::UNIT, parquet_profile, 1); |
87 | 28 | page_index_read_calls = ADD_COUNTER_WITH_LEVEL(profile, "PageIndexReadCalls", TUnit::UNIT, 1); |
88 | 28 | page_index_filter_time = |
89 | 28 | ADD_CHILD_TIMER_WITH_LEVEL(profile, "PageIndexFilterTime", parquet_profile, 1); |
90 | 28 | read_page_index_time = |
91 | 28 | ADD_CHILD_TIMER_WITH_LEVEL(profile, "PageIndexReadTime", parquet_profile, 1); |
92 | 28 | parse_page_index_time = |
93 | 28 | ADD_CHILD_TIMER_WITH_LEVEL(profile, "PageIndexParseTime", parquet_profile, 1); |
94 | 28 | row_group_filter_time = |
95 | 28 | ADD_CHILD_TIMER_WITH_LEVEL(profile, "RowGroupFilterTime", parquet_profile, 1); |
96 | 28 | file_footer_read_calls = ADD_COUNTER_WITH_LEVEL(profile, "FileFooterReadCalls", TUnit::UNIT, 1); |
97 | 28 | file_footer_hit_cache = ADD_COUNTER_WITH_LEVEL(profile, "FileFooterHitCache", TUnit::UNIT, 1); |
98 | 28 | decompress_time = ADD_CHILD_TIMER_WITH_LEVEL(profile, "DecompressTime", parquet_profile, 1); |
99 | 28 | decompress_cnt = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "DecompressCount", TUnit::UNIT, |
100 | 28 | parquet_profile, 1); |
101 | 28 | page_read_counter = |
102 | 28 | ADD_CHILD_COUNTER_WITH_LEVEL(profile, "PageReadCount", TUnit::UNIT, parquet_profile, 1); |
103 | 28 | page_cache_write_counter = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "PageCacheWriteCount", |
104 | 28 | TUnit::UNIT, parquet_profile, 1); |
105 | 28 | page_cache_compressed_write_counter = ADD_CHILD_COUNTER_WITH_LEVEL( |
106 | 28 | profile, "PageCacheCompressedWriteCount", TUnit::UNIT, parquet_profile, 1); |
107 | 28 | page_cache_decompressed_write_counter = ADD_CHILD_COUNTER_WITH_LEVEL( |
108 | 28 | profile, "PageCacheDecompressedWriteCount", TUnit::UNIT, parquet_profile, 1); |
109 | 28 | page_cache_hit_counter = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "PageCacheHitCount", TUnit::UNIT, |
110 | 28 | parquet_profile, 1); |
111 | 28 | page_cache_missing_counter = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "PageCacheMissingCount", |
112 | 28 | TUnit::UNIT, parquet_profile, 1); |
113 | 28 | page_cache_compressed_hit_counter = ADD_CHILD_COUNTER_WITH_LEVEL( |
114 | 28 | profile, "PageCacheCompressedHitCount", TUnit::UNIT, parquet_profile, 1); |
115 | 28 | page_cache_decompressed_hit_counter = ADD_CHILD_COUNTER_WITH_LEVEL( |
116 | 28 | profile, "PageCacheDecompressedHitCount", TUnit::UNIT, parquet_profile, 1); |
117 | 28 | decode_header_time = |
118 | 28 | ADD_CHILD_TIMER_WITH_LEVEL(profile, "PageHeaderDecodeTime", parquet_profile, 1); |
119 | 28 | read_page_header_time = |
120 | 28 | ADD_CHILD_TIMER_WITH_LEVEL(profile, "PageHeaderReadTime", parquet_profile, 1); |
121 | 28 | decode_value_time = ADD_CHILD_TIMER_WITH_LEVEL(profile, "DecodeValueTime", parquet_profile, 1); |
122 | 28 | decode_dict_time = ADD_CHILD_TIMER_WITH_LEVEL(profile, "DecodeDictTime", parquet_profile, 1); |
123 | 28 | decode_level_time = ADD_CHILD_TIMER_WITH_LEVEL(profile, "DecodeLevelTime", parquet_profile, 1); |
124 | 28 | decode_null_map_time = |
125 | 28 | ADD_CHILD_TIMER_WITH_LEVEL(profile, "DecodeNullMapTime", parquet_profile, 1); |
126 | 28 | skip_page_header_num = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "SkipPageHeaderNum", TUnit::UNIT, |
127 | 28 | parquet_profile, 1); |
128 | 28 | parse_page_header_num = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "ParsePageHeaderNum", TUnit::UNIT, |
129 | 28 | parquet_profile, 1); |
130 | 28 | predicate_filter_time = |
131 | 28 | ADD_CHILD_TIMER_WITH_LEVEL(profile, "PredicateFilterTime", parquet_profile, 1); |
132 | 28 | dict_filter_rewrite_time = |
133 | 28 | ADD_CHILD_TIMER_WITH_LEVEL(profile, "DictFilterRewriteTime", parquet_profile, 1); |
134 | 28 | convert_time = ADD_CHILD_TIMER_WITH_LEVEL(profile, "ConvertTime", parquet_profile, 1); |
135 | 28 | bloom_filter_read_time = |
136 | 28 | ADD_CHILD_TIMER_WITH_LEVEL(profile, "BloomFilterReadTime", parquet_profile, 1); |
137 | 28 | } |
138 | | |
139 | 28 | void ParquetProfile::update_pruning_stats(const ParquetPruningStats& pruning_stats) const { |
140 | 28 | COUNTER_UPDATE(filtered_row_groups, |
141 | 28 | pruning_stats.total_row_groups - pruning_stats.selected_row_groups); |
142 | 28 | COUNTER_UPDATE(filtered_row_groups_by_min_max, pruning_stats.filtered_row_groups_by_statistics); |
143 | 28 | COUNTER_UPDATE(filtered_row_groups_by_dictionary, |
144 | 28 | pruning_stats.filtered_row_groups_by_dictionary); |
145 | 28 | COUNTER_UPDATE(filtered_row_groups_by_bloom_filter, |
146 | 28 | pruning_stats.filtered_row_groups_by_bloom_filter); |
147 | 28 | COUNTER_UPDATE(to_read_row_groups, pruning_stats.selected_row_groups); |
148 | 28 | COUNTER_UPDATE(total_row_groups, pruning_stats.total_row_groups); |
149 | 28 | COUNTER_UPDATE(selected_row_ranges, pruning_stats.selected_row_ranges); |
150 | 28 | COUNTER_UPDATE(filtered_group_rows, pruning_stats.filtered_group_rows); |
151 | 28 | COUNTER_UPDATE(filtered_page_rows, pruning_stats.filtered_page_rows); |
152 | 28 | COUNTER_UPDATE(page_index_read_calls, pruning_stats.page_index_read_calls); |
153 | 28 | COUNTER_UPDATE(bloom_filter_read_time, pruning_stats.bloom_filter_read_time); |
154 | 28 | COUNTER_UPDATE(row_group_filter_time, pruning_stats.row_group_filter_time); |
155 | 28 | COUNTER_UPDATE(page_index_filter_time, pruning_stats.page_index_filter_time); |
156 | 28 | COUNTER_UPDATE(read_page_index_time, pruning_stats.read_page_index_time); |
157 | 28 | } |
158 | | |
159 | 107 | ParquetPageSkipProfile ParquetProfile::page_skip_profile() const { |
160 | 107 | return { |
161 | 107 | .skipped_pages = pages_skipped_by_data_page_filter, |
162 | 107 | .skipped_bytes = data_page_filter_skip_bytes, |
163 | 107 | }; |
164 | 107 | } |
165 | | |
166 | 107 | ParquetColumnReaderProfile ParquetProfile::column_reader_profile() const { |
167 | 107 | return { |
168 | 107 | .reader_read_rows = reader_read_rows, |
169 | 107 | .reader_skip_rows = reader_skip_rows, |
170 | 107 | .reader_select_rows = reader_select_rows, |
171 | 107 | .arrow_read_records_time = arrow_read_records_time, |
172 | 107 | .materialization_time = materialization_time, |
173 | 107 | }; |
174 | 107 | } |
175 | | |
176 | 107 | ParquetScanProfile ParquetProfile::scan_profile() const { |
177 | 107 | return { |
178 | 107 | .raw_rows_read = raw_rows_read, |
179 | 107 | .selected_rows = selected_rows, |
180 | 107 | .rows_filtered_by_conjunct = rows_filtered_by_conjunct, |
181 | 107 | .lazy_read_filtered_rows = lazy_read_filtered_rows, |
182 | 107 | .total_batches = total_batches, |
183 | 107 | .empty_selection_batches = empty_selection_batches, |
184 | 107 | .range_gap_skipped_rows = range_gap_skipped_rows, |
185 | 107 | .column_read_time = column_read_time, |
186 | 107 | .predicate_filter_time = predicate_filter_time, |
187 | 107 | .column_reader_profile = column_reader_profile(), |
188 | 107 | }; |
189 | 107 | } |
190 | | |
191 | | } // namespace doris::format::parquet |