Coverage Report

Created: 2026-01-03 11:33

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/olap/collection_statistics.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
#pragma once
18
19
#include <cstdint>
20
#include <string>
21
#include <unordered_map>
22
23
#include "common/be_mock_util.h"
24
#include "olap/olap_common.h"
25
#include "olap/rowset/segment_v2/inverted_index/query/query_info.h"
26
#include "runtime/runtime_state.h"
27
#include "vec/exprs/vexpr_fwd.h"
28
29
namespace doris {
30
#include "common/compile_check_begin.h"
31
32
namespace io {
33
class FileSystem;
34
using FileSystemSPtr = std::shared_ptr<FileSystem>;
35
struct IOContext;
36
} // namespace io
37
38
struct RowSetSplits;
39
40
class Rowset;
41
using RowsetSharedPtr = std::shared_ptr<Rowset>;
42
43
class TabletIndex;
44
class TabletSchema;
45
using TabletSchemaSPtr = std::shared_ptr<TabletSchema>;
46
47
// Ordering functor for segment_v2::TermInfo objects.
// It is the comparator of the std::set held in CollectInfo::term_infos.
// Only the `term` member takes part in the comparison, so two TermInfo values
// whose `term` members compare equivalent are treated as the same key by an
// ordered container using this functor; any other members are ignored.
struct TermInfoComparer {
48
8
    // Returns true when lhs.term orders before rhs.term (via operator<).
    bool operator()(const segment_v2::TermInfo& lhs, const segment_v2::TermInfo& rhs) const {
49
8
        return lhs.term < rhs.term;
50
8
    }
51
};
52
53
// Plain data holder pairing a set of terms with a pointer to index metadata.
// It is the mapped type of the `collect_infos` maps taken by
// CollectionStatistics::extract_collect_info / process_segment; those maps are
// keyed by std::wstring (presumably the lucene column name — confirm in the .cpp).
class CollectInfo {
54
public:
55
    // Terms ordered and de-duplicated by TermInfoComparer, i.e. by `term` only.
    // NOTE(review): std::set is used here but <set> is not among this header's
    // direct includes — presumably pulled in transitively; confirm.
    std::set<segment_v2::TermInfo, TermInfoComparer> term_infos;
56
    // Index metadata associated with these terms; null until assigned.
    // NOTE(review): raw pointer, ownership/lifetime is not visible in this header.
    const TabletIndex* index_meta = nullptr;
57
};
58
59
// Holds collection-level statistics: a document count, per-column token totals,
// per-column/per-term document frequencies, and derived per-column / per-term
// float values. Only declarations are visible in this header; the member
// function definitions live elsewhere.
// NOTE(review): the names (idf, avg_dl) and the BM25SimilarityTest friend suggest
// these feed BM25-style scoring — confirm against the implementation.
class CollectionStatistics {
60
public:
61
116
    CollectionStatistics() = default;
62
116
    // Virtual destructor: the class may be destroyed through a base pointer
    // (see the MOCK_FUNCTION members below).
    virtual ~CollectionStatistics() = default;
63
64
    // Entry point that takes the runtime state, the rowset splits, the tablet
    // schema, the pushed-down common expression contexts and an IO context, and
    // reports success/failure through Status.
    // NOTE(review): presumably populates the statistics members below — confirm.
    // NOTE(review): std::vector is used but <vector> is not among this header's
    // direct includes — presumably pulled in transitively; confirm.
    Status collect(RuntimeState* state, const std::vector<RowSetSplits>& rs_splits,
65
                   const TabletSchemaSPtr& tablet_schema,
66
                   const vectorized::VExprContextSPtrs& common_expr_ctxs_push_down,
67
                   io::IOContext* io_ctx);
68
69
    // Returns a float for the given (column, term) pair.
    // NOTE(review): by name, an inverse-document-frequency value that is looked
    // up or computed on demand (likely cached in _idf_by_col_term) — confirm.
    // MOCK_FUNCTION comes from common/be_mock_util.h; its expansion is not
    // visible here (presumably `virtual` in test builds — confirm).
    MOCK_FUNCTION float get_or_calculate_idf(const std::wstring& lucene_col_name,
70
                                             const std::wstring& term);
71
    // Returns a float for the given column.
    // NOTE(review): by name, an average document length that is looked up or
    // computed on demand (likely cached in _avg_dl_by_col) — confirm.
    MOCK_FUNCTION float get_or_calculate_avg_dl(const std::wstring& lucene_col_name);
72
73
private:
74
    // Fills *collect_infos (std::wstring key -> CollectInfo) from the pushed-down
    // expression contexts and the tablet schema; outcome is reported via Status.
    // NOTE(review): exact extraction rules are not visible in this header.
    Status extract_collect_info(RuntimeState* state,
75
                                const vectorized::VExprContextSPtrs& common_expr_ctxs_push_down,
76
                                const TabletSchemaSPtr& tablet_schema,
77
                                std::unordered_map<std::wstring, CollectInfo>* collect_infos);
    // Handles one segment, identified by seg_id within `rowset`, using the
    // read-only collect_infos map; outcome is reported via Status.
    // NOTE(review): presumably accumulates into the counters below — confirm.
78
    Status process_segment(const RowsetSharedPtr& rowset, int32_t seg_id,
79
                           const TabletSchema* tablet_schema,
80
                           const std::unordered_map<std::wstring, CollectInfo>& collect_infos,
81
                           io::IOContext* io_ctx);
82
83
    // Accessors over the raw counters; each returns a uint64_t.
    // The first two are non-const, get_doc_num() is const.
    // NOTE(review): behavior for unknown column/term keys is not visible here.
    uint64_t get_term_doc_freq_by_col(const std::wstring& lucene_col_name,
84
                                      const std::wstring& term);
85
    uint64_t get_total_term_cnt_by_col(const std::wstring& lucene_col_name);
86
    uint64_t get_doc_num() const;
87
88
    // Raw counters. _total_num_docs starts at 0; both maps start empty.
    uint64_t _total_num_docs = 0;
89
    // std::wstring key -> uint64_t total (presumably column name -> token count).
    std::unordered_map<std::wstring, uint64_t> _total_num_tokens;
90
    // Two-level map: outer std::wstring key -> inner std::wstring key -> uint64_t
    // (presumably column name -> term -> document frequency).
    std::unordered_map<std::wstring, std::unordered_map<std::wstring, uint64_t>> _term_doc_freqs;
91
92
    // Float values keyed the same way as the counters above.
    // NOTE(review): presumably memoized results of get_or_calculate_avg_dl /
    // get_or_calculate_idf — confirm.
    std::unordered_map<std::wstring, float> _avg_dl_by_col;
93
    std::unordered_map<std::wstring, std::unordered_map<std::wstring, float>> _idf_by_col_term;
94
95
    // Friend declarations for test classes, wrapped in MOCK_DEFINE from
    // common/be_mock_util.h (presumably emitted only in test builds — confirm).
    MOCK_DEFINE(friend class BM25SimilarityTest;)
96
    MOCK_DEFINE(friend class CollectionStatisticsTest;)
97
    MOCK_DEFINE(friend class BooleanQueryTest;)
98
};
99
using CollectionStatisticsPtr = std::shared_ptr<CollectionStatistics>;
100
101
#include "common/compile_check_end.h"
102
} // namespace doris