Coverage Report

Created: 2026-02-12 21:46

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/olap/collection_similarity.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include "gen_cpp/Opcodes_types.h"
21
#include "rowset/segment_v2/common.h"
22
#include "vec/columns/column.h"
23
24
namespace doris {
25
#include "common/compile_check_begin.h"
26
27
// Hash map from a segment row id to the float score stored for that row.
using ScoreMap = phmap::flat_hash_map<segment_v2::rowid_t, float>;
28
// Read-only iterator over a ScoreMap.
using ScoreMapIterator = ScoreMap::const_iterator;
29
30
// Sort direction selector; used as a function argument and as a non-type
// template parameter by CollectionSimilarity.
enum class OrderType {
    ASC = 0,  // ascending
    DESC = 1, // descending
};
34
35
struct ScoreRangeFilter {
36
    TExprOpcode::type op;
37
    double threshold;
38
39
430k
    bool pass(float score) const {
40
430k
        return (op == TExprOpcode::GT) ? (score > threshold) : (score >= threshold);
41
430k
    }
42
};
43
// Shared-ownership handle to a ScoreRangeFilter.
using ScoreRangeFilterPtr = std::shared_ptr<ScoreRangeFilter>;
44
45
class CollectionSimilarity {
46
public:
47
133
    CollectionSimilarity() { _bm25_scores.reserve(1024); }
48
133
    ~CollectionSimilarity() = default;
49
50
    void collect(segment_v2::rowid_t row_id, float score);
51
52
    void get_bm25_scores(roaring::Roaring* row_bitmap, vectorized::IColumn::MutablePtr& scores,
53
                         std::unique_ptr<std::vector<uint64_t>>& row_ids,
54
                         const ScoreRangeFilterPtr& filter = nullptr) const;
55
56
    void get_topn_bm25_scores(roaring::Roaring* row_bitmap, vectorized::IColumn::MutablePtr& scores,
57
                              std::unique_ptr<std::vector<uint64_t>>& row_ids, OrderType order_type,
58
                              size_t top_k, const ScoreRangeFilterPtr& filter = nullptr) const;
59
60
private:
61
    template <OrderType order>
62
    void find_top_k_scores(const roaring::Roaring* row_bitmap, const ScoreMap& all_scores,
63
                           size_t top_k, std::vector<std::pair<uint32_t, float>>& top_k_results,
64
                           const ScoreRangeFilterPtr& filter) const;
65
66
    ScoreMap _bm25_scores;
67
};
68
// Shared-ownership handle to a CollectionSimilarity.
using CollectionSimilarityPtr = std::shared_ptr<CollectionSimilarity>;
69
70
#include "common/compile_check_end.h"
71
} // namespace doris