Coverage Report

Created: 2026-03-15 17:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/index/ann/ann_index.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
/**
19
 * @file ann_index.h
20
 * @brief Abstract interface for vector similarity search indexes in Doris.
21
 *
22
 * This file defines the abstract VectorIndex interface that provides a unified
23
 * API for different vector index implementations (FAISS, etc.). The interface
24
 * supports both approximate k-nearest neighbor search and range search operations.
25
 *
26
 * Key operations supported:
27
 * - Adding vectors to the index during build phase
28
 * - K-nearest neighbor search for Top-N queries
29
 * - Range search for finding vectors within a distance threshold
30
 * - Persistence to/from storage for index durability
31
 *
32
 * This abstraction allows Doris to support multiple vector index libraries
33
 * through a consistent interface.
34
 */
35
36
#pragma once
37
38
#include <roaring/roaring.hh>
39
40
#include "common/status.h"
41
#include "core/types.h"
42
43
namespace lucene::store {
44
class Directory;
45
}
46
47
#include "common/compile_check_begin.h"
48
namespace doris::segment_v2 {
49
struct IndexSearchParameters;
50
struct IndexSearchResult;
51
52
// Distance metric selector for an ANN index. VectorIndex defaults its metric to L2.
// NOTE(review): IP presumably stands for inner product - confirm against the implementation.
enum class AnnIndexMetric { L2, IP, UNKNOWN };
53
54
// Returns a string form of `metric`. Only the declaration is visible in this header.
std::string metric_to_string(AnnIndexMetric metric);
55
56
// Maps a metric name to an AnnIndexMetric value.
// NOTE(review): presumably yields UNKNOWN for unrecognized names - confirm in the definition.
AnnIndexMetric string_to_metric(const std::string& metric);
57
58
// Kind of ANN index structure. UNKNOWN is the first enumerator here (value 0),
// whereas in AnnIndexMetric it is the last one.
enum class AnnIndexType { UNKNOWN, HNSW, IVF };
59
60
// Returns a string form of `type`. Only the declaration is visible in this header.
std::string ann_index_type_to_string(AnnIndexType type);
61
62
// Maps an index type name to an AnnIndexType value.
// NOTE(review): presumably yields UNKNOWN for unrecognized names - confirm in the definition.
AnnIndexType string_to_ann_index_type(const std::string& type);
63
64
/**
65
 * @brief Abstract base class for vector similarity search indexes.
66
 *
67
 * This class defines the interface that all vector index implementations
68
 * must follow. It provides the core operations needed for vector similarity
69
 * search in Doris, including index building, searching, and persistence.
70
 *
71
 * Implementations of this interface (like FaissVectorIndex) handle the
72
 * specifics of different vector index libraries while providing a consistent
73
 * API for the Doris query execution engine.
74
 */
75
class VectorIndex {
76
public:
77
    VectorIndex();
78
    virtual ~VectorIndex();
79
80
    // Pure virtual training hook; takes a count n and a float buffer x.
    // NOTE(review): presumably x holds n vectors laid out like the input of add() - confirm.
    virtual doris::Status train(Int64 n, const float* x) = 0;
81
82
    /** Add n vectors of dimension d to the index.
83
     *
84
     * Vectors are implicitly assigned labels ntotal .. ntotal + n - 1.
85
     * NOTE(review): the original note said the input is sliced into chunks smaller than
86
     * blocksize_add and passed to add_core; neither name is declared in this interface - confirm.
87
     * @param n      number of vectors
88
     * @param x      input matrix, size n * d
89
     */
90
    virtual doris::Status add(Int64 n, const float* x) = 0;
91
92
    /** Return approximate nearest neighbors of a query vector.
93
     * The result is stored in the result object.
94
     * @param query_vec  input vector, size d
95
     * @param k          number of nearest neighbors to return
96
     * @param params     search parameters
97
     * @param result     output search result
98
     * @return           status of the operation
99
    */
100
    virtual doris::Status ann_topn_search(const float* query_vec, int k,
101
                                          const segment_v2::IndexSearchParameters& params,
102
                                          segment_v2::IndexSearchResult& result) = 0;
103
    /**
104
    * Search for the neighbors of a query vector that lie within a given radius.
105
    * @param query_vec  input vector, size d
106
    * @param radius     search radius
    * @param params     search parameters
107
    * @param result     output search result
108
    * @return           status of the operation
109
    */
110
    virtual doris::Status range_search(const float* query_vec, const float& radius,
111
                                       const segment_v2::IndexSearchParameters& params,
112
                                       segment_v2::IndexSearchResult& result) = 0;
113
114
    // Pure virtual persistence hook: write the index through the given directory.
    // NOTE(review): ownership and nullability of the pointer are not visible here - confirm.
    virtual doris::Status save(lucene::store::Directory*) = 0;
115
116
    // Pure virtual persistence hook: read the index back through the given directory.
    // NOTE(review): ownership and nullability of the pointer are not visible here - confirm.
    virtual doris::Status load(lucene::store::Directory*) = 0;
117
118
27
    // Returns the stored vector dimension (_dimension).
    size_t get_dimension() const { return _dimension; }
119
120
12
    // Overwrites the stored distance metric (_metric).
    void set_metric(AnnIndexMetric metric) { _metric = metric; }
121
122
116
    // Overwrites the stored index type (_index_type).
    void set_type(AnnIndexType type) { _index_type = type; }
123
124
protected:
125
    // Dimension of the indexed vectors; used to check the dimension of vectors being added.
    // No public setter is declared in this class; it is protected, so derived classes can assign it.
126
    size_t _dimension = 0;
127
    AnnIndexMetric _metric = AnnIndexMetric::L2;   // Default metric is L2 distance
128
    AnnIndexType _index_type = AnnIndexType::HNSW; // Default index type is HNSW
129
};
130
#include "common/compile_check_end.h"
131
} // namespace doris::segment_v2