Coverage Report

Created: 2026-03-25 14:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/index/ann/ann_index.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
/**
19
 * @file ann_index.h
20
 * @brief Abstract interface for vector similarity search indexes in Doris.
21
 *
22
 * This file defines the abstract VectorIndex interface that provides a unified
23
 * API for different vector index implementations (FAISS, etc.). The interface
24
 * supports both approximate k-nearest neighbor search and range search operations.
25
 *
26
 * Key operations supported:
27
 * - Adding vectors to the index during build phase
28
 * - K-nearest neighbor search for Top-N queries
29
 * - Range search for finding vectors within a distance threshold
30
 * - Persistence to/from storage for index durability
31
 *
32
 * This abstraction allows Doris to support multiple vector index libraries
33
 * through a consistent interface.
34
 */
35
36
#pragma once
37
38
#include <roaring/roaring.hh>
39
40
#include "common/status.h"
41
#include "core/types.h"
42
43
namespace lucene::store {
44
class Directory;
45
}
46
47
#include "common/compile_check_begin.h"
48
namespace doris::segment_v2 {
49
struct IndexSearchParameters;
50
struct IndexSearchResult;
51
52
enum class AnnIndexMetric { L2, IP, UNKNOWN };
53
54
std::string metric_to_string(AnnIndexMetric metric);
55
56
AnnIndexMetric string_to_metric(const std::string& metric);
57
58
enum class AnnIndexType { UNKNOWN, HNSW, IVF };
59
60
std::string ann_index_type_to_string(AnnIndexType type);
61
62
AnnIndexType string_to_ann_index_type(const std::string& type);
63
64
/**
65
 * @brief Abstract base class for vector similarity search indexes.
66
 *
67
 * This class defines the interface that all vector index implementations
68
 * must follow. It provides the core operations needed for vector similarity
69
 * search in Doris, including index building, searching, and persistence.
70
 *
71
 * Implementations of this interface (like FaissVectorIndex) handle the
72
 * specifics of different vector index libraries while providing a consistent
73
 * API for the Doris query execution engine.
74
 */
75
class VectorIndex {
76
public:
77
    VectorIndex();
78
    virtual ~VectorIndex();
79
80
    virtual doris::Status train(Int64 n, const float* x) = 0;
81
82
    /** Add n vectors of dimension d to the index.
83
     *
84
     * Vectors are implicitly assigned labels ntotal .. ntotal + n - 1.
85
     * This function slices the input vectors in chunks smaller than
86
     * blocksize_add and calls add_core.
87
     * @param n      number of vectors
88
     * @param x      input matrix, size n * d
89
     */
90
    virtual doris::Status add(Int64 n, const float* x) = 0;
91
92
    /**
93
     * @brief Returns the minimum number of rows required for training the index.
94
     *
95
     * Some index types (like IVF) require a minimum number of training points.
96
     * For example, IVF requires at least 'nlist' training points.
97
     * HNSW does not require any minimum and returns 0.
98
     *
99
     * @return Minimum number of rows required for training
100
     */
101
0
    virtual Int64 get_min_train_rows() const { return 0; }
102
103
    /** Return approximate nearest neighbors of a query vector.
104
     * The result is stored in the result object.
105
     * @param query_vec  input vector, size d
106
     * @param k          number of nearest neighbors to return
107
     * @param params     search parameters
108
     * @param result     output search result
109
     * @return          status of the operation
110
    */
111
    virtual doris::Status ann_topn_search(const float* query_vec, int k,
112
                                          const segment_v2::IndexSearchParameters& params,
113
                                          segment_v2::IndexSearchResult& result) = 0;
114
    /**
115
    * Find all vectors that lie within a given radius of a query vector.
116
    * @param query_vec  input vector, size d
117
    * @param radius  search radius
118
    * @param result  output search result
119
    * @return       status of the operation
120
    */
121
    virtual doris::Status range_search(const float* query_vec, const float& radius,
122
                                       const segment_v2::IndexSearchParameters& params,
123
                                       segment_v2::IndexSearchResult& result) = 0;
124
125
    virtual doris::Status save(lucene::store::Directory*) = 0;
126
127
    virtual doris::Status load(lucene::store::Directory*) = 0;
128
129
51
    size_t get_dimension() const { return _dimension; }
130
131
12
    void set_metric(AnnIndexMetric metric) { _metric = metric; }
132
133
125
    void set_type(AnnIndexType type) { _index_type = type; }
134
135
protected:
136
    // When adding vectors to the index, use this variable to check the dimension of the vectors.
137
    size_t _dimension = 0;
138
    AnnIndexMetric _metric = AnnIndexMetric::L2;   // Default metric is L2 distance
139
    AnnIndexType _index_type = AnnIndexType::HNSW; // Default index type is hnsw
140
};
141
#include "common/compile_check_end.h"
142
} // namespace doris::segment_v2