Coverage Report

Created: 2026-04-15 20:02

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/index/ann/ann_index.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
/**
19
 * @file ann_index.h
20
 * @brief Abstract interface for vector similarity search indexes in Doris.
21
 *
22
 * This file defines the abstract VectorIndex interface that provides a unified
23
 * API for different vector index implementations (FAISS, etc.). The interface
24
 * supports both approximate k-nearest neighbor search and range search operations.
25
 *
26
 * Key operations supported:
27
 * - Adding vectors to the index during build phase
28
 * - K-nearest neighbor search for Top-N queries
29
 * - Range search for finding vectors within a distance threshold
30
 * - Persistence to/from storage for index durability
31
 *
32
 * This abstraction allows Doris to support multiple vector index libraries
33
 * through a consistent interface.
34
 */
35
36
#pragma once
37
38
#include <roaring/roaring.hh>
39
40
#include "common/status.h"
41
#include "core/types.h"
42
43
namespace lucene::store {
44
class Directory;
45
}
46
47
namespace doris::segment_v2 {
48
struct IndexSearchParameters;
49
struct IndexSearchResult;
50
51
// Distance metric used by an ANN index.
// L2 is the L2 distance; IP is presumably inner product -- TODO confirm.
// UNKNOWN is the placeholder for a metric that is not one of the above.
enum class AnnIndexMetric { L2, IP, UNKNOWN };
52
53
// Returns the string name of a metric. Defined elsewhere; the exact spellings
// are not visible in this header.
std::string metric_to_string(AnnIndexMetric metric);
54
55
// Parses a metric name into an AnnIndexMetric. Defined elsewhere.
// NOTE(review): presumably yields AnnIndexMetric::UNKNOWN for unrecognized
// input -- confirm against the implementation.
AnnIndexMetric string_to_metric(const std::string& metric);
56
57
// Kind of ANN index structure: HNSW, IVF, or IVF_ON_DISK.
// UNKNOWN is the placeholder for a type that is not one of the above.
enum class AnnIndexType { UNKNOWN, HNSW, IVF, IVF_ON_DISK };
58
59
// Returns the string name of an index type. Defined elsewhere; the exact
// spellings are not visible in this header.
std::string ann_index_type_to_string(AnnIndexType type);
60
61
// Parses an index type name into an AnnIndexType. Defined elsewhere.
// NOTE(review): presumably yields AnnIndexType::UNKNOWN for unrecognized
// input -- confirm against the implementation.
AnnIndexType string_to_ann_index_type(const std::string& type);
62
63
/**
64
 * @brief Abstract base class for vector similarity search indexes.
65
 *
66
 * This class defines the interface that all vector index implementations
67
 * must follow. It provides the core operations needed for vector similarity
68
 * search in Doris, including index building, searching, and persistence.
69
 *
70
 * Implementations of this interface (like FaissVectorIndex) handle the
71
 * specifics of different vector index libraries while providing a consistent
72
 * API for the Doris query execution engine.
73
 */
74
class VectorIndex {
75
public:
76
    VectorIndex();
77
    virtual ~VectorIndex();
78
79
    /**
     * @brief Train the index on a set of vectors.
     *
     * Pure virtual: the training behavior is defined by each implementation.
     * See get_min_train_rows() for the minimum amount of training data an
     * implementation may require.
     *
     * @param n  presumably the number of training vectors -- TODO confirm
     * @param x  training data; presumably n * d floats laid out like the
     *           input of add() -- TODO confirm
     * @return   status of the operation
     */
    virtual doris::Status train(Int64 n, const float* x) = 0;
80
81
    /** Add n vectors of dimension d to the index.
82
     *
83
     * Vectors are implicitly assigned labels ntotal .. ntotal + n - 1
84
     * This function slices the input vectors in chunks smaller than
85
     * blocksize_add and calls add_core.
     *
     * NOTE(review): the chunking description (blocksize_add / add_core) is not
     * established by anything in this header; this method is pure virtual, so
     * the actual behavior depends on the implementation -- confirm or drop.
     *
86
     * @param n      number of vectors
87
     * @param x      input matrix, size n * d
     * @return       status of the operation
88
     */
89
    virtual doris::Status add(Int64 n, const float* x) = 0;
90
91
    /**
92
     * @brief Returns the minimum number of rows required for training the index.
93
     *
94
     * Some index types (like IVF) require a minimum number of training points.
95
     * For example, IVF requires at least 'nlist' training points.
96
     * HNSW does not require any minimum and returns 0.
97
     *
     * The base-class default returns 0; implementations that need a minimum
     * are expected to override it.
     *
98
     * @return Minimum number of rows required for training
99
     */
100
0
    virtual Int64 get_min_train_rows() const { return 0; }
101
102
    /** Return approximate nearest neighbors of a query vector.
103
     * The result is stored in the result object.
104
     * @param query_vec  input vector, size d
105
     * @param k          number of nearest neighbors to return
106
     * @param params     search parameters
107
     * @param result     output search result
108
     * @return           status of the operation
109
    */
110
    virtual doris::Status ann_topn_search(const float* query_vec, int k,
111
                                          const segment_v2::IndexSearchParameters& params,
112
                                          segment_v2::IndexSearchResult& result) = 0;
113
    /**
114
    * Search for the vectors that lie within a given radius of a query vector.
115
    * @param query_vec  input vector, size d
116
    * @param radius     search radius
    * @param params     search parameters
117
    * @param result     output search result
118
    * @return           status of the operation
119
    */
120
    virtual doris::Status range_search(const float* query_vec, const float& radius,
121
                                       const segment_v2::IndexSearchParameters& params,
122
                                       segment_v2::IndexSearchResult& result) = 0;
123
124
    /**
     * @brief Persist the index to storage through the given directory.
     *
     * The directory pointer is passed through as-is; ownership and nullability
     * are not specified by this interface -- TODO confirm with implementations.
     *
     * @return status of the operation
     */
    virtual doris::Status save(lucene::store::Directory*) = 0;
125
126
    /**
     * @brief Load a previously persisted index from the given directory.
     *
     * The directory pointer is passed through as-is; ownership and nullability
     * are not specified by this interface -- TODO confirm with implementations.
     *
     * @return status of the operation
     */
    virtual doris::Status load(lucene::store::Directory*) = 0;
127
128
51
    /// Returns the stored vector dimension (_dimension, 0 until something sets it).
    size_t get_dimension() const { return _dimension; }
129
130
12
    /// Stores the distance metric; plain assignment, no validation is performed here.
    void set_metric(AnnIndexMetric metric) { _metric = metric; }
131
132
125
    /// Stores the index type; plain assignment, no validation is performed here.
    void set_type(AnnIndexType type) { _index_type = type; }
133
134
protected:
135
    // When adding vectors to the index, use this variable to check the dimension of the vectors.
    // Nothing in this header assigns it; presumably derived classes set it -- TODO confirm.
136
    size_t _dimension = 0;
137
    AnnIndexMetric _metric = AnnIndexMetric::L2;   // Default metric is L2 distance
138
    AnnIndexType _index_type = AnnIndexType::HNSW; // Default index type is HNSW
139
};
140
} // namespace doris::segment_v2