/root/doris/contrib/faiss/faiss/impl/PolysemousTraining.h
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) Meta Platforms, Inc. and affiliates. |
3 | | * |
4 | | * This source code is licensed under the MIT license found in the |
5 | | * LICENSE file in the root directory of this source tree. |
6 | | */ |
7 | | |
8 | | // -*- c++ -*- |
9 | | |
10 | | #ifndef FAISS_POLYSEMOUS_TRAINING_INCLUDED |
11 | | #define FAISS_POLYSEMOUS_TRAINING_INCLUDED |
12 | | |
13 | | #include <faiss/impl/ProductQuantizer.h> |
14 | | |
15 | | namespace faiss { |
16 | | |
/// parameters used for the simulated annealing method
struct SimulatedAnnealingParameters {
    // optimization parameters
    double init_temperature = 0.7; // init probability of accepting a bad swap
    // at each iteration the temp is multiplied by this
    double temperature_decay = 0.9997893011688015; // = 0.9^(1/500)
    int n_iter = 500000; // nb of iterations
    int n_redo = 2; // nb of runs of the simulation
    int seed = 123; // random seed
    int verbose = 0; // verbosity level (0 = silent)
    bool only_bit_flips = false; // restrict permutation changes to bit flips
    bool init_random =
            false; // initialize with a random permutation (not identity)

    // set reasonable defaults (all values above come from the in-class
    // member initializers; the constructor body is intentionally empty)
    SimulatedAnnealingParameters() {}
};
34 | | |
/// abstract class for the loss function
struct PermutationObjective {
    // size of the permutation
    // NOTE(review): left uninitialized here; presumably set by subclass
    // constructors (e.g. ReproduceDistancesObjective takes n) — confirm
    int n;

    /// cost of a given permutation (array of size n); lower is better
    virtual double compute_cost(const int* perm) const = 0;

    // what would the cost update be if iw and jw were swapped?
    // default implementation just computes both and computes the difference
    virtual double cost_update(const int* perm, int iw, int jw) const;

    virtual ~PermutationObjective() {}
};
47 | | |
/// objective: permute codes so that Hamming distances between permuted
/// indices reproduce a set of target distances
struct ReproduceDistancesObjective : PermutationObjective {
    double dis_weight_factor; ///< decay factor used by dis_weight()

    /// small helper: x squared
    static double sqr(double x) {
        return x * x;
    }

    // weighting of distances: it is more important to reproduce small
    // distances well
    double dis_weight(double x) const;

    std::vector<double> source_dis; ///< "real" corrected distances (size n^2)
    const double* target_dis; ///< wanted distances (size n^2); non-owning —
                              ///< NOTE(review): caller must keep it alive,
                              ///< presumably for the objective's lifetime
    std::vector<double> weights;    ///< weights for each distance (size n^2)

    /// lookup into source_dis, viewed as an n x n matrix
    double get_source_dis(int i, int j) const;

    // cost = quadratic difference between actual distance and Hamming distance
    double compute_cost(const int* perm) const override;

    // what would the cost update be if iw and jw were swapped?
    // computed in O(n) instead of O(n^2) for the full re-computation
    double cost_update(const int* perm, int iw, int jw) const override;

    /// @param n                 size of the permutation
    /// @param source_dis_in     input distances, size n^2
    /// @param target_dis_in     distances to reproduce, size n^2 (borrowed)
    /// @param dis_weight_factor decay of the distance-weighting exponential
    ReproduceDistancesObjective(
            int n,
            const double* source_dis_in,
            const double* target_dis_in,
            double dis_weight_factor);

    /// compute mean and standard deviation of the n2 values in tab
    static void compute_mean_stdev(
            const double* tab,
            size_t n2,
            double* mean_out,
            double* stddev_out);

    /// set source_dis from source_dis_in, affinely remapped to match the
    /// statistics of target_dis
    void set_affine_target_dis(const double* source_dis_in);

    ~ReproduceDistancesObjective() override {}
};
88 | | |
struct RandomGenerator;

/// Simulated annealing optimization algorithm for permutations.
struct SimulatedAnnealingOptimizer : SimulatedAnnealingParameters {
    /// objective to minimize — NOTE(review): raw pointer, ownership not
    /// visible in this header; presumably borrowed from the caller — confirm
    PermutationObjective* obj;
    int n;         ///< size of the permutation
    FILE* logfile; ///< logs values of the cost function (may be null)

    /// @param obj objective to optimize
    /// @param p   annealing parameters, copied into this object
    SimulatedAnnealingOptimizer(
            PermutationObjective* obj,
            const SimulatedAnnealingParameters& p);
    /// random generator used for the annealing moves
    /// NOTE(review): raw pointer; destructor is defined out-of-line, so
    /// ownership cannot be confirmed from this header
    RandomGenerator* rnd;

    /// remember initial cost of optimization
    double init_cost;

    // main entry point. Perform the optimization loop, starting from
    // and modifying permutation in-place
    double optimize(int* perm);

    // run the optimization and return the best result in best_perm
    double run_optimization(int* best_perm);

    virtual ~SimulatedAnnealingOptimizer();
};
114 | | |
/// optimizes the order of indices in a ProductQuantizer
struct PolysemousTraining : SimulatedAnnealingParameters {
    /// which loss function the permutation optimization minimizes
    enum Optimization_type_t {
        OT_None,                      ///< do not optimize
        OT_ReproduceDistances_affine, ///< default
        OT_Ranking_weighted_diff ///< same as _2, but use rank of y+ - rank of
                                 ///< y-
    };
    Optimization_type_t optimization_type;

    /** use 1/4 of the training points for the optimization, with
     * max. ntrain_permutation. If ntrain_permutation == 0: train on
     * centroids */
    int ntrain_permutation;
    double dis_weight_factor; ///< decay of exp that weights distance loss

    /// refuse to train if it would require more than that amount of RAM
    size_t max_memory;

    // filename pattern for the logging of iterations
    std::string log_pattern;

    // sets default values
    PolysemousTraining();

    /// reorder the centroids so that the Hamming distance becomes a
    /// good approximation of the SDC distance (called by train)
    /// @param pq quantizer to optimize (modified in place)
    /// @param n  number of training vectors in x
    /// @param x  training vectors, size n * pq.d
    void optimize_pq_for_hamming(ProductQuantizer& pq, size_t n, const float* x)
            const;

    /// called by optimize_pq_for_hamming
    void optimize_ranking(ProductQuantizer& pq, size_t n, const float* x) const;
    /// called by optimize_pq_for_hamming
    void optimize_reproduce_distances(ProductQuantizer& pq) const;

    /// make sure we don't blow up the memory
    /// @return estimated RAM usage per training thread, in bytes
    size_t memory_usage_per_thread(const ProductQuantizer& pq) const;
};
153 | | |
154 | | } // namespace faiss |
155 | | |
156 | | #endif |