contrib/faiss/faiss/impl/platform_macros.h
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) Meta Platforms, Inc. and affiliates. |
3 | | * |
4 | | * This source code is licensed under the MIT license found in the |
5 | | * LICENSE file in the root directory of this source tree. |
6 | | */ |
7 | | |
8 | | #pragma once |
9 | | |
10 | | // basic int types and size_t |
11 | | #include <cstdint> |
12 | | #include <cstdio> |
13 | | |
14 | | #ifdef _WIN32 |
15 | | |
16 | | /******************************************************* |
17 | | * Windows specific macros |
18 | | *******************************************************/ |
19 | | |
// DLL symbol decoration: dllexport when FAISS_MAIN_LIB is defined
// (presumably set while building the library itself -- confirm against the
// build files), dllimport otherwise.
#if defined(FAISS_MAIN_LIB)
#define FAISS_API __declspec(dllexport)
#else // !FAISS_MAIN_LIB
#define FAISS_API __declspec(dllimport)
#endif // FAISS_MAIN_LIB
25 | | |
// Map the POSIX reentrant tokenizer name onto strtok_s.
#define strtok_r strtok_s

#if defined(_MSC_VER)
// MSVC spells the decorated function-signature string __FUNCSIG__.
#define __PRETTY_FUNCTION__ __FUNCSIG__
#endif // _MSC_VER

// posix_memalign() look-alike built on _aligned_malloc(): the allocation is
// stored in *ptr and the expression evaluates to 0 when the pointer is
// non-null, or to errno otherwise.
// NOTE: `ptr` is evaluated twice, and _aligned_malloc receives
// (size, alignment), i.e. the reverse of this macro's argument order.
#define posix_memalign(ptr, alignment, size) \
    (((*(ptr)) = _aligned_malloc((size), (alignment))), *(ptr) ? 0 : errno)
// Release function matching the allocator used by posix_memalign() above.
#define posix_memalign_free _aligned_free

// ALIGNED(x) must be written in front of the declaration it applies to
#define ALIGNED(x) __declspec(align(x))
38 | | |
39 | | // redefine the GCC intrinsics with Windows equivalents |
40 | | #ifdef _MSC_VER |
41 | | |
42 | | #include <intrin.h> |
43 | | #include <limits.h> |
44 | | |
45 | | #ifndef __clang__ |
46 | | inline int __builtin_ctzll(uint64_t x) { |
47 | | unsigned long ret; |
48 | | _BitScanForward64(&ret, x); |
49 | | return (int)ret; |
50 | | } |
51 | | #endif |
52 | | |
53 | | // cudatoolkit provides __builtin_ctz for NVCC >= 11.0 |
54 | | #if !defined(__CUDACC__) || __CUDACC_VER_MAJOR__ < 11 |
55 | | inline int __builtin_ctz(unsigned long x) { |
56 | | unsigned long ret; |
57 | | _BitScanForward(&ret, x); |
58 | | return (int)ret; |
59 | | } |
60 | | #endif |
61 | | |
62 | | #ifndef __clang__ |
63 | | inline int __builtin_clzll(uint64_t x) { |
64 | | #if defined(_M_X64) || defined(__x86_64__) |
65 | | return (int)__lzcnt64(x); |
66 | | #elif defined(_M_ARM64) |
67 | | unsigned long index; |
68 | | int count = sizeof(uint64_t) * CHAR_BIT; |
69 | | if (_BitScanReverse64(&index, x)) { |
70 | | count = count - 1 - index; |
71 | | } |
72 | | return count; |
73 | | #endif |
74 | | } |
75 | | #endif |
76 | | |
// GCC population-count builtins spelled with the MSVC intrinsics.
// Note that both the `l` and the `ll` variants map to the 64-bit intrinsic.
#define __builtin_popcount __popcnt
#define __builtin_popcountl __popcnt64
#define __builtin_popcountll __popcnt64

#if !defined(__clang__)
// alias the *_u vector type names to the regular vector types
#define __m128i_u __m128i
#define __m256i_u __m256i
#endif

// MSVC does not define __SSEx__, and _M_IX86_FP is only defined on 32-bit
// processors, cf.
// https://docs.microsoft.com/en-us/cpp/preprocessor/predefined-macros
// Hence every SSE level is declared available as soon as AVX is enabled.
#if defined(__AVX__)
#define __SSE__ 1
#define __SSE2__ 1
#define __SSE3__ 1
#define __SSE4_1__ 1
#define __SSE4_2__ 1
#endif

// MSVC sets FMA and F16C automatically when using AVX2.
// Ref. FMA (under /arch:AVX2):
// https://docs.microsoft.com/en-us/cpp/build/reference/arch-x64
// Ref. F16C (2nd paragraph): https://walbourn.github.io/directxmath-avx2/
#if defined(__AVX2__)
#define __FMA__ 1
#define __F16C__ 1
#endif
105 | | |
106 | | #endif // _MSC_VER |
107 | | |
// Windows spelling of the always-inline function specifier.
#define FAISS_ALWAYS_INLINE __forceinline
109 | | |
110 | | #else |
111 | | /******************************************************* |
112 | | * Linux and OSX |
113 | | *******************************************************/ |
114 | | |
// FAISS_API expands to nothing on this branch.
#define FAISS_API
// Memory from posix_memalign() is released with plain free() here.
#define posix_memalign_free free

// ALIGNED(x) must be written *in front* of the declaration so that the same
// source also works with the Windows definition. SWIG gets an empty
// expansion.
#if !defined(SWIG)
#define ALIGNED(x) __attribute__((aligned(x)))
#else
#define ALIGNED(x)
#endif

#define FAISS_ALWAYS_INLINE __attribute__((always_inline)) inline
127 | | |
128 | | #endif |
129 | | |
// FAISS_DEPRECATED(msg): attaches a deprecation attribute carrying `msg` on
// GCC and Clang; expands to nothing for any other compiler.
#if !defined(__GNUC__) && !defined(__clang__)
#define FAISS_DEPRECATED(msg)
#else // GCC or Clang
#define FAISS_DEPRECATED(msg) __attribute__((deprecated(msg)))
#endif
135 | | |
// Localized enablement of imprecise floating point operations.
// Three macros are provided and all of them must be used to cover every
// compiler: a loop-level hint (FAISS_PRAGMA_IMPRECISE_LOOP) and a BEGIN/END
// pair that pushes, then pops, the relaxed floating point settings.
#if defined(_MSC_VER)

#define FAISS_PRAGMA_IMPRECISE_LOOP
#define FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN \
    __pragma(float_control(precise, off, push))
#define FAISS_PRAGMA_IMPRECISE_FUNCTION_END __pragma(float_control(pop))

#elif defined(__clang__)

// Loop hint: fixed vector width / interleave count on PowerPC, compiler's
// choice everywhere else.
#if defined(__PPC__)
#define FAISS_PRAGMA_IMPRECISE_LOOP \
    _Pragma("clang loop vectorize_width(4) interleave_count(8)")
#else
#define FAISS_PRAGMA_IMPRECISE_LOOP \
    _Pragma("clang loop vectorize(enable) interleave(enable)")
#endif

// clang-format off

// Function-level pragmas: besides PowerPC, the float_control pragma is
// needed on x86 because old versions of clang (prior to 14) do not generate
// FMAs unless it is used. ARM, on the other hand, does not support this
// pragma flag, so the macros stay empty there.
// TODO: find out how to enable FMAs on clang 10 and earlier.
#if defined(__PPC__) || (defined(__x86_64__) && defined(__clang_major__) && (__clang_major__ > 10))
#define FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN \
    _Pragma("float_control(precise, off, push)")
#define FAISS_PRAGMA_IMPRECISE_FUNCTION_END _Pragma("float_control(pop)")
#else
#define FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
#define FAISS_PRAGMA_IMPRECISE_FUNCTION_END
#endif

#elif defined(__GNUC__)

// Unfortunately, GCC does not provide a macro that identifies GCC alone.
// So, we have to stick to __GNUC__, which is defined by MANY compilers.
// This is why clang/icc need to be checked first.

// todo: add __INTEL_COMPILER check for the classic ICC
// todo: add __INTEL_LLVM_COMPILER for ICX

#define FAISS_PRAGMA_IMPRECISE_LOOP
#define FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN \
    _Pragma("GCC push_options") \
    _Pragma("GCC optimize (\"unroll-loops,associative-math,no-signed-zeros\")")
#define FAISS_PRAGMA_IMPRECISE_FUNCTION_END \
    _Pragma("GCC pop_options")

#else

// unknown compiler: all three macros are no-ops
#define FAISS_PRAGMA_IMPRECISE_LOOP
#define FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
#define FAISS_PRAGMA_IMPRECISE_FUNCTION_END

#endif

// clang-format on
190 | | |
/*******************************************************
 * BIGENDIAN specific macros
 *******************************************************/
// FAISS_BIG_ENDIAN is defined only for non-MSVC compilers that report a
// big-endian target through __BYTE_ORDER__.
#if !defined(_MSC_VER)
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#define FAISS_BIG_ENDIAN
#endif
#endif
198 | | |
// Byte-order reversal of the low 16 bits (Swap2Bytes) or the low 32 bits
// (Swap4Bytes) of an integer expression; any higher bits of `val` are
// discarded. `val` is evaluated more than once, so it must not have side
// effects.
//
// Bytes that move towards the high end are masked BEFORE being shifted
// left: shifting a negative (or too large) signed value left is undefined
// behaviour, whereas shifting the isolated non-negative byte is not.
// The unsigned masks in Swap4Bytes keep the `<< 24` term out of signed
// overflow when `val` has type int.
#define Swap2Bytes(val) ((((val) >> 8) & 0x00FF) | (((val) & 0x00FF) << 8))

#define Swap4Bytes(val)                                              \
    ((((val) >> 24) & 0x000000FFu) | (((val) >> 8) & 0x0000FF00u) | \
     (((val) & 0x0000FF00u) << 8) | (((val) & 0x000000FFu) << 24))