/root/doris/contrib/openblas/driver/others/dynamic.c
Line | Count | Source |
1 | | /*********************************************************************/ |
2 | | /* Copyright 2009, 2010 The University of Texas at Austin. */ |
3 | | /* All rights reserved. */ |
4 | | /* */ |
5 | | /* Redistribution and use in source and binary forms, with or */ |
6 | | /* without modification, are permitted provided that the following */ |
7 | | /* conditions are met: */ |
8 | | /* */ |
9 | | /* 1. Redistributions of source code must retain the above */ |
10 | | /* copyright notice, this list of conditions and the following */ |
11 | | /* disclaimer. */ |
12 | | /* */ |
13 | | /* 2. Redistributions in binary form must reproduce the above */ |
14 | | /* copyright notice, this list of conditions and the following */ |
15 | | /* disclaimer in the documentation and/or other materials */ |
16 | | /* provided with the distribution. */ |
17 | | /* */ |
18 | | /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ |
19 | | /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ |
20 | | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ |
21 | | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ |
22 | | /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ |
23 | | /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ |
24 | | /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ |
25 | | /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ |
26 | | /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ |
27 | | /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ |
28 | | /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ |
29 | | /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ |
30 | | /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ |
31 | | /* POSSIBILITY OF SUCH DAMAGE. */ |
32 | | /* */ |
33 | | /* The views and conclusions contained in the software and */ |
34 | | /* documentation are those of the authors and should not be */ |
35 | | /* interpreted as representing official policies, either expressed */ |
36 | | /* or implied, of The University of Texas at Austin. */ |
37 | | /*********************************************************************/ |
38 | | |
39 | | #include "common.h" |
40 | | |
/* MSVC names the case-insensitive string comparisons differently. */
#if defined(_MSC_VER)
#define strncasecmp _strnicmp
#define strcasecmp  _stricmp
#endif

/* EXTERN expands to `extern` when ARCH_X86 is defined, and to nothing otherwise. */
#if defined(ARCH_X86)
#define EXTERN extern
#else
#define EXTERN
#endif
51 | | |
52 | | #ifdef DYNAMIC_LIST |
53 | | extern gotoblas_t gotoblas_PRESCOTT; |
54 | | |
55 | | #ifdef DYN_ATHLON |
56 | | extern gotoblas_t gotoblas_ATHLON; |
57 | | #else |
58 | | #define gotoblas_ATHLON gotoblas_PRESCOTT |
59 | | #endif |
60 | | #ifdef DYN_KATMAI |
61 | | extern gotoblas_t gotoblas_KATMAI; |
62 | | #else |
63 | | #define gotoblas_KATMAI gotoblas_PRESCOTT |
64 | | #endif |
65 | | #ifdef DYN_BANIAS |
66 | | extern gotoblas_t gotoblas_BANIAS; |
67 | | #else |
68 | | #define gotoblas_BANIAS gotoblas_PRESCOTT |
69 | | #endif |
70 | | #ifdef DYN_COPPERMINE |
71 | | extern gotoblas_t gotoblas_COPPERMINE; |
72 | | #else |
73 | | #define gotoblas_COPPERMINE gotoblas_PRESCOTT |
74 | | #endif |
75 | | #ifdef DYN_NORTHWOOD |
76 | | extern gotoblas_t gotoblas_NORTHWOOD; |
77 | | #else |
78 | | #define gotoblas_NORTHWOOD gotoblas_PRESCOTT |
79 | | #endif |
80 | | #ifdef DYN_CORE2 |
81 | | extern gotoblas_t gotoblas_CORE2; |
82 | | #else |
83 | | #define gotoblas_CORE2 gotoblas_PRESCOTT |
84 | | #endif |
85 | | #ifdef DYN_NEHALEM |
86 | | extern gotoblas_t gotoblas_NEHALEM; |
87 | | #else |
88 | | #define gotoblas_NEHALEM gotoblas_PRESCOTT |
89 | | #endif |
90 | | #ifdef DYN_BARCELONA |
91 | | extern gotoblas_t gotoblas_BARCELONA; |
92 | | #elif defined(DYN_NEHALEM) |
93 | | #define gotoblas_BARCELONA gotoblas_NEHALEM |
94 | | #else |
95 | | #define gotoblas_BARCELONA gotoblas_PRESCOTT |
96 | | #endif |
97 | | #ifdef DYN_ATOM |
98 | | extern gotoblas_t gotoblas_ATOM; |
99 | | #elif defined(DYN_NEHALEM) |
100 | | #define gotoblas_ATOM gotoblas_NEHALEM |
101 | | #else |
102 | | #define gotoblas_ATOM gotoblas_PRESCOTT |
103 | | #endif |
104 | | #ifdef DYN_NANO |
105 | | extern gotoblas_t gotoblas_NANO; |
106 | | #else |
107 | | #define gotoblas_NANO gotoblas_PRESCOTT |
108 | | #endif |
109 | | #ifdef DYN_PENRYN |
110 | | extern gotoblas_t gotoblas_PENRYN; |
111 | | #else |
112 | | #define gotoblas_PENRYN gotoblas_PRESCOTT |
113 | | #endif |
114 | | #ifdef DYN_DUNNINGTON |
115 | | extern gotoblas_t gotoblas_DUNNINGTON; |
116 | | #else |
117 | | #define gotoblas_DUNNINGTON gotoblas_PRESCOTT |
118 | | #endif |
119 | | #ifdef DYN_OPTERON |
120 | | extern gotoblas_t gotoblas_OPTERON; |
121 | | #else |
122 | | #define gotoblas_OPTERON gotoblas_PRESCOTT |
123 | | #endif |
124 | | #ifdef DYN_OPTERON_SSE3 |
125 | | extern gotoblas_t gotoblas_OPTERON_SSE3; |
126 | | #else |
127 | | #define gotoblas_OPTERON_SSE3 gotoblas_PRESCOTT |
128 | | #endif |
129 | | #ifdef DYN_BOBCAT |
130 | | extern gotoblas_t gotoblas_BOBCAT; |
131 | | #elif defined(DYN_NEHALEM) |
132 | | #define gotoblas_BOBCAT gotoblas_NEHALEM |
133 | | #else |
134 | | #define gotoblas_BOBCAT gotoblas_PRESCOTT |
135 | | #endif |
136 | | #ifdef DYN_SANDYBRIDGE |
137 | | extern gotoblas_t gotoblas_SANDYBRIDGE; |
138 | | #elif defined(DYN_NEHALEM) |
139 | | #define gotoblas_SANDYBRIDGE gotoblas_NEHALEM |
140 | | #else |
141 | | #define gotoblas_SANDYBRIDGE gotoblas_PRESCOTT |
142 | | #endif |
143 | | #ifdef DYN_BULLDOZER |
144 | | extern gotoblas_t gotoblas_BULLDOZER; |
145 | | #elif defined(DYN_SANDYBRIDGE) |
146 | | #define gotoblas_BULLDOZER gotoblas_SANDYBRIDGE |
147 | | #elif defined(DYN_NEHALEM) |
148 | | #define gotoblas_BULLDOZER gotoblas_NEHALEM |
149 | | #else |
150 | | #define gotoblas_BULLDOZER gotoblas_PRESCOTT |
151 | | #endif |
152 | | #ifdef DYN_PILEDRIVER |
153 | | extern gotoblas_t gotoblas_PILEDRIVER; |
154 | | #elif defined(DYN_SANDYBRIDGE) |
155 | | #define gotoblas_PILEDRIVER gotoblas_SANDYBRIDGE |
156 | | #elif defined(DYN_NEHALEM) |
157 | | #define gotoblas_PILEDRIVER gotoblas_NEHALEM |
158 | | #else |
159 | | #define gotoblas_PILEDRIVER gotoblas_PRESCOTT |
160 | | #endif |
161 | | #ifdef DYN_STEAMROLLER |
162 | | extern gotoblas_t gotoblas_STEAMROLLER; |
163 | | #elif defined(DYN_SANDYBRIDGE) |
164 | | #define gotoblas_STEAMROLLER gotoblas_SANDYBRIDGE |
165 | | #elif defined(DYN_NEHALEM) |
166 | | #define gotoblas_STEAMROLLER gotoblas_NEHALEM |
167 | | #else |
168 | | #define gotoblas_STEAMROLLER gotoblas_PRESCOTT |
169 | | #endif |
170 | | #ifdef DYN_EXCAVATOR |
171 | | extern gotoblas_t gotoblas_EXCAVATOR; |
172 | | #elif defined(DYN_SANDYBRIDGE) |
173 | | #define gotoblas_EXCAVATOR gotoblas_SANDYBRIDGE |
174 | | #elif defined(DYN_NEHALEM) |
175 | | #define gotoblas_EXCAVATOR gotoblas_NEHALEM |
176 | | #else |
177 | | #define gotoblas_EXCAVATOR gotoblas_PRESCOTT |
178 | | #endif |
179 | | #ifdef DYN_HASWELL |
180 | | extern gotoblas_t gotoblas_HASWELL; |
181 | | #elif defined(DYN_SANDYBRIDGE) |
182 | | #define gotoblas_HASWELL gotoblas_SANDYBRIDGE |
183 | | #elif defined(DYN_NEHALEM) |
184 | | #define gotoblas_HASWELL gotoblas_NEHALEM |
185 | | #else |
186 | | #define gotoblas_HASWELL gotoblas_PRESCOTT |
187 | | #endif |
188 | | #ifdef DYN_ZEN |
189 | | extern gotoblas_t gotoblas_ZEN; |
190 | | #elif defined(DYN_HASWELL) |
191 | | #define gotoblas_ZEN gotoblas_HASWELL |
192 | | #elif defined(DYN_SANDYBRIDGE) |
193 | | #define gotoblas_ZEN gotoblas_SANDYBRIDGE |
194 | | #elif defined(DYN_NEHALEM) |
195 | | #define gotoblas_ZEN gotoblas_NEHALEM |
196 | | #else |
197 | | #define gotoblas_ZEN gotoblas_PRESCOTT |
198 | | #endif |
199 | | #ifdef DYN_SKYLAKEX |
200 | | extern gotoblas_t gotoblas_SKYLAKEX; |
201 | | #elif defined(DYN_HASWELL) |
202 | | #define gotoblas_SKYLAKEX gotoblas_HASWELL |
203 | | #elif defined(DYN_SANDYBRIDGE) |
204 | | #define gotoblas_SKYLAKEX gotoblas_SANDYBRIDGE |
205 | | #elif defined(DYN_NEHALEM) |
206 | | #define gotoblas_SKYLAKEX gotoblas_NEHALEM |
207 | | #else |
208 | | #define gotoblas_SKYLAKEX gotoblas_PRESCOTT |
209 | | #endif |
210 | | #ifdef DYN_COOPERLAKE |
211 | | extern gotoblas_t gotoblas_COOPERLAKE; |
212 | | #elif defined(DYN_SKYLAKEX) |
213 | | #define gotoblas_COOPERLAKE gotoblas_SKYLAKEX |
214 | | #elif defined(DYN_HASWELL) |
215 | | #define gotoblas_COOPERLAKE gotoblas_HASWELL |
216 | | #elif defined(DYN_SANDYBRIDGE) |
217 | | #define gotoblas_COOPERLAKE gotoblas_SANDYBRIDGE |
218 | | #elif defined(DYN_NEHALEM) |
219 | | #define gotoblas_COOPERLAKE gotoblas_NEHALEM |
220 | | #else |
221 | | #define gotoblas_COOPERLAKE gotoblas_PRESCOTT |
222 | | #endif |
223 | | #ifdef DYN_SAPPHIRERAPIDS |
224 | | extern gotoblas_t gotoblas_SAPPHIRERAPIDS; |
225 | | #elif defined(DYN_SKYLAKEX) |
226 | | #define gotoblas_SAPPHIRERAPIDS gotoblas_SKYLAKEX |
227 | | #elif defined(DYN_HASWELL) |
228 | | #define gotoblas_SAPPHIRERAPIDS gotoblas_HASWELL |
229 | | #elif defined(DYN_SANDYBRIDGE) |
230 | | #define gotoblas_SAPPHIRERAPIDS gotoblas_SANDYBRIDGE |
231 | | #elif defined(DYN_NEHALEM) |
232 | | #define gotoblas_SAPPHIRERAPIDS gotoblas_NEHALEM |
233 | | #else |
234 | | #define gotoblas_SAPPHIRERAPIDS gotoblas_PRESCOTT |
235 | | #endif |
236 | | |
237 | | |
238 | | #else // not DYNAMIC_LIST |
239 | | EXTERN gotoblas_t gotoblas_KATMAI; |
240 | | EXTERN gotoblas_t gotoblas_COPPERMINE; |
241 | | EXTERN gotoblas_t gotoblas_NORTHWOOD; |
242 | | EXTERN gotoblas_t gotoblas_BANIAS; |
243 | | EXTERN gotoblas_t gotoblas_ATHLON; |
244 | | |
245 | | extern gotoblas_t gotoblas_PRESCOTT; |
246 | | extern gotoblas_t gotoblas_CORE2; |
247 | | extern gotoblas_t gotoblas_NEHALEM; |
248 | | extern gotoblas_t gotoblas_BARCELONA; |
249 | | #ifdef DYNAMIC_OLDER |
250 | | extern gotoblas_t gotoblas_ATOM; |
251 | | extern gotoblas_t gotoblas_NANO; |
252 | | extern gotoblas_t gotoblas_PENRYN; |
253 | | extern gotoblas_t gotoblas_DUNNINGTON; |
254 | | extern gotoblas_t gotoblas_OPTERON; |
255 | | extern gotoblas_t gotoblas_OPTERON_SSE3; |
256 | | extern gotoblas_t gotoblas_BOBCAT; |
257 | | #else |
258 | 521 | #define gotoblas_ATOM gotoblas_NEHALEM |
259 | 521 | #define gotoblas_NANO gotoblas_NEHALEM |
260 | 521 | #define gotoblas_PENRYN gotoblas_CORE2 |
261 | 521 | #define gotoblas_DUNNINGTON gotoblas_CORE2 |
262 | 521 | #define gotoblas_OPTERON gotoblas_CORE2 |
263 | 521 | #define gotoblas_OPTERON_SSE3 gotoblas_CORE2 |
264 | 521 | #define gotoblas_BOBCAT gotoblas_CORE2 |
265 | | #endif |
266 | | |
267 | | #ifndef NO_AVX |
268 | | extern gotoblas_t gotoblas_SANDYBRIDGE; |
269 | | extern gotoblas_t gotoblas_BULLDOZER; |
270 | | extern gotoblas_t gotoblas_PILEDRIVER; |
271 | | extern gotoblas_t gotoblas_STEAMROLLER; |
272 | | extern gotoblas_t gotoblas_EXCAVATOR; |
273 | | #ifdef NO_AVX2 |
274 | | #define gotoblas_HASWELL gotoblas_SANDYBRIDGE |
275 | | #define gotoblas_SKYLAKEX gotoblas_SANDYBRIDGE |
276 | | #define gotoblas_COOPERLAKE gotoblas_SANDYBRIDGE |
277 | | #define gotoblas_ZEN gotoblas_SANDYBRIDGE |
278 | | #define gotoblas_SAPPHIRERAPIDS gotoblas_SANDYBRIDGE |
279 | | #else |
280 | | extern gotoblas_t gotoblas_HASWELL; |
281 | | extern gotoblas_t gotoblas_ZEN; |
282 | | #ifndef NO_AVX512 |
283 | | extern gotoblas_t gotoblas_SKYLAKEX; |
284 | | extern gotoblas_t gotoblas_COOPERLAKE; |
285 | | extern gotoblas_t gotoblas_SAPPHIRERAPIDS; |
286 | | #else |
287 | 0 | #define gotoblas_SKYLAKEX gotoblas_HASWELL |
288 | 0 | #define gotoblas_COOPERLAKE gotoblas_HASWELL |
289 | 0 | #define gotoblas_SAPPHIRERAPIDS gotoblas_HASWELL |
290 | | #endif |
291 | | #endif |
292 | | #else |
293 | | //Use NEHALEM kernels for sandy bridge |
294 | | #define gotoblas_SANDYBRIDGE gotoblas_NEHALEM |
295 | | #define gotoblas_HASWELL gotoblas_NEHALEM |
296 | | #define gotoblas_SKYLAKEX gotoblas_NEHALEM |
297 | | #define gotoblas_COOPERLAKE gotoblas_NEHALEM |
298 | | #define gotoblas_SAPPHIRERAPIDS gotoblas_NEHALEM |
299 | | #define gotoblas_BULLDOZER gotoblas_BARCELONA |
300 | | #define gotoblas_PILEDRIVER gotoblas_BARCELONA |
301 | | #define gotoblas_STEAMROLLER gotoblas_BARCELONA |
302 | | #define gotoblas_EXCAVATOR gotoblas_BARCELONA |
303 | | #define gotoblas_ZEN gotoblas_BARCELONA |
304 | | #endif |
305 | | |
306 | | #endif // DYNAMIC_LIST |
307 | | |
/* Vendor codes returned by get_vendor(). */
#define VENDOR_INTEL    1
#define VENDOR_AMD      2
#define VENDOR_CENTAUR  3
#define VENDOR_HYGON    4
#define VENDOR_ZHAOXIN  5
#define VENDOR_UNKNOWN  99

/* Shift `a` right by `b` bits, then keep only the bits selected by mask `c`. */
#define BITMASK(a, b, c) (((a) >> (b)) & (c))
316 | | |
#if !defined(NO_AVX)
/*
 * Execute the XGETBV instruction with `op` in ECX and store the resulting
 * EAX value in *eax and the resulting EDX value in *edx.
 */
static inline void xgetbv(int op, int *eax, int *edx) {
  int lo;
  int hi;

  /* The instruction is emitted as raw opcode bytes rather than a mnemonic. */
  __asm__ __volatile__(".byte 0x0f, 0x01, 0xd0"
                       : "=a"(lo), "=d"(hi)
                       : "c"(op)
                       : "cc");
  *eax = lo;
  *edx = hi;
}
#endif
324 | | |
/*
 * Report whether AVX kernels may be used.
 *
 * Returns 1 when CPUID leaf 1 has ECX bits 26, 27 and 28 all set and
 * XGETBV(0) reports bits 1 and 2 set in EAX; returns 0 otherwise.
 * Always returns 0 when built with NO_AVX.
 */
int support_avx(){
#ifndef NO_AVX
  const int cpu_bits = (1 << 28) | (1 << 27) | (1 << 26);
  int eax, ebx, ecx, edx;

  cpuid(1, &eax, &ebx, &ecx, &edx);
  if ((ecx & cpu_bits) != cpu_bits)
    return 0;

  /* The CPU advertises the feature; now check the OS-managed state bits. */
  xgetbv(0, &eax, &edx);
  return ((eax & 6) == 6) ? 1 : 0;
#else
  return 0;
#endif
}
342 | | |
/*
 * Report whether AVX2 kernels may be used.
 *
 * Returns 1 when support_avx() succeeds and CPUID leaf 7 has EBX bit 5 set;
 * returns 0 otherwise.  Always returns 0 when built with NO_AVX2.
 */
int support_avx2(){
#ifndef NO_AVX2
  int eax, ebx, ecx=0, edx;

  if (!support_avx())
    return 0;

  cpuid(7, &eax, &ebx, &ecx, &edx);
  /* EBX bit 5 is the AVX2 flag. */
  return ((ebx & (1<<5)) != 0) ? 1 : 0;
#else
  return 0;
#endif
}
358 | | |
/*
 * Report whether AVX512 kernels may be used.
 *
 * Returns 1 only when all of the following hold, and 0 otherwise:
 *   - support_avx() succeeds,
 *   - CPUID leaf 7 reports the AVX2 flag (EBX bit 5),
 *   - CPUID leaf 7 reports the AVX512VL flag (EBX bit 31),
 *   - XGETBV(0) has bits 5..7 (mask 0xe0) all set in EAX.
 * Always returns 0 when built with NO_AVX or NO_AVX512.
 */
int support_avx512(){
#if !defined(NO_AVX) && !defined(NO_AVX512)
  /* ecx starts at 0, matching support_avx2(), in case cpuid() forwards it
     as the sub-leaf selector. */
  int eax, ebx, ecx=0, edx;

  if (!support_avx())
    return 0;

  cpuid(7, &eax, &ebx, &ecx, &edx);

  /* Previously a missing AVX2 flag only re-assigned ret=0 and had no effect. */
  if ((ebx & (1u << 5)) == 0)
    return 0;  //cpu does not have avx2 flag

  /* Unsigned constant: 1<<31 on a signed int is undefined behaviour. */
  if ((ebx & (1u << 31)) == 0)
    return 0;  //AVX512VL flag is not set

  xgetbv(0, &eax, &edx);
  if ((eax & 0xe0) == 0xe0)
    return 1;  //OS supports saving zmm register
  return 0;
#else
  return 0;
#endif
}
380 | | |
/*
 * Report whether AVX512_BF16 kernels may be used.
 *
 * Returns 1 when support_avx512() succeeds and CPUID leaf 7, sub-leaf 1 has
 * EAX bit 5 set; returns 0 otherwise.  Always returns 0 when built with
 * NO_AVX or NO_AVX512.
 */
int support_avx512_bf16(){
#if !defined(NO_AVX) && !defined(NO_AVX512)
  int eax, ebx, ecx, edx;

  if (!support_avx512())
    return 0;

  cpuid_count(7, 1, &eax, &ebx, &ecx, &edx);
  /* CPUID.7.1:EAX[bit 5] indicates whether avx512_bf16 is supported. */
  return ((eax & 32) == 32) ? 1 : 0;
#else
  return 0;
#endif
}
397 | | |
/* Bit masks used by support_amx_bf16(). */
#define BIT_AMX_TILE 0x01000000  /* bit 24 of CPUID.7.0:EDX */
#define BIT_AMX_BF16 0x00400000  /* bit 22 of CPUID.7.0:EDX */
#define BIT_AMX_ENBD 0x00060000  /* bits 17 and 18 of CPUID.D.0:EAX */

/*
 * Report whether AMX BF16 kernels may be used.
 *
 * Returns 1 when support_avx512() succeeds, CPUID.7.0:EDX has both
 * BIT_AMX_TILE and BIT_AMX_BF16 set, and CPUID.D.0:EAX has both BIT_AMX_ENBD
 * bits set; returns 0 otherwise.  Always returns 0 when built with NO_AVX or
 * NO_AVX512.
 *
 * NOTE(review): the second test reads CPUID leaf 0xD rather than XGETBV;
 * confirm that this is the intended "enabled" check.
 */
int support_amx_bf16() {
#if !defined(NO_AVX) && !defined(NO_AVX512)
  int eax, ebx, ecx, edx;

  if (!support_avx512())
    return 0;

  // CPUID.7.0:EDX indicates AMX support
  cpuid_count(7, 0, &eax, &ebx, &ecx, &edx);
  if (!((edx & BIT_AMX_TILE) && (edx & BIT_AMX_BF16)))
    return 0;

  // CPUID.D.0:EAX[17:18] indicates AMX enabled
  cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx);
  return ((eax & BIT_AMX_ENBD) == BIT_AMX_ENBD) ? 1 : 0;
#else
  return 0;
#endif
}
422 | | |
/* Warning sink defined elsewhere; takes a verbosity level and a message. */
extern void openblas_warning(int verbose, const char * msg);

/* Verbosity level passed with every fallback warning below. */
#define FALLBACK_VERBOSE 1

/* Messages emitted when get_coretype() settles for an older kernel set. */
#define NEHALEM_FALLBACK \
  "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Nehalem kernels as a fallback, which may give poorer performance.\n"
#define SANDYBRIDGE_FALLBACK \
  "OpenBLAS : Your OS does not support AVX2 instructions. OpenBLAS is using Sandybridge kernels as a fallback, which may give poorer performance.\n"
#define HASWELL_FALLBACK \
  "OpenBLAS : Your OS does not support AVX512VL instructions. OpenBLAS is using Haswell kernels as a fallback, which may give poorer performance.\n"
#define BARCELONA_FALLBACK \
  "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Barcelona kernels as a fallback, which may give poorer performance.\n"
429 | | |
430 | 1 | static int get_vendor(void){ |
431 | 1 | int eax, ebx, ecx, edx; |
432 | | |
433 | 1 | union |
434 | 1 | { |
435 | 1 | char vchar[16]; |
436 | 1 | int vint[4]; |
437 | 1 | } vendor; |
438 | | |
439 | 1 | cpuid(0, &eax, &ebx, &ecx, &edx); |
440 | | |
441 | 1 | *(&vendor.vint[0]) = ebx; |
442 | 1 | *(&vendor.vint[1]) = edx; |
443 | 1 | *(&vendor.vint[2]) = ecx; |
444 | | |
445 | 1 | vendor.vchar[12] = '\0'; |
446 | | |
447 | 1 | if (!strcmp(vendor.vchar, "GenuineIntel")) return VENDOR_INTEL; |
448 | 0 | if (!strcmp(vendor.vchar, "AuthenticAMD")) return VENDOR_AMD; |
449 | 0 | if (!strcmp(vendor.vchar, "CentaurHauls")) return VENDOR_CENTAUR; |
450 | 0 | if (!strcmp(vendor.vchar, " Shanghai ")) return VENDOR_ZHAOXIN; |
451 | 0 | if (!strcmp(vendor.vchar, "HygonGenuine")) return VENDOR_HYGON; |
452 | | |
453 | 0 | if ((eax == 0) || ((eax & 0x500) != 0)) return VENDOR_INTEL; |
454 | | |
455 | 0 | return VENDOR_UNKNOWN; |
456 | 0 | } |
457 | | |
458 | 1 | static gotoblas_t *get_coretype(void){ |
459 | | |
460 | 1 | int eax, ebx, ecx, edx; |
461 | 1 | int family, exfamily, model, vendor, exmodel, stepping; |
462 | | |
463 | 1 | cpuid(1, &eax, &ebx, &ecx, &edx); |
464 | | |
465 | 1 | family = BITMASK(eax, 8, 0x0f); |
466 | 1 | exfamily = BITMASK(eax, 20, 0xff); |
467 | 1 | model = BITMASK(eax, 4, 0x0f); |
468 | 1 | exmodel = BITMASK(eax, 16, 0x0f); |
469 | 1 | stepping = BITMASK(eax, 0, 0x0f); |
470 | | |
471 | 1 | vendor = get_vendor(); |
472 | | |
473 | 1 | if (vendor == VENDOR_INTEL){ |
474 | 1 | switch (family) { |
475 | 1 | case 0x6: |
476 | 1 | switch (exmodel) { |
477 | 0 | case 0: |
478 | 0 | if (model <= 0x7) return &gotoblas_KATMAI; |
479 | 0 | if ((model == 0x8) || (model == 0xa) || (model == 0xb)) return &gotoblas_COPPERMINE; |
480 | 0 | if ((model == 0x9) || (model == 0xd)) return &gotoblas_BANIAS; |
481 | 0 | if (model == 14) return &gotoblas_BANIAS; |
482 | 0 | if (model == 15) return &gotoblas_CORE2; |
483 | 0 | return NULL; |
484 | | |
485 | 0 | case 1: |
486 | 0 | if (model == 6) return &gotoblas_CORE2; |
487 | 0 | if (model == 7) return &gotoblas_PENRYN; |
488 | 0 | if (model == 13) return &gotoblas_DUNNINGTON; |
489 | 0 | if ((model == 10) || (model == 11) || (model == 14) || (model == 15)) return &gotoblas_NEHALEM; |
490 | 0 | if (model == 12) return &gotoblas_ATOM; |
491 | 0 | return NULL; |
492 | | |
493 | 0 | case 2: |
494 | | //Intel Core (Clarkdale) / Core (Arrandale) |
495 | | // Pentium (Clarkdale) / Pentium Mobile (Arrandale) |
496 | | // Xeon (Clarkdale), 32nm |
497 | 0 | if (model == 5) return &gotoblas_NEHALEM; |
498 | | |
499 | | //Intel Xeon Processor 5600 (Westmere-EP) |
500 | | //Xeon Processor E7 (Westmere-EX) |
501 | | //Xeon E7540 |
502 | 0 | if (model == 12 || model == 14 || model == 15) return &gotoblas_NEHALEM; |
503 | | |
504 | | //Intel Core i5-2000 /i7-2000 (Sandy Bridge) |
505 | | //Intel Core i7-3000 / Xeon E5 |
506 | 0 | if (model == 10 || model == 13) { |
507 | 0 | if(support_avx()) |
508 | 0 | return &gotoblas_SANDYBRIDGE; |
509 | 0 | else{ |
510 | 0 | openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); |
511 | 0 | return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. |
512 | 0 | } |
513 | 0 | } |
514 | 0 | return NULL; |
515 | 0 | case 3: |
516 | | //Intel Sandy Bridge 22nm (Ivy Bridge?) |
517 | 0 | if (model == 10 || model == 14) { |
518 | 0 | if(support_avx()) |
519 | 0 | return &gotoblas_SANDYBRIDGE; |
520 | 0 | else{ |
521 | 0 | openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); |
522 | 0 | return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. |
523 | 0 | } |
524 | 0 | } |
525 | | //Intel Haswell |
526 | 0 | if (model == 12 || model == 15) { |
527 | 0 | if(support_avx2()) |
528 | 0 | return &gotoblas_HASWELL; |
529 | 0 | if(support_avx()) { |
530 | 0 | openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); |
531 | 0 | return &gotoblas_SANDYBRIDGE; |
532 | 0 | } else { |
533 | 0 | openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); |
534 | 0 | return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. |
535 | 0 | } |
536 | 0 | } |
537 | | //Intel Broadwell |
538 | 0 | if (model == 13) { |
539 | 0 | if(support_avx2()) |
540 | 0 | return &gotoblas_HASWELL; |
541 | 0 | if(support_avx()) { |
542 | 0 | openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); |
543 | 0 | return &gotoblas_SANDYBRIDGE; |
544 | 0 | } else { |
545 | 0 | openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); |
546 | 0 | return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. |
547 | 0 | } |
548 | 0 | } |
549 | 0 | if (model == 7) return &gotoblas_ATOM; //Bay Trail |
550 | 0 | return NULL; |
551 | 0 | case 4: |
552 | | //Intel Haswell |
553 | 0 | if (model == 5 || model == 6) { |
554 | 0 | if(support_avx2()) |
555 | 0 | return &gotoblas_HASWELL; |
556 | 0 | if(support_avx()) { |
557 | 0 | openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); |
558 | 0 | return &gotoblas_SANDYBRIDGE; |
559 | 0 | } else { |
560 | 0 | openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); |
561 | 0 | return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. |
562 | 0 | } |
563 | 0 | } |
564 | | //Intel Broadwell |
565 | 0 | if (model == 7 || model == 15) { |
566 | 0 | if(support_avx2()) |
567 | 0 | return &gotoblas_HASWELL; |
568 | 0 | if(support_avx()) { |
569 | 0 | openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); |
570 | 0 | return &gotoblas_SANDYBRIDGE; |
571 | 0 | } else { |
572 | 0 | openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); |
573 | 0 | return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. |
574 | 0 | } |
575 | 0 | } |
576 | | //Intel Skylake |
577 | 0 | if (model == 14) { |
578 | 0 | if(support_avx2()) |
579 | 0 | return &gotoblas_HASWELL; |
580 | 0 | if(support_avx()) { |
581 | 0 | openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); |
582 | 0 | return &gotoblas_SANDYBRIDGE; |
583 | 0 | } else { |
584 | 0 | openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); |
585 | 0 | return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. |
586 | 0 | } |
587 | 0 | } |
588 | | //Intel Braswell / Avoton |
589 | 0 | if (model == 12 || model == 13) { |
590 | 0 | return &gotoblas_NEHALEM; |
591 | 0 | } |
592 | 0 | return NULL; |
593 | 1 | case 5: |
594 | | //Intel Broadwell |
595 | 1 | if (model == 6) { |
596 | 0 | if(support_avx2()) |
597 | 0 | return &gotoblas_HASWELL; |
598 | 0 | if(support_avx()) { |
599 | 0 | openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); |
600 | 0 | return &gotoblas_SANDYBRIDGE; |
601 | 0 | } else { |
602 | 0 | openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); |
603 | 0 | return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. |
604 | 0 | } |
605 | 0 | } |
606 | 1 | if (model == 5) { |
607 | | // Intel Cooperlake |
608 | 1 | if(support_avx512_bf16()) |
609 | 0 | return &gotoblas_COOPERLAKE; |
610 | | // Intel Skylake X |
611 | 1 | if (support_avx512()) |
612 | 0 | return &gotoblas_SKYLAKEX; |
613 | 1 | if(support_avx2()){ |
614 | 1 | openblas_warning(FALLBACK_VERBOSE, HASWELL_FALLBACK); |
615 | 1 | return &gotoblas_HASWELL; |
616 | 1 | } |
617 | 0 | if(support_avx()) { |
618 | 0 | openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); |
619 | 0 | return &gotoblas_SANDYBRIDGE; |
620 | 0 | } else { |
621 | 0 | openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); |
622 | 0 | return &gotoblas_NEHALEM; |
623 | 0 | } |
624 | 0 | } |
625 | | //Intel Skylake |
626 | 0 | if (model == 14) { |
627 | 0 | if(support_avx2()) |
628 | 0 | return &gotoblas_HASWELL; |
629 | 0 | if(support_avx()) { |
630 | 0 | openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); |
631 | 0 | return &gotoblas_SANDYBRIDGE; |
632 | 0 | } else { |
633 | 0 | openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); |
634 | 0 | return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. |
635 | 0 | } |
636 | 0 | } |
637 | | //Intel Phi Knights Landing |
638 | 0 | if (model == 7) { |
639 | 0 | if(support_avx2()){ |
640 | 0 | openblas_warning(FALLBACK_VERBOSE, HASWELL_FALLBACK); |
641 | 0 | return &gotoblas_HASWELL; |
642 | 0 | } |
643 | 0 | if(support_avx()) { |
644 | 0 | openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); |
645 | 0 | return &gotoblas_SANDYBRIDGE; |
646 | 0 | } else { |
647 | 0 | openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); |
648 | 0 | return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. |
649 | 0 | } |
650 | 0 | } |
651 | | //Apollo Lake or Denverton |
652 | 0 | if (model == 12 || model == 15) { |
653 | 0 | return &gotoblas_NEHALEM; |
654 | 0 | } |
655 | 0 | return NULL; |
656 | 0 | case 6: |
657 | 0 | if (model == 6) { |
658 | | // Cannon Lake |
659 | 0 | if(support_avx2()) |
660 | 0 | return &gotoblas_HASWELL; |
661 | 0 | if(support_avx()) { |
662 | 0 | openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); |
663 | 0 | return &gotoblas_SANDYBRIDGE; |
664 | 0 | } else { |
665 | 0 | openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); |
666 | 0 | return &gotoblas_NEHALEM; |
667 | 0 | } |
668 | 0 | } |
669 | 0 | if (model == 10 || model == 12){ |
670 | | // Ice Lake SP |
671 | 0 | if(support_avx512_bf16()) |
672 | 0 | return &gotoblas_COOPERLAKE; |
673 | 0 | if (support_avx512()) |
674 | 0 | return &gotoblas_SKYLAKEX; |
675 | 0 | if(support_avx2()) |
676 | 0 | return &gotoblas_HASWELL; |
677 | 0 | if(support_avx()) { |
678 | 0 | openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); |
679 | 0 | return &gotoblas_SANDYBRIDGE; |
680 | 0 | } else { |
681 | 0 | openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); |
682 | 0 | return &gotoblas_NEHALEM; |
683 | 0 | } |
684 | 0 | } |
685 | 0 | return NULL; |
686 | 0 | case 7: |
687 | 0 | if (model == 10) // Goldmont Plus |
688 | 0 | return &gotoblas_NEHALEM; |
689 | 0 | if (model == 13 || model == 14) { |
690 | | // Ice Lake |
691 | 0 | if (support_avx512()) |
692 | 0 | return &gotoblas_SKYLAKEX; |
693 | 0 | if(support_avx2()){ |
694 | 0 | openblas_warning(FALLBACK_VERBOSE, HASWELL_FALLBACK); |
695 | 0 | return &gotoblas_HASWELL; |
696 | 0 | } |
697 | 0 | if(support_avx()) { |
698 | 0 | openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); |
699 | 0 | return &gotoblas_SANDYBRIDGE; |
700 | 0 | } else { |
701 | 0 | openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); |
702 | 0 | return &gotoblas_NEHALEM; |
703 | 0 | } |
704 | 0 | } |
705 | 0 | return NULL; |
706 | 0 | case 8: |
707 | 0 | if (model == 12 || model == 13) { // Tiger Lake |
708 | 0 | if (support_avx512()) |
709 | 0 | return &gotoblas_SKYLAKEX; |
710 | 0 | if(support_avx2()){ |
711 | 0 | openblas_warning(FALLBACK_VERBOSE, HASWELL_FALLBACK); |
712 | 0 | return &gotoblas_HASWELL; |
713 | 0 | } |
714 | 0 | if(support_avx()) { |
715 | 0 | openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); |
716 | 0 | return &gotoblas_SANDYBRIDGE; |
717 | 0 | } else { |
718 | 0 | openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); |
719 | 0 | return &gotoblas_NEHALEM; |
720 | 0 | } |
721 | 0 | } |
722 | 0 | if (model == 14 ) { // Kaby Lake, Coffee Lake |
723 | 0 | if(support_avx2()) |
724 | 0 | return &gotoblas_HASWELL; |
725 | 0 | if(support_avx()) { |
726 | 0 | openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); |
727 | 0 | return &gotoblas_SANDYBRIDGE; |
728 | 0 | } else { |
729 | 0 | openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); |
730 | 0 | return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. |
731 | 0 | } |
732 | 0 | } |
733 | 0 | if (model == 15){ // Sapphire Rapids |
734 | 0 | if(support_amx_bf16()) |
735 | 0 | return &gotoblas_SAPPHIRERAPIDS; |
736 | 0 | if(support_avx512_bf16()) |
737 | 0 | return &gotoblas_COOPERLAKE; |
738 | 0 | if (support_avx512()) |
739 | 0 | return &gotoblas_SKYLAKEX; |
740 | 0 | if(support_avx2()) |
741 | 0 | return &gotoblas_HASWELL; |
742 | 0 | if(support_avx()) { |
743 | 0 | openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); |
744 | 0 | return &gotoblas_SANDYBRIDGE; |
745 | 0 | } else { |
746 | 0 | openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); |
747 | 0 | return &gotoblas_NEHALEM; |
748 | 0 | } |
749 | 0 | } |
750 | 0 | return NULL; |
751 | | |
752 | | |
753 | 0 | case 9: |
754 | 0 | if (model == 7 || model == 10) { // Alder Lake |
755 | 0 | if(support_avx512_bf16()) |
756 | 0 | return &gotoblas_COOPERLAKE; |
757 | 0 | if (support_avx512()) |
758 | 0 | return &gotoblas_SKYLAKEX; |
759 | 0 | if(support_avx2()){ |
760 | 0 | return &gotoblas_HASWELL; |
761 | 0 | } |
762 | 0 | if(support_avx()) { |
763 | 0 | openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); |
764 | 0 | return &gotoblas_SANDYBRIDGE; |
765 | 0 | } else { |
766 | 0 | openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); |
767 | 0 | return &gotoblas_NEHALEM; |
768 | 0 | } |
769 | 0 | } |
770 | 0 | if (model == 14 ) { // Kaby Lake, Coffee Lake |
771 | 0 | if(support_avx2()) |
772 | 0 | return &gotoblas_HASWELL; |
773 | 0 | if(support_avx()) { |
774 | 0 | openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); |
775 | 0 | return &gotoblas_SANDYBRIDGE; |
776 | 0 | } else { |
777 | 0 | openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); |
778 | 0 | return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. |
779 | 0 | } |
780 | 0 | } |
781 | 0 | return NULL; |
782 | 0 | case 10: |
783 | 0 | if (model == 5 || model == 6) { |
784 | 0 | if(support_avx2()) |
785 | 0 | return &gotoblas_HASWELL; |
786 | 0 | if(support_avx()) { |
787 | 0 | openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); |
788 | 0 | return &gotoblas_SANDYBRIDGE; |
789 | 0 | } else { |
790 | 0 | openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); |
791 | 0 | return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. |
792 | 0 | } |
793 | 0 | } |
794 | 0 | if (model == 7) { |
795 | 0 | if (support_avx512()) |
796 | 0 | return &gotoblas_SKYLAKEX; |
797 | 0 | if(support_avx2()) |
798 | 0 | return &gotoblas_HASWELL; |
799 | 0 | if(support_avx()) { |
800 | 0 | openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); |
801 | 0 | return &gotoblas_SANDYBRIDGE; |
802 | 0 | } else { |
803 | 0 | openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); |
804 | 0 | return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. |
805 | 0 | } |
806 | 0 | } |
807 | 0 | return NULL; |
808 | 1 | } |
809 | 0 | break; |
810 | 0 | case 0xf: |
811 | 0 | if (model <= 0x2) return &gotoblas_NORTHWOOD; |
812 | 0 | return &gotoblas_PRESCOTT; |
813 | 1 | } |
814 | 1 | } |
815 | | |
816 | 0 | if (vendor == VENDOR_AMD || vendor == VENDOR_HYGON){ |
817 | 0 | if (family <= 0xe) { |
818 | | // Verify that CPU has 3dnow and 3dnowext before claiming it is Athlon |
819 | 0 | cpuid(0x80000000, &eax, &ebx, &ecx, &edx); |
820 | 0 | if ( (eax & 0xffff) >= 0x01) { |
821 | 0 | cpuid(0x80000001, &eax, &ebx, &ecx, &edx); |
822 | 0 | if ((edx & (1 << 30)) == 0 || (edx & (1u << 31)) == 0) |
823 | 0 | return NULL; |
824 | 0 | } |
825 | 0 | else |
826 | 0 | return NULL; |
827 | | |
828 | 0 | return &gotoblas_ATHLON; |
829 | 0 | } |
830 | 0 | if (family == 0xf){ |
831 | 0 | if ((exfamily == 0) || (exfamily == 2)) { |
832 | 0 | if (ecx & (1 << 0)) return &gotoblas_OPTERON_SSE3; |
833 | 0 | else return &gotoblas_OPTERON; |
834 | 0 | } else if (exfamily == 5 || exfamily == 7) { |
835 | 0 | return &gotoblas_BOBCAT; |
836 | 0 | } else if (exfamily == 6) { |
837 | 0 | if(model == 1){ |
838 | | //AMD Bulldozer Opteron 6200 / Opteron 4200 / AMD FX-Series |
839 | 0 | if(support_avx()) |
840 | 0 | return &gotoblas_BULLDOZER; |
841 | 0 | else{ |
842 | 0 | openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK); |
843 | 0 | return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels. |
844 | 0 | } |
845 | 0 | }else if(model == 2 || model == 3){ |
846 | | //AMD Bulldozer Opteron 6300 / Opteron 4300 / Opteron 3300 |
847 | 0 | if(support_avx()) |
848 | 0 | return &gotoblas_PILEDRIVER; |
849 | 0 | else{ |
850 | 0 | openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK); |
851 | 0 | return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels. |
852 | 0 | } |
853 | 0 | }else if(model == 5){ |
854 | 0 | if(support_avx()) |
855 | 0 | return &gotoblas_EXCAVATOR; |
856 | 0 | else{ |
857 | 0 | openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK); |
858 | 0 | return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels. |
859 | 0 | } |
860 | 0 | }else if(model == 0 || model == 8){ |
861 | 0 | if (exmodel == 1) { |
862 | | //AMD Trinity |
863 | 0 | if(support_avx()) |
864 | 0 | return &gotoblas_PILEDRIVER; |
865 | 0 | else{ |
866 | 0 | openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK); |
867 | 0 | return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels. |
868 | 0 | } |
869 | 0 | }else if (exmodel == 3) { |
870 | | //AMD STEAMROLLER |
871 | 0 | if(support_avx()) |
872 | 0 | return &gotoblas_STEAMROLLER; |
873 | 0 | else{ |
874 | 0 | openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK); |
875 | 0 | return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels. |
876 | 0 | } |
877 | 0 | }else if (exmodel == 6) { |
878 | 0 | if(support_avx()) |
879 | 0 | return &gotoblas_EXCAVATOR; |
880 | 0 | else{ |
881 | 0 | openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK); |
882 | 0 | return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels. |
883 | 0 | } |
884 | |
|
885 | 0 | } |
886 | 0 | } |
887 | 0 | } else if (exfamily == 8) { |
888 | 0 | /* if (model == 1 || model == 8) */ { |
889 | 0 | if(support_avx()) |
890 | 0 | return &gotoblas_ZEN; |
891 | 0 | else{ |
892 | 0 | openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK); |
893 | 0 | return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels. |
894 | 0 | } |
895 | 0 | } |
896 | 0 | } else if (exfamily == 9) { |
897 | 0 | if(support_avx()) |
898 | 0 | return &gotoblas_ZEN; |
899 | 0 | else{ |
900 | 0 | openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK); |
901 | 0 | return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels. |
902 | 0 | } |
903 | 0 | } else if (exfamily == 10) { |
904 | 0 | if(support_avx512_bf16()) |
905 | 0 | return &gotoblas_COOPERLAKE; |
906 | 0 | if(support_avx512()) |
907 | 0 | return &gotoblas_SKYLAKEX; |
908 | 0 | if(support_avx()) |
909 | 0 | return &gotoblas_ZEN; |
910 | 0 | else{ |
911 | 0 | openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK); |
912 | 0 | return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels. |
913 | 0 | } |
914 | 0 | }else { |
915 | 0 | return NULL; |
916 | 0 | } |
917 | | |
918 | 0 | } |
919 | 0 | } |
920 | | |
921 | 0 | if (vendor == VENDOR_CENTAUR) { |
922 | 0 | switch (family) { |
923 | 0 | case 0x6: |
924 | 0 | if (model == 0xf && stepping < 0xe) |
925 | 0 | return &gotoblas_NANO; |
926 | 0 | return &gotoblas_NEHALEM; |
927 | 0 | case 0x7: |
928 | 0 | switch (exmodel) { |
929 | 0 | case 5: |
930 | 0 | case 6: |
931 | 0 | if (support_avx2()) |
932 | 0 | return &gotoblas_ZEN; |
933 | 0 | else |
934 | 0 | return &gotoblas_DUNNINGTON; |
935 | 0 | default: |
936 | 0 | return &gotoblas_NEHALEM; |
937 | 0 | } |
938 | 0 | default: |
939 | 0 | if (family >= 0x8) |
940 | 0 | return &gotoblas_NEHALEM; |
941 | 0 | } |
942 | 0 | } |
943 | | |
944 | 0 | if (vendor == VENDOR_ZHAOXIN) { |
945 | 0 | switch (family) { |
946 | 0 | case 0x7: |
947 | 0 | switch (exmodel) { |
948 | 0 | case 5: |
949 | 0 | if (support_avx2()) |
950 | 0 | return &gotoblas_ZEN; |
951 | 0 | else |
952 | 0 | return &gotoblas_DUNNINGTON; |
953 | 0 | default: |
954 | 0 | return &gotoblas_NEHALEM; |
955 | 0 | } |
956 | 0 | default: |
957 | 0 | return &gotoblas_NEHALEM; |
958 | 0 | } |
959 | 0 | } |
960 | | |
961 | 0 | return NULL; |
962 | 0 | } |
963 | | |
/*
 * Printable core names.  The array index is the core id used by
 * gotoblas_corename() and force_coretype(); slot 0 is the placeholder
 * returned when the active kernel table matches no known core.
 */
static char *corename[] = {
  [ 0] = "Unknown",
  [ 1] = "Katmai",
  [ 2] = "Coppermine",
  [ 3] = "Northwood",
  [ 4] = "Prescott",
  [ 5] = "Banias",
  [ 6] = "Atom",
  [ 7] = "Core2",
  [ 8] = "Penryn",
  [ 9] = "Dunnington",
  [10] = "Nehalem",
  [11] = "Athlon",
  [12] = "Opteron",
  [13] = "Opteron_SSE3",
  [14] = "Barcelona",
  [15] = "Nano",
  [16] = "Sandybridge",
  [17] = "Bobcat",
  [18] = "Bulldozer",
  [19] = "Piledriver",
  [20] = "Haswell",
  [21] = "Steamroller",
  [22] = "Excavator",
  [23] = "Zen",
  [24] = "SkylakeX",
  [25] = "Cooperlake",
  [26] = "SapphireRapids"
};
993 | | |
994 | 521 | char *gotoblas_corename(void) { |
995 | | |
996 | 521 | if (gotoblas == &gotoblas_KATMAI) return corename[ 1]; |
997 | 521 | if (gotoblas == &gotoblas_COPPERMINE) return corename[ 2]; |
998 | 521 | if (gotoblas == &gotoblas_NORTHWOOD) return corename[ 3]; |
999 | 521 | if (gotoblas == &gotoblas_PRESCOTT) return corename[ 4]; |
1000 | 521 | if (gotoblas == &gotoblas_BANIAS) return corename[ 5]; |
1001 | 521 | if (gotoblas == &gotoblas_ATOM) |
1002 | | #ifdef DYNAMIC_OLDER |
1003 | | return corename[ 6]; |
1004 | | #else |
1005 | 0 | return corename[10]; |
1006 | 521 | #endif |
1007 | 521 | if (gotoblas == &gotoblas_CORE2) return corename[ 7]; |
1008 | 521 | if (gotoblas == &gotoblas_PENRYN) |
1009 | | #ifdef DYNAMIC_OLDER |
1010 | | return corename[ 8]; |
1011 | | #else |
1012 | 0 | return corename[7]; |
1013 | 521 | #endif |
1014 | 521 | if (gotoblas == &gotoblas_DUNNINGTON) |
1015 | | #ifdef DYNAMIC_OLDER |
1016 | | return corename[ 9]; |
1017 | | #else |
1018 | 0 | return corename[7]; |
1019 | 521 | #endif |
1020 | 521 | if (gotoblas == &gotoblas_NEHALEM) return corename[10]; |
1021 | 521 | if (gotoblas == &gotoblas_ATHLON) return corename[11]; |
1022 | 521 | if (gotoblas == &gotoblas_OPTERON_SSE3) |
1023 | | #ifdef DYNAMIC_OLDER |
1024 | | return corename[12]; |
1025 | | #else |
1026 | 0 | return corename[7]; |
1027 | 521 | #endif |
1028 | 521 | if (gotoblas == &gotoblas_OPTERON) |
1029 | | #ifdef DYNAMIC_OLDER |
1030 | | return corename[13]; |
1031 | | #else |
1032 | 0 | return corename[7]; |
1033 | 521 | #endif |
1034 | 521 | if (gotoblas == &gotoblas_BARCELONA) return corename[14]; |
1035 | 521 | if (gotoblas == &gotoblas_NANO) |
1036 | | #ifdef DYNAMIC_OLDER |
1037 | | return corename[15]; |
1038 | | #else |
1039 | 0 | return corename[10]; |
1040 | 521 | #endif |
1041 | 521 | if (gotoblas == &gotoblas_SANDYBRIDGE) return corename[16]; |
1042 | 521 | if (gotoblas == &gotoblas_BOBCAT) |
1043 | | #ifdef DYNAMIC_OLDER |
1044 | | return corename[17]; |
1045 | | #else |
1046 | 0 | return corename[7]; |
1047 | 521 | #endif |
1048 | 521 | if (gotoblas == &gotoblas_BULLDOZER) return corename[18]; |
1049 | 521 | if (gotoblas == &gotoblas_PILEDRIVER) return corename[19]; |
1050 | 521 | if (gotoblas == &gotoblas_HASWELL) return corename[20]; |
1051 | 0 | if (gotoblas == &gotoblas_STEAMROLLER) return corename[21]; |
1052 | 0 | if (gotoblas == &gotoblas_EXCAVATOR) return corename[22]; |
1053 | 0 | if (gotoblas == &gotoblas_ZEN) return corename[23]; |
1054 | 0 | if (gotoblas == &gotoblas_SKYLAKEX) return corename[24]; |
1055 | 0 | if (gotoblas == &gotoblas_COOPERLAKE) return corename[25]; |
1056 | 0 | if (gotoblas == &gotoblas_SAPPHIRERAPIDS) return corename[26]; |
1057 | 0 | return corename[0]; |
1058 | 0 | } |
1059 | | |
1060 | | |
1061 | | |
1062 | 0 | static gotoblas_t *force_coretype(char *coretype){ |
1063 | |
|
1064 | 0 | int i ; |
1065 | 0 | int found = -1; |
1066 | 0 | char message[128]; |
1067 | | //char mname[20]; |
1068 | |
|
1069 | 0 | for ( i=1 ; i <= 25; i++) |
1070 | 0 | { |
1071 | 0 | if (!strncasecmp(coretype,corename[i],20)) |
1072 | 0 | { |
1073 | 0 | found = i; |
1074 | 0 | break; |
1075 | 0 | } |
1076 | 0 | } |
1077 | 0 | if (found < 0) |
1078 | 0 | { |
1079 | | //strncpy(mname,coretype,20); |
1080 | 0 | snprintf(message, 128, "Core not found: %s\n",coretype); |
1081 | 0 | openblas_warning(1, message); |
1082 | 0 | return(NULL); |
1083 | 0 | } |
1084 | | |
1085 | 0 | switch (found) |
1086 | 0 | { |
1087 | 0 | case 25: return (&gotoblas_COOPERLAKE); |
1088 | 0 | case 24: return (&gotoblas_SKYLAKEX); |
1089 | 0 | case 23: return (&gotoblas_ZEN); |
1090 | 0 | case 22: return (&gotoblas_EXCAVATOR); |
1091 | 0 | case 21: return (&gotoblas_STEAMROLLER); |
1092 | 0 | case 20: return (&gotoblas_HASWELL); |
1093 | 0 | case 19: return (&gotoblas_PILEDRIVER); |
1094 | 0 | case 18: return (&gotoblas_BULLDOZER); |
1095 | 0 | case 17: return (&gotoblas_BOBCAT); |
1096 | 0 | case 16: return (&gotoblas_SANDYBRIDGE); |
1097 | 0 | case 15: return (&gotoblas_NANO); |
1098 | 0 | case 14: return (&gotoblas_BARCELONA); |
1099 | 0 | case 13: return (&gotoblas_OPTERON); |
1100 | 0 | case 12: return (&gotoblas_OPTERON_SSE3); |
1101 | 0 | case 11: return (&gotoblas_ATHLON); |
1102 | 0 | case 10: return (&gotoblas_NEHALEM); |
1103 | 0 | case 9: return (&gotoblas_DUNNINGTON); |
1104 | 0 | case 8: return (&gotoblas_PENRYN); |
1105 | 0 | case 7: return (&gotoblas_CORE2); |
1106 | 0 | case 6: return (&gotoblas_ATOM); |
1107 | 0 | case 5: return (&gotoblas_BANIAS); |
1108 | 0 | case 4: return (&gotoblas_PRESCOTT); |
1109 | 0 | case 3: return (&gotoblas_NORTHWOOD); |
1110 | 0 | case 2: return (&gotoblas_COPPERMINE); |
1111 | 0 | case 1: return (&gotoblas_KATMAI); |
1112 | 0 | } |
1113 | 0 | return(NULL); |
1114 | |
|
1115 | 0 | } |
1116 | | |
1117 | | |
1118 | | |
1119 | | |
1120 | 2 | void gotoblas_dynamic_init(void) { |
1121 | | |
1122 | 2 | char coremsg[128]; |
1123 | 2 | char coren[22]; |
1124 | 2 | char *p; |
1125 | | |
1126 | | |
1127 | 2 | if (gotoblas) return; |
1128 | | |
1129 | 1 | p = getenv("OPENBLAS_CORETYPE"); |
1130 | 1 | if ( p ) |
1131 | 0 | { |
1132 | 0 | gotoblas = force_coretype(p); |
1133 | 0 | } |
1134 | 1 | else |
1135 | 1 | { |
1136 | 1 | gotoblas = get_coretype(); |
1137 | 1 | } |
1138 | | |
1139 | | #ifdef ARCH_X86 |
1140 | | if (gotoblas == NULL) gotoblas = &gotoblas_KATMAI; |
1141 | | #else |
1142 | 1 | if (gotoblas == NULL) { |
1143 | 0 | if (support_avx512_bf16()) gotoblas = &gotoblas_COOPERLAKE; |
1144 | 0 | else if (support_avx512()) gotoblas = &gotoblas_SKYLAKEX; |
1145 | 0 | else if (support_avx2()) gotoblas = &gotoblas_HASWELL; |
1146 | 0 | else if (support_avx()) gotoblas = &gotoblas_SANDYBRIDGE; |
1147 | 0 | else gotoblas = &gotoblas_PRESCOTT; |
1148 | 0 | } |
1149 | | /* sanity check, if 64bit pointer we can't have a 32 bit cpu */ |
1150 | 1 | if (sizeof(void*) == 8) { |
1151 | 1 | if (gotoblas == &gotoblas_KATMAI || |
1152 | 1 | gotoblas == &gotoblas_COPPERMINE || |
1153 | 1 | gotoblas == &gotoblas_NORTHWOOD || |
1154 | 1 | gotoblas == &gotoblas_BANIAS || |
1155 | 1 | gotoblas == &gotoblas_ATHLON) |
1156 | 0 | gotoblas = &gotoblas_PRESCOTT; |
1157 | 1 | } |
1158 | 1 | #endif |
1159 | | |
1160 | 1 | if (gotoblas && gotoblas -> init) { |
1161 | 1 | strncpy(coren,gotoblas_corename(),20); |
1162 | 1 | sprintf(coremsg, "Core: %s\n",coren); |
1163 | 1 | openblas_warning(2, coremsg); |
1164 | 1 | gotoblas -> init(); |
1165 | 1 | } else { |
1166 | 0 | openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n"); |
1167 | 0 | exit(1); |
1168 | 0 | } |
1169 | | |
1170 | 1 | } |
1171 | | |
1172 | 0 | void gotoblas_dynamic_quit(void) { |
1173 | |
|
1174 | 0 | gotoblas = NULL; |
1175 | |
|
1176 | 0 | } |