Coverage Report

Created: 2026-01-19 18:48

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/contrib/openblas/driver/others/dynamic.c
Line
Count
Source
1
/*********************************************************************/
2
/* Copyright 2009, 2010 The University of Texas at Austin.           */
3
/* All rights reserved.                                              */
4
/*                                                                   */
5
/* Redistribution and use in source and binary forms, with or        */
6
/* without modification, are permitted provided that the following   */
7
/* conditions are met:                                               */
8
/*                                                                   */
9
/*   1. Redistributions of source code must retain the above         */
10
/*      copyright notice, this list of conditions and the following  */
11
/*      disclaimer.                                                  */
12
/*                                                                   */
13
/*   2. Redistributions in binary form must reproduce the above      */
14
/*      copyright notice, this list of conditions and the following  */
15
/*      disclaimer in the documentation and/or other materials       */
16
/*      provided with the distribution.                              */
17
/*                                                                   */
18
/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
19
/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
20
/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
21
/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
22
/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
23
/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
24
/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
25
/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
26
/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
27
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
28
/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
29
/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
30
/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
31
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
32
/*                                                                   */
33
/* The views and conclusions contained in the software and           */
34
/* documentation are those of the authors and should not be          */
35
/* interpreted as representing official policies, either expressed   */
36
/* or implied, of The University of Texas at Austin.                 */
37
/*********************************************************************/
38
39
#include "common.h"
40
41
#ifdef _MSC_VER
42
#define strncasecmp _strnicmp
43
#define strcasecmp _stricmp
44
#endif
45
46
#ifdef ARCH_X86
47
#define EXTERN extern
48
#else
49
#define EXTERN
50
#endif
51
52
#ifdef DYNAMIC_LIST
53
extern gotoblas_t gotoblas_PRESCOTT;
54
55
#ifdef DYN_ATHLON
56
extern gotoblas_t gotoblas_ATHLON;
57
#else
58
#define gotoblas_ATHLON gotoblas_PRESCOTT
59
#endif
60
#ifdef DYN_KATMAI
61
extern gotoblas_t gotoblas_KATMAI;
62
#else
63
#define gotoblas_KATMAI gotoblas_PRESCOTT
64
#endif
65
#ifdef DYN_BANIAS
66
extern gotoblas_t gotoblas_BANIAS;
67
#else
68
#define gotoblas_BANIAS gotoblas_PRESCOTT
69
#endif
70
#ifdef DYN_COPPERMINE
71
extern gotoblas_t gotoblas_COPPERMINE;
72
#else
73
#define gotoblas_COPPERMINE gotoblas_PRESCOTT
74
#endif
75
#ifdef DYN_NORTHWOOD
76
extern gotoblas_t gotoblas_NORTHWOOD;
77
#else
78
#define gotoblas_NORTHWOOD gotoblas_PRESCOTT
79
#endif
80
#ifdef DYN_CORE2
81
extern gotoblas_t gotoblas_CORE2;
82
#else
83
#define gotoblas_CORE2 gotoblas_PRESCOTT
84
#endif
85
#ifdef DYN_NEHALEM
86
extern gotoblas_t gotoblas_NEHALEM;
87
#else
88
#define gotoblas_NEHALEM gotoblas_PRESCOTT
89
#endif
90
#ifdef DYN_BARCELONA
91
extern gotoblas_t gotoblas_BARCELONA;
92
#elif defined(DYN_NEHALEM)
93
#define gotoblas_BARCELONA gotoblas_NEHALEM
94
#else
95
#define gotoblas_BARCELONA gotoblas_PRESCOTT
96
#endif
97
#ifdef DYN_ATOM
98
extern gotoblas_t gotoblas_ATOM;
99
#elif defined(DYN_NEHALEM)
100
#define gotoblas_ATOM gotoblas_NEHALEM
101
#else
102
#define gotoblas_ATOM gotoblas_PRESCOTT
103
#endif
104
#ifdef DYN_NANO
105
extern gotoblas_t gotoblas_NANO;
106
#else
107
#define gotoblas_NANO gotoblas_PRESCOTT
108
#endif
109
#ifdef DYN_PENRYN
110
extern gotoblas_t gotoblas_PENRYN;
111
#else
112
#define gotoblas_PENRYN gotoblas_PRESCOTT
113
#endif
114
#ifdef DYN_DUNNINGTON
115
extern gotoblas_t gotoblas_DUNNINGTON;
116
#else
117
#define gotoblas_DUNNINGTON gotoblas_PRESCOTT
118
#endif
119
#ifdef DYN_OPTERON
120
extern gotoblas_t gotoblas_OPTERON;
121
#else
122
#define gotoblas_OPTERON gotoblas_PRESCOTT
123
#endif
124
#ifdef DYN_OPTERON_SSE3
125
extern gotoblas_t gotoblas_OPTERON_SSE3;
126
#else
127
#define gotoblas_OPTERON_SSE3 gotoblas_PRESCOTT
128
#endif
129
#ifdef DYN_BOBCAT
130
extern gotoblas_t gotoblas_BOBCAT;
131
#elif defined(DYN_NEHALEM)
132
#define gotoblas_BOBCAT gotoblas_NEHALEM
133
#else
134
#define gotoblas_BOBCAT gotoblas_PRESCOTT
135
#endif
136
#ifdef DYN_SANDYBRIDGE
137
extern gotoblas_t gotoblas_SANDYBRIDGE;
138
#elif defined(DYN_NEHALEM)
139
#define gotoblas_SANDYBRIDGE gotoblas_NEHALEM
140
#else
141
#define gotoblas_SANDYBRIDGE gotoblas_PRESCOTT
142
#endif
143
#ifdef DYN_BULLDOZER
144
extern gotoblas_t gotoblas_BULLDOZER;
145
#elif defined(DYN_SANDYBRIDGE)
146
#define gotoblas_BULLDOZER gotoblas_SANDYBRIDGE
147
#elif defined(DYN_NEHALEM)
148
#define gotoblas_BULLDOZER gotoblas_NEHALEM
149
#else
150
#define gotoblas_BULLDOZER gotoblas_PRESCOTT
151
#endif
152
#ifdef DYN_PILEDRIVER
153
extern gotoblas_t gotoblas_PILEDRIVER;
154
#elif defined(DYN_SANDYBRIDGE)
155
#define gotoblas_PILEDRIVER gotoblas_SANDYBRIDGE
156
#elif defined(DYN_NEHALEM)
157
#define gotoblas_PILEDRIVER gotoblas_NEHALEM
158
#else
159
#define gotoblas_PILEDRIVER gotoblas_PRESCOTT
160
#endif
161
#ifdef DYN_STEAMROLLER
162
extern gotoblas_t gotoblas_STEAMROLLER;
163
#elif defined(DYN_SANDYBRIDGE)
164
#define gotoblas_STEAMROLLER gotoblas_SANDYBRIDGE
165
#elif defined(DYN_NEHALEM)
166
#define gotoblas_STEAMROLLER gotoblas_NEHALEM
167
#else
168
#define gotoblas_STEAMROLLER gotoblas_PRESCOTT
169
#endif
170
#ifdef DYN_EXCAVATOR
171
extern gotoblas_t gotoblas_EXCAVATOR;
172
#elif defined(DYN_SANDYBRIDGE)
173
#define gotoblas_EXCAVATOR gotoblas_SANDYBRIDGE
174
#elif defined(DYN_NEHALEM)
175
#define gotoblas_EXCAVATOR gotoblas_NEHALEM
176
#else
177
#define gotoblas_EXCAVATOR gotoblas_PRESCOTT
178
#endif
179
#ifdef DYN_HASWELL
180
extern gotoblas_t gotoblas_HASWELL;
181
#elif defined(DYN_SANDYBRIDGE)
182
#define gotoblas_HASWELL gotoblas_SANDYBRIDGE
183
#elif defined(DYN_NEHALEM)
184
#define gotoblas_HASWELL gotoblas_NEHALEM
185
#else
186
#define gotoblas_HASWELL gotoblas_PRESCOTT
187
#endif
188
#ifdef DYN_ZEN
189
extern gotoblas_t gotoblas_ZEN;
190
#elif defined(DYN_HASWELL)
191
#define gotoblas_ZEN gotoblas_HASWELL
192
#elif defined(DYN_SANDYBRIDGE)
193
#define gotoblas_ZEN gotoblas_SANDYBRIDGE
194
#elif defined(DYN_NEHALEM)
195
#define gotoblas_ZEN gotoblas_NEHALEM
196
#else
197
#define gotoblas_ZEN gotoblas_PRESCOTT
198
#endif
199
#ifdef DYN_SKYLAKEX
200
extern gotoblas_t gotoblas_SKYLAKEX;
201
#elif defined(DYN_HASWELL)
202
#define gotoblas_SKYLAKEX gotoblas_HASWELL
203
#elif defined(DYN_SANDYBRIDGE)
204
#define gotoblas_SKYLAKEX gotoblas_SANDYBRIDGE
205
#elif defined(DYN_NEHALEM)
206
#define gotoblas_SKYLAKEX gotoblas_NEHALEM
207
#else
208
#define gotoblas_SKYLAKEX gotoblas_PRESCOTT
209
#endif
210
#ifdef DYN_COOPERLAKE
211
extern gotoblas_t gotoblas_COOPERLAKE;
212
#elif defined(DYN_SKYLAKEX)
213
#define gotoblas_COOPERLAKE gotoblas_SKYLAKEX
214
#elif defined(DYN_HASWELL)
215
#define gotoblas_COOPERLAKE gotoblas_HASWELL
216
#elif defined(DYN_SANDYBRIDGE)
217
#define gotoblas_COOPERLAKE gotoblas_SANDYBRIDGE
218
#elif defined(DYN_NEHALEM)
219
#define gotoblas_COOPERLAKE gotoblas_NEHALEM
220
#else
221
#define gotoblas_COOPERLAKE gotoblas_PRESCOTT
222
#endif
223
#ifdef DYN_SAPPHIRERAPIDS
224
extern gotoblas_t gotoblas_SAPPHIRERAPIDS;
225
#elif defined(DYN_SKYLAKEX)
226
#define gotoblas_SAPPHIRERAPIDS gotoblas_SKYLAKEX
227
#elif defined(DYN_HASWELL)
228
#define gotoblas_SAPPHIRERAPIDS gotoblas_HASWELL
229
#elif defined(DYN_SANDYBRIDGE)
230
#define gotoblas_SAPPHIRERAPIDS gotoblas_SANDYBRIDGE
231
#elif defined(DYN_NEHALEM)
232
#define gotoblas_SAPPHIRERAPIDS gotoblas_NEHALEM
233
#else
234
#define gotoblas_SAPPHIRERAPIDS gotoblas_PRESCOTT
235
#endif
236
237
238
#else // not DYNAMIC_LIST
239
EXTERN gotoblas_t  gotoblas_KATMAI;
240
EXTERN gotoblas_t  gotoblas_COPPERMINE;
241
EXTERN gotoblas_t  gotoblas_NORTHWOOD;
242
EXTERN gotoblas_t  gotoblas_BANIAS;
243
EXTERN gotoblas_t  gotoblas_ATHLON;
244
245
extern gotoblas_t  gotoblas_PRESCOTT;
246
extern gotoblas_t  gotoblas_CORE2;
247
extern gotoblas_t  gotoblas_NEHALEM;
248
extern gotoblas_t  gotoblas_BARCELONA;
249
#ifdef DYNAMIC_OLDER
250
extern gotoblas_t  gotoblas_ATOM;
251
extern gotoblas_t  gotoblas_NANO;
252
extern gotoblas_t  gotoblas_PENRYN;
253
extern gotoblas_t  gotoblas_DUNNINGTON;
254
extern gotoblas_t  gotoblas_OPTERON;
255
extern gotoblas_t  gotoblas_OPTERON_SSE3;
256
extern gotoblas_t  gotoblas_BOBCAT;
257
#else
258
521
#define gotoblas_ATOM gotoblas_NEHALEM
259
521
#define gotoblas_NANO gotoblas_NEHALEM
260
521
#define gotoblas_PENRYN gotoblas_CORE2
261
521
#define gotoblas_DUNNINGTON gotoblas_CORE2
262
521
#define gotoblas_OPTERON gotoblas_CORE2
263
521
#define gotoblas_OPTERON_SSE3 gotoblas_CORE2
264
521
#define gotoblas_BOBCAT gotoblas_CORE2
265
#endif
266
267
#ifndef NO_AVX
268
extern gotoblas_t  gotoblas_SANDYBRIDGE;
269
extern gotoblas_t  gotoblas_BULLDOZER;
270
extern gotoblas_t  gotoblas_PILEDRIVER;
271
extern gotoblas_t  gotoblas_STEAMROLLER;
272
extern gotoblas_t  gotoblas_EXCAVATOR;
273
#ifdef NO_AVX2
274
#define gotoblas_HASWELL gotoblas_SANDYBRIDGE
275
#define gotoblas_SKYLAKEX gotoblas_SANDYBRIDGE
276
#define gotoblas_COOPERLAKE gotoblas_SANDYBRIDGE
277
#define gotoblas_ZEN gotoblas_SANDYBRIDGE
278
#define gotoblas_SAPPHIRERAPIDS gotoblas_SANDYBRIDGE
279
#else
280
extern gotoblas_t  gotoblas_HASWELL;
281
extern gotoblas_t  gotoblas_ZEN;
282
#ifndef NO_AVX512
283
extern gotoblas_t  gotoblas_SKYLAKEX;
284
extern gotoblas_t  gotoblas_COOPERLAKE;
285
extern gotoblas_t  gotoblas_SAPPHIRERAPIDS;
286
#else
287
0
#define gotoblas_SKYLAKEX gotoblas_HASWELL
288
0
#define gotoblas_COOPERLAKE gotoblas_HASWELL
289
0
#define gotoblas_SAPPHIRERAPIDS gotoblas_HASWELL
290
#endif
291
#endif
292
#else
293
//Use NEHALEM kernels for sandy bridge
294
#define gotoblas_SANDYBRIDGE gotoblas_NEHALEM
295
#define gotoblas_HASWELL gotoblas_NEHALEM
296
#define gotoblas_SKYLAKEX gotoblas_NEHALEM
297
#define gotoblas_COOPERLAKE gotoblas_NEHALEM
298
#define gotoblas_SAPPHIRERAPIDS gotoblas_NEHALEM
299
#define gotoblas_BULLDOZER gotoblas_BARCELONA
300
#define gotoblas_PILEDRIVER gotoblas_BARCELONA
301
#define gotoblas_STEAMROLLER gotoblas_BARCELONA
302
#define gotoblas_EXCAVATOR gotoblas_BARCELONA
303
#define gotoblas_ZEN gotoblas_BARCELONA
304
#endif
305
306
#endif // DYNAMIC_LIST
307
308
2
/* Vendor codes produced by get_vendor() and consumed by get_coretype(). */
#define VENDOR_INTEL      1
#define VENDOR_AMD        2
#define VENDOR_CENTAUR    3
#define VENDOR_HYGON      4
#define VENDOR_ZHAOXIN    5
#define VENDOR_UNKNOWN   99

/* Extract a bit field: shift `value` right by `shift` bits, then keep the bits in `mask`. */
#define BITMASK(value, shift, mask) (((value) >> (shift)) & (mask))
316
317
#ifndef NO_AVX
318
1
/*
 * Run the XGETBV instruction with `op` in ECX and hand back the resulting
 * EAX and EDX values through `eax` and `edx`.
 */
static inline void xgetbv(int op, int * eax, int * edx){
  int lo, hi;

  /* XGETBV is emitted as raw opcode bytes rather than by mnemonic. */
  __asm__ __volatile__
    (".byte 0x0f, 0x01, 0xd0" : "=a" (lo), "=d" (hi) : "c" (op) : "cc");

  *eax = lo;
  *edx = hi;
}
323
#endif
324
325
1
/*
 * Report whether AVX kernels may be used on this machine.
 *
 * Requires CPUID.1:ECX bits 26, 27 and 28 to all be set, and XGETBV(0) to
 * report bits 1 and 2 (mask 6) set in EAX. Returns 1 when both conditions
 * hold, 0 otherwise. Always returns 0 when built with NO_AVX.
 */
int support_avx(){
#ifndef NO_AVX
  /* CPUID.1:ECX bits that must all be present before XGETBV is consulted. */
  const int required = (1 << 26) | (1 << 27) | (1 << 28);
  int eax, ebx, ecx, edx;

  cpuid(1, &eax, &ebx, &ecx, &edx);
  if ((ecx & required) != required)
    return 0;

  xgetbv(0, &eax, &edx);
  return ((eax & 6) == 6) ? 1 : 0;  /* 1: OS supports AVX */
#else
  return 0;
#endif
}
342
343
1
/*
 * Report whether AVX2 kernels may be used.
 *
 * AVX must already be usable (support_avx()), and CPUID leaf 7 must report
 * EBX bit 5 (the AVX2 flag). Returns 1 if so, 0 otherwise. Always returns 0
 * when built with NO_AVX2.
 */
int support_avx2(){
#ifndef NO_AVX2
  int eax, ebx, ecx=0, edx;

  if (!support_avx())
    return 0;

  cpuid(7, &eax, &ebx, &ecx, &edx);
  return ((ebx & (1<<5)) != 0) ? 1 : 0;  /* 1: AVX2 flag is set */
#else
  return 0;
#endif
}
358
359
1
/*
 * Report whether AVX-512 kernels may be used.
 *
 * All of the following must hold:
 *   - support_avx() succeeds;
 *   - CPUID leaf 7 EBX bit 5 (AVX2) is set;
 *   - CPUID leaf 7 EBX bit 31 (AVX512VL) is set;
 *   - XGETBV(0) reports mask 0xe0 fully set in EAX (OS saves zmm state).
 *
 * Returns 1 when usable, 0 otherwise. Always returns 0 when built with
 * NO_AVX or NO_AVX512.
 */
int support_avx512(){
#if !defined(NO_AVX) && !defined(NO_AVX512)
  int eax, ebx, ecx, edx;

  if (!support_avx())
    return 0;

  cpuid(7, &eax, &ebx, &ecx, &edx);

  /* Previously this branch only re-assigned an already-zero result, so a
     missing AVX2 flag never actually vetoed AVX-512. Bail out for real. */
  if ((ebx & (1<<5)) == 0)
    return 0;  //cpu does not have avx2 flag

  /* Test bit 31 on unsigned operands: 1<<31 overflows a signed int. */
  if (((unsigned int)ebx & (1u << 31)) == 0)
    return 0;  //AVX512VL flag is not set

  xgetbv(0, &eax, &edx);
  return ((eax & 0xe0) == 0xe0) ? 1 : 0;  //1: OS supports saving zmm register
#else
  return 0;
#endif
}
380
381
1
/*
 * Report whether AVX512-BF16 kernels may be used.
 *
 * Requires support_avx512() and CPUID leaf 7, sub-leaf 1, EAX bit 5.
 * Returns 1 if both hold, 0 otherwise. Always returns 0 when built with
 * NO_AVX or NO_AVX512.
 */
int support_avx512_bf16(){
#if !defined(NO_AVX) && !defined(NO_AVX512)
  int eax, ebx, ecx, edx;

  if (!support_avx512())
    return 0;

  // CPUID.7.1:EAX[bit 5] indicates whether avx512_bf16 supported or not
  cpuid_count(7, 1, &eax, &ebx, &ecx, &edx);
  return ((eax & 32) == 32) ? 1 : 0;
#else
  return 0;
#endif
}
397
398
/* CPUID.7.0:EDX feature bits tested for AMX. */
#define BIT_AMX_TILE  0x01000000
#define BIT_AMX_BF16  0x00400000
/* CPUID.D.0:EAX bits 17 and 18; both must be set for AMX to count as enabled. */
#define BIT_AMX_ENBD  0x00060000

/*
 * Report whether AMX-BF16 kernels may be used.
 *
 * Requires support_avx512(), both AMX feature bits in CPUID.7.0:EDX, and
 * both enable bits in CPUID.D.0:EAX. Returns 1 if all hold, 0 otherwise.
 * Always returns 0 when built with NO_AVX or NO_AVX512.
 */
int support_amx_bf16() {
#if !defined(NO_AVX) && !defined(NO_AVX512)
  const int amx_features = BIT_AMX_TILE | BIT_AMX_BF16;
  int eax, ebx, ecx, edx;

  if (!support_avx512())
    return 0;

  // CPUID.7.0:EDX indicates AMX support
  cpuid_count(7, 0, &eax, &ebx, &ecx, &edx);
  if ((edx & amx_features) != amx_features)
    return 0;

  // CPUID.D.0:EAX[17:18] indicates AMX enabled
  cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx);
  return ((eax & BIT_AMX_ENBD) == BIT_AMX_ENBD) ? 1 : 0;
#else
  return 0;
#endif
}
422
423
/* Warning hook declared here and defined outside this view; get_coretype()
   calls it with FALLBACK_VERBOSE and one of the messages below whenever it
   picks an older kernel set than the CPU model would normally get. */
extern void openblas_warning(int verbose, const char * msg);
424
1
/* Value passed as the `verbose` argument for every fallback warning. */
#define FALLBACK_VERBOSE 1
425
0
/* Emitted when Nehalem kernels are chosen because support_avx() failed. */
#define NEHALEM_FALLBACK "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Nehalem kernels as a fallback, which may give poorer performance.\n"
426
0
/* Emitted when Sandybridge kernels are chosen because support_avx2() failed. */
#define SANDYBRIDGE_FALLBACK "OpenBLAS : Your OS does not support AVX2 instructions. OpenBLAS is using Sandybridge kernels as a fallback, which may give poorer performance.\n"
427
1
/* Emitted when Haswell kernels are chosen for a model on which AVX-512 was tried first. */
#define HASWELL_FALLBACK "OpenBLAS : Your OS does not support AVX512VL instructions. OpenBLAS is using Haswell kernels as a fallback, which may give poorer performance.\n"
428
0
/* Emitted on the AMD/Hygon paths when Barcelona kernels are chosen because support_avx() failed. */
#define BARCELONA_FALLBACK "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Barcelona kernels as a fallback, which may give poorer performance.\n"
429
430
1
static int get_vendor(void){
431
1
  int eax, ebx, ecx, edx;
432
433
1
  union
434
1
  {
435
1
        char vchar[16];
436
1
        int  vint[4];
437
1
  } vendor;
438
439
1
  cpuid(0, &eax, &ebx, &ecx, &edx);
440
441
1
  *(&vendor.vint[0]) = ebx;
442
1
  *(&vendor.vint[1]) = edx;
443
1
  *(&vendor.vint[2]) = ecx;
444
445
1
  vendor.vchar[12] = '\0';
446
447
1
  if (!strcmp(vendor.vchar, "GenuineIntel")) return VENDOR_INTEL;
448
0
  if (!strcmp(vendor.vchar, "AuthenticAMD")) return VENDOR_AMD;
449
0
  if (!strcmp(vendor.vchar, "CentaurHauls")) return VENDOR_CENTAUR;
450
0
  if (!strcmp(vendor.vchar, "  Shanghai  ")) return VENDOR_ZHAOXIN;
451
0
  if (!strcmp(vendor.vchar, "HygonGenuine")) return VENDOR_HYGON;
452
453
0
  if ((eax == 0) || ((eax & 0x500) != 0)) return VENDOR_INTEL;
454
455
0
  return VENDOR_UNKNOWN;
456
0
}
457
458
1
static gotoblas_t *get_coretype(void){
459
460
1
  int eax, ebx, ecx, edx;
461
1
  int family, exfamily, model, vendor, exmodel, stepping;
462
463
1
  cpuid(1, &eax, &ebx, &ecx, &edx);
464
465
1
  family   = BITMASK(eax,  8, 0x0f);
466
1
  exfamily = BITMASK(eax, 20, 0xff);
467
1
  model    = BITMASK(eax,  4, 0x0f);
468
1
  exmodel  = BITMASK(eax, 16, 0x0f);
469
1
  stepping = BITMASK(eax,  0, 0x0f);
470
471
1
  vendor = get_vendor();
472
473
1
  if (vendor == VENDOR_INTEL){
474
1
    switch (family) {
475
1
    case 0x6:
476
1
      switch (exmodel) {
477
0
      case 0:
478
0
  if (model <= 0x7) return &gotoblas_KATMAI;
479
0
  if ((model == 0x8) || (model == 0xa) || (model == 0xb)) return &gotoblas_COPPERMINE;
480
0
  if ((model == 0x9) || (model == 0xd)) return &gotoblas_BANIAS;
481
0
  if (model == 14) return &gotoblas_BANIAS;
482
0
  if (model == 15) return &gotoblas_CORE2;
483
0
  return NULL;
484
485
0
      case 1:
486
0
  if (model == 6) return &gotoblas_CORE2;
487
0
  if (model == 7) return &gotoblas_PENRYN;
488
0
  if (model == 13) return &gotoblas_DUNNINGTON;
489
0
  if ((model == 10) || (model == 11) || (model == 14) || (model == 15)) return &gotoblas_NEHALEM;
490
0
  if (model == 12) return &gotoblas_ATOM;
491
0
  return NULL;
492
493
0
      case 2:
494
  //Intel Core (Clarkdale) / Core (Arrandale)
495
  // Pentium (Clarkdale) / Pentium Mobile (Arrandale)
496
  // Xeon (Clarkdale), 32nm
497
0
  if (model ==  5) return &gotoblas_NEHALEM;
498
499
  //Intel Xeon Processor 5600 (Westmere-EP)
500
  //Xeon Processor E7 (Westmere-EX)
501
  //Xeon E7540
502
0
  if (model == 12 || model == 14 || model == 15) return &gotoblas_NEHALEM;
503
504
  //Intel Core i5-2000 /i7-2000 (Sandy Bridge)
505
  //Intel Core i7-3000 / Xeon E5
506
0
  if (model == 10 || model == 13) {
507
0
    if(support_avx())
508
0
      return &gotoblas_SANDYBRIDGE;
509
0
    else{
510
0
      openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
511
0
      return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
512
0
    }
513
0
  }
514
0
  return NULL;
515
0
      case 3:
516
  //Intel Sandy Bridge 22nm (Ivy Bridge?)
517
0
  if (model == 10 || model == 14) {
518
0
    if(support_avx())
519
0
      return &gotoblas_SANDYBRIDGE;
520
0
    else{
521
0
      openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
522
0
      return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
523
0
    }
524
0
  }
525
  //Intel Haswell
526
0
  if (model == 12 || model == 15) {
527
0
    if(support_avx2())
528
0
      return &gotoblas_HASWELL;
529
0
    if(support_avx()) {
530
0
      openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
531
0
      return &gotoblas_SANDYBRIDGE;
532
0
    } else {
533
0
      openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
534
0
      return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
535
0
    }
536
0
  }
537
  //Intel Broadwell
538
0
  if (model == 13) {
539
0
    if(support_avx2())
540
0
      return &gotoblas_HASWELL;
541
0
    if(support_avx()) {
542
0
      openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
543
0
      return &gotoblas_SANDYBRIDGE;
544
0
    } else {
545
0
      openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
546
0
      return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
547
0
    }
548
0
  }
549
0
  if (model == 7) return &gotoblas_ATOM; //Bay Trail 
550
0
  return NULL;
551
0
      case 4:
552
    //Intel Haswell
553
0
  if (model == 5 || model == 6) {
554
0
    if(support_avx2())
555
0
      return &gotoblas_HASWELL;
556
0
    if(support_avx()) {
557
0
      openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
558
0
      return &gotoblas_SANDYBRIDGE;
559
0
    } else {
560
0
      openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
561
0
      return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
562
0
    }
563
0
  }
564
  //Intel Broadwell
565
0
  if (model == 7 || model == 15) {
566
0
    if(support_avx2())
567
0
      return &gotoblas_HASWELL;
568
0
    if(support_avx()) {
569
0
      openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
570
0
      return &gotoblas_SANDYBRIDGE;
571
0
    } else {
572
0
      openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
573
0
      return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
574
0
    }
575
0
  }
576
  //Intel Skylake
577
0
  if (model == 14) {
578
0
    if(support_avx2())
579
0
      return &gotoblas_HASWELL;
580
0
    if(support_avx()) {
581
0
      openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
582
0
      return &gotoblas_SANDYBRIDGE;
583
0
    } else {
584
0
      openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
585
0
      return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
586
0
    }
587
0
  }
588
  //Intel Braswell / Avoton
589
0
  if (model == 12 || model == 13) { 
590
0
    return &gotoblas_NEHALEM;
591
0
  }  
592
0
  return NULL;
593
1
      case 5:
594
  //Intel Broadwell
595
1
  if (model == 6) {
596
0
    if(support_avx2())
597
0
      return &gotoblas_HASWELL;
598
0
    if(support_avx()) {
599
0
      openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
600
0
      return &gotoblas_SANDYBRIDGE;
601
0
    } else {
602
0
      openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
603
0
      return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
604
0
    }
605
0
  }
606
1
  if (model == 5) {
607
  // Intel Cooperlake
608
1
          if(support_avx512_bf16())
609
0
             return &gotoblas_COOPERLAKE;
610
  // Intel Skylake X
611
1
          if (support_avx512()) 
612
0
      return &gotoblas_SKYLAKEX;
613
1
    if(support_avx2()){
614
1
      openblas_warning(FALLBACK_VERBOSE, HASWELL_FALLBACK);
615
1
      return &gotoblas_HASWELL;
616
1
          }
617
0
    if(support_avx()) {
618
0
      openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
619
0
      return &gotoblas_SANDYBRIDGE;
620
0
    } else {
621
0
          openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
622
0
          return &gotoblas_NEHALEM;
623
0
          }
624
0
  }
625
  //Intel Skylake
626
0
  if (model == 14) {
627
0
    if(support_avx2())
628
0
      return &gotoblas_HASWELL;
629
0
    if(support_avx()) {
630
0
      openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
631
0
      return &gotoblas_SANDYBRIDGE;
632
0
    } else {
633
0
      openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
634
0
      return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
635
0
    }
636
0
  }
637
  //Intel Phi Knights Landing
638
0
  if (model == 7) {
639
0
    if(support_avx2()){
640
0
      openblas_warning(FALLBACK_VERBOSE, HASWELL_FALLBACK);
641
0
      return &gotoblas_HASWELL;
642
0
    }  
643
0
    if(support_avx()) {
644
0
      openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
645
0
      return &gotoblas_SANDYBRIDGE;
646
0
    } else {
647
0
      openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
648
0
      return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
649
0
    }
650
0
  }
651
  //Apollo Lake or Denverton
652
0
  if (model == 12 || model == 15) { 
653
0
    return &gotoblas_NEHALEM;
654
0
  }  
655
0
  return NULL;
656
0
      case 6:
657
0
        if (model == 6) {
658
          // Cannon Lake
659
0
    if(support_avx2())
660
0
      return &gotoblas_HASWELL;
661
0
    if(support_avx()) {
662
0
      openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
663
0
      return &gotoblas_SANDYBRIDGE;
664
0
    } else {
665
0
      openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
666
0
      return &gotoblas_NEHALEM;
667
0
    }
668
0
        }
669
0
  if (model == 10 || model == 12){
670
          // Ice Lake SP
671
0
     if(support_avx512_bf16())
672
0
             return &gotoblas_COOPERLAKE;
673
0
          if (support_avx512()) 
674
0
      return &gotoblas_SKYLAKEX;
675
0
    if(support_avx2())
676
0
      return &gotoblas_HASWELL;
677
0
    if(support_avx()) {
678
0
      openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
679
0
      return &gotoblas_SANDYBRIDGE;
680
0
    } else {
681
0
      openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
682
0
      return &gotoblas_NEHALEM;
683
0
    }
684
0
        }
685
0
        return NULL;  
686
0
      case 7:
687
0
  if (model == 10) // Goldmont Plus
688
0
     return &gotoblas_NEHALEM;
689
0
        if (model == 13 || model == 14) {
690
  // Ice Lake
691
0
          if (support_avx512()) 
692
0
      return &gotoblas_SKYLAKEX;
693
0
    if(support_avx2()){
694
0
      openblas_warning(FALLBACK_VERBOSE, HASWELL_FALLBACK);
695
0
      return &gotoblas_HASWELL;
696
0
          }
697
0
    if(support_avx()) {
698
0
      openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
699
0
      return &gotoblas_SANDYBRIDGE;
700
0
    } else {
701
0
          openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
702
0
          return &gotoblas_NEHALEM;
703
0
          }
704
0
        }
705
0
        return NULL;  
706
0
      case 8:
707
0
        if (model == 12 || model == 13) { // Tiger Lake
708
0
          if (support_avx512()) 
709
0
            return &gotoblas_SKYLAKEX;
710
0
          if(support_avx2()){
711
0
            openblas_warning(FALLBACK_VERBOSE, HASWELL_FALLBACK);
712
0
            return &gotoblas_HASWELL;
713
0
          }
714
0
          if(support_avx()) {
715
0
            openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
716
0
            return &gotoblas_SANDYBRIDGE;
717
0
          } else {
718
0
          openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
719
0
          return &gotoblas_NEHALEM;
720
0
          }
721
0
        }
722
0
  if (model == 14 ) { // Kaby Lake, Coffee Lake
723
0
    if(support_avx2())
724
0
      return &gotoblas_HASWELL;
725
0
    if(support_avx()) {
726
0
      openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
727
0
      return &gotoblas_SANDYBRIDGE;
728
0
    } else {
729
0
      openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
730
0
      return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
731
0
    }
732
0
  }
733
0
  if (model == 15){          // Sapphire Rapids
734
0
     if(support_amx_bf16())
735
0
       return &gotoblas_SAPPHIRERAPIDS;
736
0
     if(support_avx512_bf16())
737
0
             return &gotoblas_COOPERLAKE;
738
0
          if (support_avx512()) 
739
0
      return &gotoblas_SKYLAKEX;
740
0
    if(support_avx2())
741
0
      return &gotoblas_HASWELL;
742
0
    if(support_avx()) {
743
0
      openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
744
0
      return &gotoblas_SANDYBRIDGE;
745
0
    } else {
746
0
      openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
747
0
      return &gotoblas_NEHALEM;
748
0
    }
749
0
        }
750
0
  return NULL;
751
  
752
  
753
0
      case 9:
754
0
        if (model == 7 || model == 10) { // Alder Lake
755
0
     if(support_avx512_bf16())
756
0
             return &gotoblas_COOPERLAKE;
757
0
          if (support_avx512()) 
758
0
      return &gotoblas_SKYLAKEX;
759
0
          if(support_avx2()){
760
0
            return &gotoblas_HASWELL;
761
0
          }
762
0
          if(support_avx()) {
763
0
            openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
764
0
            return &gotoblas_SANDYBRIDGE;
765
0
          } else {
766
0
          openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
767
0
          return &gotoblas_NEHALEM;
768
0
          }
769
0
        }
770
0
  if (model == 14 ) { // Kaby Lake, Coffee Lake
771
0
    if(support_avx2())
772
0
      return &gotoblas_HASWELL;
773
0
    if(support_avx()) {
774
0
      openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
775
0
      return &gotoblas_SANDYBRIDGE;
776
0
    } else {
777
0
      openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
778
0
      return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
779
0
    }
780
0
  }
781
0
  return NULL;
782
0
      case 10:
783
0
        if (model == 5 || model == 6) {
784
0
    if(support_avx2())
785
0
      return &gotoblas_HASWELL;
786
0
    if(support_avx()) {
787
0
      openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
788
0
      return &gotoblas_SANDYBRIDGE;
789
0
    } else {
790
0
      openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
791
0
      return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
792
0
    }
793
0
        }
794
0
        if (model == 7) {
795
0
    if (support_avx512()) 
796
0
      return &gotoblas_SKYLAKEX;
797
0
    if(support_avx2())
798
0
      return &gotoblas_HASWELL;
799
0
    if(support_avx()) {
800
0
      openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
801
0
      return &gotoblas_SANDYBRIDGE;
802
0
    } else {
803
0
      openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
804
0
      return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
805
0
    }
806
0
        }      
807
0
  return NULL;
808
1
      }
809
0
      break;
810
0
    case 0xf:
811
0
      if (model <= 0x2) return &gotoblas_NORTHWOOD;
812
0
      return &gotoblas_PRESCOTT;
813
1
    }
814
1
  }
815
816
0
  if (vendor == VENDOR_AMD || vendor == VENDOR_HYGON){
817
0
    if (family <= 0xe) {
818
        // Verify that CPU has 3dnow and 3dnowext before claiming it is Athlon
819
0
        cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
820
0
        if ( (eax & 0xffff)  >= 0x01) {
821
0
            cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
822
0
            if ((edx & (1 << 30)) == 0 || (edx & (1u << 31)) == 0)
823
0
              return NULL;
824
0
          }
825
0
        else
826
0
          return NULL;
827
828
0
        return &gotoblas_ATHLON;
829
0
      }
830
0
    if (family == 0xf){
831
0
      if ((exfamily == 0) || (exfamily == 2)) {
832
0
  if (ecx & (1 <<  0)) return &gotoblas_OPTERON_SSE3;
833
0
  else return &gotoblas_OPTERON;
834
0
      }  else if (exfamily == 5 || exfamily == 7) {
835
0
  return &gotoblas_BOBCAT;
836
0
      } else if (exfamily == 6) {
837
0
  if(model == 1){
838
    //AMD Bulldozer Opteron 6200 / Opteron 4200 / AMD FX-Series
839
0
    if(support_avx())
840
0
      return &gotoblas_BULLDOZER;
841
0
    else{
842
0
      openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
843
0
      return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
844
0
    }
845
0
  }else if(model == 2 || model == 3){
846
    //AMD Bulldozer Opteron 6300 / Opteron 4300 / Opteron 3300
847
0
    if(support_avx())
848
0
      return &gotoblas_PILEDRIVER;
849
0
    else{
850
0
      openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
851
0
      return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
852
0
    }
853
0
  }else if(model == 5){
854
0
    if(support_avx())
855
0
      return &gotoblas_EXCAVATOR;
856
0
    else{
857
0
      openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
858
0
      return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
859
0
    }
860
0
  }else if(model == 0 || model == 8){
861
0
    if (exmodel == 1) {
862
      //AMD Trinity
863
0
      if(support_avx())
864
0
        return &gotoblas_PILEDRIVER;
865
0
      else{
866
0
        openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
867
0
        return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
868
0
      }
869
0
     }else if (exmodel == 3) {
870
      //AMD STEAMROLLER
871
0
      if(support_avx())
872
0
        return &gotoblas_STEAMROLLER;
873
0
      else{
874
0
        openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
875
0
        return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
876
0
      }
877
0
    }else if (exmodel == 6) {
878
0
      if(support_avx())
879
0
        return &gotoblas_EXCAVATOR;
880
0
      else{
881
0
        openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
882
0
        return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
883
0
      }
884
885
0
    }
886
0
  }
887
0
      } else if (exfamily == 8) {
888
0
  /* if (model == 1 || model == 8) */ {
889
0
    if(support_avx())
890
0
      return &gotoblas_ZEN;
891
0
    else{
892
0
      openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
893
0
      return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
894
0
    }
895
0
  }
896
0
      } else if (exfamily == 9) {  
897
0
    if(support_avx())
898
0
      return &gotoblas_ZEN;
899
0
    else{
900
0
      openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
901
0
      return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
902
0
          }
903
0
      } else if (exfamily == 10) {
904
0
    if(support_avx512_bf16())
905
0
      return &gotoblas_COOPERLAKE;
906
0
    if(support_avx512())
907
0
      return &gotoblas_SKYLAKEX;
908
0
    if(support_avx())
909
0
      return &gotoblas_ZEN;
910
0
    else{
911
0
      openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
912
0
      return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
913
0
          }
914
0
      }else {
915
0
  return NULL;
916
0
      }
917
   
918
0
    }
919
0
  }
920
921
0
  if (vendor == VENDOR_CENTAUR) {
922
0
    switch (family) {
923
0
    case 0x6:
924
0
      if (model == 0xf && stepping < 0xe)
925
0
        return &gotoblas_NANO;
926
0
      return &gotoblas_NEHALEM;
927
0
  case 0x7:
928
0
      switch (exmodel) {
929
0
      case 5:
930
0
      case 6:
931
0
        if (support_avx2())
932
0
          return &gotoblas_ZEN;
933
0
        else
934
0
          return &gotoblas_DUNNINGTON;
935
0
      default:
936
0
        return &gotoblas_NEHALEM;
937
0
      }
938
0
    default:
939
0
      if (family >= 0x8)
940
0
        return &gotoblas_NEHALEM;
941
0
    }
942
0
  }
943
944
0
  if (vendor == VENDOR_ZHAOXIN) {
945
0
    switch (family) {
946
0
      case 0x7:
947
0
        switch (exmodel) {
948
0
        case 5:
949
0
          if (support_avx2())
950
0
            return &gotoblas_ZEN;
951
0
          else
952
0
            return &gotoblas_DUNNINGTON;
953
0
        default:
954
0
          return &gotoblas_NEHALEM;
955
0
        }
956
0
      default:
957
0
        return &gotoblas_NEHALEM;
958
0
    }
959
0
  }
960
961
0
  return NULL;
962
0
}
963
964
/*
 * Printable kernel names, indexed by the core number that
 * gotoblas_corename() and force_coretype() use.
 *
 * Both of those functions pair index 12 with gotoblas_OPTERON_SSE3 and
 * index 13 with gotoblas_OPTERON, so the strings must appear in that order
 * here; otherwise "Opteron" and "Opteron_SSE3" are reported and selected
 * the wrong way round.
 */
static char *corename[] = {
    "Unknown",          /*  0 */
    "Katmai",           /*  1 */
    "Coppermine",       /*  2 */
    "Northwood",        /*  3 */
    "Prescott",         /*  4 */
    "Banias",           /*  5 */
    "Atom",             /*  6 */
    "Core2",            /*  7 */
    "Penryn",           /*  8 */
    "Dunnington",       /*  9 */
    "Nehalem",          /* 10 */
    "Athlon",           /* 11 */
    "Opteron_SSE3",     /* 12 */
    "Opteron",          /* 13 */
    "Barcelona",        /* 14 */
    "Nano",             /* 15 */
    "Sandybridge",      /* 16 */
    "Bobcat",           /* 17 */
    "Bulldozer",        /* 18 */
    "Piledriver",       /* 19 */
    "Haswell",          /* 20 */
    "Steamroller",      /* 21 */
    "Excavator",        /* 22 */
    "Zen",              /* 23 */
    "SkylakeX",         /* 24 */
    "Cooperlake",       /* 25 */
    "SapphireRapids"    /* 26 */
};
993
994
521
char *gotoblas_corename(void) {
995
996
521
  if (gotoblas == &gotoblas_KATMAI)       return corename[ 1];
997
521
  if (gotoblas == &gotoblas_COPPERMINE)   return corename[ 2];
998
521
  if (gotoblas == &gotoblas_NORTHWOOD)    return corename[ 3];
999
521
  if (gotoblas == &gotoblas_PRESCOTT)     return corename[ 4];
1000
521
  if (gotoblas == &gotoblas_BANIAS)       return corename[ 5];
1001
521
  if (gotoblas == &gotoblas_ATOM)
1002
#ifdef DYNAMIC_OLDER
1003
           return corename[ 6];
1004
#else
1005
0
           return corename[10];
1006
521
#endif
1007
521
  if (gotoblas == &gotoblas_CORE2)        return corename[ 7];
1008
521
  if (gotoblas == &gotoblas_PENRYN)
1009
#ifdef DYNAMIC_OLDER
1010
           return corename[ 8];
1011
#else
1012
0
           return corename[7];
1013
521
#endif
1014
521
  if (gotoblas == &gotoblas_DUNNINGTON)
1015
#ifdef DYNAMIC_OLDER
1016
           return corename[ 9];
1017
#else
1018
0
           return corename[7];
1019
521
#endif
1020
521
  if (gotoblas == &gotoblas_NEHALEM)      return corename[10];
1021
521
  if (gotoblas == &gotoblas_ATHLON)       return corename[11];
1022
521
  if (gotoblas == &gotoblas_OPTERON_SSE3)
1023
#ifdef DYNAMIC_OLDER
1024
           return corename[12];
1025
#else
1026
0
           return corename[7];
1027
521
#endif
1028
521
  if (gotoblas == &gotoblas_OPTERON)
1029
#ifdef DYNAMIC_OLDER
1030
           return corename[13];
1031
#else
1032
0
           return corename[7];
1033
521
#endif
1034
521
  if (gotoblas == &gotoblas_BARCELONA)    return corename[14];
1035
521
  if (gotoblas == &gotoblas_NANO)
1036
#ifdef DYNAMIC_OLDER
1037
           return corename[15];
1038
#else
1039
0
           return corename[10];
1040
521
#endif
1041
521
  if (gotoblas == &gotoblas_SANDYBRIDGE)  return corename[16];
1042
521
  if (gotoblas == &gotoblas_BOBCAT)
1043
#ifdef DYNAMIC_OLDER
1044
           return corename[17];
1045
#else
1046
0
           return corename[7];
1047
521
#endif
1048
521
  if (gotoblas == &gotoblas_BULLDOZER)    return corename[18];
1049
521
  if (gotoblas == &gotoblas_PILEDRIVER)   return corename[19];
1050
521
  if (gotoblas == &gotoblas_HASWELL)      return corename[20];
1051
0
  if (gotoblas == &gotoblas_STEAMROLLER)  return corename[21];
1052
0
  if (gotoblas == &gotoblas_EXCAVATOR)    return corename[22];
1053
0
  if (gotoblas == &gotoblas_ZEN)          return corename[23];
1054
0
  if (gotoblas == &gotoblas_SKYLAKEX)     return corename[24];
1055
0
  if (gotoblas == &gotoblas_COOPERLAKE)   return corename[25];
1056
0
  if (gotoblas == &gotoblas_SAPPHIRERAPIDS) return corename[26];
1057
0
  return corename[0];
1058
0
}
1059
1060
1061
1062
0
static gotoblas_t *force_coretype(char *coretype){
1063
1064
0
  int i ;
1065
0
  int found = -1;
1066
0
  char message[128];
1067
  //char mname[20];
1068
1069
0
  for ( i=1 ; i <= 25; i++)
1070
0
  {
1071
0
    if (!strncasecmp(coretype,corename[i],20))
1072
0
    {
1073
0
      found = i;
1074
0
      break;
1075
0
    }
1076
0
  }
1077
0
  if (found < 0)
1078
0
  {
1079
          //strncpy(mname,coretype,20);
1080
0
          snprintf(message, 128, "Core not found: %s\n",coretype);
1081
0
        openblas_warning(1, message);
1082
0
    return(NULL);
1083
0
  }
1084
1085
0
  switch (found)
1086
0
  {
1087
0
    case 25: return (&gotoblas_COOPERLAKE);
1088
0
    case 24: return (&gotoblas_SKYLAKEX);  
1089
0
    case 23: return (&gotoblas_ZEN);
1090
0
    case 22: return (&gotoblas_EXCAVATOR);
1091
0
    case 21: return (&gotoblas_STEAMROLLER);
1092
0
    case 20: return (&gotoblas_HASWELL);
1093
0
    case 19: return (&gotoblas_PILEDRIVER);
1094
0
    case 18: return (&gotoblas_BULLDOZER);
1095
0
    case 17: return (&gotoblas_BOBCAT);
1096
0
    case 16: return (&gotoblas_SANDYBRIDGE);
1097
0
    case 15: return (&gotoblas_NANO);
1098
0
    case 14: return (&gotoblas_BARCELONA);
1099
0
    case 13: return (&gotoblas_OPTERON);
1100
0
    case 12: return (&gotoblas_OPTERON_SSE3);
1101
0
    case 11: return (&gotoblas_ATHLON);
1102
0
    case 10: return (&gotoblas_NEHALEM);
1103
0
    case  9: return (&gotoblas_DUNNINGTON);
1104
0
    case  8: return (&gotoblas_PENRYN);
1105
0
    case  7: return (&gotoblas_CORE2);
1106
0
    case  6: return (&gotoblas_ATOM);
1107
0
    case  5: return (&gotoblas_BANIAS);
1108
0
    case  4: return (&gotoblas_PRESCOTT);
1109
0
    case  3: return (&gotoblas_NORTHWOOD);
1110
0
    case  2: return (&gotoblas_COPPERMINE);
1111
0
    case  1: return (&gotoblas_KATMAI);
1112
0
  }
1113
0
  return(NULL);
1114
1115
0
}
1116
1117
1118
1119
1120
2
void gotoblas_dynamic_init(void) {
1121
1122
2
  char coremsg[128];
1123
2
  char coren[22];
1124
2
  char *p;
1125
1126
1127
2
  if (gotoblas) return;
1128
1129
1
  p = getenv("OPENBLAS_CORETYPE");
1130
1
  if ( p )
1131
0
  {
1132
0
  gotoblas = force_coretype(p);
1133
0
  }
1134
1
  else
1135
1
  {
1136
1
    gotoblas = get_coretype();
1137
1
  }
1138
1139
#ifdef ARCH_X86
1140
  if (gotoblas == NULL) gotoblas = &gotoblas_KATMAI;
1141
#else
1142
1
  if (gotoblas == NULL) {
1143
0
   if (support_avx512_bf16()) gotoblas = &gotoblas_COOPERLAKE;
1144
0
   else if (support_avx512()) gotoblas = &gotoblas_SKYLAKEX;
1145
0
   else if   (support_avx2()) gotoblas = &gotoblas_HASWELL;
1146
0
   else if    (support_avx()) gotoblas = &gotoblas_SANDYBRIDGE;
1147
0
   else                       gotoblas = &gotoblas_PRESCOTT;
1148
0
  }
1149
  /* sanity check, if 64bit pointer we can't have a 32 bit cpu */
1150
1
  if (sizeof(void*) == 8) {
1151
1
      if (gotoblas == &gotoblas_KATMAI ||
1152
1
          gotoblas == &gotoblas_COPPERMINE ||
1153
1
          gotoblas == &gotoblas_NORTHWOOD ||
1154
1
          gotoblas == &gotoblas_BANIAS ||
1155
1
          gotoblas == &gotoblas_ATHLON)
1156
0
          gotoblas = &gotoblas_PRESCOTT;
1157
1
  }
1158
1
#endif
1159
1160
1
  if (gotoblas && gotoblas -> init) {
1161
1
    strncpy(coren,gotoblas_corename(),20);
1162
1
    sprintf(coremsg, "Core: %s\n",coren);
1163
1
    openblas_warning(2, coremsg);
1164
1
    gotoblas -> init();
1165
1
  } else {
1166
0
    openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n");
1167
0
    exit(1);
1168
0
  }
1169
1170
1
}
1171
1172
0
void gotoblas_dynamic_quit(void) {
1173
1174
0
  gotoblas = NULL;
1175
1176
0
}