Coverage Report

Created: 2025-09-12 10:59

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/contrib/openblas/param.h
Line
Count
Source
1
/*****************************************************************************
2
Copyright (c) 2011-2023, The OpenBLAS Project
3
All rights reserved.
4
5
Redistribution and use in source and binary forms, with or without
6
modification, are permitted provided that the following conditions are
7
met:
8
9
   1. Redistributions of source code must retain the above copyright
10
      notice, this list of conditions and the following disclaimer.
11
12
   2. Redistributions in binary form must reproduce the above copyright
13
      notice, this list of conditions and the following disclaimer in
14
      the documentation and/or other materials provided with the
15
      distribution.
16
   3. Neither the name of the OpenBLAS project nor the names of 
17
      its contributors may be used to endorse or promote products 
18
      derived from this software without specific prior written 
19
      permission.
20
21
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
25
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
30
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
32
**********************************************************************************/
33
34
/*********************************************************************/
35
/* Copyright 2009, 2010 The University of Texas at Austin.           */
36
/* All rights reserved.                                              */
37
/*                                                                   */
38
/* Redistribution and use in source and binary forms, with or        */
39
/* without modification, are permitted provided that the following   */
40
/* conditions are met:                                               */
41
/*                                                                   */
42
/*   1. Redistributions of source code must retain the above         */
43
/*      copyright notice, this list of conditions and the following  */
44
/*      disclaimer.                                                  */
45
/*                                                                   */
46
/*   2. Redistributions in binary form must reproduce the above      */
47
/*      copyright notice, this list of conditions and the following  */
48
/*      disclaimer in the documentation and/or other materials       */
49
/*      provided with the distribution.                              */
50
/*                                                                   */
51
/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
52
/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
53
/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
54
/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
55
/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
56
/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
57
/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
58
/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
59
/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
60
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
61
/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
62
/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
63
/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
64
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
65
/*                                                                   */
66
/* The views and conclusions contained in the software and           */
67
/* documentation are those of the authors and should not be          */
68
/* interpreted as representing official policies, either expressed   */
69
/* or implied, of The University of Texas at Austin.                 */
70
/*********************************************************************/
71
72
#ifndef PARAM_H
73
#define PARAM_H
74
75
76
#define SBGEMM_DEFAULT_UNROLL_N 4
77
#define SBGEMM_DEFAULT_UNROLL_M 8
78
#define SBGEMM_DEFAULT_UNROLL_MN 32
79
#define SBGEMM_DEFAULT_P 256
80
#define SBGEMM_DEFAULT_R 256
81
#define SBGEMM_DEFAULT_Q 256
82
#define SBGEMM_ALIGN_K 1  // must be 2^x
83
84
#ifdef OPTERON
85
86
#define SNUMOPT   4
87
#define DNUMOPT   2
88
89
#define GEMM_DEFAULT_OFFSET_A  64
90
#define GEMM_DEFAULT_OFFSET_B 256
91
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x01ffffUL
92
93
#define SGEMM_DEFAULT_UNROLL_N 4
94
#define DGEMM_DEFAULT_UNROLL_N 4
95
#define QGEMM_DEFAULT_UNROLL_N 2
96
#define CGEMM_DEFAULT_UNROLL_N 2
97
#define ZGEMM_DEFAULT_UNROLL_N 2
98
#define XGEMM_DEFAULT_UNROLL_N 1
99
100
#ifdef ARCH_X86
101
#define SGEMM_DEFAULT_UNROLL_M 4
102
#define DGEMM_DEFAULT_UNROLL_M 2
103
#define QGEMM_DEFAULT_UNROLL_M 2
104
#define CGEMM_DEFAULT_UNROLL_M 2
105
#define ZGEMM_DEFAULT_UNROLL_M 1
106
#define XGEMM_DEFAULT_UNROLL_M 1
107
#else
108
#define SGEMM_DEFAULT_UNROLL_M 8
109
#define DGEMM_DEFAULT_UNROLL_M 4
110
#define QGEMM_DEFAULT_UNROLL_M 2
111
#define CGEMM_DEFAULT_UNROLL_M 4
112
#define ZGEMM_DEFAULT_UNROLL_M 2
113
#define XGEMM_DEFAULT_UNROLL_M 1
114
#endif
115
116
#define SGEMM_DEFAULT_P sgemm_p
117
#define DGEMM_DEFAULT_P dgemm_p
118
#define QGEMM_DEFAULT_P qgemm_p
119
#define CGEMM_DEFAULT_P cgemm_p
120
#define ZGEMM_DEFAULT_P zgemm_p
121
#define XGEMM_DEFAULT_P xgemm_p
122
123
#define SGEMM_DEFAULT_R sgemm_r
124
#define DGEMM_DEFAULT_R dgemm_r
125
#define QGEMM_DEFAULT_R qgemm_r
126
#define CGEMM_DEFAULT_R cgemm_r
127
#define ZGEMM_DEFAULT_R zgemm_r
128
#define XGEMM_DEFAULT_R xgemm_r
129
130
#ifdef ALLOC_HUGETLB
131
132
#define SGEMM_DEFAULT_Q 248
133
#define DGEMM_DEFAULT_Q 248
134
#define QGEMM_DEFAULT_Q 248
135
#define CGEMM_DEFAULT_Q 248
136
#define ZGEMM_DEFAULT_Q 248
137
#define XGEMM_DEFAULT_Q 248
138
139
#else
140
141
#define SGEMM_DEFAULT_Q 240
142
#define DGEMM_DEFAULT_Q 240
143
#define QGEMM_DEFAULT_Q 240
144
#define CGEMM_DEFAULT_Q 240
145
#define ZGEMM_DEFAULT_Q 240
146
#define XGEMM_DEFAULT_Q 240
147
148
#endif
149
150
151
#define SYMV_P  16
152
#define HAVE_EXCLUSIVE_CACHE
153
154
#endif
155
156
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT)
157
158
#define SNUMOPT   8
159
#define DNUMOPT   4
160
161
#define GEMM_DEFAULT_OFFSET_A  64
162
#define GEMM_DEFAULT_OFFSET_B 832
163
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0fffUL
164
165
#define SGEMM_DEFAULT_UNROLL_N 4
166
#define DGEMM_DEFAULT_UNROLL_N 4
167
#define QGEMM_DEFAULT_UNROLL_N 2
168
#define CGEMM_DEFAULT_UNROLL_N 2
169
#define ZGEMM_DEFAULT_UNROLL_N 2
170
#define XGEMM_DEFAULT_UNROLL_N 1
171
172
#ifdef ARCH_X86
173
#define SGEMM_DEFAULT_UNROLL_M 4
174
#define DGEMM_DEFAULT_UNROLL_M 2
175
#define QGEMM_DEFAULT_UNROLL_M 2
176
#define CGEMM_DEFAULT_UNROLL_M 2
177
#define ZGEMM_DEFAULT_UNROLL_M 1
178
#define XGEMM_DEFAULT_UNROLL_M 1
179
#else
180
#define SGEMM_DEFAULT_UNROLL_M 8
181
#define DGEMM_DEFAULT_UNROLL_M 4
182
#define QGEMM_DEFAULT_UNROLL_M 2
183
#define CGEMM_DEFAULT_UNROLL_M 4
184
#define ZGEMM_DEFAULT_UNROLL_M 2
185
#define XGEMM_DEFAULT_UNROLL_M 1
186
#endif
187
188
#if 0
189
#define SGEMM_DEFAULT_P 496
190
#define DGEMM_DEFAULT_P 248
191
#define QGEMM_DEFAULT_P 124
192
#define CGEMM_DEFAULT_P 248
193
#define ZGEMM_DEFAULT_P 124
194
#define XGEMM_DEFAULT_P  62
195
196
#define SGEMM_DEFAULT_Q 248
197
#define DGEMM_DEFAULT_Q 248
198
#define QGEMM_DEFAULT_Q 248
199
#define CGEMM_DEFAULT_Q 248
200
#define ZGEMM_DEFAULT_Q 248
201
#define XGEMM_DEFAULT_Q 248
202
203
#else
204
205
#define SGEMM_DEFAULT_P 448
206
#define DGEMM_DEFAULT_P 224
207
#define QGEMM_DEFAULT_P 112
208
#define CGEMM_DEFAULT_P 224
209
#define ZGEMM_DEFAULT_P 112
210
#define XGEMM_DEFAULT_P  56
211
212
#define SGEMM_DEFAULT_Q 224
213
#define DGEMM_DEFAULT_Q 224
214
#define QGEMM_DEFAULT_Q 224
215
#define CGEMM_DEFAULT_Q 224
216
#define ZGEMM_DEFAULT_Q 224
217
#define XGEMM_DEFAULT_Q 224
218
219
#endif
220
221
#define SGEMM_DEFAULT_R sgemm_r
222
#define QGEMM_DEFAULT_R qgemm_r
223
#define DGEMM_DEFAULT_R dgemm_r
224
#define CGEMM_DEFAULT_R cgemm_r
225
#define ZGEMM_DEFAULT_R zgemm_r
226
#define XGEMM_DEFAULT_R xgemm_r
227
228
#define SYMV_P  16
229
#define HAVE_EXCLUSIVE_CACHE
230
231
#define GEMM_THREAD gemm_thread_mn
232
233
#endif
234
235
236
#ifdef BULLDOZER
237
238
#define SNUMOPT   8
239
#define DNUMOPT   4
240
241
#define GEMM_DEFAULT_OFFSET_A  64
242
#define GEMM_DEFAULT_OFFSET_B 832
243
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0fffUL
244
245
246
247
#define QGEMM_DEFAULT_UNROLL_N 2
248
#define CGEMM_DEFAULT_UNROLL_N 2
249
#define ZGEMM_DEFAULT_UNROLL_N 2
250
#define XGEMM_DEFAULT_UNROLL_N 1
251
252
#ifdef ARCH_X86
253
#define SGEMM_DEFAULT_UNROLL_N 4
254
#define DGEMM_DEFAULT_UNROLL_N 4
255
#define SGEMM_DEFAULT_UNROLL_M 4
256
#define DGEMM_DEFAULT_UNROLL_M 2
257
#define QGEMM_DEFAULT_UNROLL_M 2
258
#define CGEMM_DEFAULT_UNROLL_M 2
259
#define ZGEMM_DEFAULT_UNROLL_M 1
260
#define XGEMM_DEFAULT_UNROLL_M 1
261
#else
262
#define SGEMM_DEFAULT_UNROLL_N 2
263
#define DGEMM_DEFAULT_UNROLL_N 2
264
#define SGEMM_DEFAULT_UNROLL_M 16
265
#define DGEMM_DEFAULT_UNROLL_M 8
266
#define QGEMM_DEFAULT_UNROLL_M 2
267
#define CGEMM_DEFAULT_UNROLL_M 4
268
#define ZGEMM_DEFAULT_UNROLL_M 2
269
#define XGEMM_DEFAULT_UNROLL_M 1
270
#define CGEMM3M_DEFAULT_UNROLL_N 4
271
#define CGEMM3M_DEFAULT_UNROLL_M 8
272
#define ZGEMM3M_DEFAULT_UNROLL_N 4
273
#define ZGEMM3M_DEFAULT_UNROLL_M 4
274
275
#define DGEMM_DEFAULT_UNROLL_MN 16
276
#define GEMV_UNROLL 8
277
#endif
278
279
280
#if defined(ARCH_X86_64)
281
#define SGEMM_DEFAULT_P 768
282
#define DGEMM_DEFAULT_P 384
283
#else
284
#define SGEMM_DEFAULT_P 448
285
#define DGEMM_DEFAULT_P 224
286
#endif
287
288
#define QGEMM_DEFAULT_P 112
289
#define CGEMM_DEFAULT_P 224
290
#define ZGEMM_DEFAULT_P 112
291
#define XGEMM_DEFAULT_P  56
292
293
#if defined(ARCH_X86_64)
294
#define SGEMM_DEFAULT_Q 168
295
#define DGEMM_DEFAULT_Q 168
296
#else
297
#define SGEMM_DEFAULT_Q 224
298
#define DGEMM_DEFAULT_Q 224
299
#endif
300
301
#define QGEMM_DEFAULT_Q 224
302
#define CGEMM_DEFAULT_Q 224
303
#define ZGEMM_DEFAULT_Q 224
304
#define XGEMM_DEFAULT_Q 224
305
306
#define CGEMM3M_DEFAULT_P 448
307
#define ZGEMM3M_DEFAULT_P 224
308
#define XGEMM3M_DEFAULT_P 112
309
#define CGEMM3M_DEFAULT_Q 224
310
#define ZGEMM3M_DEFAULT_Q 224
311
#define XGEMM3M_DEFAULT_Q 224
312
#define CGEMM3M_DEFAULT_R 12288
313
#define ZGEMM3M_DEFAULT_R 12288
314
#define XGEMM3M_DEFAULT_R 12288
315
316
#define SGEMM_DEFAULT_R sgemm_r
317
#define QGEMM_DEFAULT_R qgemm_r
318
#define DGEMM_DEFAULT_R dgemm_r
319
#define CGEMM_DEFAULT_R cgemm_r
320
#define ZGEMM_DEFAULT_R zgemm_r
321
#define XGEMM_DEFAULT_R xgemm_r
322
323
#define SYMV_P  16
324
#define HAVE_EXCLUSIVE_CACHE
325
326
#define GEMM_THREAD gemm_thread_mn
327
328
#endif
329
330
#ifdef PILEDRIVER
331
#define SNUMOPT         8
332
#define DNUMOPT         4
333
334
#define GEMM_DEFAULT_OFFSET_A  64
335
#define GEMM_DEFAULT_OFFSET_B 832
336
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0fffUL
337
338
339
340
#define QGEMM_DEFAULT_UNROLL_N 2
341
#define CGEMM_DEFAULT_UNROLL_N 2
342
#define ZGEMM_DEFAULT_UNROLL_N 2
343
#define XGEMM_DEFAULT_UNROLL_N 1
344
345
#ifdef ARCH_X86
346
#define SGEMM_DEFAULT_UNROLL_N 4
347
#define DGEMM_DEFAULT_UNROLL_N 4
348
#define SGEMM_DEFAULT_UNROLL_M 4
349
#define DGEMM_DEFAULT_UNROLL_M 2
350
#define QGEMM_DEFAULT_UNROLL_M 2
351
#define CGEMM_DEFAULT_UNROLL_M 2
352
#define ZGEMM_DEFAULT_UNROLL_M 1
353
#define XGEMM_DEFAULT_UNROLL_M 1
354
#else
355
#define SGEMM_DEFAULT_UNROLL_N 2
356
#define DGEMM_DEFAULT_UNROLL_N 2
357
#define SGEMM_DEFAULT_UNROLL_M 16
358
#define DGEMM_DEFAULT_UNROLL_M 8
359
#define QGEMM_DEFAULT_UNROLL_M 2
360
#define CGEMM_DEFAULT_UNROLL_M 4
361
#define ZGEMM_DEFAULT_UNROLL_M 2
362
#define XGEMM_DEFAULT_UNROLL_M 1
363
#define CGEMM3M_DEFAULT_UNROLL_N 4
364
#define CGEMM3M_DEFAULT_UNROLL_M 8
365
#define ZGEMM3M_DEFAULT_UNROLL_N 4
366
#define ZGEMM3M_DEFAULT_UNROLL_M 4
367
#define GEMV_UNROLL 8
368
#endif
369
370
#if defined(ARCH_X86_64)
371
#define SGEMM_DEFAULT_P 768
372
#define DGEMM_DEFAULT_P 768
373
#define ZGEMM_DEFAULT_P 384
374
#define CGEMM_DEFAULT_P 768
375
#else
376
#define SGEMM_DEFAULT_P 448
377
#define DGEMM_DEFAULT_P 480
378
#define ZGEMM_DEFAULT_P 112
379
#define CGEMM_DEFAULT_P 224
380
#endif
381
#define QGEMM_DEFAULT_P 112
382
#define XGEMM_DEFAULT_P  56
383
384
#if defined(ARCH_X86_64)
385
#define SGEMM_DEFAULT_Q 192
386
#define DGEMM_DEFAULT_Q 168
387
#define ZGEMM_DEFAULT_Q 168
388
#define CGEMM_DEFAULT_Q 168
389
#else
390
#define SGEMM_DEFAULT_Q 224
391
#define DGEMM_DEFAULT_Q 224
392
#define ZGEMM_DEFAULT_Q 224
393
#define CGEMM_DEFAULT_Q 224
394
#endif
395
#define QGEMM_DEFAULT_Q 224
396
#define XGEMM_DEFAULT_Q 224
397
398
#define CGEMM3M_DEFAULT_P 448
399
#define ZGEMM3M_DEFAULT_P 224
400
#define XGEMM3M_DEFAULT_P 112
401
#define CGEMM3M_DEFAULT_Q 224
402
#define ZGEMM3M_DEFAULT_Q 224
403
#define XGEMM3M_DEFAULT_Q 224
404
#define CGEMM3M_DEFAULT_R 12288
405
#define ZGEMM3M_DEFAULT_R 12288
406
#define XGEMM3M_DEFAULT_R 12288
407
408
#define SGEMM_DEFAULT_R 12288
409
#define QGEMM_DEFAULT_R qgemm_r
410
#define DGEMM_DEFAULT_R 12288
411
#define CGEMM_DEFAULT_R cgemm_r
412
#define ZGEMM_DEFAULT_R zgemm_r
413
#define XGEMM_DEFAULT_R xgemm_r
414
415
#define SYMV_P  16
416
#define HAVE_EXCLUSIVE_CACHE
417
418
#define GEMM_THREAD gemm_thread_mn
419
420
#endif
421
422
#ifdef STEAMROLLER
423
#define SNUMOPT         8
424
#define DNUMOPT         4
425
426
#define GEMM_DEFAULT_OFFSET_A  64
427
#define GEMM_DEFAULT_OFFSET_B 832
428
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0fffUL
429
430
431
432
#define QGEMM_DEFAULT_UNROLL_N 2
433
#define CGEMM_DEFAULT_UNROLL_N 2
434
#define ZGEMM_DEFAULT_UNROLL_N 2
435
#define XGEMM_DEFAULT_UNROLL_N 1
436
437
#ifdef ARCH_X86
438
#define SGEMM_DEFAULT_UNROLL_N 4
439
#define DGEMM_DEFAULT_UNROLL_N 4
440
#define SGEMM_DEFAULT_UNROLL_M 4
441
#define DGEMM_DEFAULT_UNROLL_M 2
442
#define QGEMM_DEFAULT_UNROLL_M 2
443
#define CGEMM_DEFAULT_UNROLL_M 2
444
#define ZGEMM_DEFAULT_UNROLL_M 1
445
#define XGEMM_DEFAULT_UNROLL_M 1
446
#else
447
#define SGEMM_DEFAULT_UNROLL_N 2
448
#define DGEMM_DEFAULT_UNROLL_N 2
449
#define SGEMM_DEFAULT_UNROLL_M 16
450
#define DGEMM_DEFAULT_UNROLL_M 8
451
#define QGEMM_DEFAULT_UNROLL_M 2
452
#define CGEMM_DEFAULT_UNROLL_M 4
453
#define ZGEMM_DEFAULT_UNROLL_M 2
454
#define XGEMM_DEFAULT_UNROLL_M 1
455
#define CGEMM3M_DEFAULT_UNROLL_N 4
456
#define CGEMM3M_DEFAULT_UNROLL_M 8
457
#define ZGEMM3M_DEFAULT_UNROLL_N 4
458
#define ZGEMM3M_DEFAULT_UNROLL_M 4
459
#define GEMV_UNROLL 8
460
#endif
461
462
#if defined(ARCH_X86_64)
463
#define SGEMM_DEFAULT_P 768
464
#define DGEMM_DEFAULT_P 576
465
#define ZGEMM_DEFAULT_P 288
466
#define CGEMM_DEFAULT_P 576
467
#else
468
#define SGEMM_DEFAULT_P 448
469
#define DGEMM_DEFAULT_P 480
470
#define ZGEMM_DEFAULT_P 112
471
#define CGEMM_DEFAULT_P 224
472
#endif
473
#define QGEMM_DEFAULT_P 112
474
#define XGEMM_DEFAULT_P  56
475
476
#if defined(ARCH_X86_64)
477
#define SGEMM_DEFAULT_Q 192
478
#define DGEMM_DEFAULT_Q 160
479
#define ZGEMM_DEFAULT_Q 160
480
#define CGEMM_DEFAULT_Q 160
481
#else
482
#define SGEMM_DEFAULT_Q 224
483
#define DGEMM_DEFAULT_Q 224
484
#define ZGEMM_DEFAULT_Q 224
485
#define CGEMM_DEFAULT_Q 224
486
#endif
487
#define QGEMM_DEFAULT_Q 224
488
#define XGEMM_DEFAULT_Q 224
489
490
#define CGEMM3M_DEFAULT_P 448
491
#define ZGEMM3M_DEFAULT_P 224
492
#define XGEMM3M_DEFAULT_P 112
493
#define CGEMM3M_DEFAULT_Q 224
494
#define ZGEMM3M_DEFAULT_Q 224
495
#define XGEMM3M_DEFAULT_Q 224
496
#define CGEMM3M_DEFAULT_R 12288
497
#define ZGEMM3M_DEFAULT_R 12288
498
#define XGEMM3M_DEFAULT_R 12288
499
500
#define SGEMM_DEFAULT_R 12288
501
#define QGEMM_DEFAULT_R qgemm_r
502
#define DGEMM_DEFAULT_R 12288
503
#define CGEMM_DEFAULT_R cgemm_r
504
#define ZGEMM_DEFAULT_R zgemm_r
505
#define XGEMM_DEFAULT_R xgemm_r
506
507
#define SYMV_P  16
508
#define HAVE_EXCLUSIVE_CACHE
509
510
#define GEMM_THREAD gemm_thread_mn
511
512
#endif
513
514
515
#ifdef EXCAVATOR
516
#define SNUMOPT         8
517
#define DNUMOPT         4
518
519
#define GEMM_DEFAULT_OFFSET_A  64
520
#define GEMM_DEFAULT_OFFSET_B 832
521
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0fffUL
522
523
524
525
#define QGEMM_DEFAULT_UNROLL_N 2
526
#define CGEMM_DEFAULT_UNROLL_N 2
527
#define ZGEMM_DEFAULT_UNROLL_N 2
528
#define XGEMM_DEFAULT_UNROLL_N 1
529
530
#ifdef ARCH_X86
531
#define SGEMM_DEFAULT_UNROLL_N 4
532
#define DGEMM_DEFAULT_UNROLL_N 4
533
#define SGEMM_DEFAULT_UNROLL_M 4
534
#define DGEMM_DEFAULT_UNROLL_M 2
535
#define QGEMM_DEFAULT_UNROLL_M 2
536
#define CGEMM_DEFAULT_UNROLL_M 2
537
#define ZGEMM_DEFAULT_UNROLL_M 1
538
#define XGEMM_DEFAULT_UNROLL_M 1
539
#else
540
#define SGEMM_DEFAULT_UNROLL_N 2
541
#define DGEMM_DEFAULT_UNROLL_N 2
542
#define SGEMM_DEFAULT_UNROLL_M 16
543
#define DGEMM_DEFAULT_UNROLL_M 8
544
#define QGEMM_DEFAULT_UNROLL_M 2
545
#define CGEMM_DEFAULT_UNROLL_M 4
546
#define ZGEMM_DEFAULT_UNROLL_M 2
547
#define XGEMM_DEFAULT_UNROLL_M 1
548
#define CGEMM3M_DEFAULT_UNROLL_N 4
549
#define CGEMM3M_DEFAULT_UNROLL_M 8
550
#define ZGEMM3M_DEFAULT_UNROLL_N 4
551
#define ZGEMM3M_DEFAULT_UNROLL_M 4
552
#define GEMV_UNROLL 8
553
#endif
554
555
#if defined(ARCH_X86_64)
556
#define SGEMM_DEFAULT_P 768
557
#define DGEMM_DEFAULT_P 576
558
#define ZGEMM_DEFAULT_P 288
559
#define CGEMM_DEFAULT_P 576
560
#else
561
#define SGEMM_DEFAULT_P 448
562
#define DGEMM_DEFAULT_P 480
563
#define ZGEMM_DEFAULT_P 112
564
#define CGEMM_DEFAULT_P 224
565
#endif
566
#define QGEMM_DEFAULT_P 112
567
#define XGEMM_DEFAULT_P  56
568
569
#if defined(ARCH_X86_64)
570
#define SGEMM_DEFAULT_Q 192
571
#define DGEMM_DEFAULT_Q 160
572
#define ZGEMM_DEFAULT_Q 160
573
#define CGEMM_DEFAULT_Q 160
574
#else
575
#define SGEMM_DEFAULT_Q 224
576
#define DGEMM_DEFAULT_Q 224
577
#define ZGEMM_DEFAULT_Q 224
578
#define CGEMM_DEFAULT_Q 224
579
#endif
580
#define QGEMM_DEFAULT_Q 224
581
#define XGEMM_DEFAULT_Q 224
582
583
#define CGEMM3M_DEFAULT_P 448
584
#define ZGEMM3M_DEFAULT_P 224
585
#define XGEMM3M_DEFAULT_P 112
586
#define CGEMM3M_DEFAULT_Q 224
587
#define ZGEMM3M_DEFAULT_Q 224
588
#define XGEMM3M_DEFAULT_Q 224
589
#define CGEMM3M_DEFAULT_R 12288
590
#define ZGEMM3M_DEFAULT_R 12288
591
#define XGEMM3M_DEFAULT_R 12288
592
593
#define SGEMM_DEFAULT_R 12288
594
#define QGEMM_DEFAULT_R qgemm_r
595
#define DGEMM_DEFAULT_R 12288
596
#define CGEMM_DEFAULT_R cgemm_r
597
#define ZGEMM_DEFAULT_R zgemm_r
598
#define XGEMM_DEFAULT_R xgemm_r
599
600
#define SYMV_P  16
601
#define HAVE_EXCLUSIVE_CACHE
602
603
#define GEMM_THREAD gemm_thread_mn
604
605
#endif
606
607
#ifdef ZEN
608
#define SNUMOPT         16
609
#define DNUMOPT         8
610
611
#define GEMM_DEFAULT_OFFSET_A     0
612
#define GEMM_DEFAULT_OFFSET_B     0
613
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
614
615
#define SYMV_P  8
616
617
#if defined(XDOUBLE) || defined(DOUBLE)
618
#define SWITCH_RATIO            4
619
#define GEMM_PREFERED_SIZE      4
620
#else
621
#define SWITCH_RATIO            8
622
#define GEMM_PREFERED_SIZE      8
623
#endif
624
625
#ifdef ARCH_X86
626
627
#define SGEMM_DEFAULT_UNROLL_M 4
628
#define DGEMM_DEFAULT_UNROLL_M 2
629
#define QGEMM_DEFAULT_UNROLL_M 2
630
#define CGEMM_DEFAULT_UNROLL_M 2
631
#define ZGEMM_DEFAULT_UNROLL_M 1
632
#define XGEMM_DEFAULT_UNROLL_M 1
633
634
#define SGEMM_DEFAULT_UNROLL_N 4
635
#define DGEMM_DEFAULT_UNROLL_N 4
636
#define QGEMM_DEFAULT_UNROLL_N 2
637
#define CGEMM_DEFAULT_UNROLL_N 2
638
#define ZGEMM_DEFAULT_UNROLL_N 2
639
#define XGEMM_DEFAULT_UNROLL_N 1
640
641
#else
642
643
#define SGEMM_DEFAULT_UNROLL_M 8
644
#define DGEMM_DEFAULT_UNROLL_M 4
645
#define QGEMM_DEFAULT_UNROLL_M 2
646
#define CGEMM_DEFAULT_UNROLL_M 8
647
#define ZGEMM_DEFAULT_UNROLL_M 4
648
#define XGEMM_DEFAULT_UNROLL_M 1
649
650
#define SGEMM_DEFAULT_UNROLL_N 4
651
#define DGEMM_DEFAULT_UNROLL_N 8
652
#define QGEMM_DEFAULT_UNROLL_N 2
653
#define CGEMM_DEFAULT_UNROLL_N 2
654
#define ZGEMM_DEFAULT_UNROLL_N 2
655
#define XGEMM_DEFAULT_UNROLL_N 1
656
/*
657
#define SGEMM_DEFAULT_UNROLL_MN 32
658
#define DGEMM_DEFAULT_UNROLL_MN 32
659
*/
660
#endif
661
662
#ifdef ARCH_X86
663
664
#define SGEMM_DEFAULT_P 512
665
#define SGEMM_DEFAULT_R sgemm_r
666
#define DGEMM_DEFAULT_P 512
667
#define DGEMM_DEFAULT_R dgemm_r
668
#define QGEMM_DEFAULT_P 504
669
#define QGEMM_DEFAULT_R qgemm_r
670
#define CGEMM_DEFAULT_P 128
671
#define CGEMM_DEFAULT_R 1024
672
#define ZGEMM_DEFAULT_P 512
673
#define ZGEMM_DEFAULT_R zgemm_r
674
#define XGEMM_DEFAULT_P 252
675
#define XGEMM_DEFAULT_R xgemm_r
676
#define SGEMM_DEFAULT_Q 256
677
#define DGEMM_DEFAULT_Q 256
678
#define QGEMM_DEFAULT_Q 128
679
#define CGEMM_DEFAULT_Q 256
680
#define ZGEMM_DEFAULT_Q 192
681
#define XGEMM_DEFAULT_Q 128
682
683
#else
684
685
#define SGEMM_DEFAULT_P 320
686
#define DGEMM_DEFAULT_P 512
687
#define CGEMM_DEFAULT_P 256
688
#define ZGEMM_DEFAULT_P 192
689
690
#ifdef WINDOWS_ABI
691
#define SGEMM_DEFAULT_Q 320
692
#define DGEMM_DEFAULT_Q 128
693
#else
694
#define SGEMM_DEFAULT_Q 320
695
#define DGEMM_DEFAULT_Q 256
696
#endif
697
#define CGEMM_DEFAULT_Q 256
698
#define ZGEMM_DEFAULT_Q 192
699
700
#define SGEMM_DEFAULT_R sgemm_r
701
#define DGEMM_DEFAULT_R 13824
702
#define CGEMM_DEFAULT_R cgemm_r
703
#define ZGEMM_DEFAULT_R zgemm_r
704
705
#define QGEMM_DEFAULT_Q 128
706
#define QGEMM_DEFAULT_P 504
707
#define QGEMM_DEFAULT_R qgemm_r
708
#define XGEMM_DEFAULT_P 252
709
#define XGEMM_DEFAULT_R xgemm_r
710
#define XGEMM_DEFAULT_Q 128
711
712
#define CGEMM3M_DEFAULT_UNROLL_N 4
713
#define CGEMM3M_DEFAULT_UNROLL_M 8
714
#define ZGEMM3M_DEFAULT_UNROLL_N 4
715
#define ZGEMM3M_DEFAULT_UNROLL_M 4
716
717
#define CGEMM3M_DEFAULT_P 320
718
#define ZGEMM3M_DEFAULT_P 256
719
#define XGEMM3M_DEFAULT_P 112
720
#define CGEMM3M_DEFAULT_Q 320
721
#define ZGEMM3M_DEFAULT_Q 256
722
#define XGEMM3M_DEFAULT_Q 224
723
#define CGEMM3M_DEFAULT_R 12288
724
#define ZGEMM3M_DEFAULT_R 12288
725
#define XGEMM3M_DEFAULT_R 12288
726
727
#endif
728
729
#endif
730
731
#ifdef ATHLON
732
733
#define SNUMOPT   4
734
#define DNUMOPT   2
735
736
#define GEMM_DEFAULT_OFFSET_A   0
737
#define GEMM_DEFAULT_OFFSET_B 384
738
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
739
740
#define SGEMM_DEFAULT_UNROLL_N 4
741
#define DGEMM_DEFAULT_UNROLL_N 4
742
#define QGEMM_DEFAULT_UNROLL_N 2
743
#define CGEMM_DEFAULT_UNROLL_N 2
744
#define ZGEMM_DEFAULT_UNROLL_N 2
745
#define XGEMM_DEFAULT_UNROLL_N 1
746
747
#define SGEMM_DEFAULT_UNROLL_M 2
748
#define DGEMM_DEFAULT_UNROLL_M 1
749
#define QGEMM_DEFAULT_UNROLL_M 2
750
#define CGEMM_DEFAULT_UNROLL_M 1
751
#define ZGEMM_DEFAULT_UNROLL_M 1
752
#define XGEMM_DEFAULT_UNROLL_M 1
753
754
#define SGEMM_DEFAULT_R sgemm_r
755
#define DGEMM_DEFAULT_R dgemm_r
756
#define QGEMM_DEFAULT_R qgemm_r
757
#define CGEMM_DEFAULT_R cgemm_r
758
#define ZGEMM_DEFAULT_R zgemm_r
759
#define XGEMM_DEFAULT_R xgemm_r
760
761
#define SGEMM_DEFAULT_P 208
762
#define DGEMM_DEFAULT_P 104
763
#define QGEMM_DEFAULT_P  56
764
#define CGEMM_DEFAULT_P 104
765
#define ZGEMM_DEFAULT_P  56
766
#define XGEMM_DEFAULT_P  28
767
768
#define SGEMM_DEFAULT_Q 208
769
#define DGEMM_DEFAULT_Q 208
770
#define QGEMM_DEFAULT_Q 208
771
#define CGEMM_DEFAULT_Q 208
772
#define ZGEMM_DEFAULT_Q 208
773
#define XGEMM_DEFAULT_Q 208
774
775
#define SYMV_P  16
776
#define HAVE_EXCLUSIVE_CACHE
777
#endif
778
779
#ifdef VIAC3
780
781
#define SNUMOPT   2
782
#define DNUMOPT   1
783
784
#define GEMM_DEFAULT_OFFSET_A   0
785
#define GEMM_DEFAULT_OFFSET_B 256
786
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
787
788
#define SGEMM_DEFAULT_UNROLL_N 4
789
#define DGEMM_DEFAULT_UNROLL_N 4
790
#define QGEMM_DEFAULT_UNROLL_N 2
791
#define CGEMM_DEFAULT_UNROLL_N 2
792
#define ZGEMM_DEFAULT_UNROLL_N 2
793
#define XGEMM_DEFAULT_UNROLL_N 1
794
795
#define SGEMM_DEFAULT_UNROLL_M 2
796
#define DGEMM_DEFAULT_UNROLL_M 1
797
#define QGEMM_DEFAULT_UNROLL_M 2
798
#define CGEMM_DEFAULT_UNROLL_M 1
799
#define ZGEMM_DEFAULT_UNROLL_M 1
800
#define XGEMM_DEFAULT_UNROLL_M 1
801
802
#define SGEMM_DEFAULT_R sgemm_r
803
#define DGEMM_DEFAULT_R dgemm_r
804
#define QGEMM_DEFAULT_R qgemm_r
805
#define CGEMM_DEFAULT_R cgemm_r
806
#define ZGEMM_DEFAULT_R zgemm_r
807
#define XGEMM_DEFAULT_R xgemm_r
808
809
#define SGEMM_DEFAULT_P 128
810
#define DGEMM_DEFAULT_P 128
811
#define QGEMM_DEFAULT_P 128
812
#define CGEMM_DEFAULT_P 128
813
#define ZGEMM_DEFAULT_P 128
814
#define XGEMM_DEFAULT_P 128
815
816
#define SGEMM_DEFAULT_Q 512
817
#define DGEMM_DEFAULT_Q 256
818
#define QGEMM_DEFAULT_Q 256
819
#define CGEMM_DEFAULT_Q 256
820
#define ZGEMM_DEFAULT_Q 128
821
#define XGEMM_DEFAULT_Q 128
822
823
#define SYMV_P  16
824
#endif
825
826
#ifdef NANO
827
828
#define SNUMOPT   4
829
#define DNUMOPT   2
830
831
#define GEMM_DEFAULT_OFFSET_A  64
832
#define GEMM_DEFAULT_OFFSET_B 256
833
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x01ffffUL
834
835
#ifdef ARCH_X86
836
#define SGEMM_DEFAULT_UNROLL_N 4
837
#define DGEMM_DEFAULT_UNROLL_N 4
838
#define QGEMM_DEFAULT_UNROLL_N 2
839
#define CGEMM_DEFAULT_UNROLL_N 2
840
#define ZGEMM_DEFAULT_UNROLL_N 2
841
#define XGEMM_DEFAULT_UNROLL_N 1
842
843
#define SGEMM_DEFAULT_UNROLL_M 4
844
#define DGEMM_DEFAULT_UNROLL_M 2
845
#define QGEMM_DEFAULT_UNROLL_M 2
846
#define CGEMM_DEFAULT_UNROLL_M 2
847
#define ZGEMM_DEFAULT_UNROLL_M 1
848
#define XGEMM_DEFAULT_UNROLL_M 1
849
#else
850
#define SGEMM_DEFAULT_UNROLL_N 8
851
#define DGEMM_DEFAULT_UNROLL_N 4
852
#define QGEMM_DEFAULT_UNROLL_N 2
853
#define CGEMM_DEFAULT_UNROLL_N 4
854
#define ZGEMM_DEFAULT_UNROLL_N 2
855
#define XGEMM_DEFAULT_UNROLL_N 1
856
857
#define SGEMM_DEFAULT_UNROLL_M 4
858
#define DGEMM_DEFAULT_UNROLL_M 4
859
#define QGEMM_DEFAULT_UNROLL_M 2
860
#define CGEMM_DEFAULT_UNROLL_M 2
861
#define ZGEMM_DEFAULT_UNROLL_M 2
862
#define XGEMM_DEFAULT_UNROLL_M 1
863
#endif
864
865
#define SGEMM_DEFAULT_P 288
866
#define DGEMM_DEFAULT_P 288
867
#define QGEMM_DEFAULT_P 288
868
#define CGEMM_DEFAULT_P 288
869
#define ZGEMM_DEFAULT_P 288
870
#define XGEMM_DEFAULT_P 288
871
872
#define SGEMM_DEFAULT_R sgemm_r
873
#define DGEMM_DEFAULT_R dgemm_r
874
#define QGEMM_DEFAULT_R qgemm_r
875
#define CGEMM_DEFAULT_R cgemm_r
876
#define ZGEMM_DEFAULT_R zgemm_r
877
#define XGEMM_DEFAULT_R xgemm_r
878
879
#define SGEMM_DEFAULT_Q 256
880
#define DGEMM_DEFAULT_Q 128
881
#define QGEMM_DEFAULT_Q  64
882
#define CGEMM_DEFAULT_Q 128
883
#define ZGEMM_DEFAULT_Q  64
884
#define XGEMM_DEFAULT_Q  32
885
886
#define SYMV_P  16
887
#define HAVE_EXCLUSIVE_CACHE
888
889
#endif
890
891
#if defined(PENTIUM) || defined(PENTIUM2) || defined(PENTIUM3)
892
893
#ifdef HAVE_SSE
894
#define SNUMOPT   2
895
#else
896
#define SNUMOPT   1
897
#endif
898
#define DNUMOPT   1
899
900
#define GEMM_DEFAULT_OFFSET_A 0
901
#define GEMM_DEFAULT_OFFSET_B 0
902
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
903
904
#ifdef HAVE_SSE
905
#define SGEMM_DEFAULT_UNROLL_M 8
906
#define CGEMM_DEFAULT_UNROLL_M 4
907
#else
908
#define SGEMM_DEFAULT_UNROLL_M 4
909
#define CGEMM_DEFAULT_UNROLL_M 2
910
#endif
911
#define DGEMM_DEFAULT_UNROLL_M 2
912
#define SGEMM_DEFAULT_UNROLL_N 2
913
#define DGEMM_DEFAULT_UNROLL_N 2
914
#define QGEMM_DEFAULT_UNROLL_M 2
915
#define QGEMM_DEFAULT_UNROLL_N 2
916
#define CGEMM_DEFAULT_UNROLL_N 1
917
#define ZGEMM_DEFAULT_UNROLL_M 1
918
#define ZGEMM_DEFAULT_UNROLL_N 1
919
#define XGEMM_DEFAULT_UNROLL_M 1
920
#define XGEMM_DEFAULT_UNROLL_N 1
921
922
#define SGEMM_DEFAULT_P sgemm_p
923
#define SGEMM_DEFAULT_Q 256
924
#define SGEMM_DEFAULT_R sgemm_r
925
926
#define DGEMM_DEFAULT_P dgemm_p
927
#define DGEMM_DEFAULT_Q 256
928
#define DGEMM_DEFAULT_R dgemm_r
929
930
#define QGEMM_DEFAULT_P qgemm_p
931
#define QGEMM_DEFAULT_Q 256
932
#define QGEMM_DEFAULT_R qgemm_r
933
934
#define CGEMM_DEFAULT_P cgemm_p
935
#define CGEMM_DEFAULT_Q 256
936
#define CGEMM_DEFAULT_R cgemm_r
937
938
#define ZGEMM_DEFAULT_P zgemm_p
939
#define ZGEMM_DEFAULT_Q 256
940
#define ZGEMM_DEFAULT_R zgemm_r
941
942
#define XGEMM_DEFAULT_P xgemm_p
943
#define XGEMM_DEFAULT_Q 256
944
#define XGEMM_DEFAULT_R xgemm_r
945
946
#define SYMV_P  4
947
948
#endif
949
950
#ifdef PENTIUMM
951
952
#define SNUMOPT   2
953
#define DNUMOPT   1
954
955
#define GEMM_DEFAULT_OFFSET_A 0
956
#define GEMM_DEFAULT_OFFSET_B 0
957
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
958
959
#ifdef CORE_YONAH
960
#define SGEMM_DEFAULT_UNROLL_M 4
961
#define SGEMM_DEFAULT_UNROLL_N 4
962
#define DGEMM_DEFAULT_UNROLL_M 2
963
#define DGEMM_DEFAULT_UNROLL_N 4
964
#define QGEMM_DEFAULT_UNROLL_M 2
965
#define QGEMM_DEFAULT_UNROLL_N 2
966
#define CGEMM_DEFAULT_UNROLL_M 2
967
#define CGEMM_DEFAULT_UNROLL_N 2
968
#define ZGEMM_DEFAULT_UNROLL_M 1
969
#define ZGEMM_DEFAULT_UNROLL_N 2
970
#define XGEMM_DEFAULT_UNROLL_M 1
971
#define XGEMM_DEFAULT_UNROLL_N 1
972
#else
973
#define SGEMM_DEFAULT_UNROLL_M 8
974
#define SGEMM_DEFAULT_UNROLL_N 2
975
#define DGEMM_DEFAULT_UNROLL_M 2
976
#define DGEMM_DEFAULT_UNROLL_N 2
977
#define QGEMM_DEFAULT_UNROLL_M 2
978
#define QGEMM_DEFAULT_UNROLL_N 2
979
#define CGEMM_DEFAULT_UNROLL_M 4
980
#define CGEMM_DEFAULT_UNROLL_N 1
981
#define ZGEMM_DEFAULT_UNROLL_M 1
982
#define ZGEMM_DEFAULT_UNROLL_N 1
983
#define XGEMM_DEFAULT_UNROLL_M 1
984
#define XGEMM_DEFAULT_UNROLL_N 1
985
986
#endif
987
988
#define SGEMM_DEFAULT_P sgemm_p
989
#define SGEMM_DEFAULT_Q 256
990
#define SGEMM_DEFAULT_R sgemm_r
991
992
#define DGEMM_DEFAULT_P dgemm_p
993
#define DGEMM_DEFAULT_Q 256
994
#define DGEMM_DEFAULT_R dgemm_r
995
996
#define QGEMM_DEFAULT_P qgemm_p
997
#define QGEMM_DEFAULT_Q 256
998
#define QGEMM_DEFAULT_R qgemm_r
999
1000
#define CGEMM_DEFAULT_P cgemm_p
1001
#define CGEMM_DEFAULT_Q 256
1002
#define CGEMM_DEFAULT_R cgemm_r
1003
1004
#define ZGEMM_DEFAULT_P zgemm_p
1005
#define ZGEMM_DEFAULT_Q 256
1006
#define ZGEMM_DEFAULT_R zgemm_r
1007
1008
#define XGEMM_DEFAULT_P xgemm_p
1009
#define XGEMM_DEFAULT_Q 256
1010
#define XGEMM_DEFAULT_R xgemm_r
1011
1012
#define SYMV_P  4
1013
#endif
1014
1015
#ifdef CORE_NORTHWOOD
1016
1017
#define SNUMOPT   4
1018
#define DNUMOPT   2
1019
1020
#define GEMM_DEFAULT_OFFSET_A      0
1021
#define GEMM_DEFAULT_OFFSET_B     32
1022
1023
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
1024
1025
#define SYMV_P  8
1026
1027
#define SGEMM_DEFAULT_UNROLL_M 8
1028
#define DGEMM_DEFAULT_UNROLL_M 4
1029
#define QGEMM_DEFAULT_UNROLL_M 2
1030
#define CGEMM_DEFAULT_UNROLL_M 4
1031
#define ZGEMM_DEFAULT_UNROLL_M 2
1032
#define XGEMM_DEFAULT_UNROLL_M 1
1033
1034
#define SGEMM_DEFAULT_UNROLL_N 2
1035
#define DGEMM_DEFAULT_UNROLL_N 2
1036
#define QGEMM_DEFAULT_UNROLL_N 2
1037
#define CGEMM_DEFAULT_UNROLL_N 1
1038
#define ZGEMM_DEFAULT_UNROLL_N 1
1039
#define XGEMM_DEFAULT_UNROLL_N 1
1040
1041
#define SGEMM_DEFAULT_P sgemm_p
1042
#define SGEMM_DEFAULT_R sgemm_r
1043
1044
#define DGEMM_DEFAULT_P dgemm_p
1045
#define DGEMM_DEFAULT_R dgemm_r
1046
1047
#define QGEMM_DEFAULT_P qgemm_p
1048
#define QGEMM_DEFAULT_R qgemm_r
1049
1050
#define CGEMM_DEFAULT_P cgemm_p
1051
#define CGEMM_DEFAULT_R cgemm_r
1052
1053
#define ZGEMM_DEFAULT_P zgemm_p
1054
#define ZGEMM_DEFAULT_R zgemm_r
1055
1056
#define XGEMM_DEFAULT_P xgemm_p
1057
#define XGEMM_DEFAULT_R xgemm_r
1058
1059
#define SGEMM_DEFAULT_Q 128
1060
#define DGEMM_DEFAULT_Q 128
1061
#define QGEMM_DEFAULT_Q 128
1062
#define CGEMM_DEFAULT_Q 128
1063
#define ZGEMM_DEFAULT_Q 128
1064
#define XGEMM_DEFAULT_Q 128
1065
#endif
1066
1067
#ifdef CORE_PRESCOTT
1068
1069
#define SNUMOPT   4
1070
#define DNUMOPT   2
1071
1072
#ifndef __64BIT__
1073
#define GEMM_DEFAULT_OFFSET_A    128
1074
#define GEMM_DEFAULT_OFFSET_B    192
1075
#else
1076
#define GEMM_DEFAULT_OFFSET_A      0
1077
#define GEMM_DEFAULT_OFFSET_B    256
1078
#endif
1079
1080
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
1081
1082
#define SYMV_P  8
1083
1084
#ifdef ARCH_X86
1085
#define SGEMM_DEFAULT_UNROLL_M 4
1086
#define DGEMM_DEFAULT_UNROLL_M 2
1087
#define QGEMM_DEFAULT_UNROLL_M 2
1088
#define CGEMM_DEFAULT_UNROLL_M 2
1089
#define ZGEMM_DEFAULT_UNROLL_M 1
1090
#define XGEMM_DEFAULT_UNROLL_M 1
1091
#else
1092
#define SGEMM_DEFAULT_UNROLL_M 8
1093
#define DGEMM_DEFAULT_UNROLL_M 4
1094
#define QGEMM_DEFAULT_UNROLL_M 2
1095
#define CGEMM_DEFAULT_UNROLL_M 4
1096
#define ZGEMM_DEFAULT_UNROLL_M 2
1097
#define XGEMM_DEFAULT_UNROLL_M 1
1098
#endif
1099
1100
#define SGEMM_DEFAULT_UNROLL_N 4
1101
#define DGEMM_DEFAULT_UNROLL_N 4
1102
#define QGEMM_DEFAULT_UNROLL_N 2
1103
#define CGEMM_DEFAULT_UNROLL_N 2
1104
#define ZGEMM_DEFAULT_UNROLL_N 2
1105
#define XGEMM_DEFAULT_UNROLL_N 1
1106
1107
#define SGEMM_DEFAULT_P sgemm_p
1108
#define SGEMM_DEFAULT_R sgemm_r
1109
1110
#define DGEMM_DEFAULT_P dgemm_p
1111
#define DGEMM_DEFAULT_R dgemm_r
1112
1113
#define QGEMM_DEFAULT_P qgemm_p
1114
#define QGEMM_DEFAULT_R qgemm_r
1115
1116
#define CGEMM_DEFAULT_P cgemm_p
1117
#define CGEMM_DEFAULT_R cgemm_r
1118
1119
#define ZGEMM_DEFAULT_P zgemm_p
1120
#define ZGEMM_DEFAULT_R zgemm_r
1121
1122
#define XGEMM_DEFAULT_P xgemm_p
1123
#define XGEMM_DEFAULT_R xgemm_r
1124
1125
#define SGEMM_DEFAULT_Q 128
1126
#define DGEMM_DEFAULT_Q 128
1127
#define QGEMM_DEFAULT_Q 128
1128
#define CGEMM_DEFAULT_Q 128
1129
#define ZGEMM_DEFAULT_Q 128
1130
#define XGEMM_DEFAULT_Q 128
1131
#endif
1132
1133
#ifdef CORE2
1134
1135
#define SNUMOPT   8
1136
#define DNUMOPT   4
1137
1138
#define GEMM_DEFAULT_OFFSET_A    448
1139
#define GEMM_DEFAULT_OFFSET_B    128
1140
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
1141
1142
#define SYMV_P  8
1143
1144
#define SWITCH_RATIO  4
1145
1146
#ifdef ARCH_X86
1147
#define SGEMM_DEFAULT_UNROLL_M 8
1148
#define DGEMM_DEFAULT_UNROLL_M 4
1149
#define QGEMM_DEFAULT_UNROLL_M 2
1150
#define CGEMM_DEFAULT_UNROLL_M 4
1151
#define ZGEMM_DEFAULT_UNROLL_M 2
1152
#define XGEMM_DEFAULT_UNROLL_M 1
1153
1154
#define SGEMM_DEFAULT_UNROLL_N 2
1155
#define DGEMM_DEFAULT_UNROLL_N 2
1156
#define QGEMM_DEFAULT_UNROLL_N 2
1157
#define CGEMM_DEFAULT_UNROLL_N 1
1158
#define ZGEMM_DEFAULT_UNROLL_N 1
1159
#define XGEMM_DEFAULT_UNROLL_N 1
1160
1161
#define MASK(a, b) ((((a) + (b) - 1) / (b)) * (b))
1162
1163
#else
1164
#define SGEMM_DEFAULT_UNROLL_M 8
1165
#define DGEMM_DEFAULT_UNROLL_M 4
1166
#define QGEMM_DEFAULT_UNROLL_M 2
1167
#define CGEMM_DEFAULT_UNROLL_M 4
1168
#define ZGEMM_DEFAULT_UNROLL_M 2
1169
#define XGEMM_DEFAULT_UNROLL_M 1
1170
1171
#define SGEMM_DEFAULT_UNROLL_N 4
1172
#define DGEMM_DEFAULT_UNROLL_N 4
1173
#define QGEMM_DEFAULT_UNROLL_N 2
1174
#define CGEMM_DEFAULT_UNROLL_N 2
1175
#define ZGEMM_DEFAULT_UNROLL_N 2
1176
#define XGEMM_DEFAULT_UNROLL_N 1
1177
#endif
1178
1179
#define SGEMM_DEFAULT_P sgemm_p
1180
#define SGEMM_DEFAULT_R sgemm_r
1181
1182
#define DGEMM_DEFAULT_P dgemm_p
1183
#define DGEMM_DEFAULT_R dgemm_r
1184
1185
#define QGEMM_DEFAULT_P qgemm_p
1186
#define QGEMM_DEFAULT_R qgemm_r
1187
1188
#define CGEMM_DEFAULT_P cgemm_p
1189
#define CGEMM_DEFAULT_R cgemm_r
1190
1191
#define ZGEMM_DEFAULT_P zgemm_p
1192
#define ZGEMM_DEFAULT_R zgemm_r
1193
1194
#define XGEMM_DEFAULT_P xgemm_p
1195
#define XGEMM_DEFAULT_R xgemm_r
1196
1197
#define SGEMM_DEFAULT_Q 256
1198
#define DGEMM_DEFAULT_Q 256
1199
#define QGEMM_DEFAULT_Q 256
1200
#define CGEMM_DEFAULT_Q 256
1201
#define ZGEMM_DEFAULT_Q 256
1202
#define XGEMM_DEFAULT_Q 256
1203
1204
#endif
1205
1206
#ifdef PENRYN
1207
1208
#define SNUMOPT   8
1209
#define DNUMOPT   4
1210
1211
#define GEMM_DEFAULT_OFFSET_A   128
1212
#define GEMM_DEFAULT_OFFSET_B     0
1213
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
1214
1215
#define SYMV_P  8
1216
1217
#define SWITCH_RATIO  4
1218
1219
#ifdef ARCH_X86
1220
#define SGEMM_DEFAULT_UNROLL_M 4
1221
#define DGEMM_DEFAULT_UNROLL_M 2
1222
#define QGEMM_DEFAULT_UNROLL_M 2
1223
#define CGEMM_DEFAULT_UNROLL_M 2
1224
#define ZGEMM_DEFAULT_UNROLL_M 1
1225
#define XGEMM_DEFAULT_UNROLL_M 1
1226
1227
#define SGEMM_DEFAULT_UNROLL_N 4
1228
#define DGEMM_DEFAULT_UNROLL_N 4
1229
#define QGEMM_DEFAULT_UNROLL_N 2
1230
#define CGEMM_DEFAULT_UNROLL_N 2
1231
#define ZGEMM_DEFAULT_UNROLL_N 2
1232
#define XGEMM_DEFAULT_UNROLL_N 1
1233
#else
1234
#define SGEMM_DEFAULT_UNROLL_M 8
1235
#define DGEMM_DEFAULT_UNROLL_M 4
1236
#define QGEMM_DEFAULT_UNROLL_M 2
1237
#define CGEMM_DEFAULT_UNROLL_M 4
1238
#define ZGEMM_DEFAULT_UNROLL_M 2
1239
#define XGEMM_DEFAULT_UNROLL_M 1
1240
1241
#define SGEMM_DEFAULT_UNROLL_N 4
1242
#define DGEMM_DEFAULT_UNROLL_N 4
1243
#define QGEMM_DEFAULT_UNROLL_N 2
1244
#define CGEMM_DEFAULT_UNROLL_N 2
1245
#define ZGEMM_DEFAULT_UNROLL_N 2
1246
#define XGEMM_DEFAULT_UNROLL_N 1
1247
#endif
1248
1249
#define SGEMM_DEFAULT_P sgemm_p
1250
#define SGEMM_DEFAULT_R sgemm_r
1251
1252
#define DGEMM_DEFAULT_P dgemm_p
1253
#define DGEMM_DEFAULT_R dgemm_r
1254
1255
#define QGEMM_DEFAULT_P qgemm_p
1256
#define QGEMM_DEFAULT_R qgemm_r
1257
1258
#define CGEMM_DEFAULT_P cgemm_p
1259
#define CGEMM_DEFAULT_R cgemm_r
1260
1261
#define ZGEMM_DEFAULT_P zgemm_p
1262
#define ZGEMM_DEFAULT_R zgemm_r
1263
1264
#define XGEMM_DEFAULT_P xgemm_p
1265
#define XGEMM_DEFAULT_R xgemm_r
1266
1267
#define SGEMM_DEFAULT_Q 512
1268
#define DGEMM_DEFAULT_Q 256
1269
#define QGEMM_DEFAULT_Q 128
1270
#define CGEMM_DEFAULT_Q 512
1271
#define ZGEMM_DEFAULT_Q 256
1272
#define XGEMM_DEFAULT_Q 128
1273
1274
#define GETRF_FACTOR 0.75
1275
#endif
1276
1277
#ifdef DUNNINGTON
1278
1279
#define SNUMOPT   8
1280
#define DNUMOPT   4
1281
1282
#define GEMM_DEFAULT_OFFSET_A   128
1283
#define GEMM_DEFAULT_OFFSET_B     0
1284
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
1285
1286
#define SYMV_P  8
1287
1288
#define SWITCH_RATIO  4
1289
1290
#ifdef ARCH_X86
1291
#define SGEMM_DEFAULT_UNROLL_M 4
1292
#define DGEMM_DEFAULT_UNROLL_M 2
1293
#define QGEMM_DEFAULT_UNROLL_M 2
1294
#define CGEMM_DEFAULT_UNROLL_M 2
1295
#define ZGEMM_DEFAULT_UNROLL_M 1
1296
#define XGEMM_DEFAULT_UNROLL_M 1
1297
1298
#define SGEMM_DEFAULT_UNROLL_N 4
1299
#define DGEMM_DEFAULT_UNROLL_N 4
1300
#define QGEMM_DEFAULT_UNROLL_N 2
1301
#define CGEMM_DEFAULT_UNROLL_N 2
1302
#define ZGEMM_DEFAULT_UNROLL_N 2
1303
#define XGEMM_DEFAULT_UNROLL_N 1
1304
#else
1305
#define SGEMM_DEFAULT_UNROLL_M 8
1306
#define DGEMM_DEFAULT_UNROLL_M 4
1307
#define QGEMM_DEFAULT_UNROLL_M 2
1308
#define CGEMM_DEFAULT_UNROLL_M 4
1309
#define ZGEMM_DEFAULT_UNROLL_M 2
1310
#define XGEMM_DEFAULT_UNROLL_M 1
1311
1312
#define SGEMM_DEFAULT_UNROLL_N 4
1313
#define DGEMM_DEFAULT_UNROLL_N 4
1314
#define QGEMM_DEFAULT_UNROLL_N 2
1315
#define CGEMM_DEFAULT_UNROLL_N 2
1316
#define ZGEMM_DEFAULT_UNROLL_N 2
1317
#define XGEMM_DEFAULT_UNROLL_N 1
1318
#endif
1319
1320
#define SGEMM_DEFAULT_P sgemm_p
1321
#define SGEMM_DEFAULT_R sgemm_r
1322
1323
#define DGEMM_DEFAULT_P dgemm_p
1324
#define DGEMM_DEFAULT_R dgemm_r
1325
1326
#define QGEMM_DEFAULT_P qgemm_p
1327
#define QGEMM_DEFAULT_R qgemm_r
1328
1329
#define CGEMM_DEFAULT_P cgemm_p
1330
#define CGEMM_DEFAULT_R cgemm_r
1331
1332
#define ZGEMM_DEFAULT_P zgemm_p
1333
#define ZGEMM_DEFAULT_R zgemm_r
1334
1335
#define XGEMM_DEFAULT_P xgemm_p
1336
#define XGEMM_DEFAULT_R xgemm_r
1337
1338
#define SGEMM_DEFAULT_Q 768
1339
#define DGEMM_DEFAULT_Q 384
1340
#define QGEMM_DEFAULT_Q 192
1341
#define CGEMM_DEFAULT_Q 768
1342
#define ZGEMM_DEFAULT_Q 384
1343
#define XGEMM_DEFAULT_Q 192
1344
1345
#define GETRF_FACTOR 0.75
1346
#define GEMM_THREAD gemm_thread_mn
1347
#endif
1348
1349
#ifdef NEHALEM
1350
1351
#define SNUMOPT   8
1352
#define DNUMOPT   4
1353
1354
#define GEMM_DEFAULT_OFFSET_A    32
1355
#define GEMM_DEFAULT_OFFSET_B     0
1356
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
1357
1358
#define SYMV_P  8
1359
1360
#define SWITCH_RATIO  4
1361
1362
#ifdef ARCH_X86
1363
#define SGEMM_DEFAULT_UNROLL_M 4
1364
#define DGEMM_DEFAULT_UNROLL_M 2
1365
#define QGEMM_DEFAULT_UNROLL_M 2
1366
#define CGEMM_DEFAULT_UNROLL_M 2
1367
#define ZGEMM_DEFAULT_UNROLL_M 1
1368
#define XGEMM_DEFAULT_UNROLL_M 1
1369
1370
#define SGEMM_DEFAULT_UNROLL_N 4
1371
#define DGEMM_DEFAULT_UNROLL_N 4
1372
#define QGEMM_DEFAULT_UNROLL_N 2
1373
#define CGEMM_DEFAULT_UNROLL_N 2
1374
#define ZGEMM_DEFAULT_UNROLL_N 2
1375
#define XGEMM_DEFAULT_UNROLL_N 1
1376
#else
1377
#define SGEMM_DEFAULT_UNROLL_M 4
1378
#define DGEMM_DEFAULT_UNROLL_M 2
1379
#define QGEMM_DEFAULT_UNROLL_M 2
1380
#define CGEMM_DEFAULT_UNROLL_M 2
1381
#define ZGEMM_DEFAULT_UNROLL_M 1
1382
#define XGEMM_DEFAULT_UNROLL_M 1
1383
1384
#define SGEMM_DEFAULT_UNROLL_N 8
1385
#define DGEMM_DEFAULT_UNROLL_N 8
1386
#define QGEMM_DEFAULT_UNROLL_N 2
1387
#define CGEMM_DEFAULT_UNROLL_N 4
1388
#define ZGEMM_DEFAULT_UNROLL_N 4
1389
#define XGEMM_DEFAULT_UNROLL_N 1
1390
#endif
1391
1392
#define SGEMM_DEFAULT_P 504
1393
#define SGEMM_DEFAULT_R sgemm_r
1394
1395
#define DGEMM_DEFAULT_P 504
1396
#define DGEMM_DEFAULT_R dgemm_r
1397
1398
#define QGEMM_DEFAULT_P 504
1399
#define QGEMM_DEFAULT_R qgemm_r
1400
1401
#define CGEMM_DEFAULT_P 252
1402
#define CGEMM_DEFAULT_R cgemm_r
1403
1404
#define ZGEMM_DEFAULT_P 252
1405
#define ZGEMM_DEFAULT_R zgemm_r
1406
1407
#define XGEMM_DEFAULT_P 252
1408
#define XGEMM_DEFAULT_R xgemm_r
1409
1410
#define SGEMM_DEFAULT_Q 512
1411
#define DGEMM_DEFAULT_Q 256
1412
#define QGEMM_DEFAULT_Q 128
1413
#define CGEMM_DEFAULT_Q 512
1414
#define ZGEMM_DEFAULT_Q 256
1415
#define XGEMM_DEFAULT_Q 128
1416
1417
#define GETRF_FACTOR 0.72
1418
1419
#endif
1420
1421
1422
#ifdef SANDYBRIDGE
1423
1424
#define SNUMOPT   8
1425
#define DNUMOPT   4
1426
1427
#define GEMM_DEFAULT_OFFSET_A   0
1428
#define GEMM_DEFAULT_OFFSET_B     0
1429
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
1430
1431
#define SYMV_P  8
1432
1433
#define SWITCH_RATIO  4
1434
1435
#ifdef ARCH_X86
1436
#define SGEMM_DEFAULT_UNROLL_M 4
1437
#define DGEMM_DEFAULT_UNROLL_M 2
1438
#define QGEMM_DEFAULT_UNROLL_M 2
1439
#define CGEMM_DEFAULT_UNROLL_M 2
1440
#define ZGEMM_DEFAULT_UNROLL_M 1
1441
#define XGEMM_DEFAULT_UNROLL_M 1
1442
1443
#define SGEMM_DEFAULT_UNROLL_N 4
1444
#define DGEMM_DEFAULT_UNROLL_N 4
1445
#define QGEMM_DEFAULT_UNROLL_N 2
1446
#define CGEMM_DEFAULT_UNROLL_N 2
1447
#define ZGEMM_DEFAULT_UNROLL_N 2
1448
#define XGEMM_DEFAULT_UNROLL_N 1
1449
#else
1450
#define SGEMM_DEFAULT_UNROLL_M 16
1451
#define DGEMM_DEFAULT_UNROLL_M 8
1452
#define QGEMM_DEFAULT_UNROLL_M 2
1453
#define CGEMM_DEFAULT_UNROLL_M 8
1454
#define ZGEMM_DEFAULT_UNROLL_M 1
1455
#define XGEMM_DEFAULT_UNROLL_M 1
1456
1457
#define SGEMM_DEFAULT_UNROLL_N 4
1458
#define DGEMM_DEFAULT_UNROLL_N 4
1459
#define QGEMM_DEFAULT_UNROLL_N 2
1460
#define CGEMM_DEFAULT_UNROLL_N 2
1461
#define ZGEMM_DEFAULT_UNROLL_N 4
1462
#define XGEMM_DEFAULT_UNROLL_N 1
1463
#endif
1464
1465
#define SGEMM_DEFAULT_P 768
1466
#define SGEMM_DEFAULT_R sgemm_r
1467
/*#define SGEMM_DEFAULT_R 1024*/
1468
1469
#define DGEMM_DEFAULT_P 512
1470
#define DGEMM_DEFAULT_R dgemm_r
1471
/*#define DGEMM_DEFAULT_R 1024*/
1472
1473
#define QGEMM_DEFAULT_P 504
1474
#define QGEMM_DEFAULT_R qgemm_r
1475
1476
#define CGEMM_DEFAULT_P 768
1477
#define CGEMM_DEFAULT_R cgemm_r
1478
/*#define CGEMM_DEFAULT_R 1024*/
1479
1480
#define ZGEMM_DEFAULT_P 512
1481
#define ZGEMM_DEFAULT_R zgemm_r
1482
/*#define ZGEMM_DEFAULT_R 1024*/
1483
1484
#define XGEMM_DEFAULT_P 252
1485
#define XGEMM_DEFAULT_R xgemm_r
1486
1487
#define SGEMM_DEFAULT_Q 384
1488
#define DGEMM_DEFAULT_Q 256
1489
#define QGEMM_DEFAULT_Q 128
1490
#define CGEMM_DEFAULT_Q 512
1491
#define ZGEMM_DEFAULT_Q 192
1492
#define XGEMM_DEFAULT_Q 128
1493
1494
#define CGEMM3M_DEFAULT_UNROLL_N 8
1495
#define CGEMM3M_DEFAULT_UNROLL_M 4
1496
#define ZGEMM3M_DEFAULT_UNROLL_N 8
1497
#define ZGEMM3M_DEFAULT_UNROLL_M 2
1498
1499
#define CGEMM3M_DEFAULT_P 448
1500
#define ZGEMM3M_DEFAULT_P 224
1501
#define XGEMM3M_DEFAULT_P 112
1502
#define CGEMM3M_DEFAULT_Q 224
1503
#define ZGEMM3M_DEFAULT_Q 224
1504
#define XGEMM3M_DEFAULT_Q 224
1505
#define CGEMM3M_DEFAULT_R 12288
1506
#define ZGEMM3M_DEFAULT_R 12288
1507
#define XGEMM3M_DEFAULT_R 12288
1508
1509
1510
1511
#define GETRF_FACTOR 0.72
1512
1513
#endif
1514
1515
#ifdef HASWELL
1516
1517
#define SNUMOPT         16
1518
#define DNUMOPT         8
1519
1520
32
#define GEMM_DEFAULT_OFFSET_A     0
1521
0
#define GEMM_DEFAULT_OFFSET_B     0
1522
64
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
1523
1524
0
#define SYMV_P  8
1525
1526
#if defined(XDOUBLE) || defined(DOUBLE)
1527
0
#define SWITCH_RATIO            4
1528
0
#define GEMM_PREFERED_SIZE      4
1529
#else
1530
0
#define SWITCH_RATIO            8
1531
0
#define GEMM_PREFERED_SIZE      8
1532
#endif
1533
1534
#ifdef ARCH_X86
1535
1536
#define SGEMM_DEFAULT_UNROLL_M 4
1537
#define DGEMM_DEFAULT_UNROLL_M 2
1538
#define QGEMM_DEFAULT_UNROLL_M 2
1539
#define CGEMM_DEFAULT_UNROLL_M 2
1540
#define ZGEMM_DEFAULT_UNROLL_M 1
1541
#define XGEMM_DEFAULT_UNROLL_M 1
1542
1543
#define SGEMM_DEFAULT_UNROLL_N 4
1544
#define DGEMM_DEFAULT_UNROLL_N 4
1545
#define QGEMM_DEFAULT_UNROLL_N 2
1546
#define CGEMM_DEFAULT_UNROLL_N 2
1547
#define ZGEMM_DEFAULT_UNROLL_N 2
1548
#define XGEMM_DEFAULT_UNROLL_N 1
1549
1550
#else
1551
1552
24
#define SGEMM_DEFAULT_UNROLL_M 8
1553
24
#define DGEMM_DEFAULT_UNROLL_M 4
1554
#define QGEMM_DEFAULT_UNROLL_M 2
1555
24
#define CGEMM_DEFAULT_UNROLL_M 8
1556
24
#define ZGEMM_DEFAULT_UNROLL_M 4
1557
#define XGEMM_DEFAULT_UNROLL_M 1
1558
1559
0
#define SGEMM_DEFAULT_UNROLL_N 4
1560
0
#define DGEMM_DEFAULT_UNROLL_N 8
1561
#define QGEMM_DEFAULT_UNROLL_N 2
1562
#define CGEMM_DEFAULT_UNROLL_N 2
1563
#define ZGEMM_DEFAULT_UNROLL_N 2
1564
#define XGEMM_DEFAULT_UNROLL_N 1
1565
/*
1566
#define SGEMM_DEFAULT_UNROLL_MN 32
1567
#define DGEMM_DEFAULT_UNROLL_MN 32
1568
*/
1569
#endif
1570
1571
#ifdef ARCH_X86
1572
1573
#define SGEMM_DEFAULT_P 512
1574
#define SGEMM_DEFAULT_R sgemm_r
1575
#define DGEMM_DEFAULT_P 512
1576
#define DGEMM_DEFAULT_R dgemm_r
1577
#define QGEMM_DEFAULT_P 504
1578
#define QGEMM_DEFAULT_R qgemm_r
1579
#define CGEMM_DEFAULT_P 128
1580
#define CGEMM_DEFAULT_R 1024
1581
#define ZGEMM_DEFAULT_P 512
1582
#define ZGEMM_DEFAULT_R zgemm_r
1583
#define XGEMM_DEFAULT_P 252
1584
#define XGEMM_DEFAULT_R xgemm_r
1585
#define SGEMM_DEFAULT_Q 256
1586
#define DGEMM_DEFAULT_Q 256
1587
#define QGEMM_DEFAULT_Q 128
1588
#define CGEMM_DEFAULT_Q 256
1589
#define ZGEMM_DEFAULT_Q 192
1590
#define XGEMM_DEFAULT_Q 128
1591
1592
#else
1593
1594
8
#define SGEMM_DEFAULT_P 320
1595
8
#define DGEMM_DEFAULT_P 512
1596
8
#define CGEMM_DEFAULT_P 256
1597
8
#define ZGEMM_DEFAULT_P 192
1598
1599
#ifdef WINDOWS_ABI
1600
#define SGEMM_DEFAULT_Q 320
1601
#define DGEMM_DEFAULT_Q 128
1602
#else
1603
16
#define SGEMM_DEFAULT_Q 320
1604
16
#define DGEMM_DEFAULT_Q 256
1605
#endif
1606
16
#define CGEMM_DEFAULT_Q 256
1607
16
#define ZGEMM_DEFAULT_Q 192
1608
1609
0
#define SGEMM_DEFAULT_R sgemm_r
1610
0
#define DGEMM_DEFAULT_R 13824
1611
#define CGEMM_DEFAULT_R cgemm_r
1612
#define ZGEMM_DEFAULT_R zgemm_r
1613
1614
#define QGEMM_DEFAULT_Q 128
1615
#define QGEMM_DEFAULT_P 504
1616
#define QGEMM_DEFAULT_R qgemm_r
1617
#define XGEMM_DEFAULT_P 252
1618
#define XGEMM_DEFAULT_R xgemm_r
1619
#define XGEMM_DEFAULT_Q 128
1620
1621
#define CGEMM3M_DEFAULT_UNROLL_N 4
1622
#define CGEMM3M_DEFAULT_UNROLL_M 8
1623
#define ZGEMM3M_DEFAULT_UNROLL_N 4
1624
#define ZGEMM3M_DEFAULT_UNROLL_M 4
1625
1626
#define CGEMM3M_DEFAULT_P 320
1627
#define ZGEMM3M_DEFAULT_P 256
1628
#define XGEMM3M_DEFAULT_P 112
1629
#define CGEMM3M_DEFAULT_Q 320
1630
#define ZGEMM3M_DEFAULT_Q 256
1631
#define XGEMM3M_DEFAULT_Q 224
1632
#define CGEMM3M_DEFAULT_R 12288
1633
#define ZGEMM3M_DEFAULT_R 12288
1634
#define XGEMM3M_DEFAULT_R 12288
1635
1636
#endif
1637
1638
1639
#endif
1640
1641
#ifdef SKYLAKEX
1642
1643
#define SNUMOPT         16
1644
#define DNUMOPT         8
1645
1646
#define GEMM_DEFAULT_OFFSET_A     0
1647
#define GEMM_DEFAULT_OFFSET_B     0
1648
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
1649
1650
#define SYMV_P  8
1651
1652
#if defined(XDOUBLE) || defined(DOUBLE)
1653
#define SWITCH_RATIO           8
1654
#define GEMM_PREFERED_SIZE     8
1655
#else
1656
#define SWITCH_RATIO           16
1657
#define GEMM_PREFERED_SIZE     16
1658
#endif
1659
#define USE_SGEMM_KERNEL_DIRECT 1
1660
1661
#ifdef ARCH_X86
1662
1663
#define SGEMM_DEFAULT_UNROLL_M 4
1664
#define DGEMM_DEFAULT_UNROLL_M 2
1665
#define QGEMM_DEFAULT_UNROLL_M 2
1666
#define CGEMM_DEFAULT_UNROLL_M 2
1667
#define ZGEMM_DEFAULT_UNROLL_M 1
1668
#define XGEMM_DEFAULT_UNROLL_M 1
1669
1670
#define SGEMM_DEFAULT_UNROLL_N 4
1671
#define DGEMM_DEFAULT_UNROLL_N 4
1672
#define QGEMM_DEFAULT_UNROLL_N 2
1673
#define CGEMM_DEFAULT_UNROLL_N 2
1674
#define ZGEMM_DEFAULT_UNROLL_N 2
1675
#define XGEMM_DEFAULT_UNROLL_N 1
1676
1677
#else
1678
1679
#define SGEMM_DEFAULT_UNROLL_M 16
1680
#define DGEMM_DEFAULT_UNROLL_M 16
1681
#define QGEMM_DEFAULT_UNROLL_M 2
1682
#define CGEMM_DEFAULT_UNROLL_M 8
1683
#define ZGEMM_DEFAULT_UNROLL_M 4
1684
#define XGEMM_DEFAULT_UNROLL_M 1
1685
1686
#define SGEMM_DEFAULT_UNROLL_N 4
1687
#define DGEMM_DEFAULT_UNROLL_N 2
1688
#define QGEMM_DEFAULT_UNROLL_N 2
1689
#define CGEMM_DEFAULT_UNROLL_N 2
1690
#define ZGEMM_DEFAULT_UNROLL_N 2
1691
#define XGEMM_DEFAULT_UNROLL_N 1
1692
1693
#define SGEMM_DEFAULT_UNROLL_MN 32
1694
#define DGEMM_DEFAULT_UNROLL_MN 32
1695
#endif
1696
1697
#ifdef ARCH_X86
1698
1699
#define SGEMM_DEFAULT_P 512
1700
#define SGEMM_DEFAULT_R sgemm_r
1701
#define DGEMM_DEFAULT_P 512
1702
#define DGEMM_DEFAULT_R dgemm_r
1703
#define QGEMM_DEFAULT_P 504
1704
#define QGEMM_DEFAULT_R qgemm_r
1705
#define CGEMM_DEFAULT_P 128
1706
#define CGEMM_DEFAULT_R 1024
1707
#define ZGEMM_DEFAULT_P 512
1708
#define ZGEMM_DEFAULT_R zgemm_r
1709
#define XGEMM_DEFAULT_P 252
1710
#define XGEMM_DEFAULT_R xgemm_r
1711
#define SGEMM_DEFAULT_Q 256
1712
#define DGEMM_DEFAULT_Q 256
1713
#define QGEMM_DEFAULT_Q 128
1714
#define CGEMM_DEFAULT_Q 256
1715
#define ZGEMM_DEFAULT_Q 192
1716
#define XGEMM_DEFAULT_Q 128
1717
1718
#else
1719
1720
#define SGEMM_DEFAULT_P 448
1721
#define DGEMM_DEFAULT_P 192
1722
#define CGEMM_DEFAULT_P 384
1723
#define ZGEMM_DEFAULT_P 256
1724
1725
#define SGEMM_DEFAULT_Q 448
1726
#define DGEMM_DEFAULT_Q 384
1727
#define CGEMM_DEFAULT_Q 192
1728
#define ZGEMM_DEFAULT_Q 128
1729
1730
#define SGEMM_DEFAULT_R sgemm_r
1731
#define DGEMM_DEFAULT_R 8640
1732
#define CGEMM_DEFAULT_R cgemm_r
1733
#define ZGEMM_DEFAULT_R zgemm_r
1734
1735
#define QGEMM_DEFAULT_Q 128
1736
#define QGEMM_DEFAULT_P 504
1737
#define QGEMM_DEFAULT_R qgemm_r
1738
#define XGEMM_DEFAULT_P 252
1739
#define XGEMM_DEFAULT_R xgemm_r
1740
#define XGEMM_DEFAULT_Q 128
1741
1742
#define CGEMM3M_DEFAULT_UNROLL_N 4
1743
#define CGEMM3M_DEFAULT_UNROLL_M 8
1744
#define ZGEMM3M_DEFAULT_UNROLL_N 4
1745
#define ZGEMM3M_DEFAULT_UNROLL_M 4
1746
1747
#define CGEMM3M_DEFAULT_P 320
1748
#define ZGEMM3M_DEFAULT_P 256
1749
#define XGEMM3M_DEFAULT_P 112
1750
#define CGEMM3M_DEFAULT_Q 320
1751
#define ZGEMM3M_DEFAULT_Q 256
1752
#define XGEMM3M_DEFAULT_Q 224
1753
#define CGEMM3M_DEFAULT_R 12288
1754
#define ZGEMM3M_DEFAULT_R 12288
1755
#define XGEMM3M_DEFAULT_R 12288
1756
1757
#endif
1758
1759
1760
#endif
1761
1762
#ifdef SAPPHIRERAPIDS
1763
1764
#define SNUMOPT         16
1765
#define DNUMOPT         8
1766
1767
#define GEMM_DEFAULT_OFFSET_A     0
1768
#define GEMM_DEFAULT_OFFSET_B     0
1769
#define GEMM_DEFAULT_ALIGN 0x03fffUL
1770
1771
#define SYMV_P  8
1772
1773
#if defined(XDOUBLE) || defined(DOUBLE)
1774
#define SWITCH_RATIO           8
1775
#define GEMM_PREFERED_SIZE     8
1776
#else
1777
#define SWITCH_RATIO           16
1778
#define GEMM_PREFERED_SIZE     16
1779
#endif
1780
#define USE_SGEMM_KERNEL_DIRECT 1
1781
1782
#undef SBGEMM_DEFAULT_UNROLL_N
1783
#undef SBGEMM_DEFAULT_UNROLL_M
1784
#undef SBGEMM_DEFAULT_P
1785
#undef SBGEMM_DEFAULT_R
1786
#undef SBGEMM_DEFAULT_Q
1787
// FIXME: actually UNROLL_M = UNROLL_N = 16
1788
// If M and N is equal, OpenBLAS will reuse OCOPY as ICOPY.
1789
// But for AMX, they are not the same, set UNROLL_M = 32 to workaround
1790
#define SBGEMM_DEFAULT_UNROLL_N 16
1791
#define SBGEMM_DEFAULT_UNROLL_M 32
1792
#define SBGEMM_DEFAULT_P 256
1793
#define SBGEMM_DEFAULT_Q 1024
1794
#define SBGEMM_DEFAULT_R sbgemm_r
1795
1796
#ifdef ARCH_X86
1797
1798
#define SGEMM_DEFAULT_UNROLL_M 4
1799
#define DGEMM_DEFAULT_UNROLL_M 2
1800
#define QGEMM_DEFAULT_UNROLL_M 2
1801
#define CGEMM_DEFAULT_UNROLL_M 2
1802
#define ZGEMM_DEFAULT_UNROLL_M 1
1803
#define XGEMM_DEFAULT_UNROLL_M 1
1804
1805
#define SGEMM_DEFAULT_UNROLL_N 4
1806
#define DGEMM_DEFAULT_UNROLL_N 4
1807
#define QGEMM_DEFAULT_UNROLL_N 2
1808
#define CGEMM_DEFAULT_UNROLL_N 2
1809
#define ZGEMM_DEFAULT_UNROLL_N 2
1810
#define XGEMM_DEFAULT_UNROLL_N 1
1811
1812
#else
1813
1814
#define SGEMM_DEFAULT_UNROLL_M 16
1815
#define DGEMM_DEFAULT_UNROLL_M 16
1816
#define QGEMM_DEFAULT_UNROLL_M 2
1817
#define CGEMM_DEFAULT_UNROLL_M 8
1818
#define ZGEMM_DEFAULT_UNROLL_M 4
1819
#define XGEMM_DEFAULT_UNROLL_M 1
1820
1821
#define SGEMM_DEFAULT_UNROLL_N 4
1822
#define DGEMM_DEFAULT_UNROLL_N 2
1823
#define QGEMM_DEFAULT_UNROLL_N 2
1824
#define CGEMM_DEFAULT_UNROLL_N 2
1825
#define ZGEMM_DEFAULT_UNROLL_N 2
1826
#define XGEMM_DEFAULT_UNROLL_N 1
1827
1828
#define SGEMM_DEFAULT_UNROLL_MN 32
1829
#define DGEMM_DEFAULT_UNROLL_MN 32
1830
#endif
1831
1832
#ifdef ARCH_X86
1833
1834
#define SGEMM_DEFAULT_P 512
1835
#define SGEMM_DEFAULT_R sgemm_r
1836
#define DGEMM_DEFAULT_P 512
1837
#define DGEMM_DEFAULT_R dgemm_r
1838
#define QGEMM_DEFAULT_P 504
1839
#define QGEMM_DEFAULT_R qgemm_r
1840
#define CGEMM_DEFAULT_P 128
1841
#define CGEMM_DEFAULT_R 1024
1842
#define ZGEMM_DEFAULT_P 512
1843
#define ZGEMM_DEFAULT_R zgemm_r
1844
#define XGEMM_DEFAULT_P 252
1845
#define XGEMM_DEFAULT_R xgemm_r
1846
#define SGEMM_DEFAULT_Q 256
1847
#define DGEMM_DEFAULT_Q 256
1848
#define QGEMM_DEFAULT_Q 128
1849
#define CGEMM_DEFAULT_Q 256
1850
#define ZGEMM_DEFAULT_Q 192
1851
#define XGEMM_DEFAULT_Q 128
1852
1853
#else
1854
1855
#define SGEMM_DEFAULT_P 640
1856
#define DGEMM_DEFAULT_P 192
1857
#define CGEMM_DEFAULT_P 384
1858
#define ZGEMM_DEFAULT_P 256
1859
1860
#define SGEMM_DEFAULT_Q 320
1861
#define DGEMM_DEFAULT_Q 384
1862
#define CGEMM_DEFAULT_Q 192
1863
#define ZGEMM_DEFAULT_Q 128
1864
1865
#define SGEMM_DEFAULT_R sgemm_r
1866
#define DGEMM_DEFAULT_R 8640
1867
#define CGEMM_DEFAULT_R cgemm_r
1868
#define ZGEMM_DEFAULT_R zgemm_r
1869
1870
#define QGEMM_DEFAULT_Q 128
1871
#define QGEMM_DEFAULT_P 504
1872
#define QGEMM_DEFAULT_R qgemm_r
1873
#define XGEMM_DEFAULT_P 252
1874
#define XGEMM_DEFAULT_R xgemm_r
1875
#define XGEMM_DEFAULT_Q 128
1876
1877
#define CGEMM3M_DEFAULT_UNROLL_N 4
1878
#define CGEMM3M_DEFAULT_UNROLL_M 8
1879
#define ZGEMM3M_DEFAULT_UNROLL_N 4
1880
#define ZGEMM3M_DEFAULT_UNROLL_M 4
1881
1882
#define CGEMM3M_DEFAULT_P 320
1883
#define ZGEMM3M_DEFAULT_P 256
1884
#define XGEMM3M_DEFAULT_P 112
1885
#define CGEMM3M_DEFAULT_Q 320
1886
#define ZGEMM3M_DEFAULT_Q 256
1887
#define XGEMM3M_DEFAULT_Q 224
1888
#define CGEMM3M_DEFAULT_R 12288
1889
#define ZGEMM3M_DEFAULT_R 12288
1890
#define XGEMM3M_DEFAULT_R 12288
1891
1892
#endif
1893
#endif
1894
1895
#ifdef COOPERLAKE
1896
1897
#define SNUMOPT         16
1898
#define DNUMOPT         8
1899
1900
#define GEMM_DEFAULT_OFFSET_A     0
1901
#define GEMM_DEFAULT_OFFSET_B     0
1902
#define GEMM_DEFAULT_ALIGN 0x03fffUL
1903
1904
#define SYMV_P  8
1905
1906
#if defined(XDOUBLE) || defined(DOUBLE)
1907
#define SWITCH_RATIO           8
1908
#define GEMM_PREFERED_SIZE     8
1909
#else
1910
#define SWITCH_RATIO           16
1911
#define GEMM_PREFERED_SIZE     16
1912
#endif
1913
#define USE_SGEMM_KERNEL_DIRECT 1
1914
1915
#undef SBGEMM_DEFAULT_UNROLL_N
1916
#undef SBGEMM_DEFAULT_UNROLL_M
1917
#undef SBGEMM_DEFAULT_P
1918
#undef SBGEMM_DEFAULT_R
1919
#undef SBGEMM_DEFAULT_Q
1920
#define SBGEMM_DEFAULT_UNROLL_N 4
1921
#define SBGEMM_DEFAULT_UNROLL_M 16
1922
#define SBGEMM_DEFAULT_P 384
1923
#define SBGEMM_DEFAULT_Q 768
1924
#define SBGEMM_DEFAULT_R sbgemm_r
1925
1926
#ifdef ARCH_X86
1927
1928
#define SGEMM_DEFAULT_UNROLL_M 4
1929
#define DGEMM_DEFAULT_UNROLL_M 2
1930
#define QGEMM_DEFAULT_UNROLL_M 2
1931
#define CGEMM_DEFAULT_UNROLL_M 2
1932
#define ZGEMM_DEFAULT_UNROLL_M 1
1933
#define XGEMM_DEFAULT_UNROLL_M 1
1934
1935
#define SGEMM_DEFAULT_UNROLL_N 4
1936
#define DGEMM_DEFAULT_UNROLL_N 4
1937
#define QGEMM_DEFAULT_UNROLL_N 2
1938
#define CGEMM_DEFAULT_UNROLL_N 2
1939
#define ZGEMM_DEFAULT_UNROLL_N 2
1940
#define XGEMM_DEFAULT_UNROLL_N 1
1941
1942
#else
1943
1944
#define SGEMM_DEFAULT_UNROLL_M 16
1945
#define DGEMM_DEFAULT_UNROLL_M 16
1946
#define QGEMM_DEFAULT_UNROLL_M 2
1947
#define CGEMM_DEFAULT_UNROLL_M 8
1948
#define ZGEMM_DEFAULT_UNROLL_M 4
1949
#define XGEMM_DEFAULT_UNROLL_M 1
1950
1951
#define SGEMM_DEFAULT_UNROLL_N 4
1952
#define DGEMM_DEFAULT_UNROLL_N 2
1953
#define QGEMM_DEFAULT_UNROLL_N 2
1954
#define CGEMM_DEFAULT_UNROLL_N 2
1955
#define ZGEMM_DEFAULT_UNROLL_N 2
1956
#define XGEMM_DEFAULT_UNROLL_N 1
1957
1958
#define SGEMM_DEFAULT_UNROLL_MN 32
1959
#define DGEMM_DEFAULT_UNROLL_MN 32
1960
#endif
1961
1962
#ifdef ARCH_X86
1963
1964
#define SGEMM_DEFAULT_P 512
1965
#define SGEMM_DEFAULT_R sgemm_r
1966
#define DGEMM_DEFAULT_P 512
1967
#define DGEMM_DEFAULT_R dgemm_r
1968
#define QGEMM_DEFAULT_P 504
1969
#define QGEMM_DEFAULT_R qgemm_r
1970
#define CGEMM_DEFAULT_P 128
1971
#define CGEMM_DEFAULT_R 1024
1972
#define ZGEMM_DEFAULT_P 512
1973
#define ZGEMM_DEFAULT_R zgemm_r
1974
#define XGEMM_DEFAULT_P 252
1975
#define XGEMM_DEFAULT_R xgemm_r
1976
#define SGEMM_DEFAULT_Q 256
1977
#define DGEMM_DEFAULT_Q 256
1978
#define QGEMM_DEFAULT_Q 128
1979
#define CGEMM_DEFAULT_Q 256
1980
#define ZGEMM_DEFAULT_Q 192
1981
#define XGEMM_DEFAULT_Q 128
1982
1983
#else
1984
1985
#define SGEMM_DEFAULT_P 640
1986
#define DGEMM_DEFAULT_P 192
1987
#define CGEMM_DEFAULT_P 384
1988
#define ZGEMM_DEFAULT_P 256
1989
1990
#define SGEMM_DEFAULT_Q 320
1991
#define DGEMM_DEFAULT_Q 384
1992
#define CGEMM_DEFAULT_Q 192
1993
#define ZGEMM_DEFAULT_Q 128
1994
1995
#define SGEMM_DEFAULT_R sgemm_r
1996
#define DGEMM_DEFAULT_R 8640
1997
#define CGEMM_DEFAULT_R cgemm_r
1998
#define ZGEMM_DEFAULT_R zgemm_r
1999
2000
#define QGEMM_DEFAULT_Q 128
2001
#define QGEMM_DEFAULT_P 504
2002
#define QGEMM_DEFAULT_R qgemm_r
2003
#define XGEMM_DEFAULT_P 252
2004
#define XGEMM_DEFAULT_R xgemm_r
2005
#define XGEMM_DEFAULT_Q 128
2006
2007
#define CGEMM3M_DEFAULT_UNROLL_N 4
2008
#define CGEMM3M_DEFAULT_UNROLL_M 8
2009
#define ZGEMM3M_DEFAULT_UNROLL_N 4
2010
#define ZGEMM3M_DEFAULT_UNROLL_M 4
2011
2012
#define CGEMM3M_DEFAULT_P 320
2013
#define ZGEMM3M_DEFAULT_P 256
2014
#define XGEMM3M_DEFAULT_P 112
2015
#define CGEMM3M_DEFAULT_Q 320
2016
#define ZGEMM3M_DEFAULT_Q 256
2017
#define XGEMM3M_DEFAULT_Q 224
2018
#define CGEMM3M_DEFAULT_R 12288
2019
#define ZGEMM3M_DEFAULT_R 12288
2020
#define XGEMM3M_DEFAULT_R 12288
2021
2022
#endif
2023
#endif
2024
2025
2026
#ifdef ATOM
2027
2028
#define SNUMOPT   2
2029
#define DNUMOPT   1
2030
2031
#define GEMM_DEFAULT_OFFSET_A     64
2032
#define GEMM_DEFAULT_OFFSET_B      0
2033
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
2034
2035
#define SYMV_P  8
2036
2037
#ifdef ARCH_X86
2038
#define SGEMM_DEFAULT_UNROLL_M 4
2039
#define DGEMM_DEFAULT_UNROLL_M 2
2040
#define QGEMM_DEFAULT_UNROLL_M 2
2041
#define CGEMM_DEFAULT_UNROLL_M 2
2042
#define ZGEMM_DEFAULT_UNROLL_M 1
2043
#define XGEMM_DEFAULT_UNROLL_M 1
2044
#else
2045
#define SGEMM_DEFAULT_UNROLL_M 8
2046
#define DGEMM_DEFAULT_UNROLL_M 4
2047
#define QGEMM_DEFAULT_UNROLL_M 2
2048
#define CGEMM_DEFAULT_UNROLL_M 4
2049
#define ZGEMM_DEFAULT_UNROLL_M 2
2050
#define XGEMM_DEFAULT_UNROLL_M 1
2051
#endif
2052
2053
#define SGEMM_DEFAULT_UNROLL_N 4
2054
#define DGEMM_DEFAULT_UNROLL_N 2
2055
#define QGEMM_DEFAULT_UNROLL_N 2
2056
#define CGEMM_DEFAULT_UNROLL_N 2
2057
#define ZGEMM_DEFAULT_UNROLL_N 1
2058
#define XGEMM_DEFAULT_UNROLL_N 1
2059
2060
#define SGEMM_DEFAULT_P sgemm_p
2061
#define SGEMM_DEFAULT_R sgemm_r
2062
2063
#define DGEMM_DEFAULT_P dgemm_p
2064
#define DGEMM_DEFAULT_R dgemm_r
2065
2066
#define QGEMM_DEFAULT_P qgemm_p
2067
#define QGEMM_DEFAULT_R qgemm_r
2068
2069
#define CGEMM_DEFAULT_P cgemm_p
2070
#define CGEMM_DEFAULT_R cgemm_r
2071
2072
#define ZGEMM_DEFAULT_P zgemm_p
2073
#define ZGEMM_DEFAULT_R zgemm_r
2074
2075
#define XGEMM_DEFAULT_P xgemm_p
2076
#define XGEMM_DEFAULT_R xgemm_r
2077
2078
#define SGEMM_DEFAULT_Q 256
2079
#define DGEMM_DEFAULT_Q 256
2080
#define QGEMM_DEFAULT_Q 256
2081
#define CGEMM_DEFAULT_Q 256
2082
#define ZGEMM_DEFAULT_Q 256
2083
#define XGEMM_DEFAULT_Q 256
2084
2085
#endif
2086
2087
2088
#ifdef ITANIUM2
2089
2090
#define SNUMOPT   4
2091
#define DNUMOPT   4
2092
2093
#define GEMM_DEFAULT_OFFSET_A 0
2094
#define GEMM_DEFAULT_OFFSET_B 128
2095
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
2096
2097
#define SGEMM_DEFAULT_UNROLL_M 8
2098
#define SGEMM_DEFAULT_UNROLL_N 8
2099
#define DGEMM_DEFAULT_UNROLL_M 8
2100
#define DGEMM_DEFAULT_UNROLL_N 8
2101
#define QGEMM_DEFAULT_UNROLL_M 8
2102
#define QGEMM_DEFAULT_UNROLL_N 8
2103
#define CGEMM_DEFAULT_UNROLL_M 4
2104
#define CGEMM_DEFAULT_UNROLL_N 4
2105
#define ZGEMM_DEFAULT_UNROLL_M 4
2106
#define ZGEMM_DEFAULT_UNROLL_N 4
2107
#define XGEMM_DEFAULT_UNROLL_M 4
2108
#define XGEMM_DEFAULT_UNROLL_N 4
2109
2110
#define SGEMM_DEFAULT_P sgemm_p
2111
#define DGEMM_DEFAULT_P dgemm_p
2112
#define QGEMM_DEFAULT_P qgemm_p
2113
#define CGEMM_DEFAULT_P cgemm_p
2114
#define ZGEMM_DEFAULT_P zgemm_p
2115
#define XGEMM_DEFAULT_P xgemm_p
2116
2117
#define SGEMM_DEFAULT_Q 1024
2118
#define DGEMM_DEFAULT_Q 1024
2119
#define QGEMM_DEFAULT_Q 1024
2120
#define CGEMM_DEFAULT_Q 1024
2121
#define ZGEMM_DEFAULT_Q 1024
2122
#define XGEMM_DEFAULT_Q 1024
2123
2124
#define SGEMM_DEFAULT_R sgemm_r
2125
#define DGEMM_DEFAULT_R dgemm_r
2126
#define QGEMM_DEFAULT_R qgemm_r
2127
#define CGEMM_DEFAULT_R cgemm_r
2128
#define ZGEMM_DEFAULT_R zgemm_r
2129
#define XGEMM_DEFAULT_R xgemm_r
2130
2131
#define SYMV_P   16
2132
2133
#define GETRF_FACTOR 0.65
2134
2135
#endif
2136
2137
/* Parameter set compiled when any of EV4, EV5 or EV6 is defined.
 * SNUMOPT/DNUMOPT are 1 for EV4 and 2 otherwise; the offsets, alignment
 * mask, unroll factors and SYMV_P are shared by all three targets; the
 * xGEMM_DEFAULT_P/Q values are picked per target in the nested sections. */
#if defined(EV4) || defined(EV5) || defined(EV6)
2138
2139
#ifdef EV4
2140
#define SNUMOPT   1
2141
#define DNUMOPT   1
2142
#else
2143
#define SNUMOPT   2
2144
#define DNUMOPT   2
2145
#endif
2146
2147
#define GEMM_DEFAULT_OFFSET_A 512
2148
#define GEMM_DEFAULT_OFFSET_B 512
2149
#define GEMM_DEFAULT_ALIGN 0x0ffffUL
2150
2151
#define SGEMM_DEFAULT_UNROLL_M 4
2152
#define SGEMM_DEFAULT_UNROLL_N 4
2153
#define DGEMM_DEFAULT_UNROLL_M 4
2154
#define DGEMM_DEFAULT_UNROLL_N 4
2155
#define CGEMM_DEFAULT_UNROLL_M 2
2156
#define CGEMM_DEFAULT_UNROLL_N 2
2157
#define ZGEMM_DEFAULT_UNROLL_M 2
2158
#define ZGEMM_DEFAULT_UNROLL_N 2
2159
2160
#define SYMV_P   8
2161
2162
/* EV4 is the only target of this group that also fixes xGEMM_DEFAULT_R here. */
#ifdef EV4
2163
#define SGEMM_DEFAULT_P  32
2164
#define SGEMM_DEFAULT_Q 112
2165
#define SGEMM_DEFAULT_R 256
2166
2167
#define DGEMM_DEFAULT_P  32
2168
#define DGEMM_DEFAULT_Q  56
2169
#define DGEMM_DEFAULT_R 256
2170
2171
#define CGEMM_DEFAULT_P  32
2172
#define CGEMM_DEFAULT_Q  64
2173
#define CGEMM_DEFAULT_R 240
2174
2175
#define ZGEMM_DEFAULT_P  32
2176
#define ZGEMM_DEFAULT_Q  32
2177
#define ZGEMM_DEFAULT_R 240
2178
#endif
2179
2180
/* NOTE(review): EV5 and EV6 define only P and Q in this block; no
 * xGEMM_DEFAULT_R is set here - presumably supplied elsewhere, confirm. */
#ifdef EV5
2181
#define SGEMM_DEFAULT_P  64
2182
#define SGEMM_DEFAULT_Q 256
2183
2184
#define DGEMM_DEFAULT_P  64
2185
#define DGEMM_DEFAULT_Q 128
2186
2187
#define CGEMM_DEFAULT_P  64
2188
#define CGEMM_DEFAULT_Q 128
2189
2190
#define ZGEMM_DEFAULT_P  64
2191
#define ZGEMM_DEFAULT_Q  64
2192
#endif
2193
2194
#ifdef EV6
2195
#define SGEMM_DEFAULT_P 256
2196
#define SGEMM_DEFAULT_Q 512
2197
2198
#define DGEMM_DEFAULT_P 256
2199
#define DGEMM_DEFAULT_Q 256
2200
2201
#define CGEMM_DEFAULT_P 256
2202
#define CGEMM_DEFAULT_Q 256
2203
2204
#define ZGEMM_DEFAULT_P 128
2205
#define ZGEMM_DEFAULT_Q 256
2206
#endif
2207
2208
#endif
2209
2210
/* Parameter set compiled when CELL is defined: SNUMOPT/DNUMOPT, buffer
 * offsets and alignment mask, per-precision unroll factors, P and Q values
 * and SYMV_P. No xGEMM_DEFAULT_R is defined in this block. */
#ifdef CELL
2211
2212
#define SNUMOPT   2
2213
#define DNUMOPT   2
2214
2215
#define GEMM_DEFAULT_OFFSET_A 0
2216
#define GEMM_DEFAULT_OFFSET_B 8192
2217
#define GEMM_DEFAULT_ALIGN 0x0ffffUL
2218
2219
#define SGEMM_DEFAULT_UNROLL_M 16
2220
#define SGEMM_DEFAULT_UNROLL_N 4
2221
#define DGEMM_DEFAULT_UNROLL_M 4
2222
#define DGEMM_DEFAULT_UNROLL_N 4
2223
#define CGEMM_DEFAULT_UNROLL_M 8
2224
#define CGEMM_DEFAULT_UNROLL_N 2
2225
#define ZGEMM_DEFAULT_UNROLL_M 2
2226
#define ZGEMM_DEFAULT_UNROLL_N 2
2227
2228
/* P is the same (128) for all four precisions; Q differs per precision. */
#define SGEMM_DEFAULT_P 128
2229
#define DGEMM_DEFAULT_P 128
2230
#define CGEMM_DEFAULT_P 128
2231
#define ZGEMM_DEFAULT_P 128
2232
2233
#define SGEMM_DEFAULT_Q  512
2234
#define DGEMM_DEFAULT_Q  256
2235
#define CGEMM_DEFAULT_Q  256
2236
#define ZGEMM_DEFAULT_Q  128
2237
2238
#define SYMV_P   4
2239
#endif
2240
2241
/* Parameter set compiled when PPCG4 is defined. Unlike the neighbouring
 * blocks it defines neither SNUMOPT/DNUMOPT nor any xGEMM_DEFAULT_R. */
#ifdef PPCG4
2242
#define GEMM_DEFAULT_OFFSET_A    0
2243
#define GEMM_DEFAULT_OFFSET_B 1024
2244
#define GEMM_DEFAULT_ALIGN 0x0ffffUL
2245
2246
#define SGEMM_DEFAULT_UNROLL_M 4
2247
#define SGEMM_DEFAULT_UNROLL_N 4
2248
#define DGEMM_DEFAULT_UNROLL_M 4
2249
#define DGEMM_DEFAULT_UNROLL_N 4
2250
#define CGEMM_DEFAULT_UNROLL_M 2
2251
#define CGEMM_DEFAULT_UNROLL_N 2
2252
#define ZGEMM_DEFAULT_UNROLL_M 2
2253
#define ZGEMM_DEFAULT_UNROLL_N 2
2254
2255
#define SGEMM_DEFAULT_P 256
2256
#define DGEMM_DEFAULT_P 128
2257
#define CGEMM_DEFAULT_P 128
2258
#define ZGEMM_DEFAULT_P  64
2259
2260
/* Q is 256 for every precision. */
#define SGEMM_DEFAULT_Q 256
2261
#define DGEMM_DEFAULT_Q 256
2262
#define CGEMM_DEFAULT_Q 256
2263
#define ZGEMM_DEFAULT_Q 256
2264
2265
#define SYMV_P   4
2266
#endif
2267
2268
/* Parameter set compiled when PPC970 is defined. The SGEMM and CGEMM
 * M-unroll factors depend on byte order, and the xGEMM_DEFAULT_P values
 * depend on the OS and on L2_SIZE. */
#ifdef PPC970
2269
2270
#define SNUMOPT   4
2271
#define DNUMOPT   4
2272
2273
#define GEMM_DEFAULT_OFFSET_A 2688
2274
#define GEMM_DEFAULT_OFFSET_B 3072
2275
#define GEMM_DEFAULT_ALIGN 0x03fffUL
2276
2277
/* Big-endian builds use an SGEMM M-unroll of 4; all other builds use 16. */
#if defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
2278
#define SGEMM_DEFAULT_UNROLL_M 4
2279
#else
2280
#define SGEMM_DEFAULT_UNROLL_M 16
2281
#endif
2282
#define SGEMM_DEFAULT_UNROLL_N 4
2283
#define DGEMM_DEFAULT_UNROLL_M 4
2284
#define DGEMM_DEFAULT_UNROLL_N 4
2285
/* Big-endian builds use a CGEMM M-unroll of 2; all other builds use 8. */
#if defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
2286
#define CGEMM_DEFAULT_UNROLL_M 2
2287
#else
2288
#define CGEMM_DEFAULT_UNROLL_M 8
2289
#endif
2290
#define CGEMM_DEFAULT_UNROLL_N 2
2291
#define ZGEMM_DEFAULT_UNROLL_M 2
2292
#define ZGEMM_DEFAULT_UNROLL_N 2
2293
2294
/* xGEMM_DEFAULT_P is only defined when one of OS_LINUX, OS_DARWIN or
 * OS_FREEBSD is defined; on any other OS this block leaves it undefined.
 * NOTE(review): 1024976 is not a power of two (1 MiB would be 1048576), so
 * the first branch may never be taken - confirm the intended L2_SIZE. */
#if defined(OS_LINUX) || defined(OS_DARWIN) || defined(OS_FREEBSD)
2295
#if L2_SIZE == 1024976
2296
#define SGEMM_DEFAULT_P 320
2297
#define DGEMM_DEFAULT_P 256
2298
#define CGEMM_DEFAULT_P 256
2299
#define ZGEMM_DEFAULT_P 256
2300
#else
2301
#define SGEMM_DEFAULT_P 176
2302
#define DGEMM_DEFAULT_P 176
2303
#define CGEMM_DEFAULT_P 176
2304
#define ZGEMM_DEFAULT_P 176
2305
#endif
2306
#endif
2307
2308
#define SGEMM_DEFAULT_Q 512
2309
#define DGEMM_DEFAULT_Q 256
2310
#define CGEMM_DEFAULT_Q 256
2311
#define ZGEMM_DEFAULT_Q 128
2312
2313
#define SYMV_P   4
2314
2315
#endif
2316
2317
/* Parameter set compiled when PPC440 is defined. Both buffer offsets
 * evaluate to 0, and each xGEMM_DEFAULT_R expands to the matching
 * xGEMM_DEFAULT_P. */
#ifdef PPC440
2318
2319
#define SNUMOPT   2
2320
#define DNUMOPT   2
2321
2322
#define GEMM_DEFAULT_OFFSET_A (32 * 0)
2323
#define GEMM_DEFAULT_OFFSET_B (32 * 0)
2324
#define GEMM_DEFAULT_ALIGN 0x0ffffUL
2325
2326
#define SGEMM_DEFAULT_UNROLL_M 4
2327
#define SGEMM_DEFAULT_UNROLL_N 4
2328
#define DGEMM_DEFAULT_UNROLL_M 4
2329
#define DGEMM_DEFAULT_UNROLL_N 4
2330
#define CGEMM_DEFAULT_UNROLL_M 2
2331
#define CGEMM_DEFAULT_UNROLL_N 2
2332
#define ZGEMM_DEFAULT_UNROLL_M 2
2333
#define ZGEMM_DEFAULT_UNROLL_N 2
2334
2335
#define SGEMM_DEFAULT_P 512
2336
#define DGEMM_DEFAULT_P 512
2337
#define CGEMM_DEFAULT_P 512
2338
#define ZGEMM_DEFAULT_P 512
2339
2340
#define SGEMM_DEFAULT_Q 1024
2341
#define DGEMM_DEFAULT_Q  512
2342
#define CGEMM_DEFAULT_Q  512
2343
#define ZGEMM_DEFAULT_Q  256
2344
2345
/* R aliases P, so with the values above every R is 512. */
#define SGEMM_DEFAULT_R SGEMM_DEFAULT_P
2346
#define DGEMM_DEFAULT_R DGEMM_DEFAULT_P
2347
#define CGEMM_DEFAULT_R CGEMM_DEFAULT_P
2348
#define ZGEMM_DEFAULT_R ZGEMM_DEFAULT_P
2349
2350
#define SYMV_P   4
2351
#endif
2352
2353
/* Parameter set compiled when PPC440FP2 is defined. Both buffer offsets
 * evaluate to 0. No xGEMM_DEFAULT_R is defined in this block. */
#ifdef PPC440FP2
2354
2355
#define SNUMOPT   4
2356
#define DNUMOPT   4
2357
2358
#define GEMM_DEFAULT_OFFSET_A (32 * 0)
2359
#define GEMM_DEFAULT_OFFSET_B (32 * 0)
2360
#define GEMM_DEFAULT_ALIGN 0x0ffffUL
2361
2362
#define SGEMM_DEFAULT_UNROLL_M 8
2363
#define SGEMM_DEFAULT_UNROLL_N 4
2364
#define DGEMM_DEFAULT_UNROLL_M 8
2365
#define DGEMM_DEFAULT_UNROLL_N 4
2366
#define CGEMM_DEFAULT_UNROLL_M 4
2367
#define CGEMM_DEFAULT_UNROLL_N 2
2368
#define ZGEMM_DEFAULT_UNROLL_M 4
2369
#define ZGEMM_DEFAULT_UNROLL_N 2
2370
2371
#define SGEMM_DEFAULT_P 128
2372
#define DGEMM_DEFAULT_P 128
2373
#define CGEMM_DEFAULT_P 128
2374
#define ZGEMM_DEFAULT_P 128
2375
/* The "#if 1" hard-selects the larger Q values; the #else branch with the
 * smaller values is never compiled. */
#if 1
2376
#define SGEMM_DEFAULT_Q 4096
2377
#define DGEMM_DEFAULT_Q 3072
2378
#define CGEMM_DEFAULT_Q 2048
2379
#define ZGEMM_DEFAULT_Q 1024
2380
#else
2381
#define SGEMM_DEFAULT_Q  512
2382
#define DGEMM_DEFAULT_Q  256
2383
#define CGEMM_DEFAULT_Q  256
2384
#define ZGEMM_DEFAULT_Q  128
2385
#endif
2386
2387
#define SYMV_P   4
2388
#endif
2389
2390
2391
2392
/* Parameter set compiled when any of POWER3, POWER4 or POWER5 is defined.
 * Offsets, alignment mask, unroll factors and SYMV_P are shared; P/Q (and,
 * for POWER3 only, R and SNUMOPT/DNUMOPT) are set in the nested sections. */
#if defined(POWER3) || defined(POWER4) || defined(POWER5)
2393
#define GEMM_DEFAULT_OFFSET_A 0
2394
#define GEMM_DEFAULT_OFFSET_B 2048
2395
#define GEMM_DEFAULT_ALIGN 0x0ffffUL
2396
2397
#define SGEMM_DEFAULT_UNROLL_M 4
2398
#define SGEMM_DEFAULT_UNROLL_N 4
2399
#define DGEMM_DEFAULT_UNROLL_M 4
2400
#define DGEMM_DEFAULT_UNROLL_N 4
2401
#define CGEMM_DEFAULT_UNROLL_M 2
2402
#define CGEMM_DEFAULT_UNROLL_N 2
2403
#define ZGEMM_DEFAULT_UNROLL_M 2
2404
#define ZGEMM_DEFAULT_UNROLL_N 2
2405
2406
/* POWER3 is the only target of this group that defines SNUMOPT/DNUMOPT
 * and xGEMM_DEFAULT_R here. */
#ifdef POWER3
2407
2408
#define SNUMOPT   4
2409
#define DNUMOPT   4
2410
2411
#define SGEMM_DEFAULT_P 256
2412
#define SGEMM_DEFAULT_Q 432
2413
#define SGEMM_DEFAULT_R 1012
2414
2415
#define DGEMM_DEFAULT_P 256
2416
#define DGEMM_DEFAULT_Q 216
2417
#define DGEMM_DEFAULT_R 1012
2418
2419
#define CGEMM_DEFAULT_P 256
2420
#define CGEMM_DEFAULT_Q 104
2421
#define CGEMM_DEFAULT_R 1012
2422
   
2423
#define ZGEMM_DEFAULT_P 256
2424
#define ZGEMM_DEFAULT_Q 104
2425
#define ZGEMM_DEFAULT_R 1012
2426
#endif
2427
2428
/* POWER4: P is 184 when ALLOC_HUGETLB is defined and 144 otherwise, for
 * every precision; Q is always 256. */
#if defined(POWER4)
2429
#ifdef ALLOC_HUGETLB
2430
#define SGEMM_DEFAULT_P 184
2431
#define DGEMM_DEFAULT_P 184
2432
#define CGEMM_DEFAULT_P 184
2433
#define ZGEMM_DEFAULT_P 184
2434
#else
2435
#define SGEMM_DEFAULT_P 144
2436
#define DGEMM_DEFAULT_P 144
2437
#define CGEMM_DEFAULT_P 144
2438
#define ZGEMM_DEFAULT_P 144
2439
#endif
2440
2441
#define SGEMM_DEFAULT_Q 256
2442
#define CGEMM_DEFAULT_Q 256
2443
#define DGEMM_DEFAULT_Q 256
2444
#define ZGEMM_DEFAULT_Q 256
2445
#endif
2446
2447
/* POWER5: P varies per precision and is larger when ALLOC_HUGETLB is
 * defined; Q is always 256. */
#if defined(POWER5)
2448
#ifdef ALLOC_HUGETLB
2449
#define SGEMM_DEFAULT_P 512
2450
#define DGEMM_DEFAULT_P 256
2451
#define CGEMM_DEFAULT_P 256
2452
#define ZGEMM_DEFAULT_P 128
2453
#else
2454
#define SGEMM_DEFAULT_P 320
2455
#define DGEMM_DEFAULT_P 160
2456
#define CGEMM_DEFAULT_P 160
2457
#define ZGEMM_DEFAULT_P  80
2458
#endif
2459
2460
#define SGEMM_DEFAULT_Q 256
2461
#define CGEMM_DEFAULT_Q 256
2462
#define DGEMM_DEFAULT_Q 256
2463
#define ZGEMM_DEFAULT_Q 256
2464
#endif
2465
2466
#define SYMV_P   8
2467
2468
#endif
2469
2470
/* Parameter set compiled when POWER6 is defined. The complex kernels use
 * an N-unroll of 4 here. No xGEMM_DEFAULT_R is defined in this block. */
#if defined(POWER6)
2471
2472
#define SNUMOPT   4
2473
#define DNUMOPT   4
2474
2475
#define GEMM_DEFAULT_OFFSET_A  384
2476
#define GEMM_DEFAULT_OFFSET_B 1024
2477
#define GEMM_DEFAULT_ALIGN 0x03fffUL
2478
2479
#define SGEMM_DEFAULT_UNROLL_M 4
2480
#define SGEMM_DEFAULT_UNROLL_N 4
2481
#define DGEMM_DEFAULT_UNROLL_M 4
2482
#define DGEMM_DEFAULT_UNROLL_N 4
2483
#define CGEMM_DEFAULT_UNROLL_M 2
2484
#define CGEMM_DEFAULT_UNROLL_N 4
2485
#define ZGEMM_DEFAULT_UNROLL_M 2
2486
#define ZGEMM_DEFAULT_UNROLL_N 4
2487
2488
#define SGEMM_DEFAULT_P  992
2489
#define DGEMM_DEFAULT_P  480
2490
#define CGEMM_DEFAULT_P  488
2491
#define ZGEMM_DEFAULT_P  248
2492
2493
#define SGEMM_DEFAULT_Q  504
2494
#define DGEMM_DEFAULT_Q  504
2495
#define CGEMM_DEFAULT_Q  400
2496
#define ZGEMM_DEFAULT_Q  400
2497
2498
#define SYMV_P   8
2499
2500
#endif
2501
2502
/* Parameter set compiled when POWER8 is defined, or when POWER9 is defined
 * together with OS_AIX. Unroll factors depend on __32BIT__; P, Q, R and
 * SYMV_P are common to both variants. */
#if defined(POWER8) || (defined(POWER9) && defined(OS_AIX))
2503
2504
#define SNUMOPT   16
2505
#define DNUMOPT   8
2506
2507
#define GEMM_DEFAULT_OFFSET_A 0 
2508
#define GEMM_DEFAULT_OFFSET_B 65536
2509
2510
#define GEMM_DEFAULT_ALIGN 0x0ffffUL
2511
/* 32-bit builds emit a compile-time warning and use the same unroll
 * factors as the POWER6 block of this file; other builds use the wider
 * factors in the #else branch. */
#if defined(__32BIT__)
2512
#warning using BINARY32==POWER6
2513
#define SGEMM_DEFAULT_UNROLL_M 4
2514
#define SGEMM_DEFAULT_UNROLL_N 4
2515
#define DGEMM_DEFAULT_UNROLL_M 4
2516
#define DGEMM_DEFAULT_UNROLL_N 4
2517
#define CGEMM_DEFAULT_UNROLL_M 2
2518
#define CGEMM_DEFAULT_UNROLL_N 4
2519
#define ZGEMM_DEFAULT_UNROLL_M 2
2520
#define ZGEMM_DEFAULT_UNROLL_N 4
2521
#else
2522
#define SGEMM_DEFAULT_UNROLL_M 16
2523
#define SGEMM_DEFAULT_UNROLL_N 8
2524
#define DGEMM_DEFAULT_UNROLL_M 16
2525
#define DGEMM_DEFAULT_UNROLL_N 4
2526
#define CGEMM_DEFAULT_UNROLL_M 8
2527
#define CGEMM_DEFAULT_UNROLL_N 4
2528
#define ZGEMM_DEFAULT_UNROLL_M 8
2529
#define ZGEMM_DEFAULT_UNROLL_N 2
2530
#endif
2531
/* P and Q carry a UL suffix here, i.e. they are unsigned long constants. */
#define SGEMM_DEFAULT_P  1280UL
2532
#define DGEMM_DEFAULT_P  640UL
2533
#define CGEMM_DEFAULT_P  640UL
2534
#define ZGEMM_DEFAULT_P  320UL
2535
2536
#define SGEMM_DEFAULT_Q  640UL
2537
#define DGEMM_DEFAULT_Q  720UL
2538
#define CGEMM_DEFAULT_Q  640UL
2539
#define ZGEMM_DEFAULT_Q  640UL
2540
2541
/* Disabled alternative (R aliasing P); the fixed 4096 values below are the
 * ones actually compiled. */
#if 0
2542
#define SGEMM_DEFAULT_R SGEMM_DEFAULT_P
2543
#define DGEMM_DEFAULT_R DGEMM_DEFAULT_P
2544
#define CGEMM_DEFAULT_R CGEMM_DEFAULT_P
2545
#define ZGEMM_DEFAULT_R ZGEMM_DEFAULT_P
2546
#endif
2547
#define SGEMM_DEFAULT_R 4096
2548
#define DGEMM_DEFAULT_R 4096
2549
#define CGEMM_DEFAULT_R 4096
2550
#define ZGEMM_DEFAULT_R 4096
2551
2552
#define SYMV_P   8
2553
2554
#endif
2555
2556
/* Parameter set compiled when POWER9 is defined together with OS_LINUX or
 * OS_FREEBSD. In addition to the usual offsets, unroll factors and P/Q/R
 * values it defines SWITCH_RATIO and GEMM_PREFERED_SIZE. */
#if defined(POWER9) && (defined(OS_LINUX) || defined(OS_FREEBSD))
2557
2558
#define SNUMOPT   16
2559
#define DNUMOPT   8
2560
2561
#define GEMM_DEFAULT_OFFSET_A 0 
2562
#define GEMM_DEFAULT_OFFSET_B 65536
2563
#define GEMM_DEFAULT_ALIGN 0x0ffffUL
2564
2565
#define SWITCH_RATIO            16
2566
#define GEMM_PREFERED_SIZE      16
2567
2568
#define SGEMM_DEFAULT_UNROLL_M 16
2569
#define SGEMM_DEFAULT_UNROLL_N 8
2570
#define DGEMM_DEFAULT_UNROLL_M 16
2571
#define DGEMM_DEFAULT_UNROLL_N 4
2572
#define CGEMM_DEFAULT_UNROLL_M 8
2573
#define CGEMM_DEFAULT_UNROLL_N 4
2574
#define ZGEMM_DEFAULT_UNROLL_M 8
2575
#define ZGEMM_DEFAULT_UNROLL_N 2
2576
2577
#define SGEMM_DEFAULT_P 832
2578
#define DGEMM_DEFAULT_P  128
2579
#define CGEMM_DEFAULT_P  512
2580
#define ZGEMM_DEFAULT_P 256
2581
2582
#define SGEMM_DEFAULT_Q 1026
2583
#define DGEMM_DEFAULT_Q  384
2584
#define CGEMM_DEFAULT_Q  1026
2585
#define ZGEMM_DEFAULT_Q 1026
2586
2587
/* R is 4096 for every precision. */
#define SGEMM_DEFAULT_R 4096
2588
#define DGEMM_DEFAULT_R 4096
2589
#define CGEMM_DEFAULT_R 4096
2590
#define ZGEMM_DEFAULT_R 4096
2591
2592
#define SYMV_P   8
2593
2594
#endif
2595
2596
/* Parameter set compiled when POWER10 is defined. Besides the S/D/C/Z
 * values it replaces any earlier SBGEMM_DEFAULT_ definitions with its own. */
#if defined(POWER10)
2597
#define SNUMOPT   16
2598
#define DNUMOPT   8
2599
2600
#define GEMM_DEFAULT_OFFSET_A 0
2601
#define GEMM_DEFAULT_OFFSET_B 65536
2602
#define GEMM_DEFAULT_ALIGN 0x0ffffUL
2603
2604
#define SWITCH_RATIO            16
2605
#define GEMM_PREFERED_SIZE      16
2606
2607
#define SGEMM_DEFAULT_UNROLL_M 16
2608
#define SGEMM_DEFAULT_UNROLL_N 8
2609
#define DGEMM_DEFAULT_UNROLL_M 8
2610
#define DGEMM_DEFAULT_UNROLL_N 8
2611
#define CGEMM_DEFAULT_UNROLL_M 8
2612
#define CGEMM_DEFAULT_UNROLL_N 4
2613
#define ZGEMM_DEFAULT_UNROLL_M 8
2614
#define ZGEMM_DEFAULT_UNROLL_N 2
2615
2616
#define SGEMM_DEFAULT_P 512
2617
#define DGEMM_DEFAULT_P 384
2618
#define CGEMM_DEFAULT_P 512
2619
#define ZGEMM_DEFAULT_P 256
2620
2621
#define SGEMM_DEFAULT_Q 512
2622
#define DGEMM_DEFAULT_Q 512
2623
#define CGEMM_DEFAULT_Q 384
2624
#define ZGEMM_DEFAULT_Q 384 
2625
2626
#define SGEMM_DEFAULT_R 4096
2627
#define DGEMM_DEFAULT_R 4096
2628
#define CGEMM_DEFAULT_R 4096
2629
#define ZGEMM_DEFAULT_R 4096
2630
2631
#define SYMV_P   8
2632
2633
/* Drop whatever SBGEMM defaults were defined earlier (not visible in this
 * block) so the redefinitions below do not clash with them. */
#undef SBGEMM_DEFAULT_UNROLL_N
2634
#undef SBGEMM_DEFAULT_UNROLL_M
2635
#undef SBGEMM_DEFAULT_P
2636
#undef SBGEMM_DEFAULT_R
2637
#undef SBGEMM_DEFAULT_Q
2638
#define SBGEMM_DEFAULT_UNROLL_M 16
2639
#define SBGEMM_DEFAULT_UNROLL_N 8
2640
#define SBGEMM_DEFAULT_P 512
2641
#define SBGEMM_DEFAULT_Q 1024
2642
#define SBGEMM_DEFAULT_R 4096
2643
#endif
2644
2645
/* Parameter set compiled when both SPARC and V7 are defined. It also maps
 * GEMM_THREAD to gemm_thread_mn. No xGEMM_DEFAULT_R is defined here. */
#if defined(SPARC) && defined(V7)
2646
2647
#define SNUMOPT   4
2648
#define DNUMOPT   4
2649
2650
#define GEMM_DEFAULT_OFFSET_A 0
2651
#define GEMM_DEFAULT_OFFSET_B 2048
2652
#define GEMM_DEFAULT_ALIGN 0x03fffUL
2653
2654
#define SGEMM_DEFAULT_UNROLL_M 2
2655
#define SGEMM_DEFAULT_UNROLL_N 8
2656
#define DGEMM_DEFAULT_UNROLL_M 2
2657
#define DGEMM_DEFAULT_UNROLL_N 8
2658
#define CGEMM_DEFAULT_UNROLL_M 1
2659
#define CGEMM_DEFAULT_UNROLL_N 4
2660
#define ZGEMM_DEFAULT_UNROLL_M 1
2661
#define ZGEMM_DEFAULT_UNROLL_N 4
2662
2663
#define SGEMM_DEFAULT_P  256
2664
#define DGEMM_DEFAULT_P  256
2665
#define CGEMM_DEFAULT_P  256
2666
#define ZGEMM_DEFAULT_P  256
2667
2668
#define SGEMM_DEFAULT_Q  512
2669
#define DGEMM_DEFAULT_Q  256
2670
#define CGEMM_DEFAULT_Q  256
2671
#define ZGEMM_DEFAULT_Q  128
2672
2673
#define SYMV_P   8
2674
/* gemm_thread_mn is an identifier defined outside this block. */
#define GEMM_THREAD gemm_thread_mn
2675
#endif
2676
2677
/* Parameter set compiled when SPARC and V9 are both defined, or when the
 * compiler defines __sparc_v9__. No xGEMM_DEFAULT_R is defined here. */
#if (defined(SPARC) && defined(V9)) || defined(__sparc_v9__)
2678
2679
#define SNUMOPT   2
2680
#define DNUMOPT   2
2681
2682
#define GEMM_DEFAULT_OFFSET_A 0
2683
#define GEMM_DEFAULT_OFFSET_B 2048
2684
#define GEMM_DEFAULT_ALIGN 0x03fffUL
2685
2686
#define SGEMM_DEFAULT_UNROLL_M 4
2687
#define SGEMM_DEFAULT_UNROLL_N 4
2688
#define DGEMM_DEFAULT_UNROLL_M 4
2689
#define DGEMM_DEFAULT_UNROLL_N 4
2690
#define CGEMM_DEFAULT_UNROLL_M 2
2691
#define CGEMM_DEFAULT_UNROLL_N 2
2692
#define ZGEMM_DEFAULT_UNROLL_M 2
2693
#define ZGEMM_DEFAULT_UNROLL_N 2
2694
2695
/* P is 512 for every precision; Q differs per precision. */
#define SGEMM_DEFAULT_P  512
2696
#define DGEMM_DEFAULT_P  512
2697
#define CGEMM_DEFAULT_P  512
2698
#define ZGEMM_DEFAULT_P  512
2699
2700
#define SGEMM_DEFAULT_Q 1024
2701
#define DGEMM_DEFAULT_Q  512
2702
#define CGEMM_DEFAULT_Q  512
2703
#define ZGEMM_DEFAULT_Q  256
2704
2705
#define SYMV_P   8
2706
#endif
2707
2708
/* Parameter set compiled when SICORTEX is defined. The alignment mask is
 * explicitly cast to BLASLONG, a type declared outside this block. */
#ifdef SICORTEX
2709
2710
#define SNUMOPT   2
2711
#define DNUMOPT   2
2712
2713
#define GEMM_DEFAULT_OFFSET_A 0
2714
#define GEMM_DEFAULT_OFFSET_B 0
2715
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
2716
2717
#define SGEMM_DEFAULT_UNROLL_M  2
2718
#define SGEMM_DEFAULT_UNROLL_N  8
2719
#define DGEMM_DEFAULT_UNROLL_M  2
2720
#define DGEMM_DEFAULT_UNROLL_N  8
2721
#define CGEMM_DEFAULT_UNROLL_M  1
2722
#define CGEMM_DEFAULT_UNROLL_N  4
2723
#define ZGEMM_DEFAULT_UNROLL_M  1
2724
#define ZGEMM_DEFAULT_UNROLL_N  4
2725
2726
#define SGEMM_DEFAULT_P 108
2727
#define DGEMM_DEFAULT_P 112
2728
#define CGEMM_DEFAULT_P 108
2729
#define ZGEMM_DEFAULT_P 112
2730
2731
#define SGEMM_DEFAULT_Q 288
2732
#define DGEMM_DEFAULT_Q 144
2733
#define CGEMM_DEFAULT_Q 144
2734
#define ZGEMM_DEFAULT_Q  72
2735
2736
/* R is 2000 for every precision. */
#define SGEMM_DEFAULT_R 2000
2737
#define DGEMM_DEFAULT_R 2000
2738
#define CGEMM_DEFAULT_R 2000
2739
#define ZGEMM_DEFAULT_R 2000
2740
2741
#define SYMV_P  16
2742
#endif
2743
2744
/* Parameter set compiled when LOONGSON3R4 is defined. Unroll factors
 * depend on NO_MSA; P, Q, R, the GEMM_OFFSET_A1/B1 constants and SYMV_P
 * are common to both variants. */
#if defined(LOONGSON3R4)
2745
#define SNUMOPT   2
2746
#define DNUMOPT   2
2747
2748
#define GEMM_DEFAULT_OFFSET_A 0
2749
#define GEMM_DEFAULT_OFFSET_B 0
2750
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
2751
2752
/* With NO_MSA defined the smaller unroll factors are used; otherwise the
 * larger ones in the #else branch apply. */
#if defined(NO_MSA)
2753
#define SGEMM_DEFAULT_UNROLL_M  8
2754
#define SGEMM_DEFAULT_UNROLL_N  4
2755
2756
#define DGEMM_DEFAULT_UNROLL_M  4
2757
#define DGEMM_DEFAULT_UNROLL_N  4
2758
2759
#define CGEMM_DEFAULT_UNROLL_M  4
2760
#define CGEMM_DEFAULT_UNROLL_N  2
2761
2762
#define ZGEMM_DEFAULT_UNROLL_M  2
2763
#define ZGEMM_DEFAULT_UNROLL_N  2
2764
#else
2765
#define SGEMM_DEFAULT_UNROLL_M  8
2766
#define SGEMM_DEFAULT_UNROLL_N  8
2767
2768
#define DGEMM_DEFAULT_UNROLL_M  8
2769
#define DGEMM_DEFAULT_UNROLL_N  4
2770
2771
#define CGEMM_DEFAULT_UNROLL_M  8
2772
#define CGEMM_DEFAULT_UNROLL_N  4
2773
2774
#define ZGEMM_DEFAULT_UNROLL_M  4
2775
#define ZGEMM_DEFAULT_UNROLL_N  4
2776
#endif
2777
2778
#define SGEMM_DEFAULT_P 64
2779
#define DGEMM_DEFAULT_P 44
2780
#define CGEMM_DEFAULT_P 64
2781
#define ZGEMM_DEFAULT_P 32
2782
2783
#define SGEMM_DEFAULT_Q 192
2784
#define DGEMM_DEFAULT_Q 92
2785
#define CGEMM_DEFAULT_Q 128
2786
#define ZGEMM_DEFAULT_Q 80
2787
2788
/* DGEMM_DEFAULT_R expands to the identifier dgemm_r (defined outside this
 * block) rather than to a literal; the other three are the literal 640. */
#define SGEMM_DEFAULT_R 640
2789
#define DGEMM_DEFAULT_R dgemm_r
2790
#define CGEMM_DEFAULT_R 640
2791
#define ZGEMM_DEFAULT_R 640
2792
2793
#define GEMM_OFFSET_A1  0x10000
2794
#define GEMM_OFFSET_B1  0x100000
2795
2796
#define SYMV_P  16
2797
#endif
2798
2799
/* Parameter set compiled when LOONGSON3R3 is defined. */
#if defined(LOONGSON3R3)
2800
////Copy from SICORTEX
/* NOTE(review): despite the remark above, the unroll factors and P/Q/R
 * values below differ from the SICORTEX block in this file. */
2801
#define SNUMOPT   2
2802
#define DNUMOPT   2
2803
2804
#define GEMM_DEFAULT_OFFSET_A 0
2805
#define GEMM_DEFAULT_OFFSET_B 0
2806
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
2807
2808
#define SGEMM_DEFAULT_UNROLL_M  8
2809
#define SGEMM_DEFAULT_UNROLL_N  4
2810
2811
#define DGEMM_DEFAULT_UNROLL_M  4
2812
#define DGEMM_DEFAULT_UNROLL_N  4
2813
2814
#define CGEMM_DEFAULT_UNROLL_M  4
2815
#define CGEMM_DEFAULT_UNROLL_N  2
2816
2817
#define ZGEMM_DEFAULT_UNROLL_M  2
2818
#define ZGEMM_DEFAULT_UNROLL_N  2
2819
2820
#define SGEMM_DEFAULT_P 64
2821
#define DGEMM_DEFAULT_P 44
2822
#define CGEMM_DEFAULT_P 64
2823
#define ZGEMM_DEFAULT_P 32
2824
2825
#define SGEMM_DEFAULT_Q 192
2826
#define DGEMM_DEFAULT_Q 92
2827
#define CGEMM_DEFAULT_Q 128
2828
#define ZGEMM_DEFAULT_Q 80
2829
2830
/* DGEMM_DEFAULT_R expands to the identifier dgemm_r (defined outside this
 * block) rather than to a literal; the other three are the literal 640. */
#define SGEMM_DEFAULT_R 640
2831
#define DGEMM_DEFAULT_R dgemm_r
2832
#define CGEMM_DEFAULT_R 640
2833
#define ZGEMM_DEFAULT_R 640
2834
2835
#define GEMM_OFFSET_A1  0x10000
2836
#define GEMM_OFFSET_B1  0x100000
2837
2838
#define SYMV_P  16
2839
#endif
2840
2841
/* Parameter set compiled when LA464 is defined. S/D/C/Z unroll factors
 * depend on NO_LASX; the Q/X unroll factors, P/Q/R and SYMV_P are common. */
#if defined (LA464)
2842
#define SNUMOPT         2
2843
#define DNUMOPT         2
2844
2845
#define GEMM_DEFAULT_OFFSET_A 0x20000
2846
#define GEMM_DEFAULT_OFFSET_B 0
2847
#define GEMM_DEFAULT_ALIGN 0x0ffffUL
2848
2849
/* DGEMM_DEFAULT_UNROLL_MN is only defined in the #else branch, i.e. when
 * NO_LASX is not defined. */
#if defined(NO_LASX)
2850
#define DGEMM_DEFAULT_UNROLL_N 8
2851
#define DGEMM_DEFAULT_UNROLL_M 2
2852
#define SGEMM_DEFAULT_UNROLL_N 8
2853
#define SGEMM_DEFAULT_UNROLL_M 2
2854
#define CGEMM_DEFAULT_UNROLL_N 4
2855
#define CGEMM_DEFAULT_UNROLL_M 1
2856
#define ZGEMM_DEFAULT_UNROLL_N 4
2857
#define ZGEMM_DEFAULT_UNROLL_M 1
2858
#else
2859
#define DGEMM_DEFAULT_UNROLL_N 6
2860
#define DGEMM_DEFAULT_UNROLL_M 16
2861
#define SGEMM_DEFAULT_UNROLL_N 8
2862
#define SGEMM_DEFAULT_UNROLL_M 16
2863
#define CGEMM_DEFAULT_UNROLL_N 4
2864
#define CGEMM_DEFAULT_UNROLL_M 16
2865
#define ZGEMM_DEFAULT_UNROLL_N 4
2866
#define ZGEMM_DEFAULT_UNROLL_M 8
2867
#define DGEMM_DEFAULT_UNROLL_MN 96
2868
#endif
2869
2870
#define QGEMM_DEFAULT_UNROLL_N 2
2871
#define XGEMM_DEFAULT_UNROLL_N 1
2872
2873
#define QGEMM_DEFAULT_UNROLL_M 2
2874
#define XGEMM_DEFAULT_UNROLL_M 1
2875
2876
/* For S, D and Z the P/R/Q macros expand to identifiers (sgemm_p, dgemm_r,
 * zgemm_q, ...) defined outside this block; only CGEMM uses literals. */
#define SGEMM_DEFAULT_P sgemm_p
2877
#define DGEMM_DEFAULT_P dgemm_p
2878
#define CGEMM_DEFAULT_P 128
2879
#define ZGEMM_DEFAULT_P zgemm_p
2880
2881
#define SGEMM_DEFAULT_R sgemm_r
2882
#define DGEMM_DEFAULT_R dgemm_r
2883
#define CGEMM_DEFAULT_R 4096
2884
#define ZGEMM_DEFAULT_R zgemm_r
2885
2886
#define SGEMM_DEFAULT_Q sgemm_q
2887
#define DGEMM_DEFAULT_Q dgemm_q
2888
#define CGEMM_DEFAULT_Q 128
2889
#define ZGEMM_DEFAULT_Q zgemm_q
2890
2891
#define SYMV_P  16
2892
#endif
2893
2894
/* Parameter set compiled when LA264 is defined. SNUMOPT/DNUMOPT are not
 * defined in this block. */
#ifdef LA264
2895
#define GEMM_DEFAULT_OFFSET_A 0
2896
#define GEMM_DEFAULT_OFFSET_B 0
2897
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
2898
2899
#define SGEMM_DEFAULT_UNROLL_M  2
2900
#define SGEMM_DEFAULT_UNROLL_N  8
2901
2902
#define DGEMM_DEFAULT_UNROLL_M  8
2903
#define DGEMM_DEFAULT_UNROLL_N  4
2904
2905
#define CGEMM_DEFAULT_UNROLL_M  8
2906
#define CGEMM_DEFAULT_UNROLL_N  4
2907
2908
#define ZGEMM_DEFAULT_UNROLL_M  4
2909
#define ZGEMM_DEFAULT_UNROLL_N  4
2910
2911
#define SGEMM_DEFAULT_P 128
2912
#define DGEMM_DEFAULT_P 128
2913
#define CGEMM_DEFAULT_P 96
2914
#define ZGEMM_DEFAULT_P 64
2915
2916
#define SGEMM_DEFAULT_Q 240
2917
#define DGEMM_DEFAULT_Q 120
2918
#define CGEMM_DEFAULT_Q 120
2919
#define ZGEMM_DEFAULT_Q 120
2920
2921
#define SGEMM_DEFAULT_R 12288
2922
#define DGEMM_DEFAULT_R 8192
2923
#define CGEMM_DEFAULT_R 4096
2924
#define ZGEMM_DEFAULT_R 4096
2925
2926
#define SYMV_P  16
2927
#endif
2928
2929
/* Parameter set compiled when LA64_GENERIC is defined. SNUMOPT/DNUMOPT are
 * not defined in this block. */
#ifdef LA64_GENERIC
2930
#define GEMM_DEFAULT_OFFSET_A 0
2931
#define GEMM_DEFAULT_OFFSET_B 0
2932
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
2933
2934
#define SGEMM_DEFAULT_UNROLL_M  2
2935
#define SGEMM_DEFAULT_UNROLL_N  8
2936
2937
#define DGEMM_DEFAULT_UNROLL_M  2
2938
#define DGEMM_DEFAULT_UNROLL_N  8
2939
2940
#define CGEMM_DEFAULT_UNROLL_M  1
2941
#define CGEMM_DEFAULT_UNROLL_N  4
2942
2943
#define ZGEMM_DEFAULT_UNROLL_M  1
2944
#define ZGEMM_DEFAULT_UNROLL_N  4
2945
2946
#define SGEMM_DEFAULT_P 128
2947
#define DGEMM_DEFAULT_P 128
2948
#define CGEMM_DEFAULT_P 96
2949
#define ZGEMM_DEFAULT_P 64
2950
2951
#define SGEMM_DEFAULT_Q 240
2952
#define DGEMM_DEFAULT_Q 120
2953
#define CGEMM_DEFAULT_Q 120
2954
#define ZGEMM_DEFAULT_Q 120
2955
2956
#define SGEMM_DEFAULT_R 12288
2957
#define DGEMM_DEFAULT_R 8192
2958
#define CGEMM_DEFAULT_R 4096
2959
#define ZGEMM_DEFAULT_R 4096
2960
2961
#define SYMV_P  16
2962
#endif
2963
2964
/* Parameter set shared by MIPS64_GENERIC, P5600, MIPS1004K, MIPS24K, I6400,
 * P6600 and I6500. Unroll factors depend on NO_MSA / MIPS64_GENERIC; P, Q,
 * R and SYMV_P are common to both variants. */
#if defined(MIPS64_GENERIC) || defined(P5600) || defined(MIPS1004K) || defined(MIPS24K) || defined(I6400) || defined(P6600) || defined(I6500)
2965
#define SNUMOPT  2
2966
#define DNUMOPT  2
2967
2968
#define GEMM_DEFAULT_OFFSET_A  0
2969
#define GEMM_DEFAULT_OFFSET_B  0
2970
#define GEMM_DEFAULT_ALIGN (BLASLONG) 0x03fffUL
2971
2972
/* 2x2 unrolling everywhere when NO_MSA is defined or the target is
 * MIPS64_GENERIC; otherwise the larger factors in the #else branch. */
#if defined(NO_MSA) || defined(MIPS64_GENERIC)
2973
#define SGEMM_DEFAULT_UNROLL_M  2
2974
#define SGEMM_DEFAULT_UNROLL_N  2
2975
2976
#define DGEMM_DEFAULT_UNROLL_M  2
2977
#define DGEMM_DEFAULT_UNROLL_N  2
2978
2979
#define CGEMM_DEFAULT_UNROLL_M  2
2980
#define CGEMM_DEFAULT_UNROLL_N  2
2981
2982
#define ZGEMM_DEFAULT_UNROLL_M  2
2983
#define ZGEMM_DEFAULT_UNROLL_N  2
2984
#else
2985
#define SGEMM_DEFAULT_UNROLL_M  8
2986
#define SGEMM_DEFAULT_UNROLL_N  8
2987
2988
#define DGEMM_DEFAULT_UNROLL_M  8
2989
#define DGEMM_DEFAULT_UNROLL_N  4
2990
2991
#define CGEMM_DEFAULT_UNROLL_M  8
2992
#define CGEMM_DEFAULT_UNROLL_N  4
2993
2994
#define ZGEMM_DEFAULT_UNROLL_M  4
2995
#define ZGEMM_DEFAULT_UNROLL_N  4
2996
#endif
2997
2998
#define SGEMM_DEFAULT_P  128
2999
#define DGEMM_DEFAULT_P  128
3000
#define CGEMM_DEFAULT_P  96
3001
#define ZGEMM_DEFAULT_P  64
3002
3003
#define SGEMM_DEFAULT_Q  240
3004
#define DGEMM_DEFAULT_Q  120
3005
#define CGEMM_DEFAULT_Q  120
3006
#define ZGEMM_DEFAULT_Q  120
3007
3008
#define SGEMM_DEFAULT_R  12288
3009
#define DGEMM_DEFAULT_R  8192
3010
#define CGEMM_DEFAULT_R  4096
3011
#define ZGEMM_DEFAULT_R  4096
3012
3013
#define SYMV_P  16
3014
#endif
3015
3016
/* Parameter set compiled when RISCV64_GENERIC is defined: 2x2 unrolling
 * for every precision plus P/Q/R and SYMV_P. */
#ifdef RISCV64_GENERIC
3017
#define GEMM_DEFAULT_OFFSET_A 0
3018
#define GEMM_DEFAULT_OFFSET_B 0
3019
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
3020
3021
#define SGEMM_DEFAULT_UNROLL_M  2
3022
#define SGEMM_DEFAULT_UNROLL_N  2
3023
3024
#define DGEMM_DEFAULT_UNROLL_M  2
3025
#define DGEMM_DEFAULT_UNROLL_N  2
3026
3027
#define CGEMM_DEFAULT_UNROLL_M  2
3028
#define CGEMM_DEFAULT_UNROLL_N  2
3029
3030
#define ZGEMM_DEFAULT_UNROLL_M  2
3031
#define ZGEMM_DEFAULT_UNROLL_N  2
3032
3033
#define SGEMM_DEFAULT_P 128
3034
#define DGEMM_DEFAULT_P 128
3035
#define CGEMM_DEFAULT_P 96
3036
#define ZGEMM_DEFAULT_P 64
3037
3038
#define SGEMM_DEFAULT_Q 240
3039
#define DGEMM_DEFAULT_Q 120
3040
#define CGEMM_DEFAULT_Q 120
3041
#define ZGEMM_DEFAULT_Q 120
3042
3043
#define SGEMM_DEFAULT_R 12288
3044
#define DGEMM_DEFAULT_R 8192
3045
#define CGEMM_DEFAULT_R 4096
3046
#define ZGEMM_DEFAULT_R 4096
3047
3048
#define SYMV_P  16
3049
3050
/* NOTE(review): these two repeat the definitions at the top of this block
 * with the same replacement text, so they are redundant but harmless. */
#define GEMM_DEFAULT_OFFSET_A 0
3051
#define GEMM_DEFAULT_OFFSET_B 0
3052
3053
#endif
3054
3055
/* Parameter set compiled when x280 is defined. Every precision also gets
 * an explicit xGEMM_DEFAULT_UNROLL_MN value in this block. */
#if defined(x280)
3056
#define GEMM_DEFAULT_OFFSET_A 0
3057
#define GEMM_DEFAULT_OFFSET_B 0
3058
#define GEMM_DEFAULT_ALIGN 0x03fffUL
3059
3060
#define SGEMM_DEFAULT_UNROLL_M  16 // 4 // 16 // 2
3061
#define SGEMM_DEFAULT_UNROLL_N  8// 4 // 4 // 2
3062
3063
/* SGEMM_UNROLL_MN is calculated as max(SGEMM_UNROLL_M, SGEMM_UNROLL_N)
3064
 * Since we don't define SGEMM_UNROLL_M correctly we have to manually set this macro.
3065
 * If VLMAX size is ever more than 1024, this should be increased also. */
3066
#define SGEMM_DEFAULT_UNROLL_MN  32
3067
3068
#define DGEMM_DEFAULT_UNROLL_M  16 //2 // 8
3069
#define DGEMM_DEFAULT_UNROLL_N  8 //2 // 4
3070
#define DGEMM_DEFAULT_UNROLL_MN  32
3071
3072
#define CGEMM_DEFAULT_UNROLL_M  8
3073
#define CGEMM_DEFAULT_UNROLL_N  4
3074
#define CGEMM_DEFAULT_UNROLL_MN 32
3075
3076
#define ZGEMM_DEFAULT_UNROLL_M  8
3077
#define ZGEMM_DEFAULT_UNROLL_N  4
3078
#define ZGEMM_DEFAULT_UNROLL_MN 16
3079
3080
#define SGEMM_DEFAULT_P 160
3081
#define DGEMM_DEFAULT_P 160
3082
#define CGEMM_DEFAULT_P 96
3083
#define ZGEMM_DEFAULT_P 64
3084
3085
#define SGEMM_DEFAULT_Q 240
3086
#define DGEMM_DEFAULT_Q 128
3087
#define CGEMM_DEFAULT_Q 120
3088
#define ZGEMM_DEFAULT_Q 120
3089
3090
#define SGEMM_DEFAULT_R 12288
3091
#define DGEMM_DEFAULT_R 8192
3092
#define CGEMM_DEFAULT_R 4096
3093
#define ZGEMM_DEFAULT_R 4096
3094
3095
#define SYMV_P  16
3096
3097
/* NOTE(review): these two repeat the definitions at the top of this block
 * with the same replacement text, so they are redundant but harmless. */
#define GEMM_DEFAULT_OFFSET_A 0
3098
#define GEMM_DEFAULT_OFFSET_B 0
3099
3100
#endif
3101
/* Parameter set compiled when C910V is defined. */
#ifdef C910V
3102
#define GEMM_DEFAULT_OFFSET_A 0
3103
#define GEMM_DEFAULT_OFFSET_B 0
3104
#define GEMM_DEFAULT_ALIGN 0x03fffUL
3105
3106
#define SGEMM_DEFAULT_UNROLL_M  16
3107
#define SGEMM_DEFAULT_UNROLL_N  4
3108
3109
#define DGEMM_DEFAULT_UNROLL_M  8
3110
#define DGEMM_DEFAULT_UNROLL_N  4
3111
3112
#define CGEMM_DEFAULT_UNROLL_M  2
3113
#define CGEMM_DEFAULT_UNROLL_N  2
3114
3115
#define ZGEMM_DEFAULT_UNROLL_M  2
3116
#define ZGEMM_DEFAULT_UNROLL_N  2
3117
3118
#define SGEMM_DEFAULT_P 160
3119
#define DGEMM_DEFAULT_P 160
3120
#define CGEMM_DEFAULT_P 96
3121
#define ZGEMM_DEFAULT_P 64
3122
3123
#define SGEMM_DEFAULT_Q 240
3124
#define DGEMM_DEFAULT_Q 128
3125
#define CGEMM_DEFAULT_Q 120
3126
#define ZGEMM_DEFAULT_Q 120
3127
3128
#define SGEMM_DEFAULT_R 12288
3129
#define DGEMM_DEFAULT_R 8192
3130
#define CGEMM_DEFAULT_R 4096
3131
#define ZGEMM_DEFAULT_R 4096
3132
3133
#define SYMV_P  16
3134
3135
/* NOTE(review): these two repeat the definitions at the top of this block
 * with the same replacement text, so they are redundant but harmless. */
#define GEMM_DEFAULT_OFFSET_A 0
3136
#define GEMM_DEFAULT_OFFSET_B 0
3137
3138
#endif
3139
3140
/* Parameter set compiled when RISCV64_ZVL128B is defined. */
#ifdef RISCV64_ZVL128B
3141
#define GEMM_DEFAULT_OFFSET_A 0
3142
#define GEMM_DEFAULT_OFFSET_B 0
3143
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
3144
3145
#define SGEMM_DEFAULT_UNROLL_M 8
3146
#define SGEMM_DEFAULT_UNROLL_N 8
3147
3148
#define DGEMM_DEFAULT_UNROLL_M 8
3149
#define DGEMM_DEFAULT_UNROLL_N 4
3150
3151
#define CGEMM_DEFAULT_UNROLL_M 8
3152
#define CGEMM_DEFAULT_UNROLL_N 4
3153
3154
#define ZGEMM_DEFAULT_UNROLL_M 4
3155
#define ZGEMM_DEFAULT_UNROLL_N 4
3156
3157
#define SGEMM_DEFAULT_P 128
3158
#define DGEMM_DEFAULT_P 128
3159
#define CGEMM_DEFAULT_P 96
3160
#define ZGEMM_DEFAULT_P 64
3161
3162
#define SGEMM_DEFAULT_Q 240
3163
#define DGEMM_DEFAULT_Q 120
3164
#define CGEMM_DEFAULT_Q 120
3165
#define ZGEMM_DEFAULT_Q 120
3166
3167
#define SGEMM_DEFAULT_R 12288
3168
#define DGEMM_DEFAULT_R 8192
3169
#define CGEMM_DEFAULT_R 4096
3170
#define ZGEMM_DEFAULT_R 4096
3171
3172
#define SYMV_P 16
3173
3174
/* NOTE(review): these two repeat the definitions at the top of this block
 * with the same replacement text, so they are redundant but harmless. */
#define GEMM_DEFAULT_OFFSET_A 0
3175
#define GEMM_DEFAULT_OFFSET_B 0
3176
3177
#endif
3178
3179
/* Parameter set compiled when RISCV64_ZVL256B is defined. */
#ifdef RISCV64_ZVL256B
3180
#define GEMM_DEFAULT_OFFSET_A 0
3181
#define GEMM_DEFAULT_OFFSET_B 0
3182
#define GEMM_DEFAULT_ALIGN 0x03fffUL
3183
3184
#define SGEMM_DEFAULT_UNROLL_M  16
3185
#define SGEMM_DEFAULT_UNROLL_N  8
3186
3187
#define DGEMM_DEFAULT_UNROLL_M  8
3188
#define DGEMM_DEFAULT_UNROLL_N  8
3189
3190
#define CGEMM_DEFAULT_UNROLL_M  8
3191
#define CGEMM_DEFAULT_UNROLL_N  8
3192
3193
#define ZGEMM_DEFAULT_UNROLL_M  8
3194
#define ZGEMM_DEFAULT_UNROLL_N  4
3195
3196
#define SGEMM_DEFAULT_P 128
3197
#define DGEMM_DEFAULT_P 64
3198
#define CGEMM_DEFAULT_P 64
3199
#define ZGEMM_DEFAULT_P 64
3200
3201
#define SGEMM_DEFAULT_Q 128
3202
#define DGEMM_DEFAULT_Q 128
3203
#define CGEMM_DEFAULT_Q 128
3204
#define ZGEMM_DEFAULT_Q 64
3205
3206
#define SGEMM_DEFAULT_R 16384
3207
#define DGEMM_DEFAULT_R 8192
3208
#define CGEMM_DEFAULT_R 8192
3209
#define ZGEMM_DEFAULT_R 4096
3210
3211
#define SYMV_P 16
3212
3213
/* NOTE(review): these two repeat the definitions at the top of this block
 * with the same replacement text, so they are redundant but harmless. */
#define GEMM_DEFAULT_OFFSET_A 0
3214
#define GEMM_DEFAULT_OFFSET_B 0
3215
3216
#endif
3217
3218
/* Parameter set compiled when ARMV7 is defined: 4x4 unrolling for the real
 * kernels, 2x2 for the complex ones, plus P/Q/R and SYMV_P. */
#ifdef ARMV7
3219
#define SNUMOPT   2
3220
#define DNUMOPT   2
3221
3222
#define GEMM_DEFAULT_OFFSET_A 0
3223
#define GEMM_DEFAULT_OFFSET_B 0
3224
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
3225
3226
#define SGEMM_DEFAULT_UNROLL_M  4
3227
#define SGEMM_DEFAULT_UNROLL_N  4
3228
3229
#define DGEMM_DEFAULT_UNROLL_M  4
3230
#define DGEMM_DEFAULT_UNROLL_N  4
3231
3232
#define CGEMM_DEFAULT_UNROLL_M  2
3233
#define CGEMM_DEFAULT_UNROLL_N  2
3234
3235
#define ZGEMM_DEFAULT_UNROLL_M  2
3236
#define ZGEMM_DEFAULT_UNROLL_N  2
3237
3238
#define SGEMM_DEFAULT_P 128
3239
#define DGEMM_DEFAULT_P 128
3240
#define CGEMM_DEFAULT_P 96
3241
#define ZGEMM_DEFAULT_P 64
3242
3243
#define SGEMM_DEFAULT_Q 240
3244
#define DGEMM_DEFAULT_Q 120
3245
#define CGEMM_DEFAULT_Q 120
3246
#define ZGEMM_DEFAULT_Q 120
3247
3248
#define SGEMM_DEFAULT_R 12288
3249
#define DGEMM_DEFAULT_R 8192
3250
#define CGEMM_DEFAULT_R 4096
3251
#define ZGEMM_DEFAULT_R 4096
3252
3253
3254
3255
#define SYMV_P  16
3256
#endif
3257
3258
3259
/* Parameter set compiled when ARMV6 is defined. It differs from the ARMV7
 * block of this file only in the real-precision N-unroll (2 instead of 4). */
#if defined(ARMV6)
3260
#define SNUMOPT   2
3261
#define DNUMOPT   2
3262
3263
#define GEMM_DEFAULT_OFFSET_A 0
3264
#define GEMM_DEFAULT_OFFSET_B 0
3265
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
3266
3267
#define SGEMM_DEFAULT_UNROLL_M  4
3268
#define SGEMM_DEFAULT_UNROLL_N  2
3269
3270
#define DGEMM_DEFAULT_UNROLL_M  4
3271
#define DGEMM_DEFAULT_UNROLL_N  2
3272
3273
#define CGEMM_DEFAULT_UNROLL_M  2
3274
#define CGEMM_DEFAULT_UNROLL_N  2
3275
3276
#define ZGEMM_DEFAULT_UNROLL_M  2
3277
#define ZGEMM_DEFAULT_UNROLL_N  2
3278
3279
#define SGEMM_DEFAULT_P 128
3280
#define DGEMM_DEFAULT_P 128
3281
#define CGEMM_DEFAULT_P 96
3282
#define ZGEMM_DEFAULT_P 64
3283
3284
#define SGEMM_DEFAULT_Q 240
3285
#define DGEMM_DEFAULT_Q 120
3286
#define CGEMM_DEFAULT_Q 120
3287
#define ZGEMM_DEFAULT_Q 120
3288
3289
#define SGEMM_DEFAULT_R 12288
3290
#define DGEMM_DEFAULT_R 8192
3291
#define CGEMM_DEFAULT_R 4096
3292
#define ZGEMM_DEFAULT_R 4096
3293
3294
3295
#define SYMV_P  16
3296
#endif
3297
3298
/* Common ARMv8 parameters */
3299
#if defined(ARMV8)
3300
3301
#define SNUMOPT   2
3302
#define DNUMOPT   2
3303
3304
#define GEMM_DEFAULT_OFFSET_A 0
3305
#define GEMM_DEFAULT_OFFSET_B 0
3306
3307
3308
#ifdef _WIN64
3309
/* Use explicit casting for win64 as LLP64 datamodel is used */
3310
#define GEMM_DEFAULT_ALIGN (BLASULONG)0x03fffUL
3311
#else
3312
#define GEMM_DEFAULT_ALIGN 0x03fffUL
3313
#endif
3314
3315
#define SYMV_P  16
3316
3317
#if defined(CORTEXA57) || defined(CORTEXX1) || \
3318
    defined(CORTEXA72) || defined(CORTEXA73) || \
3319
    defined(FALKOR)    || defined(TSV110) || defined(EMAG8180) || defined(VORTEX) || defined(FT2000)
/* The cores listed above share one set of unroll factors and R defaults;
   P and Q are picked by the NUM_CORES check further down. */
3320
3321
#define SGEMM_DEFAULT_UNROLL_M  16
3322
#define SGEMM_DEFAULT_UNROLL_N  4
3323
3324
#define DGEMM_DEFAULT_UNROLL_M  8
3325
#define DGEMM_DEFAULT_UNROLL_N  4
3326
3327
#define CGEMM_DEFAULT_UNROLL_M  8
3328
#define CGEMM_DEFAULT_UNROLL_N  4
3329
3330
#define ZGEMM_DEFAULT_UNROLL_M  4
3331
#define ZGEMM_DEFAULT_UNROLL_N  4
3332
3333
/*FIXME: this should be using the cache size, but there is currently no easy way to
3334
query that on ARM. So if getarch counted more than 8 cores we simply assume the host
3335
is a big desktop or server with abundant cache rather than a phone or embedded device */ 
3336
#if NUM_CORES > 8 || defined(TSV110) || defined(EMAG8180) || defined(VORTEX)|| defined(CORTEXX1)
3337
  #define SGEMM_DEFAULT_P 512
3338
  #define DGEMM_DEFAULT_P 256
3339
  #define CGEMM_DEFAULT_P 256
3340
  #define ZGEMM_DEFAULT_P 128
3341
3342
  #define SGEMM_DEFAULT_Q 1024
3343
  #define DGEMM_DEFAULT_Q 512
3344
  #define CGEMM_DEFAULT_Q 512
3345
  #define ZGEMM_DEFAULT_Q 512
3346
#else
3347
  #define SGEMM_DEFAULT_P 128
3348
  #define DGEMM_DEFAULT_P 160
3349
  #define CGEMM_DEFAULT_P 128
3350
  #define ZGEMM_DEFAULT_P 128
3351
3352
  #define SGEMM_DEFAULT_Q 352
3353
  #define DGEMM_DEFAULT_Q 128
3354
  #define CGEMM_DEFAULT_Q 224
3355
  #define ZGEMM_DEFAULT_Q 112
3356
#endif
3357
3358
#define SGEMM_DEFAULT_R 4096
3359
#define DGEMM_DEFAULT_R 4096
3360
#define CGEMM_DEFAULT_R 4096
3361
#define ZGEMM_DEFAULT_R 2048
3362
3363
#elif defined(CORTEXA76)
/* CORTEXA76: unroll factors, a precision-dependent SWITCH_RATIO (8 when
   XDOUBLE or DOUBLE is defined, otherwise 16) and fixed P/Q/R defaults. */
3364
3365
#define SGEMM_DEFAULT_UNROLL_M  16
3366
#define SGEMM_DEFAULT_UNROLL_N  4
3367
3368
#define DGEMM_DEFAULT_UNROLL_M  8
3369
#define DGEMM_DEFAULT_UNROLL_N  4
3370
3371
#define CGEMM_DEFAULT_UNROLL_M  8
3372
#define CGEMM_DEFAULT_UNROLL_N  4
3373
3374
#define ZGEMM_DEFAULT_UNROLL_M  4
3375
#define ZGEMM_DEFAULT_UNROLL_N  4
3376
3377
#if defined(XDOUBLE) || defined(DOUBLE)
3378
#define SWITCH_RATIO            8
3379
#else
3380
#define SWITCH_RATIO            16
3381
#endif
3382
3383
  #define SGEMM_DEFAULT_P 256
3384
  #define DGEMM_DEFAULT_P 128
3385
  #define CGEMM_DEFAULT_P 128
3386
  #define ZGEMM_DEFAULT_P 64
3387
3388
  #define SGEMM_DEFAULT_Q 512
3389
  #define DGEMM_DEFAULT_Q 256
3390
  #define CGEMM_DEFAULT_Q 256
3391
  #define ZGEMM_DEFAULT_Q 256
3392
3393
#define SGEMM_DEFAULT_R 4096
3394
#define DGEMM_DEFAULT_R 4096
3395
#define CGEMM_DEFAULT_R 4096
3396
#define ZGEMM_DEFAULT_R 4096
3397
3398
#elif defined(CORTEXA53) || defined(CORTEXA55)
/* CORTEXA53 / CORTEXA55: SGEMM uses an 8x8 unroll here; P/Q/R are fixed
   values with no core-count dependence. */
3399
3400
#define SGEMM_DEFAULT_UNROLL_M  8
3401
#define SGEMM_DEFAULT_UNROLL_N  8
3402
3403
#define DGEMM_DEFAULT_UNROLL_M  4
3404
#define DGEMM_DEFAULT_UNROLL_N  4
3405
3406
#define CGEMM_DEFAULT_UNROLL_M  8
3407
#define CGEMM_DEFAULT_UNROLL_N  4
3408
3409
#define ZGEMM_DEFAULT_UNROLL_M  4
3410
#define ZGEMM_DEFAULT_UNROLL_N  4
3411
3412
#define SGEMM_DEFAULT_P 256
3413
#define DGEMM_DEFAULT_P 160
3414
#define CGEMM_DEFAULT_P 128
3415
#define ZGEMM_DEFAULT_P 128
3416
3417
#define SGEMM_DEFAULT_Q 256
3418
#define DGEMM_DEFAULT_Q 128
3419
#define CGEMM_DEFAULT_Q 224
3420
#define ZGEMM_DEFAULT_Q 112
3421
3422
#define SGEMM_DEFAULT_R 4096
3423
#define DGEMM_DEFAULT_R 4096
3424
#define CGEMM_DEFAULT_R 4096
3425
#define ZGEMM_DEFAULT_R 2048
3426
3427
#elif defined(THUNDERX)
/* THUNDERX: 4x4 unroll for SGEMM, 2x2 for DGEMM/CGEMM/ZGEMM, with fixed
   P/Q/R defaults. */
3428
3429
#define SGEMM_DEFAULT_UNROLL_M  4
3430
#define SGEMM_DEFAULT_UNROLL_N  4
3431
3432
#define DGEMM_DEFAULT_UNROLL_M  2
3433
#define DGEMM_DEFAULT_UNROLL_N  2
3434
3435
#define CGEMM_DEFAULT_UNROLL_M  2
3436
#define CGEMM_DEFAULT_UNROLL_N  2
3437
3438
#define ZGEMM_DEFAULT_UNROLL_M  2
3439
#define ZGEMM_DEFAULT_UNROLL_N  2
3440
3441
#define SGEMM_DEFAULT_P 128
3442
#define DGEMM_DEFAULT_P 128
3443
#define CGEMM_DEFAULT_P 96
3444
#define ZGEMM_DEFAULT_P 64
3445
3446
#define SGEMM_DEFAULT_Q 240
3447
#define DGEMM_DEFAULT_Q 120
3448
#define CGEMM_DEFAULT_Q 120
3449
#define ZGEMM_DEFAULT_Q 120
3450
3451
#define SGEMM_DEFAULT_R 12288
3452
#define DGEMM_DEFAULT_R 8192
3453
#define CGEMM_DEFAULT_R 4096
3454
#define ZGEMM_DEFAULT_R 4096
3455
3456
#elif defined(THUNDERX2T99)
/* THUNDERX2T99: per-precision unroll factors plus fixed P/Q/R defaults
   (R is 4096 for all four precisions). */
3457
3458
#define SGEMM_DEFAULT_UNROLL_M  16
3459
#define SGEMM_DEFAULT_UNROLL_N  4
3460
3461
#define DGEMM_DEFAULT_UNROLL_M  8
3462
#define DGEMM_DEFAULT_UNROLL_N  4
3463
3464
#define CGEMM_DEFAULT_UNROLL_M  8
3465
#define CGEMM_DEFAULT_UNROLL_N  4
3466
3467
#define ZGEMM_DEFAULT_UNROLL_M  4
3468
#define ZGEMM_DEFAULT_UNROLL_N  4
3469
3470
#define SGEMM_DEFAULT_P 128
3471
#define DGEMM_DEFAULT_P 160
3472
#define CGEMM_DEFAULT_P 128
3473
#define ZGEMM_DEFAULT_P 128
3474
3475
#define SGEMM_DEFAULT_Q 352
3476
#define DGEMM_DEFAULT_Q 128
3477
#define CGEMM_DEFAULT_Q 224
3478
#define ZGEMM_DEFAULT_Q 112
3479
3480
#define SGEMM_DEFAULT_R 4096
3481
#define DGEMM_DEFAULT_R 4096
3482
#define CGEMM_DEFAULT_R 4096
3483
#define ZGEMM_DEFAULT_R 4096
3484
3485
#elif defined(THUNDERX3T110)
/* THUNDERX3T110: per-precision unroll factors plus fixed P/Q/R defaults;
   DGEMM_DEFAULT_P is 320 in this set. */
3486
3487
#define SGEMM_DEFAULT_UNROLL_M  16
3488
#define SGEMM_DEFAULT_UNROLL_N  4
3489
3490
#define DGEMM_DEFAULT_UNROLL_M  8
3491
#define DGEMM_DEFAULT_UNROLL_N  4
3492
3493
#define CGEMM_DEFAULT_UNROLL_M  8
3494
#define CGEMM_DEFAULT_UNROLL_N  4
3495
3496
#define ZGEMM_DEFAULT_UNROLL_M  4
3497
#define ZGEMM_DEFAULT_UNROLL_N  4
3498
3499
#define SGEMM_DEFAULT_P 128
3500
#define DGEMM_DEFAULT_P 320
3501
#define CGEMM_DEFAULT_P 128
3502
#define ZGEMM_DEFAULT_P 128
3503
3504
#define SGEMM_DEFAULT_Q 352
3505
#define DGEMM_DEFAULT_Q 128
3506
#define CGEMM_DEFAULT_Q 224
3507
#define ZGEMM_DEFAULT_Q 112
3508
3509
#define SGEMM_DEFAULT_R 4096
3510
#define DGEMM_DEFAULT_R 4096
3511
#define CGEMM_DEFAULT_R 4096
3512
#define ZGEMM_DEFAULT_R 4096
3513
3514
#elif defined(NEOVERSEN1)
/* NEOVERSEN1: SWITCH_RATIO is 8 when XDOUBLE or DOUBLE is defined and 16
   otherwise; unroll factors and P/Q/R defaults are fixed. */
3515
3516
#if defined(XDOUBLE) || defined(DOUBLE)
3517
#define SWITCH_RATIO            8
3518
#else
3519
#define SWITCH_RATIO            16
3520
#endif
3521
3522
#define SGEMM_DEFAULT_UNROLL_M  16
3523
#define SGEMM_DEFAULT_UNROLL_N  4
3524
3525
#define DGEMM_DEFAULT_UNROLL_M  8
3526
#define DGEMM_DEFAULT_UNROLL_N  4
3527
3528
#define CGEMM_DEFAULT_UNROLL_M  8
3529
#define CGEMM_DEFAULT_UNROLL_N  4
3530
3531
#define ZGEMM_DEFAULT_UNROLL_M  4
3532
#define ZGEMM_DEFAULT_UNROLL_N  4
3533
3534
#define SGEMM_DEFAULT_P 240
3535
#define DGEMM_DEFAULT_P 240
3536
#define CGEMM_DEFAULT_P 128
3537
#define ZGEMM_DEFAULT_P 128
3538
3539
#define SGEMM_DEFAULT_Q 640
3540
#define DGEMM_DEFAULT_Q 320
3541
#define CGEMM_DEFAULT_Q 224
3542
#define ZGEMM_DEFAULT_Q 112
3543
3544
#define SGEMM_DEFAULT_R 4096
3545
#define DGEMM_DEFAULT_R 4096
3546
#define CGEMM_DEFAULT_R 4096
3547
#define ZGEMM_DEFAULT_R 4096
3548
3549
#elif defined(NEOVERSEV1) // 256-bit SVE
/* SWITCH_RATIO and GEMM_PREFERED_SIZE both depend on whether XDOUBLE or
   DOUBLE is defined. The three SBGEMM_* macros are #undef'd before being
   defined again, so the values given here replace any earlier definition. */
3550
3551
#if defined(XDOUBLE) || defined(DOUBLE)
3552
#define SWITCH_RATIO            8
3553
#define GEMM_PREFERED_SIZE      4
3554
#else
3555
#define SWITCH_RATIO            16
3556
#define GEMM_PREFERED_SIZE      8
3557
#endif
3558
3559
#undef SBGEMM_ALIGN_K
3560
#undef SBGEMM_DEFAULT_UNROLL_M
3561
#undef SBGEMM_DEFAULT_UNROLL_N
3562
#define SBGEMM_ALIGN_K 8
3563
#define SBGEMM_DEFAULT_UNROLL_M 4
3564
#define SBGEMM_DEFAULT_UNROLL_N 4
3565
3566
#define SGEMM_DEFAULT_UNROLL_M  16
3567
#define SGEMM_DEFAULT_UNROLL_N  8
3568
3569
#define DGEMM_DEFAULT_UNROLL_M  4 // Actually 2VL (8) but kept separate to keep copies separate
3570
#define DGEMM_DEFAULT_UNROLL_N  8
3571
3572
#define CGEMM_DEFAULT_UNROLL_M  2
3573
#define CGEMM_DEFAULT_UNROLL_N  4
3574
#define CGEMM_DEFAULT_UNROLL_MN 16
3575
3576
#define ZGEMM_DEFAULT_UNROLL_M  2
3577
#define ZGEMM_DEFAULT_UNROLL_N  4
3578
#define ZGEMM_DEFAULT_UNROLL_MN 16
3579
3580
#define SGEMM_DEFAULT_P 240
3581
#define DGEMM_DEFAULT_P 240
3582
#define CGEMM_DEFAULT_P 128
3583
#define ZGEMM_DEFAULT_P 128
3584
3585
#define SGEMM_DEFAULT_Q 640
3586
#define DGEMM_DEFAULT_Q 320
3587
#define CGEMM_DEFAULT_Q 224
3588
#define ZGEMM_DEFAULT_Q 112
3589
3590
#define SGEMM_DEFAULT_R 4096
3591
#define DGEMM_DEFAULT_R 4096
3592
#define CGEMM_DEFAULT_R 4096
3593
#define ZGEMM_DEFAULT_R 4096
3594
3595
#elif defined(NEOVERSEN2)
/* NEOVERSEN2: precision-dependent SWITCH_RATIO, SBGEMM_* values that are
   #undef'd and then redefined (replacing any earlier definition), and fixed
   unroll and P/Q/R defaults. */
3596
3597
#if defined(XDOUBLE) || defined(DOUBLE)
3598
#define SWITCH_RATIO            8
3599
#else
3600
#define SWITCH_RATIO            16
3601
#endif
3602
3603
#undef SBGEMM_ALIGN_K
3604
#define SBGEMM_ALIGN_K 4
3605
3606
#undef SBGEMM_DEFAULT_UNROLL_M
3607
#undef SBGEMM_DEFAULT_UNROLL_N
3608
#define SBGEMM_DEFAULT_UNROLL_M 8
3609
#define SBGEMM_DEFAULT_UNROLL_N 4
3610
3611
#define SGEMM_DEFAULT_UNROLL_M  16
3612
#define SGEMM_DEFAULT_UNROLL_N  4
3613
3614
#define DGEMM_DEFAULT_UNROLL_M  8
3615
#define DGEMM_DEFAULT_UNROLL_N  4
3616
3617
#define CGEMM_DEFAULT_UNROLL_M  8
3618
#define CGEMM_DEFAULT_UNROLL_N  4
3619
3620
#define ZGEMM_DEFAULT_UNROLL_M  4
3621
#define ZGEMM_DEFAULT_UNROLL_N  4
3622
3623
#define SGEMM_DEFAULT_P 128
3624
#define DGEMM_DEFAULT_P 160
3625
#define CGEMM_DEFAULT_P 128
3626
#define ZGEMM_DEFAULT_P 128
3627
3628
#define SGEMM_DEFAULT_Q 352
3629
#define DGEMM_DEFAULT_Q 128
3630
#define CGEMM_DEFAULT_Q 224
3631
#define ZGEMM_DEFAULT_Q 112
3632
3633
#define SGEMM_DEFAULT_R 4096
3634
#define DGEMM_DEFAULT_R 4096
3635
#define CGEMM_DEFAULT_R 4096
3636
#define ZGEMM_DEFAULT_R 4096
3637
3638
#elif defined(A64FX) // 512-bit SVE
/* In this set every precision also gets an explicit *_DEFAULT_UNROLL_MN
   value in addition to UNROLL_M and UNROLL_N. */
3639
3640
/* When all BLAS3 routines are implemented with SVE, SGEMM_DEFAULT_UNROLL_M should be "sve_vl".
3641
Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy routines in both directions separated. */
3642
#define SGEMM_DEFAULT_UNROLL_M  4
3643
#define SGEMM_DEFAULT_UNROLL_N  8
3644
/* SGEMM_UNROLL_MN is calculated as max(SGEMM_UNROLL_M, SGEMM_UNROLL_N)
3645
 * Since we don't define SGEMM_UNROLL_M correctly we have to manually set this macro.
3646
 * If SVE size is ever more than 1024, this should be increased also. */
3647
#define SGEMM_DEFAULT_UNROLL_MN  32
3648
3649
/* When all BLAS3 routines are implemented with SVE, DGEMM_DEFAULT_UNROLL_M should be "sve_vl".
3650
Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy routines in both directions separated. */
3651
#define DGEMM_DEFAULT_UNROLL_M  2 
3652
#define DGEMM_DEFAULT_UNROLL_N  8
3653
3654
#define DGEMM_DEFAULT_UNROLL_MN  32
3655
3656
#define CGEMM_DEFAULT_UNROLL_M  2
3657
#define CGEMM_DEFAULT_UNROLL_N  4
3658
#define CGEMM_DEFAULT_UNROLL_MN  16
3659
3660
#define ZGEMM_DEFAULT_UNROLL_M  2
3661
#define ZGEMM_DEFAULT_UNROLL_N  4
3662
#define ZGEMM_DEFAULT_UNROLL_MN  16
3663
3664
#define SGEMM_DEFAULT_P 128
3665
#define DGEMM_DEFAULT_P 160
3666
#define CGEMM_DEFAULT_P 128
3667
#define ZGEMM_DEFAULT_P 128
3668
3669
#define SGEMM_DEFAULT_Q 352
3670
#define DGEMM_DEFAULT_Q 128
3671
#define CGEMM_DEFAULT_Q 224
3672
#define ZGEMM_DEFAULT_Q 112
3673
3674
#define SGEMM_DEFAULT_R 4096
3675
#define DGEMM_DEFAULT_R 4096
3676
#define CGEMM_DEFAULT_R 4096
3677
#define ZGEMM_DEFAULT_R 4096
3678
3679
#elif defined(ARMV8SVE) || defined(ARMV9SME) || defined(ARMV9) || defined(CORTEXA510)|| defined(CORTEXA710) || defined(CORTEXX2) // 128-bit SVE
/* One shared set for the targets named in the condition above:
   precision-dependent SWITCH_RATIO, explicit UNROLL_MN for CGEMM/ZGEMM,
   and fixed P/Q/R defaults. */
3680
3681
#if defined(XDOUBLE) || defined(DOUBLE)
3682
#define SWITCH_RATIO            8
3683
#else
3684
#define SWITCH_RATIO            16
3685
#endif
3686
3687
#define SGEMM_DEFAULT_UNROLL_M  4 // Actually 1VL (8) but kept separate to keep copies separate
3688
#define SGEMM_DEFAULT_UNROLL_N  8
3689
3690
#define DGEMM_DEFAULT_UNROLL_M  4
3691
#define DGEMM_DEFAULT_UNROLL_N  8
3692
3693
#define CGEMM_DEFAULT_UNROLL_M  2
3694
#define CGEMM_DEFAULT_UNROLL_N  4
3695
#define CGEMM_DEFAULT_UNROLL_MN  16
3696
3697
#define ZGEMM_DEFAULT_UNROLL_M  2
3698
#define ZGEMM_DEFAULT_UNROLL_N  4
3699
#define ZGEMM_DEFAULT_UNROLL_MN  16
3700
3701
#define SGEMM_DEFAULT_P 128
3702
#define DGEMM_DEFAULT_P 160
3703
#define CGEMM_DEFAULT_P 128
3704
#define ZGEMM_DEFAULT_P 128
3705
3706
#define SGEMM_DEFAULT_Q 352
3707
#define DGEMM_DEFAULT_Q 128
3708
#define CGEMM_DEFAULT_Q 224
3709
#define ZGEMM_DEFAULT_Q 112
3710
3711
#define SGEMM_DEFAULT_R 4096
3712
#define DGEMM_DEFAULT_R 4096
3713
#define CGEMM_DEFAULT_R 4096
3714
#define ZGEMM_DEFAULT_R 4096
3715
3716
#else /* Other/undetected ARMv8 cores */
/* Fallback values used when none of the preceding core conditions matched.
   The two #endif lines at the bottom close the per-core chain and then the
   enclosing ARMV8 conditional. */
3717
3718
#define SGEMM_DEFAULT_UNROLL_M  16
3719
#define SGEMM_DEFAULT_UNROLL_N  4
3720
3721
#define DGEMM_DEFAULT_UNROLL_M  8
3722
#define DGEMM_DEFAULT_UNROLL_N  4
3723
3724
#define CGEMM_DEFAULT_UNROLL_M  8
3725
#define CGEMM_DEFAULT_UNROLL_N  4
3726
3727
#define ZGEMM_DEFAULT_UNROLL_M  4
3728
#define ZGEMM_DEFAULT_UNROLL_N  4
3729
3730
#define SGEMM_DEFAULT_P 128
3731
#define DGEMM_DEFAULT_P 160
3732
#define CGEMM_DEFAULT_P 128
3733
#define ZGEMM_DEFAULT_P 128
3734
3735
#define SGEMM_DEFAULT_Q 352
3736
#define DGEMM_DEFAULT_Q 128
3737
#define CGEMM_DEFAULT_Q 224
3738
#define ZGEMM_DEFAULT_Q 112
3739
3740
#define SGEMM_DEFAULT_R 4096
3741
#define DGEMM_DEFAULT_R 4096
3742
#define CGEMM_DEFAULT_R 4096
3743
#define ZGEMM_DEFAULT_R 4096
3744
3745
#endif /* Cores */
3746
3747
3748
#endif /* ARMv8 */
3749
3750
#if defined(ARMV9SME) /* ARMv9 SME */
/* Defines USE_SGEMM_KERNEL_DIRECT as 1 for ARMV9SME builds. This check is a
   separate top-level conditional, not part of the ARMV8 block that closes
   just before it. */
3751
#define USE_SGEMM_KERNEL_DIRECT 1
3752
#endif /* ARMv9 SME */
3753
3754
#if defined(ARMV5)
/* Parameter set selected when ARMV5 is defined: SNUMOPT/DNUMOPT, GEMM A/B
   offsets and GEMM_DEFAULT_ALIGN (cast to BLASLONG), 2x2 unroll factors for
   all four precisions, P/Q/R defaults and SYMV_P. */
3755
#define SNUMOPT   2
3756
#define DNUMOPT   2
3757
3758
#define GEMM_DEFAULT_OFFSET_A 0
3759
#define GEMM_DEFAULT_OFFSET_B 0
3760
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
3761
3762
#define SGEMM_DEFAULT_UNROLL_M  2
3763
#define SGEMM_DEFAULT_UNROLL_N  2
3764
3765
#define DGEMM_DEFAULT_UNROLL_M  2
3766
#define DGEMM_DEFAULT_UNROLL_N  2
3767
3768
#define CGEMM_DEFAULT_UNROLL_M  2
3769
#define CGEMM_DEFAULT_UNROLL_N  2
3770
3771
#define ZGEMM_DEFAULT_UNROLL_M  2
3772
#define ZGEMM_DEFAULT_UNROLL_N  2
3773
3774
#define SGEMM_DEFAULT_P 128
3775
#define DGEMM_DEFAULT_P 128
3776
#define CGEMM_DEFAULT_P 96
3777
#define ZGEMM_DEFAULT_P 64
3778
3779
#define SGEMM_DEFAULT_Q 240
3780
#define DGEMM_DEFAULT_Q 120
3781
#define CGEMM_DEFAULT_Q 120
3782
#define ZGEMM_DEFAULT_Q 120
3783
3784
#define SGEMM_DEFAULT_R 12288
3785
#define DGEMM_DEFAULT_R 8192
3786
#define CGEMM_DEFAULT_R 4096
3787
#define ZGEMM_DEFAULT_R 4096
3788
3789
3790
#define SYMV_P  16
3791
#endif
3792
3793
3794
3795
#ifdef CORTEXA9
/* Parameter set selected when CORTEXA9 is defined: 4x4 unroll for SGEMM and
   DGEMM, 2x2 for CGEMM and ZGEMM, plus NUMOPT, offset/alignment, P/Q/R and
   SYMV_P values. */
3796
#define SNUMOPT   2
3797
#define DNUMOPT   2
3798
3799
#define GEMM_DEFAULT_OFFSET_A 0
3800
#define GEMM_DEFAULT_OFFSET_B 0
3801
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
3802
3803
#define SGEMM_DEFAULT_UNROLL_M  4
3804
#define SGEMM_DEFAULT_UNROLL_N  4
3805
3806
#define DGEMM_DEFAULT_UNROLL_M  4
3807
#define DGEMM_DEFAULT_UNROLL_N  4
3808
3809
#define CGEMM_DEFAULT_UNROLL_M  2
3810
#define CGEMM_DEFAULT_UNROLL_N  2
3811
3812
#define ZGEMM_DEFAULT_UNROLL_M  2
3813
#define ZGEMM_DEFAULT_UNROLL_N  2
3814
3815
#define SGEMM_DEFAULT_P 128
3816
#define DGEMM_DEFAULT_P 128
3817
#define CGEMM_DEFAULT_P 96
3818
#define ZGEMM_DEFAULT_P 64
3819
3820
#define SGEMM_DEFAULT_Q 240
3821
#define DGEMM_DEFAULT_Q 120
3822
#define CGEMM_DEFAULT_Q 120
3823
#define ZGEMM_DEFAULT_Q 120
3824
3825
#define SGEMM_DEFAULT_R 12288
3826
#define DGEMM_DEFAULT_R 8192
3827
#define CGEMM_DEFAULT_R 4096
3828
#define ZGEMM_DEFAULT_R 4096
3829
3830
3831
3832
#define SYMV_P  16
3833
#endif
3834
3835
3836
#ifdef CORTEXA15
/* Parameter set selected when CORTEXA15 is defined: 4x4 unroll for SGEMM and
   DGEMM, 2x2 for CGEMM and ZGEMM, plus NUMOPT, offset/alignment, P/Q/R and
   SYMV_P values. */
3837
#define SNUMOPT   2
3838
#define DNUMOPT   2
3839
3840
#define GEMM_DEFAULT_OFFSET_A 0
3841
#define GEMM_DEFAULT_OFFSET_B 0
3842
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
3843
3844
#define SGEMM_DEFAULT_UNROLL_M  4
3845
#define SGEMM_DEFAULT_UNROLL_N  4
3846
3847
#define DGEMM_DEFAULT_UNROLL_M  4
3848
#define DGEMM_DEFAULT_UNROLL_N  4
3849
3850
#define CGEMM_DEFAULT_UNROLL_M  2
3851
#define CGEMM_DEFAULT_UNROLL_N  2
3852
3853
#define ZGEMM_DEFAULT_UNROLL_M  2
3854
#define ZGEMM_DEFAULT_UNROLL_N  2
3855
3856
#define SGEMM_DEFAULT_P 128
3857
#define DGEMM_DEFAULT_P 128
3858
#define CGEMM_DEFAULT_P 96
3859
#define ZGEMM_DEFAULT_P 64
3860
3861
#define SGEMM_DEFAULT_Q 240
3862
#define DGEMM_DEFAULT_Q 120
3863
#define CGEMM_DEFAULT_Q 120
3864
#define ZGEMM_DEFAULT_Q 120
3865
3866
#define SGEMM_DEFAULT_R 12288
3867
#define DGEMM_DEFAULT_R 8192
3868
#define CGEMM_DEFAULT_R 4096
3869
#define ZGEMM_DEFAULT_R 4096
3870
3871
3872
3873
#define SYMV_P  16
3874
#endif
3875
3876
3877
#if defined(ZARCH_GENERIC)
/* Parameter set selected when ZARCH_GENERIC is defined: 2x2 unroll factors
   for all four precisions, plus NUMOPT, offset/alignment, P/Q/R and SYMV_P
   values. */
3878
#define SNUMOPT   2
3879
#define DNUMOPT   2
3880
3881
#define GEMM_DEFAULT_OFFSET_A 0
3882
#define GEMM_DEFAULT_OFFSET_B 0
3883
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
3884
3885
#define SGEMM_DEFAULT_UNROLL_M  2
3886
#define SGEMM_DEFAULT_UNROLL_N  2
3887
3888
#define DGEMM_DEFAULT_UNROLL_M  2
3889
#define DGEMM_DEFAULT_UNROLL_N  2
3890
3891
#define CGEMM_DEFAULT_UNROLL_M  2
3892
#define CGEMM_DEFAULT_UNROLL_N  2
3893
3894
#define ZGEMM_DEFAULT_UNROLL_M  2
3895
#define ZGEMM_DEFAULT_UNROLL_N  2
3896
3897
#define SGEMM_DEFAULT_P 128
3898
#define DGEMM_DEFAULT_P 128
3899
#define CGEMM_DEFAULT_P 96
3900
#define ZGEMM_DEFAULT_P 64
3901
3902
#define SGEMM_DEFAULT_Q 240
3903
#define DGEMM_DEFAULT_Q 120
3904
#define CGEMM_DEFAULT_Q 120
3905
#define ZGEMM_DEFAULT_Q 120
3906
3907
#define SGEMM_DEFAULT_R 12288
3908
#define DGEMM_DEFAULT_R 8192
3909
#define CGEMM_DEFAULT_R 4096
3910
#define ZGEMM_DEFAULT_R 4096
3911
3912
3913
#define SYMV_P  16
3914
#endif
3915
3916
#if defined(Z13)
/* Parameter set selected when Z13 is defined: 8x4 unroll for SGEMM and
   DGEMM, 4x4 for CGEMM and ZGEMM, plus NUMOPT, offset/alignment, P/Q/R and
   SYMV_P values. */
3917
#define SNUMOPT   2
3918
#define DNUMOPT   2
3919
3920
#define GEMM_DEFAULT_OFFSET_A 0
3921
#define GEMM_DEFAULT_OFFSET_B 0
3922
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
3923
3924
#define SGEMM_DEFAULT_UNROLL_M  8
3925
#define SGEMM_DEFAULT_UNROLL_N  4
3926
3927
#define DGEMM_DEFAULT_UNROLL_M  8
3928
#define DGEMM_DEFAULT_UNROLL_N  4
3929
3930
#define CGEMM_DEFAULT_UNROLL_M  4
3931
#define CGEMM_DEFAULT_UNROLL_N  4
3932
3933
#define ZGEMM_DEFAULT_UNROLL_M  4
3934
#define ZGEMM_DEFAULT_UNROLL_N  4
3935
3936
#define SGEMM_DEFAULT_P 456
3937
#define DGEMM_DEFAULT_P 320
3938
#define CGEMM_DEFAULT_P 480
3939
#define ZGEMM_DEFAULT_P 224
3940
3941
#define SGEMM_DEFAULT_Q 488
3942
#define DGEMM_DEFAULT_Q 384
3943
#define CGEMM_DEFAULT_Q 128
3944
#define ZGEMM_DEFAULT_Q 352
3945
3946
#define SGEMM_DEFAULT_R 8192
3947
#define DGEMM_DEFAULT_R 4096
3948
#define CGEMM_DEFAULT_R 4096
3949
#define ZGEMM_DEFAULT_R 2048
3950
3951
3952
#define SYMV_P  16
3953
#endif
3954
3955
3956
#if defined(Z14)
/* Parameter set selected when Z14 is defined: 16x4 unroll for SGEMM, 8x4 for
   DGEMM, 4x4 for CGEMM and ZGEMM, plus NUMOPT, offset/alignment, P/Q/R and
   SYMV_P values.
   NOTE(review): GEMM_DEFAULT_ALIGN here has no (BLASLONG) cast, unlike the
   Z13 and ZARCH_GENERIC sets in this file -- confirm this is intentional. */
3957
#define SNUMOPT   2
3958
#define DNUMOPT   2
3959
3960
#define GEMM_DEFAULT_OFFSET_A 0
3961
#define GEMM_DEFAULT_OFFSET_B 0
3962
#define GEMM_DEFAULT_ALIGN 0x03fffUL
3963
3964
#define SGEMM_DEFAULT_UNROLL_M  16
3965
#define SGEMM_DEFAULT_UNROLL_N  4
3966
3967
#define DGEMM_DEFAULT_UNROLL_M  8
3968
#define DGEMM_DEFAULT_UNROLL_N  4
3969
3970
#define CGEMM_DEFAULT_UNROLL_M  4
3971
#define CGEMM_DEFAULT_UNROLL_N  4
3972
3973
#define ZGEMM_DEFAULT_UNROLL_M  4
3974
#define ZGEMM_DEFAULT_UNROLL_N  4
3975
3976
#define SGEMM_DEFAULT_P 480
3977
#define DGEMM_DEFAULT_P 320
3978
#define CGEMM_DEFAULT_P 480
3979
#define ZGEMM_DEFAULT_P 224
3980
3981
#define SGEMM_DEFAULT_Q 512
3982
#define DGEMM_DEFAULT_Q 384
3983
#define CGEMM_DEFAULT_Q 128
3984
#define ZGEMM_DEFAULT_Q 352
3985
3986
#define SGEMM_DEFAULT_R 8192
3987
#define DGEMM_DEFAULT_R 4096
3988
#define CGEMM_DEFAULT_R 4096
3989
#define ZGEMM_DEFAULT_R 2048
3990
3991
3992
#define SYMV_P  16
3993
#endif
3994
3995
#if defined(CSKY) || defined(CK860FV)
/* Parameter set selected when CSKY or CK860FV is defined: GEMM A/B offsets
   and GEMM_DEFAULT_ALIGN (cast to BLASLONG), 2x2 unroll factors for all four
   precisions, P/Q/R defaults and SYMV_P. GEMM_DEFAULT_OFFSET_A/B are defined
   once, at the top of the set. */
3996
#define GEMM_DEFAULT_OFFSET_A 0
3997
#define GEMM_DEFAULT_OFFSET_B 0
3998
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
3999
4000
#define SGEMM_DEFAULT_UNROLL_M  2
4001
#define SGEMM_DEFAULT_UNROLL_N  2
4002
4003
#define DGEMM_DEFAULT_UNROLL_M  2
4004
#define DGEMM_DEFAULT_UNROLL_N  2
4005
4006
#define CGEMM_DEFAULT_UNROLL_M  2
4007
#define CGEMM_DEFAULT_UNROLL_N  2
4008
4009
#define ZGEMM_DEFAULT_UNROLL_M  2
4010
#define ZGEMM_DEFAULT_UNROLL_N  2
4011
4012
#define SGEMM_DEFAULT_P 128
4013
#define DGEMM_DEFAULT_P 128
4014
#define CGEMM_DEFAULT_P 96
4015
#define ZGEMM_DEFAULT_P 64
4016
4017
#define SGEMM_DEFAULT_Q 240
4018
#define DGEMM_DEFAULT_Q 120
4019
#define CGEMM_DEFAULT_Q 120
4020
#define ZGEMM_DEFAULT_Q 120
4021
4022
#define SGEMM_DEFAULT_R 12288
4023
#define DGEMM_DEFAULT_R 8192
4024
#define CGEMM_DEFAULT_R 4096
4025
#define ZGEMM_DEFAULT_R 4096
4026
4027
#define SYMV_P  16
4028
4029
4030
4031
4032
#endif
4033
4034
#ifdef GENERIC
/* Parameter set selected when GENERIC is defined. UNROLL_N is set
   unconditionally; UNROLL_M and the P/Q/R defaults depend on the ARCH_*
   checks below. */
4035
4036
#define SNUMOPT   2
4037
#define DNUMOPT   2
4038
4039
#define GEMM_DEFAULT_OFFSET_A 0
4040
#define GEMM_DEFAULT_OFFSET_B 0
4041
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
4042
4043
#define SGEMM_DEFAULT_UNROLL_N 2
4044
#define DGEMM_DEFAULT_UNROLL_N 2
4045
#define QGEMM_DEFAULT_UNROLL_N 2
4046
#define CGEMM_DEFAULT_UNROLL_N 2
4047
#define ZGEMM_DEFAULT_UNROLL_N 2
4048
#define XGEMM_DEFAULT_UNROLL_N 1
4049
#define CGEMM3M_DEFAULT_UNROLL_N 2
4050
#define ZGEMM3M_DEFAULT_UNROLL_N 2
4051
4052
#ifdef ARCH_X86
/* Both branches assign the same S/D/Q/C/Z/XGEMM UNROLL_M values; only the
   non-x86 branch additionally defines the GEMM3M UNROLL_M and P/Q/R
   defaults. */
4053
#define SGEMM_DEFAULT_UNROLL_M 2
4054
#define DGEMM_DEFAULT_UNROLL_M 2
4055
#define QGEMM_DEFAULT_UNROLL_M 2
4056
#define CGEMM_DEFAULT_UNROLL_M 2
4057
#define ZGEMM_DEFAULT_UNROLL_M 2
4058
#define XGEMM_DEFAULT_UNROLL_M 1
4059
#else
4060
#define SGEMM_DEFAULT_UNROLL_M 2
4061
#define DGEMM_DEFAULT_UNROLL_M 2
4062
#define QGEMM_DEFAULT_UNROLL_M 2
4063
#define CGEMM_DEFAULT_UNROLL_M 2
4064
#define ZGEMM_DEFAULT_UNROLL_M 2
4065
#define XGEMM_DEFAULT_UNROLL_M 1
4066
#define CGEMM3M_DEFAULT_UNROLL_M 2
4067
#define ZGEMM3M_DEFAULT_UNROLL_M 2
4068
#define CGEMM3M_DEFAULT_P 448
4069
#define ZGEMM3M_DEFAULT_P 224
4070
#define XGEMM3M_DEFAULT_P 112
4071
#define CGEMM3M_DEFAULT_Q 224
4072
#define ZGEMM3M_DEFAULT_Q 224
4073
#define XGEMM3M_DEFAULT_Q 224
4074
#define CGEMM3M_DEFAULT_R 12288
4075
#define ZGEMM3M_DEFAULT_R 12288
4076
#define XGEMM3M_DEFAULT_R 12288
4077
4078
#endif
4079
4080
#ifdef ARCH_MIPS
/* ARCH_MIPS and ARCH_LOONGARCH64 get literal P/Q/R values. Any other
   architecture makes the P and R defaults expand to the identifiers
   sgemm_p, dgemm_p, ... and sgemm_r, dgemm_r, ... (not declared in this part
   of the file), while Q is fixed at 128. */
4081
#define SGEMM_DEFAULT_P  128
4082
#define DGEMM_DEFAULT_P  128
4083
#define CGEMM_DEFAULT_P  96
4084
#define ZGEMM_DEFAULT_P  64
4085
#define SGEMM_DEFAULT_Q  240
4086
#define DGEMM_DEFAULT_Q  120
4087
#define CGEMM_DEFAULT_Q  120
4088
#define ZGEMM_DEFAULT_Q  120
4089
#define SGEMM_DEFAULT_R  12288
4090
#define DGEMM_DEFAULT_R  8192
4091
#define CGEMM_DEFAULT_R  4096
4092
#define ZGEMM_DEFAULT_R  4096
4093
#elif defined(ARCH_LOONGARCH64)
4094
#define SGEMM_DEFAULT_P 128
4095
#define DGEMM_DEFAULT_P 128
4096
#define CGEMM_DEFAULT_P 96
4097
#define ZGEMM_DEFAULT_P 64
4098
4099
#define SGEMM_DEFAULT_Q 240
4100
#define DGEMM_DEFAULT_Q 120
4101
#define CGEMM_DEFAULT_Q 120
4102
#define ZGEMM_DEFAULT_Q 120
4103
4104
#define SGEMM_DEFAULT_R 12288
4105
#define DGEMM_DEFAULT_R 8192
4106
#define CGEMM_DEFAULT_R 4096
4107
#define ZGEMM_DEFAULT_R 4096
4108
#else
4109
#define SGEMM_DEFAULT_P sgemm_p
4110
#define DGEMM_DEFAULT_P dgemm_p
4111
#define QGEMM_DEFAULT_P qgemm_p
4112
#define CGEMM_DEFAULT_P cgemm_p
4113
#define ZGEMM_DEFAULT_P zgemm_p
4114
#define XGEMM_DEFAULT_P xgemm_p
4115
4116
#define SGEMM_DEFAULT_R sgemm_r
4117
#define DGEMM_DEFAULT_R dgemm_r
4118
#define QGEMM_DEFAULT_R qgemm_r
4119
#define CGEMM_DEFAULT_R cgemm_r
4120
#define ZGEMM_DEFAULT_R zgemm_r
4121
#define XGEMM_DEFAULT_R xgemm_r
4122
4123
#define SGEMM_DEFAULT_Q 128
4124
#define DGEMM_DEFAULT_Q 128
4125
#define QGEMM_DEFAULT_Q 128
4126
#define CGEMM_DEFAULT_Q 128
4127
#define ZGEMM_DEFAULT_Q 128
4128
#define XGEMM_DEFAULT_Q 128
4129
#endif
4130
4131
#define SYMV_P  16
4132
4133
#endif
4134
4135
#ifndef SWITCH_RATIO
/* Fallbacks: each macro in this group is defined only if nothing earlier
   defined it. SWITCH_RATIO defaults to 2; the QGEMM and XGEMM unroll factors
   default to 2. */
4136
#define SWITCH_RATIO 2
4137
#endif
4138
4139
#ifndef QGEMM_DEFAULT_UNROLL_M
4140
#define QGEMM_DEFAULT_UNROLL_M 2
4141
#endif
4142
4143
#ifndef QGEMM_DEFAULT_UNROLL_N
4144
#define QGEMM_DEFAULT_UNROLL_N 2
4145
#endif
4146
4147
#ifndef XGEMM_DEFAULT_UNROLL_M
4148
#define XGEMM_DEFAULT_UNROLL_M 2
4149
#endif
4150
4151
#ifndef XGEMM_DEFAULT_UNROLL_N
4152
#define XGEMM_DEFAULT_UNROLL_N 2
4153
#endif
4154
4155
#ifndef HAVE_SSE2
/* When HAVE_SSE2 is not defined, SHUFPD_0..SHUFPD_3 expand to shufps with
   the immediates 0x44, 0x4e, 0xe4 and 0xee. Otherwise the #ifndef fallbacks
   below define them as shufpd with immediates 0..3. Every expansion ends
   with a comma. */
4156
#define SHUFPD_0  shufps  $0x44,
4157
#define SHUFPD_1  shufps  $0x4e,
4158
#define SHUFPD_2  shufps  $0xe4,
4159
#define SHUFPD_3  shufps  $0xee,
4160
#endif
4161
4162
#ifndef SHUFPD_0
4163
#define SHUFPD_0  shufpd  $0,
4164
#endif
4165
4166
#ifndef SHUFPD_1
4167
#define SHUFPD_1  shufpd  $1,
4168
#endif
4169
4170
#ifndef SHUFPD_2
4171
#define SHUFPD_2  shufpd  $2,
4172
#endif
4173
4174
#ifndef SHUFPD_3
4175
#define SHUFPD_3  shufpd  $3,
4176
#endif
4177
4178
#ifndef SHUFPS_39
/* SHUFPS_39 is defined here only if nothing earlier defined it. */
4179
#define SHUFPS_39 shufps  $0x39,
4180
#endif
4181
4182
4183
#endif