Coverage Report

Created: 2025-09-01 04:36

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/contrib/openblas/common_param.h
Line
Count
Source
1
/*********************************************************************/
2
/* Copyright 2009, 2010 The University of Texas at Austin.           */
3
/* Copyright 2023 The OpenBLAS Project.                              */
4
/* All rights reserved.                                              */
5
/*                                                                   */
6
/* Redistribution and use in source and binary forms, with or        */
7
/* without modification, are permitted provided that the following   */
8
/* conditions are met:                                               */
9
/*                                                                   */
10
/*   1. Redistributions of source code must retain the above         */
11
/*      copyright notice, this list of conditions and the following  */
12
/*      disclaimer.                                                  */
13
/*                                                                   */
14
/*   2. Redistributions in binary form must reproduce the above      */
15
/*      copyright notice, this list of conditions and the following  */
16
/*      disclaimer in the documentation and/or other materials       */
17
/*      provided with the distribution.                              */
18
/*                                                                   */
19
/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
20
/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
21
/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
22
/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
23
/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
24
/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
25
/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
26
/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
27
/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
28
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
29
/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
30
/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
31
/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
32
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
33
/*                                                                   */
34
/* The views and conclusions contained in the software and           */
35
/* documentation are those of the authors and should not be          */
36
/* interpreted as representing official policies, either expressed   */
37
/* or implied, of The University of Texas at Austin.                 */
38
/*********************************************************************/
39
40
#ifndef COMMON_PARAM_H
41
#define COMMON_PARAM_H
42
43
#ifndef ASSEMBLER
44
45
#ifdef DYNAMIC_ARCH
46
47
typedef struct {
48
  int dtb_entries;
49
  int switch_ratio;
50
  int offsetA, offsetB, align;
51
52
#if BUILD_BFLOAT16 == 1
53
  int sbgemm_p, sbgemm_q, sbgemm_r;
54
  int sbgemm_unroll_m, sbgemm_unroll_n, sbgemm_unroll_mn;
55
  int sbgemm_align_k;
56
  int need_amxtile_permission;  // 0 default, 1 for device support amx.
57
58
  void   (*sbstobf16_k) (BLASLONG, float    *, BLASLONG, bfloat16 *, BLASLONG);
59
  void   (*sbdtobf16_k) (BLASLONG, double   *, BLASLONG, bfloat16 *, BLASLONG);
60
  void   (*sbf16tos_k)  (BLASLONG, bfloat16 *, BLASLONG, float    *, BLASLONG);
61
  void   (*dbf16tod_k)  (BLASLONG, bfloat16 *, BLASLONG, double   *, BLASLONG);
62
63
  float  (*sbamax_k) (BLASLONG, float *, BLASLONG);
64
  float  (*sbamin_k) (BLASLONG, float *, BLASLONG);
65
  float  (*sbmax_k)  (BLASLONG, float *, BLASLONG);
66
  float  (*sbmin_k)  (BLASLONG, float *, BLASLONG);
67
BLASLONG (*isbamax_k)(BLASLONG, float *, BLASLONG);
68
BLASLONG (*isbamin_k)(BLASLONG, float *, BLASLONG);
69
BLASLONG (*isbmax_k) (BLASLONG, float *, BLASLONG);
70
BLASLONG (*isbmin_k) (BLASLONG, float *, BLASLONG);
71
72
  float  (*sbnrm2_k) (BLASLONG, float *, BLASLONG);
73
  float  (*sbasum_k) (BLASLONG, float *, BLASLONG);
74
  float  (*sbsum_k)  (BLASLONG, float *, BLASLONG);
75
  int    (*sbcopy_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
76
  float  (*sbdot_k)  (BLASLONG, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG);
77
  double (*dsbdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
78
79
  int    (*sbrot_k)  (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float);
80
  int    (*sbrotm_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
81
82
  int    (*sbaxpy_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
83
  int    (*sbscal_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
84
  int    (*sbswap_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
85
86
  int    (*sbgemv_n) (BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG);
87
  int    (*sbgemv_t) (BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG);
88
  int    (*sbger_k)  (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
89
90
  int    (*sbsymv_L) (BLASLONG, BLASLONG, float,  float  *, BLASLONG, float  *, BLASLONG, float  *, BLASLONG, float *);
91
  int    (*sbsymv_U) (BLASLONG, BLASLONG, float,  float  *, BLASLONG, float  *, BLASLONG, float  *, BLASLONG, float *);
92
93
  int    (*sbgemm_kernel   )(BLASLONG, BLASLONG, BLASLONG, float, bfloat16 *, bfloat16 *, float *, BLASLONG);
94
  int    (*sbgemm_beta     )(BLASLONG, BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float *, BLASLONG);
95
96
  int    (*sbgemm_incopy   )(BLASLONG, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *);
97
  int    (*sbgemm_itcopy   )(BLASLONG, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *);
98
  int    (*sbgemm_oncopy   )(BLASLONG, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *);
99
  int    (*sbgemm_otcopy   )(BLASLONG, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *);
100
101
  int    (*sbtrsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
102
  int    (*sbtrsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
103
  int    (*sbtrsm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
104
  int    (*sbtrsm_kernel_RT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
105
106
  int    (*sbtrsm_iunucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
107
  int    (*sbtrsm_iunncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
108
  int    (*sbtrsm_iutucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
109
  int    (*sbtrsm_iutncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
110
  int    (*sbtrsm_ilnucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
111
  int    (*sbtrsm_ilnncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
112
  int    (*sbtrsm_iltucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
113
  int    (*sbtrsm_iltncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
114
  int    (*sbtrsm_ounucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
115
  int    (*sbtrsm_ounncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
116
  int    (*sbtrsm_outucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
117
  int    (*sbtrsm_outncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
118
  int    (*sbtrsm_olnucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
119
  int    (*sbtrsm_olnncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
120
  int    (*sbtrsm_oltucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
121
  int    (*sbtrsm_oltncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
122
123
  int    (*sbtrmm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
124
  int    (*sbtrmm_kernel_RT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
125
  int    (*sbtrmm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
126
  int    (*sbtrmm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
127
128
  int    (*sbtrmm_iunucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
129
  int    (*sbtrmm_iunncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
130
  int    (*sbtrmm_iutucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
131
  int    (*sbtrmm_iutncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
132
  int    (*sbtrmm_ilnucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
133
  int    (*sbtrmm_ilnncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
134
  int    (*sbtrmm_iltucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
135
  int    (*sbtrmm_iltncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
136
  int    (*sbtrmm_ounucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
137
  int    (*sbtrmm_ounncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
138
  int    (*sbtrmm_outucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
139
  int    (*sbtrmm_outncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
140
  int    (*sbtrmm_olnucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
141
  int    (*sbtrmm_olnncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
142
  int    (*sbtrmm_oltucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
143
  int    (*sbtrmm_oltncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
144
145
  int    (*sbsymm_iutcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
146
  int    (*sbsymm_iltcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
147
  int    (*sbsymm_outcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
148
  int    (*sbsymm_oltcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
149
150
  int  (*sbneg_tcopy)   (BLASLONG, BLASLONG, float *, BLASLONG, float *);
151
  int    (*sblaswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG, blasint *, float *);
152
153
#ifdef SMALL_MATRIX_OPT
154
  int    (*sbgemm_small_matrix_permit)(int transa, int transb, BLASLONG m, BLASLONG n, BLASLONG k, float alpha, float beta);
155
156
  int    (*sbgemm_small_kernel_nn    )(BLASLONG m, BLASLONG n, BLASLONG k, bfloat16 * A, BLASLONG lda, float alpha, bfloat16 * B, BLASLONG ldb, float beta, float * C, BLASLONG ldc);
157
  int    (*sbgemm_small_kernel_nt    )(BLASLONG m, BLASLONG n, BLASLONG k, bfloat16 * A, BLASLONG lda, float alpha, bfloat16 * B, BLASLONG ldb, float beta, float * C, BLASLONG ldc);
158
  int    (*sbgemm_small_kernel_tn    )(BLASLONG m, BLASLONG n, BLASLONG k, bfloat16 * A, BLASLONG lda, float alpha, bfloat16 * B, BLASLONG ldb, float beta, float * C, BLASLONG ldc);
159
  int    (*sbgemm_small_kernel_tt    )(BLASLONG m, BLASLONG n, BLASLONG k, bfloat16 * A, BLASLONG lda, float alpha, bfloat16 * B, BLASLONG ldb, float beta, float * C, BLASLONG ldc);
160
161
  int    (*sbgemm_small_kernel_b0_nn )(BLASLONG m, BLASLONG n, BLASLONG k, bfloat16 * A, BLASLONG lda, float alpha, bfloat16 * B, BLASLONG ldb, float * C, BLASLONG ldc);
162
  int    (*sbgemm_small_kernel_b0_nt )(BLASLONG m, BLASLONG n, BLASLONG k, bfloat16 * A, BLASLONG lda, float alpha, bfloat16 * B, BLASLONG ldb, float * C, BLASLONG ldc);
163
  int    (*sbgemm_small_kernel_b0_tn )(BLASLONG m, BLASLONG n, BLASLONG k, bfloat16 * A, BLASLONG lda, float alpha, bfloat16 * B, BLASLONG ldb, float * C, BLASLONG ldc);
164
  int    (*sbgemm_small_kernel_b0_tt )(BLASLONG m, BLASLONG n, BLASLONG k, bfloat16 * A, BLASLONG lda, float alpha, bfloat16 * B, BLASLONG ldb, float * C, BLASLONG ldc);
165
#endif
166
#endif
167
168
#if (BUILD_SINGLE == 1) || (BUILD_DOUBLE == 1) || (BUILD_COMPLEX == 1) || (BUILD_COMPLEX16 == 1)
169
  int sgemm_p, sgemm_q, sgemm_r;
170
  int sgemm_unroll_m, sgemm_unroll_n, sgemm_unroll_mn;
171
#endif
172
173
  int exclusive_cache;
174
175
#if (BUILD_SINGLE == 1) || (BUILD_COMPLEX == 1)
176
  float  (*samax_k) (BLASLONG, float *, BLASLONG);
177
  float  (*samin_k) (BLASLONG, float *, BLASLONG);
178
  float  (*smax_k)  (BLASLONG, float *, BLASLONG);
179
  float  (*smin_k)  (BLASLONG, float *, BLASLONG);
180
#endif
181
182
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE ==1) || (BUILD_COMPLEX==1)
183
BLASLONG (*isamax_k)(BLASLONG, float *, BLASLONG);
184
#endif
185
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
186
BLASLONG (*isamin_k)(BLASLONG, float *, BLASLONG);
187
BLASLONG (*ismax_k) (BLASLONG, float *, BLASLONG);
188
BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
189
  float  (*snrm2_k) (BLASLONG, float *, BLASLONG);
190
  float  (*sasum_k) (BLASLONG, float *, BLASLONG);
191
#endif
192
#if (BUILD_SINGLE==1)
193
  float  (*ssum_k)  (BLASLONG, float *, BLASLONG);
194
#endif  
195
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
196
  int    (*scopy_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
197
  float  (*sdot_k)  (BLASLONG, float *, BLASLONG, float *, BLASLONG);
198
  //double (*dsdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
199
200
  int    (*srot_k)  (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float);
201
  int    (*srotm_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
202
#endif
203
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
204
  int    (*saxpy_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
205
#endif
206
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
207
  int    (*sscal_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
208
#endif
209
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
210
  int    (*sswap_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
211
212
  int    (*sgemv_n) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
213
  int    (*sgemv_t) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
214
#endif
215
#if (BUILD_SINGLE==1)
216
  int    (*sger_k)  (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
217
  int    (*ssymv_L) (BLASLONG, BLASLONG, float,  float  *, BLASLONG, float  *, BLASLONG, float  *, BLASLONG, float *);
218
  int    (*ssymv_U) (BLASLONG, BLASLONG, float,  float  *, BLASLONG, float  *, BLASLONG, float  *, BLASLONG, float *);
219
#endif
220
221
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
222
#ifdef ARCH_X86_64
223
  void (*sgemm_direct) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG , float *, BLASLONG , float * , BLASLONG);
224
  int  (*sgemm_direct_performant) (BLASLONG M, BLASLONG N, BLASLONG K);
225
#endif
226
#ifdef ARCH_ARM64
227
  void (*sgemm_direct) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG , float *, BLASLONG , float * , BLASLONG);
228
#endif
229
230
  
231
  int    (*sgemm_kernel   )(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG);
232
  int    (*sgemm_beta     )(BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float  *, BLASLONG);
233
234
235
  int    (*sgemm_incopy   )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
236
  int    (*sgemm_itcopy   )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
237
  int    (*sgemm_oncopy   )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
238
  int    (*sgemm_otcopy   )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
239
#endif
240
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
241
#ifdef SMALL_MATRIX_OPT
242
  int    (*sgemm_small_matrix_permit)(int transa, int transb, BLASLONG m, BLASLONG n, BLASLONG k, float alpha, float beta);
243
244
  int    (*sgemm_small_kernel_nn    )(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha, float * B, BLASLONG ldb, float beta, float * C, BLASLONG ldc);
245
  int    (*sgemm_small_kernel_nt    )(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha, float * B, BLASLONG ldb, float beta, float * C, BLASLONG ldc);
246
  int    (*sgemm_small_kernel_tn    )(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha, float * B, BLASLONG ldb, float beta, float * C, BLASLONG ldc);
247
  int    (*sgemm_small_kernel_tt    )(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha, float * B, BLASLONG ldb, float beta, float * C, BLASLONG ldc);
248
249
  int    (*sgemm_small_kernel_b0_nn )(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha, float * B, BLASLONG ldb, float * C, BLASLONG ldc);
250
  int    (*sgemm_small_kernel_b0_nt )(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha, float * B, BLASLONG ldb, float * C, BLASLONG ldc);
251
  int    (*sgemm_small_kernel_b0_tn )(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha, float * B, BLASLONG ldb, float * C, BLASLONG ldc);
252
  int    (*sgemm_small_kernel_b0_tt )(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha, float * B, BLASLONG ldb, float * C, BLASLONG ldc);
253
#endif
254
255
  int    (*strsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
256
  int    (*strsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
257
  int    (*strsm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
258
  int    (*strsm_kernel_RT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
259
260
  int    (*strsm_iunucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
261
  int    (*strsm_iunncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
262
  int    (*strsm_iutucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
263
  int    (*strsm_iutncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
264
  int    (*strsm_ilnucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
265
  int    (*strsm_ilnncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
266
  int    (*strsm_iltucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
267
  int    (*strsm_iltncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
268
  int    (*strsm_ounucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
269
  int    (*strsm_ounncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
270
  int    (*strsm_outucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
271
  int    (*strsm_outncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
272
  int    (*strsm_olnucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
273
  int    (*strsm_olnncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
274
  int    (*strsm_oltucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
275
  int    (*strsm_oltncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
276
#endif
277
#if (BUILD_SINGLE==1)
278
  int    (*strmm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
279
  int    (*strmm_kernel_RT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
280
  int    (*strmm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
281
  int    (*strmm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
282
283
  int    (*strmm_iunucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
284
  int    (*strmm_iunncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
285
  int    (*strmm_iutucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
286
  int    (*strmm_iutncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
287
  int    (*strmm_ilnucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
288
  int    (*strmm_ilnncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
289
  int    (*strmm_iltucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
290
  int    (*strmm_iltncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
291
  int    (*strmm_ounucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
292
  int    (*strmm_ounncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
293
  int    (*strmm_outucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
294
  int    (*strmm_outncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
295
  int    (*strmm_olnucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
296
  int    (*strmm_olnncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
297
  int    (*strmm_oltucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
298
  int    (*strmm_oltncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
299
300
  int    (*ssymm_iutcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
301
  int    (*ssymm_iltcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
302
  int    (*ssymm_outcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
303
  int    (*ssymm_oltcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
304
305
  int  (*sneg_tcopy)   (BLASLONG, BLASLONG, float *, BLASLONG, float *);
306
  int    (*slaswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG, blasint *, float *);
307
#endif
308
309
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
310
  int dgemm_p, dgemm_q, dgemm_r;
311
  int dgemm_unroll_m, dgemm_unroll_n, dgemm_unroll_mn;
312
#endif
313
314
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
315
  double (*damax_k) (BLASLONG, double *, BLASLONG);
316
  double (*damin_k) (BLASLONG, double *, BLASLONG);
317
  double (*dmax_k)  (BLASLONG, double *, BLASLONG);
318
  double (*dmin_k)  (BLASLONG, double *, BLASLONG);
319
BLASLONG (*idamax_k)(BLASLONG, double *, BLASLONG);
320
BLASLONG (*idamin_k)(BLASLONG, double *, BLASLONG);
321
BLASLONG (*idmax_k) (BLASLONG, double *, BLASLONG);
322
BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG);
323
324
  double (*dnrm2_k) (BLASLONG, double *, BLASLONG);
325
  double (*dasum_k) (BLASLONG, double *, BLASLONG);
326
#endif
327
#if (BUILD_DOUBLE==1)
328
  double (*dsum_k)  (BLASLONG, double *, BLASLONG);
329
#endif
330
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
331
  int    (*dcopy_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
332
  double (*ddot_k)  (BLASLONG, double *, BLASLONG, double *, BLASLONG);
333
#endif
334
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
335
  double (*dsdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
336
#endif
337
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
338
  int    (*drot_k)  (BLASLONG, double *, BLASLONG, double *, BLASLONG, double, double);
339
  int    (*drotm_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
340
  int    (*daxpy_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
341
  int    (*dscal_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
342
  int    (*dswap_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
343
  int    (*dgemv_n) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
344
  int    (*dgemv_t) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
345
#endif
346
#if (BUILD_DOUBLE==1)
347
  int    (*dger_k)  (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
348
349
  int    (*dsymv_L) (BLASLONG, BLASLONG, double,  double  *, BLASLONG, double  *, BLASLONG, double  *, BLASLONG, double *);
350
  int    (*dsymv_U) (BLASLONG, BLASLONG, double,  double  *, BLASLONG, double  *, BLASLONG, double  *, BLASLONG, double *);
351
#endif
352
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
353
  int    (*dgemm_kernel   )(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG);
354
  int    (*dgemm_beta     )(BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double  *, BLASLONG);
355
356
  int    (*dgemm_incopy   )(BLASLONG, BLASLONG, double *, BLASLONG, double *);
357
  int    (*dgemm_itcopy   )(BLASLONG, BLASLONG, double *, BLASLONG, double *);
358
  int    (*dgemm_oncopy   )(BLASLONG, BLASLONG, double *, BLASLONG, double *);
359
  int    (*dgemm_otcopy   )(BLASLONG, BLASLONG, double *, BLASLONG, double *);
360
#endif
361
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
362
#ifdef SMALL_MATRIX_OPT
363
  int    (*dgemm_small_matrix_permit)(int transa, int transb, BLASLONG m, BLASLONG n, BLASLONG k, double alpha, double beta);
364
365
  int    (*dgemm_small_kernel_nn    )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha, double * B, BLASLONG ldb, double beta, double * C, BLASLONG ldc);
366
  int    (*dgemm_small_kernel_nt    )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha, double * B, BLASLONG ldb, double beta, double * C, BLASLONG ldc);
367
  int    (*dgemm_small_kernel_tn    )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha, double * B, BLASLONG ldb, double beta, double * C, BLASLONG ldc);
368
  int    (*dgemm_small_kernel_tt    )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha, double * B, BLASLONG ldb, double beta, double * C, BLASLONG ldc);
369
370
  int    (*dgemm_small_kernel_b0_nn )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha, double * B, BLASLONG ldb, double * C, BLASLONG ldc);
371
  int    (*dgemm_small_kernel_b0_nt )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha, double * B, BLASLONG ldb, double * C, BLASLONG ldc);
372
  int    (*dgemm_small_kernel_b0_tn )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha, double * B, BLASLONG ldb, double * C, BLASLONG ldc);
373
  int    (*dgemm_small_kernel_b0_tt )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha, double * B, BLASLONG ldb, double * C, BLASLONG ldc);
374
#endif
375
#endif
376
#if (BUILD_DOUBLE==1)
377
  int    (*dtrsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
378
  int    (*dtrsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
379
  int    (*dtrsm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
380
  int    (*dtrsm_kernel_RT)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
381
382
  int    (*dtrsm_iunucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, double *);
383
  int    (*dtrsm_iunncopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, double *);
384
  int    (*dtrsm_iutucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, double *);
385
  int    (*dtrsm_iutncopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, double *);
386
  int    (*dtrsm_ilnucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, double *);
387
  int    (*dtrsm_ilnncopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, double *);
388
  int    (*dtrsm_iltucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, double *);
389
  int    (*dtrsm_iltncopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, double *);
390
  int    (*dtrsm_ounucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, double *);
391
  int    (*dtrsm_ounncopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, double *);
392
  int    (*dtrsm_outucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, double *);
393
  int    (*dtrsm_outncopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, double *);
394
  int    (*dtrsm_olnucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, double *);
395
  int    (*dtrsm_olnncopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, double *);
396
  int    (*dtrsm_oltucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, double *);
397
  int    (*dtrsm_oltncopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, double *);
398
399
  int    (*dtrmm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
400
  int    (*dtrmm_kernel_RT)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
401
  int    (*dtrmm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
402
  int    (*dtrmm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
403
404
  int    (*dtrmm_iunucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
405
  int    (*dtrmm_iunncopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
406
  int    (*dtrmm_iutucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
407
  int    (*dtrmm_iutncopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
408
  int    (*dtrmm_ilnucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
409
  int    (*dtrmm_ilnncopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
410
  int    (*dtrmm_iltucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
411
  int    (*dtrmm_iltncopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
412
  int    (*dtrmm_ounucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
413
  int    (*dtrmm_ounncopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
414
  int    (*dtrmm_outucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
415
  int    (*dtrmm_outncopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
416
  int    (*dtrmm_olnucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
417
  int    (*dtrmm_olnncopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
418
  int    (*dtrmm_oltucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
419
  int    (*dtrmm_oltncopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
420
421
  int    (*dsymm_iutcopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
422
  int    (*dsymm_iltcopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
423
  int    (*dsymm_outcopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
424
  int    (*dsymm_oltcopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
425
426
  int  (*dneg_tcopy)   (BLASLONG, BLASLONG, double *, BLASLONG, double *);
427
  int    (*dlaswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, double *, BLASLONG, blasint *, double *);
428
#endif
429
#ifdef EXPRECISION
430
431
  int qgemm_p, qgemm_q, qgemm_r;
432
  int qgemm_unroll_m, qgemm_unroll_n, qgemm_unroll_mn;
433
434
 xdouble (*qamax_k) (BLASLONG, xdouble *, BLASLONG);
435
 xdouble (*qamin_k) (BLASLONG, xdouble *, BLASLONG);
436
 xdouble (*qmax_k)  (BLASLONG, xdouble *, BLASLONG);
437
 xdouble (*qmin_k)  (BLASLONG, xdouble *, BLASLONG);
438
BLASLONG (*iqamax_k)(BLASLONG, xdouble *, BLASLONG);
439
BLASLONG (*iqamin_k)(BLASLONG, xdouble *, BLASLONG);
440
BLASLONG (*iqmax_k) (BLASLONG, xdouble *, BLASLONG);
441
BLASLONG (*iqmin_k) (BLASLONG, xdouble *, BLASLONG);
442
443
 xdouble (*qnrm2_k) (BLASLONG, xdouble *, BLASLONG);
444
 xdouble (*qasum_k) (BLASLONG, xdouble *, BLASLONG);
445
 xdouble (*qsum_k)  (BLASLONG, xdouble *, BLASLONG);
446
  int    (*qcopy_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
447
 xdouble (*qdot_k)  (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
448
  int    (*qrot_k)  (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble, xdouble);
449
  int    (*qrotm_k)  (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *);
450
451
  int    (*qaxpy_k) (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
452
  int    (*qscal_k) (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
453
  int    (*qswap_k) (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
454
455
  int    (*qgemv_n) (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *);
456
  int    (*qgemv_t) (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *);
457
  int    (*qger_k)  (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *);
458
459
  int    (*qsymv_L) (BLASLONG, BLASLONG, xdouble,  xdouble  *, BLASLONG, xdouble  *, BLASLONG, xdouble  *, BLASLONG, xdouble *);
460
  int    (*qsymv_U) (BLASLONG, BLASLONG, xdouble,  xdouble  *, BLASLONG, xdouble  *, BLASLONG, xdouble  *, BLASLONG, xdouble *);
461
462
  int    (*qgemm_kernel   )(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG);
463
  int    (*qgemm_beta     )(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble  *, BLASLONG);
464
465
  int    (*qgemm_incopy   )(BLASLONG, BLASLONG, xdouble *, BLASLONG, xdouble *);
466
  int    (*qgemm_itcopy   )(BLASLONG, BLASLONG, xdouble *, BLASLONG, xdouble *);
467
  int    (*qgemm_oncopy   )(BLASLONG, BLASLONG, xdouble *, BLASLONG, xdouble *);
468
  int    (*qgemm_otcopy   )(BLASLONG, BLASLONG, xdouble *, BLASLONG, xdouble *);
469
470
  int    (*qtrsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
471
  int    (*qtrsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
472
  int    (*qtrsm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
473
  int    (*qtrsm_kernel_RT)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
474
475
  int    (*qtrsm_iunucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, xdouble *);
476
  int    (*qtrsm_iunncopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, xdouble *);
477
  int    (*qtrsm_iutucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, xdouble *);
478
  int    (*qtrsm_iutncopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, xdouble *);
479
  int    (*qtrsm_ilnucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, xdouble *);
480
  int    (*qtrsm_ilnncopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, xdouble *);
481
  int    (*qtrsm_iltucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, xdouble *);
482
  int    (*qtrsm_iltncopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, xdouble *);
483
  int    (*qtrsm_ounucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, xdouble *);
484
  int    (*qtrsm_ounncopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, xdouble *);
485
  int    (*qtrsm_outucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, xdouble *);
486
  int    (*qtrsm_outncopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, xdouble *);
487
  int    (*qtrsm_olnucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, xdouble *);
488
  int    (*qtrsm_olnncopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, xdouble *);
489
  int    (*qtrsm_oltucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, xdouble *);
490
  int    (*qtrsm_oltncopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, xdouble *);
491
492
  int    (*qtrmm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
493
  int    (*qtrmm_kernel_RT)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
494
  int    (*qtrmm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
495
  int    (*qtrmm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
496
497
  int    (*qtrmm_iunucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
498
  int    (*qtrmm_iunncopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
499
  int    (*qtrmm_iutucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
500
  int    (*qtrmm_iutncopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
501
  int    (*qtrmm_ilnucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
502
  int    (*qtrmm_ilnncopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
503
  int    (*qtrmm_iltucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
504
  int    (*qtrmm_iltncopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
505
  int    (*qtrmm_ounucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
506
  int    (*qtrmm_ounncopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
507
  int    (*qtrmm_outucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
508
  int    (*qtrmm_outncopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
509
  int    (*qtrmm_olnucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
510
  int    (*qtrmm_olnncopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
511
  int    (*qtrmm_oltucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
512
  int    (*qtrmm_oltncopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
513
514
  int    (*qsymm_iutcopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
515
  int    (*qsymm_iltcopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
516
  int    (*qsymm_outcopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
517
  int    (*qsymm_oltcopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
518
519
  int  (*qneg_tcopy)   (BLASLONG, BLASLONG, xdouble *, BLASLONG, xdouble *);
520
  int    (*qlaswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, xdouble *, BLASLONG, blasint *, xdouble *);
521
522
#endif
523
524
#if (BUILD_COMPLEX==1) 
525
  int cgemm_p, cgemm_q, cgemm_r;
526
  int cgemm_unroll_m, cgemm_unroll_n, cgemm_unroll_mn;
527
  
528
  float (*camax_k) (BLASLONG, float *, BLASLONG);
529
  float (*camin_k) (BLASLONG, float *, BLASLONG);
530
 
531
BLASLONG (*icamax_k)(BLASLONG, float *, BLASLONG);  
532
BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG);
533
534
  float (*cnrm2_k) (BLASLONG, float *, BLASLONG);
535
  float (*casum_k) (BLASLONG, float *, BLASLONG);
536
  float (*csum_k)  (BLASLONG, float *, BLASLONG);
537
538
  int    (*ccopy_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
539
  openblas_complex_float (*cdotu_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
540
  openblas_complex_float (*cdotc_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
541
542
  int    (*csrot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float);
543
  int    (*caxpy_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
544
  int    (*caxpyc_k)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
545
  int    (*cscal_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
546
  int    (*cswap_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
547
548
  int    (*cgemv_n) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
549
  int    (*cgemv_t) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
550
  int    (*cgemv_r) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
551
  int    (*cgemv_c) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
552
  int    (*cgemv_o) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
553
  int    (*cgemv_u) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
554
  int    (*cgemv_s) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
555
  int    (*cgemv_d) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
556
  int    (*cgeru_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
557
  int    (*cgerc_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
558
  int    (*cgerv_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
559
  int    (*cgerd_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
560
561
  int    (*csymv_L) (BLASLONG, BLASLONG, float,  float, float  *, BLASLONG, float  *, BLASLONG, float  *, BLASLONG, float *);
562
  int    (*csymv_U) (BLASLONG, BLASLONG, float,  float, float  *, BLASLONG, float  *, BLASLONG, float  *, BLASLONG, float *);
563
  int    (*chemv_L) (BLASLONG, BLASLONG, float,  float, float  *, BLASLONG, float  *, BLASLONG, float  *, BLASLONG, float *);
564
  int    (*chemv_U) (BLASLONG, BLASLONG, float,  float, float  *, BLASLONG, float  *, BLASLONG, float  *, BLASLONG, float *);
565
  int    (*chemv_M) (BLASLONG, BLASLONG, float,  float, float  *, BLASLONG, float  *, BLASLONG, float  *, BLASLONG, float *);
566
  int    (*chemv_V) (BLASLONG, BLASLONG, float,  float, float  *, BLASLONG, float  *, BLASLONG, float  *, BLASLONG, float *);
567
568
  int    (*cgemm_kernel_n )(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG);
569
  int    (*cgemm_kernel_l )(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG);
570
  int    (*cgemm_kernel_r )(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG);
571
  int    (*cgemm_kernel_b )(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG);
572
  int    (*cgemm_beta     )(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float  *, BLASLONG);
573
574
  int    (*cgemm_incopy   )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
575
  int    (*cgemm_itcopy   )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
576
  int    (*cgemm_oncopy   )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
577
  int    (*cgemm_otcopy   )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
578
579
#ifdef SMALL_MATRIX_OPT
580
  int    (*cgemm_small_matrix_permit)(int transa, int transb, BLASLONG m, BLASLONG n, BLASLONG k, float alpha0, float alpha1, float beta0, float beta1);
581
582
  int    (*cgemm_small_kernel_nn    )(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float beta0, float beta1, float * C, BLASLONG ldc);
583
  int    (*cgemm_small_kernel_nt    )(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float beta0, float beta1, float * C, BLASLONG ldc);
584
  int    (*cgemm_small_kernel_nr    )(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float beta0, float beta1, float * C, BLASLONG ldc);
585
  int    (*cgemm_small_kernel_nc    )(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float beta0, float beta1, float * C, BLASLONG ldc);
586
587
  int    (*cgemm_small_kernel_tn    )(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float beta0, float beta1, float * C, BLASLONG ldc);
588
  int    (*cgemm_small_kernel_tt    )(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float beta0, float beta1, float * C, BLASLONG ldc);
589
  int    (*cgemm_small_kernel_tr    )(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float beta0, float beta1, float * C, BLASLONG ldc);
590
  int    (*cgemm_small_kernel_tc    )(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float beta0, float beta1, float * C, BLASLONG ldc);
591
592
  int    (*cgemm_small_kernel_rn    )(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float beta0, float beta1, float * C, BLASLONG ldc);
593
  int    (*cgemm_small_kernel_rt    )(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float beta0, float beta1, float * C, BLASLONG ldc);
594
  int    (*cgemm_small_kernel_rr    )(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float beta0, float beta1, float * C, BLASLONG ldc);
595
  int    (*cgemm_small_kernel_rc    )(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float beta0, float beta1, float * C, BLASLONG ldc);
596
597
  int    (*cgemm_small_kernel_cn    )(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float beta0, float beta1, float * C, BLASLONG ldc);
598
  int    (*cgemm_small_kernel_ct    )(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float beta0, float beta1, float * C, BLASLONG ldc);
599
  int    (*cgemm_small_kernel_cr    )(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float beta0, float beta1, float * C, BLASLONG ldc);
600
  int    (*cgemm_small_kernel_cc    )(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb, float beta0, float beta1, float * C, BLASLONG ldc);
601
602
  int    (*cgemm_small_kernel_b0_nn )(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb,  float * C, BLASLONG ldc);
603
  int    (*cgemm_small_kernel_b0_nt )(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb,  float * C, BLASLONG ldc);
604
  int    (*cgemm_small_kernel_b0_nr )(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb,  float * C, BLASLONG ldc);
605
  int    (*cgemm_small_kernel_b0_nc )(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb,  float * C, BLASLONG ldc);
606
607
  int    (*cgemm_small_kernel_b0_tn )(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb,  float * C, BLASLONG ldc);
608
  int    (*cgemm_small_kernel_b0_tt )(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb,  float * C, BLASLONG ldc);
609
  int    (*cgemm_small_kernel_b0_tr )(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb,  float * C, BLASLONG ldc);
610
  int    (*cgemm_small_kernel_b0_tc )(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb,  float * C, BLASLONG ldc);
611
612
  int    (*cgemm_small_kernel_b0_rn )(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb,  float * C, BLASLONG ldc);
613
  int    (*cgemm_small_kernel_b0_rt )(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb,  float * C, BLASLONG ldc);
614
  int    (*cgemm_small_kernel_b0_rr )(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb,  float * C, BLASLONG ldc);
615
  int    (*cgemm_small_kernel_b0_rc )(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb,  float * C, BLASLONG ldc);
616
617
  int    (*cgemm_small_kernel_b0_cn )(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb,  float * C, BLASLONG ldc);
618
  int    (*cgemm_small_kernel_b0_ct )(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb,  float * C, BLASLONG ldc);
619
  int    (*cgemm_small_kernel_b0_cr )(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb,  float * C, BLASLONG ldc);
620
  int    (*cgemm_small_kernel_b0_cc )(BLASLONG m, BLASLONG n, BLASLONG k, float * A, BLASLONG lda, float alpha0, float alpha1, float * B, BLASLONG ldb,  float * C, BLASLONG ldc);
621
#endif
622
623
  int    (*ctrsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
624
  int    (*ctrsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
625
  int    (*ctrsm_kernel_LR)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
626
  int    (*ctrsm_kernel_LC)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
627
  int    (*ctrsm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
628
  int    (*ctrsm_kernel_RT)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
629
  int    (*ctrsm_kernel_RR)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
630
  int    (*ctrsm_kernel_RC)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
631
632
  int    (*ctrsm_iunucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
633
  int    (*ctrsm_iunncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
634
  int    (*ctrsm_iutucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
635
  int    (*ctrsm_iutncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
636
  int    (*ctrsm_ilnucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
637
  int    (*ctrsm_ilnncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
638
  int    (*ctrsm_iltucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
639
  int    (*ctrsm_iltncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
640
  int    (*ctrsm_ounucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
641
  int    (*ctrsm_ounncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
642
  int    (*ctrsm_outucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
643
  int    (*ctrsm_outncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
644
  int    (*ctrsm_olnucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
645
  int    (*ctrsm_olnncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
646
  int    (*ctrsm_oltucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
647
  int    (*ctrsm_oltncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
648
649
  int    (*ctrmm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
650
  int    (*ctrmm_kernel_RT)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
651
  int    (*ctrmm_kernel_RR)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
652
  int    (*ctrmm_kernel_RC)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
653
  int    (*ctrmm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
654
  int    (*ctrmm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
655
  int    (*ctrmm_kernel_LR)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
656
  int    (*ctrmm_kernel_LC)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
657
658
  int    (*ctrmm_iunucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
659
  int    (*ctrmm_iunncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
660
  int    (*ctrmm_iutucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
661
  int    (*ctrmm_iutncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
662
  int    (*ctrmm_ilnucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
663
  int    (*ctrmm_ilnncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
664
  int    (*ctrmm_iltucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
665
  int    (*ctrmm_iltncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
666
  int    (*ctrmm_ounucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
667
  int    (*ctrmm_ounncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
668
  int    (*ctrmm_outucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
669
  int    (*ctrmm_outncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
670
  int    (*ctrmm_olnucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
671
  int    (*ctrmm_olnncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
672
  int    (*ctrmm_oltucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
673
  int    (*ctrmm_oltncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
674
675
  int    (*csymm_iutcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
676
  int    (*csymm_iltcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
677
  int    (*csymm_outcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
678
  int    (*csymm_oltcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
679
680
  int    (*chemm_iutcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
681
  int    (*chemm_iltcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
682
  int    (*chemm_outcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
683
  int    (*chemm_oltcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
684
685
  int cgemm3m_p, cgemm3m_q, cgemm3m_r;
686
  int cgemm3m_unroll_m, cgemm3m_unroll_n, cgemm3m_unroll_mn;
687
688
  int    (*cgemm3m_kernel)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG);
689
690
  int    (*cgemm3m_incopyb)(BLASLONG, BLASLONG, float *, BLASLONG, float *);
691
  int    (*cgemm3m_incopyr)(BLASLONG, BLASLONG, float *, BLASLONG, float *);
692
  int    (*cgemm3m_incopyi)(BLASLONG, BLASLONG, float *, BLASLONG, float *);
693
  int    (*cgemm3m_itcopyb)(BLASLONG, BLASLONG, float *, BLASLONG, float *);
694
  int    (*cgemm3m_itcopyr)(BLASLONG, BLASLONG, float *, BLASLONG, float *);
695
  int    (*cgemm3m_itcopyi)(BLASLONG, BLASLONG, float *, BLASLONG, float *);
696
697
  int    (*cgemm3m_oncopyb)(BLASLONG, BLASLONG, float *, BLASLONG, float, float, float *);
698
  int    (*cgemm3m_oncopyr)(BLASLONG, BLASLONG, float *, BLASLONG, float, float, float *);
699
  int    (*cgemm3m_oncopyi)(BLASLONG, BLASLONG, float *, BLASLONG, float, float, float *);
700
  int    (*cgemm3m_otcopyb)(BLASLONG, BLASLONG, float *, BLASLONG, float, float, float *);
701
  int    (*cgemm3m_otcopyr)(BLASLONG, BLASLONG, float *, BLASLONG, float, float, float *);
702
  int    (*cgemm3m_otcopyi)(BLASLONG, BLASLONG, float *, BLASLONG, float, float, float *);
703
704
  int    (*csymm3m_iucopyb)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
705
  int    (*csymm3m_ilcopyb)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
706
  int    (*csymm3m_iucopyr)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
707
  int    (*csymm3m_ilcopyr)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
708
  int    (*csymm3m_iucopyi)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
709
  int    (*csymm3m_ilcopyi)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
710
711
  int    (*csymm3m_oucopyb)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float, float, float *);
712
  int    (*csymm3m_olcopyb)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float, float, float *);
713
  int    (*csymm3m_oucopyr)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float, float, float *);
714
  int    (*csymm3m_olcopyr)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float, float, float *);
715
  int    (*csymm3m_oucopyi)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float, float, float *);
716
  int    (*csymm3m_olcopyi)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float, float, float *);
717
718
  int    (*chemm3m_iucopyb)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
719
  int    (*chemm3m_ilcopyb)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
720
  int    (*chemm3m_iucopyr)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
721
  int    (*chemm3m_ilcopyr)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
722
  int    (*chemm3m_iucopyi)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
723
  int    (*chemm3m_ilcopyi)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
724
725
  int    (*chemm3m_oucopyb)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float, float, float *);
726
  int    (*chemm3m_olcopyb)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float, float, float *);
727
  int    (*chemm3m_oucopyr)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float, float, float *);
728
  int    (*chemm3m_olcopyr)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float, float, float *);
729
  int    (*chemm3m_oucopyi)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float, float, float *);
730
  int    (*chemm3m_olcopyi)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float, float, float *);
731
732
  int  (*cneg_tcopy)   (BLASLONG, BLASLONG, float *, BLASLONG, float *);
733
  int    (*claswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG, blasint *, float *);
734
#endif
735
736
#if (BUILD_COMPLEX16 == 1)
737
  int zgemm_p, zgemm_q, zgemm_r;
738
  int zgemm_unroll_m, zgemm_unroll_n, zgemm_unroll_mn;
739
740
  double (*zamax_k) (BLASLONG, double *, BLASLONG);
741
  double (*zamin_k) (BLASLONG, double *, BLASLONG);
742
BLASLONG (*izamax_k)(BLASLONG, double *, BLASLONG);
743
BLASLONG (*izamin_k)(BLASLONG, double *, BLASLONG);
744
745
  double (*znrm2_k) (BLASLONG, double *, BLASLONG);
746
  double (*zasum_k) (BLASLONG, double *, BLASLONG);
747
  double (*zsum_k)  (BLASLONG, double *, BLASLONG);
748
  int    (*zcopy_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
749
  openblas_complex_double (*zdotu_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
750
  openblas_complex_double (*zdotc_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
751
  int    (*zdrot_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG, double, double);
752
753
  int    (*zaxpy_k) (BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
754
  int    (*zaxpyc_k)(BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
755
  int    (*zscal_k) (BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
756
  int    (*zswap_k) (BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
757
758
  int    (*zgemv_n) (BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
759
  int    (*zgemv_t) (BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
760
  int    (*zgemv_r) (BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
761
  int    (*zgemv_c) (BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
762
  int    (*zgemv_o) (BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
763
  int    (*zgemv_u) (BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
764
  int    (*zgemv_s) (BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
765
  int    (*zgemv_d) (BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
766
  int    (*zgeru_k) (BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
767
  int    (*zgerc_k) (BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
768
  int    (*zgerv_k) (BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
769
  int    (*zgerd_k) (BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
770
771
  int    (*zsymv_L) (BLASLONG, BLASLONG, double,  double, double  *, BLASLONG, double  *, BLASLONG, double  *, BLASLONG, double *);
772
  int    (*zsymv_U) (BLASLONG, BLASLONG, double,  double, double  *, BLASLONG, double  *, BLASLONG, double  *, BLASLONG, double *);
773
  int    (*zhemv_L) (BLASLONG, BLASLONG, double,  double, double  *, BLASLONG, double  *, BLASLONG, double  *, BLASLONG, double *);
774
  int    (*zhemv_U) (BLASLONG, BLASLONG, double,  double, double  *, BLASLONG, double  *, BLASLONG, double  *, BLASLONG, double *);
775
  int    (*zhemv_M) (BLASLONG, BLASLONG, double,  double, double  *, BLASLONG, double  *, BLASLONG, double  *, BLASLONG, double *);
776
  int    (*zhemv_V) (BLASLONG, BLASLONG, double,  double, double  *, BLASLONG, double  *, BLASLONG, double  *, BLASLONG, double *);
777
778
  int    (*zgemm_kernel_n )(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG);
779
  int    (*zgemm_kernel_l )(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG);
780
  int    (*zgemm_kernel_r )(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG);
781
  int    (*zgemm_kernel_b )(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG);
782
  int    (*zgemm_beta     )(BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG, double  *, BLASLONG);
783
784
  int    (*zgemm_incopy   )(BLASLONG, BLASLONG, double *, BLASLONG, double *);
785
  int    (*zgemm_itcopy   )(BLASLONG, BLASLONG, double *, BLASLONG, double *);
786
  int    (*zgemm_oncopy   )(BLASLONG, BLASLONG, double *, BLASLONG, double *);
787
  int    (*zgemm_otcopy   )(BLASLONG, BLASLONG, double *, BLASLONG, double *);
788
789
#ifdef SMALL_MATRIX_OPT
790
  int    (*zgemm_small_matrix_permit)(int transa, int transb, BLASLONG m, BLASLONG n, BLASLONG k, double alpha0, double alpha1, double beta0, double beta1);
791
792
  int    (*zgemm_small_kernel_nn    )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double beta0, double beta1, double * C, BLASLONG ldc);
793
  int    (*zgemm_small_kernel_nt    )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double beta0, double beta1, double * C, BLASLONG ldc);
794
  int    (*zgemm_small_kernel_nr    )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double beta0, double beta1, double * C, BLASLONG ldc);
795
  int    (*zgemm_small_kernel_nc    )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double beta0, double beta1, double * C, BLASLONG ldc);
796
797
  int    (*zgemm_small_kernel_tn    )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double beta0, double beta1, double * C, BLASLONG ldc);
798
  int    (*zgemm_small_kernel_tt    )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double beta0, double beta1, double * C, BLASLONG ldc);
799
  int    (*zgemm_small_kernel_tr    )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double beta0, double beta1, double * C, BLASLONG ldc);
800
  int    (*zgemm_small_kernel_tc    )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double beta0, double beta1, double * C, BLASLONG ldc);
801
802
  int    (*zgemm_small_kernel_rn    )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double beta0, double beta1, double * C, BLASLONG ldc);
803
  int    (*zgemm_small_kernel_rt    )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double beta0, double beta1, double * C, BLASLONG ldc);
804
  int    (*zgemm_small_kernel_rr    )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double beta0, double beta1, double * C, BLASLONG ldc);
805
  int    (*zgemm_small_kernel_rc    )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double beta0, double beta1, double * C, BLASLONG ldc);
806
807
  int    (*zgemm_small_kernel_cn    )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double beta0, double beta1, double * C, BLASLONG ldc);
808
  int    (*zgemm_small_kernel_ct    )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double beta0, double beta1, double * C, BLASLONG ldc);
809
  int    (*zgemm_small_kernel_cr    )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double beta0, double beta1, double * C, BLASLONG ldc);
810
  int    (*zgemm_small_kernel_cc    )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb, double beta0, double beta1, double * C, BLASLONG ldc);
811
812
  int    (*zgemm_small_kernel_b0_nn )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb,  double * C, BLASLONG ldc);
813
  int    (*zgemm_small_kernel_b0_nt )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb,  double * C, BLASLONG ldc);
814
  int    (*zgemm_small_kernel_b0_nr )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb,  double * C, BLASLONG ldc);
815
  int    (*zgemm_small_kernel_b0_nc )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb,  double * C, BLASLONG ldc);
816
817
  int    (*zgemm_small_kernel_b0_tn )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb,  double * C, BLASLONG ldc);
818
  int    (*zgemm_small_kernel_b0_tt )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb,  double * C, BLASLONG ldc);
819
  int    (*zgemm_small_kernel_b0_tr )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb,  double * C, BLASLONG ldc);
820
  int    (*zgemm_small_kernel_b0_tc )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb,  double * C, BLASLONG ldc);
821
822
  int    (*zgemm_small_kernel_b0_rn )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb,  double * C, BLASLONG ldc);
823
  int    (*zgemm_small_kernel_b0_rt )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb,  double * C, BLASLONG ldc);
824
  int    (*zgemm_small_kernel_b0_rr )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb,  double * C, BLASLONG ldc);
825
  int    (*zgemm_small_kernel_b0_rc )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb,  double * C, BLASLONG ldc);
826
827
  int    (*zgemm_small_kernel_b0_cn )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb,  double * C, BLASLONG ldc);
828
  int    (*zgemm_small_kernel_b0_ct )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb,  double * C, BLASLONG ldc);
829
  int    (*zgemm_small_kernel_b0_cr )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb,  double * C, BLASLONG ldc);
830
  int    (*zgemm_small_kernel_b0_cc )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha0, double alpha1, double * B, BLASLONG ldb,  double * C, BLASLONG ldc);
831
#endif
832
833
  int    (*ztrsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
834
  int    (*ztrsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
835
  int    (*ztrsm_kernel_LR)(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
836
  int    (*ztrsm_kernel_LC)(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
837
  int    (*ztrsm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
838
  int    (*ztrsm_kernel_RT)(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
839
  int    (*ztrsm_kernel_RR)(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
840
  int    (*ztrsm_kernel_RC)(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
841
842
  int    (*ztrsm_iunucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, double *);
843
  int    (*ztrsm_iunncopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, double *);
844
  int    (*ztrsm_iutucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, double *);
845
  int    (*ztrsm_iutncopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, double *);
846
  int    (*ztrsm_ilnucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, double *);
847
  int    (*ztrsm_ilnncopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, double *);
848
  int    (*ztrsm_iltucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, double *);
849
  int    (*ztrsm_iltncopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, double *);
850
  int    (*ztrsm_ounucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, double *);
851
  int    (*ztrsm_ounncopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, double *);
852
  int    (*ztrsm_outucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, double *);
853
  int    (*ztrsm_outncopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, double *);
854
  int    (*ztrsm_olnucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, double *);
855
  int    (*ztrsm_olnncopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, double *);
856
  int    (*ztrsm_oltucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, double *);
857
  int    (*ztrsm_oltncopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, double *);
858
859
  int    (*ztrmm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
860
  int    (*ztrmm_kernel_RT)(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
861
  int    (*ztrmm_kernel_RR)(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
862
  int    (*ztrmm_kernel_RC)(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
863
  int    (*ztrmm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
864
  int    (*ztrmm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
865
  int    (*ztrmm_kernel_LR)(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
866
  int    (*ztrmm_kernel_LC)(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG, BLASLONG);
867
868
  int    (*ztrmm_iunucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
869
  int    (*ztrmm_iunncopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
870
  int    (*ztrmm_iutucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
871
  int    (*ztrmm_iutncopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
872
  int    (*ztrmm_ilnucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
873
  int    (*ztrmm_ilnncopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
874
  int    (*ztrmm_iltucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
875
  int    (*ztrmm_iltncopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
876
  int    (*ztrmm_ounucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
877
  int    (*ztrmm_ounncopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
878
  int    (*ztrmm_outucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
879
  int    (*ztrmm_outncopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
880
  int    (*ztrmm_olnucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
881
  int    (*ztrmm_olnncopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
882
  int    (*ztrmm_oltucopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
883
  int    (*ztrmm_oltncopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
884
885
  int    (*zsymm_iutcopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
886
  int    (*zsymm_iltcopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
887
  int    (*zsymm_outcopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
888
  int    (*zsymm_oltcopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
889
890
  int    (*zhemm_iutcopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
891
  int    (*zhemm_iltcopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
892
  int    (*zhemm_outcopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
893
  int    (*zhemm_oltcopy)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
894
895
  int zgemm3m_p, zgemm3m_q, zgemm3m_r;
896
  int zgemm3m_unroll_m, zgemm3m_unroll_n, zgemm3m_unroll_mn;
897
898
  int    (*zgemm3m_kernel)(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG);
899
900
  int    (*zgemm3m_incopyb)(BLASLONG, BLASLONG, double *, BLASLONG, double *);
901
  int    (*zgemm3m_incopyr)(BLASLONG, BLASLONG, double *, BLASLONG, double *);
902
  int    (*zgemm3m_incopyi)(BLASLONG, BLASLONG, double *, BLASLONG, double *);
903
  int    (*zgemm3m_itcopyb)(BLASLONG, BLASLONG, double *, BLASLONG, double *);
904
  int    (*zgemm3m_itcopyr)(BLASLONG, BLASLONG, double *, BLASLONG, double *);
905
  int    (*zgemm3m_itcopyi)(BLASLONG, BLASLONG, double *, BLASLONG, double *);
906
907
  int    (*zgemm3m_oncopyb)(BLASLONG, BLASLONG, double *, BLASLONG, double, double, double *);
908
  int    (*zgemm3m_oncopyr)(BLASLONG, BLASLONG, double *, BLASLONG, double, double, double *);
909
  int    (*zgemm3m_oncopyi)(BLASLONG, BLASLONG, double *, BLASLONG, double, double, double *);
910
  int    (*zgemm3m_otcopyb)(BLASLONG, BLASLONG, double *, BLASLONG, double, double, double *);
911
  int    (*zgemm3m_otcopyr)(BLASLONG, BLASLONG, double *, BLASLONG, double, double, double *);
912
  int    (*zgemm3m_otcopyi)(BLASLONG, BLASLONG, double *, BLASLONG, double, double, double *);
913
914
  int    (*zsymm3m_iucopyb)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
915
  int    (*zsymm3m_ilcopyb)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
916
  int    (*zsymm3m_iucopyr)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
917
  int    (*zsymm3m_ilcopyr)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
918
  int    (*zsymm3m_iucopyi)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
919
  int    (*zsymm3m_ilcopyi)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
920
921
  int    (*zsymm3m_oucopyb)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double, double, double *);
922
  int    (*zsymm3m_olcopyb)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double, double, double *);
923
  int    (*zsymm3m_oucopyr)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double, double, double *);
924
  int    (*zsymm3m_olcopyr)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double, double, double *);
925
  int    (*zsymm3m_oucopyi)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double, double, double *);
926
  int    (*zsymm3m_olcopyi)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double, double, double *);
927
928
  int    (*zhemm3m_iucopyb)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
929
  int    (*zhemm3m_ilcopyb)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
930
  int    (*zhemm3m_iucopyr)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
931
  int    (*zhemm3m_ilcopyr)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
932
  int    (*zhemm3m_iucopyi)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
933
  int    (*zhemm3m_ilcopyi)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double *);
934
935
  int    (*zhemm3m_oucopyb)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double, double, double *);
936
  int    (*zhemm3m_olcopyb)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double, double, double *);
937
  int    (*zhemm3m_oucopyr)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double, double, double *);
938
  int    (*zhemm3m_olcopyr)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double, double, double *);
939
  int    (*zhemm3m_oucopyi)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double, double, double *);
940
  int    (*zhemm3m_olcopyi)(BLASLONG, BLASLONG, double *, BLASLONG, BLASLONG, BLASLONG, double, double, double *);
941
942
  int  (*zneg_tcopy)   (BLASLONG, BLASLONG, double *, BLASLONG, double *);
943
  int    (*zlaswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, double *, BLASLONG, blasint *, double *);
944
#endif
945
946
#ifdef EXPRECISION
947
948
  int xgemm_p, xgemm_q, xgemm_r;
949
  int xgemm_unroll_m, xgemm_unroll_n, xgemm_unroll_mn;
950
951
  xdouble (*xamax_k) (BLASLONG, xdouble *, BLASLONG);
952
  xdouble (*xamin_k) (BLASLONG, xdouble *, BLASLONG);
953
BLASLONG (*ixamax_k)(BLASLONG, xdouble *, BLASLONG);
954
BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
955
956
  xdouble (*xnrm2_k) (BLASLONG, xdouble *, BLASLONG);
957
  xdouble (*xasum_k) (BLASLONG, xdouble *, BLASLONG);
958
  xdouble (*xsum_k) (BLASLONG, xdouble *, BLASLONG);
959
  int    (*xcopy_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
960
  openblas_complex_xdouble (*xdotu_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
961
  openblas_complex_xdouble (*xdotc_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
962
  int    (*xqrot_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble, xdouble);
963
964
  int    (*xaxpy_k) (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
965
  int    (*xaxpyc_k)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
966
  int    (*xscal_k) (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
967
  int    (*xswap_k) (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
968
969
  int    (*xgemv_n) (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *);
970
  int    (*xgemv_t) (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *);
971
  int    (*xgemv_r) (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *);
972
  int    (*xgemv_c) (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *);
973
  int    (*xgemv_o) (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *);
974
  int    (*xgemv_u) (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *);
975
  int    (*xgemv_s) (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *);
976
  int    (*xgemv_d) (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *);
977
  int    (*xgeru_k) (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *);
978
  int    (*xgerc_k) (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *);
979
  int    (*xgerv_k) (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *);
980
  int    (*xgerd_k) (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *);
981
982
  int    (*xsymv_L) (BLASLONG, BLASLONG, xdouble,  xdouble, xdouble  *, BLASLONG, xdouble  *, BLASLONG, xdouble  *, BLASLONG, xdouble *);
983
  int    (*xsymv_U) (BLASLONG, BLASLONG, xdouble,  xdouble, xdouble  *, BLASLONG, xdouble  *, BLASLONG, xdouble  *, BLASLONG, xdouble *);
984
  int    (*xhemv_L) (BLASLONG, BLASLONG, xdouble,  xdouble, xdouble  *, BLASLONG, xdouble  *, BLASLONG, xdouble  *, BLASLONG, xdouble *);
985
  int    (*xhemv_U) (BLASLONG, BLASLONG, xdouble,  xdouble, xdouble  *, BLASLONG, xdouble  *, BLASLONG, xdouble  *, BLASLONG, xdouble *);
986
  int    (*xhemv_M) (BLASLONG, BLASLONG, xdouble,  xdouble, xdouble  *, BLASLONG, xdouble  *, BLASLONG, xdouble  *, BLASLONG, xdouble *);
987
  int    (*xhemv_V) (BLASLONG, BLASLONG, xdouble,  xdouble, xdouble  *, BLASLONG, xdouble  *, BLASLONG, xdouble  *, BLASLONG, xdouble *);
988
989
  int    (*xgemm_kernel_n )(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG);
990
  int    (*xgemm_kernel_l )(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG);
991
  int    (*xgemm_kernel_r )(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG);
992
  int    (*xgemm_kernel_b )(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG);
993
  int    (*xgemm_beta     )(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble  *, BLASLONG);
994
995
  int    (*xgemm_incopy   )(BLASLONG, BLASLONG, xdouble *, BLASLONG, xdouble *);
996
  int    (*xgemm_itcopy   )(BLASLONG, BLASLONG, xdouble *, BLASLONG, xdouble *);
997
  int    (*xgemm_oncopy   )(BLASLONG, BLASLONG, xdouble *, BLASLONG, xdouble *);
998
  int    (*xgemm_otcopy   )(BLASLONG, BLASLONG, xdouble *, BLASLONG, xdouble *);
999
1000
  int    (*xtrsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
1001
  int    (*xtrsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
1002
  int    (*xtrsm_kernel_LR)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
1003
  int    (*xtrsm_kernel_LC)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
1004
  int    (*xtrsm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
1005
  int    (*xtrsm_kernel_RT)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
1006
  int    (*xtrsm_kernel_RR)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
1007
  int    (*xtrsm_kernel_RC)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
1008
1009
  int    (*xtrsm_iunucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, xdouble *);
1010
  int    (*xtrsm_iunncopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, xdouble *);
1011
  int    (*xtrsm_iutucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, xdouble *);
1012
  int    (*xtrsm_iutncopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, xdouble *);
1013
  int    (*xtrsm_ilnucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, xdouble *);
1014
  int    (*xtrsm_ilnncopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, xdouble *);
1015
  int    (*xtrsm_iltucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, xdouble *);
1016
  int    (*xtrsm_iltncopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, xdouble *);
1017
  int    (*xtrsm_ounucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, xdouble *);
1018
  int    (*xtrsm_ounncopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, xdouble *);
1019
  int    (*xtrsm_outucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, xdouble *);
1020
  int    (*xtrsm_outncopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, xdouble *);
1021
  int    (*xtrsm_olnucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, xdouble *);
1022
  int    (*xtrsm_olnncopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, xdouble *);
1023
  int    (*xtrsm_oltucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, xdouble *);
1024
  int    (*xtrsm_oltncopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, xdouble *);
1025
1026
  int    (*xtrmm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
1027
  int    (*xtrmm_kernel_RT)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
1028
  int    (*xtrmm_kernel_RR)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
1029
  int    (*xtrmm_kernel_RC)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
1030
  int    (*xtrmm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
1031
  int    (*xtrmm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
1032
  int    (*xtrmm_kernel_LR)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
1033
  int    (*xtrmm_kernel_LC)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG, BLASLONG);
1034
1035
  int    (*xtrmm_iunucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
1036
  int    (*xtrmm_iunncopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
1037
  int    (*xtrmm_iutucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
1038
  int    (*xtrmm_iutncopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
1039
  int    (*xtrmm_ilnucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
1040
  int    (*xtrmm_ilnncopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
1041
  int    (*xtrmm_iltucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
1042
  int    (*xtrmm_iltncopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
1043
  int    (*xtrmm_ounucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
1044
  int    (*xtrmm_ounncopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
1045
  int    (*xtrmm_outucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
1046
  int    (*xtrmm_outncopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
1047
  int    (*xtrmm_olnucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
1048
  int    (*xtrmm_olnncopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
1049
  int    (*xtrmm_oltucopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
1050
  int    (*xtrmm_oltncopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
1051
1052
  int    (*xsymm_iutcopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
1053
  int    (*xsymm_iltcopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
1054
  int    (*xsymm_outcopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
1055
  int    (*xsymm_oltcopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
1056
1057
  int    (*xhemm_iutcopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
1058
  int    (*xhemm_iltcopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
1059
  int    (*xhemm_outcopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
1060
  int    (*xhemm_oltcopy)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
1061
1062
  int xgemm3m_p, xgemm3m_q, xgemm3m_r;
1063
  int xgemm3m_unroll_m, xgemm3m_unroll_n, xgemm3m_unroll_mn;
1064
1065
  int    (*xgemm3m_kernel)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG);
1066
1067
  int    (*xgemm3m_incopyb)(BLASLONG, BLASLONG, xdouble *, BLASLONG, xdouble *);
1068
  int    (*xgemm3m_incopyr)(BLASLONG, BLASLONG, xdouble *, BLASLONG, xdouble *);
1069
  int    (*xgemm3m_incopyi)(BLASLONG, BLASLONG, xdouble *, BLASLONG, xdouble *);
1070
  int    (*xgemm3m_itcopyb)(BLASLONG, BLASLONG, xdouble *, BLASLONG, xdouble *);
1071
  int    (*xgemm3m_itcopyr)(BLASLONG, BLASLONG, xdouble *, BLASLONG, xdouble *);
1072
  int    (*xgemm3m_itcopyi)(BLASLONG, BLASLONG, xdouble *, BLASLONG, xdouble *);
1073
1074
  int    (*xgemm3m_oncopyb)(BLASLONG, BLASLONG, xdouble *, BLASLONG, xdouble, xdouble, xdouble *);
1075
  int    (*xgemm3m_oncopyr)(BLASLONG, BLASLONG, xdouble *, BLASLONG, xdouble, xdouble, xdouble *);
1076
  int    (*xgemm3m_oncopyi)(BLASLONG, BLASLONG, xdouble *, BLASLONG, xdouble, xdouble, xdouble *);
1077
  int    (*xgemm3m_otcopyb)(BLASLONG, BLASLONG, xdouble *, BLASLONG, xdouble, xdouble, xdouble *);
1078
  int    (*xgemm3m_otcopyr)(BLASLONG, BLASLONG, xdouble *, BLASLONG, xdouble, xdouble, xdouble *);
1079
  int    (*xgemm3m_otcopyi)(BLASLONG, BLASLONG, xdouble *, BLASLONG, xdouble, xdouble, xdouble *);
1080
1081
  int    (*xsymm3m_iucopyb)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
1082
  int    (*xsymm3m_ilcopyb)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
1083
  int    (*xsymm3m_iucopyr)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
1084
  int    (*xsymm3m_ilcopyr)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
1085
  int    (*xsymm3m_iucopyi)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
1086
  int    (*xsymm3m_ilcopyi)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
1087
1088
  int    (*xsymm3m_oucopyb)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *);
1089
  int    (*xsymm3m_olcopyb)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *);
1090
  int    (*xsymm3m_oucopyr)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *);
1091
  int    (*xsymm3m_olcopyr)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *);
1092
  int    (*xsymm3m_oucopyi)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *);
1093
  int    (*xsymm3m_olcopyi)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *);
1094
1095
  int    (*xhemm3m_iucopyb)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
1096
  int    (*xhemm3m_ilcopyb)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
1097
  int    (*xhemm3m_iucopyr)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
1098
  int    (*xhemm3m_ilcopyr)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
1099
  int    (*xhemm3m_iucopyi)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
1100
  int    (*xhemm3m_ilcopyi)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble *);
1101
1102
  int    (*xhemm3m_oucopyb)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *);
1103
  int    (*xhemm3m_olcopyb)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *);
1104
  int    (*xhemm3m_oucopyr)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *);
1105
  int    (*xhemm3m_olcopyr)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *);
1106
  int    (*xhemm3m_oucopyi)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *);
1107
  int    (*xhemm3m_olcopyi)(BLASLONG, BLASLONG, xdouble *, BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *);
1108
1109
  int  (*xneg_tcopy)   (BLASLONG, BLASLONG, xdouble *, BLASLONG, xdouble *);
1110
  int    (*xlaswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, xdouble *, BLASLONG, blasint *, xdouble *);
1111
1112
#endif
1113
1114
1115
  void (*init)(void);
1116
1117
  int snum_opt, dnum_opt, qnum_opt;
1118
#if (BUILD_SINGLE==1)
1119
  int    (*saxpby_k)     (BLASLONG, float, float*, BLASLONG,float, float*, BLASLONG);
1120
#endif
1121
#if (BUILD_DOUBLE==1)
1122
  int    (*daxpby_k)     (BLASLONG, double, double*, BLASLONG,double, double*, BLASLONG);
1123
#endif
1124
#if (BUILD_COMPLEX==1)
1125
  int    (*caxpby_k)     (BLASLONG, float, float,  float*, BLASLONG,float,float, float*, BLASLONG);
1126
#endif
1127
#if (BUILD_COMPLEX16==1)
1128
  int    (*zaxpby_k)     (BLASLONG, double, double,  double*, BLASLONG,double,double, double*, BLASLONG);
1129
#endif
1130
1131
#if (BUILD_SINGLE==1)
1132
  int    (*somatcopy_k_cn)  (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
1133
  int    (*somatcopy_k_ct)  (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
1134
  int    (*somatcopy_k_rn)  (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
1135
  int    (*somatcopy_k_rt)  (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
1136
#endif
1137
1138
#if (BUILD_DOUBLE==1) 
1139
  int    (*domatcopy_k_cn)  (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
1140
  int    (*domatcopy_k_ct)  (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
1141
  int    (*domatcopy_k_rn)  (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
1142
  int    (*domatcopy_k_rt)  (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
1143
#endif
1144
1145
#if (BUILD_COMPLEX==1)
1146
  int    (*comatcopy_k_cn)  (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
1147
  int    (*comatcopy_k_ct)  (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
1148
  int    (*comatcopy_k_rn)  (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
1149
  int    (*comatcopy_k_rt)  (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
1150
1151
  int    (*comatcopy_k_cnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
1152
  int    (*comatcopy_k_ctc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
1153
  int    (*comatcopy_k_rnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
1154
  int    (*comatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
1155
#endif
1156
1157
#if (BUILD_COMPLEX16==1)
1158
  int    (*zomatcopy_k_cn)  (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
1159
  int    (*zomatcopy_k_ct)  (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
1160
  int    (*zomatcopy_k_rn)  (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
1161
  int    (*zomatcopy_k_rt)  (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
1162
1163
  int    (*zomatcopy_k_cnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
1164
  int    (*zomatcopy_k_ctc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
1165
  int    (*zomatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
1166
  int    (*zomatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
1167
#endif
1168
1169
#if (BUILD_SINGLE==1)
1170
  int    (*simatcopy_k_cn)  (BLASLONG, BLASLONG, float, float*, BLASLONG);
1171
  int    (*simatcopy_k_ct)  (BLASLONG, BLASLONG, float, float*, BLASLONG);
1172
  int    (*simatcopy_k_rn)  (BLASLONG, BLASLONG, float, float*, BLASLONG);
1173
  int    (*simatcopy_k_rt)  (BLASLONG, BLASLONG, float, float*, BLASLONG);
1174
#endif
1175
1176
#if (BUILD_DOUBLE==1)
1177
  int    (*dimatcopy_k_cn)  (BLASLONG, BLASLONG, double, double*, BLASLONG);
1178
  int    (*dimatcopy_k_ct)  (BLASLONG, BLASLONG, double, double*, BLASLONG);
1179
  int    (*dimatcopy_k_rn)  (BLASLONG, BLASLONG, double, double*, BLASLONG);
1180
  int    (*dimatcopy_k_rt)  (BLASLONG, BLASLONG, double, double*, BLASLONG);
1181
#endif
1182
1183
#if (BUILD_COMPLEX==1)
1184
  int    (*cimatcopy_k_cn)  (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
1185
  int    (*cimatcopy_k_ct)  (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
1186
  int    (*cimatcopy_k_rn)  (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
1187
  int    (*cimatcopy_k_rt)  (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
1188
1189
  int    (*cimatcopy_k_cnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
1190
  int    (*cimatcopy_k_ctc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
1191
  int    (*cimatcopy_k_rnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
1192
  int    (*cimatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
1193
#endif
1194
1195
#if (BUILD_COMPLEX16==1)
1196
  int    (*zimatcopy_k_cn)  (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
1197
  int    (*zimatcopy_k_ct)  (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
1198
  int    (*zimatcopy_k_rn)  (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
1199
  int    (*zimatcopy_k_rt)  (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
1200
1201
  int    (*zimatcopy_k_cnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
1202
  int    (*zimatcopy_k_ctc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
1203
  int    (*zimatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
1204
  int    (*zimatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
1205
#endif
1206
1207
#if (BUILD_SINGLE==1)
1208
  int    (*sgeadd_k) (BLASLONG, BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG); 
1209
#endif
1210
#if (BUILD_DOUBLE==1)
1211
  int    (*dgeadd_k) (BLASLONG, BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG); 
1212
#endif
1213
#if (BUILD_COMPLEX==1)
1214
  int    (*cgeadd_k) (BLASLONG, BLASLONG, float, float,  float *,  BLASLONG, float, float, float *, BLASLONG); 
1215
#endif
1216
#if (BUILD_COMPLEX16==1)
1217
  int    (*zgeadd_k) (BLASLONG, BLASLONG, double, double, double *, BLASLONG, double, double, double *, BLASLONG); 
1218
#endif
1219
} gotoblas_t;
1220
1221
extern gotoblas_t *gotoblas;
1222
1223
#define FUNC_OFFSET(func) (size_t)(&((gotoblas_t *)NULL)->func)
1224
1225
#define DTB_ENTRIES  gotoblas -> dtb_entries
1226
#define GEMM_OFFSET_A gotoblas -> offsetA
1227
#define GEMM_OFFSET_B gotoblas -> offsetB
1228
#define GEMM_ALIGN  gotoblas -> align
1229
1230
#define HAVE_EX_L2  gotoblas -> exclusive_cache
1231
1232
#if (BUILD_BFLOAT16==1)
1233
#define SBGEMM_P    gotoblas -> sbgemm_p
1234
#define SBGEMM_Q    gotoblas -> sbgemm_q
1235
#define SBGEMM_R    gotoblas -> sbgemm_r
1236
#define SBGEMM_UNROLL_M gotoblas -> sbgemm_unroll_m
1237
#define SBGEMM_UNROLL_N gotoblas -> sbgemm_unroll_n
1238
#define SBGEMM_UNROLL_MN  gotoblas -> sbgemm_unroll_mn
1239
#endif
1240
1241
#if (BUILD_SINGLE==1)
1242
#define SGEMM_P   gotoblas -> sgemm_p
1243
#define SGEMM_Q   gotoblas -> sgemm_q
1244
#define SGEMM_R   gotoblas -> sgemm_r
1245
#define SGEMM_UNROLL_M  gotoblas -> sgemm_unroll_m
1246
#define SGEMM_UNROLL_N  gotoblas -> sgemm_unroll_n
1247
#define SGEMM_UNROLL_MN gotoblas -> sgemm_unroll_mn
1248
#endif
1249
1250
#if (BUILD_DOUBLE==1)
1251
#define DGEMM_P   gotoblas -> dgemm_p
1252
#define DGEMM_Q   gotoblas -> dgemm_q
1253
#define DGEMM_R   gotoblas -> dgemm_r
1254
#define DGEMM_UNROLL_M  gotoblas -> dgemm_unroll_m
1255
#define DGEMM_UNROLL_N  gotoblas -> dgemm_unroll_n
1256
#define DGEMM_UNROLL_MN gotoblas -> dgemm_unroll_mn
1257
#if (BUILD_SINGLE != 1)
1258
#define SGEMM_P   gotoblas -> sgemm_p
1259
#define SGEMM_Q   gotoblas -> sgemm_q
1260
#define SGEMM_R   1024
1261
#define SGEMM_UNROLL_M  gotoblas -> sgemm_unroll_m
1262
#define SGEMM_UNROLL_N  gotoblas -> sgemm_unroll_n
1263
#define SGEMM_UNROLL_MN gotoblas -> sgemm_unroll_mn
1264
#endif
1265
#endif
1266
1267
#define QGEMM_P   gotoblas -> qgemm_p
1268
#define QGEMM_Q   gotoblas -> qgemm_q
1269
#define QGEMM_R   gotoblas -> qgemm_r
1270
#define QGEMM_UNROLL_M  gotoblas -> qgemm_unroll_m
1271
#define QGEMM_UNROLL_N  gotoblas -> qgemm_unroll_n
1272
#define QGEMM_UNROLL_MN gotoblas -> qgemm_unroll_mn
1273
1274
#if (BUILD_COMPLEX==1)
1275
#define CGEMM_P   gotoblas -> cgemm_p
1276
#define CGEMM_Q   gotoblas -> cgemm_q
1277
#define CGEMM_R   gotoblas -> cgemm_r
1278
#define CGEMM_UNROLL_M  gotoblas -> cgemm_unroll_m
1279
#define CGEMM_UNROLL_N  gotoblas -> cgemm_unroll_n
1280
#define CGEMM_UNROLL_MN gotoblas -> cgemm_unroll_mn
1281
#if (BUILD_SINGLE != 1)
1282
#define SGEMM_P   gotoblas -> sgemm_p
1283
#define SGEMM_Q   gotoblas -> sgemm_q
1284
#define SGEMM_R   1024
1285
#define SGEMM_UNROLL_M  gotoblas -> sgemm_unroll_m
1286
#define SGEMM_UNROLL_N  gotoblas -> sgemm_unroll_n
1287
#define SGEMM_UNROLL_MN gotoblas -> sgemm_unroll_mn
1288
#endif
1289
#endif
1290
1291
#if (BUILD_COMPLEX16==1)
1292
#define ZGEMM_P   gotoblas -> zgemm_p
1293
#define ZGEMM_Q   gotoblas -> zgemm_q
1294
#define ZGEMM_R   gotoblas -> zgemm_r
1295
#define ZGEMM_UNROLL_M  gotoblas -> zgemm_unroll_m
1296
#define ZGEMM_UNROLL_N  gotoblas -> zgemm_unroll_n
1297
#define ZGEMM_UNROLL_MN gotoblas -> zgemm_unroll_mn
1298
#if (BUILD_DOUBLE != 1)
1299
#define DGEMM_P   gotoblas -> dgemm_p
1300
#define DGEMM_Q   gotoblas -> dgemm_q
1301
#define DGEMM_R   1024
1302
#define DGEMM_UNROLL_M  gotoblas -> dgemm_unroll_m
1303
#define DGEMM_UNROLL_N  gotoblas -> dgemm_unroll_n
1304
#define DGEMM_UNROLL_MN gotoblas -> dgemm_unroll_mn
1305
#endif
1306
#if (BUILD_COMPLEX != 1)
1307
#define CGEMM_P   gotoblas -> cgemm_p
1308
#define CGEMM_Q   gotoblas -> cgemm_q
1309
#define CGEMM_R   gotoblas -> cgemm_r
1310
#define CGEMM_UNROLL_M  gotoblas -> cgemm_unroll_m
1311
#define CGEMM_UNROLL_N  gotoblas -> cgemm_unroll_n
1312
#define CGEMM_UNROLL_MN gotoblas -> cgemm_unroll_mn
1313
#endif
1314
#endif
1315
1316
#define XGEMM_P   gotoblas -> xgemm_p
1317
#define XGEMM_Q   gotoblas -> xgemm_q
1318
#define XGEMM_R   gotoblas -> xgemm_r
1319
#define XGEMM_UNROLL_M  gotoblas -> xgemm_unroll_m
1320
#define XGEMM_UNROLL_N  gotoblas -> xgemm_unroll_n
1321
#define XGEMM_UNROLL_MN gotoblas -> xgemm_unroll_mn
1322
1323
#define CGEMM3M_P   gotoblas -> cgemm3m_p
1324
#define CGEMM3M_Q   gotoblas -> cgemm3m_q
1325
#define CGEMM3M_R   gotoblas -> cgemm3m_r
1326
#define CGEMM3M_UNROLL_M  gotoblas -> cgemm3m_unroll_m
1327
#define CGEMM3M_UNROLL_N  gotoblas -> cgemm3m_unroll_n
1328
#define CGEMM3M_UNROLL_MN gotoblas -> cgemm3m_unroll_mn
1329
1330
#define ZGEMM3M_P   gotoblas -> zgemm3m_p
1331
#define ZGEMM3M_Q   gotoblas -> zgemm3m_q
1332
#define ZGEMM3M_R   gotoblas -> zgemm3m_r
1333
#define ZGEMM3M_UNROLL_M  gotoblas -> zgemm3m_unroll_m
1334
#define ZGEMM3M_UNROLL_N  gotoblas -> zgemm3m_unroll_n
1335
#define ZGEMM3M_UNROLL_MN gotoblas -> zgemm3m_unroll_mn
1336
1337
#define XGEMM3M_P   gotoblas -> xgemm3m_p
1338
#define XGEMM3M_Q   gotoblas -> xgemm3m_q
1339
#define XGEMM3M_R   gotoblas -> xgemm3m_r
1340
#define XGEMM3M_UNROLL_M  gotoblas -> xgemm3m_unroll_m
1341
#define XGEMM3M_UNROLL_N  gotoblas -> xgemm3m_unroll_n
1342
#define XGEMM3M_UNROLL_MN gotoblas -> xgemm3m_unroll_mn
1343
1344
#else
1345
1346
#define FUNC_OFFSET(func) (size_t)(func)
1347
1348
0
#define DTB_ENTRIES  DTB_DEFAULT_ENTRIES
1349
1350
36
#define GEMM_OFFSET_A GEMM_DEFAULT_OFFSET_A
1351
0
#define GEMM_OFFSET_B GEMM_DEFAULT_OFFSET_B
1352
72
#define GEMM_ALIGN  GEMM_DEFAULT_ALIGN
1353
1354
#ifdef HAVE_EXCLUSIVE_CACHE
1355
#define HAVE_EX_L2  1
1356
#else
1357
0
#define HAVE_EX_L2  0
1358
#endif
1359
1360
#if (BUILD_BFLOAT16 == 1)
1361
#define SBGEMM_P    SBGEMM_DEFAULT_P
1362
#define SBGEMM_Q    SBGEMM_DEFAULT_Q
1363
#define SBGEMM_R    SBGEMM_DEFAULT_R
1364
#define SBGEMM_UNROLL_M SBGEMM_DEFAULT_UNROLL_M
1365
#define SBGEMM_UNROLL_N SBGEMM_DEFAULT_UNROLL_N
1366
#ifdef  SBGEMM_DEFAULT_UNROLL_MN
1367
#define SBGEMM_UNROLL_MN  SBGEMM_DEFAULT_UNROLL_MN
1368
#else
1369
#define SBGEMM_UNROLL_MN  MAX((SBGEMM_UNROLL_M), (SBGEMM_UNROLL_N))
1370
#endif
1371
#endif
1372
1373
9
#define SGEMM_P   SGEMM_DEFAULT_P
1374
18
#define SGEMM_Q   SGEMM_DEFAULT_Q
1375
0
#define SGEMM_R   SGEMM_DEFAULT_R
1376
27
#define SGEMM_UNROLL_M  SGEMM_DEFAULT_UNROLL_M
1377
0
#define SGEMM_UNROLL_N  SGEMM_DEFAULT_UNROLL_N
1378
#ifdef  SGEMM_DEFAULT_UNROLL_MN
1379
#define SGEMM_UNROLL_MN SGEMM_DEFAULT_UNROLL_MN
1380
#else
1381
0
#define SGEMM_UNROLL_MN MAX((SGEMM_UNROLL_M), (SGEMM_UNROLL_N))
1382
#endif
1383
1384
9
#define DGEMM_P   DGEMM_DEFAULT_P
1385
18
#define DGEMM_Q   DGEMM_DEFAULT_Q
1386
0
#define DGEMM_R   DGEMM_DEFAULT_R
1387
27
#define DGEMM_UNROLL_M  DGEMM_DEFAULT_UNROLL_M
1388
0
#define DGEMM_UNROLL_N  DGEMM_DEFAULT_UNROLL_N
1389
#ifdef  DGEMM_DEFAULT_UNROLL_MN
1390
#define DGEMM_UNROLL_MN DGEMM_DEFAULT_UNROLL_MN
1391
#else
1392
0
#define DGEMM_UNROLL_MN MAX((DGEMM_UNROLL_M), (DGEMM_UNROLL_N))
1393
#endif
1394
1395
#define QGEMM_P   QGEMM_DEFAULT_P
1396
#define QGEMM_Q   QGEMM_DEFAULT_Q
1397
#define QGEMM_R   QGEMM_DEFAULT_R
1398
#define QGEMM_UNROLL_M  QGEMM_DEFAULT_UNROLL_M
1399
#define QGEMM_UNROLL_N  QGEMM_DEFAULT_UNROLL_N
1400
#define QGEMM_UNROLL_MN MAX((QGEMM_UNROLL_M), (QGEMM_UNROLL_N))
1401
1402
9
#define CGEMM_P   CGEMM_DEFAULT_P
1403
18
#define CGEMM_Q   CGEMM_DEFAULT_Q
1404
#define CGEMM_R   CGEMM_DEFAULT_R
1405
27
#define CGEMM_UNROLL_M  CGEMM_DEFAULT_UNROLL_M
1406
#define CGEMM_UNROLL_N  CGEMM_DEFAULT_UNROLL_N
1407
#ifdef  CGEMM_DEFAULT_UNROLL_MN
1408
#define CGEMM_UNROLL_MN CGEMM_DEFAULT_UNROLL_MN
1409
#else
1410
0
#define CGEMM_UNROLL_MN MAX((CGEMM_UNROLL_M), (CGEMM_UNROLL_N))
1411
#endif
1412
1413
9
#define ZGEMM_P   ZGEMM_DEFAULT_P
1414
18
#define ZGEMM_Q   ZGEMM_DEFAULT_Q
1415
#define ZGEMM_R   ZGEMM_DEFAULT_R
1416
27
#define ZGEMM_UNROLL_M  ZGEMM_DEFAULT_UNROLL_M
1417
#define ZGEMM_UNROLL_N  ZGEMM_DEFAULT_UNROLL_N
1418
#ifdef  ZGEMM_DEFAULT_UNROLL_MN
1419
#define ZGEMM_UNROLL_MN ZGEMM_DEFAULT_UNROLL_MN
1420
#else
1421
0
#define ZGEMM_UNROLL_MN MAX((ZGEMM_UNROLL_M), (ZGEMM_UNROLL_N))
1422
#endif
1423
1424
#define XGEMM_P   XGEMM_DEFAULT_P
1425
#define XGEMM_Q   XGEMM_DEFAULT_Q
1426
#define XGEMM_R   XGEMM_DEFAULT_R
1427
#define XGEMM_UNROLL_M  XGEMM_DEFAULT_UNROLL_M
1428
#define XGEMM_UNROLL_N  XGEMM_DEFAULT_UNROLL_N
1429
#define XGEMM_UNROLL_MN MAX((XGEMM_UNROLL_M), (XGEMM_UNROLL_N))
1430
1431
#ifdef CGEMM3M_DEFAULT_UNROLL_N
1432
1433
#define CGEMM3M_P   CGEMM3M_DEFAULT_P
1434
#define CGEMM3M_Q   CGEMM3M_DEFAULT_Q
1435
#define CGEMM3M_R   CGEMM3M_DEFAULT_R
1436
#define CGEMM3M_UNROLL_M  CGEMM3M_DEFAULT_UNROLL_M
1437
#define CGEMM3M_UNROLL_N  CGEMM3M_DEFAULT_UNROLL_N
1438
#define CGEMM3M_UNROLL_MN MAX((CGEMM3M_UNROLL_M), (CGEMM3M_UNROLL_N))
1439
1440
#else
1441
1442
#define CGEMM3M_P   SGEMM_DEFAULT_P
1443
#define CGEMM3M_Q   SGEMM_DEFAULT_Q
1444
#define CGEMM3M_R   SGEMM_DEFAULT_R
1445
#define CGEMM3M_UNROLL_M  SGEMM_DEFAULT_UNROLL_M
1446
#define CGEMM3M_UNROLL_N  SGEMM_DEFAULT_UNROLL_N
1447
#define CGEMM3M_UNROLL_MN MAX((CGEMM_UNROLL_M), (CGEMM_UNROLL_N))
1448
1449
#endif
1450
1451
1452
#ifdef ZGEMM3M_DEFAULT_UNROLL_N
1453
1454
#define ZGEMM3M_P   ZGEMM3M_DEFAULT_P
1455
#define ZGEMM3M_Q   ZGEMM3M_DEFAULT_Q
1456
#define ZGEMM3M_R   ZGEMM3M_DEFAULT_R
1457
#define ZGEMM3M_UNROLL_M  ZGEMM3M_DEFAULT_UNROLL_M
1458
#define ZGEMM3M_UNROLL_N  ZGEMM3M_DEFAULT_UNROLL_N
1459
#define ZGEMM3M_UNROLL_MN MAX((ZGEMM_UNROLL_M), (ZGEMM_UNROLL_N))
1460
1461
#else
1462
1463
#define ZGEMM3M_P   DGEMM_DEFAULT_P
1464
#define ZGEMM3M_Q   DGEMM_DEFAULT_Q
1465
#define ZGEMM3M_R   DGEMM_DEFAULT_R
1466
#define ZGEMM3M_UNROLL_M  DGEMM_DEFAULT_UNROLL_M
1467
#define ZGEMM3M_UNROLL_N  DGEMM_DEFAULT_UNROLL_N
1468
#define ZGEMM3M_UNROLL_MN MAX((ZGEMM_UNROLL_M), (ZGEMM_UNROLL_N))
1469
1470
#endif
1471
1472
#define XGEMM3M_P   QGEMM_DEFAULT_P
1473
#define XGEMM3M_Q   QGEMM_DEFAULT_Q
1474
#define XGEMM3M_R   QGEMM_DEFAULT_R
1475
#define XGEMM3M_UNROLL_M  QGEMM_DEFAULT_UNROLL_M
1476
#define XGEMM3M_UNROLL_N  QGEMM_DEFAULT_UNROLL_N
1477
#define XGEMM3M_UNROLL_MN MAX((QGEMM_UNROLL_M), (QGEMM_UNROLL_N))
1478
1479
1480
#endif
1481
#endif
1482
1483
#ifndef COMPLEX
1484
#if   defined(XDOUBLE)
1485
#define GEMM_P      QGEMM_P
1486
#define GEMM_Q      QGEMM_Q
1487
#define GEMM_R      QGEMM_R
1488
#define GEMM_UNROLL_M   QGEMM_UNROLL_M
1489
#define GEMM_UNROLL_N   QGEMM_UNROLL_N
1490
#define GEMM_UNROLL_MN    QGEMM_UNROLL_MN
1491
#define GEMM_DEFAULT_P    QGEMM_DEFAULT_P
1492
#define GEMM_DEFAULT_Q    QGEMM_DEFAULT_Q
1493
#define GEMM_DEFAULT_R    QGEMM_DEFAULT_R
1494
#define GEMM_DEFAULT_UNROLL_M QGEMM_DEFAULT_UNROLL_M
1495
#define GEMM_DEFAULT_UNROLL_N QGEMM_DEFAULT_UNROLL_N
1496
#elif defined(DOUBLE)
1497
0
#define GEMM_P      DGEMM_P
1498
0
#define GEMM_Q      DGEMM_Q
1499
0
#define GEMM_R      DGEMM_R
1500
0
#define GEMM_UNROLL_M   DGEMM_UNROLL_M
1501
0
#define GEMM_UNROLL_N   DGEMM_UNROLL_N
1502
0
#define GEMM_UNROLL_MN    DGEMM_UNROLL_MN
1503
#define GEMM_DEFAULT_P    DGEMM_DEFAULT_P
1504
#define GEMM_DEFAULT_Q    DGEMM_DEFAULT_Q
1505
#define GEMM_DEFAULT_R    DGEMM_DEFAULT_R
1506
#define GEMM_DEFAULT_UNROLL_M DGEMM_DEFAULT_UNROLL_M
1507
#define GEMM_DEFAULT_UNROLL_N DGEMM_DEFAULT_UNROLL_N
1508
#elif defined(BFLOAT16)
1509
#define GEMM_P      SBGEMM_P
1510
#define GEMM_Q      SBGEMM_Q
1511
#define GEMM_R      SBGEMM_R
1512
#define GEMM_UNROLL_M   SBGEMM_UNROLL_M
1513
#define GEMM_UNROLL_N   SBGEMM_UNROLL_N
1514
#define GEMM_UNROLL_MN    SBGEMM_UNROLL_MN
1515
#define GEMM_DEFAULT_P    SBGEMM_DEFAULT_P
1516
#define GEMM_DEFAULT_Q    SBGEMM_DEFAULT_Q
1517
#define GEMM_DEFAULT_R    SBGEMM_DEFAULT_R
1518
#define GEMM_DEFAULT_UNROLL_M SBGEMM_DEFAULT_UNROLL_M
1519
#define GEMM_DEFAULT_UNROLL_N SBGEMM_DEFAULT_UNROLL_N
1520
#else
1521
0
#define GEMM_P      SGEMM_P
1522
0
#define GEMM_Q      SGEMM_Q
1523
0
#define GEMM_R      SGEMM_R
1524
0
#define GEMM_UNROLL_M   SGEMM_UNROLL_M
1525
0
#define GEMM_UNROLL_N   SGEMM_UNROLL_N
1526
0
#define GEMM_UNROLL_MN    SGEMM_UNROLL_MN
1527
#define GEMM_DEFAULT_P    SGEMM_DEFAULT_P
1528
#define GEMM_DEFAULT_Q    SGEMM_DEFAULT_Q
1529
#define GEMM_DEFAULT_R    SGEMM_DEFAULT_R
1530
#define GEMM_DEFAULT_UNROLL_M SGEMM_DEFAULT_UNROLL_M
1531
#define GEMM_DEFAULT_UNROLL_N SGEMM_DEFAULT_UNROLL_N
1532
#endif
1533
#else
1534
#if   defined(XDOUBLE)
1535
#define GEMM_P      XGEMM_P
1536
#define GEMM_Q      XGEMM_Q
1537
#define GEMM_R      XGEMM_R
1538
#define GEMM_UNROLL_M   XGEMM_UNROLL_M
1539
#define GEMM_UNROLL_N   XGEMM_UNROLL_N
1540
#define GEMM_UNROLL_MN    XGEMM_UNROLL_MN
1541
#define GEMM_DEFAULT_P    XGEMM_DEFAULT_P
1542
#define GEMM_DEFAULT_Q    XGEMM_DEFAULT_Q
1543
#define GEMM_DEFAULT_R    XGEMM_DEFAULT_R
1544
#define GEMM_DEFAULT_UNROLL_M XGEMM_DEFAULT_UNROLL_M
1545
#define GEMM_DEFAULT_UNROLL_N XGEMM_DEFAULT_UNROLL_N
1546
#elif defined(DOUBLE)
1547
#define GEMM_P      ZGEMM_P
1548
#define GEMM_Q      ZGEMM_Q
1549
#define GEMM_R      ZGEMM_R
1550
#define GEMM_UNROLL_M   ZGEMM_UNROLL_M
1551
#define GEMM_UNROLL_N   ZGEMM_UNROLL_N
1552
#define GEMM_UNROLL_MN    ZGEMM_UNROLL_MN
1553
#define GEMM_DEFAULT_P    ZGEMM_DEFAULT_P
1554
#define GEMM_DEFAULT_Q    ZGEMM_DEFAULT_Q
1555
#define GEMM_DEFAULT_R    ZGEMM_DEFAULT_R
1556
#define GEMM_DEFAULT_UNROLL_M ZGEMM_DEFAULT_UNROLL_M
1557
#define GEMM_DEFAULT_UNROLL_N ZGEMM_DEFAULT_UNROLL_N
1558
#else
1559
#define GEMM_P      CGEMM_P
1560
#define GEMM_Q      CGEMM_Q
1561
#define GEMM_R      CGEMM_R
1562
#define GEMM_UNROLL_M   CGEMM_UNROLL_M
1563
#define GEMM_UNROLL_N   CGEMM_UNROLL_N
1564
#define GEMM_UNROLL_MN    CGEMM_UNROLL_MN
1565
#define GEMM_DEFAULT_P    CGEMM_DEFAULT_P
1566
#define GEMM_DEFAULT_Q    CGEMM_DEFAULT_Q
1567
#define GEMM_DEFAULT_R    CGEMM_DEFAULT_R
1568
#define GEMM_DEFAULT_UNROLL_M CGEMM_DEFAULT_UNROLL_M
1569
#define GEMM_DEFAULT_UNROLL_N CGEMM_DEFAULT_UNROLL_N
1570
#endif
1571
#endif
1572
1573
#ifdef XDOUBLE
1574
#define GEMM3M_UNROLL_M XGEMM3M_UNROLL_M
1575
#define GEMM3M_UNROLL_N XGEMM3M_UNROLL_N
1576
#elif defined(DOUBLE)
1577
#define GEMM3M_UNROLL_M ZGEMM3M_UNROLL_M
1578
#define GEMM3M_UNROLL_N ZGEMM3M_UNROLL_N
1579
#else
1580
#define GEMM3M_UNROLL_M CGEMM3M_UNROLL_M
1581
#define GEMM3M_UNROLL_N CGEMM3M_UNROLL_N
1582
#endif
1583
1584
1585
#ifndef QGEMM_DEFAULT_UNROLL_M
1586
#define QGEMM_DEFAULT_UNROLL_M 2
1587
#endif
1588
1589
#ifndef QGEMM_DEFAULT_UNROLL_N
1590
#define QGEMM_DEFAULT_UNROLL_N 2
1591
#endif
1592
1593
#ifndef XGEMM_DEFAULT_UNROLL_M
1594
#define XGEMM_DEFAULT_UNROLL_M 2
1595
#endif
1596
1597
#ifndef XGEMM_DEFAULT_UNROLL_N
1598
#define XGEMM_DEFAULT_UNROLL_N 2
1599
#endif
1600
1601
#ifndef GEMM_THREAD
1602
#define GEMM_THREAD gemm_thread_n
1603
#endif
1604
1605
#ifndef SBGEMM_DEFAULT_R
1606
#define SBGEMM_DEFAULT_R (((BUFFER_SIZE - ((SBGEMM_DEFAULT_P * SBGEMM_DEFAULT_Q *  4 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (SBGEMM_DEFAULT_Q *  4) - 15) & ~15UL)
1607
#endif
1608
1609
#ifndef SGEMM_DEFAULT_R
1610
#define SGEMM_DEFAULT_R (((BUFFER_SIZE - ((SGEMM_DEFAULT_P * SGEMM_DEFAULT_Q *  4 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (SGEMM_DEFAULT_Q *  4) - 15) & ~15UL)
1611
#endif
1612
1613
#ifndef DGEMM_DEFAULT_R
1614
#define DGEMM_DEFAULT_R (((BUFFER_SIZE - ((DGEMM_DEFAULT_P * DGEMM_DEFAULT_Q *  8 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (DGEMM_DEFAULT_Q *  8) - 15) & ~15UL)
1615
#endif
1616
1617
#ifndef QGEMM_DEFAULT_R
1618
#define QGEMM_DEFAULT_R (((BUFFER_SIZE - ((QGEMM_DEFAULT_P * QGEMM_DEFAULT_Q * 16 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (QGEMM_DEFAULT_Q * 16) - 15) & ~15UL)
1619
#endif
1620
1621
#ifndef CGEMM_DEFAULT_R
1622
#define CGEMM_DEFAULT_R (((BUFFER_SIZE - ((CGEMM_DEFAULT_P * CGEMM_DEFAULT_Q *  8 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (CGEMM_DEFAULT_Q *  8) - 15) & ~15UL)
1623
#endif
1624
1625
#ifndef ZGEMM_DEFAULT_R
1626
#define ZGEMM_DEFAULT_R (((BUFFER_SIZE - ((ZGEMM_DEFAULT_P * ZGEMM_DEFAULT_Q * 16 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (ZGEMM_DEFAULT_Q * 16) - 15) & ~15UL)
1627
#endif
1628
1629
#ifndef XGEMM_DEFAULT_R
1630
#define XGEMM_DEFAULT_R (((BUFFER_SIZE - ((XGEMM_DEFAULT_P * XGEMM_DEFAULT_Q * 32 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (XGEMM_DEFAULT_Q * 32) - 15) & ~15UL)
1631
#endif
1632
1633
#ifndef SNUMOPT
1634
#define SNUMOPT   2
1635
#endif
1636
1637
#ifndef DNUMOPT
1638
#define DNUMOPT   2
1639
#endif
1640
1641
#ifndef QNUMOPT
1642
#define QNUMOPT   1
1643
#endif
1644
1645
#ifndef GEMM3M_P
1646
#ifdef XDOUBLE
1647
#define GEMM3M_P  XGEMM3M_P
1648
#elif defined(DOUBLE)
1649
#define GEMM3M_P  ZGEMM3M_P
1650
#else
1651
#define GEMM3M_P  CGEMM3M_P
1652
#endif
1653
#endif
1654
1655
#ifndef GEMM3M_Q
1656
#ifdef XDOUBLE
1657
#define GEMM3M_Q  XGEMM3M_Q
1658
#elif defined(DOUBLE)
1659
#define GEMM3M_Q  ZGEMM3M_Q
1660
#else
1661
#define GEMM3M_Q  CGEMM3M_Q
1662
#endif
1663
#endif
1664
1665
#ifndef GEMM3M_R
1666
#ifdef XDOUBLE
1667
#define GEMM3M_R  XGEMM3M_R
1668
#elif defined(DOUBLE)
1669
#define GEMM3M_R  ZGEMM3M_R
1670
#else
1671
#define GEMM3M_R  CGEMM3M_R
1672
#endif
1673
#endif
1674
1675
1676
#endif