/root/doris/contrib/openblas/interface/syr2k.c
Line | Count | Source |
1 | | /*********************************************************************/ |
2 | | /* Copyright 2009, 2010 The University of Texas at Austin. */ |
3 | | /* All rights reserved. */ |
4 | | /* */ |
5 | | /* Redistribution and use in source and binary forms, with or */ |
6 | | /* without modification, are permitted provided that the following */ |
7 | | /* conditions are met: */ |
8 | | /* */ |
9 | | /* 1. Redistributions of source code must retain the above */ |
10 | | /* copyright notice, this list of conditions and the following */ |
11 | | /* disclaimer. */ |
12 | | /* */ |
13 | | /* 2. Redistributions in binary form must reproduce the above */ |
14 | | /* copyright notice, this list of conditions and the following */ |
15 | | /* disclaimer in the documentation and/or other materials */ |
16 | | /* provided with the distribution. */ |
17 | | /* */ |
18 | | /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ |
19 | | /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ |
20 | | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ |
21 | | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ |
22 | | /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ |
23 | | /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ |
24 | | /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ |
25 | | /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ |
26 | | /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ |
27 | | /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ |
28 | | /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ |
29 | | /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ |
30 | | /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ |
31 | | /* POSSIBILITY OF SUCH DAMAGE. */ |
32 | | /* */ |
33 | | /* The views and conclusions contained in the software and */ |
34 | | /* documentation are those of the authors and should not be */ |
35 | | /* interpreted as representing official policies, either expressed */ |
36 | | /* or implied, of The University of Texas at Austin. */ |
37 | | /*********************************************************************/ |
38 | | |
39 | | #include <stdio.h> |
40 | | #include <ctype.h> |
41 | | #include "common.h" |
42 | | #ifdef FUNCTION_PROFILE |
43 | | #include "functable.h" |
44 | | #endif |
45 | | |
46 | | #ifndef COMPLEX |
47 | | #ifdef XDOUBLE |
48 | | #define ERROR_NAME "QSYR2K" |
49 | | #elif defined(DOUBLE) |
50 | 0 | #define ERROR_NAME "DSYR2K" |
51 | | #else |
52 | | #define ERROR_NAME "SSYR2K" |
53 | | #endif |
54 | | #else |
55 | | #ifndef HEMM |
56 | | #ifdef XDOUBLE |
57 | | #define ERROR_NAME "XSYR2K" |
58 | | #elif defined(DOUBLE) |
59 | | #define ERROR_NAME "ZSYR2K" |
60 | | #else |
61 | | #define ERROR_NAME "CSYR2K" |
62 | | #endif |
63 | | #else |
64 | | #ifdef XDOUBLE |
65 | | #define ERROR_NAME "XHER2K" |
66 | | #elif defined(DOUBLE) |
67 | | #define ERROR_NAME "ZHER2K" |
68 | | #else |
69 | | #define ERROR_NAME "CHER2K" |
70 | | #endif |
71 | | #endif |
72 | | #endif |
73 | | |
74 | | static int (*syr2k[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = { /* kernel table; callers index it with (uplo << 1) | trans */ |
75 | | #ifndef HEMM /* HEMM undefined: SYR2K kernel set */ |
76 | | SYR2K_UN, SYR2K_UC, SYR2K_LN, SYR2K_LC, /* slots 0..3: uplo 0 then 1, each with trans 0 then 1 */ |
77 | | #else /* HEMM defined: HER2K kernel set, same slot order */ |
78 | | HER2K_UN, HER2K_UC, HER2K_LN, HER2K_LC, |
79 | | #endif |
80 | | }; |
81 | | |
82 | | #ifndef CBLAS |
83 | | |
 | | /* Fortran-style entry point (built when CBLAS is undefined): every argument is passed by pointer. */ |
 | | /* It fills a blas_arg_t, validates the arguments and reports an invalid one through xerbla. */ |
 | | /* The code after the matching #endif is shared with the CBLAS entry point: it obtains the */ |
 | | /* work buffer and runs the kernel selected from the syr2k[] table. */ |
84 | | void NAME(char *UPLO, char *TRANS, |
85 | | blasint *N, blasint *K, |
86 | | FLOAT *alpha, FLOAT *a, blasint *ldA, |
87 | | FLOAT *b, blasint *ldB, |
88 | 0 | FLOAT *beta, FLOAT *c, blasint *ldC){ |
89 | |
|
90 | 0 | char uplo_arg = *UPLO; |
91 | 0 | char trans_arg = *TRANS; |
92 | |
|
93 | 0 | blas_arg_t args; |
94 | |
|
95 | 0 | FLOAT *buffer; |
96 | 0 | FLOAT *sa, *sb; |
97 | |
|
98 | 0 | #ifdef SMP |
99 | 0 | #ifndef COMPLEX |
100 | | #ifdef XDOUBLE |
101 | | int mode = BLAS_XDOUBLE | BLAS_REAL; |
102 | | #elif defined(DOUBLE) |
103 | 0 | int mode = BLAS_DOUBLE | BLAS_REAL; |
104 | | #else |
105 | | int mode = BLAS_SINGLE | BLAS_REAL; |
106 | | #endif |
107 | | #else |
108 | | #ifdef XDOUBLE |
109 | | int mode = BLAS_XDOUBLE | BLAS_COMPLEX; |
110 | | #elif defined(DOUBLE) |
111 | | int mode = BLAS_DOUBLE | BLAS_COMPLEX; |
112 | | #else |
113 | | int mode = BLAS_SINGLE | BLAS_COMPLEX; |
114 | | #endif |
115 | | #endif |
116 | 0 | #endif |
117 | |
|
118 | 0 | blasint info; |
119 | 0 | int uplo; |
120 | 0 | int trans; |
121 | 0 | int nrowa; |
122 | |
|
123 | 0 | PRINT_DEBUG_NAME; |
124 | |
|
125 | 0 | args.n = *N; |
126 | 0 | args.k = *K; |
127 | |
|
128 | 0 | args.a = (void *)a; |
129 | 0 | args.b = (void *)b; |
130 | 0 | args.c = (void *)c; |
131 | |
|
132 | 0 | args.lda = *ldA; |
133 | 0 | args.ldb = *ldB; |
134 | 0 | args.ldc = *ldC; |
135 | |
|
136 | 0 | args.alpha = (void *)alpha; |
137 | 0 | args.beta = (void *)beta; |
138 | |
|
139 | 0 | TOUPPER(uplo_arg); |
140 | 0 | TOUPPER(trans_arg); |
141 | |
|
 | | /* -1 marks "not recognised"; the checks below turn it into an xerbla report. */ |
142 | 0 | uplo = -1; |
143 | 0 | trans = -1; |
144 | |
|
145 | 0 | if (uplo_arg == 'U') uplo = 0; |
146 | 0 | if (uplo_arg == 'L') uplo = 1; |
147 | |
|
 | | /* Accepted TRANS letters depend on the build: N/T/C for real, N/C for HEMM, N/T for other complex. */ |
148 | 0 | #ifndef COMPLEX |
149 | 0 | if (trans_arg == 'N') trans = 0; |
150 | 0 | if (trans_arg == 'T') trans = 1; |
151 | 0 | if (trans_arg == 'C') trans = 1; |
152 | | #else |
153 | | #ifdef HEMM |
154 | | if (trans_arg == 'N') trans = 0; |
155 | | if (trans_arg == 'C') trans = 1; |
156 | | #else |
157 | | if (trans_arg == 'N') trans = 0; |
158 | | if (trans_arg == 'T') trans = 1; |
159 | | #endif |
160 | | |
161 | | #endif |
162 | | |
163 | |
|
 | | /* nrowa is the minimum leading dimension of a and b: n when not transposed, k otherwise. */ |
164 | 0 | nrowa = args.n; |
165 | 0 | if (trans & 1) nrowa = args.k; |
166 | |
|
167 | 0 | info = 0; |
168 | |
|
 | | /* Checks run from the last argument to the first, so the lowest-numbered bad argument wins. */ |
169 | 0 | if (args.ldc < MAX(1,args.n)) info = 12; |
170 | 0 | if (args.ldb < MAX(1,nrowa)) info = 9; |
171 | 0 | if (args.lda < MAX(1,nrowa)) info = 7; |
172 | 0 | if (args.k < 0) info = 4; |
173 | 0 | if (args.n < 0) info = 3; |
174 | 0 | if (trans < 0) info = 2; |
175 | 0 | if (uplo < 0) info = 1; |
176 | |
|
177 | 0 | if (info != 0) { |
178 | 0 | BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); |
179 | 0 | return; |
180 | 0 | } |
181 | | |
182 | | #else |
183 | | |
 | | /* CBLAS entry point: dimensions and real scalars are passed by value; `order` selects the layout. */ |
 | | /* Complex builds receive alpha, a, b and c as void pointers; beta is a void pointer unless HEMM is defined. */ |
184 | | void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, |
185 | | blasint n, blasint k, |
186 | | #ifndef COMPLEX |
187 | | FLOAT alpha, |
188 | | FLOAT *a, blasint lda, |
189 | | FLOAT *b, blasint ldb, |
190 | | #else |
191 | | void *valpha, |
192 | | void *va, blasint lda, |
193 | | void *vb, blasint ldb, |
194 | | #endif |
195 | | #if !defined(COMPLEX) || defined(HEMM) |
196 | | FLOAT beta, |
197 | | #else |
198 | | void *vbeta, |
199 | | #endif |
200 | | #ifndef COMPLEX |
201 | | FLOAT *c, |
202 | | #else |
203 | | void *vc, |
204 | | #endif |
205 | | blasint ldc) { |
206 | | |
207 | | #ifdef COMPLEX |
208 | | FLOAT* alpha = (FLOAT*) valpha; |
209 | | #if !defined(HEMM) |
210 | | FLOAT* beta = (FLOAT*) vbeta; |
211 | | #endif |
212 | | FLOAT* a = (FLOAT*) va; |
213 | | FLOAT* b = (FLOAT*) vb; |
214 | | FLOAT* c = (FLOAT*) vc; |
215 | | #endif |
216 | | |
217 | | blas_arg_t args; |
218 | | int uplo, trans; |
219 | | blasint info, nrowa; |
220 | | |
221 | | FLOAT *buffer; |
222 | | FLOAT *sa, *sb; |
223 | | |
224 | | #ifdef HEMM |
225 | | FLOAT CAlpha[2]; |
226 | | #endif |
227 | | |
228 | | #ifdef SMP |
229 | | #ifndef COMPLEX |
230 | | #ifdef XDOUBLE |
231 | | int mode = BLAS_XDOUBLE | BLAS_REAL; |
232 | | #elif defined(DOUBLE) |
233 | | int mode = BLAS_DOUBLE | BLAS_REAL; |
234 | | #else |
235 | | int mode = BLAS_SINGLE | BLAS_REAL; |
236 | | #endif |
237 | | #else |
238 | | #ifdef XDOUBLE |
239 | | int mode = BLAS_XDOUBLE | BLAS_COMPLEX; |
240 | | #elif defined(DOUBLE) |
241 | | int mode = BLAS_DOUBLE | BLAS_COMPLEX; |
242 | | #else |
243 | | int mode = BLAS_SINGLE | BLAS_COMPLEX; |
244 | | #endif |
245 | | #endif |
246 | | #endif |
247 | | |
248 | | PRINT_DEBUG_CNAME; |
249 | | |
250 | | args.n = n; |
251 | | args.k = k; |
252 | | |
253 | | args.a = (void *)a; |
254 | | args.b = (void *)b; |
255 | | args.c = (void *)c; |
256 | | |
257 | | args.lda = lda; |
258 | | args.ldb = ldb; |
259 | | args.ldc = ldc; |
260 | | |
 | | /* In real builds alpha is a by-value parameter, so its address is stored; complex builds already hold a pointer. */ |
261 | | #ifndef COMPLEX |
262 | | args.alpha = (void *)&alpha; |
263 | | #else |
264 | | args.alpha = (void *)alpha; |
265 | | #endif |
266 | | |
 | | /* beta is by value for real and HEMM builds (address taken); other complex builds pass a pointer. */ |
267 | | #if !defined(COMPLEX) || defined(HEMM) |
268 | | args.beta = (void *)&beta; |
269 | | #else |
270 | | args.beta = (void *)beta; |
271 | | #endif |
272 | | |
273 | | trans = -1; |
274 | | uplo = -1; |
275 | | info = 0; |
276 | | |
277 | | if (order == CblasColMajor) { |
278 | | if (Uplo == CblasUpper) uplo = 0; |
279 | | if (Uplo == CblasLower) uplo = 1; |
280 | | |
281 | | if (Trans == CblasNoTrans) trans = 0; |
282 | | #ifndef COMPLEX |
283 | | if (Trans == CblasTrans) trans = 1; |
284 | | if (Trans == CblasConjNoTrans) trans = 0; |
285 | | if (Trans == CblasConjTrans) trans = 1; |
286 | | #elif !defined(HEMM) |
287 | | if (Trans == CblasTrans) trans = 1; |
288 | | #else |
289 | | if (Trans == CblasConjTrans) trans = 1; |
290 | | #endif |
291 | | |
 | | /* -1 means "no error"; any failing check below overwrites it with a positive code. */ |
292 | | info = -1; |
293 | | |
294 | | nrowa = args.n; |
295 | | if (trans & 1) nrowa = args.k; |
296 | | |
297 | | if (args.ldc < MAX(1,args.n)) info = 12; |
298 | | if (args.ldb < MAX(1,nrowa)) info = 9; |
299 | | if (args.lda < MAX(1,nrowa)) info = 7; |
300 | | if (args.k < 0) info = 4; |
301 | | if (args.n < 0) info = 3; |
302 | | if (trans < 0) info = 2; |
303 | | if (uplo < 0) info = 1; |
304 | | } |
305 | | |
 | | /* Row-major input is mapped onto the same kernels by swapping upper/lower and inverting the transpose flag. */ |
306 | | if (order == CblasRowMajor) { |
307 | | |
 | | /* HEMM builds additionally pass the kernel a copy of alpha with the imaginary part negated. */ |
308 | | #ifdef HEMM |
309 | | CAlpha[0] = alpha[0]; |
310 | | CAlpha[1] = -alpha[1]; |
311 | | |
312 | | args.alpha = (void *)CAlpha; |
313 | | #endif |
314 | | |
315 | | if (Uplo == CblasUpper) uplo = 1; |
316 | | if (Uplo == CblasLower) uplo = 0; |
317 | | |
318 | | if (Trans == CblasNoTrans) trans = 1; |
319 | | #ifndef COMPLEX |
320 | | if (Trans == CblasTrans) trans = 0; |
321 | | if (Trans == CblasConjNoTrans) trans = 1; |
322 | | if (Trans == CblasConjTrans) trans = 0; |
323 | | #elif !defined(HEMM) |
324 | | if (Trans == CblasTrans) trans = 0; |
325 | | #else |
326 | | if (Trans == CblasConjTrans) trans = 0; |
327 | | #endif |
328 | | |
329 | | info = -1; |
330 | | |
331 | | nrowa = args.n; |
332 | | if (trans & 1) nrowa = args.k; |
333 | | |
334 | | if (args.ldc < MAX(1,args.n)) info = 12; |
335 | | if (args.ldb < MAX(1,nrowa)) info = 9; |
336 | | if (args.lda < MAX(1,nrowa)) info = 7; |
337 | | if (args.k < 0) info = 4; |
338 | | if (args.n < 0) info = 3; |
339 | | if (trans < 0) info = 2; |
340 | | if (uplo < 0) info = 1; |
341 | | } |
342 | | |
 | | /* info is still 0 when `order` matched neither layout; that case is reported to xerbla as well. */ |
343 | | if (info >= 0) { |
344 | | BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); |
345 | | return; |
346 | | } |
347 | | |
348 | | #endif |
349 | | |
 | | /* Shared tail for both entry points. Return before allocating anything when n is zero. */ |
350 | 0 | if (args.n == 0) return; |
351 | | |
352 | 0 | IDEBUG_START; |
353 | |
|
354 | 0 | FUNCTION_PROFILE_START(); |
355 | |
|
 | | /* sa and sb are two regions inside the single buffer returned by blas_memory_alloc. */ |
356 | 0 | buffer = (FLOAT *)blas_memory_alloc(0); |
357 | |
|
358 | 0 | sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A); |
359 | 0 | sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); |
360 | |
|
361 | 0 | #ifdef SMP |
362 | 0 | if (!trans){ |
363 | 0 | mode |= (BLAS_TRANSA_N | BLAS_TRANSB_T); |
364 | 0 | } else { |
365 | 0 | mode |= (BLAS_TRANSA_T | BLAS_TRANSB_N); |
366 | 0 | } |
367 | |
|
368 | 0 | mode |= (uplo << BLAS_UPLO_SHIFT); |
369 | |
|
 | | /* Stay single-threaded when n*k < 1000; otherwise take the thread count from num_cpu_avail. */ |
370 | 0 | args.common = NULL; |
371 | 0 | if (args.n*args.k <1000) |
372 | 0 | args.nthreads =1 ; |
373 | 0 | else |
374 | 0 | args.nthreads = num_cpu_avail(3); |
375 | |
|
376 | 0 | if (args.nthreads == 1) { |
377 | 0 | #endif |
378 | |
|
 | | /* Direct call of the kernel selected by (uplo << 1) | trans. */ |
379 | 0 | (syr2k[(uplo << 1) | trans ])(&args, NULL, NULL, sa, sb, 0); |
380 | |
|
381 | 0 | #ifdef SMP |
382 | |
|
383 | 0 | } else { |
384 | |
|
 | | /* Multi-threaded path: the same kernel is handed to syrk_thread together with mode and the thread count. */ |
385 | 0 | syrk_thread(mode, &args, NULL, NULL, syr2k[(uplo << 1) | trans ], sa, sb, args.nthreads); |
386 | |
|
387 | 0 | } |
388 | 0 | #endif |
389 | |
|
390 | 0 | blas_memory_free(buffer); |
391 | |
|
392 | 0 | FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, 2 * args.n * args.k + args.n * args.n, 2 * args.n * args.n * args.k); |
393 | |
|
394 | 0 | IDEBUG_END; |
395 | |
|
396 | 0 | return; |
397 | 0 | } |