Coverage Report

Created: 2025-09-02 13:40

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/contrib/openblas/interface/syrk.c
Line
Count
Source
1
/*********************************************************************/
2
/* Copyright 2009, 2010 The University of Texas at Austin.           */
3
/* All rights reserved.                                              */
4
/*                                                                   */
5
/* Redistribution and use in source and binary forms, with or        */
6
/* without modification, are permitted provided that the following   */
7
/* conditions are met:                                               */
8
/*                                                                   */
9
/*   1. Redistributions of source code must retain the above         */
10
/*      copyright notice, this list of conditions and the following  */
11
/*      disclaimer.                                                  */
12
/*                                                                   */
13
/*   2. Redistributions in binary form must reproduce the above      */
14
/*      copyright notice, this list of conditions and the following  */
15
/*      disclaimer in the documentation and/or other materials       */
16
/*      provided with the distribution.                              */
17
/*                                                                   */
18
/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
19
/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
20
/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
21
/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
22
/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
23
/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
24
/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
25
/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
26
/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
27
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
28
/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
29
/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
30
/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
31
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
32
/*                                                                   */
33
/* The views and conclusions contained in the software and           */
34
/* documentation are those of the authors and should not be          */
35
/* interpreted as representing official policies, either expressed   */
36
/* or implied, of The University of Texas at Austin.                 */
37
/*********************************************************************/
38
39
#include <stdio.h>
40
#include <ctype.h>
41
#include "common.h"
42
#ifdef FUNCTION_PROFILE
43
#include "functable.h"
44
#endif
45
46
#ifndef COMPLEX
47
0
#define SMP_THRESHOLD_MIN 109944.
48
#ifdef XDOUBLE
49
#define ERROR_NAME "QSYRK "
50
#elif defined(DOUBLE)
51
#define ERROR_NAME "DSYRK "
52
#else
53
0
#define ERROR_NAME "SSYRK "
54
#endif
55
#else
56
#define SMP_THRESHOLD_MIN 14824.
57
#ifndef HEMM
58
#ifdef XDOUBLE
59
#define ERROR_NAME "XSYRK "
60
#elif defined(DOUBLE)
61
#define ERROR_NAME "ZSYRK "
62
#else
63
#define ERROR_NAME "CSYRK "
64
#endif
65
#else
66
#ifdef XDOUBLE
67
#define ERROR_NAME "XHERK "
68
#elif defined(DOUBLE)
69
#define ERROR_NAME "ZHERK "
70
#else
71
#define ERROR_NAME "CHERK "
72
#endif
73
#endif
74
#endif
75
76
#ifndef GEMM_MULTITHREAD_THRESHOLD
77
#define GEMM_MULTITHREAD_THRESHOLD 4
78
#endif
79
80
static int (*syrk[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = {
81
#ifndef HEMM
82
  SYRK_UN, SYRK_UC, SYRK_LN, SYRK_LC,
83
#if defined(SMP) && !defined(USE_SIMPLE_THREADED_LEVEL3)
84
  SYRK_THREAD_UN, SYRK_THREAD_UC, SYRK_THREAD_LN, SYRK_THREAD_LC,
85
#endif
86
#else
87
  HERK_UN, HERK_UC, HERK_LN, HERK_LC,
88
#if defined(SMP) && !defined(USE_SIMPLE_THREADED_LEVEL3)
89
  HERK_THREAD_UN, HERK_THREAD_UC, HERK_THREAD_LN, HERK_THREAD_LC,
90
#endif
91
#endif
92
};
93
94
#ifndef CBLAS
95
96
void NAME(char *UPLO, char *TRANS,
97
         blasint *N, blasint *K,
98
         FLOAT *alpha, FLOAT *a, blasint *ldA,
99
0
         FLOAT *beta,  FLOAT *c, blasint *ldC){
100
101
0
  char uplo_arg  = *UPLO;
102
0
  char trans_arg = *TRANS;
103
104
0
  blas_arg_t args;
105
106
0
  FLOAT *buffer;
107
0
  FLOAT *sa, *sb;
108
109
0
#ifdef SMP
110
0
  double NNK;
111
#ifdef USE_SIMPLE_THREADED_LEVEL3
112
#ifndef COMPLEX
113
#ifdef XDOUBLE
114
  int mode  =  BLAS_XDOUBLE | BLAS_REAL;
115
#elif defined(DOUBLE)
116
  int mode  =  BLAS_DOUBLE  | BLAS_REAL;
117
#else
118
  int mode  =  BLAS_SINGLE  | BLAS_REAL;
119
#endif
120
#else
121
#ifdef XDOUBLE
122
  int mode  =  BLAS_XDOUBLE | BLAS_COMPLEX;
123
#elif defined(DOUBLE)
124
  int mode  =  BLAS_DOUBLE  | BLAS_COMPLEX;
125
#else
126
  int mode  =  BLAS_SINGLE  | BLAS_COMPLEX;
127
#endif
128
#endif
129
#endif
130
0
#endif
131
132
0
  blasint info;
133
0
  int uplo;
134
0
  int trans;
135
0
  int nrowa;
136
137
0
  PRINT_DEBUG_NAME;
138
139
0
  args.n = *N;
140
0
  args.k = *K;
141
142
0
  args.a = (void *)a;
143
0
  args.c = (void *)c;
144
145
0
  args.lda = *ldA;
146
0
  args.ldc = *ldC;
147
148
0
  args.alpha = (void *)alpha;
149
0
  args.beta  = (void *)beta;
150
151
0
  TOUPPER(uplo_arg);
152
0
  TOUPPER(trans_arg);
153
154
0
  uplo  = -1;
155
0
  trans = -1;
156
157
0
  if (uplo_arg  == 'U') uplo  = 0;
158
0
  if (uplo_arg  == 'L') uplo  = 1;
159
160
161
0
#ifndef COMPLEX
162
0
  if (trans_arg == 'N') trans = 0;
163
0
  if (trans_arg == 'T') trans = 1;
164
0
  if (trans_arg == 'C') trans = 1;
165
#else
166
#ifdef HEMM
167
  if (trans_arg == 'N') trans = 0;
168
  if (trans_arg == 'C') trans = 1;
169
#else
170
  if (trans_arg == 'N') trans = 0;
171
  if (trans_arg == 'T') trans = 1;
172
#endif
173
174
#endif
175
176
0
  nrowa = args.n;
177
0
  if (trans & 1) nrowa = args.k;
178
179
0
  info = 0;
180
181
0
  if (args.ldc < MAX(1,args.n)) info = 10;
182
0
  if (args.lda < MAX(1,nrowa))  info =  7;
183
0
  if (args.k < 0)               info =  4;
184
0
  if (args.n < 0)               info =  3;
185
0
  if (trans < 0)                info =  2;
186
0
  if (uplo  < 0)                info =  1;
187
188
0
  if (info != 0) {
189
0
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
190
0
    return;
191
0
  }
192
193
#else
194
195
void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans,
196
     blasint n, blasint k,
197
#if !defined(COMPLEX) || defined(HEMM)
198
     FLOAT alpha,
199
#else
200
     void *valpha,
201
#endif
202
#if !defined(COMPLEX)
203
     FLOAT *a, blasint lda,
204
#else
205
     void *va, blasint lda,
206
#endif
207
#if !defined(COMPLEX) || defined(HEMM)
208
     FLOAT beta,
209
#else
210
     void *vbeta,
211
#endif
212
#if !defined(COMPLEX)
213
     FLOAT *c, blasint ldc) {
214
#else
215
     void *vc, blasint ldc) {
216
#endif
217
218
#ifdef COMPLEX
219
#if !defined(HEMM)
220
  FLOAT* alpha = (FLOAT*) valpha;
221
  FLOAT* beta = (FLOAT*) vbeta;
222
#endif
223
  FLOAT* a = (FLOAT*) va;
224
  FLOAT* c = (FLOAT*) vc;
225
#endif
226
227
  blas_arg_t args;
228
  int uplo, trans;
229
  blasint info, nrowa;
230
231
  FLOAT *buffer;
232
  FLOAT *sa, *sb;
233
234
#ifdef SMP
235
double NNK;
236
237
#ifdef USE_SIMPLE_THREADED_LEVEL3
238
#ifndef COMPLEX
239
#ifdef XDOUBLE
240
  int mode  =  BLAS_XDOUBLE | BLAS_REAL;
241
#elif defined(DOUBLE)
242
  int mode  =  BLAS_DOUBLE  | BLAS_REAL;
243
#else
244
  int mode  =  BLAS_SINGLE  | BLAS_REAL;
245
#endif
246
#else
247
#ifdef XDOUBLE
248
  int mode  =  BLAS_XDOUBLE | BLAS_COMPLEX;
249
#elif defined(DOUBLE)
250
  int mode  =  BLAS_DOUBLE  | BLAS_COMPLEX;
251
#else
252
  int mode  =  BLAS_SINGLE  | BLAS_COMPLEX;
253
#endif
254
#endif
255
#endif
256
#endif
257
258
  PRINT_DEBUG_CNAME;
259
260
  args.n = n;
261
  args.k = k;
262
263
  args.a = (void *)a;
264
  args.c = (void *)c;
265
266
  args.lda = lda;
267
  args.ldc = ldc;
268
269
#if !defined(COMPLEX) || defined(HEMM)
270
  args.alpha = (void *)&alpha;
271
  args.beta  = (void *)&beta;
272
#else
273
  args.alpha = (void *)alpha;
274
  args.beta  = (void *)beta;
275
#endif
276
277
  trans = -1;
278
  uplo  = -1;
279
  info  =  0;
280
281
  if (order == CblasColMajor) {
282
    if (Uplo == CblasUpper) uplo  = 0;
283
    if (Uplo == CblasLower) uplo  = 1;
284
285
    if (Trans == CblasNoTrans)     trans = 0;
286
#ifndef COMPLEX
287
    if (Trans == CblasTrans)       trans = 1;
288
    if (Trans == CblasConjNoTrans) trans = 0;
289
    if (Trans == CblasConjTrans)   trans = 1;
290
#elif !defined(HEMM)
291
    if (Trans == CblasTrans)       trans = 1;
292
#else
293
    if (Trans == CblasConjTrans)   trans = 1;
294
#endif
295
296
    info = -1;
297
298
    nrowa = args.n;
299
    if (trans & 1) nrowa = args.k;
300
301
    if (args.ldc < MAX(1,args.n)) info = 10;
302
    if (args.lda < MAX(1,nrowa))  info =  7;
303
    if (args.k < 0)               info =  4;
304
    if (args.n < 0)               info =  3;
305
    if (trans < 0)                info =  2;
306
    if (uplo  < 0)                info =  1;
307
  }
308
309
  if (order == CblasRowMajor) {
310
    if (Uplo == CblasUpper) uplo  = 1;
311
    if (Uplo == CblasLower) uplo  = 0;
312
313
    if (Trans == CblasNoTrans)     trans = 1;
314
#ifndef COMPLEX
315
    if (Trans == CblasTrans)       trans = 0;
316
    if (Trans == CblasConjNoTrans) trans = 1;
317
    if (Trans == CblasConjTrans)   trans = 0;
318
#elif !defined(HEMM)
319
    if (Trans == CblasTrans)       trans = 0;
320
#else
321
    if (Trans == CblasConjTrans)   trans = 0;
322
#endif
323
324
    info = -1;
325
326
    nrowa = args.n;
327
    if (trans & 1) nrowa = args.k;
328
329
    if (args.ldc < MAX(1,args.n)) info = 10;
330
    if (args.lda < MAX(1,nrowa))  info =  7;
331
    if (args.k < 0)               info =  4;
332
    if (args.n < 0)               info =  3;
333
    if (trans < 0)                info =  2;
334
    if (uplo  < 0)                info =  1;
335
  }
336
337
  if (info >= 0) {
338
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
339
    return;
340
  }
341
342
#endif
343
344
0
  if (args.n == 0) return;
345
346
0
  IDEBUG_START;
347
348
0
  FUNCTION_PROFILE_START();
349
350
0
  buffer = (FLOAT *)blas_memory_alloc(0);
351
352
0
  sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A);
353
0
  sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
354
355
0
#ifdef SMP
356
#ifdef USE_SIMPLE_THREADED_LEVEL3
357
  if (!trans){
358
    mode |= (BLAS_TRANSA_N | BLAS_TRANSB_T);
359
  } else {
360
    mode |= (BLAS_TRANSA_T | BLAS_TRANSB_N);
361
  }
362
  mode |= (uplo  << BLAS_UPLO_SHIFT);
363
#endif
364
365
0
  args.common = NULL;
366
367
0
  NNK = (double)(args.n+1)*(double)args.n*(double)args.k;
368
0
  if (NNK <= (SMP_THRESHOLD_MIN * GEMM_MULTITHREAD_THRESHOLD)) {
369
0
  args.nthreads = 1;
370
0
  } else {
371
0
  args.nthreads = num_cpu_avail(3);
372
0
  }
373
374
0
  if (args.nthreads == 1) {
375
0
#endif
376
377
0
    (syrk[(uplo << 1) | trans ])(&args, NULL, NULL, sa, sb, 0);
378
379
0
#ifdef SMP
380
0
  } else {
381
382
0
#ifndef USE_SIMPLE_THREADED_LEVEL3
383
384
0
    (syrk[4 | (uplo << 1) | trans ])(&args, NULL, NULL, sa, sb, 0);
385
386
#else
387
388
    syrk_thread(mode, &args, NULL, NULL, syrk[(uplo << 1) | trans ], sa, sb, args.nthreads);
389
390
#endif
391
392
0
  }
393
0
#endif
394
395
0
 blas_memory_free(buffer);
396
397
0
  FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, args.n * args.k + args.n * args.n / 2, args.n * args.n * args.k);
398
399
0
  IDEBUG_END;
400
401
0
  return;
402
0
}