Coverage Report

Created: 2025-09-12 10:50

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/contrib/openblas/interface/gemv.c
Line
Count
Source
1
/*********************************************************************/
2
/* Copyright 2009, 2010 The University of Texas at Austin.           */
3
/* All rights reserved.                                              */
4
/*                                                                   */
5
/* Redistribution and use in source and binary forms, with or        */
6
/* without modification, are permitted provided that the following   */
7
/* conditions are met:                                               */
8
/*                                                                   */
9
/*   1. Redistributions of source code must retain the above         */
10
/*      copyright notice, this list of conditions and the following  */
11
/*      disclaimer.                                                  */
12
/*                                                                   */
13
/*   2. Redistributions in binary form must reproduce the above      */
14
/*      copyright notice, this list of conditions and the following  */
15
/*      disclaimer in the documentation and/or other materials       */
16
/*      provided with the distribution.                              */
17
/*                                                                   */
18
/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
19
/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
20
/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
21
/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
22
/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
23
/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
24
/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
25
/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
26
/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
27
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
28
/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
29
/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
30
/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
31
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
32
/*                                                                   */
33
/* The views and conclusions contained in the software and           */
34
/* documentation are those of the authors and should not be          */
35
/* interpreted as representing official policies, either expressed   */
36
/* or implied, of The University of Texas at Austin.                 */
37
/*********************************************************************/
38
39
#include <stdio.h>
40
#include "common.h"
41
#include "l1param.h"
42
#ifdef FUNCTION_PROFILE
43
#include "functable.h"
44
#endif
45
46
#ifdef XDOUBLE
47
#define ERROR_NAME "QGEMV "
48
#elif defined(DOUBLE)
49
0
#define ERROR_NAME "DGEMV "
50
#else
51
0
#define ERROR_NAME "SGEMV "
52
#endif
53
54
#ifdef SMP
55
static int (*gemv_thread[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG,  FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
56
#ifdef XDOUBLE
57
  qgemv_thread_n, qgemv_thread_t,
58
#elif defined DOUBLE
59
  dgemv_thread_n, dgemv_thread_t,
60
#else
61
  sgemv_thread_n, sgemv_thread_t,
62
#endif
63
};
64
#endif
65
66
#ifdef SMP
67
#ifdef DYNAMIC_ARCH
68
 extern char* gotoblas_corename(void);
69
#endif
70
71
#if defined(DYNAMIC_ARCH) || defined(NEOVERSEV1)
72
static inline int get_gemv_optimal_nthreads_neoversev1(BLASLONG MN, int ncpu) {
73
  #ifdef DOUBLE
74
      return (MN < 8100L)      ? 1
75
          : (MN < 12100L)     ? MIN(ncpu, 2)
76
          : (MN < 36100L)     ? MIN(ncpu, 4)
77
          : (MN < 84100L)     ? MIN(ncpu, 8)
78
          : (MN < 348100L)    ? MIN(ncpu, 16)
79
          : (MN < 435600L)    ? MIN(ncpu, 24)
80
          : (MN < 810000L)    ? MIN(ncpu, 32)
81
          : (MN < 1050625L)   ? MIN(ncpu, 40)
82
          : ncpu;
83
  #else
84
      return (MN < 25600L)     ? 1
85
          : (MN < 63001L)     ? MIN(ncpu, 4)
86
          : (MN < 459684L)    ? MIN(ncpu, 16)
87
          : ncpu;
88
  #endif
89
}
90
#endif
91
92
#if defined(DYNAMIC_ARCH) || defined(NEOVERSEV2)
93
static inline int get_gemv_optimal_nthreads_neoversev2(BLASLONG MN, int ncpu) {
94
  return
95
      MN < 24964L    ? 1
96
    : MN < 65536L    ? MIN(ncpu, 8)
97
    : MN < 262144L   ? MIN(ncpu, 32)
98
    : MN < 1638400L  ? MIN(ncpu, 64)
99
    : ncpu;
100
}
101
#endif
102
103
0
static inline int get_gemv_optimal_nthreads(BLASLONG MN) {
104
0
  int ncpu = num_cpu_avail(3);
105
#if defined(_WIN64) && defined(_M_ARM64)
106
  if (MN > 100000000L)
107
    return num_cpu_avail(4);
108
  return 1;
109
#endif
110
#if defined(NEOVERSEV1) && !defined(COMPLEX) && !defined(BFLOAT16)
111
  return get_gemv_optimal_nthreads_neoversev1(MN, ncpu);
112
#elif defined(NEOVERSEV2) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
113
  return get_gemv_optimal_nthreads_neoversev2(MN, ncpu);
114
#elif defined(DYNAMIC_ARCH) && !defined(COMPLEX) && !defined(BFLOAT16)
115
  if (strcmp(gotoblas_corename(), "neoversev1") == 0) {
116
    return get_gemv_optimal_nthreads_neoversev1(MN, ncpu);
117
  }
118
  if (strcmp(gotoblas_corename(), "neoversev2") == 0) {
119
    return get_gemv_optimal_nthreads_neoversev2(MN, ncpu);
120
  }
121
#endif
122
123
0
  if ( MN < 115200L * GEMM_MULTITHREAD_THRESHOLD )
124
0
    return 1;
125
0
  else
126
0
    return num_cpu_avail(2);
127
0
}
Unexecuted instantiation: sgemv.c:get_gemv_optimal_nthreads
Unexecuted instantiation: dgemv.c:get_gemv_optimal_nthreads
128
#endif
129
130
#ifndef CBLAS
131
132
void NAME(char *TRANS, blasint *M, blasint *N,
133
     FLOAT *ALPHA, FLOAT *a, blasint *LDA,
134
     FLOAT *x, blasint *INCX,
135
0
     FLOAT *BETA, FLOAT *y, blasint *INCY){
136
137
0
  char trans = *TRANS;
138
0
  blasint m = *M;
139
0
  blasint n = *N;
140
0
  blasint lda = *LDA;
141
0
  blasint incx = *INCX;
142
0
  blasint incy = *INCY;
143
0
  FLOAT alpha = *ALPHA;
144
0
  FLOAT beta  = *BETA;
145
0
  FLOAT *buffer;
146
0
  int buffer_size;
147
0
#ifdef SMP
148
0
  int nthreads;
149
0
#endif
150
151
0
  int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG,  FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *) = {
152
0
    GEMV_N, GEMV_T,
153
0
  };
154
155
0
  blasint info;
156
0
  blasint lenx, leny;
157
0
  blasint i;
158
159
0
  PRINT_DEBUG_NAME;
160
161
0
  TOUPPER(trans);
162
163
0
  info = 0;
164
165
0
  i = -1;
166
167
0
  if (trans == 'N') i = 0;
168
0
  if (trans == 'T') i = 1;
169
0
  if (trans == 'R') i = 0;
170
0
  if (trans == 'C') i = 1;
171
172
0
  if (incy == 0) info = 11;
173
0
  if (incx == 0) info = 8;
174
0
  if (lda < MAX(1, m)) info = 6;
175
0
  if (n < 0)   info = 3;
176
0
  if (m < 0)   info = 2;
177
0
  if (i < 0)          info = 1;
178
179
0
  trans = i;
180
181
0
  if (info != 0){
182
0
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
183
0
    return;
184
0
  }
185
186
#else
187
188
void CNAME(enum CBLAS_ORDER order,
189
     enum CBLAS_TRANSPOSE TransA,
190
     blasint m, blasint n,
191
     FLOAT alpha,
192
     FLOAT  *a, blasint lda,
193
     FLOAT  *x, blasint incx,
194
     FLOAT beta,
195
     FLOAT  *y, blasint incy){
196
197
  FLOAT *buffer;
198
  blasint lenx, leny;
199
  int trans, buffer_size;
200
  blasint info, t;
201
#ifdef SMP
202
  int nthreads;
203
#endif
204
205
  int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG,  FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *) = {
206
    GEMV_N, GEMV_T,
207
  };
208
209
  PRINT_DEBUG_CNAME;
210
211
  trans = -1;
212
  info  =  0;
213
214
  if (order == CblasColMajor) {
215
    if (TransA == CblasNoTrans)     trans = 0;
216
    if (TransA == CblasTrans)       trans = 1;
217
    if (TransA == CblasConjNoTrans) trans = 0;
218
    if (TransA == CblasConjTrans)   trans = 1;
219
220
    info = -1;
221
222
    if (incy == 0)    info = 11;
223
    if (incx == 0)    info = 8;
224
    if (lda < MAX(1, m))  info = 6;
225
    if (n < 0)      info = 3;
226
    if (m < 0)      info = 2;
227
    if (trans < 0)        info = 1;
228
229
  }
230
231
  if (order == CblasRowMajor) {
232
    if (TransA == CblasNoTrans)     trans = 1;
233
    if (TransA == CblasTrans)       trans = 0;
234
    if (TransA == CblasConjNoTrans) trans = 1;
235
    if (TransA == CblasConjTrans)   trans = 0;
236
237
    info = -1;
238
239
    t = n;
240
    n = m;
241
    m = t;
242
243
    if (incy == 0)    info = 11;
244
    if (incx == 0)    info = 8;
245
    if (lda < MAX(1, m))  info = 6;
246
    if (n < 0)      info = 3;
247
    if (m < 0)      info = 2;
248
    if (trans < 0)        info = 1;
249
250
  }
251
252
  if (info >= 0) {
253
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
254
    return;
255
  }
256
257
#endif
258
0
  if ((m==0) || (n==0)) return;
259
260
0
  lenx = n;
261
0
  leny = m;
262
0
  if (trans) lenx = m;
263
0
  if (trans) leny = n;
264
265
0
  if (beta != ONE) SCAL_K(leny, 0, 0, beta, y, blasabs(incy), NULL, 0, NULL, 0);
266
267
0
  if (alpha == ZERO) return;
268
  
269
0
  IDEBUG_START;
270
271
0
  FUNCTION_PROFILE_START();
272
273
0
  if (incx < 0) x -= (lenx - 1) * incx;
274
0
  if (incy < 0) y -= (leny - 1) * incy;
275
276
0
  buffer_size = m + n + 128 / sizeof(FLOAT);
277
#ifdef WINDOWS_ABI
278
  buffer_size += 160 / sizeof(FLOAT) ;
279
#endif
280
  // for alignment
281
0
  buffer_size = (buffer_size + 3) & ~3;
282
0
  STACK_ALLOC(buffer_size, FLOAT, buffer);
283
284
0
#ifdef SMP
285
0
  nthreads = get_gemv_optimal_nthreads(1L * m * n);
286
287
0
  if (nthreads == 1) {
288
0
#endif
289
290
0
    (gemv[(int)trans])(m, n, 0, alpha, a, lda, x, incx, y, incy, buffer);
291
292
0
#ifdef SMP
293
0
  } else {
294
295
0
    (gemv_thread[(int)trans])(m, n, alpha, a, lda, x, incx, y, incy, buffer, nthreads);
296
297
0
  }
298
0
#endif
299
300
0
  STACK_FREE(buffer);
301
0
  FUNCTION_PROFILE_END(1, m * n + m + n,  2 * m * n);
302
303
0
  IDEBUG_END;
304
305
0
  return;
306
307
0
}
Unexecuted instantiation: sgemv_
Unexecuted instantiation: dgemv_