/root/doris/contrib/openblas/interface/gemv.c
Line | Count | Source |
1 | | /*********************************************************************/ |
2 | | /* Copyright 2009, 2010 The University of Texas at Austin. */ |
3 | | /* All rights reserved. */ |
4 | | /* */ |
5 | | /* Redistribution and use in source and binary forms, with or */ |
6 | | /* without modification, are permitted provided that the following */ |
7 | | /* conditions are met: */ |
8 | | /* */ |
9 | | /* 1. Redistributions of source code must retain the above */ |
10 | | /* copyright notice, this list of conditions and the following */ |
11 | | /* disclaimer. */ |
12 | | /* */ |
13 | | /* 2. Redistributions in binary form must reproduce the above */ |
14 | | /* copyright notice, this list of conditions and the following */ |
15 | | /* disclaimer in the documentation and/or other materials */ |
16 | | /* provided with the distribution. */ |
17 | | /* */ |
18 | | /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ |
19 | | /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ |
20 | | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ |
21 | | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ |
22 | | /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ |
23 | | /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ |
24 | | /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ |
25 | | /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ |
26 | | /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ |
27 | | /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ |
28 | | /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ |
29 | | /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ |
30 | | /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ |
31 | | /* POSSIBILITY OF SUCH DAMAGE. */ |
32 | | /* */ |
33 | | /* The views and conclusions contained in the software and */ |
34 | | /* documentation are those of the authors and should not be */ |
35 | | /* interpreted as representing official policies, either expressed */ |
36 | | /* or implied, of The University of Texas at Austin. */ |
37 | | /*********************************************************************/ |
38 | | |
39 | | #include <stdio.h> |
40 | | #include "common.h" |
41 | | #include "l1param.h" |
42 | | #ifdef FUNCTION_PROFILE |
43 | | #include "functable.h" |
44 | | #endif |
45 | | |
/* ERROR_NAME is the routine-name string passed to xerbla when argument
 * validation fails. It is selected by precision: XDOUBLE -> "QGEMV ",
 * DOUBLE -> "DGEMV ", otherwise "SGEMV ". */
46 | | #ifdef XDOUBLE |
47 | | #define ERROR_NAME "QGEMV " |
48 | | #elif defined(DOUBLE) |
49 | 0 | #define ERROR_NAME "DGEMV " |
50 | | #else |
51 | 0 | #define ERROR_NAME "SGEMV " |
52 | | #endif |
53 | | |
/* Table of multi-threaded GEMV drivers, compiled only for SMP builds.
 * Slot 0 holds the *_thread_n routine and slot 1 the *_thread_t routine of
 * the active precision; the entry point indexes it with the 0/1 `trans`
 * value. Unlike the serial gemv[] table used in the entry point, these
 * routines take no third BLASLONG argument and receive the thread count as
 * their trailing int. */
54 | | #ifdef SMP |
55 | | static int (*gemv_thread[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = { |
56 | | #ifdef XDOUBLE |
57 | | qgemv_thread_n, qgemv_thread_t, |
58 | | #elif defined DOUBLE |
59 | | dgemv_thread_n, dgemv_thread_t, |
60 | | #else |
61 | | sgemv_thread_n, sgemv_thread_t, |
62 | | #endif |
63 | | }; |
64 | | #endif |
65 | | |
66 | | #ifdef SMP |
/* Declared for DYNAMIC_ARCH builds only. The returned string is compared
 * against core names in get_gemv_optimal_nthreads() to pick a heuristic at
 * run time. */
67 | | #ifdef DYNAMIC_ARCH |
68 | | extern char* gotoblas_corename(void); |
69 | | #endif |
70 | | |
/* Thread-count ladder used for the "neoversev1" core; compiled when either
 * DYNAMIC_ARCH or NEOVERSEV1 is defined.
 *
 *   MN   - problem-size measure compared against the thresholds (the caller
 *          in this file passes m * n).
 *   ncpu - upper bound applied to every step via MIN().
 *
 * Returns 1 below the first threshold, MIN(ncpu, k) for increasing k on the
 * intermediate steps, and ncpu once MN reaches the last threshold. DOUBLE
 * builds use a finer ladder than the non-DOUBLE build.
 * NOTE(review): the threshold values are presumably empirically tuned; their
 * origin is not visible in this file. */
71 | | #if defined(DYNAMIC_ARCH) || defined(NEOVERSEV1) |
72 | | static inline int get_gemv_optimal_nthreads_neoversev1(BLASLONG MN, int ncpu) { |
73 | | #ifdef DOUBLE |
74 | | return (MN < 8100L) ? 1 |
75 | | : (MN < 12100L) ? MIN(ncpu, 2) |
76 | | : (MN < 36100L) ? MIN(ncpu, 4) |
77 | | : (MN < 84100L) ? MIN(ncpu, 8) |
78 | | : (MN < 348100L) ? MIN(ncpu, 16) |
79 | | : (MN < 435600L) ? MIN(ncpu, 24) |
80 | | : (MN < 810000L) ? MIN(ncpu, 32) |
81 | | : (MN < 1050625L) ? MIN(ncpu, 40) |
82 | | : ncpu; |
83 | | #else |
84 | | return (MN < 25600L) ? 1 |
85 | | : (MN < 63001L) ? MIN(ncpu, 4) |
86 | | : (MN < 459684L) ? MIN(ncpu, 16) |
87 | | : ncpu; |
88 | | #endif |
89 | | } |
90 | | #endif |
91 | | |
/* Thread-count ladder used for the "neoversev2" core; compiled when either
 * DYNAMIC_ARCH or NEOVERSEV2 is defined.
 *
 *   MN   - problem-size measure compared against the thresholds (the caller
 *          in this file passes m * n).
 *   ncpu - upper bound applied to every step via MIN().
 *
 * Returns 1 for MN < 24964, then MIN(ncpu, 8), MIN(ncpu, 32), MIN(ncpu, 64)
 * on the following steps, and ncpu from 1638400 upwards. This function has
 * a single ladder with no DOUBLE-specific variant. */
92 | | #if defined(DYNAMIC_ARCH) || defined(NEOVERSEV2) |
93 | | static inline int get_gemv_optimal_nthreads_neoversev2(BLASLONG MN, int ncpu) { |
94 | | return |
95 | | MN < 24964L ? 1 |
96 | | : MN < 65536L ? MIN(ncpu, 8) |
97 | | : MN < 262144L ? MIN(ncpu, 32) |
98 | | : MN < 1638400L ? MIN(ncpu, 64) |
99 | | : ncpu; |
100 | | } |
101 | | #endif |
102 | | |
/* Chooses how many threads the GEMV entry point should use.
 *
 *   MN - problem-size measure; the entry point passes m * n.
 *
 * Selection order, decided mostly at compile time:
 *   1. Windows on ARM64 (_WIN64 && _M_ARM64): num_cpu_avail(4) when
 *      MN > 100000000, otherwise 1. Both paths return, so nothing below is
 *      reached in that configuration.
 *   2. NEOVERSEV1 build (not COMPLEX, not BFLOAT16): the neoversev1 ladder.
 *   3. NEOVERSEV2 build (not COMPLEX, not DOUBLE, not BFLOAT16): the
 *      neoversev2 ladder.
 *   4. DYNAMIC_ARCH build (not COMPLEX, not BFLOAT16): the ladder is picked
 *      at run time by comparing gotoblas_corename() with "neoversev1" and
 *      "neoversev2"; any other core falls through to step 5.
 *   5. Generic rule: 1 thread when MN < 115200 * GEMM_MULTITHREAD_THRESHOLD,
 *      otherwise num_cpu_avail(2).
 *
 * NOTE(review): step 3 excludes DOUBLE, but the run-time branch in step 4
 * applies the neoversev2 ladder regardless of DOUBLE - confirm this
 * asymmetry is intended.
 * NOTE(review): strcmp is used below but no <string.h> include is visible in
 * this file; presumably it arrives through common.h - verify. */
103 | 0 | static inline int get_gemv_optimal_nthreads(BLASLONG MN) { |
/* ncpu is only consumed by the Neoverse ladders below. */
104 | 0 | int ncpu = num_cpu_avail(3); |
105 | | #if defined(_WIN64) && defined(_M_ARM64) |
106 | | if (MN > 100000000L) |
107 | | return num_cpu_avail(4); |
108 | | return 1; |
109 | | #endif |
110 | | #if defined(NEOVERSEV1) && !defined(COMPLEX) && !defined(BFLOAT16) |
111 | | return get_gemv_optimal_nthreads_neoversev1(MN, ncpu); |
112 | | #elif defined(NEOVERSEV2) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16) |
113 | | return get_gemv_optimal_nthreads_neoversev2(MN, ncpu); |
114 | | #elif defined(DYNAMIC_ARCH) && !defined(COMPLEX) && !defined(BFLOAT16) |
115 | | if (strcmp(gotoblas_corename(), "neoversev1") == 0) { |
116 | | return get_gemv_optimal_nthreads_neoversev1(MN, ncpu); |
117 | | } |
118 | | if (strcmp(gotoblas_corename(), "neoversev2") == 0) { |
119 | | return get_gemv_optimal_nthreads_neoversev2(MN, ncpu); |
120 | | } |
121 | | #endif |
122 | |
|
/* Generic fallback: stay single-threaded below the size threshold. */
123 | 0 | if ( MN < 115200L * GEMM_MULTITHREAD_THRESHOLD ) |
124 | 0 | return 1; |
125 | 0 | else |
126 | 0 | return num_cpu_avail(2); |
127 | 0 | } Unexecuted instantiation: sgemv.c:get_gemv_optimal_nthreads Unexecuted instantiation: dgemv.c:get_gemv_optimal_nthreads |
128 | | #endif |
129 | | |
/* GEMV entry point. The same text is compiled in one of two flavours:
 *   - CBLAS not defined: NAME(...), every argument passed by pointer and the
 *     transpose mode given as a character;
 *   - CBLAS defined: CNAME(...), arguments passed by value plus a storage
 *     order and a CBLAS_TRANSPOSE enum.
 * Each flavour has its own prologue that decodes and validates arguments.
 * Both then continue into the shared body that follows the #endif, which
 * works on m, n, lda, incx, incy, alpha, beta and a 0/1 `trans` index.
 *
 * On a validation failure xerbla is called with ERROR_NAME and `info`, and
 * the routine returns without touching y. In the non-CBLAS flavour `info`
 * is the 1-based position of the offending argument in NAME's parameter
 * list (TRANS = 1 ... INCY = 11); the CBLAS flavour reuses the same numbers. */
130 | | #ifndef CBLAS |
131 | | |
132 | | void NAME(char *TRANS, blasint *M, blasint *N, |
133 | | FLOAT *ALPHA, FLOAT *a, blasint *LDA, |
134 | | FLOAT *x, blasint *INCX, |
135 | 0 | FLOAT *BETA, FLOAT *y, blasint *INCY){ |
136 | |
|
137 | 0 | char trans = *TRANS; |
138 | 0 | blasint m = *M; |
139 | 0 | blasint n = *N; |
140 | 0 | blasint lda = *LDA; |
141 | 0 | blasint incx = *INCX; |
142 | 0 | blasint incy = *INCY; |
143 | 0 | FLOAT alpha = *ALPHA; |
144 | 0 | FLOAT beta = *BETA; |
145 | 0 | FLOAT *buffer; |
146 | 0 | int buffer_size; |
147 | 0 | #ifdef SMP |
148 | 0 | int nthreads; |
149 | 0 | #endif |
150 | |
|
/* Serial kernels, indexed by the 0/1 trans value. */
151 | 0 | int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *) = { |
152 | 0 | GEMV_N, GEMV_T, |
153 | 0 | }; |
154 | |
|
155 | 0 | blasint info; |
156 | 0 | blasint lenx, leny; |
157 | 0 | blasint i; |
158 | |
|
159 | 0 | PRINT_DEBUG_NAME; |
160 | |
|
161 | 0 | TOUPPER(trans); |
162 | |
|
163 | 0 | info = 0; |
164 | |
|
165 | 0 | i = -1; |
166 | |
|
/* Map the transpose character to a table index: N and R select 0, T and C
 * select 1; any other character leaves i at -1. */
167 | 0 | if (trans == 'N') i = 0; |
168 | 0 | if (trans == 'T') i = 1; |
169 | 0 | if (trans == 'R') i = 0; |
170 | 0 | if (trans == 'C') i = 1; |
171 | |
|
/* Checks run from the last argument to the first, each overwriting info, so
 * the lowest-numbered offending argument is the one reported. */
172 | 0 | if (incy == 0) info = 11; |
173 | 0 | if (incx == 0) info = 8; |
174 | 0 | if (lda < MAX(1, m)) info = 6; |
175 | 0 | if (n < 0) info = 3; |
176 | 0 | if (m < 0) info = 2; |
177 | 0 | if (i < 0) info = 1; |
178 | |
|
/* From here on trans holds the 0/1 index instead of the character. */
179 | 0 | trans = i; |
180 | |
|
181 | 0 | if (info != 0){ |
182 | 0 | BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); |
183 | 0 | return; |
184 | 0 | } |
185 | | |
186 | | #else |
187 | | |
188 | | void CNAME(enum CBLAS_ORDER order, |
189 | | enum CBLAS_TRANSPOSE TransA, |
190 | | blasint m, blasint n, |
191 | | FLOAT alpha, |
192 | | FLOAT *a, blasint lda, |
193 | | FLOAT *x, blasint incx, |
194 | | FLOAT beta, |
195 | | FLOAT *y, blasint incy){ |
196 | | |
197 | | FLOAT *buffer; |
198 | | blasint lenx, leny; |
199 | | int trans, buffer_size; |
200 | | blasint info, t; |
201 | | #ifdef SMP |
202 | | int nthreads; |
203 | | #endif |
204 | | |
205 | | int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *) = { |
206 | | GEMV_N, GEMV_T, |
207 | | }; |
208 | | |
209 | | PRINT_DEBUG_CNAME; |
210 | | |
/* info starts at 0 and is reset to -1 ("no error") only inside a recognised
 * order branch. NOTE(review): an order that is neither CblasColMajor nor
 * CblasRowMajor therefore reaches xerbla below with info == 0 - confirm that
 * this is the intended way to report it. */
211 | | trans = -1; |
212 | | info = 0; |
213 | | |
214 | | if (order == CblasColMajor) { |
215 | | if (TransA == CblasNoTrans) trans = 0; |
216 | | if (TransA == CblasTrans) trans = 1; |
217 | | if (TransA == CblasConjNoTrans) trans = 0; |
218 | | if (TransA == CblasConjTrans) trans = 1; |
219 | | |
220 | | info = -1; |
221 | | |
222 | | if (incy == 0) info = 11; |
223 | | if (incx == 0) info = 8; |
224 | | if (lda < MAX(1, m)) info = 6; |
225 | | if (n < 0) info = 3; |
226 | | if (m < 0) info = 2; |
227 | | if (trans < 0) info = 1; |
228 | | |
229 | | } |
230 | | |
/* Row-major input is handled as the transposed column-major problem: the
 * trans index is flipped relative to the column-major branch and m and n
 * are swapped before validation. */
231 | | if (order == CblasRowMajor) { |
232 | | if (TransA == CblasNoTrans) trans = 1; |
233 | | if (TransA == CblasTrans) trans = 0; |
234 | | if (TransA == CblasConjNoTrans) trans = 1; |
235 | | if (TransA == CblasConjTrans) trans = 0; |
236 | | |
237 | | info = -1; |
238 | | |
239 | | t = n; |
240 | | n = m; |
241 | | m = t; |
242 | | |
243 | | if (incy == 0) info = 11; |
244 | | if (incx == 0) info = 8; |
245 | | if (lda < MAX(1, m)) info = 6; |
246 | | if (n < 0) info = 3; |
247 | | if (m < 0) info = 2; |
248 | | if (trans < 0) info = 1; |
249 | | |
250 | | } |
251 | | |
252 | | if (info >= 0) { |
253 | | BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); |
254 | | return; |
255 | | } |
256 | | |
257 | | #endif |
/* ---- Shared body for both flavours ---- */
/* Quick return when either dimension is zero; y is left untouched. */
258 | 0 | if ((m==0) || (n==0)) return; |
259 | | |
/* Vector lengths: x has n elements and y has m, swapped when trans != 0. */
260 | 0 | lenx = n; |
261 | 0 | leny = m; |
262 | 0 | if (trans) lenx = m; |
263 | 0 | if (trans) leny = n; |
264 | |
|
/* Unless beta == ONE, SCAL_K is applied to y with beta and |incy| before
 * the product is accumulated (presumably y := beta * y - SCAL_K itself is
 * defined elsewhere). */
265 | 0 | if (beta != ONE) SCAL_K(leny, 0, 0, beta, y, blasabs(incy), NULL, 0, NULL, 0); |
266 | |
|
/* With alpha == ZERO there is nothing to add after the scaling step. */
267 | 0 | if (alpha == ZERO) return; |
268 | | |
269 | 0 | IDEBUG_START; |
270 | |
|
271 | 0 | FUNCTION_PROFILE_START(); |
272 | |
|
/* For a negative stride, move the base pointer forward by
 * (len - 1) * |inc| elements. */
273 | 0 | if (incx < 0) x -= (lenx - 1) * incx; |
274 | 0 | if (incy < 0) y -= (leny - 1) * incy; |
275 | |
|
/* Scratch buffer size in FLOAT elements: m + n plus 128 bytes' worth of
 * elements (plus 160 bytes' worth under WINDOWS_ABI), rounded up to a
 * multiple of 4, then obtained through STACK_ALLOC. */
276 | 0 | buffer_size = m + n + 128 / sizeof(FLOAT); |
277 | | #ifdef WINDOWS_ABI |
278 | | buffer_size += 160 / sizeof(FLOAT) ; |
279 | | #endif |
280 | | // for alignment |
281 | 0 | buffer_size = (buffer_size + 3) & ~3; |
282 | 0 | STACK_ALLOC(buffer_size, FLOAT, buffer); |
283 | |
|
/* SMP builds pick a thread count from m * n; one thread uses the serial
 * kernel, more than one uses the threaded driver. Without SMP only the
 * serial call is compiled.
 * NOTE(review): 1L * m * n is evaluated in `long`. Where long and blasint
 * are both 32-bit (e.g. LLP64 targets) the product could overflow for large
 * matrices; a BLASLONG cast may be safer - confirm against the type
 * definitions in common.h. */
284 | 0 | #ifdef SMP |
285 | 0 | nthreads = get_gemv_optimal_nthreads(1L * m * n); |
286 | |
|
287 | 0 | if (nthreads == 1) { |
288 | 0 | #endif |
289 | |
|
290 | 0 | (gemv[(int)trans])(m, n, 0, alpha, a, lda, x, incx, y, incy, buffer); |
291 | |
|
292 | 0 | #ifdef SMP |
293 | 0 | } else { |
294 | |
|
295 | 0 | (gemv_thread[(int)trans])(m, n, alpha, a, lda, x, incx, y, incy, buffer, nthreads); |
296 | |
|
297 | 0 | } |
298 | 0 | #endif |
299 | |
|
/* NOTE(review): the products passed to FUNCTION_PROFILE_END are formed in
 * blasint; whether they are ever evaluated depends on how the macro expands
 * - verify if profiling is enabled for large sizes. */
300 | 0 | STACK_FREE(buffer); |
301 | 0 | FUNCTION_PROFILE_END(1, m * n + m + n, 2 * m * n); |
302 | |
|
303 | 0 | IDEBUG_END; |
304 | |
|
305 | 0 | return; |
306 | |
|
307 | 0 | } Unexecuted instantiation: sgemv_ Unexecuted instantiation: dgemv_ |