/root/doris/contrib/openblas/interface/ger.c
Line | Count | Source |
1 | | /*********************************************************************/ |
2 | | /* Copyright 2009, 2010 The University of Texas at Austin. */ |
3 | | /* All rights reserved. */ |
4 | | /* */ |
5 | | /* Redistribution and use in source and binary forms, with or */ |
6 | | /* without modification, are permitted provided that the following */ |
7 | | /* conditions are met: */ |
8 | | /* */ |
9 | | /* 1. Redistributions of source code must retain the above */ |
10 | | /* copyright notice, this list of conditions and the following */ |
11 | | /* disclaimer. */ |
12 | | /* */ |
13 | | /* 2. Redistributions in binary form must reproduce the above */ |
14 | | /* copyright notice, this list of conditions and the following */ |
15 | | /* disclaimer in the documentation and/or other materials */ |
16 | | /* provided with the distribution. */ |
17 | | /* */ |
18 | | /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ |
19 | | /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ |
20 | | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ |
21 | | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ |
22 | | /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ |
23 | | /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ |
24 | | /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ |
25 | | /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ |
26 | | /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ |
27 | | /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ |
28 | | /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ |
29 | | /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ |
30 | | /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ |
31 | | /* POSSIBILITY OF SUCH DAMAGE. */ |
32 | | /* */ |
33 | | /* The views and conclusions contained in the software and */ |
34 | | /* documentation are those of the authors and should not be */ |
35 | | /* interpreted as representing official policies, either expressed */ |
36 | | /* or implied, of The University of Texas at Austin. */ |
37 | | /*********************************************************************/ |
38 | | |
39 | | #include <stdio.h> |
40 | | #include "common.h" |
41 | | #ifdef FUNCTION_PROFILE |
42 | | #include "functable.h" |
43 | | #endif |
44 | | |
/* SMPTEST gates the thread-count selection and the GER_THREAD call in the
 * routine below; it is only defined for SMP builds that also define
 * __64BIT__. */
#if defined(SMP) && defined(__64BIT__)
#define SMPTEST 1
#endif

/* Routine name handed to xerbla when an argument check fails.  The literal
 * (including its trailing blank) and sizeof(ERROR_NAME) are both passed, so
 * the text must stay exactly as written. */
#if defined(XDOUBLE)
#define ERROR_NAME "QGER "
#elif defined(DOUBLE)
#define ERROR_NAME "DGER "
#else
#define ERROR_NAME "SGER "
#endif

/* Single-threaded kernel invoked by the interface routine. */
#define GER		GERU_K

/* Threaded driver used instead of GER when more than one thread is chosen;
 * the name follows the same precision selection as ERROR_NAME. */
#if defined(XDOUBLE)
#define GER_THREAD qger_thread
#elif defined(DOUBLE)
#define GER_THREAD dger_thread
#else
#define GER_THREAD sger_thread
#endif
68 | | |
69 | | |
70 | | #ifndef CBLAS |
71 | | |
72 | | void NAME(blasint *M, blasint *N, FLOAT *Alpha, |
73 | | FLOAT *x, blasint *INCX, |
74 | | FLOAT *y, blasint *INCY, |
75 | 0 | FLOAT *a, blasint *LDA){ |
76 | |
|
77 | 0 | blasint m = *M; |
78 | 0 | blasint n = *N; |
79 | 0 | FLOAT alpha = *Alpha; |
80 | 0 | blasint incx = *INCX; |
81 | 0 | blasint incy = *INCY; |
82 | 0 | blasint lda = *LDA; |
83 | 0 | FLOAT *buffer; |
84 | 0 | #ifdef SMPTEST |
85 | 0 | int nthreads; |
86 | 0 | #endif |
87 | |
|
88 | 0 | blasint info; |
89 | |
|
90 | 0 | PRINT_DEBUG_NAME; |
91 | |
|
92 | 0 | info = 0; |
93 | |
|
94 | 0 | if (lda < MAX(1,m)) info = 9; |
95 | 0 | if (incy == 0) info = 7; |
96 | 0 | if (incx == 0) info = 5; |
97 | 0 | if (n < 0) info = 2; |
98 | 0 | if (m < 0) info = 1; |
99 | |
|
100 | 0 | if (info){ |
101 | 0 | BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); |
102 | 0 | return; |
103 | 0 | } |
104 | | |
105 | | #else |
106 | | |
107 | | void CNAME(enum CBLAS_ORDER order, |
108 | | blasint m, blasint n, |
109 | | FLOAT alpha, |
110 | | FLOAT *x, blasint incx, |
111 | | FLOAT *y, blasint incy, |
112 | | FLOAT *a, blasint lda) { |
113 | | |
114 | | FLOAT *buffer; |
115 | | blasint info, t; |
116 | | #ifdef SMPTEST |
117 | | int nthreads; |
118 | | #endif |
119 | | |
120 | | PRINT_DEBUG_CNAME; |
121 | | |
122 | | info = 0; |
123 | | |
124 | | if (order == CblasColMajor) { |
125 | | info = -1; |
126 | | |
127 | | if (lda < MAX(1,m)) info = 9; |
128 | | if (incy == 0) info = 7; |
129 | | if (incx == 0) info = 5; |
130 | | if (n < 0) info = 2; |
131 | | if (m < 0) info = 1; |
132 | | } |
133 | | |
134 | | if (order == CblasRowMajor) { |
135 | | info = -1; |
136 | | |
137 | | t = n; |
138 | | n = m; |
139 | | m = t; |
140 | | |
141 | | t = incx; |
142 | | incx = incy; |
143 | | incy = t; |
144 | | |
145 | | buffer = x; |
146 | | x = y; |
147 | | y = buffer; |
148 | | |
149 | | if (lda < MAX(1,m)) info = 9; |
150 | | if (incy == 0) info = 7; |
151 | | if (incx == 0) info = 5; |
152 | | if (n < 0) info = 2; |
153 | | if (m < 0) info = 1; |
154 | | } |
155 | | |
156 | | if (info >= 0) { |
157 | | BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); |
158 | | return; |
159 | | } |
160 | | |
161 | | #endif |
162 | | |
163 | | /* Quick return if possible. */ |
164 | 0 | if (m == 0 || n == 0) return; |
165 | 0 | if (alpha == 0.) return; |
166 | | |
167 | 0 | if (incx == 1 && incy == 1 && 1L*m*n <= 2048 *GEMM_MULTITHREAD_THRESHOLD) { |
168 | 0 | GER(m, n, 0, alpha, x, incx, y, incy, a, lda, NULL); |
169 | 0 | return; |
170 | 0 | } |
171 | | |
172 | 0 | IDEBUG_START; |
173 | |
|
174 | 0 | FUNCTION_PROFILE_START(); |
175 | |
|
176 | 0 | if (incy < 0) y -= (n - 1) * incy; |
177 | 0 | if (incx < 0) x -= (m - 1) * incx; |
178 | |
|
179 | 0 | STACK_ALLOC(m, FLOAT, buffer); |
180 | |
|
181 | 0 | #ifdef SMPTEST |
182 | | // Threshold chosen so that speed-up is > 1 on a Xeon E5-2630 |
183 | 0 | if(1L * m * n > 2048L * GEMM_MULTITHREAD_THRESHOLD) |
184 | 0 | nthreads = num_cpu_avail(2); |
185 | 0 | else |
186 | 0 | nthreads = 1; |
187 | |
|
188 | 0 | if (nthreads == 1) { |
189 | 0 | #endif |
190 | |
|
191 | 0 | GER(m, n, 0, alpha, x, incx, y, incy, a, lda, buffer); |
192 | |
|
193 | 0 | #ifdef SMPTEST |
194 | 0 | } else { |
195 | |
|
196 | 0 | GER_THREAD(m, n, alpha, x, incx, y, incy, a, lda, buffer, nthreads); |
197 | |
|
198 | 0 | } |
199 | 0 | #endif |
200 | |
|
201 | 0 | STACK_FREE(buffer); |
202 | 0 | FUNCTION_PROFILE_END(1, m * n + m + n, 2 * m * n); |
203 | |
|
204 | 0 | IDEBUG_END; |
205 | |
|
206 | 0 | return; |
207 | 0 | } Unexecuted instantiation: sger_ Unexecuted instantiation: dger_ |