/root/doris/contrib/openblas/symcopy.h
Line | Count | Source |
1 | | /*********************************************************************/ |
2 | | /* Copyright 2009, 2010 The University of Texas at Austin. */ |
3 | | /* All rights reserved. */ |
4 | | /* */ |
5 | | /* Redistribution and use in source and binary forms, with or */ |
6 | | /* without modification, are permitted provided that the following */ |
7 | | /* conditions are met: */ |
8 | | /* */ |
9 | | /* 1. Redistributions of source code must retain the above */ |
10 | | /* copyright notice, this list of conditions and the following */ |
11 | | /* disclaimer. */ |
12 | | /* */ |
13 | | /* 2. Redistributions in binary form must reproduce the above */ |
14 | | /* copyright notice, this list of conditions and the following */ |
15 | | /* disclaimer in the documentation and/or other materials */ |
16 | | /* provided with the distribution. */ |
17 | | /* */ |
18 | | /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ |
19 | | /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ |
20 | | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ |
21 | | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ |
22 | | /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ |
23 | | /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ |
24 | | /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ |
25 | | /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ |
26 | | /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ |
27 | | /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ |
28 | | /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ |
29 | | /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ |
30 | | /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ |
31 | | /* POSSIBILITY OF SUCH DAMAGE. */ |
32 | | /* */ |
33 | | /* The views and conclusions contained in the software and */ |
34 | | /* documentation are those of the authors and should not be */ |
35 | | /* interpreted as representing official policies, either expressed */ |
36 | | /* or implied, of The University of Texas at Austin. */ |
37 | | /*********************************************************************/ |
38 | | |
39 | | /* This implementation is completely wrong. I'll rewrite this */ |
40 | | |
41 | | #ifndef SYMCOPY_H |
42 | | #define SYMCOPY_H |
43 | | |
44 | | #if !defined(XDOUBLE) || !defined(QUAD_PRECISION) |
45 | | |
46 | 0 | static __inline void SYMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ |
47 | 0 | BLASLONG is, js; |
48 | |
|
49 | 0 | FLOAT *aa1, *aa2; |
50 | 0 | FLOAT *b1, *b2; |
51 | 0 | FLOAT *bb1, *bb2; |
52 | 0 | FLOAT *cc1, *cc2; |
53 | 0 | FLOAT a11, a12; |
54 | 0 | FLOAT a21, a22; |
55 | |
|
56 | 0 | b1 = b; |
57 | 0 | b2 = b; |
58 | |
|
59 | 0 | for (js = 0; js < m; js += 2){ |
60 | |
|
61 | 0 | aa1 = a + 0 * lda; |
62 | 0 | aa2 = a + 1 * lda; |
63 | 0 | a += 2 * lda + 2; |
64 | |
|
65 | 0 | bb1 = b1 + 0 * m; |
66 | 0 | bb2 = b1 + 1 * m; |
67 | 0 | b1 += 2 * m + 2; |
68 | |
|
69 | 0 | cc1 = b2 + 0 * m; |
70 | 0 | cc2 = b2 + 1 * m; |
71 | 0 | b2 += 2 * m + 2; |
72 | |
|
73 | 0 | if (m - js >= 2){ |
74 | |
|
75 | 0 | a11 = *(aa1 + 0); |
76 | 0 | a21 = *(aa1 + 1); |
77 | |
|
78 | 0 | a22 = *(aa2 + 1); |
79 | |
|
80 | 0 | *(bb1 + 0) = a11; |
81 | 0 | *(bb1 + 1) = a21; |
82 | 0 | *(bb2 + 0) = a21; |
83 | 0 | *(bb2 + 1) = a22; |
84 | 0 | aa1 += 2; |
85 | 0 | aa2 += 2; |
86 | 0 | bb1 += 2; |
87 | 0 | bb2 += 2; |
88 | |
|
89 | 0 | cc1 += 2 * m; |
90 | 0 | cc2 += 2 * m; |
91 | |
|
92 | 0 | is = ((m - js - 2) >> 1); |
93 | |
|
94 | 0 | while (is > 0){ |
95 | 0 | a11 = *(aa1 + 0); |
96 | 0 | a21 = *(aa1 + 1); |
97 | 0 | a12 = *(aa2 + 0); |
98 | 0 | a22 = *(aa2 + 1); |
99 | |
|
100 | 0 | aa1 += 2; |
101 | 0 | aa2 += 2; |
102 | |
|
103 | 0 | *(bb1 + 0) = a11; |
104 | 0 | *(bb1 + 1) = a21; |
105 | 0 | *(bb2 + 0) = a12; |
106 | 0 | *(bb2 + 1) = a22; |
107 | |
|
108 | 0 | *(cc1 + 0) = a11; |
109 | 0 | *(cc1 + 1) = a12; |
110 | 0 | *(cc2 + 0) = a21; |
111 | 0 | *(cc2 + 1) = a22; |
112 | |
|
113 | 0 | bb1 += 2; |
114 | 0 | bb2 += 2; |
115 | |
|
116 | 0 | cc1 += 2 * m; |
117 | 0 | cc2 += 2 * m; |
118 | |
|
119 | 0 | is --; |
120 | 0 | } |
121 | |
|
122 | 0 | is = ((m - js - 2) & 1); |
123 | |
|
124 | 0 | if (is == 1){ |
125 | 0 | a11 = *(aa1 + 0); |
126 | 0 | a12 = *(aa2 + 0); |
127 | |
|
128 | 0 | *(bb1 + 0) = a11; |
129 | 0 | *(bb2 + 0) = a12; |
130 | |
|
131 | 0 | *(cc1 + 0) = a11; |
132 | 0 | *(cc1 + 1) = a12; |
133 | 0 | } |
134 | 0 | } |
135 | |
|
136 | 0 | if (m - js == 1){ |
137 | 0 | a11 = *(aa1 + 0); |
138 | 0 | *(bb1 + 0) = a11; |
139 | 0 | } |
140 | |
|
141 | 0 | } |
142 | 0 | } Unexecuted instantiation: sger_thread.c:SYMCOPY_L Unexecuted instantiation: strmv_thread_NUU.c:SYMCOPY_L Unexecuted instantiation: strmv_thread_NUN.c:SYMCOPY_L Unexecuted instantiation: strmv_thread_NLU.c:SYMCOPY_L Unexecuted instantiation: strmv_thread_NLN.c:SYMCOPY_L Unexecuted instantiation: strmv_thread_TUU.c:SYMCOPY_L Unexecuted instantiation: strmv_thread_TUN.c:SYMCOPY_L Unexecuted instantiation: strmv_thread_TLU.c:SYMCOPY_L Unexecuted instantiation: strmv_thread_TLN.c:SYMCOPY_L Unexecuted instantiation: dger_thread.c:SYMCOPY_L Unexecuted instantiation: dtrmv_thread_NUU.c:SYMCOPY_L Unexecuted instantiation: dtrmv_thread_NUN.c:SYMCOPY_L Unexecuted instantiation: dtrmv_thread_NLU.c:SYMCOPY_L Unexecuted instantiation: dtrmv_thread_NLN.c:SYMCOPY_L Unexecuted instantiation: dtrmv_thread_TUU.c:SYMCOPY_L Unexecuted instantiation: dtrmv_thread_TUN.c:SYMCOPY_L Unexecuted instantiation: dtrmv_thread_TLU.c:SYMCOPY_L Unexecuted instantiation: dtrmv_thread_TLN.c:SYMCOPY_L Unexecuted instantiation: dsymv_U.c:SYMCOPY_L Unexecuted instantiation: dsymv_L.c:SYMCOPY_L Unexecuted instantiation: dsymv_thread_U.c:SYMCOPY_L Unexecuted instantiation: dsymv_thread_L.c:SYMCOPY_L |
143 | | |
144 | 0 | static __inline void SYMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ |
145 | 0 | BLASLONG is, js; |
146 | |
|
147 | 0 | FLOAT *aa1, *aa2; |
148 | 0 | FLOAT *b1, *b2; |
149 | 0 | FLOAT *bb1, *bb2; |
150 | 0 | FLOAT *cc1, *cc2; |
151 | 0 | FLOAT a11, a12; |
152 | 0 | FLOAT a21, a22; |
153 | |
|
154 | 0 | b1 = b; |
155 | 0 | b2 = b; |
156 | |
|
157 | 0 | for (js = 0; js < m; js += 2){ |
158 | |
|
159 | 0 | aa1 = a + 0 * lda; |
160 | 0 | aa2 = a + 1 * lda; |
161 | 0 | a += 2 * lda; |
162 | |
|
163 | 0 | bb1 = b1 + 0 * m; |
164 | 0 | bb2 = b1 + 1 * m; |
165 | 0 | b1 += 2 * m; |
166 | |
|
167 | 0 | cc1 = b2 + 0 * m; |
168 | 0 | cc2 = b2 + 1 * m; |
169 | 0 | b2 += 2; |
170 | |
|
171 | 0 | if (m - js >= 2){ |
172 | |
|
173 | 0 | for (is = 0; is < js; is += 2){ |
174 | |
|
175 | 0 | a11 = *(aa1 + 0); |
176 | 0 | a21 = *(aa1 + 1); |
177 | 0 | a12 = *(aa2 + 0); |
178 | 0 | a22 = *(aa2 + 1); |
179 | |
|
180 | 0 | aa1 += 2; |
181 | 0 | aa2 += 2; |
182 | |
|
183 | 0 | *(bb1 + 0) = a11; |
184 | 0 | *(bb1 + 1) = a21; |
185 | 0 | *(bb2 + 0) = a12; |
186 | 0 | *(bb2 + 1) = a22; |
187 | |
|
188 | 0 | *(cc1 + 0) = a11; |
189 | 0 | *(cc1 + 1) = a12; |
190 | 0 | *(cc2 + 0) = a21; |
191 | 0 | *(cc2 + 1) = a22; |
192 | |
|
193 | 0 | bb1 += 2; |
194 | 0 | bb2 += 2; |
195 | |
|
196 | 0 | cc1 += 2 * m; |
197 | 0 | cc2 += 2 * m; |
198 | 0 | } |
199 | |
|
200 | 0 | a11 = *(aa1 + 0); |
201 | |
|
202 | 0 | a12 = *(aa2 + 0); |
203 | 0 | a22 = *(aa2 + 1); |
204 | |
|
205 | 0 | *(bb1 + 0) = a11; |
206 | 0 | *(bb1 + 1) = a12; |
207 | 0 | *(bb2 + 0) = a12; |
208 | 0 | *(bb2 + 1) = a22; |
209 | 0 | } |
210 | |
|
211 | 0 | if (m - js == 1){ |
212 | 0 | for (is = 0; is < js; is += 2){ |
213 | |
|
214 | 0 | a11 = *(aa1 + 0); |
215 | 0 | a21 = *(aa1 + 1); |
216 | 0 | aa1 += 2; |
217 | |
|
218 | 0 | *(bb1 + 0) = a11; |
219 | 0 | *(bb1 + 1) = a21; |
220 | 0 | *(cc1 + 0) = a11; |
221 | 0 | *(cc2 + 0) = a21; |
222 | 0 | bb1 += 2; |
223 | |
|
224 | 0 | cc1 += 2 * m; |
225 | 0 | cc2 += 2 * m; |
226 | 0 | } |
227 | |
|
228 | 0 | a11 = *(aa1 + 0); |
229 | 0 | *(bb1 + 0) = a11; |
230 | 0 | } |
231 | 0 | } |
232 | 0 | } Unexecuted instantiation: sger_thread.c:SYMCOPY_U Unexecuted instantiation: strmv_thread_NUU.c:SYMCOPY_U Unexecuted instantiation: strmv_thread_NUN.c:SYMCOPY_U Unexecuted instantiation: strmv_thread_NLU.c:SYMCOPY_U Unexecuted instantiation: strmv_thread_NLN.c:SYMCOPY_U Unexecuted instantiation: strmv_thread_TUU.c:SYMCOPY_U Unexecuted instantiation: strmv_thread_TUN.c:SYMCOPY_U Unexecuted instantiation: strmv_thread_TLU.c:SYMCOPY_U Unexecuted instantiation: strmv_thread_TLN.c:SYMCOPY_U Unexecuted instantiation: dger_thread.c:SYMCOPY_U Unexecuted instantiation: dtrmv_thread_NUU.c:SYMCOPY_U Unexecuted instantiation: dtrmv_thread_NUN.c:SYMCOPY_U Unexecuted instantiation: dtrmv_thread_NLU.c:SYMCOPY_U Unexecuted instantiation: dtrmv_thread_NLN.c:SYMCOPY_U Unexecuted instantiation: dtrmv_thread_TUU.c:SYMCOPY_U Unexecuted instantiation: dtrmv_thread_TUN.c:SYMCOPY_U Unexecuted instantiation: dtrmv_thread_TLU.c:SYMCOPY_U Unexecuted instantiation: dtrmv_thread_TLN.c:SYMCOPY_U Unexecuted instantiation: dsymv_U.c:SYMCOPY_U Unexecuted instantiation: dsymv_L.c:SYMCOPY_U Unexecuted instantiation: dsymv_thread_U.c:SYMCOPY_U Unexecuted instantiation: dsymv_thread_L.c:SYMCOPY_U |
233 | | |
234 | | |
235 | 0 | static __inline void ZSYMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ |
236 | 0 | BLASLONG is, js; |
237 | 0 |
|
238 | 0 | FLOAT *aa1, *aa2; |
239 | 0 | FLOAT *b1, *b2; |
240 | 0 | FLOAT *bb1, *bb2; |
241 | 0 | FLOAT *cc1, *cc2; |
242 | 0 | FLOAT a11, a21, a31, a41; |
243 | 0 | FLOAT a12, a22, a32, a42; |
244 | 0 |
|
245 | 0 | b1 = b; |
246 | 0 | b2 = b; |
247 | 0 |
|
248 | 0 | lda *= 2; |
249 | 0 |
|
250 | 0 | for (js = 0; js < m; js += 2){ |
251 | 0 |
|
252 | 0 | aa1 = a + 0 * lda; |
253 | 0 | aa2 = a + 1 * lda; |
254 | 0 | a += 2 * lda + 4; |
255 | 0 |
|
256 | 0 | bb1 = b1 + 0 * m; |
257 | 0 | bb2 = b1 + 2 * m; |
258 | 0 | b1 += 4 * m + 4; |
259 | 0 |
|
260 | 0 | cc1 = b2 + 0 * m; |
261 | 0 | cc2 = b2 + 2 * m; |
262 | 0 | b2 += 4 * m + 4; |
263 | 0 |
|
264 | 0 | if (m - js >= 2){ |
265 | 0 |
|
266 | 0 | a11 = *(aa1 + 0); |
267 | 0 | a21 = *(aa1 + 1); |
268 | 0 | a31 = *(aa1 + 2); |
269 | 0 | a41 = *(aa1 + 3); |
270 | 0 |
|
271 | 0 | a12 = *(aa2 + 2); |
272 | 0 | a22 = *(aa2 + 3); |
273 | 0 |
|
274 | 0 | *(bb1 + 0) = a11; |
275 | 0 | *(bb1 + 1) = a21; |
276 | 0 | *(bb1 + 2) = a31; |
277 | 0 | *(bb1 + 3) = a41; |
278 | 0 |
|
279 | 0 | *(bb2 + 0) = a31; |
280 | 0 | *(bb2 + 1) = a41; |
281 | 0 | *(bb2 + 2) = a12; |
282 | 0 | *(bb2 + 3) = a22; |
283 | 0 |
|
284 | 0 | aa1 += 4; |
285 | 0 | aa2 += 4; |
286 | 0 | bb1 += 4; |
287 | 0 | bb2 += 4; |
288 | 0 |
|
289 | 0 | cc1 += 4 * m; |
290 | 0 | cc2 += 4 * m; |
291 | 0 |
|
292 | 0 | is = ((m - js - 2) >> 1); |
293 | 0 |
|
294 | 0 | while (is > 0){ |
295 | 0 | a11 = *(aa1 + 0); |
296 | 0 | a21 = *(aa1 + 1); |
297 | 0 | a31 = *(aa1 + 2); |
298 | 0 | a41 = *(aa1 + 3); |
299 | 0 |
|
300 | 0 | a12 = *(aa2 + 0); |
301 | 0 | a22 = *(aa2 + 1); |
302 | 0 | a32 = *(aa2 + 2); |
303 | 0 | a42 = *(aa2 + 3); |
304 | 0 |
|
305 | 0 | aa1 += 4; |
306 | 0 | aa2 += 4; |
307 | 0 |
|
308 | 0 | *(bb1 + 0) = a11; |
309 | 0 | *(bb1 + 1) = a21; |
310 | 0 | *(bb1 + 2) = a31; |
311 | 0 | *(bb1 + 3) = a41; |
312 | 0 |
|
313 | 0 | *(bb2 + 0) = a12; |
314 | 0 | *(bb2 + 1) = a22; |
315 | 0 | *(bb2 + 2) = a32; |
316 | 0 | *(bb2 + 3) = a42; |
317 | 0 |
|
318 | 0 | *(cc1 + 0) = a11; |
319 | 0 | *(cc1 + 1) = a21; |
320 | 0 | *(cc1 + 2) = a12; |
321 | 0 | *(cc1 + 3) = a22; |
322 | 0 |
|
323 | 0 | *(cc2 + 0) = a31; |
324 | 0 | *(cc2 + 1) = a41; |
325 | 0 | *(cc2 + 2) = a32; |
326 | 0 | *(cc2 + 3) = a42; |
327 | 0 |
|
328 | 0 | bb1 += 4; |
329 | 0 | bb2 += 4; |
330 | 0 |
|
331 | 0 | cc1 += 4 * m; |
332 | 0 | cc2 += 4 * m; |
333 | 0 |
|
334 | 0 | is --; |
335 | 0 | } |
336 | 0 |
|
337 | 0 | if (m & 1){ |
338 | 0 | a11 = *(aa1 + 0); |
339 | 0 | a21 = *(aa1 + 1); |
340 | 0 | a12 = *(aa2 + 0); |
341 | 0 | a22 = *(aa2 + 1); |
342 | 0 |
|
343 | 0 | *(bb1 + 0) = a11; |
344 | 0 | *(bb1 + 1) = a21; |
345 | 0 | *(bb2 + 0) = a12; |
346 | 0 | *(bb2 + 1) = a22; |
347 | 0 |
|
348 | 0 | *(cc1 + 0) = a11; |
349 | 0 | *(cc1 + 1) = a21; |
350 | 0 | *(cc1 + 2) = a12; |
351 | 0 | *(cc1 + 3) = a22; |
352 | 0 | } |
353 | 0 | } |
354 | 0 |
|
355 | 0 | if (m - js == 1){ |
356 | 0 | a11 = *(aa1 + 0); |
357 | 0 | a21 = *(aa1 + 1); |
358 | 0 | *(bb1 + 0) = a11; |
359 | 0 | *(bb1 + 1) = a21; |
360 | 0 | } |
361 | 0 |
|
362 | 0 | } |
363 | 0 | } Unexecuted instantiation: sger_thread.c:ZSYMCOPY_L Unexecuted instantiation: strmv_thread_NUU.c:ZSYMCOPY_L Unexecuted instantiation: strmv_thread_NUN.c:ZSYMCOPY_L Unexecuted instantiation: strmv_thread_NLU.c:ZSYMCOPY_L Unexecuted instantiation: strmv_thread_NLN.c:ZSYMCOPY_L Unexecuted instantiation: strmv_thread_TUU.c:ZSYMCOPY_L Unexecuted instantiation: strmv_thread_TUN.c:ZSYMCOPY_L Unexecuted instantiation: strmv_thread_TLU.c:ZSYMCOPY_L Unexecuted instantiation: strmv_thread_TLN.c:ZSYMCOPY_L Unexecuted instantiation: dger_thread.c:ZSYMCOPY_L Unexecuted instantiation: dtrmv_thread_NUU.c:ZSYMCOPY_L Unexecuted instantiation: dtrmv_thread_NUN.c:ZSYMCOPY_L Unexecuted instantiation: dtrmv_thread_NLU.c:ZSYMCOPY_L Unexecuted instantiation: dtrmv_thread_NLN.c:ZSYMCOPY_L Unexecuted instantiation: dtrmv_thread_TUU.c:ZSYMCOPY_L Unexecuted instantiation: dtrmv_thread_TUN.c:ZSYMCOPY_L Unexecuted instantiation: dtrmv_thread_TLU.c:ZSYMCOPY_L Unexecuted instantiation: dtrmv_thread_TLN.c:ZSYMCOPY_L Unexecuted instantiation: dsymv_U.c:ZSYMCOPY_L Unexecuted instantiation: dsymv_L.c:ZSYMCOPY_L Unexecuted instantiation: dsymv_thread_U.c:ZSYMCOPY_L Unexecuted instantiation: dsymv_thread_L.c:ZSYMCOPY_L |
364 | | |
365 | 0 | static __inline void ZSYMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ |
366 | 0 | BLASLONG is, js; |
367 | 0 |
|
368 | 0 | FLOAT *aa1, *aa2; |
369 | 0 | FLOAT *b1, *b2; |
370 | 0 | FLOAT *bb1, *bb2; |
371 | 0 | FLOAT *cc1, *cc2; |
372 | 0 | FLOAT a11, a21, a31, a41; |
373 | 0 | FLOAT a12, a22, a32, a42; |
374 | 0 |
|
375 | 0 | b1 = b; |
376 | 0 | b2 = b; |
377 | 0 |
|
378 | 0 | lda *= 2; |
379 | 0 |
|
380 | 0 | for (js = 0; js < m; js += 2){ |
381 | 0 |
|
382 | 0 | aa1 = a + 0 * lda; |
383 | 0 | aa2 = a + 1 * lda; |
384 | 0 | a += 2 * lda; |
385 | 0 |
|
386 | 0 | bb1 = b1 + 0 * m; |
387 | 0 | bb2 = b1 + 2 * m; |
388 | 0 | b1 += 4 * m; |
389 | 0 |
|
390 | 0 | cc1 = b2 + 0 * m; |
391 | 0 | cc2 = b2 + 2 * m; |
392 | 0 | b2 += 4; |
393 | 0 |
|
394 | 0 | if (m - js >= 2){ |
395 | 0 |
|
396 | 0 | for (is = 0; is < js; is += 2){ |
397 | 0 |
|
398 | 0 | a11 = *(aa1 + 0); |
399 | 0 | a21 = *(aa1 + 1); |
400 | 0 | a31 = *(aa1 + 2); |
401 | 0 | a41 = *(aa1 + 3); |
402 | 0 |
|
403 | 0 | a12 = *(aa2 + 0); |
404 | 0 | a22 = *(aa2 + 1); |
405 | 0 | a32 = *(aa2 + 2); |
406 | 0 | a42 = *(aa2 + 3); |
407 | 0 |
|
408 | 0 | aa1 += 4; |
409 | 0 | aa2 += 4; |
410 | 0 |
|
411 | 0 | *(bb1 + 0) = a11; |
412 | 0 | *(bb1 + 1) = a21; |
413 | 0 | *(bb1 + 2) = a31; |
414 | 0 | *(bb1 + 3) = a41; |
415 | 0 |
|
416 | 0 | *(bb2 + 0) = a12; |
417 | 0 | *(bb2 + 1) = a22; |
418 | 0 | *(bb2 + 2) = a32; |
419 | 0 | *(bb2 + 3) = a42; |
420 | 0 |
|
421 | 0 | *(cc1 + 0) = a11; |
422 | 0 | *(cc1 + 1) = a21; |
423 | 0 | *(cc1 + 2) = a12; |
424 | 0 | *(cc1 + 3) = a22; |
425 | 0 |
|
426 | 0 | *(cc2 + 0) = a31; |
427 | 0 | *(cc2 + 1) = a41; |
428 | 0 | *(cc2 + 2) = a32; |
429 | 0 | *(cc2 + 3) = a42; |
430 | 0 |
|
431 | 0 | bb1 += 4; |
432 | 0 | bb2 += 4; |
433 | 0 |
|
434 | 0 | cc1 += 4 * m; |
435 | 0 | cc2 += 4 * m; |
436 | 0 | } |
437 | 0 |
|
438 | 0 | a11 = *(aa1 + 0); |
439 | 0 | a21 = *(aa1 + 1); |
440 | 0 |
|
441 | 0 | a12 = *(aa2 + 0); |
442 | 0 | a22 = *(aa2 + 1); |
443 | 0 | a32 = *(aa2 + 2); |
444 | 0 | a42 = *(aa2 + 3); |
445 | 0 |
|
446 | 0 | *(bb1 + 0) = a11; |
447 | 0 | *(bb1 + 1) = a21; |
448 | 0 | *(bb1 + 2) = a12; |
449 | 0 | *(bb1 + 3) = a22; |
450 | 0 |
|
451 | 0 | *(bb2 + 0) = a12; |
452 | 0 | *(bb2 + 1) = a22; |
453 | 0 | *(bb2 + 2) = a32; |
454 | 0 | *(bb2 + 3) = a42; |
455 | 0 | } |
456 | 0 |
|
457 | 0 | if (m - js == 1){ |
458 | 0 | for (is = 0; is < js; is += 2){ |
459 | 0 |
|
460 | 0 | a11 = *(aa1 + 0); |
461 | 0 | a21 = *(aa1 + 1); |
462 | 0 | a31 = *(aa1 + 2); |
463 | 0 | a41 = *(aa1 + 3); |
464 | 0 | aa1 += 4; |
465 | 0 |
|
466 | 0 | *(bb1 + 0) = a11; |
467 | 0 | *(bb1 + 1) = a21; |
468 | 0 | *(bb1 + 2) = a31; |
469 | 0 | *(bb1 + 3) = a41; |
470 | 0 |
|
471 | 0 | *(cc1 + 0) = a11; |
472 | 0 | *(cc1 + 1) = a21; |
473 | 0 | *(cc2 + 0) = a31; |
474 | 0 | *(cc2 + 1) = a41; |
475 | 0 | bb1 += 4; |
476 | 0 |
|
477 | 0 | cc1 += 4 * m; |
478 | 0 | cc2 += 4 * m; |
479 | 0 | } |
480 | 0 |
|
481 | 0 | a11 = *(aa1 + 0); |
482 | 0 | a21 = *(aa1 + 1); |
483 | 0 | *(bb1 + 0) = a11; |
484 | 0 | *(bb1 + 1) = a21; |
485 | 0 | } |
486 | 0 | } |
487 | 0 | } Unexecuted instantiation: sger_thread.c:ZSYMCOPY_U Unexecuted instantiation: strmv_thread_NUU.c:ZSYMCOPY_U Unexecuted instantiation: strmv_thread_NUN.c:ZSYMCOPY_U Unexecuted instantiation: strmv_thread_NLU.c:ZSYMCOPY_U Unexecuted instantiation: strmv_thread_NLN.c:ZSYMCOPY_U Unexecuted instantiation: strmv_thread_TUU.c:ZSYMCOPY_U Unexecuted instantiation: strmv_thread_TUN.c:ZSYMCOPY_U Unexecuted instantiation: strmv_thread_TLU.c:ZSYMCOPY_U Unexecuted instantiation: strmv_thread_TLN.c:ZSYMCOPY_U Unexecuted instantiation: dger_thread.c:ZSYMCOPY_U Unexecuted instantiation: dtrmv_thread_NUU.c:ZSYMCOPY_U Unexecuted instantiation: dtrmv_thread_NUN.c:ZSYMCOPY_U Unexecuted instantiation: dtrmv_thread_NLU.c:ZSYMCOPY_U Unexecuted instantiation: dtrmv_thread_NLN.c:ZSYMCOPY_U Unexecuted instantiation: dtrmv_thread_TUU.c:ZSYMCOPY_U Unexecuted instantiation: dtrmv_thread_TUN.c:ZSYMCOPY_U Unexecuted instantiation: dtrmv_thread_TLU.c:ZSYMCOPY_U Unexecuted instantiation: dtrmv_thread_TLN.c:ZSYMCOPY_U Unexecuted instantiation: dsymv_U.c:ZSYMCOPY_U Unexecuted instantiation: dsymv_L.c:ZSYMCOPY_U Unexecuted instantiation: dsymv_thread_U.c:ZSYMCOPY_U Unexecuted instantiation: dsymv_thread_L.c:ZSYMCOPY_U |
488 | | |
489 | 0 | static __inline void ZHEMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ |
490 | 0 | BLASLONG is, js; |
491 | 0 |
|
492 | 0 | FLOAT *aa1, *aa2; |
493 | 0 | FLOAT *b1, *b2; |
494 | 0 | FLOAT *bb1, *bb2; |
495 | 0 | FLOAT *cc1, *cc2; |
496 | 0 | FLOAT a11, a21, a31, a41; |
497 | 0 | FLOAT a12, a22, a32, a42; |
498 | 0 |
|
499 | 0 | b1 = b; |
500 | 0 | b2 = b; |
501 | 0 |
|
502 | 0 | lda *= 2; |
503 | 0 |
|
504 | 0 | for (js = 0; js < m; js += 2){ |
505 | 0 |
|
506 | 0 | aa1 = a + 0 * lda; |
507 | 0 | aa2 = a + 1 * lda; |
508 | 0 | a += 2 * lda + 4; |
509 | 0 |
|
510 | 0 | bb1 = b1 + 0 * m; |
511 | 0 | bb2 = b1 + 2 * m; |
512 | 0 | b1 += 4 * m + 4; |
513 | 0 |
|
514 | 0 | cc1 = b2 + 0 * m; |
515 | 0 | cc2 = b2 + 2 * m; |
516 | 0 | b2 += 4 * m + 4; |
517 | 0 |
|
518 | 0 | if (m - js >= 2){ |
519 | 0 |
|
520 | 0 | a11 = *(aa1 + 0); |
521 | 0 | a31 = *(aa1 + 2); |
522 | 0 | a41 = *(aa1 + 3); |
523 | 0 |
|
524 | 0 | a12 = *(aa2 + 2); |
525 | 0 |
|
526 | 0 | *(bb1 + 0) = a11; |
527 | 0 | *(bb1 + 1) = 0.; |
528 | 0 | *(bb1 + 2) = a31; |
529 | 0 | *(bb1 + 3) = a41; |
530 | 0 |
|
531 | 0 | *(bb2 + 0) = a31; |
532 | 0 | *(bb2 + 1) = -a41; |
533 | 0 | *(bb2 + 2) = a12; |
534 | 0 | *(bb2 + 3) = 0.; |
535 | 0 |
|
536 | 0 | aa1 += 4; |
537 | 0 | aa2 += 4; |
538 | 0 | bb1 += 4; |
539 | 0 | bb2 += 4; |
540 | 0 |
|
541 | 0 | cc1 += 4 * m; |
542 | 0 | cc2 += 4 * m; |
543 | 0 |
|
544 | 0 | is = ((m - js - 2) >> 1); |
545 | 0 |
|
546 | 0 | while (is > 0){ |
547 | 0 | a11 = *(aa1 + 0); |
548 | 0 | a21 = *(aa1 + 1); |
549 | 0 | a31 = *(aa1 + 2); |
550 | 0 | a41 = *(aa1 + 3); |
551 | 0 |
|
552 | 0 | a12 = *(aa2 + 0); |
553 | 0 | a22 = *(aa2 + 1); |
554 | 0 | a32 = *(aa2 + 2); |
555 | 0 | a42 = *(aa2 + 3); |
556 | 0 |
|
557 | 0 | aa1 += 4; |
558 | 0 | aa2 += 4; |
559 | 0 |
|
560 | 0 | *(bb1 + 0) = a11; |
561 | 0 | *(bb1 + 1) = a21; |
562 | 0 | *(bb1 + 2) = a31; |
563 | 0 | *(bb1 + 3) = a41; |
564 | 0 |
|
565 | 0 | *(bb2 + 0) = a12; |
566 | 0 | *(bb2 + 1) = a22; |
567 | 0 | *(bb2 + 2) = a32; |
568 | 0 | *(bb2 + 3) = a42; |
569 | 0 |
|
570 | 0 | *(cc1 + 0) = a11; |
571 | 0 | *(cc1 + 1) = -a21; |
572 | 0 | *(cc1 + 2) = a12; |
573 | 0 | *(cc1 + 3) = -a22; |
574 | 0 |
|
575 | 0 | *(cc2 + 0) = a31; |
576 | 0 | *(cc2 + 1) = -a41; |
577 | 0 | *(cc2 + 2) = a32; |
578 | 0 | *(cc2 + 3) = -a42; |
579 | 0 |
|
580 | 0 | bb1 += 4; |
581 | 0 | bb2 += 4; |
582 | 0 |
|
583 | 0 | cc1 += 4 * m; |
584 | 0 | cc2 += 4 * m; |
585 | 0 |
|
586 | 0 | is --; |
587 | 0 | } |
588 | 0 |
|
589 | 0 | if (m & 1){ |
590 | 0 | a11 = *(aa1 + 0); |
591 | 0 | a21 = *(aa1 + 1); |
592 | 0 | a12 = *(aa2 + 0); |
593 | 0 | a22 = *(aa2 + 1); |
594 | 0 |
|
595 | 0 | *(bb1 + 0) = a11; |
596 | 0 | *(bb1 + 1) = a21; |
597 | 0 | *(bb2 + 0) = a12; |
598 | 0 | *(bb2 + 1) = a22; |
599 | 0 |
|
600 | 0 | *(cc1 + 0) = a11; |
601 | 0 | *(cc1 + 1) = -a21; |
602 | 0 | *(cc1 + 2) = a12; |
603 | 0 | *(cc1 + 3) = -a22; |
604 | 0 | } |
605 | 0 | } |
606 | 0 |
|
607 | 0 | if (m - js == 1){ |
608 | 0 | a11 = *(aa1 + 0); |
609 | 0 | *(bb1 + 0) = a11; |
610 | 0 | *(bb1 + 1) = 0.; |
611 | 0 | } |
612 | 0 |
|
613 | 0 | } |
614 | 0 | } Unexecuted instantiation: sger_thread.c:ZHEMCOPY_L Unexecuted instantiation: strmv_thread_NUU.c:ZHEMCOPY_L Unexecuted instantiation: strmv_thread_NUN.c:ZHEMCOPY_L Unexecuted instantiation: strmv_thread_NLU.c:ZHEMCOPY_L Unexecuted instantiation: strmv_thread_NLN.c:ZHEMCOPY_L Unexecuted instantiation: strmv_thread_TUU.c:ZHEMCOPY_L Unexecuted instantiation: strmv_thread_TUN.c:ZHEMCOPY_L Unexecuted instantiation: strmv_thread_TLU.c:ZHEMCOPY_L Unexecuted instantiation: strmv_thread_TLN.c:ZHEMCOPY_L Unexecuted instantiation: dger_thread.c:ZHEMCOPY_L Unexecuted instantiation: dtrmv_thread_NUU.c:ZHEMCOPY_L Unexecuted instantiation: dtrmv_thread_NUN.c:ZHEMCOPY_L Unexecuted instantiation: dtrmv_thread_NLU.c:ZHEMCOPY_L Unexecuted instantiation: dtrmv_thread_NLN.c:ZHEMCOPY_L Unexecuted instantiation: dtrmv_thread_TUU.c:ZHEMCOPY_L Unexecuted instantiation: dtrmv_thread_TUN.c:ZHEMCOPY_L Unexecuted instantiation: dtrmv_thread_TLU.c:ZHEMCOPY_L Unexecuted instantiation: dtrmv_thread_TLN.c:ZHEMCOPY_L Unexecuted instantiation: dsymv_U.c:ZHEMCOPY_L Unexecuted instantiation: dsymv_L.c:ZHEMCOPY_L Unexecuted instantiation: dsymv_thread_U.c:ZHEMCOPY_L Unexecuted instantiation: dsymv_thread_L.c:ZHEMCOPY_L |
615 | | |
616 | 0 | static __inline void ZHEMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ |
617 | 0 | BLASLONG is, js; |
618 | 0 |
|
619 | 0 | FLOAT *aa1, *aa2; |
620 | 0 | FLOAT *b1, *b2; |
621 | 0 | FLOAT *bb1, *bb2; |
622 | 0 | FLOAT *cc1, *cc2; |
623 | 0 | FLOAT a11, a21, a31, a41; |
624 | 0 | FLOAT a12, a22, a32, a42; |
625 | 0 |
|
626 | 0 | b1 = b; |
627 | 0 | b2 = b; |
628 | 0 |
|
629 | 0 | lda *= 2; |
630 | 0 |
|
631 | 0 | for (js = 0; js < m; js += 2){ |
632 | 0 |
|
633 | 0 | aa1 = a + 0 * lda; |
634 | 0 | aa2 = a + 1 * lda; |
635 | 0 | a += 2 * lda; |
636 | 0 |
|
637 | 0 | bb1 = b1 + 0 * m; |
638 | 0 | bb2 = b1 + 2 * m; |
639 | 0 | b1 += 4 * m; |
640 | 0 |
|
641 | 0 | cc1 = b2 + 0 * m; |
642 | 0 | cc2 = b2 + 2 * m; |
643 | 0 | b2 += 4; |
644 | 0 |
|
645 | 0 | if (m - js >= 2){ |
646 | 0 |
|
647 | 0 | for (is = 0; is < js; is += 2){ |
648 | 0 |
|
649 | 0 | a11 = *(aa1 + 0); |
650 | 0 | a21 = *(aa1 + 1); |
651 | 0 | a31 = *(aa1 + 2); |
652 | 0 | a41 = *(aa1 + 3); |
653 | 0 |
|
654 | 0 | a12 = *(aa2 + 0); |
655 | 0 | a22 = *(aa2 + 1); |
656 | 0 | a32 = *(aa2 + 2); |
657 | 0 | a42 = *(aa2 + 3); |
658 | 0 |
|
659 | 0 | aa1 += 4; |
660 | 0 | aa2 += 4; |
661 | 0 |
|
662 | 0 | *(bb1 + 0) = a11; |
663 | 0 | *(bb1 + 1) = a21; |
664 | 0 | *(bb1 + 2) = a31; |
665 | 0 | *(bb1 + 3) = a41; |
666 | 0 |
|
667 | 0 | *(bb2 + 0) = a12; |
668 | 0 | *(bb2 + 1) = a22; |
669 | 0 | *(bb2 + 2) = a32; |
670 | 0 | *(bb2 + 3) = a42; |
671 | 0 |
|
672 | 0 | *(cc1 + 0) = a11; |
673 | 0 | *(cc1 + 1) = -a21; |
674 | 0 | *(cc1 + 2) = a12; |
675 | 0 | *(cc1 + 3) = -a22; |
676 | 0 |
|
677 | 0 | *(cc2 + 0) = a31; |
678 | 0 | *(cc2 + 1) = -a41; |
679 | 0 | *(cc2 + 2) = a32; |
680 | 0 | *(cc2 + 3) = -a42; |
681 | 0 |
|
682 | 0 | bb1 += 4; |
683 | 0 | bb2 += 4; |
684 | 0 |
|
685 | 0 | cc1 += 4 * m; |
686 | 0 | cc2 += 4 * m; |
687 | 0 | } |
688 | 0 |
|
689 | 0 | a11 = *(aa1 + 0); |
690 | 0 |
|
691 | 0 | a12 = *(aa2 + 0); |
692 | 0 | a22 = *(aa2 + 1); |
693 | 0 | a32 = *(aa2 + 2); |
694 | 0 |
|
695 | 0 | *(bb1 + 0) = a11; |
696 | 0 | *(bb1 + 1) = 0.; |
697 | 0 | *(bb1 + 2) = a12; |
698 | 0 | *(bb1 + 3) = -a22; |
699 | 0 |
|
700 | 0 | *(bb2 + 0) = a12; |
701 | 0 | *(bb2 + 1) = a22; |
702 | 0 | *(bb2 + 2) = a32; |
703 | 0 | *(bb2 + 3) = 0.; |
704 | 0 | } |
705 | 0 |
|
706 | 0 | if (m - js == 1){ |
707 | 0 | for (is = 0; is < js; is += 2){ |
708 | 0 |
|
709 | 0 | a11 = *(aa1 + 0); |
710 | 0 | a21 = *(aa1 + 1); |
711 | 0 | a31 = *(aa1 + 2); |
712 | 0 | a41 = *(aa1 + 3); |
713 | 0 | aa1 += 4; |
714 | 0 |
|
715 | 0 | *(bb1 + 0) = a11; |
716 | 0 | *(bb1 + 1) = a21; |
717 | 0 | *(bb1 + 2) = a31; |
718 | 0 | *(bb1 + 3) = a41; |
719 | 0 |
|
720 | 0 | *(cc1 + 0) = a11; |
721 | 0 | *(cc1 + 1) = -a21; |
722 | 0 | *(cc2 + 0) = a31; |
723 | 0 | *(cc2 + 1) = -a41; |
724 | 0 | bb1 += 4; |
725 | 0 |
|
726 | 0 | cc1 += 4 * m; |
727 | 0 | cc2 += 4 * m; |
728 | 0 | } |
729 | 0 |
|
730 | 0 | a11 = *(aa1 + 0); |
731 | 0 | *(bb1 + 0) = a11; |
732 | 0 | *(bb1 + 1) = 0.; |
733 | 0 | } |
734 | 0 | } |
735 | 0 | } Unexecuted instantiation: sger_thread.c:ZHEMCOPY_U Unexecuted instantiation: strmv_thread_NUU.c:ZHEMCOPY_U Unexecuted instantiation: strmv_thread_NUN.c:ZHEMCOPY_U Unexecuted instantiation: strmv_thread_NLU.c:ZHEMCOPY_U Unexecuted instantiation: strmv_thread_NLN.c:ZHEMCOPY_U Unexecuted instantiation: strmv_thread_TUU.c:ZHEMCOPY_U Unexecuted instantiation: strmv_thread_TUN.c:ZHEMCOPY_U Unexecuted instantiation: strmv_thread_TLU.c:ZHEMCOPY_U Unexecuted instantiation: strmv_thread_TLN.c:ZHEMCOPY_U Unexecuted instantiation: dger_thread.c:ZHEMCOPY_U Unexecuted instantiation: dtrmv_thread_NUU.c:ZHEMCOPY_U Unexecuted instantiation: dtrmv_thread_NUN.c:ZHEMCOPY_U Unexecuted instantiation: dtrmv_thread_NLU.c:ZHEMCOPY_U Unexecuted instantiation: dtrmv_thread_NLN.c:ZHEMCOPY_U Unexecuted instantiation: dtrmv_thread_TUU.c:ZHEMCOPY_U Unexecuted instantiation: dtrmv_thread_TUN.c:ZHEMCOPY_U Unexecuted instantiation: dtrmv_thread_TLU.c:ZHEMCOPY_U Unexecuted instantiation: dtrmv_thread_TLN.c:ZHEMCOPY_U Unexecuted instantiation: dsymv_U.c:ZHEMCOPY_U Unexecuted instantiation: dsymv_L.c:ZHEMCOPY_U Unexecuted instantiation: dsymv_thread_U.c:ZHEMCOPY_U Unexecuted instantiation: dsymv_thread_L.c:ZHEMCOPY_U |
736 | | |
737 | | |
738 | 0 | static __inline void ZHEMCOPY_M(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ |
739 | 0 | BLASLONG is, js; |
740 | 0 |
|
741 | 0 | FLOAT *aa1, *aa2; |
742 | 0 | FLOAT *b1, *b2; |
743 | 0 | FLOAT *bb1, *bb2; |
744 | 0 | FLOAT *cc1, *cc2; |
745 | 0 | FLOAT a11, a21, a31, a41; |
746 | 0 | FLOAT a12, a22, a32, a42; |
747 | 0 |
|
748 | 0 | b1 = b; |
749 | 0 | b2 = b; |
750 | 0 |
|
751 | 0 | lda *= 2; |
752 | 0 |
|
753 | 0 | for (js = 0; js < m; js += 2){ |
754 | 0 |
|
755 | 0 | aa1 = a + 0 * lda; |
756 | 0 | aa2 = a + 1 * lda; |
757 | 0 | a += 2 * lda + 4; |
758 | 0 |
|
759 | 0 | bb1 = b1 + 0 * m; |
760 | 0 | bb2 = b1 + 2 * m; |
761 | 0 | b1 += 4 * m + 4; |
762 | 0 |
|
763 | 0 | cc1 = b2 + 0 * m; |
764 | 0 | cc2 = b2 + 2 * m; |
765 | 0 | b2 += 4 * m + 4; |
766 | 0 |
|
767 | 0 | if (m - js >= 2){ |
768 | 0 |
|
769 | 0 | a11 = *(aa1 + 0); |
770 | 0 | a31 = *(aa1 + 2); |
771 | 0 | a41 = *(aa1 + 3); |
772 | 0 |
|
773 | 0 | a12 = *(aa2 + 2); |
774 | 0 |
|
775 | 0 | *(bb1 + 0) = a11; |
776 | 0 | *(bb1 + 1) = 0.; |
777 | 0 | *(bb1 + 2) = a31; |
778 | 0 | *(bb1 + 3) = -a41; |
779 | 0 |
|
780 | 0 | *(bb2 + 0) = a31; |
781 | 0 | *(bb2 + 1) = a41; |
782 | 0 | *(bb2 + 2) = a12; |
783 | 0 | *(bb2 + 3) = 0.; |
784 | 0 |
|
785 | 0 | aa1 += 4; |
786 | 0 | aa2 += 4; |
787 | 0 | bb1 += 4; |
788 | 0 | bb2 += 4; |
789 | 0 |
|
790 | 0 | cc1 += 4 * m; |
791 | 0 | cc2 += 4 * m; |
792 | 0 |
|
793 | 0 | is = ((m - js - 2) >> 1); |
794 | 0 |
|
795 | 0 | while (is > 0){ |
796 | 0 | a11 = *(aa1 + 0); |
797 | 0 | a21 = *(aa1 + 1); |
798 | 0 | a31 = *(aa1 + 2); |
799 | 0 | a41 = *(aa1 + 3); |
800 | 0 |
|
801 | 0 | a12 = *(aa2 + 0); |
802 | 0 | a22 = *(aa2 + 1); |
803 | 0 | a32 = *(aa2 + 2); |
804 | 0 | a42 = *(aa2 + 3); |
805 | 0 |
|
806 | 0 | aa1 += 4; |
807 | 0 | aa2 += 4; |
808 | 0 |
|
809 | 0 | *(bb1 + 0) = a11; |
810 | 0 | *(bb1 + 1) = -a21; |
811 | 0 | *(bb1 + 2) = a31; |
812 | 0 | *(bb1 + 3) = -a41; |
813 | 0 |
|
814 | 0 | *(bb2 + 0) = a12; |
815 | 0 | *(bb2 + 1) = -a22; |
816 | 0 | *(bb2 + 2) = a32; |
817 | 0 | *(bb2 + 3) = -a42; |
818 | 0 |
|
819 | 0 | *(cc1 + 0) = a11; |
820 | 0 | *(cc1 + 1) = a21; |
821 | 0 | *(cc1 + 2) = a12; |
822 | 0 | *(cc1 + 3) = a22; |
823 | 0 |
|
824 | 0 | *(cc2 + 0) = a31; |
825 | 0 | *(cc2 + 1) = a41; |
826 | 0 | *(cc2 + 2) = a32; |
827 | 0 | *(cc2 + 3) = a42; |
828 | 0 |
|
829 | 0 | bb1 += 4; |
830 | 0 | bb2 += 4; |
831 | 0 |
|
832 | 0 | cc1 += 4 * m; |
833 | 0 | cc2 += 4 * m; |
834 | 0 |
|
835 | 0 | is --; |
836 | 0 | } |
837 | 0 |
|
838 | 0 | if (m & 1){ |
839 | 0 | a11 = *(aa1 + 0); |
840 | 0 | a21 = *(aa1 + 1); |
841 | 0 | a12 = *(aa2 + 0); |
842 | 0 | a22 = *(aa2 + 1); |
843 | 0 |
|
844 | 0 | *(bb1 + 0) = a11; |
845 | 0 | *(bb1 + 1) = -a21; |
846 | 0 | *(bb2 + 0) = a12; |
847 | 0 | *(bb2 + 1) = -a22; |
848 | 0 |
|
849 | 0 | *(cc1 + 0) = a11; |
850 | 0 | *(cc1 + 1) = a21; |
851 | 0 | *(cc1 + 2) = a12; |
852 | 0 | *(cc1 + 3) = a22; |
853 | 0 | } |
854 | 0 | } |
855 | 0 |
|
856 | 0 | if (m - js == 1){ |
857 | 0 | a11 = *(aa1 + 0); |
858 | 0 | *(bb1 + 0) = a11; |
859 | 0 | *(bb1 + 1) = 0.; |
860 | 0 | } |
861 | 0 |
|
862 | 0 | } |
863 | 0 | } Unexecuted instantiation: sger_thread.c:ZHEMCOPY_M Unexecuted instantiation: strmv_thread_NUU.c:ZHEMCOPY_M Unexecuted instantiation: strmv_thread_NUN.c:ZHEMCOPY_M Unexecuted instantiation: strmv_thread_NLU.c:ZHEMCOPY_M Unexecuted instantiation: strmv_thread_NLN.c:ZHEMCOPY_M Unexecuted instantiation: strmv_thread_TUU.c:ZHEMCOPY_M Unexecuted instantiation: strmv_thread_TUN.c:ZHEMCOPY_M Unexecuted instantiation: strmv_thread_TLU.c:ZHEMCOPY_M Unexecuted instantiation: strmv_thread_TLN.c:ZHEMCOPY_M Unexecuted instantiation: dger_thread.c:ZHEMCOPY_M Unexecuted instantiation: dtrmv_thread_NUU.c:ZHEMCOPY_M Unexecuted instantiation: dtrmv_thread_NUN.c:ZHEMCOPY_M Unexecuted instantiation: dtrmv_thread_NLU.c:ZHEMCOPY_M Unexecuted instantiation: dtrmv_thread_NLN.c:ZHEMCOPY_M Unexecuted instantiation: dtrmv_thread_TUU.c:ZHEMCOPY_M Unexecuted instantiation: dtrmv_thread_TUN.c:ZHEMCOPY_M Unexecuted instantiation: dtrmv_thread_TLU.c:ZHEMCOPY_M Unexecuted instantiation: dtrmv_thread_TLN.c:ZHEMCOPY_M Unexecuted instantiation: dsymv_U.c:ZHEMCOPY_M Unexecuted instantiation: dsymv_L.c:ZHEMCOPY_M Unexecuted instantiation: dsymv_thread_U.c:ZHEMCOPY_M Unexecuted instantiation: dsymv_thread_L.c:ZHEMCOPY_M |
864 | | |
865 | 0 | static __inline void ZHEMCOPY_V(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ |
866 | 0 | BLASLONG is, js; |
867 | 0 |
|
868 | 0 | FLOAT *aa1, *aa2; |
869 | 0 | FLOAT *b1, *b2; |
870 | 0 | FLOAT *bb1, *bb2; |
871 | 0 | FLOAT *cc1, *cc2; |
872 | 0 | FLOAT a11, a21, a31, a41; |
873 | 0 | FLOAT a12, a22, a32, a42; |
874 | 0 |
|
875 | 0 | b1 = b; |
876 | 0 | b2 = b; |
877 | 0 |
|
878 | 0 | lda *= 2; |
879 | 0 |
|
880 | 0 | for (js = 0; js < m; js += 2){ |
881 | 0 |
|
882 | 0 | aa1 = a + 0 * lda; |
883 | 0 | aa2 = a + 1 * lda; |
884 | 0 | a += 2 * lda; |
885 | 0 |
|
886 | 0 | bb1 = b1 + 0 * m; |
887 | 0 | bb2 = b1 + 2 * m; |
888 | 0 | b1 += 4 * m; |
889 | 0 |
|
890 | 0 | cc1 = b2 + 0 * m; |
891 | 0 | cc2 = b2 + 2 * m; |
892 | 0 | b2 += 4; |
893 | 0 |
|
894 | 0 | if (m - js >= 2){ |
895 | 0 |
|
896 | 0 | for (is = 0; is < js; is += 2){ |
897 | 0 |
|
898 | 0 | a11 = *(aa1 + 0); |
899 | 0 | a21 = *(aa1 + 1); |
900 | 0 | a31 = *(aa1 + 2); |
901 | 0 | a41 = *(aa1 + 3); |
902 | 0 |
|
903 | 0 | a12 = *(aa2 + 0); |
904 | 0 | a22 = *(aa2 + 1); |
905 | 0 | a32 = *(aa2 + 2); |
906 | 0 | a42 = *(aa2 + 3); |
907 | 0 |
|
908 | 0 | aa1 += 4; |
909 | 0 | aa2 += 4; |
910 | 0 |
|
911 | 0 | *(bb1 + 0) = a11; |
912 | 0 | *(bb1 + 1) = -a21; |
913 | 0 | *(bb1 + 2) = a31; |
914 | 0 | *(bb1 + 3) = -a41; |
915 | 0 |
|
916 | 0 | *(bb2 + 0) = a12; |
917 | 0 | *(bb2 + 1) = -a22; |
918 | 0 | *(bb2 + 2) = a32; |
919 | 0 | *(bb2 + 3) = -a42; |
920 | 0 |
|
921 | 0 | *(cc1 + 0) = a11; |
922 | 0 | *(cc1 + 1) = a21; |
923 | 0 | *(cc1 + 2) = a12; |
924 | 0 | *(cc1 + 3) = a22; |
925 | 0 |
|
926 | 0 | *(cc2 + 0) = a31; |
927 | 0 | *(cc2 + 1) = a41; |
928 | 0 | *(cc2 + 2) = a32; |
929 | 0 | *(cc2 + 3) = a42; |
930 | 0 |
|
931 | 0 | bb1 += 4; |
932 | 0 | bb2 += 4; |
933 | 0 |
|
934 | 0 | cc1 += 4 * m; |
935 | 0 | cc2 += 4 * m; |
936 | 0 | } |
937 | 0 |
|
938 | 0 | a11 = *(aa1 + 0); |
939 | 0 |
|
940 | 0 | a12 = *(aa2 + 0); |
941 | 0 | a22 = *(aa2 + 1); |
942 | 0 | a32 = *(aa2 + 2); |
943 | 0 |
|
944 | 0 | *(bb1 + 0) = a11; |
945 | 0 | *(bb1 + 1) = 0.; |
946 | 0 | *(bb1 + 2) = a12; |
947 | 0 | *(bb1 + 3) = a22; |
948 | 0 |
|
949 | 0 | *(bb2 + 0) = a12; |
950 | 0 | *(bb2 + 1) = -a22; |
951 | 0 | *(bb2 + 2) = a32; |
952 | 0 | *(bb2 + 3) = 0.; |
953 | 0 | } |
954 | 0 |
|
955 | 0 | if (m - js == 1){ |
956 | 0 | for (is = 0; is < js; is += 2){ |
957 | 0 |
|
958 | 0 | a11 = *(aa1 + 0); |
959 | 0 | a21 = *(aa1 + 1); |
960 | 0 | a31 = *(aa1 + 2); |
961 | 0 | a41 = *(aa1 + 3); |
962 | 0 | aa1 += 4; |
963 | 0 |
|
964 | 0 | *(bb1 + 0) = a11; |
965 | 0 | *(bb1 + 1) = -a21; |
966 | 0 | *(bb1 + 2) = a31; |
967 | 0 | *(bb1 + 3) = -a41; |
968 | 0 |
|
969 | 0 | *(cc1 + 0) = a11; |
970 | 0 | *(cc1 + 1) = a21; |
971 | 0 | *(cc2 + 0) = a31; |
972 | 0 | *(cc2 + 1) = a41; |
973 | 0 | bb1 += 4; |
974 | 0 |
|
975 | 0 | cc1 += 4 * m; |
976 | 0 | cc2 += 4 * m; |
977 | 0 | } |
978 | 0 |
|
979 | 0 | a11 = *(aa1 + 0); |
980 | 0 | *(bb1 + 0) = a11; |
981 | 0 | *(bb1 + 1) = 0.; |
982 | 0 | } |
983 | 0 | } |
984 | 0 | } Unexecuted instantiation: sger_thread.c:ZHEMCOPY_V Unexecuted instantiation: strmv_thread_NUU.c:ZHEMCOPY_V Unexecuted instantiation: strmv_thread_NUN.c:ZHEMCOPY_V Unexecuted instantiation: strmv_thread_NLU.c:ZHEMCOPY_V Unexecuted instantiation: strmv_thread_NLN.c:ZHEMCOPY_V Unexecuted instantiation: strmv_thread_TUU.c:ZHEMCOPY_V Unexecuted instantiation: strmv_thread_TUN.c:ZHEMCOPY_V Unexecuted instantiation: strmv_thread_TLU.c:ZHEMCOPY_V Unexecuted instantiation: strmv_thread_TLN.c:ZHEMCOPY_V Unexecuted instantiation: dger_thread.c:ZHEMCOPY_V Unexecuted instantiation: dtrmv_thread_NUU.c:ZHEMCOPY_V Unexecuted instantiation: dtrmv_thread_NUN.c:ZHEMCOPY_V Unexecuted instantiation: dtrmv_thread_NLU.c:ZHEMCOPY_V Unexecuted instantiation: dtrmv_thread_NLN.c:ZHEMCOPY_V Unexecuted instantiation: dtrmv_thread_TUU.c:ZHEMCOPY_V Unexecuted instantiation: dtrmv_thread_TUN.c:ZHEMCOPY_V Unexecuted instantiation: dtrmv_thread_TLU.c:ZHEMCOPY_V Unexecuted instantiation: dtrmv_thread_TLN.c:ZHEMCOPY_V Unexecuted instantiation: dsymv_U.c:ZHEMCOPY_V Unexecuted instantiation: dsymv_L.c:ZHEMCOPY_V Unexecuted instantiation: dsymv_thread_U.c:ZHEMCOPY_V Unexecuted instantiation: dsymv_thread_L.c:ZHEMCOPY_V |
985 | | |
986 | | |
987 | 0 | static __inline void TRMCOPY_NL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ |
988 | 0 | BLASLONG is, js; |
989 | 0 |
|
990 | 0 | FLOAT *aa1, *aa2; |
991 | 0 | FLOAT *b1, *b2; |
992 | 0 | FLOAT *bb1, *bb2; |
993 | 0 | FLOAT *cc1, *cc2; |
994 | 0 | FLOAT a11, a12; |
995 | 0 | FLOAT a21, a22; |
996 | 0 |
|
997 | 0 | b1 = b; |
998 | 0 | b2 = b; |
999 | 0 |
|
1000 | 0 | for (js = 0; js < m; js += 2){ |
1001 | 0 |
|
1002 | 0 | aa1 = a + 0 * lda; |
1003 | 0 | aa2 = a + 1 * lda; |
1004 | 0 | a += 2 * lda + 2; |
1005 | 0 |
|
1006 | 0 | bb1 = b1 + 0 * m; |
1007 | 0 | bb2 = b1 + 1 * m; |
1008 | 0 | b1 += 2 * m + 2; |
1009 | 0 |
|
1010 | 0 | cc1 = b2 + 0 * m; |
1011 | 0 | cc2 = b2 + 1 * m; |
1012 | 0 | b2 += 2 * m + 2; |
1013 | 0 |
|
1014 | 0 | if (m - js >= 2){ |
1015 | 0 |
|
1016 | 0 | a11 = *(aa1 + 0); |
1017 | 0 | a21 = *(aa1 + 1); |
1018 | 0 |
|
1019 | 0 | a22 = *(aa2 + 1); |
1020 | 0 |
|
1021 | 0 | *(bb1 + 0) = a11; |
1022 | 0 | *(bb1 + 1) = a21; |
1023 | 0 | *(bb2 + 0) = a21; |
1024 | 0 | *(bb2 + 1) = a22; |
1025 | 0 | aa1 += 2; |
1026 | 0 | aa2 += 2; |
1027 | 0 | bb1 += 2; |
1028 | 0 | bb2 += 2; |
1029 | 0 |
|
1030 | 0 | cc1 += 2 * m; |
1031 | 0 | cc2 += 2 * m; |
1032 | 0 |
|
1033 | 0 | is = ((m - js - 2) >> 1); |
1034 | 0 |
|
1035 | 0 | while (is > 0){ |
1036 | 0 | a11 = *(aa1 + 0); |
1037 | 0 | a21 = *(aa1 + 1); |
1038 | 0 | a12 = *(aa2 + 0); |
1039 | 0 | a22 = *(aa2 + 1); |
1040 | 0 |
|
1041 | 0 | aa1 += 2; |
1042 | 0 | aa2 += 2; |
1043 | 0 |
|
1044 | 0 | *(bb1 + 0) = a11; |
1045 | 0 | *(bb1 + 1) = a21; |
1046 | 0 | *(bb2 + 0) = a12; |
1047 | 0 | *(bb2 + 1) = a22; |
1048 | 0 |
|
1049 | 0 | *(cc1 + 0) = a11; |
1050 | 0 | *(cc1 + 1) = a12; |
1051 | 0 | *(cc2 + 0) = a21; |
1052 | 0 | *(cc2 + 1) = a22; |
1053 | 0 |
|
1054 | 0 | bb1 += 2; |
1055 | 0 | bb2 += 2; |
1056 | 0 |
|
1057 | 0 | cc1 += 2 * m; |
1058 | 0 | cc2 += 2 * m; |
1059 | 0 |
|
1060 | 0 | is --; |
1061 | 0 | } |
1062 | 0 |
|
1063 | 0 | is = ((m - js - 2) & 1); |
1064 | 0 |
|
1065 | 0 | if (is == 1){ |
1066 | 0 | a11 = *(aa1 + 0); |
1067 | 0 | a12 = *(aa2 + 0); |
1068 | 0 |
|
1069 | 0 | *(bb1 + 0) = a11; |
1070 | 0 | *(bb2 + 0) = a12; |
1071 | 0 |
|
1072 | 0 | *(cc1 + 0) = a11; |
1073 | 0 | *(cc1 + 1) = a12; |
1074 | 0 | } |
1075 | 0 | } |
1076 | 0 |
|
1077 | 0 | if (m - js == 1){ |
1078 | 0 | a11 = *(aa1 + 0); |
1079 | 0 | *(bb1 + 0) = a11; |
1080 | 0 | } |
1081 | 0 |
|
1082 | 0 | } |
1083 | 0 | } Unexecuted instantiation: sger_thread.c:TRMCOPY_NL Unexecuted instantiation: strmv_thread_NUU.c:TRMCOPY_NL Unexecuted instantiation: strmv_thread_NUN.c:TRMCOPY_NL Unexecuted instantiation: strmv_thread_NLU.c:TRMCOPY_NL Unexecuted instantiation: strmv_thread_NLN.c:TRMCOPY_NL Unexecuted instantiation: strmv_thread_TUU.c:TRMCOPY_NL Unexecuted instantiation: strmv_thread_TUN.c:TRMCOPY_NL Unexecuted instantiation: strmv_thread_TLU.c:TRMCOPY_NL Unexecuted instantiation: strmv_thread_TLN.c:TRMCOPY_NL Unexecuted instantiation: dger_thread.c:TRMCOPY_NL Unexecuted instantiation: dtrmv_thread_NUU.c:TRMCOPY_NL Unexecuted instantiation: dtrmv_thread_NUN.c:TRMCOPY_NL Unexecuted instantiation: dtrmv_thread_NLU.c:TRMCOPY_NL Unexecuted instantiation: dtrmv_thread_NLN.c:TRMCOPY_NL Unexecuted instantiation: dtrmv_thread_TUU.c:TRMCOPY_NL Unexecuted instantiation: dtrmv_thread_TUN.c:TRMCOPY_NL Unexecuted instantiation: dtrmv_thread_TLU.c:TRMCOPY_NL Unexecuted instantiation: dtrmv_thread_TLN.c:TRMCOPY_NL Unexecuted instantiation: dsymv_U.c:TRMCOPY_NL Unexecuted instantiation: dsymv_L.c:TRMCOPY_NL Unexecuted instantiation: dsymv_thread_U.c:TRMCOPY_NL Unexecuted instantiation: dsymv_thread_L.c:TRMCOPY_NL |
1084 | | |
1085 | 0 | static __inline void TRMCOPY_TL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ |
1086 | 0 | BLASLONG is, js; |
1087 | 0 |
|
1088 | 0 | FLOAT *aa1, *aa2; |
1089 | 0 | FLOAT *b1, *b2; |
1090 | 0 | FLOAT *bb1, *bb2; |
1091 | 0 | FLOAT *cc1, *cc2; |
1092 | 0 | FLOAT a11, a12; |
1093 | 0 | FLOAT a21, a22; |
1094 | 0 |
|
1095 | 0 | b1 = b; |
1096 | 0 | b2 = b; |
1097 | 0 |
|
1098 | 0 | for (js = 0; js < m; js += 2){ |
1099 | 0 |
|
1100 | 0 | aa1 = a + 0 * lda; |
1101 | 0 | aa2 = a + 1 * lda; |
1102 | 0 | a += 2 * lda + 2; |
1103 | 0 |
|
1104 | 0 | bb1 = b1 + 0 * m; |
1105 | 0 | bb2 = b1 + 1 * m; |
1106 | 0 | b1 += 2 * m + 2; |
1107 | 0 |
|
1108 | 0 | cc1 = b2 + 0 * m; |
1109 | 0 | cc2 = b2 + 1 * m; |
1110 | 0 | b2 += 2 * m + 2; |
1111 | 0 |
|
1112 | 0 | if (m - js >= 2){ |
1113 | 0 |
|
1114 | 0 | a11 = *(aa1 + 0); |
1115 | 0 | a21 = *(aa1 + 1); |
1116 | 0 |
|
1117 | 0 | a22 = *(aa2 + 1); |
1118 | 0 |
|
1119 | 0 | *(bb1 + 0) = a11; |
1120 | 0 | *(bb1 + 1) = a21; |
1121 | 0 | *(bb2 + 0) = a21; |
1122 | 0 | *(bb2 + 1) = a22; |
1123 | 0 | aa1 += 2; |
1124 | 0 | aa2 += 2; |
1125 | 0 | bb1 += 2; |
1126 | 0 | bb2 += 2; |
1127 | 0 |
|
1128 | 0 | cc1 += 2 * m; |
1129 | 0 | cc2 += 2 * m; |
1130 | 0 |
|
1131 | 0 | is = ((m - js - 2) >> 1); |
1132 | 0 |
|
1133 | 0 | while (is > 0){ |
1134 | 0 | a11 = *(aa1 + 0); |
1135 | 0 | a21 = *(aa1 + 1); |
1136 | 0 | a12 = *(aa2 + 0); |
1137 | 0 | a22 = *(aa2 + 1); |
1138 | 0 |
|
1139 | 0 | aa1 += 2; |
1140 | 0 | aa2 += 2; |
1141 | 0 |
|
1142 | 0 | *(bb1 + 0) = a11; |
1143 | 0 | *(bb1 + 1) = a21; |
1144 | 0 | *(bb2 + 0) = a12; |
1145 | 0 | *(bb2 + 1) = a22; |
1146 | 0 |
|
1147 | 0 | *(cc1 + 0) = a11; |
1148 | 0 | *(cc1 + 1) = a12; |
1149 | 0 | *(cc2 + 0) = a21; |
1150 | 0 | *(cc2 + 1) = a22; |
1151 | 0 |
|
1152 | 0 | bb1 += 2; |
1153 | 0 | bb2 += 2; |
1154 | 0 |
|
1155 | 0 | cc1 += 2 * m; |
1156 | 0 | cc2 += 2 * m; |
1157 | 0 |
|
1158 | 0 | is --; |
1159 | 0 | } |
1160 | 0 |
|
1161 | 0 | is = ((m - js - 2) & 1); |
1162 | 0 |
|
1163 | 0 | if (is == 1){ |
1164 | 0 | a11 = *(aa1 + 0); |
1165 | 0 | a12 = *(aa2 + 0); |
1166 | 0 |
|
1167 | 0 | *(bb1 + 0) = a11; |
1168 | 0 | *(bb2 + 0) = a12; |
1169 | 0 |
|
1170 | 0 | *(cc1 + 0) = a11; |
1171 | 0 | *(cc1 + 1) = a12; |
1172 | 0 | } |
1173 | 0 | } |
1174 | 0 |
|
1175 | 0 | if (m - js == 1){ |
1176 | 0 | a11 = *(aa1 + 0); |
1177 | 0 | *(bb1 + 0) = a11; |
1178 | 0 | } |
1179 | 0 |
|
1180 | 0 | } |
1181 | 0 | } Unexecuted instantiation: sger_thread.c:TRMCOPY_TL Unexecuted instantiation: strmv_thread_NUU.c:TRMCOPY_TL Unexecuted instantiation: strmv_thread_NUN.c:TRMCOPY_TL Unexecuted instantiation: strmv_thread_NLU.c:TRMCOPY_TL Unexecuted instantiation: strmv_thread_NLN.c:TRMCOPY_TL Unexecuted instantiation: strmv_thread_TUU.c:TRMCOPY_TL Unexecuted instantiation: strmv_thread_TUN.c:TRMCOPY_TL Unexecuted instantiation: strmv_thread_TLU.c:TRMCOPY_TL Unexecuted instantiation: strmv_thread_TLN.c:TRMCOPY_TL Unexecuted instantiation: dger_thread.c:TRMCOPY_TL Unexecuted instantiation: dtrmv_thread_NUU.c:TRMCOPY_TL Unexecuted instantiation: dtrmv_thread_NUN.c:TRMCOPY_TL Unexecuted instantiation: dtrmv_thread_NLU.c:TRMCOPY_TL Unexecuted instantiation: dtrmv_thread_NLN.c:TRMCOPY_TL Unexecuted instantiation: dtrmv_thread_TUU.c:TRMCOPY_TL Unexecuted instantiation: dtrmv_thread_TUN.c:TRMCOPY_TL Unexecuted instantiation: dtrmv_thread_TLU.c:TRMCOPY_TL Unexecuted instantiation: dtrmv_thread_TLN.c:TRMCOPY_TL Unexecuted instantiation: dsymv_U.c:TRMCOPY_TL Unexecuted instantiation: dsymv_L.c:TRMCOPY_TL Unexecuted instantiation: dsymv_thread_U.c:TRMCOPY_TL Unexecuted instantiation: dsymv_thread_L.c:TRMCOPY_TL |
1182 | | |
1183 | 0 | static __inline void TRMCOPY_NU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ |
1184 | 0 | BLASLONG is, js; |
1185 | 0 |
|
1186 | 0 | FLOAT *aa1, *aa2; |
1187 | 0 | FLOAT *b1, *b2; |
1188 | 0 | FLOAT *bb1, *bb2; |
1189 | 0 | FLOAT *cc1, *cc2; |
1190 | 0 | FLOAT a11, a12; |
1191 | 0 | FLOAT a21, a22; |
1192 | 0 |
|
1193 | 0 | b1 = b; |
1194 | 0 | b2 = b; |
1195 | 0 |
|
1196 | 0 | for (js = 0; js < m; js += 2){ |
1197 | 0 |
|
1198 | 0 | aa1 = a + 0 * lda; |
1199 | 0 | aa2 = a + 1 * lda; |
1200 | 0 | a += 2 * lda; |
1201 | 0 |
|
1202 | 0 | bb1 = b1 + 0 * m; |
1203 | 0 | bb2 = b1 + 1 * m; |
1204 | 0 | b1 += 2 * m; |
1205 | 0 |
|
1206 | 0 | cc1 = b2 + 0 * m; |
1207 | 0 | cc2 = b2 + 1 * m; |
1208 | 0 | b2 += 2; |
1209 | 0 |
|
1210 | 0 | if (m - js >= 2){ |
1211 | 0 |
|
1212 | 0 | for (is = 0; is < js; is += 2){ |
1213 | 0 |
|
1214 | 0 | a11 = *(aa1 + 0); |
1215 | 0 | a21 = *(aa1 + 1); |
1216 | 0 | a12 = *(aa2 + 0); |
1217 | 0 | a22 = *(aa2 + 1); |
1218 | 0 |
|
1219 | 0 | aa1 += 2; |
1220 | 0 | aa2 += 2; |
1221 | 0 |
|
1222 | 0 | *(bb1 + 0) = a11; |
1223 | 0 | *(bb1 + 1) = a21; |
1224 | 0 | *(bb2 + 0) = a12; |
1225 | 0 | *(bb2 + 1) = a22; |
1226 | 0 |
|
1227 | 0 | *(cc1 + 0) = a11; |
1228 | 0 | *(cc1 + 1) = a12; |
1229 | 0 | *(cc2 + 0) = a21; |
1230 | 0 | *(cc2 + 1) = a22; |
1231 | 0 |
|
1232 | 0 | bb1 += 2; |
1233 | 0 | bb2 += 2; |
1234 | 0 |
|
1235 | 0 | cc1 += 2 * m; |
1236 | 0 | cc2 += 2 * m; |
1237 | 0 | } |
1238 | 0 |
|
1239 | 0 | a11 = *(aa1 + 0); |
1240 | 0 |
|
1241 | 0 | a12 = *(aa2 + 0); |
1242 | 0 | a22 = *(aa2 + 1); |
1243 | 0 |
|
1244 | 0 | *(bb1 + 0) = a11; |
1245 | 0 | *(bb1 + 1) = a12; |
1246 | 0 | *(bb2 + 0) = a12; |
1247 | 0 | *(bb2 + 1) = a22; |
1248 | 0 | } |
1249 | 0 |
|
1250 | 0 | if (m - js == 1){ |
1251 | 0 | for (is = 0; is < js; is += 2){ |
1252 | 0 |
|
1253 | 0 | a11 = *(aa1 + 0); |
1254 | 0 | a21 = *(aa1 + 1); |
1255 | 0 | aa1 += 2; |
1256 | 0 |
|
1257 | 0 | *(bb1 + 0) = a11; |
1258 | 0 | *(bb1 + 1) = a21; |
1259 | 0 | *(cc1 + 0) = a11; |
1260 | 0 | *(cc2 + 0) = a21; |
1261 | 0 | bb1 += 2; |
1262 | 0 |
|
1263 | 0 | cc1 += 2 * m; |
1264 | 0 | cc2 += 2 * m; |
1265 | 0 | } |
1266 | 0 |
|
1267 | 0 | a11 = *(aa1 + 0); |
1268 | 0 | *(bb1 + 0) = a11; |
1269 | 0 | } |
1270 | 0 | } |
1271 | 0 | } Unexecuted instantiation: sger_thread.c:TRMCOPY_NU Unexecuted instantiation: strmv_thread_NUU.c:TRMCOPY_NU Unexecuted instantiation: strmv_thread_NUN.c:TRMCOPY_NU Unexecuted instantiation: strmv_thread_NLU.c:TRMCOPY_NU Unexecuted instantiation: strmv_thread_NLN.c:TRMCOPY_NU Unexecuted instantiation: strmv_thread_TUU.c:TRMCOPY_NU Unexecuted instantiation: strmv_thread_TUN.c:TRMCOPY_NU Unexecuted instantiation: strmv_thread_TLU.c:TRMCOPY_NU Unexecuted instantiation: strmv_thread_TLN.c:TRMCOPY_NU Unexecuted instantiation: dger_thread.c:TRMCOPY_NU Unexecuted instantiation: dtrmv_thread_NUU.c:TRMCOPY_NU Unexecuted instantiation: dtrmv_thread_NUN.c:TRMCOPY_NU Unexecuted instantiation: dtrmv_thread_NLU.c:TRMCOPY_NU Unexecuted instantiation: dtrmv_thread_NLN.c:TRMCOPY_NU Unexecuted instantiation: dtrmv_thread_TUU.c:TRMCOPY_NU Unexecuted instantiation: dtrmv_thread_TUN.c:TRMCOPY_NU Unexecuted instantiation: dtrmv_thread_TLU.c:TRMCOPY_NU Unexecuted instantiation: dtrmv_thread_TLN.c:TRMCOPY_NU Unexecuted instantiation: dsymv_U.c:TRMCOPY_NU Unexecuted instantiation: dsymv_L.c:TRMCOPY_NU Unexecuted instantiation: dsymv_thread_U.c:TRMCOPY_NU Unexecuted instantiation: dsymv_thread_L.c:TRMCOPY_NU |
1272 | | |
1273 | 0 | static __inline void TRMCOPY_TU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ |
1274 | 0 | BLASLONG is, js; |
1275 | 0 |
|
1276 | 0 | FLOAT *aa1, *aa2; |
1277 | 0 | FLOAT *b1, *b2; |
1278 | 0 | FLOAT *bb1, *bb2; |
1279 | 0 | FLOAT *cc1, *cc2; |
1280 | 0 | FLOAT a11, a12; |
1281 | 0 | FLOAT a21, a22; |
1282 | 0 |
|
1283 | 0 | b1 = b; |
1284 | 0 | b2 = b; |
1285 | 0 |
|
1286 | 0 | for (js = 0; js < m; js += 2){ |
1287 | 0 |
|
1288 | 0 | aa1 = a + 0 * lda; |
1289 | 0 | aa2 = a + 1 * lda; |
1290 | 0 | a += 2 * lda; |
1291 | 0 |
|
1292 | 0 | bb1 = b1 + 0 * m; |
1293 | 0 | bb2 = b1 + 1 * m; |
1294 | 0 | b1 += 2 * m; |
1295 | 0 |
|
1296 | 0 | cc1 = b2 + 0 * m; |
1297 | 0 | cc2 = b2 + 1 * m; |
1298 | 0 | b2 += 2; |
1299 | 0 |
|
1300 | 0 | if (m - js >= 2){ |
1301 | 0 |
|
1302 | 0 | for (is = 0; is < js; is += 2){ |
1303 | 0 |
|
1304 | 0 | a11 = *(aa1 + 0); |
1305 | 0 | a21 = *(aa1 + 1); |
1306 | 0 | a12 = *(aa2 + 0); |
1307 | 0 | a22 = *(aa2 + 1); |
1308 | 0 |
|
1309 | 0 | aa1 += 2; |
1310 | 0 | aa2 += 2; |
1311 | 0 |
|
1312 | 0 | *(bb1 + 0) = a11; |
1313 | 0 | *(bb1 + 1) = a21; |
1314 | 0 | *(bb2 + 0) = a12; |
1315 | 0 | *(bb2 + 1) = a22; |
1316 | 0 |
|
1317 | 0 | *(cc1 + 0) = a11; |
1318 | 0 | *(cc1 + 1) = a12; |
1319 | 0 | *(cc2 + 0) = a21; |
1320 | 0 | *(cc2 + 1) = a22; |
1321 | 0 |
|
1322 | 0 | bb1 += 2; |
1323 | 0 | bb2 += 2; |
1324 | 0 |
|
1325 | 0 | cc1 += 2 * m; |
1326 | 0 | cc2 += 2 * m; |
1327 | 0 | } |
1328 | 0 |
|
1329 | 0 | a11 = *(aa1 + 0); |
1330 | 0 |
|
1331 | 0 | a12 = *(aa2 + 0); |
1332 | 0 | a22 = *(aa2 + 1); |
1333 | 0 |
|
1334 | 0 | *(bb1 + 0) = a11; |
1335 | 0 | *(bb1 + 1) = a12; |
1336 | 0 | *(bb2 + 0) = a12; |
1337 | 0 | *(bb2 + 1) = a22; |
1338 | 0 | } |
1339 | 0 |
|
1340 | 0 | if (m - js == 1){ |
1341 | 0 | for (is = 0; is < js; is += 2){ |
1342 | 0 |
|
1343 | 0 | a11 = *(aa1 + 0); |
1344 | 0 | a21 = *(aa1 + 1); |
1345 | 0 | aa1 += 2; |
1346 | 0 |
|
1347 | 0 | *(bb1 + 0) = a11; |
1348 | 0 | *(bb1 + 1) = a21; |
1349 | 0 | *(cc1 + 0) = a11; |
1350 | 0 | *(cc2 + 0) = a21; |
1351 | 0 | bb1 += 2; |
1352 | 0 |
|
1353 | 0 | cc1 += 2 * m; |
1354 | 0 | cc2 += 2 * m; |
1355 | 0 | } |
1356 | 0 |
|
1357 | 0 | a11 = *(aa1 + 0); |
1358 | 0 | *(bb1 + 0) = a11; |
1359 | 0 | } |
1360 | 0 | } |
1361 | 0 | } Unexecuted instantiation: sger_thread.c:TRMCOPY_TU Unexecuted instantiation: strmv_thread_NUU.c:TRMCOPY_TU Unexecuted instantiation: strmv_thread_NUN.c:TRMCOPY_TU Unexecuted instantiation: strmv_thread_NLU.c:TRMCOPY_TU Unexecuted instantiation: strmv_thread_NLN.c:TRMCOPY_TU Unexecuted instantiation: strmv_thread_TUU.c:TRMCOPY_TU Unexecuted instantiation: strmv_thread_TUN.c:TRMCOPY_TU Unexecuted instantiation: strmv_thread_TLU.c:TRMCOPY_TU Unexecuted instantiation: strmv_thread_TLN.c:TRMCOPY_TU Unexecuted instantiation: dger_thread.c:TRMCOPY_TU Unexecuted instantiation: dtrmv_thread_NUU.c:TRMCOPY_TU Unexecuted instantiation: dtrmv_thread_NUN.c:TRMCOPY_TU Unexecuted instantiation: dtrmv_thread_NLU.c:TRMCOPY_TU Unexecuted instantiation: dtrmv_thread_NLN.c:TRMCOPY_TU Unexecuted instantiation: dtrmv_thread_TUU.c:TRMCOPY_TU Unexecuted instantiation: dtrmv_thread_TUN.c:TRMCOPY_TU Unexecuted instantiation: dtrmv_thread_TLU.c:TRMCOPY_TU Unexecuted instantiation: dtrmv_thread_TLN.c:TRMCOPY_TU Unexecuted instantiation: dsymv_U.c:TRMCOPY_TU Unexecuted instantiation: dsymv_L.c:TRMCOPY_TU Unexecuted instantiation: dsymv_thread_U.c:TRMCOPY_TU Unexecuted instantiation: dsymv_thread_L.c:TRMCOPY_TU |
1362 | | |
1363 | 0 | static __inline void ZTRMCOPY_NL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ |
1364 | 0 | BLASLONG is, js; |
1365 | 0 |
|
1366 | 0 | FLOAT *aa1, *aa2; |
1367 | 0 | FLOAT *b1, *b2; |
1368 | 0 | FLOAT *bb1, *bb2; |
1369 | 0 | FLOAT *cc1, *cc2; |
1370 | 0 | FLOAT a11, a21, a31, a41; |
1371 | 0 | FLOAT a12, a22, a32, a42; |
1372 | 0 |
|
1373 | 0 | b1 = b; |
1374 | 0 | b2 = b; |
1375 | 0 |
|
1376 | 0 | lda *= 2; |
1377 | 0 |
|
1378 | 0 | for (js = 0; js < m; js += 2){ |
1379 | 0 |
|
1380 | 0 | aa1 = a + 0 * lda; |
1381 | 0 | aa2 = a + 1 * lda; |
1382 | 0 | a += 2 * lda + 4; |
1383 | 0 |
|
1384 | 0 | bb1 = b1 + 0 * m; |
1385 | 0 | bb2 = b1 + 2 * m; |
1386 | 0 | b1 += 4 * m + 4; |
1387 | 0 |
|
1388 | 0 | cc1 = b2 + 0 * m; |
1389 | 0 | cc2 = b2 + 2 * m; |
1390 | 0 | b2 += 4 * m + 4; |
1391 | 0 |
|
1392 | 0 | if (m - js >= 2){ |
1393 | 0 |
|
1394 | 0 | a11 = *(aa1 + 0); |
1395 | 0 | a21 = *(aa1 + 1); |
1396 | 0 | a31 = *(aa1 + 2); |
1397 | 0 | a41 = *(aa1 + 3); |
1398 | 0 |
|
1399 | 0 | a12 = *(aa2 + 2); |
1400 | 0 | a22 = *(aa2 + 3); |
1401 | 0 |
|
1402 | 0 | *(bb1 + 0) = a11; |
1403 | 0 | *(bb1 + 1) = a21; |
1404 | 0 | *(bb1 + 2) = a31; |
1405 | 0 | *(bb1 + 3) = a41; |
1406 | 0 |
|
1407 | 0 | *(bb2 + 0) = a31; |
1408 | 0 | *(bb2 + 1) = a41; |
1409 | 0 | *(bb2 + 2) = a12; |
1410 | 0 | *(bb2 + 3) = a22; |
1411 | 0 |
|
1412 | 0 | aa1 += 4; |
1413 | 0 | aa2 += 4; |
1414 | 0 | bb1 += 4; |
1415 | 0 | bb2 += 4; |
1416 | 0 |
|
1417 | 0 | cc1 += 4 * m; |
1418 | 0 | cc2 += 4 * m; |
1419 | 0 |
|
1420 | 0 | is = ((m - js - 2) >> 1); |
1421 | 0 |
|
1422 | 0 | while (is > 0){ |
1423 | 0 | a11 = *(aa1 + 0); |
1424 | 0 | a21 = *(aa1 + 1); |
1425 | 0 | a31 = *(aa1 + 2); |
1426 | 0 | a41 = *(aa1 + 3); |
1427 | 0 |
|
1428 | 0 | a12 = *(aa2 + 0); |
1429 | 0 | a22 = *(aa2 + 1); |
1430 | 0 | a32 = *(aa2 + 2); |
1431 | 0 | a42 = *(aa2 + 3); |
1432 | 0 |
|
1433 | 0 | aa1 += 4; |
1434 | 0 | aa2 += 4; |
1435 | 0 |
|
1436 | 0 | *(bb1 + 0) = a11; |
1437 | 0 | *(bb1 + 1) = a21; |
1438 | 0 | *(bb1 + 2) = a31; |
1439 | 0 | *(bb1 + 3) = a41; |
1440 | 0 |
|
1441 | 0 | *(bb2 + 0) = a12; |
1442 | 0 | *(bb2 + 1) = a22; |
1443 | 0 | *(bb2 + 2) = a32; |
1444 | 0 | *(bb2 + 3) = a42; |
1445 | 0 |
|
1446 | 0 | *(cc1 + 0) = a11; |
1447 | 0 | *(cc1 + 1) = a21; |
1448 | 0 | *(cc1 + 2) = a12; |
1449 | 0 | *(cc1 + 3) = a22; |
1450 | 0 |
|
1451 | 0 | *(cc2 + 0) = a31; |
1452 | 0 | *(cc2 + 1) = a41; |
1453 | 0 | *(cc2 + 2) = a32; |
1454 | 0 | *(cc2 + 3) = a42; |
1455 | 0 |
|
1456 | 0 | bb1 += 4; |
1457 | 0 | bb2 += 4; |
1458 | 0 |
|
1459 | 0 | cc1 += 4 * m; |
1460 | 0 | cc2 += 4 * m; |
1461 | 0 |
|
1462 | 0 | is --; |
1463 | 0 | } |
1464 | 0 |
|
1465 | 0 | if (m & 1){ |
1466 | 0 | a11 = *(aa1 + 0); |
1467 | 0 | a21 = *(aa1 + 1); |
1468 | 0 | a12 = *(aa2 + 0); |
1469 | 0 | a22 = *(aa2 + 1); |
1470 | 0 |
|
1471 | 0 | *(bb1 + 0) = a11; |
1472 | 0 | *(bb1 + 1) = a21; |
1473 | 0 | *(bb2 + 0) = a12; |
1474 | 0 | *(bb2 + 1) = a22; |
1475 | 0 |
|
1476 | 0 | *(cc1 + 0) = a11; |
1477 | 0 | *(cc1 + 1) = a21; |
1478 | 0 | *(cc1 + 2) = a12; |
1479 | 0 | *(cc1 + 3) = a22; |
1480 | 0 | } |
1481 | 0 | } |
1482 | 0 |
|
1483 | 0 | if (m - js == 1){ |
1484 | 0 | a11 = *(aa1 + 0); |
1485 | 0 | a21 = *(aa1 + 1); |
1486 | 0 | *(bb1 + 0) = a11; |
1487 | 0 | *(bb1 + 1) = a21; |
1488 | 0 | } |
1489 | 0 |
|
1490 | 0 | } |
1491 | 0 | } Unexecuted instantiation: sger_thread.c:ZTRMCOPY_NL Unexecuted instantiation: strmv_thread_NUU.c:ZTRMCOPY_NL Unexecuted instantiation: strmv_thread_NUN.c:ZTRMCOPY_NL Unexecuted instantiation: strmv_thread_NLU.c:ZTRMCOPY_NL Unexecuted instantiation: strmv_thread_NLN.c:ZTRMCOPY_NL Unexecuted instantiation: strmv_thread_TUU.c:ZTRMCOPY_NL Unexecuted instantiation: strmv_thread_TUN.c:ZTRMCOPY_NL Unexecuted instantiation: strmv_thread_TLU.c:ZTRMCOPY_NL Unexecuted instantiation: strmv_thread_TLN.c:ZTRMCOPY_NL Unexecuted instantiation: dger_thread.c:ZTRMCOPY_NL Unexecuted instantiation: dtrmv_thread_NUU.c:ZTRMCOPY_NL Unexecuted instantiation: dtrmv_thread_NUN.c:ZTRMCOPY_NL Unexecuted instantiation: dtrmv_thread_NLU.c:ZTRMCOPY_NL Unexecuted instantiation: dtrmv_thread_NLN.c:ZTRMCOPY_NL Unexecuted instantiation: dtrmv_thread_TUU.c:ZTRMCOPY_NL Unexecuted instantiation: dtrmv_thread_TUN.c:ZTRMCOPY_NL Unexecuted instantiation: dtrmv_thread_TLU.c:ZTRMCOPY_NL Unexecuted instantiation: dtrmv_thread_TLN.c:ZTRMCOPY_NL Unexecuted instantiation: dsymv_U.c:ZTRMCOPY_NL Unexecuted instantiation: dsymv_L.c:ZTRMCOPY_NL Unexecuted instantiation: dsymv_thread_U.c:ZTRMCOPY_NL Unexecuted instantiation: dsymv_thread_L.c:ZTRMCOPY_NL |
1492 | | |
1493 | 0 | static __inline void ZTRMCOPY_TL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ |
1494 | 0 | BLASLONG is, js; |
1495 | 0 |
|
1496 | 0 | FLOAT *aa1, *aa2; |
1497 | 0 | FLOAT *b1, *b2; |
1498 | 0 | FLOAT *bb1, *bb2; |
1499 | 0 | FLOAT *cc1, *cc2; |
1500 | 0 | FLOAT a11, a21, a31, a41; |
1501 | 0 | FLOAT a12, a22, a32, a42; |
1502 | 0 |
|
1503 | 0 | b1 = b; |
1504 | 0 | b2 = b; |
1505 | 0 |
|
1506 | 0 | lda *= 2; |
1507 | 0 |
|
1508 | 0 | for (js = 0; js < m; js += 2){ |
1509 | 0 |
|
1510 | 0 | aa1 = a + 0 * lda; |
1511 | 0 | aa2 = a + 1 * lda; |
1512 | 0 | a += 2 * lda + 4; |
1513 | 0 |
|
1514 | 0 | bb1 = b1 + 0 * m; |
1515 | 0 | bb2 = b1 + 2 * m; |
1516 | 0 | b1 += 4 * m + 4; |
1517 | 0 |
|
1518 | 0 | cc1 = b2 + 0 * m; |
1519 | 0 | cc2 = b2 + 2 * m; |
1520 | 0 | b2 += 4 * m + 4; |
1521 | 0 |
|
1522 | 0 | if (m - js >= 2){ |
1523 | 0 |
|
1524 | 0 | a11 = *(aa1 + 0); |
1525 | 0 | a21 = *(aa1 + 1); |
1526 | 0 | a31 = *(aa1 + 2); |
1527 | 0 | a41 = *(aa1 + 3); |
1528 | 0 |
|
1529 | 0 | a12 = *(aa2 + 2); |
1530 | 0 | a22 = *(aa2 + 3); |
1531 | 0 |
|
1532 | 0 | *(bb1 + 0) = a11; |
1533 | 0 | *(bb1 + 1) = a21; |
1534 | 0 | *(bb1 + 2) = a31; |
1535 | 0 | *(bb1 + 3) = a41; |
1536 | 0 |
|
1537 | 0 | *(bb2 + 0) = a31; |
1538 | 0 | *(bb2 + 1) = a41; |
1539 | 0 | *(bb2 + 2) = a12; |
1540 | 0 | *(bb2 + 3) = a22; |
1541 | 0 |
|
1542 | 0 | aa1 += 4; |
1543 | 0 | aa2 += 4; |
1544 | 0 | bb1 += 4; |
1545 | 0 | bb2 += 4; |
1546 | 0 |
|
1547 | 0 | cc1 += 4 * m; |
1548 | 0 | cc2 += 4 * m; |
1549 | 0 |
|
1550 | 0 | is = ((m - js - 2) >> 1); |
1551 | 0 |
|
1552 | 0 | while (is > 0){ |
1553 | 0 | a11 = *(aa1 + 0); |
1554 | 0 | a21 = *(aa1 + 1); |
1555 | 0 | a31 = *(aa1 + 2); |
1556 | 0 | a41 = *(aa1 + 3); |
1557 | 0 |
|
1558 | 0 | a12 = *(aa2 + 0); |
1559 | 0 | a22 = *(aa2 + 1); |
1560 | 0 | a32 = *(aa2 + 2); |
1561 | 0 | a42 = *(aa2 + 3); |
1562 | 0 |
|
1563 | 0 | aa1 += 4; |
1564 | 0 | aa2 += 4; |
1565 | 0 |
|
1566 | 0 | *(bb1 + 0) = a11; |
1567 | 0 | *(bb1 + 1) = a21; |
1568 | 0 | *(bb1 + 2) = a31; |
1569 | 0 | *(bb1 + 3) = a41; |
1570 | 0 |
|
1571 | 0 | *(bb2 + 0) = a12; |
1572 | 0 | *(bb2 + 1) = a22; |
1573 | 0 | *(bb2 + 2) = a32; |
1574 | 0 | *(bb2 + 3) = a42; |
1575 | 0 |
|
1576 | 0 | *(cc1 + 0) = a11; |
1577 | 0 | *(cc1 + 1) = a21; |
1578 | 0 | *(cc1 + 2) = a12; |
1579 | 0 | *(cc1 + 3) = a22; |
1580 | 0 |
|
1581 | 0 | *(cc2 + 0) = a31; |
1582 | 0 | *(cc2 + 1) = a41; |
1583 | 0 | *(cc2 + 2) = a32; |
1584 | 0 | *(cc2 + 3) = a42; |
1585 | 0 |
|
1586 | 0 | bb1 += 4; |
1587 | 0 | bb2 += 4; |
1588 | 0 |
|
1589 | 0 | cc1 += 4 * m; |
1590 | 0 | cc2 += 4 * m; |
1591 | 0 |
|
1592 | 0 | is --; |
1593 | 0 | } |
1594 | 0 |
|
1595 | 0 | if (m & 1){ |
1596 | 0 | a11 = *(aa1 + 0); |
1597 | 0 | a21 = *(aa1 + 1); |
1598 | 0 | a12 = *(aa2 + 0); |
1599 | 0 | a22 = *(aa2 + 1); |
1600 | 0 |
|
1601 | 0 | *(bb1 + 0) = a11; |
1602 | 0 | *(bb1 + 1) = a21; |
1603 | 0 | *(bb2 + 0) = a12; |
1604 | 0 | *(bb2 + 1) = a22; |
1605 | 0 |
|
1606 | 0 | *(cc1 + 0) = a11; |
1607 | 0 | *(cc1 + 1) = a21; |
1608 | 0 | *(cc1 + 2) = a12; |
1609 | 0 | *(cc1 + 3) = a22; |
1610 | 0 | } |
1611 | 0 | } |
1612 | 0 |
|
1613 | 0 | if (m - js == 1){ |
1614 | 0 | a11 = *(aa1 + 0); |
1615 | 0 | a21 = *(aa1 + 1); |
1616 | 0 | *(bb1 + 0) = a11; |
1617 | 0 | *(bb1 + 1) = a21; |
1618 | 0 | } |
1619 | 0 |
|
1620 | 0 | } |
1621 | 0 | } Unexecuted instantiation: sger_thread.c:ZTRMCOPY_TL Unexecuted instantiation: strmv_thread_NUU.c:ZTRMCOPY_TL Unexecuted instantiation: strmv_thread_NUN.c:ZTRMCOPY_TL Unexecuted instantiation: strmv_thread_NLU.c:ZTRMCOPY_TL Unexecuted instantiation: strmv_thread_NLN.c:ZTRMCOPY_TL Unexecuted instantiation: strmv_thread_TUU.c:ZTRMCOPY_TL Unexecuted instantiation: strmv_thread_TUN.c:ZTRMCOPY_TL Unexecuted instantiation: strmv_thread_TLU.c:ZTRMCOPY_TL Unexecuted instantiation: strmv_thread_TLN.c:ZTRMCOPY_TL Unexecuted instantiation: dger_thread.c:ZTRMCOPY_TL Unexecuted instantiation: dtrmv_thread_NUU.c:ZTRMCOPY_TL Unexecuted instantiation: dtrmv_thread_NUN.c:ZTRMCOPY_TL Unexecuted instantiation: dtrmv_thread_NLU.c:ZTRMCOPY_TL Unexecuted instantiation: dtrmv_thread_NLN.c:ZTRMCOPY_TL Unexecuted instantiation: dtrmv_thread_TUU.c:ZTRMCOPY_TL Unexecuted instantiation: dtrmv_thread_TUN.c:ZTRMCOPY_TL Unexecuted instantiation: dtrmv_thread_TLU.c:ZTRMCOPY_TL Unexecuted instantiation: dtrmv_thread_TLN.c:ZTRMCOPY_TL Unexecuted instantiation: dsymv_U.c:ZTRMCOPY_TL Unexecuted instantiation: dsymv_L.c:ZTRMCOPY_TL Unexecuted instantiation: dsymv_thread_U.c:ZTRMCOPY_TL Unexecuted instantiation: dsymv_thread_L.c:ZTRMCOPY_TL |
1622 | | |
1623 | 0 | static __inline void ZTRMCOPY_NU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ |
1624 | 0 | BLASLONG is, js; |
1625 | 0 |
|
1626 | 0 | FLOAT *aa1, *aa2; |
1627 | 0 | FLOAT *b1, *b2; |
1628 | 0 | FLOAT *bb1, *bb2; |
1629 | 0 | FLOAT *cc1, *cc2; |
1630 | 0 | FLOAT a11, a21, a31, a41; |
1631 | 0 | FLOAT a12, a22, a32, a42; |
1632 | 0 |
|
1633 | 0 | b1 = b; |
1634 | 0 | b2 = b; |
1635 | 0 |
|
1636 | 0 | lda *= 2; |
1637 | 0 |
|
1638 | 0 | for (js = 0; js < m; js += 2){ |
1639 | 0 |
|
1640 | 0 | aa1 = a + 0 * lda; |
1641 | 0 | aa2 = a + 1 * lda; |
1642 | 0 | a += 2 * lda; |
1643 | 0 |
|
1644 | 0 | bb1 = b1 + 0 * m; |
1645 | 0 | bb2 = b1 + 2 * m; |
1646 | 0 | b1 += 4 * m; |
1647 | 0 |
|
1648 | 0 | cc1 = b2 + 0 * m; |
1649 | 0 | cc2 = b2 + 2 * m; |
1650 | 0 | b2 += 4; |
1651 | 0 |
|
1652 | 0 | if (m - js >= 2){ |
1653 | 0 |
|
1654 | 0 | for (is = 0; is < js; is += 2){ |
1655 | 0 |
|
1656 | 0 | a11 = *(aa1 + 0); |
1657 | 0 | a21 = *(aa1 + 1); |
1658 | 0 | a31 = *(aa1 + 2); |
1659 | 0 | a41 = *(aa1 + 3); |
1660 | 0 |
|
1661 | 0 | a12 = *(aa2 + 0); |
1662 | 0 | a22 = *(aa2 + 1); |
1663 | 0 | a32 = *(aa2 + 2); |
1664 | 0 | a42 = *(aa2 + 3); |
1665 | 0 |
|
1666 | 0 | aa1 += 4; |
1667 | 0 | aa2 += 4; |
1668 | 0 |
|
1669 | 0 | *(bb1 + 0) = a11; |
1670 | 0 | *(bb1 + 1) = a21; |
1671 | 0 | *(bb1 + 2) = a31; |
1672 | 0 | *(bb1 + 3) = a41; |
1673 | 0 |
|
1674 | 0 | *(bb2 + 0) = a12; |
1675 | 0 | *(bb2 + 1) = a22; |
1676 | 0 | *(bb2 + 2) = a32; |
1677 | 0 | *(bb2 + 3) = a42; |
1678 | 0 |
|
1679 | 0 | *(cc1 + 0) = a11; |
1680 | 0 | *(cc1 + 1) = a21; |
1681 | 0 | *(cc1 + 2) = a12; |
1682 | 0 | *(cc1 + 3) = a22; |
1683 | 0 |
|
1684 | 0 | *(cc2 + 0) = a31; |
1685 | 0 | *(cc2 + 1) = a41; |
1686 | 0 | *(cc2 + 2) = a32; |
1687 | 0 | *(cc2 + 3) = a42; |
1688 | 0 |
|
1689 | 0 | bb1 += 4; |
1690 | 0 | bb2 += 4; |
1691 | 0 |
|
1692 | 0 | cc1 += 4 * m; |
1693 | 0 | cc2 += 4 * m; |
1694 | 0 | } |
1695 | 0 |
|
1696 | 0 | a11 = *(aa1 + 0); |
1697 | 0 | a21 = *(aa1 + 1); |
1698 | 0 |
|
1699 | 0 | a12 = *(aa2 + 0); |
1700 | 0 | a22 = *(aa2 + 1); |
1701 | 0 | a32 = *(aa2 + 2); |
1702 | 0 | a42 = *(aa2 + 3); |
1703 | 0 |
|
1704 | 0 | *(bb1 + 0) = a11; |
1705 | 0 | *(bb1 + 1) = a21; |
1706 | 0 | *(bb1 + 2) = a12; |
1707 | 0 | *(bb1 + 3) = a22; |
1708 | 0 |
|
1709 | 0 | *(bb2 + 0) = a12; |
1710 | 0 | *(bb2 + 1) = a22; |
1711 | 0 | *(bb2 + 2) = a32; |
1712 | 0 | *(bb2 + 3) = a42; |
1713 | 0 | } |
1714 | 0 |
|
1715 | 0 | if (m - js == 1){ |
1716 | 0 | for (is = 0; is < js; is += 2){ |
1717 | 0 |
|
1718 | 0 | a11 = *(aa1 + 0); |
1719 | 0 | a21 = *(aa1 + 1); |
1720 | 0 | a31 = *(aa1 + 2); |
1721 | 0 | a41 = *(aa1 + 3); |
1722 | 0 | aa1 += 4; |
1723 | 0 |
|
1724 | 0 | *(bb1 + 0) = a11; |
1725 | 0 | *(bb1 + 1) = a21; |
1726 | 0 | *(bb1 + 2) = a31; |
1727 | 0 | *(bb1 + 3) = a41; |
1728 | 0 |
|
1729 | 0 | *(cc1 + 0) = a11; |
1730 | 0 | *(cc1 + 1) = a21; |
1731 | 0 | *(cc2 + 0) = a31; |
1732 | 0 | *(cc2 + 1) = a41; |
1733 | 0 | bb1 += 4; |
1734 | 0 |
|
1735 | 0 | cc1 += 4 * m; |
1736 | 0 | cc2 += 4 * m; |
1737 | 0 | } |
1738 | 0 |
|
1739 | 0 | a11 = *(aa1 + 0); |
1740 | 0 | a21 = *(aa1 + 1); |
1741 | 0 | *(bb1 + 0) = a11; |
1742 | 0 | *(bb1 + 1) = a21; |
1743 | 0 | } |
1744 | 0 | } |
1745 | 0 | } Unexecuted instantiation: sger_thread.c:ZTRMCOPY_NU Unexecuted instantiation: strmv_thread_NUU.c:ZTRMCOPY_NU Unexecuted instantiation: strmv_thread_NUN.c:ZTRMCOPY_NU Unexecuted instantiation: strmv_thread_NLU.c:ZTRMCOPY_NU Unexecuted instantiation: strmv_thread_NLN.c:ZTRMCOPY_NU Unexecuted instantiation: strmv_thread_TUU.c:ZTRMCOPY_NU Unexecuted instantiation: strmv_thread_TUN.c:ZTRMCOPY_NU Unexecuted instantiation: strmv_thread_TLU.c:ZTRMCOPY_NU Unexecuted instantiation: strmv_thread_TLN.c:ZTRMCOPY_NU Unexecuted instantiation: dger_thread.c:ZTRMCOPY_NU Unexecuted instantiation: dtrmv_thread_NUU.c:ZTRMCOPY_NU Unexecuted instantiation: dtrmv_thread_NUN.c:ZTRMCOPY_NU Unexecuted instantiation: dtrmv_thread_NLU.c:ZTRMCOPY_NU Unexecuted instantiation: dtrmv_thread_NLN.c:ZTRMCOPY_NU Unexecuted instantiation: dtrmv_thread_TUU.c:ZTRMCOPY_NU Unexecuted instantiation: dtrmv_thread_TUN.c:ZTRMCOPY_NU Unexecuted instantiation: dtrmv_thread_TLU.c:ZTRMCOPY_NU Unexecuted instantiation: dtrmv_thread_TLN.c:ZTRMCOPY_NU Unexecuted instantiation: dsymv_U.c:ZTRMCOPY_NU Unexecuted instantiation: dsymv_L.c:ZTRMCOPY_NU Unexecuted instantiation: dsymv_thread_U.c:ZTRMCOPY_NU Unexecuted instantiation: dsymv_thread_L.c:ZTRMCOPY_NU |
1746 | | |
/*
 * ZTRMCOPY_TU: expand one triangle of the m x m matrix `a` into a full
 * m x m matrix in the buffer `b`.
 *
 * Every matrix element is handled as two consecutive FLOATs (presumably the
 * real and imaginary parts of a complex value; both are copied unchanged,
 * no sign flip is applied anywhere in this routine).
 *
 * Only elements whose offset inside a source column is less than or equal
 * to the column index are read (the upper triangle if `a` is column-major,
 * presumably what the `U` suffix denotes; confirm against the callers).
 * Each off-diagonal element read at (i, j) is stored at both (i, j) and
 * (j, i) of `b`; diagonal elements are stored once.
 *
 * m   - order of the matrix; also fixes the layout of `b`, whose
 *       consecutive columns are 2 * m FLOATs apart.
 * a   - source matrix; consecutive columns are `lda` elements apart.
 * lda - column stride of `a` in elements (doubled below to count FLOATs).
 * b   - destination buffer; 2 * m * m FLOATs are written.
 */
1747 | 0 | static __inline void ZTRMCOPY_TU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
1748 | 0 | BLASLONG is, js;
1749 | 0 |
|
1750 | 0 | FLOAT *aa1, *aa2; /* read cursors: source columns js and js + 1 */
1751 | 0 | FLOAT *b1, *b2; /* b1 steps through b by column pairs, b2 by row pairs */
1752 | 0 | FLOAT *bb1, *bb2; /* write cursors for the direct copy (columns js, js + 1 of b) */
1753 | 0 | FLOAT *cc1, *cc2; /* write cursors for the mirrored copy (rows js, js + 1 of b) */
1754 | 0 | FLOAT a11, a21, a31, a41; /* two elements (four FLOATs) taken from column js */
1755 | 0 | FLOAT a12, a22, a32, a42; /* two elements (four FLOATs) taken from column js + 1 */
1756 | 0 |
|
1757 | 0 | b1 = b;
1758 | 0 | b2 = b;
1759 | 0 |
|
1760 | 0 | lda *= 2; /* stride in FLOATs: each element occupies two FLOATs */
1761 | 0 |
|
1762 | 0 | for (js = 0; js < m; js += 2){ /* two source columns per iteration */
1763 | 0 |
|
1764 | 0 | aa1 = a + 0 * lda;
1765 | 0 | aa2 = a + 1 * lda;
1766 | 0 | a += 2 * lda;
1767 | 0 |
|
1768 | 0 | bb1 = b1 + 0 * m; /* top of column js in b */
1769 | 0 | bb2 = b1 + 2 * m; /* top of column js + 1 in b */
1770 | 0 | b1 += 4 * m;
1771 | 0 |
|
1772 | 0 | cc1 = b2 + 0 * m; /* row js of column 0 in b */
1773 | 0 | cc2 = b2 + 2 * m; /* row js of column 1 in b */
1774 | 0 | b2 += 4; /* next iteration starts two rows (four FLOATs) further down */
1775 | 0 |
|
1776 | 0 | if (m - js >= 2){ /* a complete pair of columns is left */
1777 | 0 |
|
1778 | 0 | for (is = 0; is < js; is += 2){ /* 2 x 2 blocks that precede the diagonal block */
1779 | 0 |
|
1780 | 0 | a11 = *(aa1 + 0);
1781 | 0 | a21 = *(aa1 + 1);
1782 | 0 | a31 = *(aa1 + 2);
1783 | 0 | a41 = *(aa1 + 3);
1784 | 0 |
|
1785 | 0 | a12 = *(aa2 + 0);
1786 | 0 | a22 = *(aa2 + 1);
1787 | 0 | a32 = *(aa2 + 2);
1788 | 0 | a42 = *(aa2 + 3);
1789 | 0 |
|
1790 | 0 | aa1 += 4;
1791 | 0 | aa2 += 4;
1792 | 0 |
|
1793 | 0 | *(bb1 + 0) = a11; /* direct copy: same position as in the source */
1794 | 0 | *(bb1 + 1) = a21;
1795 | 0 | *(bb1 + 2) = a31;
1796 | 0 | *(bb1 + 3) = a41;
1797 | 0 |
|
1798 | 0 | *(bb2 + 0) = a12;
1799 | 0 | *(bb2 + 1) = a22;
1800 | 0 | *(bb2 + 2) = a32;
1801 | 0 | *(bb2 + 3) = a42;
1802 | 0 |
|
1803 | 0 | *(cc1 + 0) = a11; /* mirrored copy: the 2 x 2 block is stored transposed */
1804 | 0 | *(cc1 + 1) = a21;
1805 | 0 | *(cc1 + 2) = a12;
1806 | 0 | *(cc1 + 3) = a22;
1807 | 0 |
|
1808 | 0 | *(cc2 + 0) = a31;
1809 | 0 | *(cc2 + 1) = a41;
1810 | 0 | *(cc2 + 2) = a32;
1811 | 0 | *(cc2 + 3) = a42;
1812 | 0 |
|
1813 | 0 | bb1 += 4; /* two elements down the same columns */
1814 | 0 | bb2 += 4;
1815 | 0 |
|
1816 | 0 | cc1 += 4 * m; /* two columns to the right, same rows */
1817 | 0 | cc2 += 4 * m;
1818 | 0 | }
1819 | 0 |
|
/* 2 x 2 diagonal block: the second element of column js (aa1 + 2, aa1 + 3)
   is not read; its slot in b receives a12/a22 taken from column js + 1. */
1820 | 0 | a11 = *(aa1 + 0);
1821 | 0 | a21 = *(aa1 + 1);
1822 | 0 |
|
1823 | 0 | a12 = *(aa2 + 0);
1824 | 0 | a22 = *(aa2 + 1);
1825 | 0 | a32 = *(aa2 + 2);
1826 | 0 | a42 = *(aa2 + 3);
1827 | 0 |
|
1828 | 0 | *(bb1 + 0) = a11;
1829 | 0 | *(bb1 + 1) = a21;
1830 | 0 | *(bb1 + 2) = a12;
1831 | 0 | *(bb1 + 3) = a22;
1832 | 0 |
|
1833 | 0 | *(bb2 + 0) = a12;
1834 | 0 | *(bb2 + 1) = a22;
1835 | 0 | *(bb2 + 2) = a32;
1836 | 0 | *(bb2 + 3) = a42;
1837 | 0 | }
1838 | 0 |
|
1839 | 0 | if (m - js == 1){ /* odd m: one trailing column; aa2 and bb2 are not dereferenced here */
1840 | 0 | for (is = 0; is < js; is += 2){ /* two elements of the last column per step */
1841 | 0 |
|
1842 | 0 | a11 = *(aa1 + 0);
1843 | 0 | a21 = *(aa1 + 1);
1844 | 0 | a31 = *(aa1 + 2);
1845 | 0 | a41 = *(aa1 + 3);
1846 | 0 | aa1 += 4;
1847 | 0 |
|
1848 | 0 | *(bb1 + 0) = a11; /* direct copy into the last column of b */
1849 | 0 | *(bb1 + 1) = a21;
1850 | 0 | *(bb1 + 2) = a31;
1851 | 0 | *(bb1 + 3) = a41;
1852 | 0 |
|
1853 | 0 | *(cc1 + 0) = a11; /* mirrored copy into row js of columns is and is + 1 */
1854 | 0 | *(cc1 + 1) = a21;
1855 | 0 | *(cc2 + 0) = a31;
1856 | 0 | *(cc2 + 1) = a41;
1857 | 0 | bb1 += 4;
1858 | 0 |
|
1859 | 0 | cc1 += 4 * m;
1860 | 0 | cc2 += 4 * m;
1861 | 0 | }
1862 | 0 |
|
1863 | 0 | a11 = *(aa1 + 0); /* final diagonal element */
1864 | 0 | a21 = *(aa1 + 1);
1865 | 0 | *(bb1 + 0) = a11;
1866 | 0 | *(bb1 + 1) = a21;
1867 | 0 | }
1868 | 0 | }
1869 | 0 | } Unexecuted instantiation: sger_thread.c:ZTRMCOPY_TU Unexecuted instantiation: strmv_thread_NUU.c:ZTRMCOPY_TU Unexecuted instantiation: strmv_thread_NUN.c:ZTRMCOPY_TU Unexecuted instantiation: strmv_thread_NLU.c:ZTRMCOPY_TU Unexecuted instantiation: strmv_thread_NLN.c:ZTRMCOPY_TU Unexecuted instantiation: strmv_thread_TUU.c:ZTRMCOPY_TU Unexecuted instantiation: strmv_thread_TUN.c:ZTRMCOPY_TU Unexecuted instantiation: strmv_thread_TLU.c:ZTRMCOPY_TU Unexecuted instantiation: strmv_thread_TLN.c:ZTRMCOPY_TU Unexecuted instantiation: dger_thread.c:ZTRMCOPY_TU Unexecuted instantiation: dtrmv_thread_NUU.c:ZTRMCOPY_TU Unexecuted instantiation: dtrmv_thread_NUN.c:ZTRMCOPY_TU Unexecuted instantiation: dtrmv_thread_NLU.c:ZTRMCOPY_TU Unexecuted instantiation: dtrmv_thread_NLN.c:ZTRMCOPY_TU Unexecuted instantiation: dtrmv_thread_TUU.c:ZTRMCOPY_TU Unexecuted instantiation: dtrmv_thread_TUN.c:ZTRMCOPY_TU Unexecuted instantiation: dtrmv_thread_TLU.c:ZTRMCOPY_TU Unexecuted instantiation: dtrmv_thread_TLN.c:ZTRMCOPY_TU Unexecuted instantiation: dsymv_U.c:ZTRMCOPY_TU Unexecuted instantiation: dsymv_L.c:ZTRMCOPY_TU Unexecuted instantiation: dsymv_thread_U.c:ZTRMCOPY_TU Unexecuted instantiation: dsymv_thread_L.c:ZTRMCOPY_TU |
1870 | | |
1871 | | #endif |
1872 | | #endif |
1873 | | |