contrib/openblas/driver/others/blas_server_omp.c
Line | Count | Source |
1 | | /*********************************************************************/ |
2 | | /* Copyright 2009, 2010 The University of Texas at Austin. */ |
3 | | /* All rights reserved. */ |
4 | | /* */ |
5 | | /* Redistribution and use in source and binary forms, with or */ |
6 | | /* without modification, are permitted provided that the following */ |
7 | | /* conditions are met: */ |
8 | | /* */ |
9 | | /* 1. Redistributions of source code must retain the above */ |
10 | | /* copyright notice, this list of conditions and the following */ |
11 | | /* disclaimer. */ |
12 | | /* */ |
13 | | /* 2. Redistributions in binary form must reproduce the above */ |
14 | | /* copyright notice, this list of conditions and the following */ |
15 | | /* disclaimer in the documentation and/or other materials */ |
16 | | /* provided with the distribution. */ |
17 | | /* */ |
18 | | /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ |
19 | | /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ |
20 | | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ |
21 | | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ |
22 | | /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ |
23 | | /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ |
24 | | /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ |
25 | | /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ |
26 | | /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ |
27 | | /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ |
28 | | /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ |
29 | | /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ |
30 | | /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ |
31 | | /* POSSIBILITY OF SUCH DAMAGE. */ |
32 | | /* */ |
33 | | /* The views and conclusions contained in the software and */ |
34 | | /* documentation are those of the authors and should not be */ |
35 | | /* interpreted as representing official policies, either expressed */ |
36 | | /* or implied, of The University of Texas at Austin. */ |
37 | | /*********************************************************************/ |
38 | | |
39 | | #include <stdbool.h> |
40 | | #include <stdio.h> |
41 | | #include <stdlib.h> |
42 | | //#include <sys/mman.h> |
43 | | #include "common.h" |
44 | | |
45 | | #ifndef USE_OPENMP |
46 | | |
47 | | #include "blas_server.c" |
48 | | |
49 | | #else |
50 | | |
/* Branch-prediction hints.  On GNU-compatible compilers they forward to
 * __builtin_expect with the condition normalised to 0/1; elsewhere they
 * collapse to the bare expression.  A definition supplied by an earlier
 * header is left untouched. */
#if !defined(likely)
#  if defined(__GNUC__)
#    define likely(x) __builtin_expect(!!(x), 1)
#  else
#    define likely(x) (x)
#  endif
#endif
#if !defined(unlikely)
#  if defined(__GNUC__)
#    define unlikely(x) __builtin_expect(!!(x), 0)
#  else
#    define unlikely(x) (x)
#  endif
#endif

/* Schedule kind pasted into the schedule() clause of the OpenMP loops;
 * can be overridden from the build command line. */
#if !defined(OMP_SCHED)
#  define OMP_SCHED static
#endif
69 | | |
/* Set to 1 at the end of blas_thread_init() and reset to 0 by
 * blas_thread_shutdown(); exec_blas() re-runs the init while it is 0. */
int blas_server_avail = 0;
/* Assigned in blas_thread_init(). */
int blas_omp_number_max = 0;
/* NOTE(review): not referenced in the visible part of this file. */
int blas_omp_threads_local = 1;

extern int openblas_omp_adaptive_env(void);

/* Work buffers indexed as [parallel-call slot][thread index].  Entries are
 * filled with blas_memory_alloc(2) by adjust_thread_buffers() and released
 * with blas_memory_free() there and in blas_thread_shutdown(). */
static void * blas_thread_buffer[MAX_PARALLEL_NUMBER][MAX_CPU_NUMBER];
/* blas_buffer_inuse[i] is true while an exec_blas() call owns row i of
 * blas_thread_buffer.  Only the HAVE_C11 variant is an atomic type. */
#ifdef HAVE_C11
static atomic_bool blas_buffer_inuse[MAX_PARALLEL_NUMBER];
#else
static _Bool blas_buffer_inuse[MAX_PARALLEL_NUMBER];
#endif
82 | | |
83 | 9 | static void adjust_thread_buffers(void) { |
84 | | |
85 | 9 | int i=0, j=0; |
86 | | |
87 | | //adjust buffer for each thread |
88 | 18 | for(i=0; i < MAX_PARALLEL_NUMBER; i++) { |
89 | 85 | for(j=0; j < blas_cpu_number; j++){ |
90 | 76 | if(blas_thread_buffer[i][j] == NULL){ |
91 | 76 | blas_thread_buffer[i][j] = blas_memory_alloc(2); |
92 | 76 | } |
93 | 76 | } |
94 | 9 | for(; j < MAX_CPU_NUMBER; j++){ |
95 | 0 | if(blas_thread_buffer[i][j] != NULL){ |
96 | 0 | blas_memory_free(blas_thread_buffer[i][j]); |
97 | 0 | blas_thread_buffer[i][j] = NULL; |
98 | 0 | } |
99 | 0 | } |
100 | 9 | } |
101 | 9 | } |
102 | | |
103 | 0 | void goto_set_num_threads(int num_threads) { |
104 | |
|
105 | 0 | if (num_threads < 1) num_threads = blas_num_threads; |
106 | |
|
107 | 0 | if (num_threads > MAX_CPU_NUMBER) num_threads = MAX_CPU_NUMBER; |
108 | |
|
109 | 0 | if (num_threads > blas_num_threads) { |
110 | 0 | blas_num_threads = num_threads; |
111 | 0 | } |
112 | |
|
113 | 0 | blas_cpu_number = num_threads; |
114 | |
|
115 | 0 | adjust_thread_buffers(); |
116 | | #if defined(ARCH_MIPS64) || defined(ARCH_LOONGARCH64) |
117 | | #ifndef DYNAMIC_ARCH |
118 | | //set parameters for different number of threads. |
119 | | blas_set_parameter(); |
120 | | #endif |
121 | | #endif |
122 | |
|
123 | 0 | } |
/* Alternate entry point: hands num_threads unchanged to
 * goto_set_num_threads(). */
void openblas_set_num_threads(int num_threads) {
  goto_set_num_threads(num_threads);
}
128 | | |
129 | | #ifdef OS_LINUX |
130 | | |
/*
 * Affinity setter stub for the OpenMP build.
 *
 * All three arguments are ignored.  A hint is written to stderr and the
 * call always reports failure.
 *
 * Returns -1.
 */
int openblas_setaffinity(int thread_idx, size_t cpusetsize, cpu_set_t* cpu_set) {
  (void)thread_idx;
  (void)cpusetsize;
  (void)cpu_set;

  fputs("OpenBLAS: use OpenMP environment variables for setting cpu affinity\n", stderr);
  return -1;
}
/*
 * Affinity query stub for the OpenMP build.
 *
 * All three arguments are ignored and cpu_set is not written.  A hint is
 * written to stderr and the call always reports failure.
 *
 * Returns -1.
 */
int openblas_getaffinity(int thread_idx, size_t cpusetsize, cpu_set_t* cpu_set) {
  (void)thread_idx;
  (void)cpusetsize;
  (void)cpu_set;

  fputs("OpenBLAS: use OpenMP environment variables for querying cpu affinity\n", stderr);
  return -1;
}
139 | | #endif |
140 | | |
141 | 9 | int blas_thread_init(void){ |
142 | | |
143 | | #if defined(__FreeBSD__) && defined(__clang__) |
144 | | extern int openblas_omp_num_threads_env(void); |
145 | | |
146 | | if(blas_omp_number_max <= 0) |
147 | | blas_omp_number_max= openblas_omp_num_threads_env(); |
148 | | if (blas_omp_number_max <= 0) |
149 | | blas_omp_number_max=MAX_CPU_NUMBER; |
150 | | #else |
151 | 9 | blas_omp_number_max = omp_get_max_threads(); |
152 | 9 | #endif |
153 | | |
154 | 9 | blas_get_cpu_number(); |
155 | | |
156 | 9 | adjust_thread_buffers(); |
157 | | |
158 | 9 | blas_server_avail = 1; |
159 | | |
160 | 9 | return 0; |
161 | 9 | } |
162 | | |
163 | 162 | int BLASFUNC(blas_thread_shutdown)(void){ |
164 | 162 | int i=0, j=0; |
165 | 162 | blas_server_avail = 0; |
166 | | |
167 | 324 | for(i=0; i<MAX_PARALLEL_NUMBER; i++) { |
168 | 1.85k | for(j=0; j<MAX_CPU_NUMBER; j++){ |
169 | 1.68k | if(blas_thread_buffer[i][j]!=NULL){ |
170 | 20 | blas_memory_free(blas_thread_buffer[i][j]); |
171 | 20 | blas_thread_buffer[i][j]=NULL; |
172 | 20 | } |
173 | 1.68k | } |
174 | 162 | } |
175 | | |
176 | 162 | return 0; |
177 | 162 | } |
178 | | |
179 | 0 | static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){ |
180 | |
|
181 | 0 | if (!(mode & BLAS_COMPLEX)){ |
182 | | #ifdef EXPRECISION |
183 | | if ((mode & BLAS_PREC) == BLAS_XDOUBLE){ |
184 | | /* REAL / Extended Double */ |
185 | | void (*afunc)(BLASLONG, BLASLONG, BLASLONG, xdouble, |
186 | | xdouble *, BLASLONG, xdouble *, BLASLONG, |
187 | | xdouble *, BLASLONG, void *) = func; |
188 | | |
189 | | afunc(args -> m, args -> n, args -> k, |
190 | | ((xdouble *)args -> alpha)[0], |
191 | | args -> a, args -> lda, |
192 | | args -> b, args -> ldb, |
193 | | args -> c, args -> ldc, sb); |
194 | | } else |
195 | | #endif |
196 | 0 | if ((mode & BLAS_PREC) == BLAS_DOUBLE){ |
197 | | /* REAL / Double */ |
198 | 0 | void (*afunc)(BLASLONG, BLASLONG, BLASLONG, double, |
199 | 0 | double *, BLASLONG, double *, BLASLONG, |
200 | 0 | double *, BLASLONG, void *) = func; |
201 | |
|
202 | 0 | afunc(args -> m, args -> n, args -> k, |
203 | 0 | ((double *)args -> alpha)[0], |
204 | 0 | args -> a, args -> lda, |
205 | 0 | args -> b, args -> ldb, |
206 | 0 | args -> c, args -> ldc, sb); |
207 | 0 | } else if ((mode & BLAS_PREC) == BLAS_SINGLE){ |
208 | | /* REAL / Single */ |
209 | 0 | void (*afunc)(BLASLONG, BLASLONG, BLASLONG, float, |
210 | 0 | float *, BLASLONG, float *, BLASLONG, |
211 | 0 | float *, BLASLONG, void *) = func; |
212 | |
|
213 | 0 | afunc(args -> m, args -> n, args -> k, |
214 | 0 | ((float *)args -> alpha)[0], |
215 | 0 | args -> a, args -> lda, |
216 | 0 | args -> b, args -> ldb, |
217 | 0 | args -> c, args -> ldc, sb); |
218 | | #ifdef BUILD_BFLOAT16 |
219 | | } else if ((mode & BLAS_PREC) == BLAS_BFLOAT16){ |
220 | | /* REAL / BFLOAT16 */ |
221 | | void (*afunc)(BLASLONG, BLASLONG, BLASLONG, bfloat16, |
222 | | bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, |
223 | | bfloat16 *, BLASLONG, void *) = func; |
224 | | |
225 | | afunc(args -> m, args -> n, args -> k, |
226 | | ((bfloat16 *)args -> alpha)[0], |
227 | | args -> a, args -> lda, |
228 | | args -> b, args -> ldb, |
229 | | args -> c, args -> ldc, sb); |
230 | | } else if ((mode & BLAS_PREC) == BLAS_STOBF16){ |
231 | | /* REAL / BLAS_STOBF16 */ |
232 | | void (*afunc)(BLASLONG, BLASLONG, BLASLONG, float, |
233 | | float *, BLASLONG, bfloat16 *, BLASLONG, |
234 | | float *, BLASLONG, void *) = func; |
235 | | |
236 | | afunc(args -> m, args -> n, args -> k, |
237 | | ((float *)args -> alpha)[0], |
238 | | args -> a, args -> lda, |
239 | | args -> b, args -> ldb, |
240 | | args -> c, args -> ldc, sb); |
241 | | } else if ((mode & BLAS_PREC) == BLAS_DTOBF16){ |
242 | | /* REAL / BLAS_DTOBF16 */ |
243 | | void (*afunc)(BLASLONG, BLASLONG, BLASLONG, double, |
244 | | double *, BLASLONG, bfloat16 *, BLASLONG, |
245 | | double *, BLASLONG, void *) = func; |
246 | | |
247 | | afunc(args -> m, args -> n, args -> k, |
248 | | ((double *)args -> alpha)[0], |
249 | | args -> a, args -> lda, |
250 | | args -> b, args -> ldb, |
251 | | args -> c, args -> ldc, sb); |
252 | | #endif |
253 | 0 | } else { |
254 | | /* REAL / Other types in future */ |
255 | 0 | } |
256 | 0 | } else { |
257 | | #ifdef EXPRECISION |
258 | | if ((mode & BLAS_PREC) == BLAS_XDOUBLE){ |
259 | | /* COMPLEX / Extended Double */ |
260 | | void (*afunc)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, |
261 | | xdouble *, BLASLONG, xdouble *, BLASLONG, |
262 | | xdouble *, BLASLONG, void *) = func; |
263 | | |
264 | | afunc(args -> m, args -> n, args -> k, |
265 | | ((xdouble *)args -> alpha)[0], |
266 | | ((xdouble *)args -> alpha)[1], |
267 | | args -> a, args -> lda, |
268 | | args -> b, args -> ldb, |
269 | | args -> c, args -> ldc, sb); |
270 | | } else |
271 | | #endif |
272 | 0 | if ((mode & BLAS_PREC) == BLAS_DOUBLE){ |
273 | | /* COMPLEX / Double */ |
274 | 0 | void (*afunc)(BLASLONG, BLASLONG, BLASLONG, double, double, |
275 | 0 | double *, BLASLONG, double *, BLASLONG, |
276 | 0 | double *, BLASLONG, void *) = func; |
277 | |
|
278 | 0 | afunc(args -> m, args -> n, args -> k, |
279 | 0 | ((double *)args -> alpha)[0], |
280 | 0 | ((double *)args -> alpha)[1], |
281 | 0 | args -> a, args -> lda, |
282 | 0 | args -> b, args -> ldb, |
283 | 0 | args -> c, args -> ldc, sb); |
284 | 0 | } else if ((mode & BLAS_PREC) == BLAS_SINGLE){ |
285 | | /* COMPLEX / Single */ |
286 | 0 | void (*afunc)(BLASLONG, BLASLONG, BLASLONG, float, float, |
287 | 0 | float *, BLASLONG, float *, BLASLONG, |
288 | 0 | float *, BLASLONG, void *) = func; |
289 | |
|
290 | 0 | afunc(args -> m, args -> n, args -> k, |
291 | 0 | ((float *)args -> alpha)[0], |
292 | 0 | ((float *)args -> alpha)[1], |
293 | 0 | args -> a, args -> lda, |
294 | 0 | args -> b, args -> ldb, |
295 | 0 | args -> c, args -> ldc, sb); |
296 | 0 | } else { |
297 | | /* COMPLEX / Other types in future */ |
298 | 0 | } |
299 | 0 | } |
300 | 0 | } |
301 | | |
302 | 0 | static void exec_threads(int thread_num, blas_queue_t *queue, int buf_index){ |
303 | |
|
304 | 0 | void *buffer, *sa, *sb; |
305 | 0 | int pos=0, release_flag=0; |
306 | |
|
307 | 0 | buffer = NULL; |
308 | 0 | sa = queue -> sa; |
309 | 0 | sb = queue -> sb; |
310 | |
|
311 | | #ifdef CONSISTENT_FPCSR |
312 | | #ifdef __aarch64__ |
313 | | __asm__ __volatile__ ("msr fpcr, %0" : : "r" (queue -> sse_mode)); |
314 | | #else |
315 | | __asm__ __volatile__ ("ldmxcsr %0" : : "m" (queue -> sse_mode)); |
316 | | __asm__ __volatile__ ("fldcw %0" : : "m" (queue -> x87_mode)); |
317 | | #endif |
318 | | #endif |
319 | |
|
320 | 0 | if ((sa == NULL) && (sb == NULL) && ((queue -> mode & BLAS_PTHREAD) == 0)) { |
321 | |
|
322 | 0 | pos= thread_num; |
323 | 0 | buffer = blas_thread_buffer[buf_index][pos]; |
324 | | |
325 | | //fallback |
326 | 0 | if(buffer==NULL) { |
327 | 0 | buffer = blas_memory_alloc(2); |
328 | 0 | release_flag=1; |
329 | 0 | } |
330 | |
|
331 | 0 | if (sa == NULL) { |
332 | 0 | sa = (void *)((BLASLONG)buffer + GEMM_OFFSET_A); |
333 | 0 | queue->sa=sa; |
334 | 0 | } |
335 | |
|
336 | 0 | if (sb == NULL) { |
337 | 0 | if (!(queue -> mode & BLAS_COMPLEX)){ |
338 | | #ifdef EXPRECISION |
339 | | if ((queue -> mode & BLAS_PREC) == BLAS_XDOUBLE){ |
340 | | sb = (void *)(((BLASLONG)sa + ((QGEMM_P * QGEMM_Q * sizeof(xdouble) |
341 | | + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); |
342 | | } else |
343 | | #endif |
344 | 0 | if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE){ |
345 | 0 | #if defined ( BUILD_DOUBLE) || defined (BUILD_COMPLEX16) |
346 | 0 | sb = (void *)(((BLASLONG)sa + ((DGEMM_P * DGEMM_Q * sizeof(double) |
347 | 0 | + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); |
348 | 0 | #endif |
349 | 0 | } else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE){ |
350 | 0 | #if defined (BUILD_SINGLE) || defined (BUILD_COMPLEX) |
351 | 0 | sb = (void *)(((BLASLONG)sa + ((SGEMM_P * SGEMM_Q * sizeof(float) |
352 | 0 | + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); |
353 | 0 | #endif |
354 | 0 | } else { |
355 | | /* Other types in future */ |
356 | 0 | } |
357 | 0 | } else { |
358 | | #ifdef EXPRECISION |
359 | | if ((queue -> mode & BLAS_PREC) == BLAS_XDOUBLE){ |
360 | | sb = (void *)(((BLASLONG)sa + ((XGEMM_P * XGEMM_Q * 2 * sizeof(xdouble) |
361 | | + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); |
362 | | } else |
363 | | #endif |
364 | 0 | if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE){ |
365 | 0 | #ifdef BUILD_COMPLEX16 |
366 | 0 | sb = (void *)(((BLASLONG)sa + ((ZGEMM_P * ZGEMM_Q * 2 * sizeof(double) |
367 | 0 | + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); |
368 | | #else |
369 | | fprintf(stderr,"UNHANDLED COMPLEX16\n"); |
370 | | #endif |
371 | 0 | } else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) { |
372 | 0 | #ifdef BUILD_COMPLEX |
373 | 0 | sb = (void *)(((BLASLONG)sa + ((CGEMM_P * CGEMM_Q * 2 * sizeof(float) |
374 | 0 | + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); |
375 | | #else |
376 | | fprintf(stderr,"UNHANDLED COMPLEX\n"); |
377 | | #endif |
378 | 0 | } else { |
379 | | /* Other types in future */ |
380 | 0 | } |
381 | 0 | } |
382 | 0 | queue->sb=sb; |
383 | 0 | } |
384 | 0 | } |
385 | |
|
386 | 0 | if (queue -> mode & BLAS_LEGACY) { |
387 | 0 | legacy_exec(queue -> routine, queue -> mode, queue -> args, sb); |
388 | 0 | } else |
389 | 0 | if (queue -> mode & BLAS_PTHREAD) { |
390 | 0 | void (*pthreadcompat)(void *) = queue -> routine; |
391 | 0 | (pthreadcompat)(queue -> args); |
392 | |
|
393 | 0 | } else { |
394 | 0 | int (*routine)(blas_arg_t *, void *, void *, void *, void *, BLASLONG) = queue -> routine; |
395 | |
|
396 | 0 | (routine)(queue -> args, queue -> range_m, queue -> range_n, sa, sb, queue -> position); |
397 | |
|
398 | 0 | } |
399 | |
|
400 | 0 | if (release_flag) blas_memory_free(buffer); |
401 | |
|
402 | 0 | } |
403 | | |
404 | 0 | int exec_blas(BLASLONG num, blas_queue_t *queue){ |
405 | | |
406 | | // Handle lazy re-init of the thread-pool after a POSIX fork |
407 | 0 | if (unlikely(blas_server_avail == 0)) blas_thread_init(); |
408 | |
|
409 | 0 | BLASLONG i, buf_index; |
410 | |
|
411 | 0 | if ((num <= 0) || (queue == NULL)) return 0; |
412 | | |
413 | | #ifdef CONSISTENT_FPCSR |
414 | | for (i = 0; i < num; i ++) { |
415 | | #ifdef __aarch64__ |
416 | | __asm__ __volatile__ ("mrs %0, fpcr" : "=r" (queue[i].sse_mode)); |
417 | | #else |
418 | | __asm__ __volatile__ ("fnstcw %0" : "=m" (queue[i].x87_mode)); |
419 | | __asm__ __volatile__ ("stmxcsr %0" : "=m" (queue[i].sse_mode)); |
420 | | #endif |
421 | | } |
422 | | #endif |
423 | | |
424 | 0 | while (true) { |
425 | 0 | for(i=0; i < MAX_PARALLEL_NUMBER; i++) { |
426 | 0 | #ifdef HAVE_C11 |
427 | 0 | _Bool inuse = false; |
428 | 0 | if(atomic_compare_exchange_weak(&blas_buffer_inuse[i], &inuse, true)) { |
429 | | #else |
430 | | if(blas_buffer_inuse[i] == false) { |
431 | | blas_buffer_inuse[i] = true; |
432 | | #endif |
433 | 0 | buf_index = i; |
434 | 0 | break; |
435 | 0 | } |
436 | 0 | } |
437 | 0 | if(i != MAX_PARALLEL_NUMBER) |
438 | 0 | break; |
439 | 0 | } |
440 | | /*For caller-managed threading, if caller has registered the callback, pass exec_thread as callback function*/ |
441 | 0 | if (openblas_threads_callback_) { |
442 | 0 | #ifndef USE_SIMPLE_THREADED_LEVEL3 |
443 | 0 | for (i = 0; i < num; i ++) |
444 | 0 | queue[i].position = i; |
445 | 0 | #endif |
446 | 0 | openblas_threads_callback_(1, (openblas_dojob_callback) exec_threads, num, sizeof(blas_queue_t), (void*) queue, buf_index); |
447 | 0 | } else { |
448 | |
|
449 | 0 | if (openblas_omp_adaptive_env() != 0) { |
450 | 0 | #pragma omp parallel for num_threads(num) schedule(OMP_SCHED) |
451 | 0 | for (i = 0; i < num; i ++) { |
452 | 0 | #ifndef USE_SIMPLE_THREADED_LEVEL3 |
453 | 0 | queue[i].position = i; |
454 | 0 | #endif |
455 | 0 | exec_threads(omp_get_thread_num(), &queue[i], buf_index); |
456 | 0 | } |
457 | 0 | } else { |
458 | 0 | #pragma omp parallel for schedule(OMP_SCHED) |
459 | 0 | for (i = 0; i < num; i ++) { |
460 | |
|
461 | 0 | #ifndef USE_SIMPLE_THREADED_LEVEL3 |
462 | 0 | queue[i].position = i; |
463 | 0 | #endif |
464 | |
|
465 | 0 | exec_threads(omp_get_thread_num(), &queue[i], buf_index); |
466 | 0 | } |
467 | 0 | } |
468 | 0 | } |
469 | |
|
470 | 0 | #ifdef HAVE_C11 |
471 | 0 | atomic_store(&blas_buffer_inuse[buf_index], false); |
472 | | #else |
473 | | blas_buffer_inuse[buf_index] = false; |
474 | | #endif |
475 | |
|
476 | 0 | return 0; |
477 | 0 | } |
478 | | |
479 | | #endif |