Skip to content

Commit 553031f

Browse files
committed
Restored win32 File
1 parent a9d2b01 commit 553031f

File tree

1 file changed

+87
-137
lines changed

1 file changed

+87
-137
lines changed

driver/others/blas_server_win32.c

Lines changed: 87 additions & 137 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
21
/*********************************************************************/
32
/* Copyright 2009, 2010 The University of Texas at Austin. */
43
/* All rights reserved. */
@@ -68,19 +67,13 @@ int blas_server_avail = 0;
6867

6968
int blas_omp_threads_local = 1;
7069

71-
static void * blas_thread_buffer[MAX_CPU_NUMBER];
72-
7370
/* Local Variables */
7471
static BLASULONG server_lock = 0;
7572

7673
static HANDLE blas_threads [MAX_CPU_NUMBER];
7774
static DWORD blas_threads_id[MAX_CPU_NUMBER];
7875
static volatile int thread_target; // target num of live threads, volatile for cross-thread reads
7976

80-
//Prototypes
81-
static void exec_threads(int , blas_queue_t *, int);
82-
static void adjust_thread_buffers();
83-
8477
//
8578
// Legacy code path
8679
//
@@ -215,8 +208,12 @@ static DWORD WINAPI blas_thread_server(void *arg) {
215208
/* Thread identifier */
216209
BLASLONG cpu = (BLASLONG)arg;
217210

211+
void *buffer, *sa, *sb;
218212
blas_queue_t *queue;
219213

214+
/* Each server thread needs its own buffer */
215+
buffer = blas_memory_alloc(2);
216+
220217
MT_TRACE("Server[%2ld] Thread is started!\n", cpu);
221218

222219
while (1) {
@@ -244,8 +241,84 @@ static DWORD WINAPI blas_thread_server(void *arg) {
244241
LeaveCriticalSection(&queue_lock);
245242

246243
if (queue) {
244+
int (*routine)(blas_arg_t *, void *, void *, void *, void *, BLASLONG) = queue -> routine;
245+
246+
sa = queue -> sa;
247+
sb = queue -> sb;
248+
249+
#ifdef CONSISTENT_FPCSR
250+
__asm__ __volatile__ ("ldmxcsr %0" : : "m" (queue -> sse_mode));
251+
__asm__ __volatile__ ("fldcw %0" : : "m" (queue -> x87_mode));
252+
#endif
253+
254+
MT_TRACE("Server[%2ld] Started. Mode = 0x%03x M = %3ld N=%3ld K=%3ld\n",
255+
cpu, queue->mode, queue-> args ->m, queue->args->n, queue->args->k);
256+
257+
// fprintf(stderr, "queue start[%ld]!!!\n", cpu);
258+
259+
#ifdef MONITOR
260+
main_status[cpu] = MAIN_RUNNING1;
261+
#endif
262+
263+
if (sa == NULL)
264+
sa = (void *)((BLASLONG)buffer + GEMM_OFFSET_A);
265+
266+
if (sb == NULL) {
267+
if (!(queue -> mode & BLAS_COMPLEX)) {
268+
#ifdef EXPRECISION
269+
if ((queue -> mode & BLAS_PREC) == BLAS_XDOUBLE) {
270+
sb = (void *)(((BLASLONG)sa + ((XGEMM_P * XGEMM_Q * sizeof(xdouble)
271+
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
272+
} else
273+
#endif
274+
if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE) {
275+
#ifdef BUILD_DOUBLE
276+
sb = (void *)(((BLASLONG)sa + ((DGEMM_P * DGEMM_Q * sizeof(double)
277+
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
278+
#endif
279+
} else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) {
280+
#ifdef BUILD_SINGLE
281+
sb = (void *)(((BLASLONG)sa + ((SGEMM_P * SGEMM_Q * sizeof(float)
282+
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
283+
#endif
284+
} else {
285+
/* Other types in future */
286+
}
287+
} else {
288+
#ifdef EXPRECISION
289+
if ((queue -> mode & BLAS_PREC) == BLAS_XDOUBLE){
290+
sb = (void *)(((BLASLONG)sa + ((XGEMM_P * XGEMM_Q * 2 * sizeof(xdouble)
291+
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
292+
} else
293+
#endif
294+
if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE){
295+
#ifdef BUILD_COMPLEX16
296+
sb = (void *)(((BLASLONG)sa + ((ZGEMM_P * ZGEMM_Q * 2 * sizeof(double)
297+
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
298+
#endif
299+
} else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) {
300+
#ifdef BUILD_COMPLEX
301+
sb = (void *)(((BLASLONG)sa + ((CGEMM_P * CGEMM_Q * 2 * sizeof(float)
302+
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
303+
#endif
304+
} else {
305+
/* Other types in future */
306+
}
307+
}
308+
queue->sb=sb;
309+
}
310+
311+
#ifdef MONITOR
312+
main_status[cpu] = MAIN_RUNNING2;
313+
#endif
247314

248-
exec_threads(cpu, queue, 0);
315+
if (!(queue -> mode & BLAS_LEGACY)) {
316+
(routine)(queue -> args, queue -> range_m, queue -> range_n, sa, sb, queue -> position);
317+
} else {
318+
legacy_exec(routine, queue -> mode, queue -> args, sb);
319+
}
320+
} else {
321+
continue; //if queue == NULL
249322
}
250323

251324
MT_TRACE("Server[%2ld] Finished!\n", cpu);
@@ -257,6 +330,8 @@ static DWORD WINAPI blas_thread_server(void *arg) {
257330

258331
MT_TRACE("Server[%2ld] Shutdown!\n", cpu);
259332

333+
blas_memory_free(buffer);
334+
260335
return 0;
261336
}
262337

@@ -270,8 +345,6 @@ int blas_thread_init(void) {
270345

271346
LOCK_COMMAND(&server_lock);
272347

273-
adjust_thread_buffers();
274-
275348
MT_TRACE("Initializing Thread(Num. threads = %d)\n", blas_cpu_number);
276349

277350
if (!blas_server_avail) {
@@ -336,14 +409,14 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue) {
336409
}
337410
else
338411
{
339-
blas_queue_t *next_item = work_queue;
412+
blas_queue_t *queue_item = work_queue;
340413

341414
// find the end of the work queue
342-
while (next_item)
343-
next_item = next_item->next;
415+
while (queue_item->next)
416+
queue_item = queue_item->next;
344417

345418
// add new work to the end
346-
next_item = queue;
419+
queue_item->next = queue;
347420
}
348421

349422
LeaveCriticalSection(&queue_lock);
@@ -400,17 +473,6 @@ int exec_blas(BLASLONG num, blas_queue_t *queue) {
400473

401474
if ((num <= 0) || (queue == NULL)) return 0;
402475

403-
//Redirect to caller's callback routine
404-
if (openblas_threads_callback_) {
405-
int buf_index = 0;
406-
#ifndef USE_SIMPLE_THREADED_LEVEL3
407-
for (int i = 0; i < num; i ++)
408-
queue[i].position = i;
409-
#endif
410-
openblas_threads_callback_(1, (openblas_dojob_callback) exec_threads, num, sizeof(blas_queue_t), (void*) queue, buf_index);
411-
return 0;
412-
}
413-
414476
if ((num > 1) && queue -> next)
415477
exec_blas_async(1, queue -> next);
416478

@@ -445,14 +507,6 @@ int BLASFUNC(blas_thread_shutdown)(void) {
445507

446508
LOCK_COMMAND(&server_lock);
447509

448-
//Free buffers allocated for threads
449-
for(i=0; i<MAX_CPU_NUMBER; i++){
450-
if(blas_thread_buffer[i]!=NULL){
451-
blas_memory_free(blas_thread_buffer[i]);
452-
blas_thread_buffer[i]=NULL;
453-
}
454-
}
455-
456510
if (blas_server_avail) {
457511

458512
for (i = 0; i < blas_num_threads - 1; i++) {
@@ -555,108 +609,4 @@ void goto_set_num_threads(int num_threads)
555609
void openblas_set_num_threads(int num)
556610
{
557611
goto_set_num_threads(num);
558-
}
559-
560-
static void adjust_thread_buffers() {
561-
562-
int i=0;
563-
564-
//adjust buffer for each thread
565-
for(i=0; i < blas_cpu_number; i++){
566-
if(blas_thread_buffer[i] == NULL){
567-
blas_thread_buffer[i] = blas_memory_alloc(2);
568-
}
569-
}
570-
for(; i < MAX_CPU_NUMBER; i++){
571-
if(blas_thread_buffer[i] != NULL){
572-
blas_memory_free(blas_thread_buffer[i]);
573-
blas_thread_buffer[i] = NULL;
574-
}
575-
}
576-
}
577-
578-
//Individual thread's work executor; helps in setting up the synchronization environment and calling the inner_threads routine
579-
static void exec_threads(int cpu, blas_queue_t *queue, int buf_index)
580-
{
581-
582-
void *buffer, *sa, *sb;
583-
584-
buffer = blas_thread_buffer[cpu];
585-
sa = queue -> sa;
586-
sb = queue -> sb;
587-
588-
int (*routine)(blas_arg_t *, void *, void *, void *, void *, BLASLONG) = queue -> routine;
589-
590-
#ifdef CONSISTENT_FPCSR
591-
__asm__ __volatile__ ("ldmxcsr %0" : : "m" (queue -> sse_mode));
592-
__asm__ __volatile__ ("fldcw %0" : : "m" (queue -> x87_mode));
593-
#endif
594-
595-
MT_TRACE("Server[%2ld] Started. Mode = 0x%03x M = %3ld N=%3ld K=%3ld\n",
596-
cpu, queue->mode, queue-> args ->m, queue->args->n, queue->args->k);
597-
598-
// fprintf(stderr, "queue start[%ld]!!!\n", cpu);
599-
600-
#ifdef MONITOR
601-
main_status[cpu] = MAIN_RUNNING1;
602-
#endif
603-
604-
if (sa == NULL)
605-
sa = (void *)((BLASLONG)buffer + GEMM_OFFSET_A);
606-
607-
if (sb == NULL) {
608-
if (!(queue -> mode & BLAS_COMPLEX)) {
609-
#ifdef EXPRECISION
610-
if ((queue -> mode & BLAS_PREC) == BLAS_XDOUBLE) {
611-
sb = (void *)(((BLASLONG)sa + ((XGEMM_P * XGEMM_Q * sizeof(xdouble)
612-
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
613-
} else
614-
#endif
615-
if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE) {
616-
#ifdef BUILD_DOUBLE
617-
sb = (void *)(((BLASLONG)sa + ((DGEMM_P * DGEMM_Q * sizeof(double)
618-
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
619-
#endif
620-
} else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) {
621-
#ifdef BUILD_SINGLE
622-
sb = (void *)(((BLASLONG)sa + ((SGEMM_P * SGEMM_Q * sizeof(float)
623-
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
624-
#endif
625-
} else {
626-
/* Other types in future */
627-
}
628-
} else {
629-
#ifdef EXPRECISION
630-
if ((queue -> mode & BLAS_PREC) == BLAS_XDOUBLE){
631-
sb = (void *)(((BLASLONG)sa + ((XGEMM_P * XGEMM_Q * 2 * sizeof(xdouble)
632-
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
633-
} else
634-
#endif
635-
if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE){
636-
#ifdef BUILD_COMPLEX16
637-
sb = (void *)(((BLASLONG)sa + ((ZGEMM_P * ZGEMM_Q * 2 * sizeof(double)
638-
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
639-
#endif
640-
} else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) {
641-
#ifdef BUILD_COMPLEX
642-
sb = (void *)(((BLASLONG)sa + ((CGEMM_P * CGEMM_Q * 2 * sizeof(float)
643-
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
644-
#endif
645-
} else {
646-
/* Other types in future */
647-
}
648-
}
649-
queue->sb=sb;
650-
}
651-
652-
#ifdef MONITOR
653-
main_status[cpu] = MAIN_RUNNING2;
654-
#endif
655-
656-
if (!(queue -> mode & BLAS_LEGACY)) {
657-
(routine)(queue -> args, queue -> range_m, queue -> range_n, sa, sb, queue -> position);
658-
} else {
659-
legacy_exec(routine, queue -> mode, queue -> args, sb);
660-
}
661-
662612
}

0 commit comments

Comments
 (0)