diff --git a/src/fastpath.c b/src/fastpath.c index 7c4196c..0e803b9 100644 --- a/src/fastpath.c +++ b/src/fastpath.c @@ -39,7 +39,7 @@ PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_1d_fastpath( (pthreadpool_task_1d_t)pthreadpool_load_relaxed_void_p(&threadpool->task); void* const argument = pthreadpool_load_relaxed_void_p(&threadpool->argument); - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; const size_t range_threshold = -threads_count; /* Process thread's own range of items */ @@ -77,7 +77,7 @@ pthreadpool_thread_parallelize_1d_with_thread_fastpath( &threadpool->task); void* const argument = pthreadpool_load_relaxed_void_p(&threadpool->argument); - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; const size_t range_threshold = -threads_count; /* Process thread's own range of items */ @@ -126,7 +126,7 @@ PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_1d_with_uarch_fastpath( } #endif - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; const size_t range_threshold = -threads_count; /* Process thread's own range of items */ @@ -163,7 +163,7 @@ PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_1d_tile_1d_fastpath( &threadpool->task); void* const argument = pthreadpool_load_relaxed_void_p(&threadpool->argument); - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; const size_t range_threshold = -threads_count; /* Process thread's own range of items */ @@ -206,7 +206,7 @@ PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_2d_fastpath( (pthreadpool_task_2d_t)pthreadpool_load_relaxed_void_p(&threadpool->task); void* const argument = pthreadpool_load_relaxed_void_p(&threadpool->argument); - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; const size_t range_threshold = -threads_count; /* Process thread's own range of items */ @@ -258,7 +258,7 @@ pthreadpool_thread_parallelize_2d_with_thread_fastpath( &threadpool->task); void* const argument = pthreadpool_load_relaxed_void_p(&threadpool->argument); - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; const size_t range_threshold = -threads_count; /* Process thread's own range of items */ @@ -309,7 +309,7 @@ PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_2d_tile_1d_fastpath( &threadpool->task); void* const argument = pthreadpool_load_relaxed_void_p(&threadpool->argument); - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; const size_t range_threshold = -threads_count; /* Process thread's own range of items */ @@ -378,7 +378,7 @@ pthreadpool_thread_parallelize_2d_tile_1d_with_uarch_fastpath( } #endif - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; const size_t range_threshold = -threads_count; /* Process thread's own range of items */ @@ -449,7 +449,7 @@ pthreadpool_thread_parallelize_2d_tile_1d_with_uarch_with_thread_fastpath( } #endif - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; const size_t range_threshold = -threads_count; /* Process thread's own range of items */ @@ -508,7 +508,7 @@ PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_2d_tile_2d_fastpath( &threadpool->task); void* const argument = pthreadpool_load_relaxed_void_p(&threadpool->argument); - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; const size_t range_threshold = -threads_count; /* Process thread's own range of items */ @@ -581,7 +581,7 @@ pthreadpool_thread_parallelize_2d_tile_2d_with_uarch_fastpath( } #endif - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; const size_t range_threshold = -threads_count; /* Process thread's own range of items */ @@ -644,7 +644,7 @@ PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_3d_fastpath( (pthreadpool_task_3d_t)pthreadpool_load_relaxed_void_p(&threadpool->task); void* const argument = pthreadpool_load_relaxed_void_p(&threadpool->argument); - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; const size_t range_threshold = -threads_count; /* Process thread's own range of items */ @@ -706,7 +706,7 @@ PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_3d_tile_1d_fastpath( &threadpool->task); void* const argument = pthreadpool_load_relaxed_void_p(&threadpool->argument); - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; const size_t range_threshold = -threads_count; /* Process thread's own range of items */ @@ -773,7 +773,7 @@ pthreadpool_thread_parallelize_3d_tile_1d_with_thread_fastpath( pthreadpool_load_relaxed_void_p(&threadpool->task); void* const argument = pthreadpool_load_relaxed_void_p(&threadpool->argument); - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; const size_t range_threshold = -threads_count; /* Process thread's own range of items */ @@ -853,7 +853,7 @@ pthreadpool_thread_parallelize_3d_tile_1d_with_uarch_fastpath( } #endif - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; const size_t range_threshold = -threads_count; /* Process thread's own range of items */ @@ -934,7 +934,7 @@ pthreadpool_thread_parallelize_3d_tile_1d_with_uarch_with_thread_fastpath( } #endif - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; const size_t range_threshold = -threads_count; /* Process thread's own range of items */ @@ -1003,7 +1003,7 @@ PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_3d_tile_2d_fastpath( &threadpool->task); void* const argument = pthreadpool_load_relaxed_void_p(&threadpool->argument); - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; const size_t range_threshold = -threads_count; /* Process thread's own range of items */ @@ -1087,7 +1087,7 @@ pthreadpool_thread_parallelize_3d_tile_2d_with_uarch_fastpath( } #endif - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; const size_t range_threshold = -threads_count; /* Process thread's own range of items */ @@ -1161,7 +1161,7 @@ PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_4d_fastpath( (pthreadpool_task_4d_t)pthreadpool_load_relaxed_void_p(&threadpool->task); void* const argument = pthreadpool_load_relaxed_void_p(&threadpool->argument); - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; const size_t range_threshold = -threads_count; /* Process thread's own range of items */ @@ -1234,7 +1234,7 @@ PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_4d_tile_1d_fastpath( &threadpool->task); void* const argument = pthreadpool_load_relaxed_void_p(&threadpool->argument); - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; const size_t range_threshold = -threads_count; /* Process thread's own range of items */ @@ -1311,7 +1311,7 @@ PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_4d_tile_2d_fastpath( &threadpool->task); void* const argument = pthreadpool_load_relaxed_void_p(&threadpool->argument); - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; const size_t range_threshold = -threads_count; /* Process thread's own range of items */ @@ -1405,7 +1405,7 @@ pthreadpool_thread_parallelize_4d_tile_2d_with_uarch_fastpath( } #endif - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; const size_t range_threshold = -threads_count; /* Process thread's own range of items */ @@ -1490,7 +1490,7 @@ PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_5d_fastpath( (pthreadpool_task_5d_t)pthreadpool_load_relaxed_void_p(&threadpool->task); void* const argument = pthreadpool_load_relaxed_void_p(&threadpool->argument); - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; const size_t range_threshold = -threads_count; /* Process thread's own range of items */ @@ -1573,7 +1573,7 @@ PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_5d_tile_1d_fastpath( &threadpool->task); void* const argument = pthreadpool_load_relaxed_void_p(&threadpool->argument); - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; const size_t range_threshold = -threads_count; /* Process thread's own range of items */ @@ -1661,7 +1661,7 @@ PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_5d_tile_2d_fastpath( &threadpool->task); void* const argument = pthreadpool_load_relaxed_void_p(&threadpool->argument); - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; const size_t range_threshold = -threads_count; /* Process thread's own range of items */ @@ -1752,7 +1752,7 @@ PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_6d_fastpath( (pthreadpool_task_6d_t)pthreadpool_load_relaxed_void_p(&threadpool->task); void* const argument = pthreadpool_load_relaxed_void_p(&threadpool->argument); - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; const size_t range_threshold = -threads_count; /* Process thread's own range of items */ @@ -1846,7 +1846,7 @@ PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_6d_tile_1d_fastpath( &threadpool->task); void* const argument = pthreadpool_load_relaxed_void_p(&threadpool->argument); - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; const size_t range_threshold = -threads_count; /* Process thread's own range of items */ @@ -1944,7 +1944,7 @@ PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_6d_tile_2d_fastpath( &threadpool->task); void* const argument = pthreadpool_load_relaxed_void_p(&threadpool->argument); - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; const size_t range_threshold = -threads_count; /* Process thread's own range of items */ diff --git a/src/portable-api.c b/src/portable-api.c index e7c219b..68e2f2f 100644 --- a/src/portable-api.c +++ b/src/portable-api.c @@ -68,7 +68,7 @@ size_t pthreadpool_get_threads_count(struct pthreadpool* threadpool) { if (threadpool == NULL) { return 1; } - return threadpool->threads_count.value; + return threadpool->threads_count; } static void thread_parallelize_1d(struct pthreadpool* threadpool, @@ -88,7 +88,7 @@ static void thread_parallelize_1d(struct pthreadpool* threadpool, /* There still may be other threads with work */ const size_t thread_number = thread->thread_number; - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; for (size_t tid = modulo_decrement(thread_number, threads_count); tid != thread_number; tid = modulo_decrement(tid, threads_count)) { struct thread_info* other_thread = &threadpool->threads[tid]; @@ -122,7 +122,7 @@ static void thread_parallelize_1d_with_thread(struct pthreadpool* threadpool, } /* There still may be other threads with work */ - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; for (size_t tid = modulo_decrement(thread_number, threads_count); tid != thread_number; tid = modulo_decrement(tid, threads_count)) { struct thread_info* other_thread = &threadpool->threads[tid]; @@ -168,7 +168,7 @@ static void thread_parallelize_1d_with_uarch(struct pthreadpool* threadpool, /* There still may be other threads with work */ const size_t thread_number = thread->thread_number; - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; for (size_t tid = modulo_decrement(thread_number, threads_count); tid != thread_number; tid = modulo_decrement(tid, threads_count)) { struct thread_info* other_thread = &threadpool->threads[tid]; @@ -208,7 +208,7 @@ static void thread_parallelize_1d_tile_1d(struct pthreadpool* threadpool, /* There still may be other threads with work */ const size_t thread_number = thread->thread_number; - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; for (size_t tid = modulo_decrement(thread_number, threads_count); tid != thread_number; tid = modulo_decrement(tid, threads_count)) { struct thread_info* other_thread = &threadpool->threads[tid]; @@ -233,7 +233,7 @@ static void thread_parallelize_1d_tile_1d_dynamic( // Get a handle on the params. struct pthreadpool_1d_tile_1d_dynamic_params* params = &threadpool->params.parallelize_1d_tile_1d_dynamic; - const size_t num_threads = threadpool->threads_count.value; + const size_t num_threads = threadpool->threads_count; const size_t range_i = params->range; const size_t tile_i = params->tile; const pthreadpool_task_1d_tile_1d_dynamic_t task = @@ -289,7 +289,7 @@ static void thread_parallelize_1d_tile_1d_dynamic_with_thread( // Get a handle on the params. struct pthreadpool_1d_tile_1d_dynamic_params* params = &threadpool->params.parallelize_1d_tile_1d_dynamic; - const size_t num_threads = threadpool->threads_count.value; + const size_t num_threads = threadpool->threads_count; const size_t range_i = params->range; const size_t tile_i = params->tile; const pthreadpool_task_1d_tile_1d_dynamic_with_id_t task = @@ -345,7 +345,7 @@ static void thread_parallelize_1d_tile_1d_dynamic_with_uarch_with_thread( // Get a handle on the params. struct pthreadpool_1d_tile_1d_dynamic_with_uarch_params* params = &threadpool->params.parallelize_1d_tile_1d_dynamic_with_uarch; - const size_t num_threads = threadpool->threads_count.value; + const size_t num_threads = threadpool->threads_count; const size_t range_i = params->range; const size_t tile_i = params->tile; const pthreadpool_task_1d_tile_1d_dynamic_with_id_with_thread_t task = @@ -432,7 +432,7 @@ static void thread_parallelize_2d(struct pthreadpool* threadpool, /* There still may be other threads with work */ const size_t thread_number = thread->thread_number; - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; for (size_t tid = modulo_decrement(thread_number, threads_count); tid != thread_number; tid = modulo_decrement(tid, threads_count)) { struct thread_info* other_thread = &threadpool->threads[tid]; @@ -480,7 +480,7 @@ static void thread_parallelize_2d_with_thread(struct pthreadpool* threadpool, } /* There still may be other threads with work */ - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; for (size_t tid = modulo_decrement(thread_number, threads_count); tid != thread_number; tid = modulo_decrement(tid, threads_count)) { struct thread_info* other_thread = &threadpool->threads[tid]; @@ -531,7 +531,7 @@ static void thread_parallelize_2d_tile_1d(struct pthreadpool* threadpool, /* There still may be other threads with work */ const size_t thread_number = thread->thread_number; - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; for (size_t tid = modulo_decrement(thread_number, threads_count); tid != thread_number; tid = modulo_decrement(tid, threads_count)) { struct thread_info* other_thread = &threadpool->threads[tid]; @@ -598,7 +598,7 @@ static void thread_parallelize_2d_tile_1d_with_uarch( /* There still may be other threads with work */ const size_t thread_number = thread->thread_number; - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; for (size_t tid = modulo_decrement(thread_number, threads_count); tid != thread_number; tid = modulo_decrement(tid, threads_count)) { struct thread_info* other_thread = &threadpool->threads[tid]; @@ -666,7 +666,7 @@ static void thread_parallelize_2d_tile_1d_with_uarch_with_thread( } /* There still may be other threads with work */ - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; for (size_t tid = modulo_decrement(thread_number, threads_count); tid != thread_number; tid = modulo_decrement(tid, threads_count)) { struct thread_info* other_thread = &threadpool->threads[tid]; @@ -694,7 +694,7 @@ static void thread_parallelize_2d_tile_1d_dynamic( // Get a handle on the params. struct pthreadpool_2d_tile_1d_dynamic_params* params = &threadpool->params.parallelize_2d_tile_1d_dynamic; - const size_t num_threads = threadpool->threads_count.value; + const size_t num_threads = threadpool->threads_count; const size_t range_j = params->range_j; const size_t tile_j = params->tile_j; const size_t tile_range_j = divide_round_up(range_j, tile_j); @@ -764,7 +764,7 @@ static void thread_parallelize_2d_tile_1d_dynamic_with_thread( // Get a handle on the params. struct pthreadpool_2d_tile_1d_dynamic_params* params = &threadpool->params.parallelize_2d_tile_1d_dynamic; - const size_t num_threads = threadpool->threads_count.value; + const size_t num_threads = threadpool->threads_count; const size_t range_j = params->range_j; const size_t tile_j = params->tile_j; const size_t tile_range_j = divide_round_up(range_j, tile_j); @@ -834,7 +834,7 @@ static void thread_parallelize_2d_tile_1d_dynamic_with_uarch_with_thread( // Get a handle on the params. struct pthreadpool_2d_tile_1d_dynamic_with_uarch_params* params = &threadpool->params.parallelize_2d_tile_1d_dynamic_with_uarch; - const size_t num_threads = threadpool->threads_count.value; + const size_t num_threads = threadpool->threads_count; const size_t range_j = params->range_j; const size_t tile_j = params->tile_j; const size_t tile_range_j = divide_round_up(range_j, tile_j); @@ -942,7 +942,7 @@ static void thread_parallelize_2d_tile_2d(struct pthreadpool* threadpool, /* There still may be other threads with work */ const size_t thread_number = thread->thread_number; - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; for (size_t tid = modulo_decrement(thread_number, threads_count); tid != thread_number; tid = modulo_decrement(tid, threads_count)) { struct thread_info* other_thread = &threadpool->threads[tid]; @@ -1015,7 +1015,7 @@ static void thread_parallelize_2d_tile_2d_with_uarch( /* There still may be other threads with work */ const size_t thread_number = thread->thread_number; - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; for (size_t tid = modulo_decrement(thread_number, threads_count); tid != thread_number; tid = modulo_decrement(tid, threads_count)) { struct thread_info* other_thread = &threadpool->threads[tid]; @@ -1044,7 +1044,7 @@ static void thread_parallelize_2d_tile_2d_dynamic( // Get a handle on the params. struct pthreadpool_2d_tile_2d_dynamic_params* params = &threadpool->params.parallelize_2d_tile_2d_dynamic; - const size_t num_threads = threadpool->threads_count.value; + const size_t num_threads = threadpool->threads_count; const size_t range_i = params->range_i; const size_t range_j = params->range_j; const size_t tile_i = params->tile_i; @@ -1146,7 +1146,7 @@ static void thread_parallelize_2d_tile_2d_dynamic_with_uarch( // Get a handle on the params. struct pthreadpool_2d_tile_2d_dynamic_with_uarch_params* params = &threadpool->params.parallelize_2d_tile_2d_dynamic_with_uarch; - const size_t num_threads = threadpool->threads_count.value; + const size_t num_threads = threadpool->threads_count; const size_t range_i = params->range_i; const size_t range_j = params->range_j; const size_t tile_i = params->tile_i; @@ -1234,7 +1234,7 @@ static void thread_parallelize_2d_tile_2d_dynamic_with_thread( // Get a handle on the params. struct pthreadpool_2d_tile_2d_dynamic_params* params = &threadpool->params.parallelize_2d_tile_2d_dynamic; - const size_t num_threads = threadpool->threads_count.value; + const size_t num_threads = threadpool->threads_count; const size_t range_i = params->range_i; const size_t range_j = params->range_j; const size_t tile_i = params->tile_i; @@ -1351,7 +1351,7 @@ static void thread_parallelize_3d(struct pthreadpool* threadpool, /* There still may be other threads with work */ const size_t thread_number = thread->thread_number; - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; for (size_t tid = modulo_decrement(thread_number, threads_count); tid != thread_number; tid = modulo_decrement(tid, threads_count)) { struct thread_info* other_thread = &threadpool->threads[tid]; @@ -1413,7 +1413,7 @@ static void thread_parallelize_3d_tile_1d(struct pthreadpool* threadpool, /* There still may be other threads with work */ const size_t thread_number = thread->thread_number; - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; for (size_t tid = modulo_decrement(thread_number, threads_count); tid != thread_number; tid = modulo_decrement(tid, threads_count)) { struct thread_info* other_thread = &threadpool->threads[tid]; @@ -1477,7 +1477,7 @@ static void thread_parallelize_3d_tile_1d_with_thread( } /* There still may be other threads with work */ - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; for (size_t tid = modulo_decrement(thread_number, threads_count); tid != thread_number; tid = modulo_decrement(tid, threads_count)) { struct thread_info* other_thread = &threadpool->threads[tid]; @@ -1554,7 +1554,7 @@ static void thread_parallelize_3d_tile_1d_with_uarch( /* There still may be other threads with work */ const size_t thread_number = thread->thread_number; - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; for (size_t tid = modulo_decrement(thread_number, threads_count); tid != thread_number; tid = modulo_decrement(tid, threads_count)) { struct thread_info* other_thread = &threadpool->threads[tid]; @@ -1632,7 +1632,7 @@ static void thread_parallelize_3d_tile_1d_with_uarch_with_thread( } /* There still may be other threads with work */ - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; for (size_t tid = modulo_decrement(thread_number, threads_count); tid != thread_number; tid = modulo_decrement(tid, threads_count)) { struct thread_info* other_thread = &threadpool->threads[tid]; @@ -1662,7 +1662,7 @@ static void thread_parallelize_3d_tile_1d_dynamic( // Get a handle on the params. struct pthreadpool_3d_tile_1d_dynamic_params* params = &threadpool->params.parallelize_3d_tile_1d_dynamic; - const size_t num_threads = threadpool->threads_count.value; + const size_t num_threads = threadpool->threads_count; const size_t range_j = params->range_j; const size_t range_k = params->range_k; const size_t tile_k = params->tile_k; @@ -1737,7 +1737,7 @@ static void thread_parallelize_3d_tile_1d_dynamic_with_thread( // Get a handle on the params. struct pthreadpool_3d_tile_1d_dynamic_params* params = &threadpool->params.parallelize_3d_tile_1d_dynamic; - const size_t num_threads = threadpool->threads_count.value; + const size_t num_threads = threadpool->threads_count; const size_t range_j = params->range_j; const size_t range_k = params->range_k; const size_t tile_k = params->tile_k; @@ -1812,7 +1812,7 @@ static void thread_parallelize_3d_tile_1d_dynamic_with_uarch_with_thread( // Get a handle on the params. struct pthreadpool_3d_tile_1d_dynamic_with_uarch_params* params = &threadpool->params.parallelize_3d_tile_1d_dynamic_with_uarch; - const size_t num_threads = threadpool->threads_count.value; + const size_t num_threads = threadpool->threads_count; const size_t range_j = params->range_j; const size_t range_k = params->range_k; const size_t tile_k = params->tile_k; @@ -1938,7 +1938,7 @@ static void thread_parallelize_3d_tile_2d(struct pthreadpool* threadpool, /* There still may be other threads with work */ const size_t thread_number = thread->thread_number; - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; for (size_t tid = modulo_decrement(thread_number, threads_count); tid != thread_number; tid = modulo_decrement(tid, threads_count)) { struct thread_info* other_thread = &threadpool->threads[tid]; @@ -2022,7 +2022,7 @@ static void thread_parallelize_3d_tile_2d_with_uarch( /* There still may be other threads with work */ const size_t thread_number = thread->thread_number; - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; for (size_t tid = modulo_decrement(thread_number, threads_count); tid != thread_number; tid = modulo_decrement(tid, threads_count)) { struct thread_info* other_thread = &threadpool->threads[tid]; @@ -2053,7 +2053,7 @@ static void thread_parallelize_3d_tile_2d_dynamic( // Get a handle on the params. struct pthreadpool_3d_tile_2d_dynamic_params* params = &threadpool->params.parallelize_3d_tile_2d_dynamic; - const size_t num_threads = threadpool->threads_count.value; + const size_t num_threads = threadpool->threads_count; const size_t range_j = params->range_j; const size_t range_k = params->range_k; const size_t tile_j = params->tile_j; @@ -2168,7 +2168,7 @@ static void thread_parallelize_3d_tile_2d_dynamic_with_uarch( // Get a handle on the params. struct pthreadpool_3d_tile_2d_dynamic_with_uarch_params* params = &threadpool->params.parallelize_3d_tile_2d_dynamic_with_uarch; - const size_t num_threads = threadpool->threads_count.value; + const size_t num_threads = threadpool->threads_count; const size_t range_j = params->range_j; const size_t range_k = params->range_k; const size_t tile_j = params->tile_j; @@ -2271,7 +2271,7 @@ static void thread_parallelize_3d_tile_2d_dynamic_with_thread( // Get a handle on the params. struct pthreadpool_3d_tile_2d_dynamic_params* params = &threadpool->params.parallelize_3d_tile_2d_dynamic; - const size_t num_threads = threadpool->threads_count.value; + const size_t num_threads = threadpool->threads_count; const size_t range_j = params->range_j; const size_t range_k = params->range_k; const size_t tile_j = params->tile_j; @@ -2412,7 +2412,7 @@ static void thread_parallelize_4d(struct pthreadpool* threadpool, /* There still may be other threads with work */ const size_t thread_number = thread->thread_number; - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; for (size_t tid = modulo_decrement(thread_number, threads_count); tid != thread_number; tid = modulo_decrement(tid, threads_count)) { struct thread_info* other_thread = &threadpool->threads[tid]; @@ -2485,7 +2485,7 @@ static void thread_parallelize_4d_tile_1d(struct pthreadpool* threadpool, /* There still may be other threads with work */ const size_t thread_number = thread->thread_number; - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; for (size_t tid = modulo_decrement(thread_number, threads_count); tid != thread_number; tid = modulo_decrement(tid, threads_count)) { struct thread_info* other_thread = &threadpool->threads[tid]; @@ -2562,7 +2562,7 @@ static void thread_parallelize_4d_tile_2d(struct pthreadpool* threadpool, /* There still may be other threads with work */ const size_t thread_number = thread->thread_number; - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; for (size_t tid = modulo_decrement(thread_number, threads_count); tid != thread_number; tid = modulo_decrement(tid, threads_count)) { struct thread_info* other_thread = &threadpool->threads[tid]; @@ -2656,7 +2656,7 @@ static void thread_parallelize_4d_tile_2d_with_uarch( /* There still may be other threads with work */ const size_t thread_number = thread->thread_number; - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; for (size_t tid = modulo_decrement(thread_number, threads_count); tid != thread_number; tid = modulo_decrement(tid, threads_count)) { struct thread_info* other_thread = &threadpool->threads[tid]; @@ -2690,7 +2690,7 @@ static void thread_parallelize_4d_tile_2d_dynamic( // Get a handle on the params. struct pthreadpool_4d_tile_2d_dynamic_params* params = &threadpool->params.parallelize_4d_tile_2d_dynamic; - const size_t num_threads = threadpool->threads_count.value; + const size_t num_threads = threadpool->threads_count; const size_t range_j = params->range_j; const size_t range_k = params->range_k; const size_t range_l = params->range_l; @@ -2816,7 +2816,7 @@ static void thread_parallelize_4d_tile_2d_dynamic_with_uarch( // Get a handle on the params. struct pthreadpool_4d_tile_2d_dynamic_with_uarch_params* params = &threadpool->params.parallelize_4d_tile_2d_dynamic_with_uarch; - const size_t num_threads = threadpool->threads_count.value; + const size_t num_threads = threadpool->threads_count; const size_t range_j = params->range_j; const size_t range_k = params->range_k; const size_t range_l = params->range_l; @@ -2975,7 +2975,7 @@ static void thread_parallelize_5d(struct pthreadpool* threadpool, /* There still may be other threads with work */ const size_t thread_number = thread->thread_number; - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; for (size_t tid = modulo_decrement(thread_number, threads_count); tid != thread_number; tid = modulo_decrement(tid, threads_count)) { struct thread_info* other_thread = &threadpool->threads[tid]; @@ -3058,7 +3058,7 @@ static void thread_parallelize_5d_tile_1d(struct pthreadpool* threadpool, /* There still may be other threads with work */ const size_t thread_number = thread->thread_number; - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; for (size_t tid = modulo_decrement(thread_number, threads_count); tid != thread_number; tid = modulo_decrement(tid, threads_count)) { struct thread_info* other_thread = &threadpool->threads[tid]; @@ -3146,7 +3146,7 @@ static void thread_parallelize_5d_tile_2d(struct pthreadpool* threadpool, /* There still may be other threads with work */ const size_t thread_number = thread->thread_number; - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; for (size_t tid = modulo_decrement(thread_number, threads_count); tid != thread_number; tid = modulo_decrement(tid, threads_count)) { struct thread_info* other_thread = &threadpool->threads[tid]; @@ -3236,7 +3236,7 @@ static void thread_parallelize_6d(struct pthreadpool* threadpool, /* There still may be other threads with work */ const size_t thread_number = thread->thread_number; - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; for (size_t tid = modulo_decrement(thread_number, threads_count); tid != thread_number; tid = modulo_decrement(tid, threads_count)) { struct thread_info* other_thread = &threadpool->threads[tid]; @@ -3330,7 +3330,7 @@ static void thread_parallelize_6d_tile_1d(struct pthreadpool* threadpool, /* There still may be other threads with work */ const size_t thread_number = thread->thread_number; - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; for (size_t tid = modulo_decrement(thread_number, threads_count); tid != thread_number; tid = modulo_decrement(tid, threads_count)) { struct thread_info* other_thread = &threadpool->threads[tid]; @@ -3429,7 +3429,7 @@ static void thread_parallelize_6d_tile_2d(struct pthreadpool* threadpool, /* There still may be other threads with work */ const size_t thread_number = thread->thread_number; - const size_t threads_count = threadpool->threads_count.value; + const size_t threads_count = threadpool->threads_count; for (size_t tid = modulo_decrement(thread_number, threads_count); tid != thread_number; tid = modulo_decrement(tid, threads_count)) { struct thread_info* other_thread = &threadpool->threads[tid]; @@ -3463,8 +3463,8 @@ void pthreadpool_parallelize_1d(struct pthreadpool* threadpool, pthreadpool_task_1d_t function, void* context, size_t range, uint32_t flags) { size_t threads_count; - if (threadpool == NULL || - (threads_count = threadpool->threads_count.value) <= 1 || range <= 1) { + if (threadpool == NULL || (threads_count = threadpool->threads_count) <= 1 || + range <= 1) { /* No thread pool used: execute task sequentially on the calling thread */ struct fpu_state saved_fpu_state = {0}; if (flags & PTHREADPOOL_FLAG_DISABLE_DENORMALS) { @@ -3494,8 +3494,8 @@ void pthreadpool_parallelize_1d_with_thread( struct pthreadpool* threadpool, pthreadpool_task_1d_with_thread_t function, void* context, size_t range, uint32_t flags) { size_t threads_count; - if (threadpool == NULL || - (threads_count = threadpool->threads_count.value) <= 1 || range <= 1) { + if (threadpool == NULL || (threads_count = threadpool->threads_count) <= 1 || + range <= 1) { /* No thread pool used: execute task sequentially on the calling thread */ struct fpu_state saved_fpu_state = {0}; if (flags & PTHREADPOOL_FLAG_DISABLE_DENORMALS) { @@ -3528,8 +3528,8 @@ void pthreadpool_parallelize_1d_with_uarch( void* context, uint32_t default_uarch_index, uint32_t max_uarch_index, size_t range, uint32_t flags) { size_t threads_count; - if (threadpool == NULL || - (threads_count = threadpool->threads_count.value) <= 1 || range <= 1) { + if (threadpool == NULL || (threads_count = threadpool->threads_count) <= 1 || + range <= 1) { /* No thread pool used: execute task sequentially on the calling thread */ uint32_t uarch_index = default_uarch_index; @@ -3576,8 +3576,8 @@ void pthreadpool_parallelize_1d_tile_1d(pthreadpool_t threadpool, void* context, size_t range, size_t tile, uint32_t flags) { size_t threads_count; - if (threadpool == NULL || - (threads_count = threadpool->threads_count.value) <= 1 || range <= tile) { + if (threadpool == NULL || (threads_count = threadpool->threads_count) <= 1 || + range <= tile) { /* No thread pool used: execute task sequentially on the calling thread */ struct fpu_state saved_fpu_state = {0}; if (flags & PTHREADPOOL_FLAG_DISABLE_DENORMALS) { @@ -3614,8 +3614,8 @@ void pthreadpool_parallelize_1d_tile_1d_dynamic( pthreadpool_t threadpool, pthreadpool_task_1d_tile_1d_dynamic_t function, void* context, size_t range, size_t tile, uint32_t flags) { size_t threads_count; - if (threadpool == NULL || - (threads_count = threadpool->threads_count.value) <= 1 || range <= tile) { + if (threadpool == NULL || (threads_count = threadpool->threads_count) <= 1 || + range <= tile) { /* No thread pool used: execute task sequentially on the calling thread */ struct fpu_state saved_fpu_state = {0}; if (flags & PTHREADPOOL_FLAG_DISABLE_DENORMALS) { @@ -3643,8 +3643,8 @@ void pthreadpool_parallelize_1d_tile_1d_dynamic_with_thread( pthreadpool_task_1d_tile_1d_dynamic_with_id_t function, void* context, size_t range, size_t tile, uint32_t flags) { size_t threads_count; - if (threadpool == NULL || - (threads_count = threadpool->threads_count.value) <= 1 || range <= tile) { + if (threadpool == NULL || (threads_count = threadpool->threads_count) <= 1 || + range <= tile) { /* No thread pool used: execute task sequentially on the calling thread */ struct fpu_state saved_fpu_state = {0}; if (flags & PTHREADPOOL_FLAG_DISABLE_DENORMALS) { @@ -3673,8 +3673,8 @@ void pthreadpool_parallelize_1d_tile_1d_dynamic_with_uarch_with_thread( void* context, uint32_t default_uarch_index, uint32_t max_uarch_index, size_t range, size_t tile, uint32_t flags) { size_t threads_count; - if (threadpool == NULL || - (threads_count = threadpool->threads_count.value) <= 1 || range <= tile) { + if (threadpool == NULL || (threads_count = threadpool->threads_count) <= 1 || + range <= tile) { uint32_t uarch_index = default_uarch_index; #if PTHREADPOOL_USE_CPUINFO uarch_index = @@ -3714,8 +3714,7 @@ void pthreadpool_parallelize_2d(pthreadpool_t threadpool, size_t range_i, size_t range_j, uint32_t flags) { size_t threads_count; - if (threadpool == NULL || - (threads_count = threadpool->threads_count.value) <= 1 || + if (threadpool == NULL || (threads_count = threadpool->threads_count) <= 1 || (range_i | range_j) <= 1) { /* No thread pool used: execute task sequentially on the calling thread */ struct fpu_state saved_fpu_state = {0}; @@ -3752,8 +3751,7 @@ void pthreadpool_parallelize_2d_with_thread( pthreadpool_t threadpool, pthreadpool_task_2d_with_thread_t function, void* context, size_t range_i, size_t range_j, uint32_t flags) { size_t threads_count; - if (threadpool == NULL || - (threads_count = threadpool->threads_count.value) <= 1 || + if (threadpool == NULL || (threads_count = threadpool->threads_count) <= 1 || (range_i | range_j) <= 1) { /* No thread pool used: execute task sequentially on the calling thread */ struct fpu_state saved_fpu_state = {0}; @@ -3794,8 +3792,7 @@ void pthreadpool_parallelize_2d_tile_1d(pthreadpool_t threadpool, size_t range_j, size_t tile_j, uint32_t flags) { size_t threads_count; - if (threadpool == NULL || - (threads_count = threadpool->threads_count.value) <= 1 || + if (threadpool == NULL || (threads_count = threadpool->threads_count) <= 1 || (range_i <= 1 && range_j <= tile_j)) { /* No thread pool used: execute task sequentially on the calling thread */ struct fpu_state saved_fpu_state = {0}; @@ -3838,8 +3835,7 @@ void pthreadpool_parallelize_2d_tile_1d_with_uarch( void* context, uint32_t default_uarch_index, uint32_t max_uarch_index, size_t range_i, size_t range_j, size_t tile_j, uint32_t flags) { size_t threads_count; - if (threadpool == NULL || - (threads_count = threadpool->threads_count.value) <= 1 || + if (threadpool == NULL || (threads_count = threadpool->threads_count) <= 1 || (range_i <= 1 && range_j <= tile_j)) { /* No thread pool used: execute task sequentially on the calling thread */ @@ -3896,8 +3892,7 @@ void pthreadpool_parallelize_2d_tile_1d_with_uarch_with_thread( uint32_t default_uarch_index, uint32_t max_uarch_index, size_t range_i, size_t range_j, size_t tile_j, uint32_t flags) { size_t threads_count; - if (threadpool == NULL || - (threads_count = threadpool->threads_count.value) <= 1 || + if (threadpool == NULL || (threads_count = threadpool->threads_count) <= 1 || (range_i <= 1 && range_j <= tile_j)) { /* No thread pool used: execute task sequentially on the calling thread */ @@ -3952,7 +3947,7 @@ void pthreadpool_parallelize_2d_tile_1d_dynamic( pthreadpool_t threadpool, pthreadpool_task_2d_tile_1d_dynamic_t function, void* context, size_t range_i, size_t range_j, size_t tile_j, uint32_t flags) { - if (threadpool == NULL || threadpool->threads_count.value <= 1 || + if (threadpool == NULL || threadpool->threads_count <= 1 || (range_i <= 1 && range_j <= tile_j)) { /* No thread pool used: execute task sequentially on the calling thread */ struct fpu_state saved_fpu_state = {0}; @@ -3984,7 +3979,7 @@ void pthreadpool_parallelize_2d_tile_1d_dynamic_with_thread( pthreadpool_t threadpool, pthreadpool_task_2d_tile_1d_dynamic_with_id_t function, void* context, size_t range_i, size_t range_j, size_t tile_j, uint32_t flags) { - if (threadpool == NULL || threadpool->threads_count.value <= 1 || + if (threadpool == NULL || threadpool->threads_count <= 1 || (range_i <= 1 && range_j <= tile_j)) { /* No thread pool used: execute task sequentially on the calling thread */ struct fpu_state saved_fpu_state = {0}; @@ -4017,7 +4012,7 @@ void pthreadpool_parallelize_2d_tile_1d_dynamic_with_uarch_with_thread( pthreadpool_task_2d_tile_1d_dynamic_with_id_with_thread_t function, void* context, uint32_t default_uarch_index, uint32_t max_uarch_index, size_t range_i, size_t range_j, size_t tile_j, uint32_t flags) { - if (threadpool == NULL || threadpool->threads_count.value <= 1 || + if (threadpool == NULL || threadpool->threads_count <= 1 || (range_i <= 1 && range_j <= tile_j)) { uint32_t uarch_index = default_uarch_index; #if PTHREADPOOL_USE_CPUINFO @@ -4064,8 +4059,7 @@ void pthreadpool_parallelize_2d_tile_2d(pthreadpool_t threadpool, size_t range_j, size_t tile_i, size_t tile_j, uint32_t flags) { size_t threads_count; - if (threadpool == NULL || - (threads_count = threadpool->threads_count.value) <= 1 || + if (threadpool == NULL || (threads_count = threadpool->threads_count) <= 1 || (range_i <= tile_i && range_j <= tile_j)) { /* No thread pool used: execute task sequentially on the calling thread */ struct fpu_state saved_fpu_state = {0}; @@ -4111,7 +4105,7 @@ void pthreadpool_parallelize_2d_tile_2d_dynamic( pthreadpool_t threadpool, pthreadpool_task_2d_tile_2d_dynamic_t function, void* context, size_t range_i, size_t range_j, size_t tile_i, size_t tile_j, uint32_t flags) { - if (threadpool == NULL || threadpool->threads_count.value <= 1 || + if (threadpool == NULL || threadpool->threads_count <= 1 || (range_i <= tile_i && range_j <= tile_j)) { /* No thread pool used: execute task sequentially on the calling thread */ struct fpu_state saved_fpu_state = {0}; @@ -4151,7 +4145,7 @@ void pthreadpool_parallelize_2d_tile_2d_dynamic_with_uarch( pthreadpool_task_2d_tile_2d_dynamic_with_id_t function, void* context, uint32_t default_uarch_index, uint32_t max_uarch_index, size_t range_i, size_t range_j, size_t tile_i, size_t tile_j, uint32_t flags) { - if (threadpool == NULL || threadpool->threads_count.value <= 1 || + if (threadpool == NULL || threadpool->threads_count <= 1 || (range_i <= tile_i && range_j <= tile_j)) { /* No thread pool used: execute task sequentially on the calling thread */ uint32_t uarch_index = default_uarch_index; @@ -4203,7 +4197,7 @@ void pthreadpool_parallelize_2d_tile_2d_dynamic_with_thread( pthreadpool_task_2d_tile_2d_dynamic_with_id_t function, void* context, size_t range_i, size_t range_j, size_t tile_i, size_t tile_j, uint32_t flags) { - if (threadpool == NULL || threadpool->threads_count.value <= 1 || + if (threadpool == NULL || threadpool->threads_count <= 1 || (range_i <= tile_i && range_j <= tile_j)) { /* No thread pool used: execute task sequentially on the calling thread */ struct fpu_state saved_fpu_state = {0}; @@ -4245,8 +4239,7 @@ void pthreadpool_parallelize_2d_tile_2d_with_uarch( size_t range_i, size_t range_j, size_t tile_i, size_t tile_j, uint32_t flags) { size_t threads_count; - if (threadpool == NULL || - (threads_count = threadpool->threads_count.value) <= 1 || + if (threadpool == NULL || (threads_count = threadpool->threads_count) <= 1 || (range_i <= tile_i && range_j <= tile_j)) { /* No thread pool used: execute task sequentially on the calling thread */ @@ -4306,8 +4299,7 @@ void pthreadpool_parallelize_3d(pthreadpool_t threadpool, size_t range_i, size_t range_j, size_t range_k, uint32_t flags) { size_t threads_count; - if (threadpool == NULL || - (threads_count = threadpool->threads_count.value) <= 1 || + if (threadpool == NULL || (threads_count = threadpool->threads_count) <= 1 || (range_i | range_j | range_k) <= 1) { /* No thread pool used: execute task sequentially on the calling thread */ struct fpu_state saved_fpu_state = {0}; @@ -4349,8 +4341,7 @@ void pthreadpool_parallelize_3d_tile_1d(pthreadpool_t threadpool, size_t range_j, size_t range_k, size_t tile_k, uint32_t flags) { size_t threads_count; - if (threadpool == NULL || - (threads_count = threadpool->threads_count.value) <= 1 || + if (threadpool == NULL || (threads_count = threadpool->threads_count) <= 1 || ((range_i | range_j) <= 1 && range_k <= tile_k)) { /* No thread pool used: execute task sequentially on the calling thread */ struct fpu_state saved_fpu_state = {0}; @@ -4397,8 +4388,7 @@ void pthreadpool_parallelize_3d_tile_1d_with_thread( size_t range_i, size_t range_j, size_t range_k, size_t tile_k, uint32_t flags) { size_t threads_count; - if (threadpool == NULL || - (threads_count = threadpool->threads_count.value) <= 1 || + if (threadpool == NULL || (threads_count = threadpool->threads_count) <= 1 || ((range_i | range_j) <= 1 && range_k <= tile_k)) { /* No thread pool used: execute task sequentially on the calling thread */ struct fpu_state saved_fpu_state = {0}; @@ -4446,8 +4436,7 @@ void pthreadpool_parallelize_3d_tile_1d_with_uarch( size_t range_i, size_t range_j, size_t range_k, size_t tile_k, uint32_t flags) { size_t threads_count; - if (threadpool == NULL || - (threads_count = threadpool->threads_count.value) <= 1 || + if (threadpool == NULL || (threads_count = threadpool->threads_count) <= 1 || ((range_i | range_j) <= 1 && range_k <= tile_k)) { /* No thread pool used: execute task sequentially on the calling thread */ @@ -4507,8 +4496,7 @@ void pthreadpool_parallelize_3d_tile_1d_with_uarch_with_thread( uint32_t default_uarch_index, uint32_t max_uarch_index, size_t range_i, size_t range_j, size_t range_k, size_t tile_k, uint32_t flags) { size_t threads_count; - if (threadpool == NULL || - (threads_count = threadpool->threads_count.value) <= 1 || + if (threadpool == NULL || (threads_count = threadpool->threads_count) <= 1 || ((range_i | range_j) <= 1 && range_k <= tile_k)) { /* No thread pool used: execute task sequentially on the calling thread */ @@ -4566,7 +4554,7 @@ void pthreadpool_parallelize_3d_tile_1d_dynamic( pthreadpool_t threadpool, pthreadpool_task_3d_tile_1d_dynamic_t function, void* context, size_t range_i, size_t range_j, size_t range_k, size_t tile_k, uint32_t flags) { - if (threadpool == NULL || threadpool->threads_count.value <= 1 || + if (threadpool == NULL || threadpool->threads_count <= 1 || (range_i <= 1 && range_j <= 1 && range_k <= tile_k)) { /* No thread pool used: execute task sequentially on the calling thread */ struct fpu_state saved_fpu_state = {0}; @@ -4602,7 +4590,7 @@ void pthreadpool_parallelize_3d_tile_1d_dynamic_with_thread( pthreadpool_task_3d_tile_1d_dynamic_with_id_t function, void* context, size_t range_i, size_t range_j, size_t range_k, size_t tile_k, uint32_t flags) { - if (threadpool == NULL || threadpool->threads_count.value <= 1 || + if (threadpool == NULL || threadpool->threads_count <= 1 || (range_i <= 1 && range_j <= 1 && range_k <= tile_k)) { /* No thread pool used: execute task sequentially on the calling thread */ struct fpu_state saved_fpu_state = {0}; @@ -4640,7 +4628,7 @@ void pthreadpool_parallelize_3d_tile_1d_dynamic_with_uarch_with_thread( void* context, uint32_t default_uarch_index, uint32_t max_uarch_index, size_t range_i, size_t range_j, size_t range_k, size_t tile_k, uint32_t flags) { - if (threadpool == NULL || threadpool->threads_count.value <= 1 || + if (threadpool == NULL || threadpool->threads_count <= 1 || (range_i <= 1 && range_j <= 1 && range_k <= tile_k)) { /* No thread pool used: execute task sequentially on the calling thread */ uint32_t uarch_index = default_uarch_index; @@ -4691,8 +4679,7 @@ void pthreadpool_parallelize_3d_tile_2d(pthreadpool_t threadpool, size_t tile_j, size_t tile_k, uint32_t flags) { size_t threads_count; - if (threadpool == NULL || - (threads_count = threadpool->threads_count.value) <= 1 || + if (threadpool == NULL || (threads_count = threadpool->threads_count) <= 1 || (range_i <= 1 && range_j <= tile_j && range_k <= tile_k)) { /* No thread pool used: execute task sequentially on the calling thread */ struct fpu_state saved_fpu_state = {0}; @@ -4741,7 +4728,7 @@ void pthreadpool_parallelize_3d_tile_2d_dynamic( pthreadpool_t threadpool, pthreadpool_task_3d_tile_2d_dynamic_t function, void* context, size_t range_i, size_t range_j, size_t range_k, size_t tile_j, size_t tile_k, uint32_t flags) { - if (threadpool == NULL || threadpool->threads_count.value <= 1 || + if (threadpool == NULL || threadpool->threads_count <= 1 || (range_i <= 1 && range_j <= tile_j && range_k <= tile_k)) { /* No thread pool used: execute task sequentially on the calling thread */ struct fpu_state saved_fpu_state = {0}; @@ -4788,7 +4775,7 @@ void pthreadpool_parallelize_3d_tile_2d_dynamic_with_uarch( uint32_t default_uarch_index, uint32_t max_uarch_index, size_t range_i, size_t range_j, size_t range_k, size_t tile_j, size_t tile_k, uint32_t flags) { - if (threadpool == NULL || threadpool->threads_count.value <= 1 || + if (threadpool == NULL || threadpool->threads_count <= 1 || (range_i <= 1 && range_j <= tile_j && range_k <= tile_k)) { /* No thread pool used: execute task sequentially on the calling thread */ uint32_t uarch_index = default_uarch_index; @@ -4845,7 +4832,7 @@ void pthreadpool_parallelize_3d_tile_2d_dynamic_with_thread( pthreadpool_task_3d_tile_2d_dynamic_with_id_t function, void* context, size_t range_i, size_t range_j, size_t range_k, size_t tile_j, size_t tile_k, uint32_t flags) { - if (threadpool == NULL || threadpool->threads_count.value <= 1 || + if (threadpool == NULL || threadpool->threads_count <= 1 || (range_i <= 1 && range_j <= tile_j && range_k <= tile_k)) { /* No thread pool used: execute task sequentially on the calling thread */ struct fpu_state saved_fpu_state = {0}; @@ -4892,8 +4879,7 @@ void pthreadpool_parallelize_3d_tile_2d_with_uarch( size_t range_i, size_t range_j, size_t range_k, size_t tile_j, size_t tile_k, uint32_t flags) { size_t threads_count; - if (threadpool == NULL || - (threads_count = threadpool->threads_count.value) <= 1 || + if (threadpool == NULL || (threads_count = threadpool->threads_count) <= 1 || (range_i <= 1 && range_j <= tile_j && range_k <= tile_k)) { /* No thread pool used: execute task sequentially on the calling thread */ @@ -4956,8 +4942,7 @@ void pthreadpool_parallelize_4d(pthreadpool_t threadpool, size_t range_i, size_t range_j, size_t range_k, size_t range_l, uint32_t flags) { size_t threads_count; - if (threadpool == NULL || - (threads_count = threadpool->threads_count.value) <= 1 || + if (threadpool == NULL || (threads_count = threadpool->threads_count) <= 1 || (range_i | range_j | range_k | range_l) <= 1) { /* No thread pool used: execute task sequentially on the calling thread */ struct fpu_state saved_fpu_state = {0}; @@ -5005,8 +4990,7 @@ void pthreadpool_parallelize_4d_tile_1d(pthreadpool_t threadpool, size_t range_l, size_t tile_l, uint32_t flags) { size_t threads_count; - if (threadpool == NULL || - (threads_count = threadpool->threads_count.value) <= 1 || + if (threadpool == NULL || (threads_count = threadpool->threads_count) <= 1 || ((range_i | range_j | range_k) <= 1 && range_l <= tile_l)) { /* No thread pool used: execute task sequentially on the calling thread */ struct fpu_state saved_fpu_state = {0}; @@ -5059,8 +5043,7 @@ void pthreadpool_parallelize_4d_tile_2d(pthreadpool_t threadpool, size_t range_l, size_t tile_k, size_t tile_l, uint32_t flags) { size_t threads_count; - if (threadpool == NULL || - (threads_count = threadpool->threads_count.value) <= 1 || + if (threadpool == NULL || (threads_count = threadpool->threads_count) <= 1 || ((range_i | range_j) <= 1 && range_k <= tile_k && range_l <= tile_l)) { /* No thread pool used: execute task sequentially on the calling thread */ struct fpu_state saved_fpu_state = {0}; @@ -5115,8 +5098,7 @@ void pthreadpool_parallelize_4d_tile_2d_with_uarch( size_t range_i, size_t range_j, size_t range_k, size_t range_l, size_t tile_k, size_t tile_l, uint32_t flags) { size_t threads_count; - if (threadpool == NULL || - (threads_count = threadpool->threads_count.value) <= 1 || + if (threadpool == NULL || (threads_count = threadpool->threads_count) <= 1 || ((range_i | range_j) <= 1 && range_k <= tile_k && range_l <= tile_l)) { /* No thread pool used: execute task sequentially on the calling thread */ @@ -5182,7 +5164,7 @@ void pthreadpool_parallelize_4d_tile_2d_dynamic( pthreadpool_t threadpool, pthreadpool_task_4d_tile_2d_dynamic_t function, void* context, size_t range_i, size_t range_j, size_t range_k, size_t range_l, size_t tile_k, size_t tile_l, uint32_t flags) { - if (threadpool == NULL || threadpool->threads_count.value <= 1 || + if (threadpool == NULL || threadpool->threads_count <= 1 || (range_i <= 1 && range_j <= 1 && range_k <= tile_k && range_l <= tile_l)) { /* No thread pool used: execute task sequentially on the calling thread */ @@ -5235,7 +5217,7 @@ void pthreadpool_parallelize_4d_tile_2d_dynamic_with_uarch( uint32_t default_uarch_index, uint32_t max_uarch_index, size_t range_i, size_t range_j, size_t range_k, size_t range_l, size_t tile_k, size_t tile_l, uint32_t flags) { - if (threadpool == NULL || threadpool->threads_count.value <= 1 || + if (threadpool == NULL || threadpool->threads_count <= 1 || (range_i <= 1 && range_j <= 1 && range_k <= tile_k && range_l <= tile_l)) { /* No thread pool used: execute task sequentially on the calling thread */ @@ -5299,8 +5281,7 @@ void pthreadpool_parallelize_5d(pthreadpool_t threadpool, size_t range_l, size_t range_m, uint32_t flags) { size_t threads_count; - if (threadpool == NULL || - (threads_count = threadpool->threads_count.value) <= 1 || + if (threadpool == NULL || (threads_count = threadpool->threads_count) <= 1 || (range_i | range_j | range_k | range_l | range_m) <= 1) { /* No thread pool used: execute task sequentially on the calling thread */ struct fpu_state saved_fpu_state = {0}; @@ -5351,8 +5332,7 @@ void pthreadpool_parallelize_5d_tile_1d(pthreadpool_t threadpool, size_t range_l, size_t range_m, size_t tile_m, uint32_t flags) { size_t threads_count; - if (threadpool == NULL || - (threads_count = threadpool->threads_count.value) <= 1 || + if (threadpool == NULL || (threads_count = threadpool->threads_count) <= 1 || ((range_i | range_j | range_k | range_l) <= 1 && range_m <= tile_m)) { /* No thread pool used: execute task sequentially on the calling thread */ struct fpu_state saved_fpu_state = {0}; @@ -5409,8 +5389,7 @@ void pthreadpool_parallelize_5d_tile_2d(pthreadpool_t threadpool, size_t tile_l, size_t tile_m, uint32_t flags) { size_t threads_count; - if (threadpool == NULL || - (threads_count = threadpool->threads_count.value) <= 1 || + if (threadpool == NULL || (threads_count = threadpool->threads_count) <= 1 || ((range_i | range_j | range_k) <= 1 && range_l <= tile_l && range_m <= tile_m)) { /* No thread pool used: execute task sequentially on the calling thread */ @@ -5469,8 +5448,7 @@ void pthreadpool_parallelize_6d(pthreadpool_t threadpool, size_t range_l, size_t range_m, size_t range_n, uint32_t flags) { size_t threads_count; - if (threadpool == NULL || - (threads_count = threadpool->threads_count.value) <= 1 || + if (threadpool == NULL || (threads_count = threadpool->threads_count) <= 1 || (range_i | range_j | range_k | range_l | range_m | range_n) <= 1) { /* No thread pool used: execute task sequentially on the calling thread */ struct fpu_state saved_fpu_state = {0}; @@ -5525,8 +5503,7 @@ void pthreadpool_parallelize_6d_tile_1d(pthreadpool_t threadpool, size_t range_n, size_t tile_n, uint32_t flags) { size_t threads_count; - if (threadpool == NULL || - (threads_count = threadpool->threads_count.value) <= 1 || + if (threadpool == NULL || (threads_count = threadpool->threads_count) <= 1 || ((range_i | range_j | range_k | range_l | range_m) <= 1 && range_n <= tile_n)) { /* No thread pool used: execute task sequentially on the calling thread */ @@ -5587,8 +5564,7 @@ void pthreadpool_parallelize_6d_tile_2d(pthreadpool_t threadpool, size_t range_n, size_t tile_m, size_t tile_n, uint32_t flags) { size_t threads_count; - if (threadpool == NULL || - (threads_count = threadpool->threads_count.value) <= 1 || + if (threadpool == NULL || (threads_count = threadpool->threads_count) <= 1 || ((range_i | range_j | range_k | range_l) <= 1 && range_m <= tile_m && range_n <= tile_n)) { /* No thread pool used: execute task sequentially on the calling thread */ diff --git a/src/pthreads.c b/src/pthreads.c index ea09e53..26109ae 100644 --- a/src/pthreads.c +++ b/src/pthreads.c @@ -166,11 +166,8 @@ size_t pthreadpool_set_threads_count(struct pthreadpool* threadpool, } // Check whether this is really a change. - if (num_threads != threadpool->threads_count.value) { - threadpool->threads_count = fxdiv_init_size_t(num_threads); - pthreadpool_store_release_size_t( - (pthreadpool_atomic_size_t*)&threadpool->threads_count.value, - num_threads); + if (num_threads != threadpool->threads_count) { + pthreadpool_store_release_size_t(&threadpool->threads_count, num_threads); } pthreadpool_log_debug("setting max_num_threads to %zu.", num_threads); @@ -444,34 +441,40 @@ static void ensure_num_threads(struct pthreadpool* threadpool, } // Get the number of required threads. - const uint32_t max_num_threads = pthreadpool_load_acquire_size_t( - (pthreadpool_atomic_size_t*)&threadpool->threads_count.value); + const uint32_t max_num_threads = + pthreadpool_load_acquire_size_t(&threadpool->threads_count); // Start up to two other threads so that we fan out exponentially. - uint32_t num_threads_to_start = 2; // thread_id > 0 ? 2 : 1; + uint32_t num_threads_to_start = 2; /* Schedule any missing threads for this threadpool. */ for (uint32_t tid = thread_id + 1; num_threads_to_start && tid < max_num_threads; tid++) { // Check whether this thread was active, and if not, schedule it. struct thread_info* thread = &threadpool->threads[tid]; - if (!pthreadpool_load_relaxed_uint32_t(&thread->is_active) && - !pthreadpool_exchange_sequentially_consistent_uint32_t( - &thread->is_active, 1)) { - // Make sure there is still ongoing work. - if (thread_id) { - const int32_t curr_active_threads = - pthreadpool_load_consume_int32_t(&threadpool->num_active_threads); - if (curr_active_threads < 0 || - curr_active_threads == PTHREADPOOL_NUM_ACTIVE_THREADS_DONE) { - return; - } + if (!pthreadpool_load_relaxed_uint32_t(&thread->is_active)) { + // Make sure there is still ongoing work (unless we're the main thread). + const int32_t curr_active_threads = + thread_id ? pthreadpool_load_consume_int32_t( + &threadpool->num_active_threads) + : 1; + if (curr_active_threads < 0 || + curr_active_threads == PTHREADPOOL_NUM_ACTIVE_THREADS_DONE) { + return; } - pthreadpool_register_threads(threadpool, 1); - executor->schedule(threadpool->executor_context, thread, - (void (*)(void*))thread_main); - num_threads_to_start--; + // Schedule this worker thread. + if (!pthreadpool_exchange_sequentially_consistent_uint32_t( + &thread->is_active, 1)) { + // Note that `threadpool->num_recruited_threads` is always non-zero + // because this function is only ever called by the main thread or + // another active thread, so we don't have to worry about activating a + // thread on an already stopped threadpool. + pthreadpool_register_threads(threadpool, 1); + executor->schedule(threadpool->executor_context, thread, + (void (*)(void*))thread_main); + num_threads_to_start--; + } } } } @@ -487,13 +490,18 @@ static pthreadpool_thread_return_t thread_main(void* arg) { offsetof(struct pthreadpool, threads)); uint32_t last_job_id = 0; - // Check whether we have to wake up any other threads. - ensure_num_threads(threadpool, thread_id); - // Get the current threadpool state. int32_t curr_active_threads = pthreadpool_load_consume_int32_t(&threadpool->num_active_threads); + // Check whether we have to wake up any other threads. + if (curr_active_threads < 0 || + curr_active_threads == PTHREADPOOL_NUM_ACTIVE_THREADS_DONE) { + ensure_num_threads(threadpool, thread_id); + curr_active_threads = + pthreadpool_load_consume_int32_t(&threadpool->num_active_threads); + } + // Main loop. while (true) { if (curr_active_threads == PTHREADPOOL_NUM_ACTIVE_THREADS_DONE) { @@ -534,8 +542,8 @@ static pthreadpool_thread_return_t thread_main(void* arg) { last_job_id = threadpool->job_id; // Do we already have too many threads working on this? - const uint32_t max_active_threads = pthreadpool_load_acquire_size_t( - (pthreadpool_atomic_size_t*)&threadpool->threads_count.value); + const uint32_t max_active_threads = + pthreadpool_load_acquire_size_t(&threadpool->threads_count); if (curr_active_threads < max_active_threads) { const uint32_t assumed_thread_id = (max_active_threads < threadpool->max_num_threads) @@ -616,7 +624,7 @@ struct pthreadpool* pthreadpool_create_v2(struct pthreadpool_executor* executor, threadpool->executor_context = executor_context; } threadpool->max_num_threads = num_threads; - threadpool->threads_count = fxdiv_init_size_t(num_threads); + threadpool->threads_count = num_threads; for (size_t tid = 0; tid < num_threads; tid++) { threadpool->threads[tid].thread_number = tid; threadpool->threads[tid].threadpool = threadpool; @@ -683,16 +691,16 @@ PTHREADPOOL_INTERNAL void pthreadpool_parallelize( } // How many threads should we parallelize over? - const uint32_t prev_num_threads = threadpool->threads_count.value; + const uint32_t prev_num_threads = threadpool->threads_count; const uint32_t num_threads = min(linear_range, prev_num_threads); - threadpool->threads_count = fxdiv_init_size_t(num_threads); + pthreadpool_store_relaxed_size_t(&threadpool->threads_count, num_threads); pthreadpool_log_debug("main thread starting job %u with %u threads.", (uint32_t)threadpool->job_id, num_threads); /* Populate a `thread_info` struct for each thread */ const struct fxdiv_result_size_t range_params = - fxdiv_divide_size_t(linear_range, threadpool->threads_count); + fxdiv_divide_size_t(linear_range, fxdiv_init_size_t(num_threads)); size_t range_start = 0; for (size_t tid = 0; tid < num_threads; tid++) { struct thread_info* thread = &threadpool->threads[tid]; @@ -735,7 +743,10 @@ PTHREADPOOL_INTERNAL void pthreadpool_parallelize( pthreadpool_fence_acquire(); /* Re-set the number of threads in case it was reduced for this task. */ - threadpool->threads_count = fxdiv_init_size_t(prev_num_threads); + if (prev_num_threads != num_threads) { + pthreadpool_store_relaxed_size_t(&threadpool->threads_count, + prev_num_threads); + } /* Unprotect the global threadpool structures now that we're done. */ pthreadpool_mutex_unlock(&threadpool->execution_mutex); diff --git a/src/threadpool-object.h b/src/threadpool-object.h index 9be2a68..da4066a 100644 --- a/src/threadpool-object.h +++ b/src/threadpool-object.h @@ -1234,9 +1234,9 @@ struct PTHREADPOOL_CACHELINE_ALIGNED pthreadpool { size_t max_num_threads; /** - * FXdiv divisor for the current number of active threads in the thread pool. + * The current number of active threads in the thread pool. */ - struct fxdiv_divisor_size_t threads_count; + pthreadpool_atomic_size_t threads_count; /** * Thread information structures that immediately follow this structure.