Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions include/pthreadpool.h
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,25 @@ pthreadpool_t pthreadpool_create(size_t threads_count);
*/
size_t pthreadpool_get_threads_count(pthreadpool_t threadpool);

/**
 * Limit the number of threads used for subsequently submitted tasks.
 *
 * Allows work to be done with fewer threads than are available in the
 * thread pool. The purpose is to mitigate a performance degradation observed
 * when the OS maps a given set of threads onto fewer cores.
 *
 * The function takes no pthreadpool_t argument, so the setting is not tied
 * to one particular pool.
 * NOTE(review): the gcd.c backend keeps this value in a thread_local
 * variable and applies it as min(pool thread count, num_threads) when a task
 * is submitted -- confirm that the other backends behave the same way.
 *
 * @param num_threads number of threads to use for the subsequent tasks
 *                    submitted.
 */
void pthreadpool_set_num_threads_to_use(size_t num_threads);

/**
 * Query the current setting of the number of threads to use.
 *
 * NOTE(review): in the gcd.c backend the value returned before any call to
 * pthreadpool_set_num_threads_to_use() is the UINT_MAX initial value of the
 * backing variable, not the thread count of any pool -- confirm this is the
 * intended contract.
 *
 * @returns The number of threads to be used for the subsequent tasks
 *          submitted.
 */
size_t pthreadpool_get_num_threads_to_use(void);

/**
* Process items on a 1D grid.
*
Expand Down
84 changes: 56 additions & 28 deletions src/fastpath.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_1d_fastpath(
(pthreadpool_task_1d_t)pthreadpool_load_relaxed_void_p(&threadpool->task);
void* const argument = pthreadpool_load_relaxed_void_p(&threadpool->argument);

const size_t threads_count = threadpool->threads_count.value;
const size_t threads_count =
pthreadpool_load_relaxed_size_t(&threadpool->num_threads_to_use);
const size_t range_threshold = -threads_count;

/* Process thread's own range of items */
Expand Down Expand Up @@ -77,7 +78,8 @@ pthreadpool_thread_parallelize_1d_with_thread_fastpath(
&threadpool->task);
void* const argument = pthreadpool_load_relaxed_void_p(&threadpool->argument);

const size_t threads_count = threadpool->threads_count.value;
const size_t threads_count =
pthreadpool_load_relaxed_size_t(&threadpool->num_threads_to_use);
const size_t range_threshold = -threads_count;

/* Process thread's own range of items */
Expand Down Expand Up @@ -126,7 +128,8 @@ PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_1d_with_uarch_fastpath(
}
#endif

const size_t threads_count = threadpool->threads_count.value;
const size_t threads_count =
pthreadpool_load_relaxed_size_t(&threadpool->num_threads_to_use);
const size_t range_threshold = -threads_count;

/* Process thread's own range of items */
Expand Down Expand Up @@ -163,7 +166,8 @@ PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_1d_tile_1d_fastpath(
&threadpool->task);
void* const argument = pthreadpool_load_relaxed_void_p(&threadpool->argument);

const size_t threads_count = threadpool->threads_count.value;
const size_t threads_count =
pthreadpool_load_relaxed_size_t(&threadpool->num_threads_to_use);
const size_t range_threshold = -threads_count;

/* Process thread's own range of items */
Expand Down Expand Up @@ -206,7 +210,8 @@ PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_2d_fastpath(
(pthreadpool_task_2d_t)pthreadpool_load_relaxed_void_p(&threadpool->task);
void* const argument = pthreadpool_load_relaxed_void_p(&threadpool->argument);

const size_t threads_count = threadpool->threads_count.value;
const size_t threads_count =
pthreadpool_load_relaxed_size_t(&threadpool->num_threads_to_use);
const size_t range_threshold = -threads_count;

/* Process thread's own range of items */
Expand Down Expand Up @@ -258,7 +263,8 @@ pthreadpool_thread_parallelize_2d_with_thread_fastpath(
&threadpool->task);
void* const argument = pthreadpool_load_relaxed_void_p(&threadpool->argument);

const size_t threads_count = threadpool->threads_count.value;
const size_t threads_count =
pthreadpool_load_relaxed_size_t(&threadpool->num_threads_to_use);
const size_t range_threshold = -threads_count;

/* Process thread's own range of items */
Expand Down Expand Up @@ -309,7 +315,8 @@ PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_2d_tile_1d_fastpath(
&threadpool->task);
void* const argument = pthreadpool_load_relaxed_void_p(&threadpool->argument);

const size_t threads_count = threadpool->threads_count.value;
const size_t threads_count =
pthreadpool_load_relaxed_size_t(&threadpool->num_threads_to_use);
const size_t range_threshold = -threads_count;

/* Process thread's own range of items */
Expand Down Expand Up @@ -378,7 +385,8 @@ pthreadpool_thread_parallelize_2d_tile_1d_with_uarch_fastpath(
}
#endif

const size_t threads_count = threadpool->threads_count.value;
const size_t threads_count =
pthreadpool_load_relaxed_size_t(&threadpool->num_threads_to_use);
const size_t range_threshold = -threads_count;

/* Process thread's own range of items */
Expand Down Expand Up @@ -449,7 +457,8 @@ pthreadpool_thread_parallelize_2d_tile_1d_with_uarch_with_thread_fastpath(
}
#endif

const size_t threads_count = threadpool->threads_count.value;
const size_t threads_count =
pthreadpool_load_relaxed_size_t(&threadpool->num_threads_to_use);
const size_t range_threshold = -threads_count;

/* Process thread's own range of items */
Expand Down Expand Up @@ -508,7 +517,8 @@ PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_2d_tile_2d_fastpath(
&threadpool->task);
void* const argument = pthreadpool_load_relaxed_void_p(&threadpool->argument);

const size_t threads_count = threadpool->threads_count.value;
const size_t threads_count =
pthreadpool_load_relaxed_size_t(&threadpool->num_threads_to_use);
const size_t range_threshold = -threads_count;

/* Process thread's own range of items */
Expand Down Expand Up @@ -581,7 +591,8 @@ pthreadpool_thread_parallelize_2d_tile_2d_with_uarch_fastpath(
}
#endif

const size_t threads_count = threadpool->threads_count.value;
const size_t threads_count =
pthreadpool_load_relaxed_size_t(&threadpool->num_threads_to_use);
const size_t range_threshold = -threads_count;

/* Process thread's own range of items */
Expand Down Expand Up @@ -644,7 +655,8 @@ PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_3d_fastpath(
(pthreadpool_task_3d_t)pthreadpool_load_relaxed_void_p(&threadpool->task);
void* const argument = pthreadpool_load_relaxed_void_p(&threadpool->argument);

const size_t threads_count = threadpool->threads_count.value;
const size_t threads_count =
pthreadpool_load_relaxed_size_t(&threadpool->num_threads_to_use);
const size_t range_threshold = -threads_count;

/* Process thread's own range of items */
Expand Down Expand Up @@ -706,7 +718,8 @@ PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_3d_tile_1d_fastpath(
&threadpool->task);
void* const argument = pthreadpool_load_relaxed_void_p(&threadpool->argument);

const size_t threads_count = threadpool->threads_count.value;
const size_t threads_count =
pthreadpool_load_relaxed_size_t(&threadpool->num_threads_to_use);
const size_t range_threshold = -threads_count;

/* Process thread's own range of items */
Expand Down Expand Up @@ -773,7 +786,8 @@ pthreadpool_thread_parallelize_3d_tile_1d_with_thread_fastpath(
pthreadpool_load_relaxed_void_p(&threadpool->task);
void* const argument = pthreadpool_load_relaxed_void_p(&threadpool->argument);

const size_t threads_count = threadpool->threads_count.value;
const size_t threads_count =
pthreadpool_load_relaxed_size_t(&threadpool->num_threads_to_use);
const size_t range_threshold = -threads_count;

/* Process thread's own range of items */
Expand Down Expand Up @@ -853,7 +867,8 @@ pthreadpool_thread_parallelize_3d_tile_1d_with_uarch_fastpath(
}
#endif

const size_t threads_count = threadpool->threads_count.value;
const size_t threads_count =
pthreadpool_load_relaxed_size_t(&threadpool->num_threads_to_use);
const size_t range_threshold = -threads_count;

/* Process thread's own range of items */
Expand Down Expand Up @@ -934,7 +949,8 @@ pthreadpool_thread_parallelize_3d_tile_1d_with_uarch_with_thread_fastpath(
}
#endif

const size_t threads_count = threadpool->threads_count.value;
const size_t threads_count =
pthreadpool_load_relaxed_size_t(&threadpool->num_threads_to_use);
const size_t range_threshold = -threads_count;

/* Process thread's own range of items */
Expand Down Expand Up @@ -1003,7 +1019,8 @@ PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_3d_tile_2d_fastpath(
&threadpool->task);
void* const argument = pthreadpool_load_relaxed_void_p(&threadpool->argument);

const size_t threads_count = threadpool->threads_count.value;
const size_t threads_count =
pthreadpool_load_relaxed_size_t(&threadpool->num_threads_to_use);
const size_t range_threshold = -threads_count;

/* Process thread's own range of items */
Expand Down Expand Up @@ -1087,7 +1104,8 @@ pthreadpool_thread_parallelize_3d_tile_2d_with_uarch_fastpath(
}
#endif

const size_t threads_count = threadpool->threads_count.value;
const size_t threads_count =
pthreadpool_load_relaxed_size_t(&threadpool->num_threads_to_use);
const size_t range_threshold = -threads_count;

/* Process thread's own range of items */
Expand Down Expand Up @@ -1161,7 +1179,8 @@ PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_4d_fastpath(
(pthreadpool_task_4d_t)pthreadpool_load_relaxed_void_p(&threadpool->task);
void* const argument = pthreadpool_load_relaxed_void_p(&threadpool->argument);

const size_t threads_count = threadpool->threads_count.value;
const size_t threads_count =
pthreadpool_load_relaxed_size_t(&threadpool->num_threads_to_use);
const size_t range_threshold = -threads_count;

/* Process thread's own range of items */
Expand Down Expand Up @@ -1234,7 +1253,8 @@ PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_4d_tile_1d_fastpath(
&threadpool->task);
void* const argument = pthreadpool_load_relaxed_void_p(&threadpool->argument);

const size_t threads_count = threadpool->threads_count.value;
const size_t threads_count =
pthreadpool_load_relaxed_size_t(&threadpool->num_threads_to_use);
const size_t range_threshold = -threads_count;

/* Process thread's own range of items */
Expand Down Expand Up @@ -1311,7 +1331,8 @@ PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_4d_tile_2d_fastpath(
&threadpool->task);
void* const argument = pthreadpool_load_relaxed_void_p(&threadpool->argument);

const size_t threads_count = threadpool->threads_count.value;
const size_t threads_count =
pthreadpool_load_relaxed_size_t(&threadpool->num_threads_to_use);
const size_t range_threshold = -threads_count;

/* Process thread's own range of items */
Expand Down Expand Up @@ -1405,7 +1426,8 @@ pthreadpool_thread_parallelize_4d_tile_2d_with_uarch_fastpath(
}
#endif

const size_t threads_count = threadpool->threads_count.value;
const size_t threads_count =
pthreadpool_load_relaxed_size_t(&threadpool->num_threads_to_use);
const size_t range_threshold = -threads_count;

/* Process thread's own range of items */
Expand Down Expand Up @@ -1490,7 +1512,8 @@ PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_5d_fastpath(
(pthreadpool_task_5d_t)pthreadpool_load_relaxed_void_p(&threadpool->task);
void* const argument = pthreadpool_load_relaxed_void_p(&threadpool->argument);

const size_t threads_count = threadpool->threads_count.value;
const size_t threads_count =
pthreadpool_load_relaxed_size_t(&threadpool->num_threads_to_use);
const size_t range_threshold = -threads_count;

/* Process thread's own range of items */
Expand Down Expand Up @@ -1573,7 +1596,8 @@ PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_5d_tile_1d_fastpath(
&threadpool->task);
void* const argument = pthreadpool_load_relaxed_void_p(&threadpool->argument);

const size_t threads_count = threadpool->threads_count.value;
const size_t threads_count =
pthreadpool_load_relaxed_size_t(&threadpool->num_threads_to_use);
const size_t range_threshold = -threads_count;

/* Process thread's own range of items */
Expand Down Expand Up @@ -1661,7 +1685,8 @@ PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_5d_tile_2d_fastpath(
&threadpool->task);
void* const argument = pthreadpool_load_relaxed_void_p(&threadpool->argument);

const size_t threads_count = threadpool->threads_count.value;
const size_t threads_count =
pthreadpool_load_relaxed_size_t(&threadpool->num_threads_to_use);
const size_t range_threshold = -threads_count;

/* Process thread's own range of items */
Expand Down Expand Up @@ -1752,7 +1777,8 @@ PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_6d_fastpath(
(pthreadpool_task_6d_t)pthreadpool_load_relaxed_void_p(&threadpool->task);
void* const argument = pthreadpool_load_relaxed_void_p(&threadpool->argument);

const size_t threads_count = threadpool->threads_count.value;
const size_t threads_count =
pthreadpool_load_relaxed_size_t(&threadpool->num_threads_to_use);
const size_t range_threshold = -threads_count;

/* Process thread's own range of items */
Expand Down Expand Up @@ -1846,7 +1872,8 @@ PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_6d_tile_1d_fastpath(
&threadpool->task);
void* const argument = pthreadpool_load_relaxed_void_p(&threadpool->argument);

const size_t threads_count = threadpool->threads_count.value;
const size_t threads_count =
pthreadpool_load_relaxed_size_t(&threadpool->num_threads_to_use);
const size_t range_threshold = -threads_count;

/* Process thread's own range of items */
Expand Down Expand Up @@ -1944,7 +1971,8 @@ PTHREADPOOL_INTERNAL void pthreadpool_thread_parallelize_6d_tile_2d_fastpath(
&threadpool->task);
void* const argument = pthreadpool_load_relaxed_void_p(&threadpool->argument);

const size_t threads_count = threadpool->threads_count.value;
const size_t threads_count =
pthreadpool_load_relaxed_size_t(&threadpool->num_threads_to_use);
const size_t range_threshold = -threads_count;

/* Process thread's own range of items */
Expand Down
13 changes: 12 additions & 1 deletion src/gcd.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
#include "threadpool-object.h"
#include "threadpool-utils.h"

/*
 * Upper bound on the number of threads to use for submitted tasks. Declared
 * thread_local, so every thread has its own copy.
 *
 * Written by pthreadpool_set_num_threads_to_use() and read through
 * pthreadpool_get_num_threads_to_use(); pthreadpool_parallelize() takes the
 * minimum of this value and the pool's thread count, so the UINT_MAX initial
 * value leaves any pool with fewer than UINT_MAX threads unrestricted.
 *
 * NOTE(review): the variable is a size_t but the sentinel is UINT_MAX rather
 * than SIZE_MAX, and it has external linkage (no `static`) -- confirm both
 * are intended.
 */
thread_local size_t max_num_threads = UINT_MAX;

static void thread_main(void* arg, size_t thread_index) {
struct pthreadpool* threadpool = (struct pthreadpool*)arg;
struct thread_info* thread = &threadpool->threads[thread_index];
Expand Down Expand Up @@ -73,6 +75,8 @@ struct pthreadpool* pthreadpool_create(size_t threads_count) {
return NULL;
}
threadpool->threads_count = fxdiv_init_size_t(threads_count);
pthreadpool_store_relaxed_size_t(&threadpool->num_threads_to_use,
threads_count);
for (size_t tid = 0; tid < threads_count; tid++) {
threadpool->threads[tid].thread_number = tid;
}
Expand All @@ -85,6 +89,12 @@ struct pthreadpool* pthreadpool_create(size_t threads_count) {
return threadpool;
}

/*
 * Set the limit on the number of threads used for subsequently submitted
 * tasks. The limit is stored in the thread_local max_num_threads, so it
 * applies to the calling thread only.
 *
 * @param num_threads requested number of threads. A value of 0 is treated
 *                    as 1, because no task can run on zero threads.
 */
void pthreadpool_set_num_threads_to_use(size_t num_threads) {
  /*
   * pthreadpool_parallelize() passes min(pool thread count, limit) to
   * fxdiv_init_size_t(); a limit of zero would make that divisor zero, so
   * always keep at least one thread.
   */
  max_num_threads = (num_threads != 0) ? num_threads : 1;
}

/*
 * Return the current limit on the number of threads used for subsequently
 * submitted tasks, i.e. the calling thread's value of max_num_threads.
 *
 * The parameter list is spelled (void) so that the definition is a real
 * prototype and matches the declaration in pthreadpool.h; an empty () would
 * declare a function with unspecified parameters before C23.
 *
 * @returns the value of max_num_threads for the calling thread.
 */
size_t pthreadpool_get_num_threads_to_use(void) {
  return max_num_threads;
}

PTHREADPOOL_INTERNAL void pthreadpool_parallelize(
struct pthreadpool* threadpool, thread_function_t thread_function,
const void* params, size_t params_size, void* task, void* context,
Expand All @@ -107,7 +117,8 @@ PTHREADPOOL_INTERNAL void pthreadpool_parallelize(

/* Locking of completion_mutex not needed: readers are sleeping on
* command_condvar */
const struct fxdiv_divisor_size_t threads_count = threadpool->threads_count;
const struct fxdiv_divisor_size_t threads_count = fxdiv_init_size_t(min(
threadpool->threads_count.value, pthreadpool_get_num_threads_to_use()));

if (params_size != 0) {
memcpy(&threadpool->params, params, params_size);
Expand Down
Loading