-
Notifications
You must be signed in to change notification settings - Fork 0
fix(thread-safety): eliminate races in log mutex, watcher, and index threads #3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -140,16 +140,41 @@ static char g_log_ring[LOG_RING_SIZE][LOG_LINE_MAX]; | |||||||||||||||||||||
| static int g_log_head = 0; | ||||||||||||||||||||||
| static int g_log_count = 0; | ||||||||||||||||||||||
| static cbm_mutex_t g_log_mutex; | ||||||||||||||||||||||
| static atomic_int g_log_mutex_init = 0; | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| enum { | ||||||||||||||||||||||
| CBM_LOG_MUTEX_UNINIT = 0, | ||||||||||||||||||||||
| CBM_LOG_MUTEX_INITING = 1, | ||||||||||||||||||||||
| CBM_LOG_MUTEX_INITED = 2 | ||||||||||||||||||||||
| }; | ||||||||||||||||||||||
| static atomic_int g_log_mutex_init = CBM_LOG_MUTEX_UNINIT; | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| /* Safe for concurrent callers: only publishes INITED after cbm_mutex_init() | ||||||||||||||||||||||
| * has completed. Callers that lose the CAS race spin until init finishes. */ | ||||||||||||||||||||||
| void cbm_ui_log_init(void) { | ||||||||||||||||||||||
| int state = atomic_load(&g_log_mutex_init); | ||||||||||||||||||||||
| if (state == CBM_LOG_MUTEX_INITED) | ||||||||||||||||||||||
| return; | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| state = CBM_LOG_MUTEX_UNINIT; | ||||||||||||||||||||||
| if (atomic_compare_exchange_strong(&g_log_mutex_init, &state, CBM_LOG_MUTEX_INITING)) { | ||||||||||||||||||||||
| cbm_mutex_init(&g_log_mutex); | ||||||||||||||||||||||
| atomic_store(&g_log_mutex_init, CBM_LOG_MUTEX_INITED); | ||||||||||||||||||||||
| return; | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| /* Another thread is initializing — spin until done */ | ||||||||||||||||||||||
| while (atomic_load(&g_log_mutex_init) != CBM_LOG_MUTEX_INITED) { | ||||||||||||||||||||||
| cbm_usleep(1000); /* 1ms */ | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| /* Called from a log hook — appends a line to the ring buffer (thread-safe) */ | ||||||||||||||||||||||
| void cbm_ui_log_append(const char *line) { | ||||||||||||||||||||||
| if (!line) | ||||||||||||||||||||||
| return; | ||||||||||||||||||||||
| if (!atomic_load(&g_log_mutex_init)) { | ||||||||||||||||||||||
| cbm_mutex_init(&g_log_mutex); | ||||||||||||||||||||||
| atomic_store(&g_log_mutex_init, 1); | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
| /* Ensure mutex is initialized (safe for early single-threaded logging | ||||||||||||||||||||||
| * and concurrent calls via atomic_exchange once-init pattern). */ | ||||||||||||||||||||||
| cbm_ui_log_init(); | ||||||||||||||||||||||
| cbm_mutex_lock(&g_log_mutex); | ||||||||||||||||||||||
| snprintf(g_log_ring[g_log_head], LOG_LINE_MAX, "%s", line); | ||||||||||||||||||||||
| g_log_head = (g_log_head + 1) % LOG_RING_SIZE; | ||||||||||||||||||||||
|
|
@@ -791,6 +816,7 @@ static void handle_index_start(struct mg_connection *c, struct mg_http_message * | |||||||||||||||||||||
| mg_http_reply(c, 500, g_cors_json, "{\"error\":\"thread creation failed\"}"); | ||||||||||||||||||||||
| return; | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
| cbm_thread_detach(&tid); /* Don't leak thread handle */ | ||||||||||||||||||||||
|
||||||||||||||||||||||
| cbm_thread_detach(&tid); /* Don't leak thread handle */ | |
| { | |
| int detach_rc = cbm_thread_detach(&tid); | |
| if (detach_rc != 0) { | |
| fprintf(stderr, | |
| "warning: cbm_thread_detach failed for index job slot %d (path: %s), rc=%d; " | |
| "thread handle may leak\n", | |
| slot, job->root_path, detach_rc); | |
| } | |
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
2. Index job slot reuse race 🐞 Bug ≡ Correctness
handle_index_start() reuses slots when status is 2/3, but index_thread_fn sets status=2/3 and then still reads job->root_path for its final log. With detached threads, a new request can overwrite job->root_path while the old thread is still running, causing a data race and corrupted logging/state.
Agent Prompt
## Issue description
A detached `index_thread_fn` writes `job->status` to 2/3 (making the slot reusable) and then reads `job->root_path` for the final `cbm_log_info`. `handle_index_start` can immediately reuse that slot and overwrite `job->root_path`, creating a race.
## Issue Context
Slots are considered free when `status` is 0/2/3. Once a thread stores 2/3, it must not access any `job` fields that the next request will overwrite.
## Fix Focus Areas
- Ensure that after `job->status` is set to a reusable value, the thread no longer reads/writes any `job` fields (especially `root_path` and `error_msg`).
- Simple options:
- Move the final `cbm_log_info("ui.index.done", ...)` to *before* storing status=2/3, or
- Copy `job->root_path` into a local buffer before storing status and log using the local copy, and do not touch `job` thereafter.
- Consider tightening slot reuse rules (e.g., only reuse when `status==0`, and add an explicit “reset to 0” API/state) if you want stronger guarantees.
- src/ui/http_server.c[725-733]
- src/ui/http_server.c[768-801]
ⓘ Copy this prompt and use it to remediate the issue with your preferred AI generation tools
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,6 +20,7 @@ | |
| #include "foundation/log.h" | ||
| #include "foundation/hash_table.h" | ||
| #include "foundation/compat.h" | ||
| #include "foundation/compat_thread.h" | ||
| #include "foundation/compat_fs.h" | ||
| #include "foundation/str_util.h" | ||
|
|
||
|
|
@@ -50,6 +51,7 @@ struct cbm_watcher { | |
| cbm_index_fn index_fn; | ||
| void *user_data; | ||
| CBMHashTable *projects; /* name → project_state_t* */ | ||
| cbm_mutex_t projects_lock; | ||
| atomic_int stopped; | ||
| }; | ||
|
Comment on lines
51
to
56
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 3. Watcher APIs still racy: cbm_watcher_touch() and cbm_watcher_watch_count() access w->projects without projects_lock, so they can race with watch/unwatch/poll_once and potentially dereference a freed project_state_t. The new lock only covers some access paths, leaving the hash table unsafely shared. Agent Prompt
|
||
|
|
||
|
|
@@ -236,6 +238,7 @@ cbm_watcher_t *cbm_watcher_new(cbm_store_t *store, cbm_index_fn index_fn, void * | |
| w->index_fn = index_fn; | ||
| w->user_data = user_data; | ||
| w->projects = cbm_ht_create(CBM_SZ_32); | ||
| cbm_mutex_init(&w->projects_lock); | ||
| atomic_init(&w->stopped, 0); | ||
| return w; | ||
| } | ||
|
|
@@ -244,8 +247,11 @@ void cbm_watcher_free(cbm_watcher_t *w) { | |
| if (!w) { | ||
| return; | ||
| } | ||
| cbm_mutex_lock(&w->projects_lock); | ||
| cbm_ht_foreach(w->projects, free_state_entry, NULL); | ||
| cbm_ht_free(w->projects); | ||
| cbm_mutex_unlock(&w->projects_lock); | ||
| cbm_mutex_destroy(&w->projects_lock); | ||
| free(w); | ||
| } | ||
|
|
||
|
|
@@ -264,25 +270,38 @@ void cbm_watcher_watch(cbm_watcher_t *w, const char *project_name, const char *r | |
| } | ||
|
|
||
| /* Remove old entry first (key points to state's project_name) */ | ||
| cbm_mutex_lock(&w->projects_lock); | ||
| project_state_t *old = cbm_ht_get(w->projects, project_name); | ||
| if (old) { | ||
| cbm_ht_delete(w->projects, project_name); | ||
| state_free(old); | ||
| } | ||
|
|
||
| project_state_t *s = state_new(project_name, root_path); | ||
| if (!s) { | ||
| cbm_mutex_unlock(&w->projects_lock); | ||
| cbm_log_warn("watcher.watch.oom", "project", project_name, "path", root_path); | ||
| return; | ||
| } | ||
| cbm_ht_set(w->projects, s->project_name, s); | ||
| cbm_mutex_unlock(&w->projects_lock); | ||
| cbm_log_info("watcher.watch", "project", project_name, "path", root_path); | ||
| } | ||
|
|
||
| void cbm_watcher_unwatch(cbm_watcher_t *w, const char *project_name) { | ||
| if (!w || !project_name) { | ||
| return; | ||
| } | ||
| bool removed = false; | ||
| cbm_mutex_lock(&w->projects_lock); | ||
| project_state_t *s = cbm_ht_get(w->projects, project_name); | ||
| if (s) { | ||
| cbm_ht_delete(w->projects, project_name); | ||
| state_free(s); | ||
| removed = true; | ||
| } | ||
| cbm_mutex_unlock(&w->projects_lock); | ||
| if (removed) { | ||
| cbm_log_info("watcher.unwatch", "project", project_name); | ||
| } | ||
| } | ||
|
|
@@ -291,18 +310,23 @@ void cbm_watcher_touch(cbm_watcher_t *w, const char *project_name) { | |
| if (!w || !project_name) { | ||
| return; | ||
| } | ||
| cbm_mutex_lock(&w->projects_lock); | ||
| project_state_t *s = cbm_ht_get(w->projects, project_name); | ||
| if (s) { | ||
| /* Reset backoff — poll immediately on next cycle */ | ||
| s->next_poll_ns = 0; | ||
| } | ||
| cbm_mutex_unlock(&w->projects_lock); | ||
| } | ||
|
|
||
| int cbm_watcher_watch_count(const cbm_watcher_t *w) { | ||
| if (!w) { | ||
| return 0; | ||
| } | ||
| return (int)cbm_ht_count(w->projects); | ||
| cbm_mutex_lock(&((cbm_watcher_t *)w)->projects_lock); | ||
| int count = (int)cbm_ht_count(w->projects); | ||
| cbm_mutex_unlock(&((cbm_watcher_t *)w)->projects_lock); | ||
| return count; | ||
| } | ||
|
|
||
| /* ── Single poll cycle ──────────────────────────────────────────── */ | ||
|
|
@@ -411,17 +435,53 @@ static void poll_project(const char *key, void *val, void *ud) { | |
| s->next_poll_ns = ctx->now + ((int64_t)s->interval_ms * US_PER_MS); | ||
| } | ||
|
|
||
| /* Callback to snapshot project state pointers into an array. */ | ||
| typedef struct { | ||
| project_state_t **items; | ||
| int count; | ||
| int cap; | ||
| } snapshot_ctx_t; | ||
|
|
||
| static void snapshot_project(const char *key, void *val, void *ud) { | ||
| (void)key; | ||
| snapshot_ctx_t *sc = ud; | ||
| if (val && sc->count < sc->cap) { | ||
| sc->items[sc->count++] = val; | ||
| } | ||
| } | ||
|
|
||
| int cbm_watcher_poll_once(cbm_watcher_t *w) { | ||
| if (!w) { | ||
| return 0; | ||
| } | ||
|
|
||
| /* Snapshot project pointers under lock, then poll without holding it. | ||
| * This keeps the critical section small — poll_project does git I/O | ||
| * and may invoke index_fn which runs the full pipeline. */ | ||
| cbm_mutex_lock(&w->projects_lock); | ||
| int n = cbm_ht_count(w->projects); | ||
| if (n == 0) { | ||
| cbm_mutex_unlock(&w->projects_lock); | ||
| return 0; | ||
| } | ||
| project_state_t **snap = malloc(n * sizeof(project_state_t *)); | ||
| if (!snap) { | ||
| cbm_mutex_unlock(&w->projects_lock); | ||
| return 0; | ||
| } | ||
| snapshot_ctx_t sc = {.items = snap, .count = 0, .cap = n}; | ||
| cbm_ht_foreach(w->projects, snapshot_project, &sc); | ||
| cbm_mutex_unlock(&w->projects_lock); | ||
|
|
||
| poll_ctx_t ctx = { | ||
| .w = w, | ||
| .now = now_ns(), | ||
| .reindexed = 0, | ||
| }; | ||
| cbm_ht_foreach(w->projects, poll_project, &ctx); | ||
| for (int i = 0; i < sc.count; i++) { | ||
| poll_project(NULL, snap[i], &ctx); | ||
| } | ||
| free(snap); | ||
| return ctx.reindexed; | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
cbm_ui_log_init() is called here, but cbm_log_set_sink(cbm_ui_log_append) and the early cbm_log_info("server.start", ...) happen earlier in main(). With the new cbm_ui_log_append() guard (!g_log_mutex_init → return), those startup logs will now be dropped. Also, cbm_diag_start() may create a background thread before this call (when enabled), which conflicts with the “before any threads are created” requirement. Please move cbm_ui_log_init() earlier (before setting the log sink / first log / any optional thread creation), or make log initialization automatic inside the UI logging/server setup.