Skip to content

Commit 06e52c5

Browse files
gbaraldi authored and KristofferC committed
Fix synchronization issue on the GC scheduler (#53355)
This aims to slightly simplify the synchronization by making `n_threads_marking` the sole memory location of relevance for it. It also removes the fast path, because being protected by the lock is quite important so that the observed GC state arrays are valid. Fixes: #53350 Fixes: #52757 Maybe fixes: #53026 Co-authored-by: Jameson Nash <[email protected]> (cherry picked from commit a96726b)
1 parent 2fdf5df commit 06e52c5

File tree

7 files changed

+37
-57
lines changed

7 files changed

+37
-57
lines changed

src/gc.c

Lines changed: 22 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include "gc.h"
44
#include "gc-page-profiler.h"
55
#include "julia.h"
6+
#include "julia_atomics.h"
67
#include "julia_gcext.h"
78
#include "julia_assert.h"
89
#ifdef __GLIBC__
@@ -540,7 +541,7 @@ void jl_gc_run_all_finalizers(jl_task_t *ct)
540541

541542
void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT
542543
{
543-
assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0);
544+
assert(jl_atomic_load_relaxed(&ptls->gc_state) == JL_GC_STATE_UNSAFE);
544545
arraylist_t *a = &ptls->finalizers;
545546
// This acquire load and the release store at the end are used to
546547
// synchronize with `finalize_object` on another thread. Apart from the GC,
@@ -1676,7 +1677,7 @@ void gc_sweep_wake_all(jl_ptls_t ptls, jl_gc_padded_page_stack_t *new_gc_allocd_
16761677
void gc_sweep_wait_for_all(void)
16771678
{
16781679
jl_atomic_store(&gc_allocd_scratch, NULL);
1679-
while (jl_atomic_load_relaxed(&gc_n_threads_sweeping) != 0) {
1680+
while (jl_atomic_load_acquire(&gc_n_threads_sweeping) != 0) {
16801681
jl_cpu_pause();
16811682
}
16821683
}
@@ -2966,9 +2967,7 @@ void gc_mark_and_steal(jl_ptls_t ptls)
29662967
jl_gc_markqueue_t *mq = &ptls->mark_queue;
29672968
jl_gc_markqueue_t *mq_master = NULL;
29682969
int master_tid = jl_atomic_load(&gc_master_tid);
2969-
if (master_tid == -1) {
2970-
return;
2971-
}
2970+
assert(master_tid != -1);
29722971
mq_master = &gc_all_tls_states[master_tid]->mark_queue;
29732972
void *new_obj;
29742973
jl_gc_chunk_t c;
@@ -3060,54 +3059,37 @@ size_t gc_count_work_in_queue(jl_ptls_t ptls) JL_NOTSAFEPOINT
30603059
* Correctness argument for the mark-loop termination protocol.
30613060
*
30623061
* Safety properties:
3063-
* - No work items shall be in any thread's queues when `gc_mark_loop_barrier` observes
3062+
* - No work items shall be in any thread's queues when `gc_should_mark` observes
30643063
* that `gc_n_threads_marking` is zero.
30653064
*
30663065
* - No work item shall be stolen from the master thread (i.e. mutator thread which started
30673066
* GC and which helped the `jl_n_markthreads` - 1 threads to mark) after
3068-
* `gc_mark_loop_barrier` observes that `gc_n_threads_marking` is zero. This property is
3067+
* `gc_should_mark` observes that `gc_n_threads_marking` is zero. This property is
30693068
* necessary because we call `gc_mark_loop_serial` after marking the finalizer list in
30703069
* `_jl_gc_collect`, and want to ensure that we have the serial mark-loop semantics there,
30713070
* and that no work is stolen from us at that point.
30723071
*
30733072
* Proof:
3074-
* - Suppose the master thread observes that `gc_n_threads_marking` is zero in
3075-
* `gc_mark_loop_barrier` and there is a work item left in one thread's queue at that point.
3076-
* Since threads try to steal from all threads' queues, this implies that all threads must
3077-
* have tried to steal from the queue which still has a work item left, but failed to do so,
3078-
* which violates the semantics of Chase-Lev's work-stealing queue.
3079-
*
3080-
* - Let E1 be the event "master thread writes -1 to gc_master_tid" and E2 be the event
3081-
* "master thread observes that `gc_n_threads_marking` is zero". Since we're using
3082-
* sequentially consistent atomics, E1 => E2. Now suppose one thread which is spinning in
3083-
* `gc_should_mark` tries to enter the mark-loop after E2. In order to do so, it must
3084-
* increment `gc_n_threads_marking` to 1 in an event E3, and then read `gc_master_tid` in an
3085-
* event E4. Since we're using sequentially consistent atomics, E3 => E4. Since we observed
3086-
* `gc_n_threads_marking` as zero in E2, then E2 => E3, and we conclude E1 => E4, so that
3087-
* the thread which is spinning in `gc_should_mark` must observe that `gc_master_tid` is -1
3088-
* and therefore won't enter the mark-loop.
3073+
* - If a thread observes that `gc_n_threads_marking` is zero inside `gc_should_mark`, that
3074+
* means that no thread has work on their queue, this is guaranteed because a thread may only exit
3075+
* `gc_mark_and_steal` when its own queue is empty, this information is synchronized by the
3076+
* seq-cst fetch_add to a thread that is in `gc_should_mark`. `gc_queue_observer_lock`
3077+
* guarantees that once `gc_n_threads_marking` reaches zero, no thread will increment it again,
3078+
* because incrementing is only legal from inside the lock. Therefore, no thread will reenter
3079+
* the mark-loop after `gc_n_threads_marking` reaches zero.
30893080
*/
30903081

30913082
int gc_should_mark(void)
30923083
{
30933084
int should_mark = 0;
3094-
int n_threads_marking = jl_atomic_load(&gc_n_threads_marking);
3095-
// fast path
3096-
if (n_threads_marking == 0) {
3097-
return 0;
3098-
}
30993085
uv_mutex_lock(&gc_queue_observer_lock);
31003086
while (1) {
3101-
int tid = jl_atomic_load(&gc_master_tid);
3102-
// fast path
3103-
if (tid == -1) {
3104-
break;
3105-
}
3106-
n_threads_marking = jl_atomic_load(&gc_n_threads_marking);
3107-
// fast path
3087+
int n_threads_marking = jl_atomic_load(&gc_n_threads_marking);
31083088
if (n_threads_marking == 0) {
31093089
break;
31103090
}
3091+
int tid = jl_atomic_load_relaxed(&gc_master_tid);
3092+
assert(tid != -1);
31113093
size_t work = gc_count_work_in_queue(gc_all_tls_states[tid]);
31123094
for (tid = gc_first_tid; tid < gc_first_tid + jl_n_markthreads; tid++) {
31133095
jl_ptls_t ptls2 = gc_all_tls_states[tid];
@@ -3118,7 +3100,8 @@ int gc_should_mark(void)
31183100
}
31193101
// if there is a lot of work left, enter the mark loop
31203102
if (work >= 16 * n_threads_marking) {
3121-
jl_atomic_fetch_add(&gc_n_threads_marking, 1);
3103+
jl_atomic_fetch_add(&gc_n_threads_marking, 1); // A possibility would be to allow a thread that found lots
3104+
// of work to increment this
31223105
should_mark = 1;
31233106
break;
31243107
}
@@ -3130,16 +3113,16 @@ int gc_should_mark(void)
31303113

31313114
void gc_wake_all_for_marking(jl_ptls_t ptls)
31323115
{
3133-
jl_atomic_store(&gc_master_tid, ptls->tid);
31343116
uv_mutex_lock(&gc_threads_lock);
3135-
jl_atomic_fetch_add(&gc_n_threads_marking, 1);
31363117
uv_cond_broadcast(&gc_threads_cond);
31373118
uv_mutex_unlock(&gc_threads_lock);
31383119
}
31393120

31403121
void gc_mark_loop_parallel(jl_ptls_t ptls, int master)
31413122
{
31423123
if (master) {
3124+
jl_atomic_store(&gc_master_tid, ptls->tid);
3125+
jl_atomic_fetch_add(&gc_n_threads_marking, 1);
31433126
gc_wake_all_for_marking(ptls);
31443127
gc_mark_and_steal(ptls);
31453128
jl_atomic_fetch_add(&gc_n_threads_marking, -1);
@@ -3166,10 +3149,8 @@ void gc_mark_loop(jl_ptls_t ptls)
31663149

31673150
void gc_mark_loop_barrier(void)
31683151
{
3169-
jl_atomic_store(&gc_master_tid, -1);
3170-
while (jl_atomic_load(&gc_n_threads_marking) != 0) {
3171-
jl_cpu_pause();
3172-
}
3152+
assert(jl_atomic_load_relaxed(&gc_n_threads_marking) == 0);
3153+
jl_atomic_store_relaxed(&gc_master_tid, -1);
31733154
}
31743155

31753156
void gc_mark_clean_reclaim_sets(void)

src/julia_threads.h

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,9 @@ typedef struct _jl_tls_states_t {
192192
_Atomic(volatile size_t *) safepoint; // may be changed to the suspend page by any thread
193193
_Atomic(int8_t) sleep_check_state; // read/write from foreign threads
194194
// Whether it is safe to execute GC at the same time.
195+
#define JL_GC_STATE_UNSAFE 0
196+
// gc_state = 0 means the thread is running Julia code and is not
197+
// safe to run concurrently to the GC
195198
#define JL_GC_STATE_WAITING 1
196199
// gc_state = 1 means the thread is doing GC or is waiting for the GC to
197200
// finish.
@@ -330,9 +333,7 @@ STATIC_INLINE int8_t jl_gc_state_set(jl_ptls_t ptls, int8_t state,
330333
int8_t old_state)
331334
{
332335
jl_atomic_store_release(&ptls->gc_state, state);
333-
if (state == JL_GC_STATE_SAFE && old_state == 0)
334-
jl_gc_safepoint_(ptls);
335-
if (state == 0 && old_state == JL_GC_STATE_SAFE)
336+
if (state == JL_GC_STATE_UNSAFE || old_state == JL_GC_STATE_UNSAFE)
336337
jl_gc_safepoint_(ptls);
337338
return old_state;
338339
}
@@ -347,8 +348,8 @@ void jl_gc_unsafe_leave(jl_ptls_t ptls, int8_t state) JL_NOTSAFEPOINT JL_NOTSAFE
347348
int8_t jl_gc_safe_enter(jl_ptls_t ptls) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER;
348349
void jl_gc_safe_leave(jl_ptls_t ptls, int8_t state) JL_NOTSAFEPOINT_LEAVE; // this might not be a safepoint, but we have to assume it could be (statically)
349350
#else
350-
#define jl_gc_unsafe_enter(ptls) jl_gc_state_save_and_set(ptls, 0)
351-
#define jl_gc_unsafe_leave(ptls, state) ((void)jl_gc_state_set(ptls, (state), 0))
351+
#define jl_gc_unsafe_enter(ptls) jl_gc_state_save_and_set(ptls, JL_GC_STATE_UNSAFE)
352+
#define jl_gc_unsafe_leave(ptls, state) ((void)jl_gc_state_set(ptls, (state), JL_GC_STATE_UNSAFE))
352353
#define jl_gc_safe_enter(ptls) jl_gc_state_save_and_set(ptls, JL_GC_STATE_SAFE)
353354
#define jl_gc_safe_leave(ptls, state) ((void)jl_gc_state_set(ptls, (state), JL_GC_STATE_SAFE))
354355
#endif

src/llvm-codegen-shared.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -325,7 +325,7 @@ static inline llvm::Value *emit_gc_unsafe_enter(llvm::IRBuilder<> &builder, llvm
325325
static inline llvm::Value *emit_gc_unsafe_leave(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, llvm::Value *state, bool final)
326326
{
327327
using namespace llvm;
328-
Value *old_state = builder.getInt8(0);
328+
Value *old_state = builder.getInt8(JL_GC_STATE_UNSAFE);
329329
return emit_gc_state_set(builder, T_size, ptls, state, old_state, final);
330330
}
331331

src/safepoint.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ void jl_safepoint_wait_gc(void) JL_NOTSAFEPOINT
234234
jl_task_t *ct = jl_current_task; (void)ct;
235235
JL_TIMING_SUSPEND_TASK(GC_SAFEPOINT, ct);
236236
// The thread should have set this is already
237-
assert(jl_atomic_load_relaxed(&ct->ptls->gc_state) != 0);
237+
assert(jl_atomic_load_relaxed(&ct->ptls->gc_state) != JL_GC_STATE_UNSAFE);
238238
// Use normal volatile load in the loop for speed until GC finishes.
239239
// Then use an acquire load to make sure the GC result is visible on this thread.
240240
while (jl_atomic_load_relaxed(&jl_gc_running) || jl_atomic_load_acquire(&jl_gc_running)) {
@@ -309,7 +309,7 @@ int jl_safepoint_suspend_thread(int tid, int waitstate)
309309
}
310310
while (jl_atomic_load_acquire(&ptls2->suspend_count) != 0) {
311311
int8_t state2 = jl_atomic_load_acquire(&ptls2->gc_state);
312-
if (waitstate <= 2 && state2 != 0)
312+
if (waitstate <= 2 && state2 != JL_GC_STATE_UNSAFE)
313313
break;
314314
if (waitstate == 3 && state2 == JL_GC_STATE_WAITING)
315315
break;

src/scheduler.c

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ void jl_parallel_gc_threadfun(void *arg)
131131
jl_ptls_t ptls = jl_init_threadtls(targ->tid);
132132

133133
// wait for all threads
134-
jl_gc_state_set(ptls, JL_GC_STATE_WAITING, 0);
134+
jl_gc_state_set(ptls, JL_GC_STATE_WAITING, JL_GC_STATE_UNSAFE);
135135
uv_barrier_wait(targ->barrier);
136136

137137
// free the thread argument here
@@ -143,9 +143,7 @@ void jl_parallel_gc_threadfun(void *arg)
143143
uv_cond_wait(&gc_threads_cond, &gc_threads_lock);
144144
}
145145
uv_mutex_unlock(&gc_threads_lock);
146-
if (may_mark()) {
147-
gc_mark_loop_parallel(ptls, 0);
148-
}
146+
gc_mark_loop_parallel(ptls, 0);
149147
if (may_sweep(ptls)) { // not an else!
150148
gc_sweep_pool_parallel(ptls);
151149
jl_atomic_fetch_add(&ptls->gc_sweeps_requested, -1);
@@ -162,7 +160,7 @@ void jl_concurrent_gc_threadfun(void *arg)
162160
jl_ptls_t ptls = jl_init_threadtls(targ->tid);
163161

164162
// wait for all threads
165-
jl_gc_state_set(ptls, JL_GC_STATE_WAITING, 0);
163+
jl_gc_state_set(ptls, JL_GC_STATE_WAITING, JL_GC_STATE_UNSAFE);
166164
uv_barrier_wait(targ->barrier);
167165

168166
// free the thread argument here
@@ -190,7 +188,7 @@ void jl_threadfun(void *arg)
190188
JL_GC_PROMISE_ROOTED(ct);
191189

192190
// wait for all threads
193-
jl_gc_state_set(ptls, JL_GC_STATE_SAFE, 0);
191+
jl_gc_state_set(ptls, JL_GC_STATE_SAFE, JL_GC_STATE_UNSAFE);
194192
uv_barrier_wait(targ->barrier);
195193

196194
// free the thread argument here

src/signal-handling.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -440,7 +440,7 @@ void jl_task_frame_noreturn(jl_task_t *ct) JL_NOTSAFEPOINT
440440
ct->ptls->in_finalizer = 0;
441441
ct->ptls->defer_signal = 0;
442442
// forcibly exit GC (if we were in it) or safe into unsafe, without the mandatory safepoint
443-
jl_atomic_store_release(&ct->ptls->gc_state, 0);
443+
jl_atomic_store_release(&ct->ptls->gc_state, JL_GC_STATE_UNSAFE);
444444
// allow continuing to use a Task that should have already died--unsafe necromancy!
445445
jl_atomic_store_relaxed(&ct->_state, JL_TASK_STATE_RUNNABLE);
446446
}

src/threading.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -364,7 +364,7 @@ jl_ptls_t jl_init_threadtls(int16_t tid)
364364
}
365365
}
366366
#endif
367-
jl_atomic_store_relaxed(&ptls->gc_state, 0); // GC unsafe
367+
jl_atomic_store_relaxed(&ptls->gc_state, JL_GC_STATE_UNSAFE); // GC unsafe
368368
// Conditionally initialize the safepoint address. See comment in
369369
// `safepoint.c`
370370
if (tid == 0) {

0 commit comments

Comments (0)