@@ -3,6 +3,7 @@
 #include "gc.h"
 #include "gc-page-profiler.h"
 #include "julia.h"
+#include "julia_atomics.h"
 #include "julia_gcext.h"
 #include "julia_assert.h"
 #ifdef __GLIBC__
@@ -540,7 +541,7 @@ void jl_gc_run_all_finalizers(jl_task_t *ct)
 
 void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT
 {
-    assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0);
+    assert(jl_atomic_load_relaxed(&ptls->gc_state) == JL_GC_STATE_UNSAFE);
     arraylist_t *a = &ptls->finalizers;
     // This acquire load and the release store at the end are used to
     // synchronize with `finalize_object` on another thread. Apart from the GC,
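For reference, a rough sketch of the per-thread GC states this assertion refers to. The definitions below are assumptions for illustration only (the authoritative ones live in julia_threads.h); the value 0 for the unsafe state is implied by the literal this assert used to compare against:

/* Assumed values, for illustration only -- see julia_threads.h for the real definitions. */
#define JL_GC_STATE_UNSAFE  0   /* thread runs managed code; it must reach a safepoint before GC */
#define JL_GC_STATE_WAITING 1   /* thread is running GC or waiting for GC to finish */
#define JL_GC_STATE_SAFE    2   /* thread runs unmanaged code; GC may proceed concurrently */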
@@ -1676,7 +1677,7 @@ void gc_sweep_wake_all(jl_ptls_t ptls, jl_gc_padded_page_stack_t *new_gc_allocd_
 void gc_sweep_wait_for_all(void)
 {
     jl_atomic_store(&gc_allocd_scratch, NULL);
-    while (jl_atomic_load_relaxed(&gc_n_threads_sweeping) != 0) {
+    while (jl_atomic_load_acquire(&gc_n_threads_sweeping) != 0) {
         jl_cpu_pause();
     }
 }
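A minimal sketch of the acquire/release pairing this load relies on, written with plain C11 atomics rather than the jl_atomic_* wrappers. The names are illustrative, and the release side stands in for the decrement each sweeping thread performs when it finishes:

#include <stdatomic.h>
#include <sched.h>

static atomic_int n_sweeping;        /* analogue of gc_n_threads_sweeping */
static int page_metadata[8];         /* hypothetical data written by the sweepers */

void sweeper(int id)
{
    page_metadata[id] = 1;                     /* plain writes done while sweeping */
    /* release: orders the writes above before the decrement becomes visible */
    atomic_fetch_sub_explicit(&n_sweeping, 1, memory_order_release);
}

void wait_for_all_sweepers(void)
{
    /* acquire: once 0 is observed, every sweeper's writes are visible here */
    while (atomic_load_explicit(&n_sweeping, memory_order_acquire) != 0)
        sched_yield();                         /* stand-in for jl_cpu_pause() */
}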
@@ -2966,9 +2967,7 @@ void gc_mark_and_steal(jl_ptls_t ptls)
     jl_gc_markqueue_t *mq = &ptls->mark_queue;
     jl_gc_markqueue_t *mq_master = NULL;
     int master_tid = jl_atomic_load(&gc_master_tid);
-    if (master_tid == -1) {
-        return;
-    }
+    assert(master_tid != -1);
     mq_master = &gc_all_tls_states[master_tid]->mark_queue;
     void *new_obj;
     jl_gc_chunk_t c;
@@ -3060,54 +3059,37 @@ size_t gc_count_work_in_queue(jl_ptls_t ptls) JL_NOTSAFEPOINT
  * Correctness argument for the mark-loop termination protocol.
  *
  * Safety properties:
- * - No work items shall be in any thread's queues when `gc_mark_loop_barrier` observes
+ * - No work items shall be in any thread's queues when `gc_should_mark` observes
  *   that `gc_n_threads_marking` is zero.
  *
  * - No work item shall be stolen from the master thread (i.e. mutator thread which started
  *   GC and which helped the `jl_n_markthreads` - 1 threads to mark) after
- *   `gc_mark_loop_barrier` observes that `gc_n_threads_marking` is zero. This property is
+ *   `gc_should_mark` observes that `gc_n_threads_marking` is zero. This property is
  *   necessary because we call `gc_mark_loop_serial` after marking the finalizer list in
  *   `_jl_gc_collect`, and want to ensure that we have the serial mark-loop semantics there,
  *   and that no work is stolen from us at that point.
  *
  * Proof:
- * - Suppose the master thread observes that `gc_n_threads_marking` is zero in
- *   `gc_mark_loop_barrier` and there is a work item left in one thread's queue at that point.
- *   Since threads try to steal from all threads' queues, this implies that all threads must
- *   have tried to steal from the queue which still has a work item left, but failed to do so,
- *   which violates the semantics of Chase-Lev's work-stealing queue.
- *
- * - Let E1 be the event "master thread writes -1 to gc_master_tid" and E2 be the event
- *   "master thread observes that `gc_n_threads_marking` is zero". Since we're using
- *   sequentially consistent atomics, E1 => E2. Now suppose one thread which is spinning in
- *   `gc_should_mark` tries to enter the mark-loop after E2. In order to do so, it must
- *   increment `gc_n_threads_marking` to 1 in an event E3, and then read `gc_master_tid` in an
- *   event E4. Since we're using sequentially consistent atomics, E3 => E4. Since we observed
- *   `gc_n_threads_marking` as zero in E2, then E2 => E3, and we conclude E1 => E4, so that
- *   the thread which is spinning in `gc_should_mark` must observe that `gc_master_tid` is -1
- *   and therefore won't enter the mark-loop.
+ * - If a thread observes that `gc_n_threads_marking` is zero inside `gc_should_mark`, then
+ *   no thread has work left in its queue: a thread may only exit `gc_mark_and_steal` once its
+ *   own queue is empty, and that fact is synchronized to a thread in `gc_should_mark` by the
+ *   seq-cst fetch_add on `gc_n_threads_marking`. `gc_queue_observer_lock` guarantees that once
+ *   `gc_n_threads_marking` reaches zero, no thread will increment it again, because
+ *   incrementing is only legal from inside the lock. Therefore, no thread will reenter the
+ *   mark-loop after `gc_n_threads_marking` reaches zero.
  */
 
 int gc_should_mark(void)
 {
     int should_mark = 0;
-    int n_threads_marking = jl_atomic_load(&gc_n_threads_marking);
-    // fast path
-    if (n_threads_marking == 0) {
-        return 0;
-    }
     uv_mutex_lock(&gc_queue_observer_lock);
     while (1) {
-        int tid = jl_atomic_load(&gc_master_tid);
-        // fast path
-        if (tid == -1) {
-            break;
-        }
-        n_threads_marking = jl_atomic_load(&gc_n_threads_marking);
-        // fast path
+        int n_threads_marking = jl_atomic_load(&gc_n_threads_marking);
         if (n_threads_marking == 0) {
             break;
         }
+        int tid = jl_atomic_load_relaxed(&gc_master_tid);
+        assert(tid != -1);
         size_t work = gc_count_work_in_queue(gc_all_tls_states[tid]);
         for (tid = gc_first_tid; tid < gc_first_tid + jl_n_markthreads; tid++) {
             jl_ptls_t ptls2 = gc_all_tls_states[tid];
@@ -3118,7 +3100,8 @@ int gc_should_mark(void)
         }
         // if there is a lot of work left, enter the mark loop
        if (work >= 16 * n_threads_marking) {
-            jl_atomic_fetch_add(&gc_n_threads_marking, 1);
+            jl_atomic_fetch_add(&gc_n_threads_marking, 1); // A possibility would be to allow a thread that found lots
+            // of work to increment this
             should_mark = 1;
             break;
         }
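To make the protocol described in the comment above concrete, here is a minimal, self-contained sketch of the lock-protected entry and lock-free exit, using plain C11 atomics and a pthread mutex instead of the jl_atomic_*/uv_mutex wrappers. All names (n_marking, observer_lock, pending_work) are illustrative, and the shared work counter stands in for gc_count_work_in_queue() summed over the mark queues:

#include <stdatomic.h>
#include <pthread.h>
#include <stddef.h>

static atomic_int n_marking;                    /* analogue of gc_n_threads_marking */
static atomic_size_t pending_work;              /* hypothetical global work estimate */
static pthread_mutex_t observer_lock = PTHREAD_MUTEX_INITIALIZER;

/* Stand-in for gc_mark_and_steal(): returns only once no work is left to take. */
static void drain_and_steal(void)
{
    size_t w = atomic_load(&pending_work);
    while (w != 0) {
        /* on failure the CAS reloads w with the current value; on success, read it again */
        if (atomic_compare_exchange_weak(&pending_work, &w, w - 1))
            w = atomic_load(&pending_work);
    }
}

/* Entry decision: n_marking is incremented only while holding observer_lock, so once
 * it is observed to be zero here it can never become non-zero again. */
static int should_mark(void)
{
    int enter = 0;
    pthread_mutex_lock(&observer_lock);
    for (;;) {
        int n = atomic_load(&n_marking);        /* seq_cst, pairs with the seq_cst decrement below */
        if (n == 0)
            break;                              /* zero is final: marking has terminated */
        if (atomic_load(&pending_work) >= 16 * (size_t)n) {
            atomic_fetch_add(&n_marking, 1);    /* legal only under the lock */
            enter = 1;
            break;
        }
        /* otherwise keep watching until the workers finish or enough work appears */
    }
    pthread_mutex_unlock(&observer_lock);
    return enter;
}

/* Worker loop: the decrement needs no lock; it only moves n_marking toward zero. */
static void mark_thread(void)
{
    while (should_mark()) {
        drain_and_steal();
        atomic_fetch_add(&n_marking, -1);
    }
}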
@@ -3130,16 +3113,16 @@ int gc_should_mark(void)
 
 void gc_wake_all_for_marking(jl_ptls_t ptls)
 {
-    jl_atomic_store(&gc_master_tid, ptls->tid);
     uv_mutex_lock(&gc_threads_lock);
-    jl_atomic_fetch_add(&gc_n_threads_marking, 1);
     uv_cond_broadcast(&gc_threads_cond);
     uv_mutex_unlock(&gc_threads_lock);
 }
 
 void gc_mark_loop_parallel(jl_ptls_t ptls, int master)
 {
     if (master) {
+        jl_atomic_store(&gc_master_tid, ptls->tid);
+        jl_atomic_fetch_add(&gc_n_threads_marking, 1);
         gc_wake_all_for_marking(ptls);
         gc_mark_and_steal(ptls);
         jl_atomic_fetch_add(&gc_n_threads_marking, -1);
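A short sketch of the publish-then-wake ordering the master relies on here, again with illustrative names and plain C11/pthreads primitives: the master tid and the marker count are made visible before any worker can be woken, which is what allows gc_mark_and_steal and gc_should_mark to assert a valid master tid instead of checking for -1.

#include <stdatomic.h>
#include <pthread.h>
#include <assert.h>

static atomic_int master_tid = -1;             /* analogue of gc_master_tid */
static atomic_int n_marking;                   /* analogue of gc_n_threads_marking */
static pthread_mutex_t threads_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t threads_cond = PTHREAD_COND_INITIALIZER;

void master_start_marking(int self_tid)
{
    atomic_store(&master_tid, self_tid);       /* published first ... */
    atomic_fetch_add(&n_marking, 1);           /* ... then the count becomes non-zero ... */
    pthread_mutex_lock(&threads_lock);
    pthread_cond_broadcast(&threads_cond);     /* ... and only then are the workers woken */
    pthread_mutex_unlock(&threads_lock);
}

void worker_wait_and_mark(void)
{
    pthread_mutex_lock(&threads_lock);
    while (atomic_load(&n_marking) == 0)
        pthread_cond_wait(&threads_cond, &threads_lock);
    pthread_mutex_unlock(&threads_lock);
    /* a worker that observed n_marking != 0 is guaranteed to see the master tid */
    assert(atomic_load(&master_tid) != -1);
    /* ... steal and mark ... */
}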
@@ -3166,10 +3149,8 @@ void gc_mark_loop(jl_ptls_t ptls)
 
 void gc_mark_loop_barrier(void)
 {
-    jl_atomic_store(&gc_master_tid, -1);
-    while (jl_atomic_load(&gc_n_threads_marking) != 0) {
-        jl_cpu_pause();
-    }
+    assert(jl_atomic_load_relaxed(&gc_n_threads_marking) == 0);
+    jl_atomic_store_relaxed(&gc_master_tid, -1);
 }
 
 void gc_mark_clean_reclaim_sets(void)