@@ -2960,9 +2960,7 @@ void gc_mark_and_steal(jl_ptls_t ptls)
     jl_gc_markqueue_t *mq = &ptls->mark_queue;
     jl_gc_markqueue_t *mq_master = NULL;
     int master_tid = jl_atomic_load(&gc_master_tid);
-    if (master_tid == -1) {
-        return;
-    }
+    assert(master_tid != -1);
     mq_master = &gc_all_tls_states[master_tid]->mark_queue;
     void *new_obj;
     jl_gc_chunk_t c;
@@ -3053,61 +3051,49 @@ size_t gc_count_work_in_queue(jl_ptls_t ptls) JL_NOTSAFEPOINT
  * Correctness argument for the mark-loop termination protocol.
  *
  * Safety properties:
- * - No work items shall be in any thread's queues when `gc_mark_loop_barrier` observes
+ * - No work items shall be in any thread's queues when `gc_should_mark` observes
  * that `gc_n_threads_marking` is zero.
  *
  * - No work item shall be stolen from the master thread (i.e. mutator thread which started
  * GC and which helped the `jl_n_markthreads` - 1 threads to mark) after
- * `gc_mark_loop_barrier` observes that `gc_n_threads_marking` is zero. This property is
+ * `gc_should_mark` observes that `gc_n_threads_marking` is zero. This property is
  * necessary because we call `gc_mark_loop_serial` after marking the finalizer list in
  * `_jl_gc_collect`, and want to ensure that we have the serial mark-loop semantics there,
  * and that no work is stolen from us at that point.
  *
  * Proof:
- * - Suppose the master thread observes that `gc_n_threads_marking` is zero in
- * `gc_mark_loop_barrier` and there is a work item left in one thread's queue at that point.
- * Since threads try to steal from all threads' queues, this implies that all threads must
- * have tried to steal from the queue which still has a work item left, but failed to do so,
- * which violates the semantics of Chase-Lev's work-stealing queue.
- *
- * - Let E1 be the event "master thread writes -1 to gc_master_tid" and E2 be the event
- * "master thread observes that `gc_n_threads_marking` is zero". Since we're using
- * sequentially consistent atomics, E1 => E2. Now suppose one thread which is spinning in
- * `gc_should_mark` tries to enter the mark-loop after E2. In order to do so, it must
- * increment `gc_n_threads_marking` to 1 in an event E3, and then read `gc_master_tid` in an
- * event E4. Since we're using sequentially consistent atomics, E3 => E4. Since we observed
- * `gc_n_threads_marking` as zero in E2, then E2 => E3, and we conclude E1 => E4, so that
- * the thread which is spinning in `gc_should_mark` must observe that `gc_master_tid` is -1
- * and therefore won't enter the mark-loop.
+ * - If a thread observes that `gc_n_threads_marking` is zero inside `gc_should_mark`, then
+ * no thread has work left in its queue: a thread may only exit `gc_mark_and_steal` once its
+ * own queue is empty, and that fact is synchronized to any thread in `gc_should_mark` by the
+ * seq-cst fetch_add on `gc_n_threads_marking`. Moreover, `gc_queue_observer_lock` guarantees
+ * that once `gc_n_threads_marking` reaches zero it is never incremented again, because
+ * incrementing is only legal while holding the lock. Therefore, no thread can reenter the
+ * mark-loop after `gc_n_threads_marking` reaches zero.
  */
 
-int gc_should_mark(jl_ptls_t ptls)
+int gc_should_mark(void)
 {
     int should_mark = 0;
-    int n_threads_marking = jl_atomic_load(&gc_n_threads_marking);
-    // fast path
-    if (n_threads_marking == 0) {
-        return 0;
-    }
     uv_mutex_lock(&gc_queue_observer_lock);
     while (1) {
-        int tid = jl_atomic_load(&gc_master_tid);
-        // fast path
-        if (tid == -1) {
-            break;
-        }
-        n_threads_marking = jl_atomic_load(&gc_n_threads_marking);
-        // fast path
+        int n_threads_marking = jl_atomic_load(&gc_n_threads_marking);
         if (n_threads_marking == 0) {
             break;
         }
+        int tid = jl_atomic_load_relaxed(&gc_master_tid);
+        assert(tid != -1);
         size_t work = gc_count_work_in_queue(gc_all_tls_states[tid]);
         for (tid = gc_first_tid; tid < gc_first_tid + jl_n_markthreads; tid++) {
-            work += gc_count_work_in_queue(gc_all_tls_states[tid]);
+            jl_ptls_t ptls2 = gc_all_tls_states[tid];
+            if (ptls2 == NULL) {
+                continue;
+            }
+            work += gc_count_work_in_queue(ptls2);
         }
         // if there is a lot of work left, enter the mark loop
         if (work >= 16 * n_threads_marking) {
-            jl_atomic_fetch_add(&gc_n_threads_marking, 1);
+            jl_atomic_fetch_add(&gc_n_threads_marking, 1); // A possibility would be to allow a thread that found lots
+                                                           // of work to increment this
             should_mark = 1;
             break;
         }
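To make the termination argument in the comment above concrete, here is a minimal, hypothetical sketch of the same protocol outside the Julia runtime. All names (`observer_lock`, `work_remaining`, `steal_and_mark`, `should_mark`, `N_WORKERS`, `WORK_THRESHOLD`) are stand-ins rather than the runtime's identifiers, and the per-thread Chase-Lev queues are collapsed into a single shared work counter; the only point is the shape of the rule that the marker count is incremented exclusively under the observer lock (apart from the master's initial increment, which happens before any worker is woken), so once an observer sees it reach zero, marking has terminated for good.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define N_WORKERS      4
#define WORK_THRESHOLD 16

static atomic_int  n_threads_marking = 0;     /* threads currently inside the mark loop */
static atomic_long work_remaining    = 10000; /* stand-in for the sum of all mark queues */
static pthread_mutex_t observer_lock = PTHREAD_MUTEX_INITIALIZER;

/* Stand-in for gc_mark_and_steal: drain work until none is visible to us. */
static void steal_and_mark(void)
{
    for (;;) {
        long w = atomic_load(&work_remaining);
        if (w <= 0)
            return; /* our "queue" is empty: leave the mark loop */
        /* claim one unit of work; on failure, w is reloaded and we retry */
        if (atomic_compare_exchange_weak(&work_remaining, &w, w - 1)) {
            /* ... mark one object here ... */
        }
    }
}

/* Stand-in for gc_should_mark: one observer at a time watches the counters, and
 * n_threads_marking is only incremented while observer_lock is held (except for
 * the master's initial increment, done before any worker is woken). Hence once
 * the observer sees it reach zero, nobody can raise it again. */
static int should_mark(void)
{
    int should = 0;
    pthread_mutex_lock(&observer_lock);
    for (;;) {
        int marking = atomic_load(&n_threads_marking);
        if (marking == 0)
            break; /* every marker drained its queue: marking has terminated */
        long work = atomic_load(&work_remaining);
        /* only join if there is enough work left to justify another marker */
        if (work >= WORK_THRESHOLD * (long)marking) {
            atomic_fetch_add(&n_threads_marking, 1);
            should = 1;
            break;
        }
    }
    pthread_mutex_unlock(&observer_lock);
    return should;
}

/* Stand-in for the worker half of gc_mark_loop_parallel. */
static void *worker(void *arg)
{
    (void)arg;
    for (;;) {
        if (!should_mark())
            break;
        steal_and_mark();
        atomic_fetch_sub(&n_threads_marking, 1);
    }
    return NULL;
}

int main(void)
{
    pthread_t threads[N_WORKERS];
    /* The master registers as a marker before waking anyone, mirroring the
     * reordered stores in gc_mark_loop_parallel below. */
    atomic_fetch_add(&n_threads_marking, 1);
    for (int i = 0; i < N_WORKERS; i++)
        pthread_create(&threads[i], NULL, worker, NULL);
    steal_and_mark();
    atomic_fetch_sub(&n_threads_marking, 1);
    worker(NULL); /* the master then behaves like any other marker */
    for (int i = 0; i < N_WORKERS; i++)
        pthread_join(threads[i], NULL);
    printf("marking terminated, work left: %ld\n", atomic_load(&work_remaining));
    return 0;
}

Built with something like `cc -std=c11 -pthread sketch.c`, this should report zero work left once every worker and the master have observed the marker count drop to zero, which is exactly the state the serial mark-loop in `_jl_gc_collect` relies on.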
@@ -3119,22 +3105,22 @@ int gc_should_mark(jl_ptls_t ptls)
 
 void gc_wake_all_for_marking(jl_ptls_t ptls)
 {
-    jl_atomic_store(&gc_master_tid, ptls->tid);
     uv_mutex_lock(&gc_threads_lock);
-    jl_atomic_fetch_add(&gc_n_threads_marking, 1);
     uv_cond_broadcast(&gc_threads_cond);
     uv_mutex_unlock(&gc_threads_lock);
 }
 
 void gc_mark_loop_parallel(jl_ptls_t ptls, int master)
 {
     if (master) {
+        jl_atomic_store(&gc_master_tid, ptls->tid);
+        jl_atomic_fetch_add(&gc_n_threads_marking, 1);
         gc_wake_all_for_marking(ptls);
         gc_mark_and_steal(ptls);
         jl_atomic_fetch_add(&gc_n_threads_marking, -1);
     }
     while (1) {
-        int should_mark = gc_should_mark(ptls);
+        int should_mark = gc_should_mark();
         if (!should_mark) {
             break;
         }