@@ -29,6 +29,11 @@ static const int16_t sleeping = 1;
2929// this thread is dead.
3030static const int16_t sleeping_like_the_dead JL_UNUSED = 2 ;
3131
32+ // a running count of how many threads are currently not_sleeping
33+ // plus a running count of the number of in-flight wake-ups
34+ // n.b. this may temporarily exceed jl_n_threads
35+ static _Atomic(int) nrunning = 1;
36+
3237// invariant: No thread is ever asleep unless sleep_check_state is sleeping (or we have a wakeup signal pending).
3338// invariant: Any particular thread is not asleep unless that thread's sleep_check_state is sleeping.
3439// invariant: The transition of a thread state to sleeping must be followed by a check that there wasn't work pending for it.
@@ -64,7 +69,7 @@ JL_DLLEXPORT int jl_set_task_tid(jl_task_t *task, int16_t tid) JL_NOTSAFEPOINT
6469     if (was == tid)
6570         return 1;
6671     if (was == -1)
67-         return jl_atomic_cmpswap(&task->tid, &was, tid);
72+         return jl_atomic_cmpswap(&task->tid, &was, tid) || was == tid;
6873     return 0;
6974 }
7075
@@ -180,6 +185,8 @@ void jl_threadfun(void *arg)
180185 jl_init_stack_limits (0 , & stack_lo , & stack_hi );
181186 // warning: this changes `jl_current_task`, so be careful not to call that from this function
182187 jl_task_t * ct = jl_init_root_task (ptls , stack_lo , stack_hi );
188+     int wasrunning = jl_atomic_fetch_add_relaxed(&nrunning, 1);
189+     assert(wasrunning); (void)wasrunning;
183190 JL_GC_PROMISE_ROOTED (ct );
184191
185192 // wait for all threads
@@ -220,7 +227,7 @@ int jl_running_under_rr(int recheck)
220227
221228
222229// sleep_check_after_threshold() -- if sleep_threshold ns have passed, return 1
223- static int sleep_check_after_threshold(uint64_t *start_cycles)
230+ static int sleep_check_after_threshold(uint64_t *start_cycles) JL_NOTSAFEPOINT
224231{
225232 JULIA_DEBUG_SLEEPWAKE ( return 1 ); // hammer on the sleep/wake logic much harder
226233 /**
@@ -243,18 +250,31 @@ static int sleep_check_after_threshold(uint64_t *start_cycles)
243250 return 0 ;
244251}
245252
253+ static int set_not_sleeping(jl_ptls_t ptls) JL_NOTSAFEPOINT
254+ {
255+     if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping) {
256+         if (jl_atomic_exchange_relaxed(&ptls->sleep_check_state, not_sleeping) != not_sleeping) {
257+             return 1;
258+         }
259+     }
260+     int wasrunning = jl_atomic_fetch_add_relaxed(&nrunning, -1); // consume in-flight wakeup
261+     assert(wasrunning > 1); (void)wasrunning;
262+     return 0;
263+ }
246264
247265static int wake_thread(int16_t tid) JL_NOTSAFEPOINT
248266{
249-     jl_ptls_t other = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
250-     int8_t state = sleeping;
251-
252-     if (jl_atomic_load_relaxed(&other->sleep_check_state) == sleeping) {
253-         if (jl_atomic_cmpswap_relaxed(&other->sleep_check_state, &state, not_sleeping)) {
254-             JL_PROBE_RT_SLEEP_CHECK_WAKE(other, state);
255-             uv_mutex_lock(&other->sleep_lock);
256-             uv_cond_signal(&other->wake_signal);
257-             uv_mutex_unlock(&other->sleep_lock);
267+     jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
268+
269+     if (jl_atomic_load_relaxed(&ptls2->sleep_check_state) != not_sleeping) {
270+         int8_t state = sleeping;
271+         if (jl_atomic_cmpswap_relaxed(&ptls2->sleep_check_state, &state, not_sleeping)) {
272+             int wasrunning = jl_atomic_fetch_add_relaxed(&nrunning, 1); // increment in-flight wakeup count
273+             assert(wasrunning); (void)wasrunning;
274+             JL_PROBE_RT_SLEEP_CHECK_WAKE(ptls2, state);
275+             uv_mutex_lock(&ptls2->sleep_lock);
276+             uv_cond_signal(&ptls2->wake_signal);
277+             uv_mutex_unlock(&ptls2->sleep_lock);
258278             return 1;
259279         }
260280     }
@@ -280,10 +300,14 @@ JL_DLLEXPORT void jl_wakeup_thread(int16_t tid) JL_NOTSAFEPOINT
280300 JULIA_DEBUG_SLEEPWAKE ( wakeup_enter = cycleclock () );
281301 if (tid == self || tid == -1 ) {
282302 // we're already awake, but make sure we'll exit uv_run
303+         // and that nrunning is updated if this is now considered in-flight
283304         jl_ptls_t ptls = ct->ptls;
284-         if (jl_atomic_load_relaxed(&ptls->sleep_check_state) == sleeping) {
285-             jl_atomic_store_relaxed(&ptls->sleep_check_state, not_sleeping);
286-             JL_PROBE_RT_SLEEP_CHECK_WAKEUP(ptls);
305+         if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping) {
306+             if (jl_atomic_exchange_relaxed(&ptls->sleep_check_state, not_sleeping) != not_sleeping) {
307+                 int wasrunning = jl_atomic_fetch_add_relaxed(&nrunning, 1);
308+                 assert(wasrunning); (void)wasrunning;
309+                 JL_PROBE_RT_SLEEP_CHECK_WAKEUP(ptls);
310+             }
287311 }
288312 if (uvlock == ct )
289313 uv_stop (jl_global_event_loop ());
@@ -360,8 +384,10 @@ void jl_task_wait_empty(void)
360384 // we are back from jl_task_get_next now
361385 ct -> world_age = lastage ;
362386 wait_empty = NULL ;
363- // TODO: move this lock acquire-release pair to the caller, so that we ensure new work
364- // (from uv_unref objects) didn't unexpectedly get scheduled and start running behind our back
387+ // TODO: move this lock acquire to before the wait_empty return and the
388+ // unlock to the caller, so that we ensure new work (from uv_unref
389+ // objects) didn't unexpectedly get scheduled and start running behind
390+ // our back during the function return
365391 JL_UV_LOCK ();
366392 jl_wait_empty_end ();
367393 JL_UV_UNLOCK ();
@@ -378,6 +404,7 @@ static int may_sleep(jl_ptls_t ptls) JL_NOTSAFEPOINT
378404 return jl_atomic_load_relaxed (& ptls -> sleep_check_state ) == sleeping ;
379405}
380406
407+
381408extern _Atomic (unsigned ) _threadedregion ;
382409
383410JL_DLLEXPORT jl_task_t * jl_task_get_next (jl_value_t * trypoptask , jl_value_t * q , jl_value_t * checkempty )
@@ -405,8 +432,7 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q,
405432 jl_fence (); // [^store_buffering_1]
406433 JL_PROBE_RT_SLEEP_CHECK_SLEEP (ptls );
407434 if (!check_empty (checkempty )) { // uses relaxed loads
408- if (jl_atomic_load_relaxed (& ptls -> sleep_check_state ) != not_sleeping ) {
409- jl_atomic_store_relaxed (& ptls -> sleep_check_state , not_sleeping ); // let other threads know they don't need to wake us
435+ if (set_not_sleeping (ptls )) {
410436 JL_PROBE_RT_SLEEP_CHECK_TASKQ_WAKE (ptls );
411437 }
412438 continue ;
@@ -415,23 +441,20 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q,
415441 if (ptls != ct -> ptls ) {
416442 // sigh, a yield was detected, so let's go ahead and handle it anyway by starting over
417443 ptls = ct -> ptls ;
418- if (jl_atomic_load_relaxed (& ptls -> sleep_check_state ) != not_sleeping ) {
419- jl_atomic_store_relaxed (& ptls -> sleep_check_state , not_sleeping ); // let other threads know they don't need to wake us
444+ if (set_not_sleeping (ptls )) {
420445 JL_PROBE_RT_SLEEP_CHECK_TASK_WAKE (ptls );
421446 }
422447 if (task )
423448 return task ;
424449 continue ;
425450 }
426451 if (task ) {
427- if (jl_atomic_load_relaxed (& ptls -> sleep_check_state ) != not_sleeping ) {
428- jl_atomic_store_relaxed (& ptls -> sleep_check_state , not_sleeping ); // let other threads know they don't need to wake us
452+ if (set_not_sleeping (ptls )) {
429453 JL_PROBE_RT_SLEEP_CHECK_TASK_WAKE (ptls );
430454 }
431455 return task ;
432456 }
433457
434-
435458 // IO is always permitted, but outside a threaded region, only
436459 // thread 0 will process messages.
437460 // Inside a threaded region, any thread can listen for IO messages,
@@ -485,41 +508,64 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q,
485508 // right back to sleep on the individual wake signal to let
486509 // them take it from us without conflict.
487510 if (active || !may_sleep (ptls )) {
511+             if (set_not_sleeping(ptls)) {
512+                 JL_PROBE_RT_SLEEP_CHECK_UV_WAKE(ptls);
513+             }
488514 start_cycles = 0 ;
489515 continue ;
490516 }
491517 if (!enter_eventloop && !jl_atomic_load_relaxed (& _threadedregion ) && ptls -> tid == 0 ) {
492518 // thread 0 is the only thread permitted to run the event loop
493519 // so it needs to stay alive, just spin-looping if necessary
494- if (jl_atomic_load_relaxed (& ptls -> sleep_check_state ) != not_sleeping ) {
495- jl_atomic_store_relaxed (& ptls -> sleep_check_state , not_sleeping ); // let other threads know they don't need to wake us
520+ if (set_not_sleeping (ptls )) {
496521 JL_PROBE_RT_SLEEP_CHECK_UV_WAKE (ptls );
497522 }
498523 start_cycles = 0 ;
499524 continue ;
500525 }
501526 }
502527
528+         // any thread which wants us running again will have to observe
529+         // sleep_check_state==sleeping and increment nrunning for us
530+         int wasrunning = jl_atomic_fetch_add_relaxed(&nrunning, -1);
531+         assert(wasrunning);
532+         if (wasrunning == 1) {
533+             // This was the last running thread, and there is no thread with !may_sleep
534+             // so make sure tid 0 is notified to check wait_empty
535+             // TODO: this also might be a good time to check again that
536+             // libuv's queue is truly empty, instead of during delete_thread
537+             if (ptls->tid != 0) {
538+                 uv_mutex_lock(&ptls->sleep_lock);
539+                 uv_cond_wait(&ptls->wake_signal, &ptls->sleep_lock);
540+                 uv_mutex_unlock(&ptls->sleep_lock);
541+             }
542+         }
543+
503544 // the other threads will just wait for an individual wake signal to resume
504545 JULIA_DEBUG_SLEEPWAKE ( ptls -> sleep_enter = cycleclock () );
505546 int8_t gc_state = jl_gc_safe_enter (ptls );
506547 uv_mutex_lock (& ptls -> sleep_lock );
507548 while (may_sleep (ptls )) {
508-             if (ptls->tid == 0 && wait_empty) {
509-                 task = wait_empty;
510-                 if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping) {
511-                     jl_atomic_store_relaxed(&ptls->sleep_check_state, not_sleeping); // let other threads know they don't need to wake us
512-                     JL_PROBE_RT_SLEEP_CHECK_TASK_WAKE(ptls);
513-                 }
549+             task = wait_empty;
550+             if (ptls->tid == 0 && task && jl_atomic_load_relaxed(&nrunning) == 0) {
551+                 wasrunning = jl_atomic_fetch_add_relaxed(&nrunning, 1);
552+                 assert(!wasrunning);
553+                 wasrunning = !set_not_sleeping(ptls);
554+                 assert(!wasrunning);
555+                 JL_PROBE_RT_SLEEP_CHECK_TASK_WAKE(ptls);
556+                 if (!ptls->finalizers_inhibited)
557+                     ptls->finalizers_inhibited++; // this annoyingly is rather sticky (we should like to reset it at the end of jl_task_wait_empty)
514558                 break;
515559             }
560+ // else should we warn the user of certain deadlock here if tid == 0 && nrunning == 0?
516561 uv_cond_wait (& ptls -> wake_signal , & ptls -> sleep_lock );
517562 }
518563 assert (jl_atomic_load_relaxed (& ptls -> sleep_check_state ) == not_sleeping );
564+         assert(jl_atomic_load_relaxed(&nrunning));
565+         start_cycles = 0;
519566 uv_mutex_unlock (& ptls -> sleep_lock );
520567 JULIA_DEBUG_SLEEPWAKE ( ptls -> sleep_leave = cycleclock () );
521568 jl_gc_safe_leave (ptls , gc_state ); // contains jl_gc_safepoint
522- start_cycles = 0 ;
523569 if (task ) {
524570 assert (task == wait_empty );
525571 wait_empty = NULL ;
@@ -533,6 +579,23 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q,
533579 }
534580}
535581
582+ void scheduler_delete_thread(jl_ptls_t ptls) JL_NOTSAFEPOINT
583+ {
584+     if (jl_atomic_exchange_relaxed(&ptls->sleep_check_state, sleeping_like_the_dead) != sleeping) {
585+         int wasrunning = jl_atomic_fetch_add_relaxed(&nrunning, -1);
586+         if (wasrunning == 1) {
587+             jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[0];
588+             // This was the last running thread, and there is no thread with !may_sleep
589+             // so make sure tid 0 is notified to check wait_empty
590+             uv_mutex_lock(&ptls2->sleep_lock);
591+             uv_cond_signal(&ptls2->wake_signal);
592+             uv_mutex_unlock(&ptls2->sleep_lock);
593+         }
594+     }
595+     jl_fence();
596+     jl_wakeup_thread(0); // force thread 0 to see that we do not have the IO lock (and am dead)
597+ }
598+
536599#ifdef __cplusplus
537600}
538601#endif
0 commit comments