Skip to content

Deadlock in resume_mutator #278

@wks

Description

@wks

This is reproducible when running lusearch in DaCapo 2006. It seems it is more reproducible if we use taskset to limit the number of processors to use. With the following command, it is almost 100% reproducible.

MMTK_PLAN=SemiSpace MMTK_THREADS=1 taskset -c 0-3 /path/to/openjdk/build/linux-x86_64-normal-server-fastdebug/jdk/bin/java -XX:+UseThirdPartyHeap -server -XX:MetaspaceSize=500M -Xm{s,x}32M -jar dacapo-2006-10-MR2.jar lusearch
  • MMTK_THREADS=1 is not important. It can be reproduced with any number of GC threads.
  • taskset -c 0-3` seems to influence the probability to reproduce. The smaller the number of cores, the more likely it is to reproduce.

The stack trace shows that there is one mutator and one GC thread waiting.

The mutator thread is triggering a GC using GCRequester, which in turn is waiting for the mutex in the WorkerMonitor.

#0  syscall () at ../sysdeps/unix/sysv/linux/x86_64/syscall.S:38
#1  0x00007ffff50c35e4 in std::sys::pal::unix::futex::futex_wait () at library/std/src/sys/pal/unix/futex.rs:62
#2  std::sys::pal::unix::locks::futex_mutex::Mutex::lock_contended () at library/std/src/sys/pal/unix/locks/futex_mutex.rs:56
#3  0x00007ffff540dd08 in std::sys::pal::unix::locks::futex_mutex::Mutex::lock (self=0x7ffff004aed0)
    at /rustc/aedd173a2c086e558c2b66d3743b344f977621a7/library/std/src/sys/pal/unix/locks/futex_mutex.rs:28
#4  0x00007ffff54228b5 in std::sync::mutex::Mutex<mmtk::scheduler::worker_monitor::WorkerMonitorSync>::lock<mmtk::scheduler::worker_monitor::WorkerMonitorSync> (self=0x7ffff004aed0)
    at /rustc/aedd173a2c086e558c2b66d3743b344f977621a7/library/std/src/sync/mutex.rs:273
#5  0x00007ffff53dce9b in mmtk::scheduler::worker_monitor::WorkerMonitor::make_request (self=0x7ffff004aed0, goal=mmtk::scheduler::worker_goals::WorkerGoal::Gc) at src/scheduler/worker_monitor.rs:101
#6  0x00007ffff52fd045 in mmtk::scheduler::scheduler::GCWorkScheduler<mmtk_openjdk::OpenJDK<true>>::request_schedule_collection<mmtk_openjdk::OpenJDK<true>> (self=0x7ffff0054400)
    at /home/wks/.cargo/git/checkouts/mmtk-core-4a4b541a6bb74ff9/805b3b8/src/scheduler/scheduler.rs:132
#7  0x00007ffff50c4ffc in mmtk::plan::gc_requester::GCRequester<mmtk_openjdk::OpenJDK<true>>::request<mmtk_openjdk::OpenJDK<true>> (self=0x7ffff004cee0)
    at /home/wks/.cargo/git/checkouts/mmtk-core-4a4b541a6bb74ff9/805b3b8/src/plan/gc_requester.rs:33
#8  0x00007ffff50e66d3 in mmtk::util::heap::gc_trigger::GCTrigger<mmtk_openjdk::OpenJDK<true>>::poll<mmtk_openjdk::OpenJDK<true>> (self=0x7ffff004b160, space_full=false, space=...)
    at /home/wks/.cargo/git/checkouts/mmtk-core-4a4b541a6bb74ff9/805b3b8/src/util/heap/gc_trigger.rs:90
#9  0x00007ffff50de465 in mmtk::policy::space::Space::acquire<mmtk::policy::immix::immixspace::ImmixSpace<mmtk_openjdk::OpenJDK<true>>, mmtk_openjdk::OpenJDK<true>> (self=0x7ffff027ae80, tls=..., 
    pages=8) at /home/wks/.cargo/git/checkouts/mmtk-core-4a4b541a6bb74ff9/805b3b8/src/policy/space.rs:105
#10 0x00007ffff50d70b9 in mmtk::policy::immix::immixspace::ImmixSpace<mmtk_openjdk::OpenJDK<true>>::get_clean_block<mmtk_openjdk::OpenJDK<true>> (self=0x7ffff027ae80, tls=..., copy=false)
    at /home/wks/.cargo/git/checkouts/mmtk-core-4a4b541a6bb74ff9/805b3b8/src/policy/immix/immixspace.rs:503
#11 0x00007ffff5397978 in mmtk::util::alloc::immix_allocator::ImmixAllocator<mmtk_openjdk::OpenJDK<true>>::acquire_clean_block<mmtk_openjdk::OpenJDK<true>> (self=0x7ffff07a0450, size=24, align=8, 
    offset=0) at /home/wks/.cargo/git/checkouts/mmtk-core-4a4b541a6bb74ff9/805b3b8/src/util/alloc/immix_allocator.rs:292
#12 0x00007ffff53952eb in mmtk::util::alloc::immix_allocator::{impl#1}::alloc_slow_once<mmtk_openjdk::OpenJDK<true>> (self=0x7ffff07a0450, size=24, align=8, offset=0)
    at /home/wks/.cargo/git/checkouts/mmtk-core-4a4b541a6bb74ff9/805b3b8/src/util/alloc/immix_allocator.rs:106
#13 0x00007ffff539b2bd in mmtk::util::alloc::allocator::Allocator::alloc_slow_once_traced<mmtk::util::alloc::immix_allocator::ImmixAllocator<mmtk_openjdk::OpenJDK<true>>, mmtk_openjdk::OpenJDK<true>> (
    self=0x7ffff07a0450, size=24, align=8, offset=0) at /home/wks/.cargo/git/checkouts/mmtk-core-4a4b541a6bb74ff9/805b3b8/src/util/alloc/allocator.rs:382
#14 0x00007ffff539abf9 in mmtk::util::alloc::allocator::Allocator::alloc_slow_inline<mmtk::util::alloc::immix_allocator::ImmixAllocator<mmtk_openjdk::OpenJDK<true>>, mmtk_openjdk::OpenJDK<true>> (
    self=0x7ffff07a0450, size=24, align=8, offset=0) at /home/wks/.cargo/git/checkouts/mmtk-core-4a4b541a6bb74ff9/805b3b8/src/util/alloc/allocator.rs:251
#15 0x00007ffff539719b in mmtk::util::alloc::immix_allocator::ImmixAllocator<mmtk_openjdk::OpenJDK<true>>::alloc_slow_hot<mmtk_openjdk::OpenJDK<true>> (self=0x7ffff07a0450, size=24, align=8, offset=0)
    at /home/wks/.cargo/git/checkouts/mmtk-core-4a4b541a6bb74ff9/805b3b8/src/util/alloc/immix_allocator.rs:231
#16 0x00007ffff53966d3 in mmtk::util::alloc::immix_allocator::{impl#1}::alloc<mmtk_openjdk::OpenJDK<true>> (self=0x7ffff07a0450, size=24, align=8, offset=0)
    at /home/wks/.cargo/git/checkouts/mmtk-core-4a4b541a6bb74ff9/805b3b8/src/util/alloc/immix_allocator.rs:85
#17 0x00007ffff52e23ca in mmtk::plan::mutator_context::{impl#1}::alloc<mmtk_openjdk::OpenJDK<true>> (self=0x7ffff07a02e8, size=24, align=8, offset=0, 
    allocator=mmtk::plan::global::AllocationSemantics::Default) at /home/wks/.cargo/git/checkouts/mmtk-core-4a4b541a6bb74ff9/805b3b8/src/plan/mutator_context.rs:116
#18 0x00007ffff51b9097 in mmtk::memory_manager::alloc<mmtk_openjdk::OpenJDK<true>> (mutator=0x7ffff07a02e8, size=24, align=8, offset=0, semantics=mmtk::plan::global::AllocationSemantics::Default)
    at /home/wks/.cargo/git/checkouts/mmtk-core-4a4b541a6bb74ff9/805b3b8/src/memory_manager.rs:176
#19 0x00007ffff51ed029 in mmtk_openjdk::api::alloc (mutator=0x7ffff07a02e8, size=24, align=8, offset=0, allocator=mmtk::plan::global::AllocationSemantics::Default) at src/api.rs:170
#20 0x00007ffff7189a6f in MMTkMutatorContext::alloc (this=0x7ffff07a02e8, bytes=24, allocator=<optimized out>)
    at /home/wks/projects/mmtk-github/parallels/fix/side-fwd-bits/openjdk/../mmtk-openjdk/openjdk/mmtkMutator.cpp:29
#21 0x00007ffff70e7e00 in MemAllocator::allocate_outside_tlab (this=0x7fff8a9fc3b0, allocation=...)
    at /home/wks/projects/mmtk-github/parallels/fix/side-fwd-bits/openjdk/src/hotspot/share/gc/shared/memAllocator.cpp:272
#22 MemAllocator::mem_allocate (this=0x7fff8a9fc3b0, allocation=...) at /home/wks/projects/mmtk-github/parallels/fix/side-fwd-bits/openjdk/src/hotspot/share/gc/shared/memAllocator.cpp:370
#23 MemAllocator::allocate (this=this@entry=0x7fff8a9fc3b0) at /home/wks/projects/mmtk-github/parallels/fix/side-fwd-bits/openjdk/src/hotspot/share/gc/shared/memAllocator.cpp:377
#24 0x00007ffff67182fd in CollectedHeap::obj_allocate (this=<optimized out>, klass=<optimized out>, size=<optimized out>, __the_thread__=<optimized out>)
    at /home/wks/projects/mmtk-github/parallels/fix/side-fwd-bits/openjdk/src/hotspot/share/gc/shared/collectedHeap.cpp:452
#25 0x00007ffff6b9da06 in InstanceKlass::allocate_instance (this=this@entry=0x100001868, __the_thread__=__the_thread__@entry=0x7ffff07a0000)
    at /home/wks/projects/mmtk-github/parallels/fix/side-fwd-bits/openjdk/src/hotspot/share/oops/instanceKlass.cpp:1256
#26 0x00007ffff73fb677 in OptoRuntime::new_instance_C (klass=0x100001868, thread=0x7ffff07a0000)
    at /home/wks/projects/mmtk-github/parallels/fix/side-fwd-bits/openjdk/src/hotspot/share/oops/instanceKlass.hpp:1051
#27 0x00007fffe1040145 in ?? ()

Currently the WorkerMonitor is held by a GC thread which is still in resume_mutators (the GC thread holds the mutex in WorkerMutator until the end of on_gc_finished). But the GC thread is waiting for the gc_lock in the OpenJDK binding.

#0  0x00007ffff7df14e9 in __futex_abstimed_wait_common64 (private=0, futex_word=0x7ffff0410280, expected=0, op=393, abstime=0x0, cancel=true) at futex-internal.c:57
#1  __futex_abstimed_wait_common (futex_word=futex_word@entry=0x7ffff0410280, expected=expected@entry=0, clockid=clockid@entry=0, abstime=abstime@entry=0x0, private=private@entry=0, 
    cancel=cancel@entry=true) at futex-internal.c:87
#2  0x00007ffff7df156f in __GI___futex_abstimed_wait_cancelable64 (futex_word=futex_word@entry=0x7ffff0410280, expected=expected@entry=0, clockid=clockid@entry=0, abstime=abstime@entry=0x0, 
    private=private@entry=0) at futex-internal.c:139
#3  0x00007ffff7df3ed9 in __pthread_cond_wait_common (cond=0x7ffff0410258, mutex=<optimized out>, clockid=0, abstime=0x0) at pthread_cond_wait.c:503
#4  ___pthread_cond_wait (cond=0x7ffff0410258, mutex=<optimized out>) at pthread_cond_wait.c:618
#5  0x00007ffff7287e1b in os::PlatformEvent::park (this=0x7ffff0410200) at /home/wks/projects/mmtk-github/parallels/fix/side-fwd-bits/openjdk/src/hotspot/os/posix/os_posix.cpp:2022
#6  0x00007ffff71c8c70 in ParkCommon (ev=0x7ffff0410200, timo=0) at /home/wks/projects/mmtk-github/parallels/fix/side-fwd-bits/openjdk/src/hotspot/share/runtime/mutex.cpp:404
#7  Monitor::ILock (this=0x7ffff00498d0, Self=0x7ffff040e800) at /home/wks/projects/mmtk-github/parallels/fix/side-fwd-bits/openjdk/src/hotspot/share/runtime/mutex.cpp:466
#8  0x00007ffff71cb73d in Monitor::lock_without_safepoint_check (this=0x7ffff00498d0, Self=0x7ffff040e800)
    at /home/wks/projects/mmtk-github/parallels/fix/side-fwd-bits/openjdk/src/hotspot/share/runtime/mutex.cpp:938
#9  Monitor::lock_without_safepoint_check (this=this@entry=0x7ffff00498d0) at /home/wks/projects/mmtk-github/parallels/fix/side-fwd-bits/openjdk/src/hotspot/share/runtime/mutex.cpp:944
#10 0x00007ffff7191fe8 in MutexLockerEx::MutexLockerEx (this=<synthetic pointer>, mutex=0x7ffff00498d0, no_safepoint_check=true)
    at /home/wks/projects/mmtk-github/parallels/fix/side-fwd-bits/openjdk/src/hotspot/share/runtime/mutexLocker.hpp:233
#11 mmtk_resume_mutators (tls=<optimized out>) at /home/wks/projects/mmtk-github/parallels/fix/side-fwd-bits/openjdk/../mmtk-openjdk/openjdk/mmtkUpcalls.cpp:88
#12 0x00007ffff5274496 in mmtk_openjdk::collection::{impl#0}::resume_mutators<true> (tls=...) at src/collection.rs:28
#13 0x00007ffff52f36b2 in mmtk::scheduler::scheduler::GCWorkScheduler<mmtk_openjdk::OpenJDK<true>>::on_gc_finished<mmtk_openjdk::OpenJDK<true>> (self=0x7ffff0054400, worker=0x7ffff040bd00)
    at /home/wks/.cargo/git/checkouts/mmtk-core-4a4b541a6bb74ff9/805b3b8/src/scheduler/scheduler.rs:583
#14 0x00007ffff52f3fbe in mmtk::scheduler::scheduler::GCWorkScheduler<mmtk_openjdk::OpenJDK<true>>::on_last_parked<mmtk_openjdk::OpenJDK<true>> (self=0x7ffff0054400, worker=0x7ffff040bd00, 
    goals=0x7ffff004aee8) at /home/wks/.cargo/git/checkouts/mmtk-core-4a4b541a6bb74ff9/805b3b8/src/scheduler/scheduler.rs:442
#15 0x00007ffff52fe990 in mmtk::scheduler::scheduler::{impl#1}::poll_slow::{closure#0}<mmtk_openjdk::OpenJDK<true>> (goals=0x7ffff004aee8)
    at /home/wks/.cargo/git/checkouts/mmtk-core-4a4b541a6bb74ff9/805b3b8/src/scheduler/scheduler.rs:406
#16 0x00007ffff5306098 in mmtk::scheduler::worker_monitor::WorkerMonitor::park_and_wait<mmtk::scheduler::scheduler::{impl#1}::poll_slow::{closure_env#0}<mmtk_openjdk::OpenJDK<true>>> (
    self=0x7ffff004aed0, ordinal=1, on_last_parked=...) at /home/wks/.cargo/git/checkouts/mmtk-core-4a4b541a6bb74ff9/805b3b8/src/scheduler/worker_monitor.rs:151
#17 0x00007ffff52fe800 in mmtk::scheduler::scheduler::GCWorkScheduler<mmtk_openjdk::OpenJDK<true>>::poll_slow<mmtk_openjdk::OpenJDK<true>> (self=0x7ffff0054400, worker=0x7ffff040bd00)
    at /home/wks/.cargo/git/checkouts/mmtk-core-4a4b541a6bb74ff9/805b3b8/src/scheduler/scheduler.rs:405
#18 0x00007ffff52fe70a in mmtk::scheduler::scheduler::GCWorkScheduler<mmtk_openjdk::OpenJDK<true>>::poll<mmtk_openjdk::OpenJDK<true>> (self=0x7ffff0054400, worker=0x7ffff040bd00)
    at /home/wks/.cargo/git/checkouts/mmtk-core-4a4b541a6bb74ff9/805b3b8/src/scheduler/scheduler.rs:394
#19 0x00007ffff50f1a37 in mmtk::scheduler::worker::GCWorker<mmtk_openjdk::OpenJDK<true>>::poll<mmtk_openjdk::OpenJDK<true>> (self=0x7ffff040bd00)
    at /home/wks/.cargo/git/checkouts/mmtk-core-4a4b541a6bb74ff9/805b3b8/src/scheduler/worker.rs:194
#20 0x00007ffff50f0949 in mmtk::scheduler::worker::GCWorker<mmtk_openjdk::OpenJDK<true>>::run<mmtk_openjdk::OpenJDK<true>> (self=0x7ffff040bd00, tls=..., 
    mmtk=0x7ffff5a5b090 <<mmtk_openjdk::SINGLETON_COMPRESSED as core::ops::deref::Deref>::deref::__stability::LAZY>)
    at /home/wks/.cargo/git/checkouts/mmtk-core-4a4b541a6bb74ff9/805b3b8/src/scheduler/worker.rs:229
#21 0x00007ffff51b6475 in mmtk::memory_manager::start_worker<mmtk_openjdk::OpenJDK<true>> (
    mmtk=0x7ffff5a5b090 <<mmtk_openjdk::SINGLETON_COMPRESSED as core::ops::deref::Deref>::deref::__stability::LAZY>, tls=..., worker=0x7ffff040bd00)
    at /home/wks/.cargo/git/checkouts/mmtk-core-4a4b541a6bb74ff9/805b3b8/src/memory_manager.rs:492
#22 0x00007ffff51ed2e8 in mmtk_openjdk::api::start_worker (tls=..., worker=0x7ffff040bd00) at src/api.rs:215
#23 0x00007ffff7597d97 in Thread::call_run (this=0x7ffff040e800) at /home/wks/projects/mmtk-github/parallels/fix/side-fwd-bits/openjdk/src/hotspot/share/runtime/thread.cpp:402
#24 0x00007ffff72774a6 in thread_native_entry (thread=0x7ffff040e800) at /home/wks/projects/mmtk-github/parallels/fix/side-fwd-bits/openjdk/src/hotspot/os/linux/os_linux.cpp:826
#25 0x00007ffff7df4ded in start_thread (arg=<optimized out>) at pthread_create.c:447
#26 0x00007ffff7e780dc in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:78

The gc_lock is only used by block_for_gc and resume_mutators. The former waits until the current GC counter is incremented, while the latter increments the GC counter and notifies the mutators in block_for_gc.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions