diff --git a/.github/scripts/ci-test-only-normal-no-compressed-oops.sh b/.github/scripts/ci-test-only-normal-no-compressed-oops.sh index 1d6f3aa6..a4e336b3 100755 --- a/.github/scripts/ci-test-only-normal-no-compressed-oops.sh +++ b/.github/scripts/ci-test-only-normal-no-compressed-oops.sh @@ -10,6 +10,7 @@ run_all_no_compressed_oop() { runbms_dacapo2006_with_heap_multiplier antlr $heap_multiplier -XX:-UseCompressedOops -XX:-UseCompressedClassPointers runbms_dacapo2006_with_heap_multiplier fop $heap_multiplier -XX:-UseCompressedOops -XX:-UseCompressedClassPointers runbms_dacapo2006_with_heap_multiplier luindex $heap_multiplier -XX:-UseCompressedOops -XX:-UseCompressedClassPointers + runbms_dacapo2006_with_heap_multiplier lusearch $heap_multiplier -XX:-UseCompressedOops -XX:-UseCompressedClassPointers runbms_dacapo2006_with_heap_multiplier pmd $heap_multiplier -XX:-UseCompressedOops -XX:-UseCompressedClassPointers runbms_dacapo2006_with_heap_multiplier hsqldb $heap_multiplier -XX:-UseCompressedOops -XX:-UseCompressedClassPointers runbms_dacapo2006_with_heap_multiplier eclipse $heap_multiplier -XX:-UseCompressedOops -XX:-UseCompressedClassPointers diff --git a/.github/scripts/ci-test-only-normal.sh b/.github/scripts/ci-test-only-normal.sh index fe16b773..8f834d1b 100755 --- a/.github/scripts/ci-test-only-normal.sh +++ b/.github/scripts/ci-test-only-normal.sh @@ -10,6 +10,7 @@ run_all() { runbms_dacapo2006_with_heap_multiplier antlr $heap_multiplier runbms_dacapo2006_with_heap_multiplier fop $heap_multiplier runbms_dacapo2006_with_heap_multiplier luindex $heap_multiplier + runbms_dacapo2006_with_heap_multiplier lusearch $heap_multiplier runbms_dacapo2006_with_heap_multiplier pmd $heap_multiplier runbms_dacapo2006_with_heap_multiplier hsqldb $heap_multiplier runbms_dacapo2006_with_heap_multiplier eclipse $heap_multiplier diff --git a/openjdk/mmtkHeap.cpp b/openjdk/mmtkHeap.cpp index 1f626102..09279351 100644 --- a/openjdk/mmtkHeap.cpp +++ b/openjdk/mmtkHeap.cpp @@ -69,7 +69,7 @@ MMTkHeap::MMTkHeap(MMTkCollectorPolicy* policy) : _collector_policy(policy), _num_root_scan_tasks(0), _n_workers(0), - _gc_lock(new Monitor(Mutex::safepoint, "MMTkHeap::_gc_lock", true, Monitor::_safepoint_check_sometimes)), + _gc_lock(new Monitor(Mutex::safepoint, "MMTkHeap::_gc_lock", true, Monitor::_safepoint_check_never)), _soft_ref_policy() { _heap = this; diff --git a/openjdk/mmtkUpcalls.cpp b/openjdk/mmtkUpcalls.cpp index 7b8d0db5..c9f952ea 100644 --- a/openjdk/mmtkUpcalls.cpp +++ b/openjdk/mmtkUpcalls.cpp @@ -74,19 +74,19 @@ static void mmtk_resume_mutators(void *tls) { #endif // Note: we don't have to hold gc_lock to increment the counter. - // The increment has to be done before mutators can be resumed - // otherwise, mutators might see a stale value + // The increment has to be done before mutators can be resumed (from `block_for_gc` or yieldpoints). + // Otherwise, mutators might see an outdated start-the-world count. Atomic::inc(&mmtk_start_the_world_count); + log_debug(gc)("Incremented start_the_world counter to %zu.", Atomic::load(&mmtk_start_the_world_count)); - log_debug(gc)("Requesting the VM to resume all mutators..."); + log_debug(gc)("Requesting the companion thread to resume all mutators blocking on yieldpoints..."); MMTkHeap::heap()->companion_thread()->request(MMTkVMCompanionThread::_threads_resumed, true); - log_debug(gc)("Mutators resumed. Now notify any mutators waiting for GC to finish..."); + log_debug(gc)("Notifying mutators blocking on the start-the-world counter..."); { - MutexLockerEx locker(MMTkHeap::heap()->gc_lock(), true); + MutexLockerEx locker(MMTkHeap::heap()->gc_lock(), Mutex::_no_safepoint_check_flag); MMTkHeap::heap()->gc_lock()->notify_all(); } - log_debug(gc)("Mutators notified."); } static const int GC_THREAD_KIND_WORKER = 1; @@ -111,30 +111,37 @@ static void mmtk_spawn_gc_thread(void* tls, int kind, void* ctx) { static void mmtk_block_for_gc() { MMTkHeap::heap()->_last_gc_time = os::javaTimeNanos() / NANOSECS_PER_MILLISEC; - log_debug(gc)("Thread (id=%d) will block waiting for GC to finish.", Thread::current()->osthread()->thread_id()); // We must read the counter before entering safepoint. - // This thread has just triggered GC. - // Before this thread enters safe point, the GC cannot start, and therefore cannot finish, - // and cannot increment the counter mmtk_start_the_world_count. - // Otherwise, if we attempt to acquire the gc_lock first, GC may have triggered stop-the-world - // first, and this thread will be blocked for the entire stop-the-world duration before it can - // get the lock. Once that happens, the current thread will read the counter after the GC, and - // wait for the next non-existing GC forever. + // This thread (or another mutator) has just triggered GC. + // The GC cannot start until all mutators enter safepoint. + // If the GC cannot start, it cannot finish, and cannot increment mmtk_start_the_world_count. + // Otherwise, if we enter safepoint before reading mmtk_start_the_world_count, + // we will allow the GC to start before we read the counter, + // and the GC workers may run so fast that they have finished one whole GC and incremented the + // counter before this mutator reads the counter for the first time. + // Once that happens, the current mutator will wait for the next GC forever, + // which may not happen at all before the program exits. size_t my_count = Atomic::load(&mmtk_start_the_world_count); size_t next_count = my_count + 1; + log_debug(gc)("Will block until the start_the_world counter reaches %zu.", next_count); + { - // Once this thread acquires the lock, the VM will consider this thread to be "in safe point". - MutexLocker locker(MMTkHeap::heap()->gc_lock()); + // Enter safepoint. + JavaThread* thread = JavaThread::current(); + ThreadBlockInVM tbivm(thread); + + // No safepoint check. We are already in safepoint. + MutexLockerEx locker(MMTkHeap::heap()->gc_lock(), Mutex::_no_safepoint_check_flag); while (Atomic::load(&mmtk_start_the_world_count) < next_count) { // wait() may wake up spuriously, but the authoritative condition for unblocking is // mmtk_start_the_world_count being incremented. - MMTkHeap::heap()->gc_lock()->wait(); + MMTkHeap::heap()->gc_lock()->wait(Mutex::_no_safepoint_check_flag); } } - log_debug(gc)("Thread (id=%d) resumed after GC finished.", Thread::current()->osthread()->thread_id()); + log_debug(gc)("Resumed after GC finished."); } static void mmtk_out_of_memory(void* tls, MMTkAllocationError err_kind) { @@ -167,7 +174,7 @@ static bool mmtk_is_mutator(void* tls) { static void mmtk_get_mutators(MutatorClosure closure) { JavaThread *thr; - for (JavaThreadIteratorWithHandle jtiwh; thr = jtiwh.next();) { + for (JavaThreadIteratorWithHandle jtiwh; (thr = jtiwh.next());) { closure.invoke(&thr->third_party_heap_mutator); } } diff --git a/openjdk/mmtkVMCompanionThread.cpp b/openjdk/mmtkVMCompanionThread.cpp index 17f2d2c1..4f7835d5 100644 --- a/openjdk/mmtkVMCompanionThread.cpp +++ b/openjdk/mmtkVMCompanionThread.cpp @@ -53,7 +53,7 @@ void MMTkVMCompanionThread::run() { MutexLockerEx locker(_lock, Mutex::_no_safepoint_check_flag); assert(_reached_state == _threads_resumed, "Threads should be running at this moment."); while (_desired_state != _threads_suspended) { - _lock->wait(true); + _lock->wait(Mutex::_no_safepoint_check_flag); } assert(_reached_state == _threads_resumed, "Threads should still be running at this moment."); } @@ -63,25 +63,9 @@ void MMTkVMCompanionThread::run() { VM_MMTkSTWOperation op(this); // VMThread::execute() is blocking. The companion thread will be blocked // here waiting for the VM thread to execute op, and the VM thread will - // be blocked in reach_suspended_and_wait_for_resume() until a GC thread + // be blocked in do_mmtk_stw_operation() until a GC thread // calls request(_threads_resumed). VMThread::execute(&op); - - // Tell the waiter thread that the world has resumed. - log_trace(gc)("MMTkVMCompanionThread: Notifying threads resumption..."); - { - MutexLockerEx locker(_lock, Mutex::_no_safepoint_check_flag); - assert(_desired_state == _threads_resumed, "start-the-world should be requested."); - assert(_reached_state == _threads_suspended, "Threads should still be suspended at this moment."); - _reached_state = _threads_resumed; - _lock->notify_all(); - } - { - MutexLocker x(Heap_lock); - if (Universe::has_reference_pending_list()) { - Heap_lock->notify_all(); - } - } } } @@ -107,7 +91,7 @@ void MMTkVMCompanionThread::request(stw_state desired_state, bool wait_until_rea if (wait_until_reached) { while (_reached_state != desired_state) { - _lock->wait(true); + _lock->wait(Mutex::_no_safepoint_check_flag); } } } @@ -123,23 +107,70 @@ void MMTkVMCompanionThread::wait_for_reached(stw_state desired_state) { assert(_desired_state == desired_state, "State %d not requested.", desired_state); while (_reached_state != desired_state) { - _lock->wait(true); + _lock->wait(Mutex::_no_safepoint_check_flag); } } -// Called by the VM thread to indicate that all Java threads have stopped. -// This method will block until the GC requests start-the-world. -void MMTkVMCompanionThread::reach_suspended_and_wait_for_resume() { - assert(Thread::current()->is_VM_thread(), "reach_suspended_and_wait_for_resume can only be executed by the VM thread"); +// Called by the VM thread in `VM_MMTkSTWOperation`. +// This method notify that all Java threads have yielded, and will block the VM thread (thereby +// blocking Java threads) until the GC requests start-the-world. +void MMTkVMCompanionThread::do_mmtk_stw_operation() { + assert(Thread::current()->is_VM_thread(), "do_mmtk_stw_operation can only be executed by the VM thread"); - MutexLockerEx locker(_lock, Mutex::_no_safepoint_check_flag); + { + MutexLockerEx locker(_lock, Mutex::_no_safepoint_check_flag); - // Tell the waiter thread that the world has stopped. - _reached_state = _threads_suspended; - _lock->notify_all(); + // Tell the waiter thread that Java threads have stopped at yieldpoints. + _reached_state = _threads_suspended; + log_trace(gc)("do_mmtk_stw_operation: Reached _thread_suspended state. Notifying..."); + _lock->notify_all(); + + // Wait until resume-the-world is requested + while (_desired_state != _threads_resumed) { + _lock->wait(Mutex::_no_safepoint_check_flag); + } - // Wait until resume-the-world is requested - while (_desired_state != _threads_resumed) { - _lock->wait(true); + // Tell the waiter thread that Java threads will eventually resume from yieldpoints. This + // function will return, and, as soon as the VM thread stops executing safepoint VM operations, + // Java threads will resume from yieldpoints. + // + // Note: We have to notify *now* instead of after `VMThread::execute()`. For reasons unknown + // (likely a bug in OpenJDK 11), the VMThread fails to notify the companion thread after + // evaluating `VM_MMTkSTWOperation`, and continues to execute other VM operations (such as + // `RevokeBias`). This leaves the companion thread blocking on `VMThread::execute()` until the + // VM thread finishes executing the next batch of queued VM operations. If we notify after + // `VMThread::execute` in `run()`, it will cause a deadlock like the following: + // + // - The companion thread is blocked at `VMThread::execute()`, waiting for the next batch of VM + // operations to finish. + // - The VM thread is blocked in `SafepointSynchronize::begin()`, waiting for all mutators to + // reach safepoints. + // - One mutator is allocating too fast and triggers a GC, which requires the `WorkerMonitor` + // lock in mmtk-core. + // - A GC worker is still executing `mmtk_resume_mutator`, holding the `WorkerMutator` (as the + // last parked GC worker). It is asking the companion thread to resume mutators, and is still + // waiting for the companion thread to reach the `_thread_resumed` state. As we see before, + // the companion thread is waiting, too. + // + // By notifying now, we unblock the last parked GC worker before the companion thread returns + // from `VMThread::execute`, allowing the GC worker to finish `resume_mutators`, breaking the + // deadlock. When the next GC starts, the GC worker running `mmtk_stop_all_mutators` will need + // to wait a little longer (as it always should) until the VM thread finishes executing other VM + // operations and the companion thread is ready to respond to another request from GC workers. + // + // Also note that OpenJDK 17 changed the way the VM thread executes VM operations. The same + // problem may not manifest in OpenJDK 17 or 21. + assert(_desired_state == _threads_resumed, "start-the-world should be requested."); + assert(_reached_state == _threads_suspended, "Threads should still be suspended at this moment."); + _reached_state = _threads_resumed; + log_trace(gc)("do_mmtk_stw_operation: Reached _thread_resumed state. Notifying..."); + _lock->notify_all(); + } + + { + MutexLockerEx x(Heap_lock, Mutex::_no_safepoint_check_flag); + if (Universe::has_reference_pending_list()) { + Heap_lock->notify_all(); + } } } diff --git a/openjdk/mmtkVMCompanionThread.hpp b/openjdk/mmtkVMCompanionThread.hpp index 335eaa85..5a147e17 100644 --- a/openjdk/mmtkVMCompanionThread.hpp +++ b/openjdk/mmtkVMCompanionThread.hpp @@ -63,7 +63,7 @@ class MMTkVMCompanionThread: public NamedThread { void wait_for_reached(stw_state reached_state); // Interface for the VM_MMTkSTWOperation - void reach_suspended_and_wait_for_resume(); + void do_mmtk_stw_operation(); }; #endif // MMTK_OPENJDK_MMTK_VM_COMPANION_THREAD_HPP diff --git a/openjdk/mmtkVMOperation.cpp b/openjdk/mmtkVMOperation.cpp index 73676eec..51227ee3 100644 --- a/openjdk/mmtkVMOperation.cpp +++ b/openjdk/mmtkVMOperation.cpp @@ -34,6 +34,6 @@ VM_MMTkSTWOperation::VM_MMTkSTWOperation(MMTkVMCompanionThread *companion_thread void VM_MMTkSTWOperation::doit() { log_trace(vmthread)("Entered VM_MMTkSTWOperation::doit()."); - _companion_thread->reach_suspended_and_wait_for_resume(); + _companion_thread->do_mmtk_stw_operation(); log_trace(vmthread)("Leaving VM_MMTkSTWOperation::doit()"); }