Skip to content

Commit df09c6e

Browse files
authored
fix(Executor): Fix segfault if callback group is deleted during rmw_wait (#2682)
Signed-off-by: Janosch Machowinski <[email protected]>
1 parent 06ae92a commit df09c6e

File tree

3 files changed

+78
-0
lines changed

3 files changed

+78
-0
lines changed

rclcpp/src/rclcpp/executor.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -729,13 +729,33 @@ Executor::wait_for_work(std::chrono::nanoseconds timeout)
729729
// Clear any previous wait result
730730
this->wait_result_.reset();
731731

732+
// we need to make sure that callback groups don't get out of scope
733+
// during the wait. As in jazzy, they are not covered by the DynamicStorage,
734+
// we explicitly hold them here as a bugfix
735+
std::vector<rclcpp::CallbackGroup::SharedPtr> cbgs;
736+
732737
{
733738
std::lock_guard<std::mutex> guard(mutex_);
739+
734740
if (this->entities_need_rebuild_.exchange(false) || current_collection_.empty()) {
735741
this->collect_entities();
736742
}
743+
744+
auto callback_groups = this->collector_.get_all_callback_groups();
745+
cbgs.resize(callback_groups.size());
746+
for(const auto & w_ptr : callback_groups) {
747+
auto shr_ptr = w_ptr.lock();
748+
if(shr_ptr) {
749+
cbgs.push_back(std::move(shr_ptr));
750+
}
751+
}
737752
}
753+
738754
this->wait_result_.emplace(wait_set_.wait(timeout));
755+
756+
// drop references to the callback groups, before trying to execute anything
757+
cbgs.clear();
758+
739759
if (!this->wait_result_ || this->wait_result_->kind() == WaitResultKind::Empty) {
740760
RCUTILS_LOG_WARN_NAMED(
741761
"rclcpp",

rclcpp/src/rclcpp/executors/static_single_threaded_executor.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,10 +110,29 @@ StaticSingleThreadedExecutor::spin_once_impl(std::chrono::nanoseconds timeout)
110110
std::optional<rclcpp::WaitResult<rclcpp::WaitSet>>
111111
StaticSingleThreadedExecutor::collect_and_wait(std::chrono::nanoseconds timeout)
112112
{
113+
// we need to make sure that callback groups don't get out of scope
114+
// during the wait. As in jazzy, they are not covered by the DynamicStorage,
115+
// we explicitly hold them here as a bugfix
116+
std::vector<rclcpp::CallbackGroup::SharedPtr> cbgs;
117+
113118
if (this->entities_need_rebuild_.exchange(false) || current_collection_.empty()) {
114119
this->collect_entities();
115120
}
121+
122+
auto callback_groups = this->collector_.get_all_callback_groups();
123+
cbgs.resize(callback_groups.size());
124+
for(const auto & w_ptr : callback_groups) {
125+
auto shr_ptr = w_ptr.lock();
126+
if(shr_ptr) {
127+
cbgs.push_back(std::move(shr_ptr));
128+
}
129+
}
130+
116131
auto wait_result = wait_set_.wait(std::chrono::nanoseconds(timeout));
132+
133+
// drop references to the callback groups, before trying to execute anything
134+
cbgs.clear();
135+
117136
if (wait_result.kind() == WaitResultKind::Empty) {
118137
RCUTILS_LOG_WARN_NAMED(
119138
"rclcpp",

rclcpp/test/rclcpp/executors/test_executors.cpp

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -795,3 +795,42 @@ TYPED_TEST(TestExecutors, release_ownership_entity_after_spinning_cancel)
795795

796796
EXPECT_EQ(server.use_count(), 1);
797797
}
798+
799+
TYPED_TEST(TestExecutors, testRaceDropCallbackGroupFromSecondThread)
800+
{
801+
using ExecutorType = TypeParam;
802+
// rmw_connextdds doesn't support events-executor
803+
if (
804+
std::is_same<ExecutorType, rclcpp::experimental::executors::EventsExecutor>() &&
805+
std::string(rmw_get_implementation_identifier()).find("rmw_connextdds") == 0)
806+
{
807+
GTEST_SKIP();
808+
}
809+
810+
// Create an executor
811+
auto executor = std::make_shared<ExecutorType>();
812+
executor->add_node(this->node);
813+
814+
// Start spinning
815+
auto executor_thread = std::thread(
816+
[executor]() {
817+
executor->spin();
818+
});
819+
820+
// As the problem is a race, we do this multiple times,
821+
// to raise our chances of hitting the problem
822+
for(size_t i = 0; i < 10; i++) {
823+
auto cg = this->node->create_callback_group(rclcpp::CallbackGroupType::MutuallyExclusive);
824+
auto timer = this->node->create_timer(1s, [] {}, cg);
825+
// sleep a bit, so that the spin thread can pick up the callback group
826+
// and add it to the executor
827+
std::this_thread::sleep_for(5ms);
828+
829+
// At this point the callbackgroup should be used within the waitset of the executor
830+
// as we leave the scope, the reference to cg will be dropped.
831+
// If the executor has a race, we will experience a segfault at this point.
832+
}
833+
834+
executor->cancel();
835+
executor_thread.join();
836+
}

0 commit comments

Comments
 (0)