From 4cf90e13fa98171787372a47e06dcfdf9b953c26 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 13 Oct 2025 13:46:15 -0500 Subject: [PATCH 01/17] Move `start_doing_background_updates()` to `HomeServer.start_background_tasks()` --- synapse/app/homeserver.py | 5 ----- synapse/server.py | 4 ++++ 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index b9ac86c2fc3..23119c5cc53 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -317,11 +317,6 @@ def start_listening(self) -> None: # during parsing logger.warning("Unrecognized listener type: %s", listener.type) - def start_background_tasks(self) -> None: - super().start_background_tasks() - - self.get_datastores().main.db_pool.updates.start_doing_background_updates() - def load_or_generate_config(argv_options: List[str]) -> HomeServerConfig: """ diff --git a/synapse/server.py b/synapse/server.py index b63a11273a7..d3a0381434b 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -640,12 +640,16 @@ def start_background_tasks(self) -> None: Some handlers have side effects on instantiation (like registering background updates). This function causes them to be fetched, and therefore instantiated, to run those side effects. + + We assume these background tasks are only run on a single Synapse instance at a + time. """ for i in self.REQUIRED_ON_BACKGROUND_TASK_STARTUP: getattr(self, "get_" + i + "_handler")() self.get_task_scheduler() self.get_common_usage_metrics_manager().setup() start_phone_stats_home(self) + self.get_datastores().main.db_pool.updates.start_doing_background_updates() def get_reactor(self) -> ISynapseReactor: """ From 78fd66ce87999cb6f4ad418ad1e80096e91d4e32 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 13 Oct 2025 13:51:31 -0500 Subject: [PATCH 02/17] Add changelog --- changelog.d/19036.misc | 2 +- changelog.d/19057.misc | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/19057.misc diff --git a/changelog.d/19036.misc b/changelog.d/19036.misc index 95b8daab9b0..8790cc2342b 100644 --- a/changelog.d/19036.misc +++ b/changelog.d/19036.misc @@ -1 +1 @@ -Move `start_doing_background_updates()` to `SynapseHomeServer.start_background_tasks()`. +Move `start_doing_background_updates()` to `HomeServer.start_background_tasks()`. diff --git a/changelog.d/19057.misc b/changelog.d/19057.misc new file mode 100644 index 00000000000..8790cc2342b --- /dev/null +++ b/changelog.d/19057.misc @@ -0,0 +1 @@ +Move `start_doing_background_updates()` to `HomeServer.start_background_tasks()`. From d1ed34a9aca1100a32ce1954602624a92ad9903b Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 13 Oct 2025 14:21:26 -0500 Subject: [PATCH 03/17] Fix logcontext problems See https://github.com/element-hq/synapse/pull/19057#discussion_r2427137480 Running any `HomeserverTestCase` results in: ``` $ SYNAPSE_TEST_LOG_LEVEL=DEBUG poetry run trial tests.rest.client.sliding_sync.test_sliding_sync.SlidingSyncTestCase_new.test_sync_list ... builtins.AssertionError: Expected `call_later` callback from the reactor to start with the sentinel logcontext but saw run_bg_updates. In other words, another task shouldn't have leaked their logcontext to us. ``` --- tests/unittest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unittest.py b/tests/unittest.py index 9ab052e7c0c..5fab5bbfabe 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -679,7 +679,7 @@ def setup_test_homeserver( async def run_bg_updates() -> None: with LoggingContext(name="run_bg_updates", server_name=server_name): - self.get_success(stor.db_pool.updates.run_background_updates(False)) + await stor.db_pool.updates.run_background_updates(False) hs = setup_test_homeserver( cleanup_func=self.addCleanup, From 01343d651962d84e36860a9c241de09e8c4574d7 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 13 Oct 2025 14:33:28 -0500 Subject: [PATCH 04/17] Remove duplicate database background update running This already happens as part of `HomeServer.start_background_tasks()` --- tests/unittest.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/tests/unittest.py b/tests/unittest.py index 5fab5bbfabe..47f8cd717bf 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -677,10 +677,6 @@ def setup_test_homeserver( # construct a homeserver with a matching name. server_name = config_obj.server.server_name - async def run_bg_updates() -> None: - with LoggingContext(name="run_bg_updates", server_name=server_name): - await stor.db_pool.updates.run_background_updates(False) - hs = setup_test_homeserver( cleanup_func=self.addCleanup, server_name=server_name, @@ -689,11 +685,10 @@ async def run_bg_updates() -> None: clock=clock, **extra_homeserver_attributes, ) - stor = hs.get_datastores().main - # Run the database background updates, when running against "master". - if hs.__class__.__name__ == "TestHomeServer": - self.get_success(run_bg_updates()) + # Wait for the database background updates to complete. This is important + # because tests assume that the database is using the latest schema. + self.wait_for_background_updates() return hs From 2fcd70dbf89f38b6450d0a182ca20fc434ae575f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 13 Oct 2025 14:35:05 -0500 Subject: [PATCH 05/17] Add future TODO --- tests/unittest.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/unittest.py b/tests/unittest.py index 47f8cd717bf..6a6cfd7c1b5 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -690,6 +690,9 @@ def setup_test_homeserver( # because tests assume that the database is using the latest schema. self.wait_for_background_updates() + # TODO: How can we concretely know that the database background updates were + # scheduled and now complete? + return hs def pump(self, by: float = 0.0) -> None: From 74c5e64dd2d97b4c1c4a233863724a50105b4e9a Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 13 Oct 2025 14:37:45 -0500 Subject: [PATCH 06/17] Fix lints --- tests/unittest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unittest.py b/tests/unittest.py index 6a6cfd7c1b5..42dbc8a94e5 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -73,7 +73,6 @@ from synapse.http.site import SynapseRequest, SynapseSite from synapse.logging.context import ( SENTINEL_CONTEXT, - LoggingContext, current_context, set_current_context, ) From c1d89d89b58c587bd924061f0476cdeafc8a34c3 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 13 Oct 2025 14:39:17 -0500 Subject: [PATCH 07/17] Remove assumption See https://github.com/element-hq/synapse/pull/19057#discussion_r2427083757 --- synapse/server.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/synapse/server.py b/synapse/server.py index d3a0381434b..35b878d4b11 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -640,9 +640,6 @@ def start_background_tasks(self) -> None: Some handlers have side effects on instantiation (like registering background updates). This function causes them to be fetched, and therefore instantiated, to run those side effects. - - We assume these background tasks are only run on a single Synapse instance at a - time. """ for i in self.REQUIRED_ON_BACKGROUND_TASK_STARTUP: getattr(self, "get_" + i + "_handler")() From 064b30b6f013265aa67c838160a80a4bfbacf961 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 13 Oct 2025 15:14:49 -0500 Subject: [PATCH 08/17] Wait for homeserver to drive database background updates --- tests/unittest.py | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/tests/unittest.py b/tests/unittest.py index 42dbc8a94e5..09d6b189d88 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -107,6 +107,11 @@ R = TypeVar("R") S = TypeVar("S") +BACKGROUND_UPDATE_TIMEOUT_SECONDS = 5 +""" +We expect this to be pretty immediate as we're working with an empty database. +""" + class _TypedFailure(Generic[_ExcType], Protocol): """Extension to twisted.Failure, where the 'value' has a certain type.""" @@ -489,7 +494,11 @@ def wait_on_thread(self, deferred: Deferred, timeout: int = 10) -> None: def wait_for_background_updates(self) -> None: """Block until all background database updates have completed.""" - store = self.hs.get_datastores().main + self._wait_for_background_updates(self.hs) + + def _wait_for_background_updates(self, hs: HomeServer) -> None: + """Block until all background database updates have completed.""" + store = hs.get_datastores().main while not self.get_success( store.db_pool.updates.has_completed_background_updates() ): @@ -687,10 +696,23 @@ def setup_test_homeserver( # Wait for the database background updates to complete. This is important # because tests assume that the database is using the latest schema. - self.wait_for_background_updates() + # + # We could use `self._wait_for_background_updates(hs)` to accomplish the same + # thing but we don't want to start or drive the background updates here. We want + # to ensure the homeserver itself is doing that. + start_time_s = time.time() + store = hs.get_datastores().main + while not self.get_success( + store.db_pool.updates.has_completed_background_updates() + ): + current_time_s = time.time() + if current_time_s - start_time_s > BACKGROUND_UPDATE_TIMEOUT_SECONDS: + raise AssertionError( + f"Timed out waiting for background updates to complete ({BACKGROUND_UPDATE_TIMEOUT_SECONDS}s). " + "Did you forget to `start_doing_background_updates()`?" + ) - # TODO: How can we concretely know that the database background updates were - # scheduled and now complete? + self.pump(by=0.1) return hs From 5e87f89bf41f21e0b00ae1592654b32cedd7477c Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 13 Oct 2025 15:23:27 -0500 Subject: [PATCH 09/17] Better context for error --- tests/unittest.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/tests/unittest.py b/tests/unittest.py index 09d6b189d88..5186e66c314 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -79,6 +79,7 @@ from synapse.rest import RegisterServletsFunc from synapse.server import HomeServer from synapse.storage.keys import FetchKeyResult +from synapse.storage.background_updates import UpdaterStatus from synapse.types import ISynapseReactor, JsonDict, Requester, UserID, create_requester from synapse.util.clock import Clock from synapse.util.httpresourcetree import create_resource_tree @@ -705,11 +706,28 @@ def setup_test_homeserver( while not self.get_success( store.db_pool.updates.has_completed_background_updates() ): + # Timeout if it takes too long. This should be pretty immediate as we're + # working with an empty database. current_time_s = time.time() if current_time_s - start_time_s > BACKGROUND_UPDATE_TIMEOUT_SECONDS: + background_update_status = store.db_pool.updates.get_status() + + # Add some better context when we give up + extra_message = "" + if background_update_status == UpdaterStatus.NOT_STARTED: + extra_message = ( + "Did you forget to `start_doing_background_updates()`?" + ) + elif background_update_status == UpdaterStatus.RUNNING_UPDATE: + extra_message = "Background updates were still running when we gave up. Are they stuck?" + else: + extra_message = ( + f"Background update status was {background_update_status}." + ) + raise AssertionError( f"Timed out waiting for background updates to complete ({BACKGROUND_UPDATE_TIMEOUT_SECONDS}s). " - "Did you forget to `start_doing_background_updates()`?" + + extra_message ) self.pump(by=0.1) From edf9ef0b1351ec34667e3e73628ab522b29f4ad7 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 13 Oct 2025 15:25:09 -0500 Subject: [PATCH 10/17] Comment about slight naivety --- tests/unittest.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/unittest.py b/tests/unittest.py index 5186e66c314..76bd1fb6ab4 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -704,6 +704,11 @@ def setup_test_homeserver( start_time_s = time.time() store = hs.get_datastores().main while not self.get_success( + # This check is slightly naive. It only checks if there is anything left in + # the `background_updates` database table so it is possible that the + # homeserver mistakenly never registered any background updates to be run. + # Since `register_background_xxx(...)` is done across the codebase, we can't + # really assert that everything was registered as expected. store.db_pool.updates.has_completed_background_updates() ): # Timeout if it takes too long. This should be pretty immediate as we're From e14e5d3fb62ba3f35ed142388d8570010122e174 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 13 Oct 2025 15:27:47 -0500 Subject: [PATCH 11/17] Fix lints --- tests/unittest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unittest.py b/tests/unittest.py index 76bd1fb6ab4..e63a1a6e892 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -78,8 +78,8 @@ ) from synapse.rest import RegisterServletsFunc from synapse.server import HomeServer -from synapse.storage.keys import FetchKeyResult from synapse.storage.background_updates import UpdaterStatus +from synapse.storage.keys import FetchKeyResult from synapse.types import ISynapseReactor, JsonDict, Requester, UserID, create_requester from synapse.util.clock import Clock from synapse.util.httpresourcetree import create_resource_tree From 19b181b7bc6b690ab6e87efb673efa70a053d208 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 13 Oct 2025 16:21:52 -0500 Subject: [PATCH 12/17] Fix logcontext problems in `tests/util/test_task_scheduler.py` ``` builtins.AssertionError: Expected `looping_call` callback from the reactor to start with the sentinel logcontext but saw task-_resumable_task-0-IBzAmHUoepQfLnEA. In other words, another task shouldn't have leaked their logcontext to us. ``` --- tests/util/test_task_scheduler.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/util/test_task_scheduler.py b/tests/util/test_task_scheduler.py index e97f0ed6111..f8f3c9dafda 100644 --- a/tests/util/test_task_scheduler.py +++ b/tests/util/test_task_scheduler.py @@ -23,6 +23,7 @@ from twisted.internet.task import deferLater from twisted.internet.testing import MemoryReactor +from synapse.logging.context import make_deferred_yieldable from synapse.server import HomeServer from synapse.types import JsonMapping, ScheduledTask, TaskStatus from synapse.util.clock import Clock @@ -87,7 +88,7 @@ async def _sleeping_task( self, task: ScheduledTask ) -> Tuple[TaskStatus, Optional[JsonMapping], Optional[str]]: # Sleep for a second - await deferLater(self.reactor, 1, lambda: None) + await make_deferred_yieldable(deferLater(self.reactor, 1, lambda: None)) return TaskStatus.COMPLETE, None, None def test_schedule_lot_of_tasks(self) -> None: @@ -171,7 +172,7 @@ async def _resumable_task( else: await self.task_scheduler.update_task(task.id, result={"in_progress": True}) # Await forever to simulate an aborted task because of a restart - await deferLater(self.reactor, 2**16, lambda: None) + await make_deferred_yieldable(deferLater(self.reactor, 2**16, lambda: None)) # This should never been called return TaskStatus.ACTIVE, None, None From b4d17898bec352b5aa3ee95d522050c4d19456ea Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 13 Oct 2025 16:26:50 -0500 Subject: [PATCH 13/17] Better conversion --- tests/util/test_task_scheduler.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/util/test_task_scheduler.py b/tests/util/test_task_scheduler.py index f8f3c9dafda..992e05d76c3 100644 --- a/tests/util/test_task_scheduler.py +++ b/tests/util/test_task_scheduler.py @@ -20,10 +20,8 @@ # from typing import List, Optional, Tuple -from twisted.internet.task import deferLater from twisted.internet.testing import MemoryReactor -from synapse.logging.context import make_deferred_yieldable from synapse.server import HomeServer from synapse.types import JsonMapping, ScheduledTask, TaskStatus from synapse.util.clock import Clock @@ -88,7 +86,7 @@ async def _sleeping_task( self, task: ScheduledTask ) -> Tuple[TaskStatus, Optional[JsonMapping], Optional[str]]: # Sleep for a second - await make_deferred_yieldable(deferLater(self.reactor, 1, lambda: None)) + await self.hs.get_clock().sleep(1) return TaskStatus.COMPLETE, None, None def test_schedule_lot_of_tasks(self) -> None: @@ -172,7 +170,7 @@ async def _resumable_task( else: await self.task_scheduler.update_task(task.id, result={"in_progress": True}) # Await forever to simulate an aborted task because of a restart - await make_deferred_yieldable(deferLater(self.reactor, 2**16, lambda: None)) + await self.hs.get_clock().sleep(2**16) # This should never been called return TaskStatus.ACTIVE, None, None From 6a11d64acf1efad1e5b57fd4ab48ec4a6b2ebacd Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 13 Oct 2025 16:33:15 -0500 Subject: [PATCH 14/17] Even better adaption `incomplete_d` --- tests/util/test_task_scheduler.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/util/test_task_scheduler.py b/tests/util/test_task_scheduler.py index 992e05d76c3..bebe51a12ba 100644 --- a/tests/util/test_task_scheduler.py +++ b/tests/util/test_task_scheduler.py @@ -20,8 +20,10 @@ # from typing import List, Optional, Tuple +from twisted.internet.defer import Deferred from twisted.internet.testing import MemoryReactor +from synapse.logging.context import make_deferred_yieldable from synapse.server import HomeServer from synapse.types import JsonMapping, ScheduledTask, TaskStatus from synapse.util.clock import Clock @@ -169,8 +171,10 @@ async def _resumable_task( return TaskStatus.COMPLETE, {"success": True}, None else: await self.task_scheduler.update_task(task.id, result={"in_progress": True}) + # Create a deferred which we will never complete + incomplete_d: Deferred = Deferred() # Await forever to simulate an aborted task because of a restart - await self.hs.get_clock().sleep(2**16) + await make_deferred_yieldable(incomplete_d) # This should never been called return TaskStatus.ACTIVE, None, None From 4f4bf8064ebd8afb2dd082b322698621c80f2e9e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 13 Oct 2025 16:35:22 -0500 Subject: [PATCH 15/17] Remove `deferLater from other tests --- tests/rest/admin/test_room.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/rest/admin/test_room.py b/tests/rest/admin/test_room.py index 30b2de26e4a..4d90b83b069 100644 --- a/tests/rest/admin/test_room.py +++ b/tests/rest/admin/test_room.py @@ -861,7 +861,7 @@ def test_delete_same_room_twice(self) -> None: # Mock PaginationHandler.purge_room to sleep for 100s, so we have time to do a second call # before the purge is over. Note that it doesn't purge anymore, but we don't care. async def purge_room(room_id: str, force: bool) -> None: - await deferLater(self.hs.get_reactor(), 100, lambda: None) + await self.hs.get_clock().sleep(100) self.pagination_handler.purge_room = AsyncMock(side_effect=purge_room) # type: ignore[method-assign] From 53fd1b29f875e7e5aeb2995450715216e582b79c Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 13 Oct 2025 16:35:52 -0500 Subject: [PATCH 16/17] Remove mentions of `deferLater` --- synapse/util/task_scheduler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/util/task_scheduler.py b/synapse/util/task_scheduler.py index 8dd6f12feb3..91174769cd3 100644 --- a/synapse/util/task_scheduler.py +++ b/synapse/util/task_scheduler.py @@ -53,8 +53,8 @@ class TaskScheduler: """ This is a simple task scheduler designed for resumable tasks. Normally, - you'd use `run_in_background` to start a background task or Twisted's - `deferLater` if you want to run it later. + you'd use `run_in_background` to start a background task or `clock.call_later` + if you want to run it later. The issue is that these tasks stop completely and won't resume if Synapse is shut down for any reason. From 9e99547f658c83dcf14d60467e5e1e39dd79b267 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 13 Oct 2025 17:45:31 -0500 Subject: [PATCH 17/17] Fix lints --- tests/rest/admin/test_room.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/rest/admin/test_room.py b/tests/rest/admin/test_room.py index 4d90b83b069..2885b068883 100644 --- a/tests/rest/admin/test_room.py +++ b/tests/rest/admin/test_room.py @@ -27,7 +27,6 @@ from parameterized import parameterized -from twisted.internet.task import deferLater from twisted.internet.testing import MemoryReactor import synapse.rest.admin