Remove sentinel logcontext in Clock utilities (looping_call, looping_call_now, call_later) (#18907)

MadLittleMods · itsoyou · commit c240dfae1b83 · 2025-10-13T09:56:25.000+02:00
Part of element-hq/synapse#18905 Lints for ensuring we use `Clock.call_later` instead of `reactor.callLater`, etc are coming in element-hq/synapse#18944 ### Testing strategy 1. Configure Synapse to log at the `DEBUG` level 1. Start Synapse: `poetry run synapse_homeserver --config-path homeserver.yaml` 1. Wait 10 seconds for the [database profiling loop](https://github.com/element-hq/synapse/blob/9cc400177822805e2a08d4d934daad6f3bc2a4df/synapse/storage/database.py#L711) to execute 1. Notice the logcontext being used for the `Total database time` log line Before (`sentinel`): ``` 2025-09-10 16:36:58,651 - synapse.storage.TIME - 707 - DEBUG - sentinel - Total database time: 0.646% {room_forgetter_stream_pos(2): 0.131%, reap_monthly_active_users(1): 0.083%, get_device_change_last_converted_pos(1): 0.078%} ``` After (`looping_call`): ``` 2025-09-10 16:36:58,651 - synapse.storage.TIME - 707 - DEBUG - looping_call - Total database time: 0.646% {room_forgetter_stream_pos(2): 0.131%, reap_monthly_active_users(1): 0.083%, get_device_change_last_converted_pos(1): 0.078%} ```
diff --git a/changelog.d/18907.misc b/changelog.d/18907.misc
@@ -0,0 +1 @@
+Remove `sentinel` logcontext usage in `Clock` utilities like `looping_call` and `call_later`.
diff --git a/synapse/util/clock.py b/synapse/util/clock.py
@@ -23,6 +23,7 @@
 from typing_extensions import ParamSpec
 
 from twisted.internet import defer, task
+from twisted.internet.defer import Deferred
 from twisted.internet.interfaces import IDelayedCall
 from twisted.internet.task import LoopingCall
 
@@ -46,6 +47,8 @@ class Clock:
 
     async def sleep(self, seconds: float) -> None:
         d: defer.Deferred[float] = defer.Deferred()
+        # Start task in the `sentinel` logcontext, to avoid leaking the current context
+        # into the reactor once it finishes.
         with context.PreserveLoggingContext():
             self._reactor.callLater(seconds, d.callback, seconds)
             await d
@@ -74,8 +77,9 @@ def looping_call(
         this functionality thanks to this function being a thin wrapper around
         `twisted.internet.task.LoopingCall`.
 
-        Note that the function will be called with no logcontext, so if it is anything
-        other than trivial, you probably want to wrap it in run_as_background_process.
+        Note that the function will be called with a generic `looping_call` logcontext,
+        so if it is anything other than a trivial task, you probably want to wrap it in
+        `run_as_background_process` to give it a more specific label and track metrics.
 
         Args:
             f: The function to call repeatedly.
@@ -97,8 +101,9 @@ def looping_call_now(
         As with `looping_call`: subsequent calls are not scheduled until after the
         the Awaitable returned by a previous call has finished.
 
-        Also as with `looping_call`: the function is called with no logcontext and
-        you probably want to wrap it in `run_as_background_process`.
+        Note that the function will be called with a generic `looping_call` logcontext,
+        so if it is anything other than a trivial task, you probably want to wrap it in
+        `run_as_background_process` to give it a more specific label and track metrics.
 
         Args:
             f: The function to call repeatedly.
@@ -117,9 +122,43 @@ def _looping_call_common(
         **kwargs: P.kwargs,
     ) -> LoopingCall:
         """Common functionality for `looping_call` and `looping_call_now`"""
-        call = task.LoopingCall(f, *args, **kwargs)
+
+        def wrapped_f(*args: P.args, **kwargs: P.kwargs) -> Deferred:
+            assert context.current_context() is context.SENTINEL_CONTEXT, (
+                "Expected `looping_call` callback from the reactor to start with the sentinel logcontext "
+                f"but saw {context.current_context()}. In other words, another task shouldn't have "
+                "leaked their logcontext to us."
+            )
+
+            # Because this is a callback from the reactor, we will be using the
+            # `sentinel` log context at this point. We want the function to log with
+            # some logcontext as we want to know which server the logs came from.
+            #
+            # We use `PreserveLoggingContext` to prevent our new `looping_call`
+            # logcontext from finishing as soon as we exit this function, in case `f`
+            # returns an awaitable/deferred which would continue running and may try to
+            # restore the `looping_call` context when it's done (because it's trying to
+            # adhere to the Synapse logcontext rules.)
+            #
+            # This also ensures that we return to the `sentinel` context when we exit
+            # this function and yield control back to the reactor to avoid leaking the
+            # current logcontext to the reactor (which would then get picked up and
+            # associated with the next thing the reactor does)
+            with context.PreserveLoggingContext(context.LoggingContext("looping_call")):
+                # We use `run_in_background` to reset the logcontext after `f` (or the
+                # awaitable returned by `f`) completes to avoid leaking the current
+                # logcontext to the reactor
+                return context.run_in_background(f, *args, **kwargs)
+
+        call = task.LoopingCall(wrapped_f, *args, **kwargs)
         call.clock = self._reactor
-        d = call.start(msec / 1000.0, now=now)
+        # If `now=True`, the function will be called here immediately so we need to be
+        # in the sentinel context now.
+        #
+        # We want to start the task in the `sentinel` logcontext, to avoid leaking the
+        # current context into the reactor after the function finishes.
+        with context.PreserveLoggingContext():
+            d = call.start(msec / 1000.0, now=now)
         d.addErrback(log_failure, "Looping call died", consumeErrors=False)
         return call
 
@@ -128,8 +167,9 @@ def call_later(
     ) -> IDelayedCall:
         """Call something later
 
-        Note that the function will be called with no logcontext, so if it is anything
-        other than trivial, you probably want to wrap it in run_as_background_process.
+        Note that the function will be called with a generic `call_later` logcontext,
+        so if it is anything other than a trivial task, you probably want to wrap it in
+        `run_as_background_process` to give it a more specific label and track metrics.
 
         Args:
             delay: How long to wait in seconds.
@@ -139,11 +179,33 @@ def call_later(
         """
 
         def wrapped_callback(*args: Any, **kwargs: Any) -> None:
-            with context.PreserveLoggingContext():
-                callback(*args, **kwargs)
+            assert context.current_context() is context.SENTINEL_CONTEXT, (
+                "Expected `call_later` callback from the reactor to start with the sentinel logcontext "
+                f"but saw {context.current_context()}. In other words, another task shouldn't have "
+                "leaked their logcontext to us."
+            )
 
-        with context.PreserveLoggingContext():
-            return self._reactor.callLater(delay, wrapped_callback, *args, **kwargs)
+            # Because this is a callback from the reactor, we will be using the
+            # `sentinel` log context at this point. We want the function to log with
+            # some logcontext as we want to know which server the logs came from.
+            #
+            # We use `PreserveLoggingContext` to prevent our new `call_later`
+            # logcontext from finishing as soon as we exit this function, in case
+            # `callback` returns an awaitable/deferred which would continue running and
+            # may try to restore the `call_later` context when it's done (because it's
+            # trying to adhere to the Synapse logcontext rules.)
+            #
+            # This also ensures that we return to the `sentinel` context when we exit
+            # this function and yield control back to the reactor to avoid leaking the
+            # current logcontext to the reactor (which would then get picked up and
+            # associated with the next thing the reactor does)
+            with context.PreserveLoggingContext(context.LoggingContext("call_later")):
+                # We use `run_in_background` to reset the logcontext after `callback`
+                # (or the awaitable returned by `callback`) completes to avoid leaking
+                # the current logcontext to the reactor
+                context.run_in_background(callback, *args, **kwargs)
+
+        return self._reactor.callLater(delay, wrapped_callback, *args, **kwargs)
 
     def cancel_call_later(self, timer: IDelayedCall, ignore_errs: bool = False) -> None:
         try:
diff --git a/tests/push/test_email.py b/tests/push/test_email.py
@@ -31,6 +31,7 @@
 
 import synapse.rest.admin
 from synapse.api.errors import Codes, SynapseError
+from synapse.logging.context import make_deferred_yieldable
 from synapse.push.emailpusher import EmailPusher
 from synapse.rest.client import login, room
 from synapse.rest.synapse.client.unsubscribe import UnsubscribeResource
@@ -89,7 +90,7 @@ def sendmail(*args: Any, **kwargs: Any) -> Deferred:
             # This mocks out synapse.reactor.send_email._sendmail.
             d: Deferred = Deferred()
             self.email_attempts.append((d, args, kwargs))
-            return d
+            return make_deferred_yieldable(d)
 
         hs.get_send_email_handler()._sendmail = sendmail  # type: ignore[assignment]
 
diff --git a/tests/util/test_logcontext.py b/tests/util/test_logcontext.py

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	+Remove `sentinel` logcontext usage in `Clock` utilities like `looping_call` and `call_later`.