8080T = TypeVar ("T" )
8181
8282
83+ @attr .s (frozen = True , slots = True , auto_attribs = True )
84+ class PulledPduInfo :
85+ """
86+ A result object that stores the PDU and info about it like which homeserver we
87+ pulled it from (`pull_origin`)
88+ """
89+
90+ pdu : EventBase
91+ # Which homeserver we pulled the PDU from
92+ pull_origin : str
93+
94+
8395class InvalidResponseError (RuntimeError ):
8496 """Helper for _try_destination_list: indicates that the server returned a response
8597 we couldn't parse
@@ -114,7 +126,9 @@ def __init__(self, hs: "HomeServer"):
114126 self .hostname = hs .hostname
115127 self .signing_key = hs .signing_key
116128
117- self ._get_pdu_cache : ExpiringCache [str , EventBase ] = ExpiringCache (
129+ # Cache mapping `event_id` to a tuple of the event itself and the `pull_origin`
130+ # (which server we pulled the event from)
131+ self ._get_pdu_cache : ExpiringCache [str , Tuple [EventBase , str ]] = ExpiringCache (
118132 cache_name = "get_pdu_cache" ,
119133 clock = self ._clock ,
120134 max_len = 1000 ,
@@ -352,11 +366,11 @@ async def _record_failure_callback(
352366 @tag_args
353367 async def get_pdu (
354368 self ,
355- destinations : Iterable [str ],
369+ destinations : Collection [str ],
356370 event_id : str ,
357371 room_version : RoomVersion ,
358372 timeout : Optional [int ] = None ,
359- ) -> Optional [EventBase ]:
373+ ) -> Optional [PulledPduInfo ]:
360374 """Requests the PDU with given origin and ID from the remote home
361375 servers.
362376
@@ -371,11 +385,11 @@ async def get_pdu(
371385 moving to the next destination. None indicates no timeout.
372386
373387 Returns:
374- The requested PDU, or None if we were unable to find it.
388+ The requested PDU wrapped in `PulledPduInfo` , or None if we were unable to find it.
375389 """
376390
377391 logger .debug (
378- "get_pdu: event_id=%s from destinations=%s" , event_id , destinations
392+ "get_pdu( event_id=%s): from destinations=%s" , event_id , destinations
379393 )
380394
381395 # TODO: Rate limit the number of times we try and get the same event.
@@ -384,19 +398,25 @@ async def get_pdu(
384398 # it gets persisted to the database), so we cache the results of the lookup.
385399 # Note that this is separate to the regular get_event cache which caches
386400 # events once they have been persisted.
387- event = self ._get_pdu_cache .get (event_id )
401+ get_pdu_cache_entry = self ._get_pdu_cache .get (event_id )
388402
403+ event = None
404+ pull_origin = None
405+ if get_pdu_cache_entry :
406+ event , pull_origin = get_pdu_cache_entry
389407 # If we don't see the event in the cache, go try to fetch it from the
390408 # provided remote federated destinations
391- if not event :
409+ else :
392410 pdu_attempts = self .pdu_destination_tried .setdefault (event_id , {})
393411
412+ # TODO: We can probably refactor this to use `_try_destination_list`
394413 for destination in destinations :
395414 now = self ._clock .time_msec ()
396415 last_attempt = pdu_attempts .get (destination , 0 )
397416 if last_attempt + PDU_RETRY_TIME_MS > now :
398417 logger .debug (
399- "get_pdu: skipping destination=%s because we tried it recently last_attempt=%s and we only check every %s (now=%s)" ,
418+ "get_pdu(event_id=%s): skipping destination=%s because we tried it recently last_attempt=%s and we only check every %s (now=%s)" ,
419+ event_id ,
400420 destination ,
401421 last_attempt ,
402422 PDU_RETRY_TIME_MS ,
@@ -411,43 +431,48 @@ async def get_pdu(
411431 room_version = room_version ,
412432 timeout = timeout ,
413433 )
434+ pull_origin = destination
414435
415436 pdu_attempts [destination ] = now
416437
417438 if event :
418439 # Prime the cache
419- self ._get_pdu_cache [event .event_id ] = event
440+ self ._get_pdu_cache [event .event_id ] = ( event , pull_origin )
420441
421442 # Now that we have an event, we can break out of this
422443 # loop and stop asking other destinations.
423444 break
424445
446+ except NotRetryingDestination as e :
447+ logger .info ("get_pdu(event_id=%s): %s" , event_id , e )
448+ continue
449+ except FederationDeniedError :
450+ logger .info (
451+ "get_pdu(event_id=%s): Not attempting to fetch PDU from %s because the homeserver is not on our federation whitelist" ,
452+ event_id ,
453+ destination ,
454+ )
455+ continue
425456 except SynapseError as e :
426457 logger .info (
427- "Failed to get PDU %s from %s because %s" ,
458+ "get_pdu(event_id=%s): Failed to get PDU from %s because %s" ,
428459 event_id ,
429460 destination ,
430461 e ,
431462 )
432463 continue
433- except NotRetryingDestination as e :
434- logger .info (str (e ))
435- continue
436- except FederationDeniedError as e :
437- logger .info (str (e ))
438- continue
439464 except Exception as e :
440465 pdu_attempts [destination ] = now
441466
442467 logger .info (
443- "Failed to get PDU %s from %s because %s" ,
468+ "get_pdu(event_id=): Failed to get PDU from %s because %s" ,
444469 event_id ,
445470 destination ,
446471 e ,
447472 )
448473 continue
449474
450- if not event :
475+ if not event or not pull_origin :
451476 return None
452477
453478 # `event` now refers to an object stored in `get_pdu_cache`. Our
@@ -459,7 +484,7 @@ async def get_pdu(
459484 event .room_version ,
460485 )
461486
462- return event_copy
487+ return PulledPduInfo ( event_copy , pull_origin )
463488
464489 @trace
465490 @tag_args
@@ -699,12 +724,14 @@ async def _check_sigs_and_hash_and_fetch_one(
699724 pdu_origin = get_domain_from_id (pdu .sender )
700725 if not res and pdu_origin != origin :
701726 try :
702- res = await self .get_pdu (
727+ pulled_pdu_info = await self .get_pdu (
703728 destinations = [pdu_origin ],
704729 event_id = pdu .event_id ,
705730 room_version = room_version ,
706731 timeout = 10000 ,
707732 )
733+ if pulled_pdu_info is not None :
734+ res = pulled_pdu_info .pdu
708735 except SynapseError :
709736 pass
710737
@@ -806,6 +833,7 @@ async def _try_destination_list(
806833 )
807834
808835 for destination in destinations :
836+ # We don't want to ask our own server for information we don't have
809837 if destination == self .server_name :
810838 continue
811839
@@ -814,9 +842,21 @@ async def _try_destination_list(
814842 except (
815843 RequestSendFailed ,
816844 InvalidResponseError ,
817- NotRetryingDestination ,
818845 ) as e :
819846 logger .warning ("Failed to %s via %s: %s" , description , destination , e )
847+ # Skip to the next homeserver in the list to try.
848+ continue
849+ except NotRetryingDestination as e :
850+ logger .info ("%s: %s" , description , e )
851+ continue
852+ except FederationDeniedError :
853+ logger .info (
854+ "%s: Not attempting to %s from %s because the homeserver is not on our federation whitelist" ,
855+ description ,
856+ description ,
857+ destination ,
858+ )
859+ continue
820860 except UnsupportedRoomVersionError :
821861 raise
822862 except HttpResponseException as e :
@@ -1609,6 +1649,54 @@ async def send_request(
16091649 return result
16101650
16111651 async def timestamp_to_event (
1652+ self , * , destinations : List [str ], room_id : str , timestamp : int , direction : str
1653+ ) -> Optional ["TimestampToEventResponse" ]:
1654+ """
1655+ Calls each remote federating server from `destinations` asking for their closest
1656+ event to the given timestamp in the given direction until we get a response.
1657+ Also validates the response to always return the expected keys or raises an
1658+ error.
1659+
1660+ Args:
1661+ destinations: The domains of homeservers to try fetching from
1662+ room_id: Room to fetch the event from
1663+ timestamp: The point in time (inclusive) we should navigate from in
1664+ the given direction to find the closest event.
1665+ direction: ["f"|"b"] to indicate whether we should navigate forward
1666+ or backward from the given timestamp to find the closest event.
1667+
1668+ Returns:
1669+ A parsed TimestampToEventResponse including the closest event_id
1670+ and origin_server_ts or None if no destination has a response.
1671+ """
1672+
1673+ async def _timestamp_to_event_from_destination (
1674+ destination : str ,
1675+ ) -> TimestampToEventResponse :
1676+ return await self ._timestamp_to_event_from_destination (
1677+ destination , room_id , timestamp , direction
1678+ )
1679+
1680+ try :
1681+ # Loop through each homeserver candidate until we get a succesful response
1682+ timestamp_to_event_response = await self ._try_destination_list (
1683+ "timestamp_to_event" ,
1684+ destinations ,
1685+ # TODO: The requested timestamp may lie in a part of the
1686+ # event graph that the remote server *also* didn't have,
1687+ # in which case they will have returned another event
1688+ # which may be nowhere near the requested timestamp. In
1689+ # the future, we may need to reconcile that gap and ask
1690+ # other homeservers, and/or extend `/timestamp_to_event`
1691+ # to return events on *both* sides of the timestamp to
1692+ # help reconcile the gap faster.
1693+ _timestamp_to_event_from_destination ,
1694+ )
1695+ return timestamp_to_event_response
1696+ except SynapseError :
1697+ return None
1698+
1699+ async def _timestamp_to_event_from_destination (
16121700 self , destination : str , room_id : str , timestamp : int , direction : str
16131701 ) -> "TimestampToEventResponse" :
16141702 """
0 commit comments