Skip to content

Commit 429ecb7

Browse files
authored
Handle remote download responses with UNKNOWN_LENGTH more gracefully (#17439)
Prior to this PR, remote downloads that did not provide a `content-length` header were counted against the remote download ratelimiter at the maximum allowable size, leading to excessive ratelimiting - see #17394. This PR adds a linearizer to limit concurrent remote downloads to 6 per IP address, and decrements remote downloads without a `content-length` from the ratelimiter *after* the download is complete and the response length is known. It also adds logic to ensure that responses with a known length respect the `max_download_size`.
1 parent 9e1acea commit 429ecb7

File tree

4 files changed

+166
-60
lines changed

4 files changed

+166
-60
lines changed

changelog.d/17439.bugfix

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Limit concurrent remote downloads to 6 per IP address, and decrement remote downloads without a content-length from the ratelimiter after the download is complete.

synapse/http/matrixfederationclient.py

Lines changed: 80 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@
9090
from synapse.logging.opentracing import set_tag, start_active_span, tags
9191
from synapse.types import JsonDict
9292
from synapse.util import json_decoder
93-
from synapse.util.async_helpers import AwakenableSleeper, timeout_deferred
93+
from synapse.util.async_helpers import AwakenableSleeper, Linearizer, timeout_deferred
9494
from synapse.util.metrics import Measure
9595
from synapse.util.stringutils import parse_and_validate_server_name
9696

@@ -475,6 +475,8 @@ def __init__(
475475
use_proxy=True,
476476
)
477477

478+
self.remote_download_linearizer = Linearizer("remote_download_linearizer", 6)
479+
478480
def wake_destination(self, destination: str) -> None:
479481
"""Called when the remote server may have come back online."""
480482

@@ -1486,35 +1488,44 @@ async def get_file(
14861488
)
14871489

14881490
headers = dict(response.headers.getAllRawHeaders())
1489-
14901491
expected_size = response.length
1491-
# if we don't get an expected length then use the max length
1492+
14921493
if expected_size == UNKNOWN_LENGTH:
14931494
expected_size = max_size
1494-
logger.debug(
1495-
f"File size unknown, assuming file is max allowable size: {max_size}"
1496-
)
1495+
else:
1496+
if int(expected_size) > max_size:
1497+
msg = "Requested file is too large > %r bytes" % (max_size,)
1498+
logger.warning(
1499+
"{%s} [%s] %s",
1500+
request.txn_id,
1501+
request.destination,
1502+
msg,
1503+
)
1504+
raise SynapseError(HTTPStatus.BAD_GATEWAY, msg, Codes.TOO_LARGE)
14971505

1498-
read_body, _ = await download_ratelimiter.can_do_action(
1499-
requester=None,
1500-
key=ip_address,
1501-
n_actions=expected_size,
1502-
)
1503-
if not read_body:
1504-
msg = "Requested file size exceeds ratelimits"
1505-
logger.warning(
1506-
"{%s} [%s] %s",
1507-
request.txn_id,
1508-
request.destination,
1509-
msg,
1506+
read_body, _ = await download_ratelimiter.can_do_action(
1507+
requester=None,
1508+
key=ip_address,
1509+
n_actions=expected_size,
15101510
)
1511-
raise SynapseError(HTTPStatus.TOO_MANY_REQUESTS, msg, Codes.LIMIT_EXCEEDED)
1511+
if not read_body:
1512+
msg = "Requested file size exceeds ratelimits"
1513+
logger.warning(
1514+
"{%s} [%s] %s",
1515+
request.txn_id,
1516+
request.destination,
1517+
msg,
1518+
)
1519+
raise SynapseError(
1520+
HTTPStatus.TOO_MANY_REQUESTS, msg, Codes.LIMIT_EXCEEDED
1521+
)
15121522

15131523
try:
1514-
# add a byte of headroom to max size as function errs at >=
1515-
d = read_body_with_max_size(response, output_stream, expected_size + 1)
1516-
d.addTimeout(self.default_timeout_seconds, self.reactor)
1517-
length = await make_deferred_yieldable(d)
1524+
async with self.remote_download_linearizer.queue(ip_address):
1525+
# add a byte of headroom to max size as function errs at >=
1526+
d = read_body_with_max_size(response, output_stream, expected_size + 1)
1527+
d.addTimeout(self.default_timeout_seconds, self.reactor)
1528+
length = await make_deferred_yieldable(d)
15181529
except BodyExceededMaxSize:
15191530
msg = "Requested file is too large > %r bytes" % (expected_size,)
15201531
logger.warning(
@@ -1560,6 +1571,13 @@ async def get_file(
15601571
request.method,
15611572
request.uri.decode("ascii"),
15621573
)
1574+
1575+
# if we didn't know the length upfront, decrement the actual size from ratelimiter
1576+
if response.length == UNKNOWN_LENGTH:
1577+
download_ratelimiter.record_action(
1578+
requester=None, key=ip_address, n_actions=length
1579+
)
1580+
15631581
return length, headers
15641582

15651583
async def federation_get_file(
@@ -1630,29 +1648,37 @@ async def federation_get_file(
16301648
)
16311649

16321650
headers = dict(response.headers.getAllRawHeaders())
1633-
16341651
expected_size = response.length
1635-
# if we don't get an expected length then use the max length
1652+
16361653
if expected_size == UNKNOWN_LENGTH:
16371654
expected_size = max_size
1638-
logger.debug(
1639-
f"File size unknown, assuming file is max allowable size: {max_size}"
1640-
)
1655+
else:
1656+
if int(expected_size) > max_size:
1657+
msg = "Requested file is too large > %r bytes" % (max_size,)
1658+
logger.warning(
1659+
"{%s} [%s] %s",
1660+
request.txn_id,
1661+
request.destination,
1662+
msg,
1663+
)
1664+
raise SynapseError(HTTPStatus.BAD_GATEWAY, msg, Codes.TOO_LARGE)
16411665

1642-
read_body, _ = await download_ratelimiter.can_do_action(
1643-
requester=None,
1644-
key=ip_address,
1645-
n_actions=expected_size,
1646-
)
1647-
if not read_body:
1648-
msg = "Requested file size exceeds ratelimits"
1649-
logger.warning(
1650-
"{%s} [%s] %s",
1651-
request.txn_id,
1652-
request.destination,
1653-
msg,
1666+
read_body, _ = await download_ratelimiter.can_do_action(
1667+
requester=None,
1668+
key=ip_address,
1669+
n_actions=expected_size,
16541670
)
1655-
raise SynapseError(HTTPStatus.TOO_MANY_REQUESTS, msg, Codes.LIMIT_EXCEEDED)
1671+
if not read_body:
1672+
msg = "Requested file size exceeds ratelimits"
1673+
logger.warning(
1674+
"{%s} [%s] %s",
1675+
request.txn_id,
1676+
request.destination,
1677+
msg,
1678+
)
1679+
raise SynapseError(
1680+
HTTPStatus.TOO_MANY_REQUESTS, msg, Codes.LIMIT_EXCEEDED
1681+
)
16561682

16571683
# this should be a multipart/mixed response with the boundary string in the header
16581684
try:
@@ -1672,11 +1698,12 @@ async def federation_get_file(
16721698
raise SynapseError(HTTPStatus.BAD_GATEWAY, msg)
16731699

16741700
try:
1675-
# add a byte of headroom to max size as `_MultipartParserProtocol.dataReceived` errs at >=
1676-
deferred = read_multipart_response(
1677-
response, output_stream, boundary, expected_size + 1
1678-
)
1679-
deferred.addTimeout(self.default_timeout_seconds, self.reactor)
1701+
async with self.remote_download_linearizer.queue(ip_address):
1702+
# add a byte of headroom to max size as `_MultipartParserProtocol.dataReceived` errs at >=
1703+
deferred = read_multipart_response(
1704+
response, output_stream, boundary, expected_size + 1
1705+
)
1706+
deferred.addTimeout(self.default_timeout_seconds, self.reactor)
16801707
except BodyExceededMaxSize:
16811708
msg = "Requested file is too large > %r bytes" % (expected_size,)
16821709
logger.warning(
@@ -1743,6 +1770,13 @@ async def federation_get_file(
17431770
request.method,
17441771
request.uri.decode("ascii"),
17451772
)
1773+
1774+
# if we didn't know the length upfront, decrement the actual size from ratelimiter
1775+
if response.length == UNKNOWN_LENGTH:
1776+
download_ratelimiter.record_action(
1777+
requester=None, key=ip_address, n_actions=length
1778+
)
1779+
17461780
return length, headers, multipart_response.json
17471781

17481782

tests/media/test_media_storage.py

Lines changed: 40 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1057,13 +1057,15 @@ async def _send_request(*args: Any, **kwargs: Any) -> IResponse:
10571057
)
10581058
assert channel.code == 200
10591059

1060+
@override_config({"remote_media_download_burst_count": "87M"})
10601061
@patch(
10611062
"synapse.http.matrixfederationclient.read_body_with_max_size",
10621063
read_body_with_max_size_30MiB,
10631064
)
1064-
def test_download_ratelimit_max_size_sub(self) -> None:
1065+
def test_download_ratelimit_unknown_length(self) -> None:
10651066
"""
1066-
Test that if no content-length is provided, the default max size is applied instead
1067+
Test that if no content-length is provided, ratelimit will still be applied after
1068+
download once length is known
10671069
"""
10681070

10691071
# mock out actually sending the request
@@ -1077,19 +1079,48 @@ async def _send_request(*args: Any, **kwargs: Any) -> IResponse:
10771079

10781080
self.client._send_request = _send_request # type: ignore
10791081

1080-
# ten requests should go through using the max size (500MB/50MB)
1081-
for i in range(10):
1082-
channel2 = self.make_request(
1082+
# 3 requests should go through (note 3rd one would technically violate ratelimit but
1083+
# the ratelimit is applied *after* download - the next one will be ratelimited)
1084+
for i in range(3):
1085+
channel = self.make_request(
10831086
"GET",
10841087
f"/_matrix/media/v3/download/remote.org/abcdefghijklmnopqrstuvwxy{i}",
10851088
shorthand=False,
10861089
)
1087-
assert channel2.code == 200
1090+
assert channel.code == 200
10881091

1089-
# eleventh will hit ratelimit
1090-
channel3 = self.make_request(
1092+
# 4th will hit ratelimit
1093+
channel2 = self.make_request(
10911094
"GET",
10921095
"/_matrix/media/v3/download/remote.org/abcdefghijklmnopqrstuvwxyx",
10931096
shorthand=False,
10941097
)
1095-
assert channel3.code == 429
1098+
assert channel2.code == 429
1099+
1100+
@override_config({"max_upload_size": "29M"})
1101+
@patch(
1102+
"synapse.http.matrixfederationclient.read_body_with_max_size",
1103+
read_body_with_max_size_30MiB,
1104+
)
1105+
def test_max_download_respected(self) -> None:
1106+
"""
1107+
Test that the max download size is enforced - note that max download size is determined
1108+
by the max_upload_size
1109+
"""
1110+
1111+
# mock out actually sending the request
1112+
async def _send_request(*args: Any, **kwargs: Any) -> IResponse:
1113+
resp = MagicMock(spec=IResponse)
1114+
resp.code = 200
1115+
resp.length = 31457280
1116+
resp.headers = Headers({"Content-Type": ["application/octet-stream"]})
1117+
resp.phrase = b"OK"
1118+
return resp
1119+
1120+
self.client._send_request = _send_request # type: ignore
1121+
1122+
channel = self.make_request(
1123+
"GET", "/_matrix/media/v3/download/remote.org/abcd", shorthand=False
1124+
)
1125+
assert channel.code == 502
1126+
assert channel.json_body["errcode"] == "M_TOO_LARGE"

tests/rest/client/test_media.py

Lines changed: 45 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1809,13 +1809,19 @@ async def _send_request(*args: Any, **kwargs: Any) -> IResponse:
18091809
)
18101810
assert channel.code == 200
18111811

1812+
@override_config(
1813+
{
1814+
"remote_media_download_burst_count": "87M",
1815+
}
1816+
)
18121817
@patch(
18131818
"synapse.http.matrixfederationclient.read_multipart_response",
18141819
read_multipart_response_30MiB,
18151820
)
1816-
def test_download_ratelimit_max_size_sub(self) -> None:
1821+
def test_download_ratelimit_unknown_length(self) -> None:
18171822
"""
1818-
Test that if no content-length is provided, the default max size is applied instead
1823+
Test that if no content-length is provided, ratelimiting is still applied after
1824+
media is downloaded and length is known
18191825
"""
18201826

18211827
# mock out actually sending the request
@@ -1831,8 +1837,9 @@ async def _send_request(*args: Any, **kwargs: Any) -> IResponse:
18311837

18321838
self.client._send_request = _send_request # type: ignore
18331839

1834-
# ten requests should go through using the max size (500MB/50MB)
1835-
for i in range(10):
1840+
# first 3 will go through (note that 3rd request technically violates rate limit but
1841+
# since the ratelimiting is applied *after* download, it goes through; the next one fails)
1842+
for i in range(3):
18361843
channel2 = self.make_request(
18371844
"GET",
18381845
f"/_matrix/client/v1/media/download/remote.org/abc{i}",
@@ -1841,7 +1848,7 @@ async def _send_request(*args: Any, **kwargs: Any) -> IResponse:
18411848
)
18421849
assert channel2.code == 200
18431850

1844-
# eleventh will hit ratelimit
1851+
# 4th will hit ratelimit
18451852
channel3 = self.make_request(
18461853
"GET",
18471854
"/_matrix/client/v1/media/download/remote.org/abcd",
@@ -1850,6 +1857,39 @@ async def _send_request(*args: Any, **kwargs: Any) -> IResponse:
18501857
)
18511858
assert channel3.code == 429
18521859

1860+
@override_config({"max_upload_size": "29M"})
1861+
@patch(
1862+
"synapse.http.matrixfederationclient.read_multipart_response",
1863+
read_multipart_response_30MiB,
1864+
)
1865+
def test_max_download_respected(self) -> None:
1866+
"""
1867+
Test that the max download size is enforced - note that max download size is determined
1868+
by the max_upload_size
1869+
"""
1870+
1871+
# mock out actually sending the request, returns a 30MiB response
1872+
async def _send_request(*args: Any, **kwargs: Any) -> IResponse:
1873+
resp = MagicMock(spec=IResponse)
1874+
resp.code = 200
1875+
resp.length = 31457280
1876+
resp.headers = Headers(
1877+
{"Content-Type": ["multipart/mixed; boundary=gc0p4Jq0M2Yt08jU534c0p"]}
1878+
)
1879+
resp.phrase = b"OK"
1880+
return resp
1881+
1882+
self.client._send_request = _send_request # type: ignore
1883+
1884+
channel = self.make_request(
1885+
"GET",
1886+
"/_matrix/client/v1/media/download/remote.org/abcd",
1887+
shorthand=False,
1888+
access_token=self.tok,
1889+
)
1890+
assert channel.code == 502
1891+
assert channel.json_body["errcode"] == "M_TOO_LARGE"
1892+
18531893
def test_file_download(self) -> None:
18541894
content = io.BytesIO(b"file_to_stream")
18551895
content_uri = self.get_success(

0 commit comments

Comments
 (0)