diff --git a/changelog.d/18848.feature b/changelog.d/18848.feature new file mode 100644 index 00000000000..302a6e7b668 --- /dev/null +++ b/changelog.d/18848.feature @@ -0,0 +1 @@ +Add `get_media_upload_limits_for_user` and `on_media_upload_limit_exceeded` module API callbacks for media repository. diff --git a/docs/modules/media_repository_callbacks.md b/docs/modules/media_repository_callbacks.md index fc371304393..7c724038a7a 100644 --- a/docs/modules/media_repository_callbacks.md +++ b/docs/modules/media_repository_callbacks.md @@ -64,3 +64,68 @@ If multiple modules implement this callback, they will be considered in order. I returns `True`, Synapse falls through to the next one. The value of the first callback that returns `False` will be used. If this happens, Synapse will not call any of the subsequent implementations of this callback. + +### `get_media_upload_limits_for_user` + +_First introduced in Synapse v1.139.0_ + +```python +async def get_media_upload_limits_for_user(user_id: str) -> Optional[List[synapse.module_api.MediaUploadLimit]] +``` + +** +Caution: This callback is currently experimental. The method signature or behaviour +may change without notice. +** + +Called when processing a request to store content in the media repository. This can be used to dynamically override +the [media upload limits configuration](../usage/configuration/config_documentation.html#media_upload_limits). + +The arguments passed to this callback are: + +* `user_id`: The Matrix user ID of the user (e.g. `@alice:example.com`) making the request. + +If the callback returns a list then it will be used as the limits instead of those in the configuration (if any). + +If an empty list is returned then no limits are applied (**warning:** users will be able +to upload as much data as they desire). + +If multiple modules implement this callback, they will be considered in order. If a +callback returns `None`, Synapse falls through to the next one. 
The value of the first +callback that does not return `None` will be used. If this happens, Synapse will not call +any of the subsequent implementations of this callback. + +If there are no registered modules, or if all modules return `None`, then +the default +[media upload limits configuration](../usage/configuration/config_documentation.html#media_upload_limits) +will be used. + +### `on_media_upload_limit_exceeded` + +_First introduced in Synapse v1.139.0_ + +```python +async def on_media_upload_limit_exceeded(user_id: str, limit: synapse.module_api.MediaUploadLimit, sent_bytes: int, attempted_bytes: int) -> None +``` + +** +Caution: This callback is currently experimental. The method signature or behaviour +may change without notice. +** + +Called when a user attempts to upload media that would exceed a +[configured media upload limit](../usage/configuration/config_documentation.html#media_upload_limits). + +This callback will only be called on workers which handle +[POST /_matrix/media/v3/upload](https://spec.matrix.org/v1.15/client-server-api/#post_matrixmediav3upload) +requests. + +This could be used to inform the user that they have reached a media upload limit through +some external method. + +The arguments passed to this callback are: + +* `user_id`: The Matrix user ID of the user (e.g. `@alice:example.com`) making the request. +* `limit`: The `synapse.module_api.MediaUploadLimit` representing the limit that was reached. +* `sent_bytes`: The number of bytes already sent during the period of the limit. +* `attempted_bytes`: The number of bytes that the user attempted to send. 
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 68303308cd6..3c401d569bb 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -2168,9 +2168,12 @@ max_upload_size: 60M ### `media_upload_limits` *(array)* A list of media upload limits defining how much data a given user can upload in a given time period. +These limits are applied in addition to the `max_upload_size` limit above (which applies to individual uploads). An empty list means no limits are applied. +These settings can be overridden using the `get_media_upload_limits_for_user` module API [callback](../../modules/media_repository_callbacks.md#get_media_upload_limits_for_user). + Defaults to `[]`. Example configuration: diff --git a/schema/synapse-config.schema.yaml b/schema/synapse-config.schema.yaml index 83e16de397d..fdce4219ae4 100644 --- a/schema/synapse-config.schema.yaml +++ b/schema/synapse-config.schema.yaml @@ -2415,8 +2415,15 @@ properties: A list of media upload limits defining how much data a given user can upload in a given time period. + These limits are applied in addition to the `max_upload_size` limit above + (which applies to individual uploads). + An empty list means no limits are applied. + + + These settings can be overridden using the `get_media_upload_limits_for_user` + module API [callback](../../modules/media_repository_callbacks.md#get_media_upload_limits_for_user). 
default: [] items: time_period: diff --git a/synapse/config/repository.py b/synapse/config/repository.py index efdc5056599..e7d23740f9e 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -120,11 +120,19 @@ def parse_thumbnail_requirements( @attr.s(auto_attribs=True, slots=True, frozen=True) class MediaUploadLimit: - """A limit on the amount of data a user can upload in a given time - period.""" + """ + Represents a limit on the amount of data a user can upload in a given time + period. + + These can be configured through the `media_upload_limits` [config option](https://element-hq.github.io/synapse/latest/usage/configuration/config_documentation.html#media_upload_limits) + or via the `get_media_upload_limits_for_user` module API [callback](https://element-hq.github.io/synapse/latest/modules/media_repository_callbacks.html#get_media_upload_limits_for_user). + """ max_bytes: int + """The maximum number of bytes that can be uploaded in the given time period.""" + time_period_ms: int + """The time period in milliseconds.""" class ContentRepositoryConfig(Config): diff --git a/synapse/media/media_repository.py b/synapse/media/media_repository.py index aae88d25c99..54791f43a72 100644 --- a/synapse/media/media_repository.py +++ b/synapse/media/media_repository.py @@ -179,11 +179,13 @@ def __init__(self, hs: "HomeServer"): # We get the media upload limits and sort them in descending order of # time period, so that we can apply some optimizations. 
- self.media_upload_limits = hs.config.media.media_upload_limits - self.media_upload_limits.sort( + self.default_media_upload_limits = hs.config.media.media_upload_limits + self.default_media_upload_limits.sort( key=lambda limit: limit.time_period_ms, reverse=True ) + self.media_repository_callbacks = hs.get_module_api_callbacks().media_repository + def _start_update_recently_accessed(self) -> Deferred: return run_as_background_process( "update_recently_accessed_media", @@ -340,16 +342,27 @@ async def create_or_update_content( # Check that the user has not exceeded any of the media upload limits. + # Use limits from module API if provided + media_upload_limits = ( + await self.media_repository_callbacks.get_media_upload_limits_for_user( + auth_user.to_string() + ) + ) + + # Otherwise use the default limits from config + if media_upload_limits is None: + # Note: the media upload limits are sorted so larger time periods are + # first. + media_upload_limits = self.default_media_upload_limits + # This is the total size of media uploaded by the user in the last # `time_period_ms` milliseconds, or None if we haven't checked yet. uploaded_media_size: Optional[int] = None - # Note: the media upload limits are sorted so larger time periods are - # first. - for limit in self.media_upload_limits: + for limit in media_upload_limits: # We only need to check the amount of media uploaded by the user in # this latest (smaller) time period if the amount of media uploaded - # in a previous (larger) time period is above the limit. + # in a previous (larger) time period is below the limit. 
# # This optimization means that in the common case where the user # hasn't uploaded much media, we only need to query the database @@ -363,6 +376,12 @@ async def create_or_update_content( ) if uploaded_media_size + content_length > limit.max_bytes: + await self.media_repository_callbacks.on_media_upload_limit_exceeded( + user_id=auth_user.to_string(), + limit=limit, + sent_bytes=uploaded_media_size, + attempted_bytes=content_length, + ) raise SynapseError( 400, "Media upload limit exceeded", Codes.RESOURCE_LIMIT_EXCEEDED ) diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py index 9309aa93942..62181355135 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -50,6 +50,7 @@ from synapse.api.errors import SynapseError from synapse.api.presence import UserPresenceState from synapse.config import ConfigError +from synapse.config.repository import MediaUploadLimit from synapse.events import EventBase from synapse.events.presence_router import ( GET_INTERESTED_USERS_CALLBACK, @@ -94,7 +95,9 @@ ) from synapse.module_api.callbacks.media_repository_callbacks import ( GET_MEDIA_CONFIG_FOR_USER_CALLBACK, + GET_MEDIA_UPLOAD_LIMITS_FOR_USER_CALLBACK, IS_USER_ALLOWED_TO_UPLOAD_MEDIA_OF_SIZE_CALLBACK, + ON_MEDIA_UPLOAD_LIMIT_EXCEEDED_CALLBACK, ) from synapse.module_api.callbacks.ratelimit_callbacks import ( GET_RATELIMIT_OVERRIDE_FOR_USER_CALLBACK, @@ -205,6 +208,7 @@ "RoomAlias", "UserProfile", "RatelimitOverride", + "MediaUploadLimit", ] logger = logging.getLogger(__name__) @@ -462,6 +466,12 @@ def register_media_repository_callbacks( is_user_allowed_to_upload_media_of_size: Optional[ IS_USER_ALLOWED_TO_UPLOAD_MEDIA_OF_SIZE_CALLBACK ] = None, + get_media_upload_limits_for_user: Optional[ + GET_MEDIA_UPLOAD_LIMITS_FOR_USER_CALLBACK + ] = None, + on_media_upload_limit_exceeded: Optional[ + ON_MEDIA_UPLOAD_LIMIT_EXCEEDED_CALLBACK + ] = None, ) -> None: """Registers callbacks for media repository capabilities. 
Added in Synapse v1.132.0. @@ -469,6 +479,8 @@ def register_media_repository_callbacks( return self._callbacks.media_repository.register_callbacks( get_media_config_for_user=get_media_config_for_user, is_user_allowed_to_upload_media_of_size=is_user_allowed_to_upload_media_of_size, + get_media_upload_limits_for_user=get_media_upload_limits_for_user, + on_media_upload_limit_exceeded=on_media_upload_limit_exceeded, ) def register_third_party_rules_callbacks( diff --git a/synapse/module_api/callbacks/media_repository_callbacks.py b/synapse/module_api/callbacks/media_repository_callbacks.py index 2ab65f9fd64..7d3aed9d666 100644 --- a/synapse/module_api/callbacks/media_repository_callbacks.py +++ b/synapse/module_api/callbacks/media_repository_callbacks.py @@ -15,6 +15,7 @@ import logging from typing import TYPE_CHECKING, Awaitable, Callable, List, Optional +from synapse.config.repository import MediaUploadLimit from synapse.types import JsonDict from synapse.util.async_helpers import delay_cancellation from synapse.util.metrics import Measure @@ -28,6 +29,14 @@ IS_USER_ALLOWED_TO_UPLOAD_MEDIA_OF_SIZE_CALLBACK = Callable[[str, int], Awaitable[bool]] +GET_MEDIA_UPLOAD_LIMITS_FOR_USER_CALLBACK = Callable[ + [str], Awaitable[Optional[List[MediaUploadLimit]]] +] + +ON_MEDIA_UPLOAD_LIMIT_EXCEEDED_CALLBACK = Callable[ + [str, MediaUploadLimit, int, int], Awaitable[None] +] + class MediaRepositoryModuleApiCallbacks: def __init__(self, hs: "HomeServer") -> None: @@ -39,6 +48,12 @@ def __init__(self, hs: "HomeServer") -> None: self._is_user_allowed_to_upload_media_of_size_callbacks: List[ IS_USER_ALLOWED_TO_UPLOAD_MEDIA_OF_SIZE_CALLBACK ] = [] + self._get_media_upload_limits_for_user_callbacks: List[ + GET_MEDIA_UPLOAD_LIMITS_FOR_USER_CALLBACK + ] = [] + self._on_media_upload_limit_exceeded_callbacks: List[ + ON_MEDIA_UPLOAD_LIMIT_EXCEEDED_CALLBACK + ] = [] def register_callbacks( self, @@ -46,6 +61,12 @@ def register_callbacks( is_user_allowed_to_upload_media_of_size: Optional[ 
IS_USER_ALLOWED_TO_UPLOAD_MEDIA_OF_SIZE_CALLBACK ] = None, + get_media_upload_limits_for_user: Optional[ + GET_MEDIA_UPLOAD_LIMITS_FOR_USER_CALLBACK + ] = None, + on_media_upload_limit_exceeded: Optional[ + ON_MEDIA_UPLOAD_LIMIT_EXCEEDED_CALLBACK + ] = None, ) -> None: """Register callbacks from module for each hook.""" if get_media_config_for_user is not None: @@ -56,6 +77,16 @@ def register_callbacks( is_user_allowed_to_upload_media_of_size ) + if get_media_upload_limits_for_user is not None: + self._get_media_upload_limits_for_user_callbacks.append( + get_media_upload_limits_for_user + ) + + if on_media_upload_limit_exceeded is not None: + self._on_media_upload_limit_exceeded_callbacks.append( + on_media_upload_limit_exceeded + ) + async def get_media_config_for_user(self, user_id: str) -> Optional[JsonDict]: for callback in self._get_media_config_for_user_callbacks: with Measure( @@ -83,3 +114,47 @@ async def is_user_allowed_to_upload_media_of_size( return res return True + + async def get_media_upload_limits_for_user( + self, user_id: str + ) -> Optional[List[MediaUploadLimit]]: + """ + Get the first non-None list of MediaUploadLimits for the user from the registered callbacks. + If a list is returned it will be sorted in descending order of duration. 
+ """ + for callback in self._get_media_upload_limits_for_user_callbacks: + with Measure( + self.clock, + name=f"{callback.__module__}.{callback.__qualname__}", + server_name=self.server_name, + ): + res: Optional[List[MediaUploadLimit]] = await delay_cancellation( + callback(user_id) + ) + if res is not None: # to allow [] to be returned meaning no limit + # We sort them in descending order of time period + res.sort(key=lambda limit: limit.time_period_ms, reverse=True) + return res + + return None + + async def on_media_upload_limit_exceeded( + self, + user_id: str, + limit: MediaUploadLimit, + sent_bytes: int, + attempted_bytes: int, + ) -> None: + for callback in self._on_media_upload_limit_exceeded_callbacks: + with Measure( + self.clock, + name=f"{callback.__module__}.{callback.__qualname__}", + server_name=self.server_name, + ): + # Use a copy of the data in case the module modifies it + limit_copy = MediaUploadLimit( + max_bytes=limit.max_bytes, time_period_ms=limit.time_period_ms + ) + await delay_cancellation( + callback(user_id, limit_copy, sent_bytes, attempted_bytes) + ) diff --git a/tests/rest/client/test_media.py b/tests/rest/client/test_media.py index e6ed47f83a1..ec6760feea0 100644 --- a/tests/rest/client/test_media.py +++ b/tests/rest/client/test_media.py @@ -46,6 +46,7 @@ from synapse.api.errors import HttpResponseException from synapse.api.ratelimiting import Ratelimiter +from synapse.config._base import Config from synapse.config.oembed import OEmbedEndpointConfig from synapse.http.client import MultipartResponse from synapse.http.types import QueryParams @@ -53,6 +54,7 @@ from synapse.media._base import FileInfo, ThumbnailInfo from synapse.media.thumbnailer import ThumbnailProvider from synapse.media.url_previewer import IMAGE_CACHE_EXPIRY_MS +from synapse.module_api import MediaUploadLimit from synapse.rest import admin from synapse.rest.client import login, media from synapse.server import HomeServer @@ -2967,3 +2969,192 @@ def 
test_over_weekly_limit(self) -> None: # This will succeed as the weekly limit has reset channel = self.upload_media(900) self.assertEqual(channel.code, 200) + + +class MediaUploadLimitsModuleOverrides(unittest.HomeserverTestCase): + """ + This test case simulates a homeserver with media upload limits being overridden by the module API. + """ + + servlets = [ + media.register_servlets, + login.register_servlets, + admin.register_servlets, + ] + + def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: + config = self.default_config() + + self.storage_path = self.mktemp() + self.media_store_path = self.mktemp() + os.mkdir(self.storage_path) + os.mkdir(self.media_store_path) + config["media_store_path"] = self.media_store_path + + provider_config = { + "module": "synapse.media.storage_provider.FileStorageProviderBackend", + "store_local": True, + "store_synchronous": False, + "store_remote": True, + "config": {"directory": self.storage_path}, + } + + config["media_storage_providers"] = [provider_config] + + # default limits to use + config["media_upload_limits"] = [ + {"time_period": "1d", "max_size": "1K"}, + {"time_period": "1w", "max_size": "3K"}, + ] + + return self.setup_test_homeserver(config=config) + + async def _get_media_upload_limits_for_user( + self, + user_id: str, + ) -> Optional[List[MediaUploadLimit]]: + # user1 has custom limits + if user_id == self.user1: + # n.b. 
we return these in increasing duration order and Synapse will need to sort them correctly + return [ + MediaUploadLimit( + time_period_ms=Config.parse_duration("1d"), max_bytes=5000 + ), + MediaUploadLimit( + time_period_ms=Config.parse_duration("1w"), max_bytes=15000 + ), + ] + # user2 has no limits + if user_id == self.user2: + return [] + # otherwise use default + return None + + async def _on_media_upload_limit_exceeded( + self, + user_id: str, + limit: MediaUploadLimit, + sent_bytes: int, + attempted_bytes: int, + ) -> None: + self.last_media_upload_limit_exceeded: Optional[dict[str, object]] = { + "user_id": user_id, + "limit": limit, + "sent_bytes": sent_bytes, + "attempted_bytes": attempted_bytes, + } + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + self.repo = hs.get_media_repository() + self.client = hs.get_federation_http_client() + self.store = hs.get_datastores().main + self.user1 = self.register_user("user1", "pass") + self.tok1 = self.login("user1", "pass") + self.user2 = self.register_user("user2", "pass") + self.tok2 = self.login("user2", "pass") + self.user3 = self.register_user("user3", "pass") + self.tok3 = self.login("user3", "pass") + self.last_media_upload_limit_exceeded = None + self.hs.get_module_api().register_media_repository_callbacks( + get_media_upload_limits_for_user=self._get_media_upload_limits_for_user, + on_media_upload_limit_exceeded=self._on_media_upload_limit_exceeded, + ) + + def create_resource_dict(self) -> Dict[str, Resource]: + resources = super().create_resource_dict() + resources["/_matrix/media"] = self.hs.get_media_repository_resource() + return resources + + def upload_media(self, size: int, tok: str) -> FakeChannel: + """Helper to upload media of a given size with a given token.""" + return self.make_request( + "POST", + "/_matrix/media/v3/upload", + content=b"0" * size, + access_token=tok, + shorthand=False, + content_type=b"text/plain", + custom_headers=[("Content-Length", 
str(size))], + ) + + def test_upload_under_limit(self) -> None: + """Test that uploading media under the limit works.""" + + # User 1 uploads 100 bytes + channel = self.upload_media(100, self.tok1) + self.assertEqual(channel.code, 200) + + # User 2 (unlimited) uploads 100 bytes + channel = self.upload_media(100, self.tok2) + self.assertEqual(channel.code, 200) + + # User 3 (default) uploads 100 bytes + channel = self.upload_media(100, self.tok3) + self.assertEqual(channel.code, 200) + + self.assertEqual(self.last_media_upload_limit_exceeded, None) + + def test_uses_custom_limit(self) -> None: + """Test that uploading media over the module provided daily limit fails.""" + + # User 1 uploads 3000 bytes + channel = self.upload_media(3000, self.tok1) + self.assertEqual(channel.code, 200) + + # User 1 attempts to upload 4000 bytes taking it over the limit + channel = self.upload_media(4000, self.tok1) + self.assertEqual(channel.code, 400) + assert self.last_media_upload_limit_exceeded is not None + self.assertEqual(self.last_media_upload_limit_exceeded["user_id"], self.user1) + self.assertEqual( + self.last_media_upload_limit_exceeded["limit"], + MediaUploadLimit( + max_bytes=5000, time_period_ms=Config.parse_duration("1d") + ), + ) + self.assertEqual(self.last_media_upload_limit_exceeded["sent_bytes"], 3000) + self.assertEqual(self.last_media_upload_limit_exceeded["attempted_bytes"], 4000) + + # User 1 attempts to upload 20000 bytes which is over the weekly limit + # This tests that the limits have been sorted as expected + channel = self.upload_media(20000, self.tok1) + self.assertEqual(channel.code, 400) + assert self.last_media_upload_limit_exceeded is not None + self.assertEqual(self.last_media_upload_limit_exceeded["user_id"], self.user1) + self.assertEqual( + self.last_media_upload_limit_exceeded["limit"], + MediaUploadLimit( + max_bytes=15000, time_period_ms=Config.parse_duration("1w") + ), + ) + 
self.assertEqual(self.last_media_upload_limit_exceeded["sent_bytes"], 3000) + self.assertEqual( + self.last_media_upload_limit_exceeded["attempted_bytes"], 20000 + ) + + def test_uses_unlimited(self) -> None: + """Test that unlimited user is not limited when module returns [].""" + # User 2 uploads 10000 bytes which is over the default limit + channel = self.upload_media(10000, self.tok2) + self.assertEqual(channel.code, 200) + self.assertEqual(self.last_media_upload_limit_exceeded, None) + + def test_uses_defaults(self) -> None: + """Test that the default limits are applied when module returned None.""" + # User 3 uploads 500 bytes + channel = self.upload_media(500, self.tok3) + self.assertEqual(channel.code, 200) + + # User 3 uploads 800 bytes which is over the limit + channel = self.upload_media(800, self.tok3) + self.assertEqual(channel.code, 400) + assert self.last_media_upload_limit_exceeded is not None + self.assertEqual(self.last_media_upload_limit_exceeded["user_id"], self.user3) + self.assertEqual( + self.last_media_upload_limit_exceeded["limit"], + MediaUploadLimit( + max_bytes=1024, time_period_ms=Config.parse_duration("1d") + ), + ) + self.assertEqual(self.last_media_upload_limit_exceeded["sent_bytes"], 500) + self.assertEqual(self.last_media_upload_limit_exceeded["attempted_bytes"], 800)