Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/18911.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add an Admin API to query a piece of local or cached remote media by ID.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the motivating use case for this API?

Why is it useful?

How do I get an origin/media_id to use with this API?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@MadLittleMods thanks for the review! I've re-written the newsfile - if you are happy with the changes, could you hit the merge button for me?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The original newsfile was good. I was just asking about the background and context of the PR itself.

Can you expand on that?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah - the motivating case for this is to allow server admins to more easily investigate media on their server, and provide more in-depth information about a piece of media that may have been reported or otherwise flagged. Adding this as an API also allows moderation tooling to be built against it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm assuming from the reports, it's easy and convenient to grab the origin/media_id?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, reports will generally either directly provide an mxc, which contains origin/media_id, or they will provide an event id for an event which contains an mxc which the origin/media_id can be pulled from.

34 changes: 34 additions & 0 deletions docs/admin_api/media_admin_api.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,40 @@ the use of the
[List media uploaded by a user](user_admin_api.md#list-media-uploaded-by-a-user)
Admin API.

## Query a piece of media by ID

This API returns information about a piece of local or cached remote media given the origin server name and media ID. If
information is requested for remote media which is not cached, the endpoint will return a 404.

Request:
```http
GET /_synapse/admin/v1/media/<origin>/<media_id>
```

The API returns a JSON body with media info like the following:

Response:
```json
{
"media_info": {
"media_origin": "remote.com",
"user_id": null,
"media_id": "sdginwegWEG",
"media_type": "image/png",
"media_length": 67,
"upload_name": "test.png",
"created_ts": 300,
"filesystem_id": "wgeweg",
"url_cache": null,
"last_access_ts": 400,
"quarantined_by": null,
"authenticated": false,
"safe_from_quarantine": null,
"sha256": "ebf4f635a17d10d6eb46ba680b70142419aa3220f228001a036d311a22ee9d2a"
}
}
```

# Quarantine media

Quarantining media means that it is marked as inaccessible by users. It applies
Expand Down
17 changes: 17 additions & 0 deletions synapse/media/media_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,23 @@ def respond_not_yet_uploaded(self, request: SynapseRequest) -> None:
send_cors=True,
)

async def get_cached_remote_media_info(
    self, origin: str, media_id: str
) -> Optional[RemoteMedia]:
    """
    Look up the locally cached record for a piece of remote media.

    Only this server's store is consulted: when no cached entry exists for the
    given origin/media ID pair, the media is not fetched over federation and
    None is returned instead.

    Args:
        origin: The server name the remote media originates from
        media_id: The ID of the media on that origin server

    Returns:
        The cached remote media info, or None if nothing is cached for it
    """
    cached_media = await self.store.get_cached_remote_media(origin, media_id)
    return cached_media

async def get_local_media_info(
self, request: SynapseRequest, media_id: str, max_timeout_ms: int
) -> Optional[LocalMedia]:
Expand Down
72 changes: 70 additions & 2 deletions synapse/rest/admin/media.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
# [This file includes modifications made by New Vector Limited]
#
#

import logging
from http import HTTPStatus
from typing import TYPE_CHECKING, Optional, Tuple
Expand All @@ -41,7 +40,9 @@
assert_requester_is_admin,
assert_user_is_admin,
)
from synapse.storage.databases.main.media_repository import MediaSortOrder
from synapse.storage.databases.main.media_repository import (
MediaSortOrder,
)
from synapse.types import JsonDict, UserID

if TYPE_CHECKING:
Expand All @@ -50,6 +51,72 @@
logger = logging.getLogger(__name__)


class QueryMediaById(RestServlet):
    """
    Admin servlet returning the stored info for one piece of media, addressed
    by origin server name and media ID.

    Media belonging to this server is read from the main datastore. Media from
    any other server is looked up via the media repository's cached remote
    media info only.
    """

    PATTERNS = admin_patterns("/media/(?P<server_name>[^/]*)/(?P<media_id>[^/]*)$")

    def __init__(self, hs: "HomeServer"):
        self.hs = hs
        self.store = hs.get_datastores().main
        self.auth = hs.get_auth()
        self.server_name = hs.hostname
        self.media_repo = hs.get_media_repository()

    async def on_GET(
        self, request: SynapseRequest, server_name: str, media_id: str
    ) -> Tuple[int, JsonDict]:
        """
        Return the info held for `server_name`/`media_id`.

        Args:
            request: The incoming request; the requester must be a server admin
            server_name: The origin server of the media
            media_id: The ID of the media on that origin

        Returns:
            A 200 status and a JSON body of the form `{"media_info": {...}}`.
            Fields that only exist for the other kind of media (local vs.
            remote) are reported as null.

        Raises:
            NotFoundError: if no local (or cached remote) entry exists
        """
        requester = await self.auth.get_user_by_req(request)
        await assert_user_is_admin(self.auth, requester)

        media_info: JsonDict
        if self.hs.is_mine_server_name(server_name):
            # Media that originated on this server.
            local = await self.store.get_local_media(media_id)
            if local is None:
                raise NotFoundError("Unknown media")
            media_info = {
                "media_origin": None,
                "user_id": local.user_id,
                "media_id": local.media_id,
                "media_type": local.media_type,
                "media_length": local.media_length,
                "upload_name": local.upload_name,
                "created_ts": local.created_ts,
                "filesystem_id": None,
                "url_cache": local.url_cache,
                "last_access_ts": local.last_access_ts,
                "quarantined_by": local.quarantined_by,
                "authenticated": local.authenticated,
                "safe_from_quarantine": local.safe_from_quarantine,
                "sha256": local.sha256,
            }
        else:
            # Media from another server: only the local cache is consulted.
            remote = await self.media_repo.get_cached_remote_media_info(
                server_name, media_id
            )
            if remote is None:
                raise NotFoundError("Unknown media")
            media_info = {
                "media_origin": remote.media_origin,
                "user_id": None,
                "media_id": remote.media_id,
                "media_type": remote.media_type,
                "media_length": remote.media_length,
                "upload_name": remote.upload_name,
                "created_ts": remote.created_ts,
                "filesystem_id": remote.filesystem_id,
                "url_cache": None,
                "last_access_ts": remote.last_access_ts,
                "quarantined_by": remote.quarantined_by,
                "authenticated": remote.authenticated,
                "safe_from_quarantine": None,
                "sha256": remote.sha256,
            }

        return HTTPStatus.OK, {"media_info": media_info}


class QuarantineMediaInRoom(RestServlet):
"""Quarantines all media in a room so that no one can download it via
this server.
Expand Down Expand Up @@ -470,3 +537,4 @@ def register_servlets_for_media_repo(hs: "HomeServer", http_server: HttpServer)
DeleteMediaByDateSize(hs).register(http_server)
DeleteMediaByID(hs).register(http_server)
UserMediaRestServlet(hs).register(http_server)
QueryMediaById(hs).register(http_server)
172 changes: 167 additions & 5 deletions tests/rest/admin/test_media.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,9 @@

import synapse.rest.admin
from synapse.api.errors import Codes
from synapse.media._base import FileInfo
from synapse.media.filepath import MediaFilePaths
from synapse.rest.client import login, profile, room
from synapse.rest.client import login, media, profile, room
from synapse.server import HomeServer
from synapse.util import Clock

Expand All @@ -47,6 +48,7 @@ class _AdminMediaTests(unittest.HomeserverTestCase):
synapse.rest.admin.register_servlets,
synapse.rest.admin.register_servlets_for_media_repo,
login.register_servlets,
media.register_servlets,
]

def create_resource_dict(self) -> Dict[str, Resource]:
Expand All @@ -55,6 +57,166 @@ def create_resource_dict(self) -> Dict[str, Resource]:
return resources


class QueryMediaByIDTestCase(_AdminMediaTests):
    """
    Tests for the `GET /_synapse/admin/v1/media/<origin>/<media_id>` admin API.
    """

    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
        self.hs = hs
        self.clock = clock
        self.server_name = hs.hostname
        self.store = hs.get_datastores().main

        self.admin_user = self.register_user("admin", "pass", admin=True)
        self.admin_user_tok = self.login("admin", "pass")

        self.filepaths = MediaFilePaths(hs.config.media.media_store_path)

    def _cache_remote_media(self, file_id: str) -> None:
        """
        Make `remote.com/<file_id>` look like cached remote media.

        Writes SMALL_PNG into media storage, records a matching cached remote
        media entry in the store, then downloads the media through the client
        API to check that it is actually being served.

        Args:
            file_id: Used as the media ID, filesystem ID and sha256 value of
                the cached entry.
        """
        file_info = FileInfo(server_name="remote.com", file_id=file_id)

        media_storage = self.hs.get_media_repository().media_storage

        # Drive the async context manager by hand, as the test itself is
        # synchronous.
        ctx = media_storage.store_into_file(file_info)
        (f, fname) = self.get_success(ctx.__aenter__())
        f.write(SMALL_PNG)
        self.get_success(ctx.__aexit__(None, None, None))

        self.get_success(
            self.store.store_cached_remote_media(
                origin="remote.com",
                media_id=file_id,
                media_type="image/png",
                media_length=len(SMALL_PNG),
                time_now_ms=self.clock.time_msec(),
                upload_name="test.png",
                filesystem_id=file_id,
                sha256=file_id,
            )
        )

        channel = self.make_request(
            "GET",
            f"/_matrix/client/v1/media/download/remote.com/{file_id}",
            shorthand=False,
            access_token=self.admin_user_tok,
        )

        # Should be successful
        self.assertEqual(
            200,
            channel.code,
            msg=("Expected to receive a 200 on accessing media"),
        )

    def test_no_auth(self) -> None:
        """
        Try to query media without authentication.
        """
        url = f"/_synapse/admin/v1/media/{self.server_name}/12345"
        channel = self.make_request("GET", url)

        self.assertEqual(
            401,
            channel.code,
            msg=channel.json_body,
        )
        self.assertEqual(Codes.MISSING_TOKEN, channel.json_body["errcode"])

    def test_requester_is_no_admin(self) -> None:
        """
        If the user is not a server admin, an error is returned.
        """
        self.other_user = self.register_user("user", "pass")
        self.other_user_token = self.login("user", "pass")

        channel = self.make_request(
            "GET",
            f"/_synapse/admin/v1/media/{self.server_name}/12345",
            access_token=self.other_user_token,
        )

        self.assertEqual(403, channel.code, msg=channel.json_body)
        self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"])

    def test_local_media_does_not_exist(self) -> None:
        """
        Tests that a lookup for local media that does not exist returns a 404
        """
        channel = self.make_request(
            "GET",
            f"/_synapse/admin/v1/media/{self.server_name}/12345",
            access_token=self.admin_user_tok,
        )

        self.assertEqual(404, channel.code, msg=channel.json_body)
        self.assertEqual(Codes.NOT_FOUND, channel.json_body["errcode"])

    def test_remote_media_does_not_exist(self) -> None:
        """
        Tests that a lookup for remote media that is not cached returns a 404
        """
        # Use a server name other than our own so that the request goes down
        # the remote (cached) media path rather than the local media path.
        channel = self.make_request(
            "GET",
            "/_synapse/admin/v1/media/remote.com/12345",
            access_token=self.admin_user_tok,
        )

        self.assertEqual(404, channel.code, msg=channel.json_body)
        self.assertEqual(Codes.NOT_FOUND, channel.json_body["errcode"])

    def test_query_local_media(self) -> None:
        """
        Tests that querying an existing local media returns appropriate media info
        """

        # Upload some media to this server
        response = self.helper.upload_media(
            SMALL_PNG,
            tok=self.admin_user_tok,
            expect_code=200,
        )
        # Extract media ID from the response
        server_and_media_id = response["content_uri"][6:]  # Cut off 'mxc://'
        server_name, media_id = server_and_media_id.split("/")
        self.assertEqual(server_name, self.server_name)

        channel = self.make_request(
            "GET",
            f"/_synapse/admin/v1/media/{self.server_name}/{media_id}",
            access_token=self.admin_user_tok,
        )

        self.assertEqual(200, channel.code, msg=channel.json_body)
        self.assertEqual(channel.json_body["media_info"]["authenticated"], True)
        self.assertEqual(channel.json_body["media_info"]["media_id"], media_id)
        self.assertEqual(
            channel.json_body["media_info"]["media_length"], len(SMALL_PNG)
        )
        self.assertEqual(
            channel.json_body["media_info"]["media_type"], "application/json"
        )
        self.assertEqual(channel.json_body["media_info"]["upload_name"], "test.png")
        self.assertEqual(channel.json_body["media_info"]["user_id"], "@admin:test")

    def test_query_remote_media(self) -> None:
        """
        Tests that querying cached remote media returns appropriate media info
        """
        file_id = "abcdefg12345"
        self._cache_remote_media(file_id)

        channel = self.make_request(
            "GET",
            f"/_synapse/admin/v1/media/remote.com/{file_id}",
            access_token=self.admin_user_tok,
        )

        self.assertEqual(200, channel.code, msg=channel.json_body)
        self.assertEqual(channel.json_body["media_info"]["authenticated"], True)
        self.assertEqual(channel.json_body["media_info"]["media_id"], file_id)
        self.assertEqual(
            channel.json_body["media_info"]["media_length"], len(SMALL_PNG)
        )
        self.assertEqual(channel.json_body["media_info"]["media_type"], "image/png")
        self.assertEqual(channel.json_body["media_info"]["upload_name"], "test.png")
        self.assertEqual(channel.json_body["media_info"]["media_origin"], "remote.com")


class DeleteMediaByIDTestCase(_AdminMediaTests):
def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
self.server_name = hs.hostname
Expand Down Expand Up @@ -710,8 +872,8 @@ def test_quarantine_media_match_hash(self) -> None:
self.assertFalse(channel.json_body)

# Test that ALL similar media was quarantined.
for media in [self.media_id, self.media_id_2, self.media_id_3]:
media_info = self.get_success(self.store.get_local_media(media))
for media_item in [self.media_id, self.media_id_2, self.media_id_3]:
media_info = self.get_success(self.store.get_local_media(media_item))
assert media_info is not None
self.assertTrue(media_info.quarantined_by)

Expand All @@ -731,8 +893,8 @@ def test_quarantine_media_match_hash(self) -> None:
self.assertFalse(channel.json_body)

# Test that ALL similar media is now reset.
for media in [self.media_id, self.media_id_2, self.media_id_3]:
media_info = self.get_success(self.store.get_local_media(media))
for media_item in [self.media_id, self.media_id_2, self.media_id_3]:
media_info = self.get_success(self.store.get_local_media(media_item))
assert media_info is not None
self.assertFalse(media_info.quarantined_by)

Expand Down
Loading