diff --git a/src/sentry/api/authentication.py b/src/sentry/api/authentication.py index f76da2f1380494..436adc328d2dd0 100644 --- a/src/sentry/api/authentication.py +++ b/src/sentry/api/authentication.py @@ -1,5 +1,7 @@ from __future__ import annotations +import hashlib +import random from collections.abc import Callable, Iterable from typing import Any, ClassVar @@ -306,10 +308,82 @@ def authenticate(self, request: Request): return self.transform_auth(user_id, None) +class TokenStrLookupRequired(Exception): + """ + Used in combination with `apitoken.use-and-update-hash-rate` option. + + If raised, calling code should perform API token lookups based on its + plaintext value and not its hashed value. + """ + + pass + + @AuthenticationSiloLimit(SiloMode.REGION, SiloMode.CONTROL) class UserAuthTokenAuthentication(StandardAuthentication): token_name = b"bearer" + def _find_or_update_token_by_hash(self, token_str: str) -> ApiToken | ApiTokenReplica: + """ + Find token by hash or update token's hash value if only found via plaintext. + + 1. Hash provided plaintext token. + 2. Perform lookup based on hashed value. + 3. If found, return the token. + 4. If not found, search for the token based on its plaintext value. + 5. If found, update the token's hashed value (CONTROL silo only) and return the token. + 6. If not found via hash or plaintext value, raise AuthenticationFailed + + Returns `ApiTokenReplica` if running in REGION silo or + `ApiToken` if running in CONTROL silo. 
+ """ + + hashed_token = hashlib.sha256(token_str.encode()).hexdigest() + + rate = options.get("apitoken.use-and-update-hash-rate") + random_rate = random.random() + use_hashed_token = rate > random_rate + + if SiloMode.get_current_mode() == SiloMode.REGION: + try: + if use_hashed_token: + # Try to find the token by its hashed value first + return ApiTokenReplica.objects.get(hashed_token=hashed_token) + else: + raise TokenStrLookupRequired + except (ApiTokenReplica.DoesNotExist, TokenStrLookupRequired): + try: + # If we can't find it by hash, use the plaintext string + return ApiTokenReplica.objects.get(token=token_str) + except ApiTokenReplica.DoesNotExist: + # If the token does not exist by plaintext either, it is not a valid token + raise AuthenticationFailed("Invalid token") + else: + try: + # Try to find the token by its hashed value first + if use_hashed_token: + return ApiToken.objects.select_related("user", "application").get( + hashed_token=hashed_token + ) + else: + raise TokenStrLookupRequired + except (ApiToken.DoesNotExist, TokenStrLookupRequired): + try: + # If we can't find it by hash, use the plaintext string + api_token = ApiToken.objects.select_related("user", "application").get( + token=token_str + ) + except ApiToken.DoesNotExist: + # If the token does not exist by plaintext either, it is not a valid token + raise AuthenticationFailed("Invalid token") + else: + if use_hashed_token: + # Update it with the hashed value if found by plaintext + api_token.hashed_token = hashed_token + api_token.save(update_fields=["hashed_token"]) + + return api_token + def accepts_auth(self, auth: list[bytes]) -> bool: if not super().accepts_auth(auth): return False @@ -332,26 +406,16 @@ def authenticate_token(self, request: Request, token_str: str) -> tuple[Any, Any application_is_inactive = False if not token: - if SiloMode.get_current_mode() == SiloMode.REGION: - try: - atr = token = ApiTokenReplica.objects.get(token=token_str) - except 
ApiTokenReplica.DoesNotExist: - raise AuthenticationFailed("Invalid token") - user = user_service.get_user(user_id=atr.user_id) - application_is_inactive = not atr.application_is_active - else: - try: - at = token = ( - ApiToken.objects.filter(token=token_str) - .select_related("user", "application") - .get() - ) - except ApiToken.DoesNotExist: - raise AuthenticationFailed("Invalid token") - user = at.user + token = self._find_or_update_token_by_hash(token_str) + if isinstance(token, ApiTokenReplica): # we're running as a REGION silo + user = user_service.get_user(user_id=token.user_id) + application_is_inactive = not token.application_is_active + else: # the token returned is an ApiToken from the CONTROL silo + user = token.user application_is_inactive = ( - at.application is not None and not at.application.is_active + token.application is not None and not token.application.is_active ) + elif isinstance(token, SystemToken): user = token.user @@ -401,9 +465,9 @@ def authenticate_token(self, request: Request, token_str: str) -> tuple[Any, Any raise AuthenticationFailed("Invalid org token") else: try: - token = OrgAuthToken.objects.filter( + token = OrgAuthToken.objects.get( token_hashed=token_hashed, date_deactivated__isnull=True - ).get() + ) except OrgAuthToken.DoesNotExist: raise AuthenticationFailed("Invalid org token") diff --git a/src/sentry/options/defaults.py b/src/sentry/options/defaults.py index 66b5383cec733e..bd88572d16156c 100644 --- a/src/sentry/options/defaults.py +++ b/src/sentry/options/defaults.py @@ -294,6 +294,20 @@ type=Bool, flags=FLAG_ALLOW_EMPTY | FLAG_PRIORITIZE_DISK | FLAG_AUTOMATOR_MODIFIABLE, ) +register( + "apitoken.save-hash-on-create", + default=True, + type=Bool, + flags=FLAG_ALLOW_EMPTY | FLAG_PRIORITIZE_DISK | FLAG_AUTOMATOR_MODIFIABLE, +) + +# Controls the rate of using the hashed value of User API tokens for lookups when logging in +# and also updates tokens which are not hashed +register( + "apitoken.use-and-update-hash-rate", + 
default=0.0, + flags=FLAG_AUTOMATOR_MODIFIABLE, +) register( "api.rate-limit.org-create", diff --git a/src/sentry/services/hybrid_cloud/auth/model.py b/src/sentry/services/hybrid_cloud/auth/model.py index 21ad5dda2947d6..905d110eb5f64a 100644 --- a/src/sentry/services/hybrid_cloud/auth/model.py +++ b/src/sentry/services/hybrid_cloud/auth/model.py @@ -34,6 +34,7 @@ class RpcApiToken(RpcModel): application_id: int | None = None application_is_active: bool = False token: str = "" + hashed_token: str | None = None expires_at: datetime.datetime | None = None allowed_origins: list[str] = Field(default_factory=list) scope_list: list[str] = Field(default_factory=list) diff --git a/src/sentry/services/hybrid_cloud/auth/serial.py b/src/sentry/services/hybrid_cloud/auth/serial.py index 12fad1442a4b09..11cb8ffcee0eac 100644 --- a/src/sentry/services/hybrid_cloud/auth/serial.py +++ b/src/sentry/services/hybrid_cloud/auth/serial.py @@ -87,6 +87,7 @@ def serialize_api_token(at: ApiToken) -> RpcApiToken: organization_id=at.organization_id, application_is_active=at.application_id is None or at.application.is_active, token=at.token, + hashed_token=at.hashed_token, expires_at=at.expires_at, allowed_origins=list(at.get_allowed_origins()), scope_list=at.get_scopes(), diff --git a/src/sentry/services/hybrid_cloud/replica/impl.py b/src/sentry/services/hybrid_cloud/replica/impl.py index 199af8d8cd46ea..d377ba5a628e18 100644 --- a/src/sentry/services/hybrid_cloud/replica/impl.py +++ b/src/sentry/services/hybrid_cloud/replica/impl.py @@ -160,6 +160,7 @@ def upsert_replicated_api_token(self, *, api_token: RpcApiToken, region_name: st organization=organization, application_is_active=api_token.application_is_active, token=api_token.token, + hashed_token=api_token.hashed_token, expires_at=api_token.expires_at, apitoken_id=api_token.id, scope_list=api_token.scope_list, diff --git a/tests/sentry/api/test_authentication.py b/tests/sentry/api/test_authentication.py index 
b75b188ae3a0d4..698a8b8fe93961 100644 --- a/tests/sentry/api/test_authentication.py +++ b/tests/sentry/api/test_authentication.py @@ -1,3 +1,4 @@ +import hashlib import uuid from datetime import UTC, datetime @@ -30,8 +31,11 @@ ) from sentry.silo import SiloMode from sentry.testutils.cases import TestCase +from sentry.testutils.helpers import override_options +from sentry.testutils.outbox import outbox_runner from sentry.testutils.pytest.fixtures import django_db_all from sentry.testutils.silo import assume_test_silo_mode, control_silo_test, no_silo_test +from sentry.types.token import AuthTokenType from sentry.utils.security.orgauthtoken_token import hash_token @@ -202,6 +206,92 @@ def test_no_match(self): with pytest.raises(AuthenticationFailed): self.auth.authenticate(request) + @override_options({"apitoken.save-hash-on-create": False}) + @override_options({"apitoken.use-and-update-hash-rate": 1.0}) + def test_token_hashed_with_option_off(self): + # see https://github.com/getsentry/sentry/pull/65941 + # the UserAuthTokenAuthentication middleware was updated to hash tokens as + # they were used, this test verifies the hash + api_token = ApiToken.objects.create(user=self.user, token_type=AuthTokenType.USER) + expected_hash = hashlib.sha256(api_token.token.encode()).hexdigest() + + # we haven't authenticated to the API endpoint yet, so this value should be empty + assert api_token.hashed_token is None + + request = HttpRequest() + request.META["HTTP_AUTHORIZATION"] = f"Bearer {api_token.token}" + + # trigger the authentication middleware, and thus the hashing + result = self.auth.authenticate(request) + assert result is not None + + # check for the expected hash value + api_token.refresh_from_db() + assert api_token.hashed_token == expected_hash + + @override_options({"apitoken.save-hash-on-create": False}) + @override_options({"apitoken.use-and-update-hash-rate": 0.0}) + def test_token_not_hashed_with_0_rate(self): + api_token = 
ApiToken.objects.create(user=self.user, token_type=AuthTokenType.USER) + + # we haven't authenticated to the API endpoint yet, so this value should be empty + assert api_token.hashed_token is None + + request = HttpRequest() + request.META["HTTP_AUTHORIZATION"] = f"Bearer {api_token.token}" + + # trigger the authentication middleware + result = self.auth.authenticate(request) + assert result is not None + + # the rate is 0.0, so no hash should have been stored + api_token.refresh_from_db() + assert api_token.hashed_token is None + + +@no_silo_test +class TestTokenAuthenticationReplication(TestCase): + def setUp(self): + super().setUp() + + self.auth = UserAuthTokenAuthentication() + + @override_options({"apitoken.save-hash-on-create": False}) + @override_options({"apitoken.use-and-update-hash-rate": 1.0}) + def test_hash_is_replicated(self): + api_token = ApiToken.objects.create(user=self.user, token_type=AuthTokenType.USER) + expected_hash = hashlib.sha256(api_token.token.encode()).hexdigest() + + # we haven't authenticated to the API endpoint yet, so this value should be empty + assert api_token.hashed_token is None + + request = HttpRequest() + request.META["HTTP_AUTHORIZATION"] = f"Bearer {api_token.token}" + + with assume_test_silo_mode(SiloMode.REGION): + with outbox_runner(): + # make sure the token was replicated + api_token_replica = ApiTokenReplica.objects.get(apitoken_id=api_token.id) + assert api_token.token == api_token_replica.token + assert ( + api_token_replica.hashed_token is None + ) # we don't expect to have a hashed value yet + + # trigger the authentication middleware, and thus the hashing backfill + result = self.auth.authenticate(request) + assert result is not None + + # check for the expected hash value + api_token.refresh_from_db() + assert api_token.hashed_token == expected_hash + + # ApiTokenReplica should also be updated + api_token_replica.refresh_from_db() + assert api_token_replica.hashed_token == expected_hash + + # just for good measure + assert 
api_token.hashed_token == api_token_replica.hashed_token + @django_db_all @pytest.mark.parametrize("internal", [True, False])