Skip to content
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
301 changes: 301 additions & 0 deletions plugins/animals.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,301 @@


import abc
from collections import defaultdict
from dataclasses import dataclass
import logging
from typing import Any, Literal, Optional

from bot.acl import privileged
from bot.config import plugin_config_command
from discord.ext.commands import group
import discord
from bot.commands import Context, plugin_command
from discord.ext.commands import command
import aiohttp
import datetime as dt

from util.discord import UserError

CAT_API_ROOT = 'https://api.thecatapi.com/v1/images/search'
DOG_API_ROOT = 'https://api.thedogapi.com/v1/images/search'
TIMEOUT = 10 # seconds

def with_suffix(obj: dict[str, Any], *path: str, suffix: str) -> Optional[str]:
val = obj
try:
for p in path:
val = val[p]
except (KeyError, TypeError):
return None
if val is not None:
return f'{val}{suffix}'
return None
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This makes very little sense as a function. I understand you're trying to avoid manually handling Nones, but this isn't the way to do it.


class RateLimiter(abc.ABC):
@abc.abstractmethod
def is_allowed(self, ctx: Context) -> tuple[bool, str]:
pass

class PerPersonRateLimiter(RateLimiter):
def __init__(self, calls: int, period: dt.timedelta):
self.calls = calls
self.period = period
self.users: dict[int, list[dt.datetime]] = defaultdict(list)

def __repr__(self) -> str:
return f'{self.__class__.__name__}<{self.calls=}, {self.period=}>'

def __str__(self) -> str:
return f'{self.__class__.__name__}<{self.calls} calls per {self.period}>'

def _is_allowed(self, user_id: int) -> tuple[bool, str]:
now = dt.datetime.now()
timestamps = self.users[user_id]
# Remove timestamps outside the period
while timestamps and now - timestamps[0] > self.period:
timestamps.pop(0)
if len(timestamps) < self.calls:
timestamps.append(now)
return True, ''
else:
next_time = timestamps[0] + self.period
return False, f'Rate limit exceeded: {self.calls} calls per {self.period}. Please try again <t:{int(next_time.timestamp())}:R>.'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ratelimiting is a hard problem, and this implementation is a good example of why.

You're storing for each user that has ever interacted with these commands a list of timestamps of when they last interacted. The list for a specific user is only truncated when that specific user interacts again, but otherwise will remain fixed (and occupy memory) until the bot is restarted. The memory usage of this algorithm is thus essentially unbounded.

All while we actually know that at most 14400 requests could have been made in a day, and requests made more than a day ago have no effect.

So in a sense this ratelimiter is actually making the system worse than it would've been without it. Ratelimiters are supposed to make a system better in presence of adversaries, so they have to be very well designed in terms of their own resource usages.

We could discuss and design a better algorithm, but I think implementing a good ratelimiter is outside the scope of this module.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess so. To me that doesn't seem like that much data, but I haven't actually run any numbers on it. Mainly I'm trying to be a good citizen and not smoke the API in case someone starts scripting or whatever, as well provide some semblance of fairness and DoS protection.

I'd like to see a rate limiter be command independent, similar to the ACL system.


def is_allowed(self, ctx: Context):
user_id = ctx.author.id
return self._is_allowed(user_id)

class GlobalRateLimiter(PerPersonRateLimiter):
def is_allowed(self, ctx: Context):
return self._is_allowed(0) # Use a single user ID for global rate limiting

@dataclass
class CatRequest:
limit: Optional[int] = None
page: Optional[int] = None
order: Literal['ASC', 'DESC', 'RAND', None] = None
has_breeds: Optional[bool] = None
breed_ids: Optional[list[str]] = None
#sub_id: Optional[str]

def __post_init__(self):
try:
assert self.limit is None or 1 <= self.limit <= 100
assert self.page is None or 0 <= self.page
except AssertionError:
raise ValueError()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lotsa unused here. I only see this class initialized once as CatRequest(). Also the API has some built-in defaults, e.g. limit defaults to 1 if unspecified, which is something I was worried about. Would be good to know this information.

I'm guessing it's also untested. I don't think has_breeds can be a bool, see https://yarl.aio-libs.org/en/latest/#why-isn-t-boolean-supported-by-the-url-query-api . Also the API recommends using the values 1 and 0.

Also per my reading of the docs, the cat API and the dog API are identical, no?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It does actually convert the has_breeds bool to an integer properly, see to_dict. I expected parameter passing to be simpler than it was (.cat --breed lynx wants to give me breed="--breed lynx"), which is why I built all of this infrastructure. Getting rid of it does seem reasonable, though.

Annoyingly, the cat and dog APIs are almost identical, except that the dog API does not support breed_ids, and they have some slight differences on the returned data.


def to_dict(self) -> dict[str, Any]:
result = {}
if self.limit is not None:
result['limit'] = self.limit
if self.page is not None:
result['page'] = self.page
if self.order is not None:
result['order'] = self.order
if self.has_breeds is not None:
result['has_breeds'] = int(self.has_breeds)
if self.breed_ids is not None:
result['breed_ids'] = ','.join(self.breed_ids)
return result

@dataclass
class DogRequest:
size: Literal['full', 'med', 'small', 'thumb'] = 'small'
mime_types: Optional[list[Literal['jpg', 'png', 'gif']]] = None
format: Literal['json', 'src'] = 'json'
order: Literal['ASC', 'DESC', 'RAND', None] = None
limit: Optional[int] = None
page: Optional[int] = None
has_breeds: Optional[bool] = None

def to_dict(self) -> dict[str, Any]:
result = {}
result['size'] = self.size
if self.mime_types is not None:
result['mime_types'] = ','.join(self.mime_types)
result['format'] = self.format
result['order'] = self.order
result['limit'] = self.limit
result['page'] = self.page
if self.has_breeds is not None:
result['has_breeds'] = int(self.has_breeds)
return {k: v for k, v in result.items() if v is not None}

@dataclass
class AnimalResponse:
id: str
url: str
width: int
height: int
categories: Optional[list[Any]] = None
breeds: Optional[list[dict[str, Any]]] = None

def get_weight(self) -> Optional[str]:
if self.breeds:
breed = self.breeds[0]
return (
with_suffix(breed, 'weight', 'metric', suffix=' kg') or
with_suffix(breed, 'weight', 'imperial', suffix=' lbs') or
with_suffix(breed, 'weight', suffix='')
)
return None

def get_life_span(self) -> Optional[str]:
if self.breeds:
breed = self.breeds[0]
lifespan = breed.get('life_span', None)
if not lifespan:
return None
if 'years' in lifespan:
return lifespan
else:
return f'{lifespan} years'
return None


def get_description(self) -> str:
if self.breeds:
breed = self.breeds[0]
out = {}
out['Name'] = breed.get('name', None)
out['Temperament'] = breed.get('temperament', None)
out['Origin'] = breed.get('origin', None)
out['Description'] = breed.get('description', None)
out['Weight'] = self.get_weight()
out['Life span'] = self.get_life_span()
if len(self.breeds) > 1:
other_breeds = [b.get('name', None) for b in self.breeds[1:]]
out['Other breeds'] = ', '.join(b for b in other_breeds if b)
description = '\n'.join(f'{key}: {value}' for key, value in out.items() if value)
return description
else:
return 'No breed information available.'

class AnimalApi:
def __init__(self, api_root: str, api_key: Optional[str], rate_limiters: list[RateLimiter]) -> None:
self.api_root = api_root
self.api_key = api_key
self.rate_limiters = rate_limiters

def configure(self, api_key: str) -> None:
self.api_key = api_key

def get_configuration(self) -> str:
out = ''
out += f'API Root: {self.api_root}\n'
out += f'API Key: {self.api_key}\n'
for limiter in self.rate_limiters:
out += f'Rate Limiter: {limiter}\n'
return out

async def fetch_random_animal(self, ctx: Context, params: dict[str, Any]) -> AnimalResponse:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Strange separation of responsibilities here.

Ultimately we want to take the user input (which in our case is "I want a random cat" -- with no other parameters -- an empty tuple, zero bits of information), and turn it into a discord Embed for display (because that's the cutoff point where discord.py takes over). We do this by talking to a third party API, which accepts an HTTP request of a particular format, and returns an HTTP response of a particular format.

Conceptually I can identify 3 steps here: convert user input into HTTP request, perform HTTP, convert HTTP response into discord Embed. We "perform HTTP" using aiohttp, which dictates what constitutes "HTTP request" and "HTTP response" -- aiohttp's types. What we are left with are the conversion from user input to HTTP request, and the conversion from HTTP response to discord Embed.

Now these conversion procedures can be broken up in various ways because they consist of multiple steps. To name a few: convert to/from JSON, specify/extract correct field names, specify the correct server and API key. This function seems to include the responsibility of fully parsing the reponse JSON and extracting the correct field names from it (by the virtue of calling AnimalResponse(...)), but does not include the responsiblity of specifying the correct query string field names (by the virtue of accepting params: dict[str, Any] rather than specific parameters, or a dataclass). I find this very strange.

Considering how simple our use case is -- we only make one kind of request (random, limit=1, no filters), I would probably hardcode the request, or at most, have a simple function that returns params: dict[str, str]. Converting JSON to Embed could probably also just be a simple function.

A CatRequest dataclass makes sense, provided we understand what its single responsiblity is, be it simply "specify correct field names" or "always be convertible to a valid request". AnimalResponse likewise, but yours for example seems to mix the responsibilities of validating correct field names (but only at the root), and extracting data in a format specific to the Embed we're formatting.

logging.debug(f'Fetching random animal with params: {params}')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please create a separate logger with logging.getLogger(__name__) so that the logs would be attributed to this module specifically.

if not self.api_key:
raise UserError('Animal API is not configured.')
headers = {'x-api-key': self.api_key}
for limiter in self.rate_limiters:
allowed, message = limiter.is_allowed(ctx)
if not allowed:
raise UserError(message)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a very golang idiom, returning a tuple[bool, str]. The problem here is that if the bool is True, then the str part is unused, and you have to come up with a dummy value to put there (empty string in your case).

I would prefer if limiter returned a Union[str, None] indicating either a problem or a lack of problem. Or maybe even if limiter raised UserError itself.

async with aiohttp.ClientSession() as session:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You shouldn't create a ClientSession on every request. E.g. phish uses a module global with a finalizer:

bot/plugins/phish.py

Lines 97 to 98 in 5cc74bd

http: aiohttp.ClientSession = aiohttp.ClientSession()
plugins.finalizer(http.close)

async with session.get(self.api_root, params=params, headers=headers, timeout=TIMEOUT) as resp:
data = await resp.json()
if not data:
raise UserError('No animal found!')
return AnimalResponse(**data[0])

class CatApi(AnimalApi):
def __init__(self, api_key: Optional[str], rate_limiters: list[RateLimiter]) -> None:
super().__init__(CAT_API_ROOT, api_key, rate_limiters)

async def fetch_random_cat(self, ctx: Context, req: CatRequest):
return await self.fetch_random_animal(ctx, req.to_dict())

# cat api key should be set via the config command
cat_api = CatApi('', [
# reasonable rate limits, I don't see a good way to configure these dynamically
PerPersonRateLimiter(calls=3, period=dt.timedelta(minutes=1)),
PerPersonRateLimiter(calls=20, period=dt.timedelta(hours=1)),
PerPersonRateLimiter(calls=100, period=dt.timedelta(days=1)),

GlobalRateLimiter(calls=10, period=dt.timedelta(minutes=1)),
])

class DogApi(AnimalApi):
def __init__(self, api_key: Optional[str], rate_limiters: list[RateLimiter]) -> None:
super().__init__(DOG_API_ROOT, api_key, rate_limiters)

async def fetch_random_dog(self, ctx: Context, req: DogRequest):
return await self.fetch_random_animal(ctx, req.to_dict())

# dog api key should be set via the config command
dog_api = DogApi('', [
# reasonable rate limits, I don't see a good way to configure these dynamically
PerPersonRateLimiter(calls=3, period=dt.timedelta(minutes=1)),
PerPersonRateLimiter(calls=20, period=dt.timedelta(hours=1)),
PerPersonRateLimiter(calls=100, period=dt.timedelta(days=1)),

GlobalRateLimiter(calls=10, period=dt.timedelta(minutes=1)),
])

@plugin_command
@privileged
@command('cat')
async def random_cat(ctx: Context) -> None:
''' Fetches and displays a random cat image '''

async with ctx.typing():
# Prepare request params
req = CatRequest()

cat = await cat_api.fetch_random_cat(ctx, req)
embed = discord.Embed(
title='Here is your random cat! 🐱',
url=cat.url,
color=discord.Color.random()
)
embed.set_image(url=cat.url)
footer = cat.get_description()
if footer:
footer += '\n'
footer += 'thecatapi.com'
embed.set_footer(text=footer)
await ctx.send(embed=embed)

@plugin_command
@privileged
@command('dog')
async def random_dog(ctx: Context) -> None:
''' Fetches and displays a random dog image '''

async with ctx.typing():
# Prepare request params
req = DogRequest()

dog = await dog_api.fetch_random_dog(ctx, req)
embed = discord.Embed(
title='Here is your random dog! 🐶',
url=dog.url,
color=discord.Color.random()
)
embed.set_image(url=dog.url)
footer = dog.get_description()
if footer:
footer += '\n'
footer += 'thedogapi.com'
embed.set_footer(text=footer)
await ctx.send(embed=embed)

@plugin_config_command
@privileged
@group("animals", invoke_without_command=True)
async def animals_config(ctx: Context, cat_api_key: Optional[str], dog_api_key: Optional[str]) -> None:
''' Configures the animals plugin '''
if cat_api_key:
cat_api.configure(api_key=cat_api_key)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This .configure call doesn't persist in any way, meaning if the bot is restarted it will lose the API key.

if dog_api_key:
dog_api.configure(api_key=dog_api_key)

await ctx.send(f'Animals plugin configuration:\n**Cat**\n{cat_api.get_configuration()}\n**Dog**\n{dog_api.get_configuration()}')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will leak the API key, I would be more careful with it. E.g.

bot/plugins/phish.py

Lines 422 to 435 in 5cc74bd

@config.command("submit_token")
@privileged
async def config_submit_token(ctx: Context, submit_token: Optional[str]) -> None:
global conf
async with sessionmaker() as session:
c = await session.get(GlobalConfig, 0)
assert c
if submit_token is None:
await ctx.send("None" if c.submit_token is None else format("{!i}", conf.submit_token))
else:
c.submit_token = None if submit_token == "None" else submit_token
await session.commit()
conf = c
await ctx.send("\u2705")
will only output the token if you specifically ask .config phish submit_token

Loading