@@ -475,7 +475,8 @@ async def add_request_async(
475475         *,
476476         inputs: Optional[PromptType] = None,  # DEPRECATED
477477     ) -> None:
478- """Async version of {meth}`add_request`."""
478+ """Async version of
479+ [`add_request`][vllm.engine.llm_engine.LLMEngine.add_request]."""
479480 if inputs is not None :
480481 prompt = inputs
481482 assert prompt is not None and params is not None
@@ -582,20 +583,21 @@ async def build_guided_decoding_logits_processor_async(
582583
583584
584585class AsyncLLMEngine (EngineClient ):
585- """An asynchronous wrapper for {class} `LLMEngine`.
586+ """An asynchronous wrapper for [ `LLMEngine`][vllm.LLMEngine] .
586587
587- This class is used to wrap the {class}`LLMEngine` class to make it
588- asynchronous. It uses asyncio to create a background loop that keeps
589- processing incoming requests. The {class}`LLMEngine` is kicked by the
590- generate method when there are requests in the waiting queue. The generate
591- method yields the outputs from the {class}`LLMEngine` to the caller.
588+ This class is used to wrap the [`LLMEngine`][vllm.LLMEngine] class to
589+ make it asynchronous. It uses asyncio to create a background loop that keeps
590+ processing incoming requests. The [`LLMEngine`][vllm.LLMEngine] is kicked
591+ by the generate method when there are requests in the waiting queue. The
592+ generate method yields the outputs from the [`LLMEngine`][vllm.LLMEngine]
593+ to the caller.
592594
593595 Args:
594596 log_requests: Whether to log the requests.
595597 start_engine_loop: If True, the background task to run the engine
596598 will be automatically started in the generate call.
597- *args: Arguments for {class} `LLMEngine`.
598- **kwargs: Arguments for {class} `LLMEngine`.
599+        *args: Arguments for [`LLMEngine`][vllm.LLMEngine].
600+        **kwargs: Arguments for [`LLMEngine`][vllm.LLMEngine].
599601 """
600602
601603 _engine_class : Type [_AsyncLLMEngine ] = _AsyncLLMEngine
@@ -985,8 +987,9 @@ async def generate(
985987 from the LLMEngine to the caller.
986988
987989 Args:
988- prompt: The prompt to the LLM. See {class}`~vllm.inputs.PromptType`
989- for more details about the format of each input.
990+ prompt: The prompt to the LLM. See
991+ [`PromptType`][vllm.inputs.PromptType] for more details about
992+ the format of each input.
990993 sampling_params: The sampling parameters of the request.
991994 request_id: The unique id of the request.
992995 lora_request: LoRA request to use for generation, if any.
@@ -1003,7 +1006,7 @@ async def generate(
10031006 Details:
10041007 - If the engine is not running, start the background loop,
10051008 which iteratively invokes
1006- {meth}`~ vllm.engine.async_llm_engine.AsyncLLMEngine.engine_step`
1009+              [`engine_step`][vllm.engine.async_llm_engine.AsyncLLMEngine.engine_step]
10071010 to process the waiting requests.
10081011 - Add the request to the engine's `RequestTracker`.
10091012 On the next background loop, this request will be sent to
@@ -1075,8 +1078,9 @@ async def encode(
10751078 from the LLMEngine to the caller.
10761079
10771080 Args:
1078- prompt: The prompt to the LLM. See {class}`~vllm.inputs.PromptType`
1079- for more details about the format of each input.
1081+ prompt: The prompt to the LLM. See
1082+ [`PromptType`][vllm.inputs.PromptType] for more details about
1083+ the format of each input.
10801084 pooling_params: The pooling parameters of the request.
10811085 request_id: The unique id of the request.
10821086 lora_request: LoRA request to use for generation, if any.
0 commit comments