
Commit b8edae1

docs: class migration from sphinx to mkdocs (engine)

Signed-off-by: Zerohertz <[email protected]>

1 parent 44a68d3 · commit b8edae1

File tree: 6 files changed (+58, −39 lines)
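Every hunk in this commit applies the same mechanical change: Sphinx/MyST cross-reference roles such as `{class}` and `{meth}` are rewritten as mkdocstrings identifier links of the form `[`Name`][full.dotted.path]`. A condensed before/after sketch of the pattern (illustrative comments only, mirroring the first hunk below):

```python
# Old: Sphinx/MyST role, resolved by the Sphinx autodoc toolchain.
#     """An asynchronous wrapper for {class}`LLMEngine`."""
#
# New: mkdocstrings identifier link, resolved against the dotted import path.
#     """An asynchronous wrapper for [`LLMEngine`][vllm.LLMEngine]."""
```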


vllm/engine/async_llm_engine.py

Lines changed: 18 additions & 14 deletions
```diff
@@ -475,7 +475,8 @@ async def add_request_async(
         *,
         inputs: Optional[PromptType] = None,  # DEPRECATED
     ) -> None:
-        """Async version of {meth}`add_request`."""
+        """Async version of
+        [`add_request`][vllm.engine.llm_engine.LLMEngine.add_request]."""
         if inputs is not None:
             prompt = inputs
         assert prompt is not None and params is not None
@@ -582,20 +583,21 @@ async def build_guided_decoding_logits_processor_async(
 
 
 class AsyncLLMEngine(EngineClient):
-    """An asynchronous wrapper for {class}`LLMEngine`.
+    """An asynchronous wrapper for [`LLMEngine`][vllm.LLMEngine].
 
-    This class is used to wrap the {class}`LLMEngine` class to make it
-    asynchronous. It uses asyncio to create a background loop that keeps
-    processing incoming requests. The {class}`LLMEngine` is kicked by the
-    generate method when there are requests in the waiting queue. The generate
-    method yields the outputs from the {class}`LLMEngine` to the caller.
+    This class is used to wrap the [`LLMEngine`][vllm.LLMEngine] class to
+    make it asynchronous. It uses asyncio to create a background loop that keeps
+    processing incoming requests. The [`LLMEngine`][vllm.LLMEngine] is kicked
+    by the generate method when there are requests in the waiting queue. The
+    generate method yields the outputs from the [`LLMEngine`][vllm.LLMEngine]
+    to the caller.
 
     Args:
         log_requests: Whether to log the requests.
         start_engine_loop: If True, the background task to run the engine
             will be automatically started in the generate call.
-        *args: Arguments for {class}`LLMEngine`.
-        **kwargs: Arguments for {class}`LLMEngine`.
+        *args: Arguments for [`LLMEngine`][vllm.LLMEngine].
+        **kwargs: Arguments for [`LLMEngine`][vllm.LLMEngine].
     """
 
     _engine_class: Type[_AsyncLLMEngine] = _AsyncLLMEngine
@@ -985,8 +987,9 @@ async def generate(
         from the LLMEngine to the caller.
 
         Args:
-            prompt: The prompt to the LLM. See {class}`~vllm.inputs.PromptType`
-                for more details about the format of each input.
+            prompt: The prompt to the LLM. See
+                [`PromptType`][vllm.inputs.PromptType] for more details about
+                the format of each input.
             sampling_params: The sampling parameters of the request.
             request_id: The unique id of the request.
             lora_request: LoRA request to use for generation, if any.
@@ -1003,7 +1006,7 @@ async def generate(
         Details:
             - If the engine is not running, start the background loop,
               which iteratively invokes
-              {meth}`~vllm.engine.async_llm_engine.AsyncLLMEngine.engine_step`
+              [`engine_step`][vllm.engine.async_llm_engine.AsyncLLMEngine.engine_step]
               to process the waiting requests.
             - Add the request to the engine's `RequestTracker`.
               On the next background loop, this request will be sent to
@@ -1075,8 +1078,9 @@ async def encode(
         from the LLMEngine to the caller.
 
         Args:
-            prompt: The prompt to the LLM. See {class}`~vllm.inputs.PromptType`
-                for more details about the format of each input.
+            prompt: The prompt to the LLM. See
+                [`PromptType`][vllm.inputs.PromptType] for more details about
+                the format of each input.
             pooling_params: The pooling parameters of the request.
             request_id: The unique id of the request.
             lora_request: LoRA request to use for generation, if any.
```
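For orientation, here is a minimal usage sketch of the generate flow the docstring above describes. This is not part of the commit; it assumes a working vLLM install, and the model name is a placeholder:

```python
import asyncio

from vllm import AsyncEngineArgs, AsyncLLMEngine, SamplingParams


async def main() -> None:
    # The first generate() call kicks off the background loop
    # (start_engine_loop defaults to True).
    engine = AsyncLLMEngine.from_engine_args(
        AsyncEngineArgs(model="facebook/opt-125m"))  # placeholder model

    # generate() is an async generator: it yields incremental
    # RequestOutputs from the wrapped LLMEngine to the caller.
    final_output = None
    async for output in engine.generate(
            "The capital of France is",
            SamplingParams(max_tokens=16),
            request_id="request-0"):
        final_output = output

    if final_output is not None:
        print(final_output.outputs[0].text)


asyncio.run(main())
```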

vllm/engine/llm_engine.py

Lines changed: 4 additions & 4 deletions
```diff
@@ -130,11 +130,11 @@ class LLMEngine:
     iteration-level scheduling and efficient memory management to maximize the
     serving throughput.
 
-    The [LLM][vllm.LLM] class wraps this class for offline batched inference
-    and the [AsyncLLMEngine][] class wraps this class for online serving.
+    The [`LLM`][vllm.LLM] class wraps this class for offline batched inference
+    and the [`AsyncLLMEngine`][vllm.engine.async_llm_engine.AsyncLLMEngine]
+    class wraps this class for online serving.
 
-    The config arguments are derived from [EngineArgs][vllm.EngineArgs]. (See
-    [engine-args][])
+    The config arguments are derived from [`EngineArgs`][vllm.EngineArgs].
 
     Args:
         vllm_config: The configuration for initializing and running vLLM.
```
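The docstring mentions the add_request/step loop that the wrappers drive. A minimal sketch of using `LLMEngine` directly, again not from the commit (placeholder model name; mirrors vLLM's offline engine example):

```python
from vllm import EngineArgs, LLMEngine, SamplingParams

# The config arguments are derived from EngineArgs, as the docstring says.
engine = LLMEngine.from_engine_args(
    EngineArgs(model="facebook/opt-125m"))  # placeholder model

engine.add_request("request-0", "Hello, my name is",
                   SamplingParams(temperature=0.8, max_tokens=16))

# Iteration-level scheduling: each step() runs one scheduling/decoding
# iteration and returns the RequestOutputs produced by it.
while engine.has_unfinished_requests():
    for request_output in engine.step():
        if request_output.finished:
            print(request_output.outputs[0].text)
```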

vllm/engine/multiprocessing/client.py

Lines changed: 6 additions & 4 deletions
```diff
@@ -492,8 +492,9 @@ def generate(
         from the LLMEngine to the caller.
 
         Args:
-            prompt: The prompt to the LLM. See {class}`~vllm.inputs.PromptType`
-                for more details about the format of each input.
+            prompt: The prompt to the LLM. See
+                [`PromptType`][vllm.inputs.PromptType] for more details about
+                the format of each input.
             sampling_params: The sampling parameters of the request.
             request_id: The unique id of the request.
             lora_request: LoRA request to use for generation, if any.
@@ -561,8 +562,9 @@ def encode(
         from the LLMEngine to the caller.
 
         Args:
-            prompt: The prompt to the LLM. See {class}`~vllm.inputs.PromptType`
-                for more details about the format of each input.
+            prompt: The prompt to the LLM. See
+                [`PromptType`][vllm.inputs.PromptType] for more details about
+                the format of each input.
             pooling_params: The pooling parameters of the request.
             request_id: The unique id of the request.
             lora_request: LoRA request to use for generation, if any.
```
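Since several of these hunks point readers at `PromptType`, a short sketch of the input forms it covers may help; this assumes the `TextPrompt`/`TokensPrompt` TypedDicts exported by `vllm.inputs`:

```python
from vllm.inputs import TextPrompt, TokensPrompt

# PromptType is a union over several input forms, including:
prompt_as_str = "Hello, world"                            # plain text
prompt_as_text = TextPrompt(prompt="Hello, world")        # explicit text form
prompt_as_tokens = TokensPrompt(prompt_token_ids=[1, 2])  # pre-tokenized ids
```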

vllm/engine/multiprocessing/engine.py

Lines changed: 11 additions & 8 deletions
```diff
@@ -42,19 +42,22 @@
 
 
 class MQLLMEngine:
-    """A multiprocessing wrapper for {class}`LLMEngine`.
+    """A multiprocessing wrapper for
+    [`LLMEngine`][vllm.engine.llm_engine.LLMEngine].
 
-    This class is used to wrap the {class}`LLMEngine` class to enable use
+    This class is used to wrap the
+    [`LLMEngine`][vllm.engine.llm_engine.LLMEngine] class to enable use
     in concurrnet manner. It runs a background loop and uses zeromq to
     receive new requests and stream outputs incrementally via ipc.
 
-    The {class}`LLMEngine` generate or encode process is kicked off when a new
-    RPCProcessRequest is received by the input_socket.
+    The [`LLMEngine`][vllm.engine.llm_engine.LLMEngine] generate or encode
+    process is kicked off when a new RPCProcessRequest is received by the
+    input_socket.
 
     The self.engine_loop checks the input_socket for new requests,
     adds them to the LLMEngine if there are any, calls the internal
-    {class}`LLMEngine.step()`, and sends the RequestOutputs back over
-    the output_socket.
+    [`LLMEngine.step()`][vllm.engine.llm_engine.LLMEngine.step], and sends
+    the RequestOutputs back over the output_socket.
 
     If use_async_sockets is set, the logic associated with reading new
     requests from the socket and sending data to the socket is passed
@@ -65,8 +68,8 @@ class MQLLMEngine:
         ipc_path: Base path for zeromq interprocess messaging
         use_async_sockets: Whether to make send/recv async with GPU
         log_requests: Whether to log the requests.
-        *args: Arguments for {class}`LLMEngine`.
-        **kwargs: Arguments for {class}`LLMEngine`.
+        *args: Arguments for [`LLMEngine`][vllm.engine.llm_engine.LLMEngine].
+        **kwargs: Arguments for [`LLMEngine`][vllm.engine.llm_engine.LLMEngine].
     """
 
     def __init__(self,
```

vllm/engine/output_processor/multi_step.py

Lines changed: 5 additions & 2 deletions
```diff
@@ -56,8 +56,11 @@ def process_prompt_logprob(self, seq_group: SequenceGroup,
         scheduled computation.
 
         Args:
-            seq_group: the outputs are associated with this {class}`SequenceGroup`
-            outputs: the {class}`SequenceGroupOutput`s for all scheduler steps
+            seq_group: the outputs are associated with this
+                [`SequenceGroup`][vllm.sequence.SequenceGroup]
+            outputs: the
+                [`SequenceGroupOutput`][vllm.sequence.SequenceGroupOutput]s
+                for all scheduler steps
         """
         for output in outputs:
             # Concatenate single-step prompt logprob processing results.
```

vllm/engine/output_processor/single_step.py

Lines changed: 14 additions & 7 deletions
```diff
@@ -19,17 +19,21 @@
 def single_step_process_prompt_logprob(
         sg_output_proc: SequenceGroupOutputProcessor, seq_group: SequenceGroup,
         output: CompletionSequenceGroupOutput) -> None:
-    """Process prompt logprobs associated with the {class}`SequenceGroupOutput`
-    for a given step.
+    """Process prompt logprobs associated with the
+    [`SequenceGroupOutput`][vllm.sequence.SequenceGroupOutput] for a given step.
 
     Do nothing if the output has no prompt logprobs.
 
     Account for the fact that transformers do not compute first-token logprobs.
 
     Args:
-        sg_output_proc: {class}`SequenceGroupOutputProcessor` instance
-        seq_group: the output is associated with this {class}`SequenceGroup`
-        output: the {class}`SequenceGroupOutput` for a single scheduler step
+        sg_output_proc:
+            [`SequenceGroupOutputProcessor`][vllm.engine.output_processor.interfaces.SequenceGroupOutputProcessor]
+            instance
+        seq_group: the output is associated with this
+            [`SequenceGroup`][vllm.sequence.SequenceGroup]
+        output: the [`SequenceGroupOutput`][vllm.sequence.SequenceGroupOutput]
+            for a single scheduler step
     """
     prompt_logprobs = output.prompt_logprobs
 
@@ -103,8 +107,11 @@ def process_prompt_logprob(self, seq_group: SequenceGroup,
         scheduled computation.
 
         Args:
-            seq_group: the output is associated with this {class}`SequenceGroup`
-            outputs: the {class}`SequenceGroupOutput` for a single scheduler step
+            seq_group: the output is associated with this
+                [`SequenceGroup`][vllm.sequence.SequenceGroup]
+            outputs: the
+                [`SequenceGroupOutput`][vllm.sequence.SequenceGroupOutput]
+                for a single scheduler step
         """
         assert len(outputs) == 1, "Single step should only have 1 output."
         output = outputs[0]
```
