Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions router/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,18 @@ pub(crate) struct ChatRequest {

#[schema(nullable = true, example = 42)]
pub seed: Option<u64>,

/// What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while
/// lower values like 0.2 will make it more focused and deterministic.
///
/// We generally recommend altering this or `top_p` but not both.
#[serde(default)]
pub temperature: Option<f32>,

/// An alternative to sampling with temperature, called nucleus sampling, where the model considers only the tokens
/// that make up the top `top_p` probability mass. For example, 0.1 means only the tokens comprising the top 10% probability mass are considered.
#[serde(default)]
pub top_p: Option<f32>,
}

#[derive(Clone, Serialize, Deserialize)]
Expand Down
6 changes: 3 additions & 3 deletions router/src/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -592,10 +592,10 @@ async fn chat_completions(
inputs: inputs.to_string(),
parameters: GenerateParameters {
best_of: None,
temperature: None,
temperature: req.temperature,
repetition_penalty,
top_k: None,
top_p: None,
top_p: req.top_p,
typical_p: None,
do_sample: true,
max_new_tokens,
Expand All @@ -604,7 +604,7 @@ async fn chat_completions(
truncate: None,
watermark: false,
details: true,
decoder_input_details: true,
decoder_input_details: !stream,
seed,
top_n_tokens: None,
},
Expand Down