Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions docker/llm/serving/xpu/docker/vllm_online_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,12 +435,14 @@ def benchmark(llm_urls, model, prompt, num_requests, max_concurrent_requests, ma
MODEL = "/llm/models/" + model_name
MAX_TOKENS = output_length # Set MAX_TOKENS to output_length

if "Qwen" not in MODEL and "chatglm" not in MODEL:
# print("using Llama PROMPT")
PROMPT = ENGLISH_PROMPT
else:
# print("using Qwen/chatglm PROMPT")
PROMPT = CHINESE_PROMPT
# if "Qwen" not in MODEL and "chatglm" not in MODEL:
# print("using Llama PROMPT")
# PROMPT = ENGLISH_PROMPT
# else:
# print("using Qwen/chatglm PROMPT")
# PROMPT = CHINESE_PROMPT

PROMPT = ENGLISH_PROMPT

# Load the model's tokenizer
from transformers import AutoTokenizer
Expand Down