@@ -105,7 +105,7 @@ async def send_request(
     best_of: int,
     use_beam_search: bool,
 ) -> None:
-    request_start_time = time.time()
+    request_start_time = time.perf_counter()

     headers = {"User-Agent": "Benchmark Client"}
     if backend == "vllm":
@@ -148,7 +148,7 @@ async def send_request(
             if "error" not in output:
                 break

-    request_end_time = time.time()
+    request_end_time = time.perf_counter()
     request_latency = request_end_time - request_start_time
     REQUEST_LATENCY.append((prompt_len, output_len, request_latency))

@@ -180,10 +180,10 @@ def main(args: argparse.Namespace):
     tokenizer = get_tokenizer(args.tokenizer, trust_remote_code=args.trust_remote_code)
     input_requests = sample_requests(args.dataset, args.num_prompts, tokenizer)

-    benchmark_start_time = time.time()
+    benchmark_start_time = time.perf_counter()
     asyncio.run(benchmark(args.backend, api_url, input_requests, args.best_of,
                           args.use_beam_search, args.request_rate))
-    benchmark_end_time = time.time()
+    benchmark_end_time = time.perf_counter()
     benchmark_time = benchmark_end_time - benchmark_start_time
     print(f"Total time: {benchmark_time:.2f} s")
     print(f"Throughput: {args.num_prompts / benchmark_time:.2f} requests/s")
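
Note (not part of the patch itself): time.time() reads the system wall clock, which can jump when the clock is adjusted (e.g. by NTP), whereas time.perf_counter() is a monotonic, high-resolution clock intended for measuring elapsed intervals, so latency and throughput numbers stay consistent. A minimal, self-contained sketch of the timing pattern the benchmark now uses (the sleep call stands in for the timed request):

import time

start = time.perf_counter()   # monotonic start timestamp
time.sleep(0.1)               # placeholder for the operation being timed
elapsed = time.perf_counter() - start
print(f"Elapsed: {elapsed:.4f} s")   # ~0.1 s, unaffected by wall-clock changes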