1 file changed: +21 -3 lines changed
Original file line number | Diff line number | Diff line change
33NOTE: start a supported chat completion model server with `vllm serve`, e.g.
44 vllm serve meta-llama/Llama-2-7b-chat-hf
55"""
6+
7+ import argparse
8+
69from openai import OpenAI
710
811# Modify OpenAI's API key and API base to use vLLM's API server.
2427}]
2528
2629
27- def main ():
30+ def parse_args ():
31+ parser = argparse .ArgumentParser (description = "Client for vLLM API server" )
32+ parser .add_argument ("--stream" ,
33+ action = "store_true" ,
34+ help = "Enable streaming response" )
35+ return parser .parse_args ()
36+
37+
38+ def main (args ):
2839 client = OpenAI (
2940 # defaults to os.environ.get("OPENAI_API_KEY")
3041 api_key = openai_api_key ,
@@ -34,16 +45,23 @@ def main():
3445 models = client .models .list ()
3546 model = models .data [0 ].id
3647
48+ # Chat Completion API
3749 chat_completion = client .chat .completions .create (
3850 messages = messages ,
3951 model = model ,
52+ stream = args .stream ,
4053 )
4154
4255 print ("-" * 50 )
4356 print ("Chat completion results:" )
44- print (chat_completion )
57+ if args .stream :
58+ for c in chat_completion :
59+ print (c )
60+ else :
61+ print (chat_completion )
4562 print ("-" * 50 )
4663
4764
4865if __name__ == "__main__" :
49- main ()
66+ args = parse_args ()
67+ main (args )
You can’t perform that action at this time.
0 commit comments