3 files changed, +27 −26 lines changed

@@ -38,7 +38,7 @@ runtime_common = [
3838 " pyzmq>=25.1.2" ,
3939 " soundfile==0.13.1" ,
4040 " torchao>=0.7.0" ,
41- " transformers==4.51.0 " ,
41+ " transformers==4.51.1 " ,
4242 " uvicorn" ,
4343 " uvloop" ,
4444 " compressed-tensors" ,
@@ -50,6 +50,7 @@ srt = [
5050 " sgl-kernel==0.0.8" ,
5151 " flashinfer_python==0.2.3" ,
5252 " torch==2.5.1" ,
53+ " torchvision==0.20.1" ,
5354 " cuda-python" ,
5455 " outlines>=0.0.44,<=0.1.11" ,
5556 " partial_json_parser" ,
@@ -840,7 +840,6 @@ def handle_generate_request(
             bootstrap_room=recv_req.bootstrap_room,
         )
         req.tokenizer = self.tokenizer
-        req.queue_time_start = time.time()

         if (
             recv_req.session_params is not None
@@ -855,7 +854,6 @@ def handle_generate_request(
             # Create a new request from a previous session
             session = self.sessions[recv_req.session_params.id]
             req = session.create_req(recv_req, self.tokenizer)
-            req.queue_time_start = time.time()
             if isinstance(req.finished_reason, FINISH_ABORT):
                 self._add_request_to_queue(req)
                 return
@@ -958,6 +956,7 @@ def _add_request_to_queue(self, req: Req):
             self.disagg_decode_prealloc_queue.add(req)

         else:
+            req.queue_time_start = time.time()
             self.waiting_queue.append(req)

     def _extend_requests_to_queue(self, reqs: List[Req], is_retracted: bool = False):
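With this change, queue_time_start is stamped only at the point where a request actually enters waiting_queue, rather than at request construction, so requests routed to the disaggregation prealloc queues no longer receive a misleading timestamp. A minimal sketch of the resulting timing pattern (the standalone Req class and the poll_queue name below are illustrative, not from the diff):

# Stamp queue_time_start at the real enqueue point, then compute the queue
# delay when the scheduler dequeues the request.
import time


class Req:
    def __init__(self, rid: str):
        self.rid = rid
        self.queue_time_start: float | None = None


waiting_queue: list[Req] = []


def add_request_to_queue(req: Req) -> None:
    req.queue_time_start = time.time()  # stamped only on actual enqueue
    waiting_queue.append(req)


def poll_queue() -> None:
    if waiting_queue:
        req = waiting_queue.pop(0)
        queued_for = time.time() - req.queue_time_start
        print(f"{req.rid} waited {queued_for * 1000:.1f} ms")


add_request_to_queue(Req("r1"))
poll_queue()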
@@ -682,29 +682,30 @@ def test_single_image_chat_completion(self):
         pass


-class TestLlama4Server(TestOpenAIVisionServer):
-    @classmethod
-    def setUpClass(cls):
-        cls.model = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
-        cls.base_url = DEFAULT_URL_FOR_TEST
-        cls.api_key = "sk-123456"
-        cls.process = popen_launch_server(
-            cls.model,
-            cls.base_url,
-            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
-            other_args=[
-                "--chat-template",
-                "llama-4",
-                "--mem-fraction-static",
-                "0.8",
-                "--tp-size=8",
-                "--context-length=8192",
-            ],
-        )
-        cls.base_url += "/v1"
-
-    def test_video_chat_completion(self):
-        pass
+## Skip for ci test
+# class TestLlama4Server(TestOpenAIVisionServer):
+#     @classmethod
+#     def setUpClass(cls):
+#         cls.model = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
+#         cls.base_url = DEFAULT_URL_FOR_TEST
+#         cls.api_key = "sk-123456"
+#         cls.process = popen_launch_server(
+#             cls.model,
+#             cls.base_url,
+#             timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
+#             other_args=[
+#                 "--chat-template",
+#                 "llama-4",
+#                 "--mem-fraction-static",
+#                 "0.8",
+#                 "--tp-size=8",
+#                 "--context-length=8192",
+#             ],
+#         )
+#         cls.base_url += "/v1"
+
+#     def test_video_chat_completion(self):
+#         pass


 class TestGemma3itServer(TestOpenAIVisionServer):
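The diff disables the Llama-4 suite by commenting it out. An alternative sketch (not what this commit does) that keeps the class importable and records the reason in the test report, assuming the skip is motivated by the 8-GPU --tp-size=8 requirement:

# Hypothetical alternative: unittest.skip keeps the suite discoverable and
# prints why it was skipped, instead of commenting the class out. In the real
# file this would subclass TestOpenAIVisionServer rather than TestCase.
import unittest


@unittest.skip("Skipped in CI: --tp-size=8 needs an 8-GPU machine (assumed)")
class TestLlama4Server(unittest.TestCase):
    def test_video_chat_completion(self):
        pass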