Skip to content

Commit 41e1bee

Browse files
ispobock authored and thyecust committed
Update deps for mllama4 (sgl-project#5215)
1 parent a85f762 commit 41e1bee

File tree

3 files changed

+27
-26
lines changed

3 files changed

+27
-26
lines changed

python/pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ runtime_common = [
3838
"pyzmq>=25.1.2",
3939
"soundfile==0.13.1",
4040
"torchao>=0.7.0",
41-
"transformers==4.51.0",
41+
"transformers==4.51.1",
4242
"uvicorn",
4343
"uvloop",
4444
"compressed-tensors",
@@ -50,6 +50,7 @@ srt = [
5050
"sgl-kernel==0.0.8",
5151
"flashinfer_python==0.2.3",
5252
"torch==2.5.1",
53+
"torchvision==0.20.1",
5354
"cuda-python",
5455
"outlines>=0.0.44,<=0.1.11",
5556
"partial_json_parser",

python/sglang/srt/managers/scheduler.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -840,7 +840,6 @@ def handle_generate_request(
840840
bootstrap_room=recv_req.bootstrap_room,
841841
)
842842
req.tokenizer = self.tokenizer
843-
req.queue_time_start = time.time()
844843

845844
if (
846845
recv_req.session_params is not None
@@ -855,7 +854,6 @@ def handle_generate_request(
855854
# Create a new request from a previous session
856855
session = self.sessions[recv_req.session_params.id]
857856
req = session.create_req(recv_req, self.tokenizer)
858-
req.queue_time_start = time.time()
859857
if isinstance(req.finished_reason, FINISH_ABORT):
860858
self._add_request_to_queue(req)
861859
return
@@ -958,6 +956,7 @@ def _add_request_to_queue(self, req: Req):
958956
self.disagg_decode_prealloc_queue.add(req)
959957

960958
else:
959+
req.queue_time_start = time.time()
961960
self.waiting_queue.append(req)
962961

963962
def _extend_requests_to_queue(self, reqs: List[Req], is_retracted: bool = False):

test/srt/test_vision_openai_server.py

Lines changed: 24 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -682,29 +682,30 @@ def test_single_image_chat_completion(self):
682682
pass
683683

684684

685-
class TestLlama4Server(TestOpenAIVisionServer):
686-
@classmethod
687-
def setUpClass(cls):
688-
cls.model = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
689-
cls.base_url = DEFAULT_URL_FOR_TEST
690-
cls.api_key = "sk-123456"
691-
cls.process = popen_launch_server(
692-
cls.model,
693-
cls.base_url,
694-
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
695-
other_args=[
696-
"--chat-template",
697-
"llama-4",
698-
"--mem-fraction-static",
699-
"0.8",
700-
"--tp-size=8",
701-
"--context-length=8192",
702-
],
703-
)
704-
cls.base_url += "/v1"
705-
706-
def test_video_chat_completion(self):
707-
pass
685+
## Skip for ci test
686+
# class TestLlama4Server(TestOpenAIVisionServer):
687+
# @classmethod
688+
# def setUpClass(cls):
689+
# cls.model = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
690+
# cls.base_url = DEFAULT_URL_FOR_TEST
691+
# cls.api_key = "sk-123456"
692+
# cls.process = popen_launch_server(
693+
# cls.model,
694+
# cls.base_url,
695+
# timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
696+
# other_args=[
697+
# "--chat-template",
698+
# "llama-4",
699+
# "--mem-fraction-static",
700+
# "0.8",
701+
# "--tp-size=8",
702+
# "--context-length=8192",
703+
# ],
704+
# )
705+
# cls.base_url += "/v1"
706+
707+
# def test_video_chat_completion(self):
708+
# pass
708709

709710

710711
class TestGemma3itServer(TestOpenAIVisionServer):

0 commit comments

Comments (0)