From a4f2b86848bedf704f7e29cd74cabc3325fe6a89 Mon Sep 17 00:00:00 2001
From: jinbridge <2635480475@qq.com>
Date: Tue, 15 Oct 2024 16:09:07 +0800
Subject: [PATCH 1/2] Revert "Temporarily change ERRORLEVEL check"

This reverts commit 909dbbc930ab4283737161a55bb32006e6ca1991.
---
 .github/workflows/llm_performance_tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/llm_performance_tests.yml b/.github/workflows/llm_performance_tests.yml
index 4b28da33a32..1f358f12061 100644
--- a/.github/workflows/llm_performance_tests.yml
+++ b/.github/workflows/llm_performance_tests.yml
@@ -662,7 +662,7 @@ jobs:
           move ..\..\..\test\benchmark\igpu-perf\32-32_int4_fp16.yaml config.yaml
           set PYTHONIOENCODING=utf-8
           python run.py >> %CSV_SAVE_PATH%\32-32_int4_fp16\log\%LOG_FILE% 2>&1
-          if %ERRORLEVEL% neq 0 if %ERRORLEVEL% neq -1073740791 (exit /b 1)
+          if %ERRORLEVEL% neq 0 (exit /b 1)
           python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test1
           if %ERRORLEVEL% neq 0 (exit /b 1)

From 7e4829b3b152ab7cf434f10bbdc85fbbfa6d5c6a Mon Sep 17 00:00:00 2001
From: jinbridge <2635480475@qq.com>
Date: Tue, 15 Oct 2024 16:09:44 +0800
Subject: [PATCH 2/2] Revert "Temporarily disable model for test"

This reverts commit 95322dc3c6429aa836f21bda0b5ba8d9b48592f8.
---
 .../test/benchmark/igpu-perf/1024-128.yaml    | 38 +++++++++----------
 .../benchmark/igpu-perf/1024-128_443.yaml     |  4 +-
 .../igpu-perf/1024-128_int4_fp16.yaml         | 36 +++++++++---------
 .../igpu-perf/1024-128_int4_fp16_443.yaml     |  4 +-
 .../1024-128_int4_fp16_loadlowbit.yaml        | 34 ++++++++---------
 .../1024-128_int4_fp16_loadlowbit_443.yaml    |  4 +-
 .../igpu-perf/2048-256_int4_fp16.yaml         | 36 +++++++++---------
 .../igpu-perf/2048-256_int4_fp16_443.yaml     |  4 +-
 .../igpu-perf/3072-384_int4_fp16.yaml         | 34 ++++++++---------
 .../igpu-perf/3072-384_int4_fp16_443.yaml     |  2 +-
 .../benchmark/igpu-perf/32-32_int4_fp16.yaml  | 36 +++++++++---------
 .../igpu-perf/32-32_int4_fp16_443.yaml        |  4 +-
 .../igpu-perf/4096-512_int4_fp16.yaml         | 34 ++++++++---------
 .../igpu-perf/4096-512_int4_fp16_443.yaml     |  2 +-
 14 files changed, 136 insertions(+), 136 deletions(-)

diff --git a/python/llm/test/benchmark/igpu-perf/1024-128.yaml b/python/llm/test/benchmark/igpu-perf/1024-128.yaml
index d599a82383f..759a7566237 100644
--- a/python/llm/test/benchmark/igpu-perf/1024-128.yaml
+++ b/python/llm/test/benchmark/igpu-perf/1024-128.yaml
@@ -1,23 +1,23 @@
 repo_id:
-  # - 'THUDM/chatglm3-6b'
-  # - 'THUDM/glm-4-9b-chat'
-  # - 'baichuan-inc/Baichuan2-7B-Chat'
-  # - 'baichuan-inc/Baichuan2-13B-Chat'
-  # - 'meta-llama/Llama-2-7b-chat-hf'
-  # - 'meta-llama/Llama-2-13b-chat-hf'
-  # - 'meta-llama/Meta-Llama-3-8B-Instruct'
-  # - 'mistralai/Mistral-7B-Instruct-v0.2'
-  # - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
-  # - 'RWKV/v5-Eagle-7B-HF'
-  # - '01-ai/Yi-6B-Chat'
-  # - 'openbmb/MiniCPM-1B-sft-bf16'
-  # - 'openbmb/MiniCPM-2B-sft-bf16'
-  # - 'Qwen/Qwen1.5-7B-Chat'
-  # - 'Qwen/Qwen2-1.5B-Instruct'
-  # - 'Qwen/Qwen2-7B-Instruct'
-  # - 'microsoft/Phi-3-mini-4k-instruct'
-  # - 'microsoft/Phi-3-mini-128k-instruct'
-  # - 'microsoft/phi-3-vision-128k-instruct'
+  - 'THUDM/chatglm3-6b'
+  - 'THUDM/glm-4-9b-chat'
+  - 'baichuan-inc/Baichuan2-7B-Chat'
+  - 'baichuan-inc/Baichuan2-13B-Chat'
+  - 'meta-llama/Llama-2-7b-chat-hf'
+  - 'meta-llama/Llama-2-13b-chat-hf'
+  - 'meta-llama/Meta-Llama-3-8B-Instruct'
+  - 'mistralai/Mistral-7B-Instruct-v0.2'
+  - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
+  - 'RWKV/v5-Eagle-7B-HF'
+  - '01-ai/Yi-6B-Chat'
+  - 'openbmb/MiniCPM-1B-sft-bf16'
+  - 'openbmb/MiniCPM-2B-sft-bf16'
+  - 'Qwen/Qwen1.5-7B-Chat'
+  - 'Qwen/Qwen2-1.5B-Instruct'
+  - 'Qwen/Qwen2-7B-Instruct'
+  - 'microsoft/Phi-3-mini-4k-instruct'
+  - 'microsoft/Phi-3-mini-128k-instruct'
+  - 'microsoft/phi-3-vision-128k-instruct'
   - 'openbmb/MiniCPM-V-2_6'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_443.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_443.yaml
index c7aa64e39f0..74d0cdc5e4b 100644
--- a/python/llm/test/benchmark/igpu-perf/1024-128_443.yaml
+++ b/python/llm/test/benchmark/igpu-perf/1024-128_443.yaml
@@ -1,6 +1,6 @@
 repo_id:
-  # - 'google/gemma-2-2b-it'
-  # - 'google/gemma-2-9b-it'
+  - 'google/gemma-2-2b-it'
+  - 'google/gemma-2-9b-it'
   - 'meta-llama/Llama-3.1-8B-Instruct'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml
index e3d667110a0..f66172d9a39 100644
--- a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml
+++ b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml
@@ -1,22 +1,22 @@
 repo_id:
-  # - 'THUDM/chatglm3-6b'
-  # - 'THUDM/glm-4-9b-chat'
-  # - 'baichuan-inc/Baichuan2-7B-Chat'
-  # - 'baichuan-inc/Baichuan2-13B-Chat'
-  # - 'meta-llama/Llama-2-7b-chat-hf'
-  # - 'meta-llama/Llama-2-13b-chat-hf'
-  # - 'meta-llama/Meta-Llama-3-8B-Instruct'
-  # - 'mistralai/Mistral-7B-Instruct-v0.2'
-  # - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
-  # - '01-ai/Yi-6B-Chat'
-  # - 'openbmb/MiniCPM-1B-sft-bf16'
-  # - 'openbmb/MiniCPM-2B-sft-bf16'
-  # - 'Qwen/Qwen1.5-7B-Chat'
-  # - 'Qwen/Qwen2-1.5B-Instruct'
-  # - 'Qwen/Qwen2-7B-Instruct'
-  # - 'microsoft/Phi-3-mini-4k-instruct'
-  # - 'microsoft/Phi-3-mini-128k-instruct'
-  # - 'microsoft/phi-3-vision-128k-instruct'
+  - 'THUDM/chatglm3-6b'
+  - 'THUDM/glm-4-9b-chat'
+  - 'baichuan-inc/Baichuan2-7B-Chat'
+  - 'baichuan-inc/Baichuan2-13B-Chat'
+  - 'meta-llama/Llama-2-7b-chat-hf'
+  - 'meta-llama/Llama-2-13b-chat-hf'
+  - 'meta-llama/Meta-Llama-3-8B-Instruct'
+  - 'mistralai/Mistral-7B-Instruct-v0.2'
+  - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
+  - '01-ai/Yi-6B-Chat'
+  - 'openbmb/MiniCPM-1B-sft-bf16'
+  - 'openbmb/MiniCPM-2B-sft-bf16'
+  - 'Qwen/Qwen1.5-7B-Chat'
+  - 'Qwen/Qwen2-1.5B-Instruct'
+  - 'Qwen/Qwen2-7B-Instruct'
+  - 'microsoft/Phi-3-mini-4k-instruct'
+  - 'microsoft/Phi-3-mini-128k-instruct'
+  - 'microsoft/phi-3-vision-128k-instruct'
   - 'openbmb/MiniCPM-V-2_6'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_443.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_443.yaml
index 9f9dd2a2f73..ec529fc8a07 100644
--- a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_443.yaml
+++ b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_443.yaml
@@ -1,6 +1,6 @@
 repo_id:
-  # - 'google/gemma-2-2b-it'
-  # - 'google/gemma-2-9b-it'
+  - 'google/gemma-2-2b-it'
+  - 'google/gemma-2-9b-it'
   - 'meta-llama/Llama-3.1-8B-Instruct'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit.yaml
index 52862ffcc63..76c35d4dde7 100644
--- a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit.yaml
+++ b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit.yaml
@@ -1,21 +1,21 @@
 repo_id:
-  # - 'THUDM/chatglm3-6b'
-  # - 'THUDM/glm-4-9b-chat'
-  # - 'baichuan-inc/Baichuan2-7B-Chat'
-  # - 'baichuan-inc/Baichuan2-13B-Chat'
-  # - 'meta-llama/Llama-2-7b-chat-hf'
-  # - 'meta-llama/Llama-2-13b-chat-hf'
-  # - 'meta-llama/Meta-Llama-3-8B-Instruct'
-  # - 'mistralai/Mistral-7B-Instruct-v0.2'
-  # - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
-  # - '01-ai/Yi-6B-Chat'
-  # - 'openbmb/MiniCPM-1B-sft-bf16'
-  # - 'openbmb/MiniCPM-2B-sft-bf16'
-  # - 'Qwen/Qwen1.5-7B-Chat'
-  # - 'Qwen/Qwen2-1.5B-Instruct'
-  # - 'Qwen/Qwen2-7B-Instruct'
-  # - 'microsoft/Phi-3-mini-4k-instruct'
-  # - 'microsoft/Phi-3-mini-128k-instruct'
+  - 'THUDM/chatglm3-6b'
+  - 'THUDM/glm-4-9b-chat'
+  - 'baichuan-inc/Baichuan2-7B-Chat'
+  - 'baichuan-inc/Baichuan2-13B-Chat'
+  - 'meta-llama/Llama-2-7b-chat-hf'
+  - 'meta-llama/Llama-2-13b-chat-hf'
+  - 'meta-llama/Meta-Llama-3-8B-Instruct'
+  - 'mistralai/Mistral-7B-Instruct-v0.2'
+  - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
+  - '01-ai/Yi-6B-Chat'
+  - 'openbmb/MiniCPM-1B-sft-bf16'
+  - 'openbmb/MiniCPM-2B-sft-bf16'
+  - 'Qwen/Qwen1.5-7B-Chat'
+  - 'Qwen/Qwen2-1.5B-Instruct'
+  - 'Qwen/Qwen2-7B-Instruct'
+  - 'microsoft/Phi-3-mini-4k-instruct'
+  - 'microsoft/Phi-3-mini-128k-instruct'
   - 'microsoft/phi-3-vision-128k-instruct'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit_443.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit_443.yaml
index e9df748f127..031085730e6 100644
--- a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit_443.yaml
+++ b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit_443.yaml
@@ -1,6 +1,6 @@
 repo_id:
-  # - 'google/gemma-2-2b-it'
-  # - 'google/gemma-2-9b-it'
+  - 'google/gemma-2-2b-it'
+  - 'google/gemma-2-9b-it'
   - 'meta-llama/Llama-3.1-8B-Instruct'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
diff --git a/python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16.yaml b/python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16.yaml
index 119f0269c94..bf5fc1e978b 100644
--- a/python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16.yaml
+++ b/python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16.yaml
@@ -1,22 +1,22 @@
 repo_id:
-  # - 'THUDM/chatglm3-6b'
-  # - 'THUDM/glm-4-9b-chat'
-  # - 'baichuan-inc/Baichuan2-7B-Chat'
-  # - 'baichuan-inc/Baichuan2-13B-Chat'
-  # - 'meta-llama/Llama-2-7b-chat-hf'
-  # - 'meta-llama/Llama-2-13b-chat-hf'
-  # - 'meta-llama/Meta-Llama-3-8B-Instruct'
-  # - 'mistralai/Mistral-7B-Instruct-v0.2'
-  # - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
-  # - '01-ai/Yi-6B-Chat'
-  # - 'openbmb/MiniCPM-1B-sft-bf16'
-  # - 'openbmb/MiniCPM-2B-sft-bf16'
-  # - 'Qwen/Qwen1.5-7B-Chat'
-  # - 'Qwen/Qwen2-1.5B-Instruct'
-  # - 'Qwen/Qwen2-7B-Instruct'
-  # - 'microsoft/Phi-3-mini-4k-instruct'
-  # - 'microsoft/Phi-3-mini-128k-instruct'
-  # - 'microsoft/phi-3-vision-128k-instruct'
+  - 'THUDM/chatglm3-6b'
+  - 'THUDM/glm-4-9b-chat'
+  - 'baichuan-inc/Baichuan2-7B-Chat'
+  - 'baichuan-inc/Baichuan2-13B-Chat'
+  - 'meta-llama/Llama-2-7b-chat-hf'
+  - 'meta-llama/Llama-2-13b-chat-hf'
+  - 'meta-llama/Meta-Llama-3-8B-Instruct'
+  - 'mistralai/Mistral-7B-Instruct-v0.2'
+  - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
+  - '01-ai/Yi-6B-Chat'
+  - 'openbmb/MiniCPM-1B-sft-bf16'
+  - 'openbmb/MiniCPM-2B-sft-bf16'
+  - 'Qwen/Qwen1.5-7B-Chat'
+  - 'Qwen/Qwen2-1.5B-Instruct'
+  - 'Qwen/Qwen2-7B-Instruct'
+  - 'microsoft/Phi-3-mini-4k-instruct'
+  - 'microsoft/Phi-3-mini-128k-instruct'
+  - 'microsoft/phi-3-vision-128k-instruct'
   - 'openbmb/MiniCPM-V-2_6'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
diff --git a/python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_443.yaml b/python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_443.yaml
index 04834213904..975d0a0232c 100644
--- a/python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_443.yaml
+++ b/python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_443.yaml
@@ -1,6 +1,6 @@
 repo_id:
-  # - 'google/gemma-2-2b-it'
-  # - 'google/gemma-2-9b-it'
+  - 'google/gemma-2-2b-it'
+  - 'google/gemma-2-9b-it'
   - 'meta-llama/Llama-3.1-8B-Instruct'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
diff --git a/python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16.yaml b/python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16.yaml
index 50cc69678fd..60202594cba 100644
--- a/python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16.yaml
+++ b/python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16.yaml
@@ -1,21 +1,21 @@
 repo_id:
-  # - 'THUDM/chatglm3-6b'
-  # - 'THUDM/glm-4-9b-chat'
-  # - 'baichuan-inc/Baichuan2-7B-Chat'
-  # - 'meta-llama/Llama-2-7b-chat-hf'
-  # - 'meta-llama/Llama-2-13b-chat-hf'
-  # - 'meta-llama/Meta-Llama-3-8B-Instruct'
-  # - 'mistralai/Mistral-7B-Instruct-v0.2'
-  # - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
-  # - '01-ai/Yi-6B-Chat'
-  # - 'openbmb/MiniCPM-1B-sft-bf16'
-  # - 'openbmb/MiniCPM-2B-sft-bf16'
-  # - 'Qwen/Qwen1.5-7B-Chat'
-  # - 'Qwen/Qwen2-1.5B-Instruct'
-  # - 'Qwen/Qwen2-7B-Instruct'
-  # - 'microsoft/Phi-3-mini-4k-instruct'
-  # - 'microsoft/Phi-3-mini-128k-instruct'
-  # - 'microsoft/phi-3-vision-128k-instruct'
+  - 'THUDM/chatglm3-6b'
+  - 'THUDM/glm-4-9b-chat'
+  - 'baichuan-inc/Baichuan2-7B-Chat'
+  - 'meta-llama/Llama-2-7b-chat-hf'
+  - 'meta-llama/Llama-2-13b-chat-hf'
+  - 'meta-llama/Meta-Llama-3-8B-Instruct'
+  - 'mistralai/Mistral-7B-Instruct-v0.2'
+  - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
+  - '01-ai/Yi-6B-Chat'
+  - 'openbmb/MiniCPM-1B-sft-bf16'
+  - 'openbmb/MiniCPM-2B-sft-bf16'
+  - 'Qwen/Qwen1.5-7B-Chat'
+  - 'Qwen/Qwen2-1.5B-Instruct'
+  - 'Qwen/Qwen2-7B-Instruct'
+  - 'microsoft/Phi-3-mini-4k-instruct'
+  - 'microsoft/Phi-3-mini-128k-instruct'
+  - 'microsoft/phi-3-vision-128k-instruct'
   - 'openbmb/MiniCPM-V-2_6'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
diff --git a/python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16_443.yaml b/python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16_443.yaml
index 1c44882fd1a..9127111b066 100644
--- a/python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16_443.yaml
+++ b/python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16_443.yaml
@@ -1,5 +1,5 @@
 repo_id:
-  # - 'google/gemma-2-2b-it'
+  - 'google/gemma-2-2b-it'
   # - 'google/gemma-2-9b-it'
   - 'meta-llama/Llama-3.1-8B-Instruct'
 local_model_hub: 'path to your local model hub'
diff --git a/python/llm/test/benchmark/igpu-perf/32-32_int4_fp16.yaml b/python/llm/test/benchmark/igpu-perf/32-32_int4_fp16.yaml
index 063ff9a6ddf..e70178744a3 100644
--- a/python/llm/test/benchmark/igpu-perf/32-32_int4_fp16.yaml
+++ b/python/llm/test/benchmark/igpu-perf/32-32_int4_fp16.yaml
@@ -1,22 +1,22 @@
 repo_id:
-  # - 'THUDM/chatglm3-6b'
-  # - 'THUDM/glm-4-9b-chat'
-  # - 'baichuan-inc/Baichuan2-7B-Chat'
-  # - 'baichuan-inc/Baichuan2-13B-Chat'
-  # - 'meta-llama/Llama-2-7b-chat-hf'
-  # - 'meta-llama/Llama-2-13b-chat-hf'
-  # - 'meta-llama/Meta-Llama-3-8B-Instruct'
-  # - 'mistralai/Mistral-7B-Instruct-v0.2'
-  # - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
-  # - '01-ai/Yi-6B-Chat'
-  # - 'openbmb/MiniCPM-1B-sft-bf16'
-  # - 'openbmb/MiniCPM-2B-sft-bf16'
-  # - 'Qwen/Qwen1.5-7B-Chat'
-  # - 'Qwen/Qwen2-1.5B-Instruct'
-  # - 'Qwen/Qwen2-7B-Instruct'
-  # - 'microsoft/Phi-3-mini-4k-instruct'
-  # - 'microsoft/Phi-3-mini-128k-instruct'
-  # - 'microsoft/phi-3-vision-128k-instruct'
+  - 'THUDM/chatglm3-6b'
+  - 'THUDM/glm-4-9b-chat'
+  - 'baichuan-inc/Baichuan2-7B-Chat'
+  - 'baichuan-inc/Baichuan2-13B-Chat'
+  - 'meta-llama/Llama-2-7b-chat-hf'
+  - 'meta-llama/Llama-2-13b-chat-hf'
+  - 'meta-llama/Meta-Llama-3-8B-Instruct'
+  - 'mistralai/Mistral-7B-Instruct-v0.2'
+  - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
+  - '01-ai/Yi-6B-Chat'
+  - 'openbmb/MiniCPM-1B-sft-bf16'
+  - 'openbmb/MiniCPM-2B-sft-bf16'
+  - 'Qwen/Qwen1.5-7B-Chat'
+  - 'Qwen/Qwen2-1.5B-Instruct'
+  - 'Qwen/Qwen2-7B-Instruct'
+  - 'microsoft/Phi-3-mini-4k-instruct'
+  - 'microsoft/Phi-3-mini-128k-instruct'
+  - 'microsoft/phi-3-vision-128k-instruct'
   - 'openbmb/MiniCPM-V-2_6'
 local_model_hub: 'path to your local model hub'
 warm_up: 3
diff --git a/python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_443.yaml b/python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_443.yaml
index d4111a367bd..45a7809fb11 100644
--- a/python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_443.yaml
+++ b/python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_443.yaml
@@ -1,6 +1,6 @@
 repo_id:
-  # - 'google/gemma-2-2b-it'
-  # - 'google/gemma-2-9b-it'
+  - 'google/gemma-2-2b-it'
+  - 'google/gemma-2-9b-it'
   - 'meta-llama/Llama-3.1-8B-Instruct'
 local_model_hub: 'path to your local model hub'
 warm_up: 3
diff --git a/python/llm/test/benchmark/igpu-perf/4096-512_int4_fp16.yaml b/python/llm/test/benchmark/igpu-perf/4096-512_int4_fp16.yaml
index 97549150c5f..514037a7380 100644
--- a/python/llm/test/benchmark/igpu-perf/4096-512_int4_fp16.yaml
+++ b/python/llm/test/benchmark/igpu-perf/4096-512_int4_fp16.yaml
@@ -1,21 +1,21 @@
 repo_id:
-  # - 'THUDM/chatglm3-6b'
-  # - 'THUDM/glm-4-9b-chat'
-  # - 'baichuan-inc/Baichuan2-7B-Chat'
-  # - 'meta-llama/Llama-2-7b-chat-hf'
-  # - 'meta-llama/Llama-2-13b-chat-hf'
-  # - 'meta-llama/Meta-Llama-3-8B-Instruct'
-  # - 'mistralai/Mistral-7B-Instruct-v0.2'
-  # - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
-  # - '01-ai/Yi-6B-Chat'
-  # - 'openbmb/MiniCPM-1B-sft-bf16'
-  # - 'openbmb/MiniCPM-2B-sft-bf16'
-  # - 'Qwen/Qwen1.5-7B-Chat'
-  # - 'Qwen/Qwen2-1.5B-Instruct'
-  # - 'Qwen/Qwen2-7B-Instruct'
-  # - 'microsoft/Phi-3-mini-4k-instruct'
-  # - 'microsoft/Phi-3-mini-128k-instruct'
-  # - 'microsoft/phi-3-vision-128k-instruct'
+  - 'THUDM/chatglm3-6b'
+  - 'THUDM/glm-4-9b-chat'
+  - 'baichuan-inc/Baichuan2-7B-Chat'
+  - 'meta-llama/Llama-2-7b-chat-hf'
+  - 'meta-llama/Llama-2-13b-chat-hf'
+  - 'meta-llama/Meta-Llama-3-8B-Instruct'
+  - 'mistralai/Mistral-7B-Instruct-v0.2'
+  - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
+  - '01-ai/Yi-6B-Chat'
+  - 'openbmb/MiniCPM-1B-sft-bf16'
+  - 'openbmb/MiniCPM-2B-sft-bf16'
+  - 'Qwen/Qwen1.5-7B-Chat'
+  - 'Qwen/Qwen2-1.5B-Instruct'
+  - 'Qwen/Qwen2-7B-Instruct'
+  - 'microsoft/Phi-3-mini-4k-instruct'
+  - 'microsoft/Phi-3-mini-128k-instruct'
+  - 'microsoft/phi-3-vision-128k-instruct'
   - 'openbmb/MiniCPM-V-2_6'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
diff --git a/python/llm/test/benchmark/igpu-perf/4096-512_int4_fp16_443.yaml b/python/llm/test/benchmark/igpu-perf/4096-512_int4_fp16_443.yaml
index b8338733178..8579e3c95a2 100644
--- a/python/llm/test/benchmark/igpu-perf/4096-512_int4_fp16_443.yaml
+++ b/python/llm/test/benchmark/igpu-perf/4096-512_int4_fp16_443.yaml
@@ -1,5 +1,5 @@
 repo_id:
-  # - 'google/gemma-2-2b-it'
+  - 'google/gemma-2-2b-it'
   # - 'google/gemma-2-9b-it'
   - 'meta-llama/Llama-3.1-8B-Instruct'
 local_model_hub: 'path to your local model hub'
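
Note on [PATCH 1/2]: in a Windows cmd step, two "if" tests chained on one
line act as a logical AND, so the temporary check only aborted the run when
the exit code was neither 0 nor -1073740791 (0xC0000409, the Windows
STATUS_STACK_BUFFER_OVERRUN crash status); the revert restores failure on
any nonzero exit code. A minimal batch sketch of the two variants, reusing
the workflow's own lines (the REM comments are annotations added here, not
part of the workflow):

    REM Temporary check (removed by this revert): both conditions must
    REM hold to fail, i.e. tolerate the 0xC0000409 crash code.
    if %ERRORLEVEL% neq 0 if %ERRORLEVEL% neq -1073740791 (exit /b 1)

    REM Restored strict check: any nonzero exit code fails the step.
    if %ERRORLEVEL% neq 0 (exit /b 1)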