diff --git a/.github/workflows/llm_performance_tests.yml b/.github/workflows/llm_performance_tests.yml
index 4b28da33a32..1f358f12061 100644
--- a/.github/workflows/llm_performance_tests.yml
+++ b/.github/workflows/llm_performance_tests.yml
@@ -662,7 +662,7 @@ jobs:
           move ..\..\..\test\benchmark\igpu-perf\32-32_int4_fp16.yaml config.yaml
           set PYTHONIOENCODING=utf-8
           python run.py >> %CSV_SAVE_PATH%\32-32_int4_fp16\log\%LOG_FILE% 2>&1
-          if %ERRORLEVEL% neq 0 if %ERRORLEVEL% neq -1073740791 (exit /b 1)
+          if %ERRORLEVEL% neq 0 (exit /b 1)
           python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test1
           if %ERRORLEVEL% neq 0 (exit /b 1)
diff --git a/python/llm/test/benchmark/igpu-perf/1024-128.yaml b/python/llm/test/benchmark/igpu-perf/1024-128.yaml
index d599a82383f..759a7566237 100644
--- a/python/llm/test/benchmark/igpu-perf/1024-128.yaml
+++ b/python/llm/test/benchmark/igpu-perf/1024-128.yaml
@@ -1,23 +1,23 @@
 repo_id:
-  # - 'THUDM/chatglm3-6b'
-  # - 'THUDM/glm-4-9b-chat'
-  # - 'baichuan-inc/Baichuan2-7B-Chat'
-  # - 'baichuan-inc/Baichuan2-13B-Chat'
-  # - 'meta-llama/Llama-2-7b-chat-hf'
-  # - 'meta-llama/Llama-2-13b-chat-hf'
-  # - 'meta-llama/Meta-Llama-3-8B-Instruct'
-  # - 'mistralai/Mistral-7B-Instruct-v0.2'
-  # - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
-  # - 'RWKV/v5-Eagle-7B-HF'
-  # - '01-ai/Yi-6B-Chat'
-  # - 'openbmb/MiniCPM-1B-sft-bf16'
-  # - 'openbmb/MiniCPM-2B-sft-bf16'
-  # - 'Qwen/Qwen1.5-7B-Chat'
-  # - 'Qwen/Qwen2-1.5B-Instruct'
-  # - 'Qwen/Qwen2-7B-Instruct'
-  # - 'microsoft/Phi-3-mini-4k-instruct'
-  # - 'microsoft/Phi-3-mini-128k-instruct'
-  # - 'microsoft/phi-3-vision-128k-instruct'
+  - 'THUDM/chatglm3-6b'
+  - 'THUDM/glm-4-9b-chat'
+  - 'baichuan-inc/Baichuan2-7B-Chat'
+  - 'baichuan-inc/Baichuan2-13B-Chat'
+  - 'meta-llama/Llama-2-7b-chat-hf'
+  - 'meta-llama/Llama-2-13b-chat-hf'
+  - 'meta-llama/Meta-Llama-3-8B-Instruct'
+  - 'mistralai/Mistral-7B-Instruct-v0.2'
+  - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
+  - 'RWKV/v5-Eagle-7B-HF'
+  - '01-ai/Yi-6B-Chat'
+  - 'openbmb/MiniCPM-1B-sft-bf16'
+  - 'openbmb/MiniCPM-2B-sft-bf16'
+  - 'Qwen/Qwen1.5-7B-Chat'
+  - 'Qwen/Qwen2-1.5B-Instruct'
+  - 'Qwen/Qwen2-7B-Instruct'
+  - 'microsoft/Phi-3-mini-4k-instruct'
+  - 'microsoft/Phi-3-mini-128k-instruct'
+  - 'microsoft/phi-3-vision-128k-instruct'
   - 'openbmb/MiniCPM-V-2_6'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_443.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_443.yaml
index c7aa64e39f0..74d0cdc5e4b 100644
--- a/python/llm/test/benchmark/igpu-perf/1024-128_443.yaml
+++ b/python/llm/test/benchmark/igpu-perf/1024-128_443.yaml
@@ -1,6 +1,6 @@
 repo_id:
-  # - 'google/gemma-2-2b-it'
-  # - 'google/gemma-2-9b-it'
+  - 'google/gemma-2-2b-it'
+  - 'google/gemma-2-9b-it'
   - 'meta-llama/Llama-3.1-8B-Instruct'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml
index e3d667110a0..f66172d9a39 100644
--- a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml
+++ b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml
@@ -1,22 +1,22 @@
 repo_id:
-  # - 'THUDM/chatglm3-6b'
-  # - 'THUDM/glm-4-9b-chat'
-  # - 'baichuan-inc/Baichuan2-7B-Chat'
-  # - 'baichuan-inc/Baichuan2-13B-Chat'
-  # - 'meta-llama/Llama-2-7b-chat-hf'
-  # - 'meta-llama/Llama-2-13b-chat-hf'
-  # - 'meta-llama/Meta-Llama-3-8B-Instruct'
-  # - 'mistralai/Mistral-7B-Instruct-v0.2'
-  # - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
-  # - '01-ai/Yi-6B-Chat'
-  # - 'openbmb/MiniCPM-1B-sft-bf16'
-  # - 'openbmb/MiniCPM-2B-sft-bf16'
-  # - 'Qwen/Qwen1.5-7B-Chat'
-  # - 'Qwen/Qwen2-1.5B-Instruct'
-  # - 'Qwen/Qwen2-7B-Instruct'
-  # - 'microsoft/Phi-3-mini-4k-instruct'
-  # - 'microsoft/Phi-3-mini-128k-instruct'
-  # - 'microsoft/phi-3-vision-128k-instruct'
+  - 'THUDM/chatglm3-6b'
+  - 'THUDM/glm-4-9b-chat'
+  - 'baichuan-inc/Baichuan2-7B-Chat'
+  - 'baichuan-inc/Baichuan2-13B-Chat'
+  - 'meta-llama/Llama-2-7b-chat-hf'
+  - 'meta-llama/Llama-2-13b-chat-hf'
+  - 'meta-llama/Meta-Llama-3-8B-Instruct'
+  - 'mistralai/Mistral-7B-Instruct-v0.2'
+  - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
+  - '01-ai/Yi-6B-Chat'
+  - 'openbmb/MiniCPM-1B-sft-bf16'
+  - 'openbmb/MiniCPM-2B-sft-bf16'
+  - 'Qwen/Qwen1.5-7B-Chat'
+  - 'Qwen/Qwen2-1.5B-Instruct'
+  - 'Qwen/Qwen2-7B-Instruct'
+  - 'microsoft/Phi-3-mini-4k-instruct'
+  - 'microsoft/Phi-3-mini-128k-instruct'
+  - 'microsoft/phi-3-vision-128k-instruct'
   - 'openbmb/MiniCPM-V-2_6'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_443.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_443.yaml
index 9f9dd2a2f73..ec529fc8a07 100644
--- a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_443.yaml
+++ b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_443.yaml
@@ -1,6 +1,6 @@
 repo_id:
-  # - 'google/gemma-2-2b-it'
-  # - 'google/gemma-2-9b-it'
+  - 'google/gemma-2-2b-it'
+  - 'google/gemma-2-9b-it'
   - 'meta-llama/Llama-3.1-8B-Instruct'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit.yaml
index 52862ffcc63..76c35d4dde7 100644
--- a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit.yaml
+++ b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit.yaml
@@ -1,21 +1,21 @@
 repo_id:
-  # - 'THUDM/chatglm3-6b'
-  # - 'THUDM/glm-4-9b-chat'
-  # - 'baichuan-inc/Baichuan2-7B-Chat'
-  # - 'baichuan-inc/Baichuan2-13B-Chat'
-  # - 'meta-llama/Llama-2-7b-chat-hf'
-  # - 'meta-llama/Llama-2-13b-chat-hf'
-  # - 'meta-llama/Meta-Llama-3-8B-Instruct'
-  # - 'mistralai/Mistral-7B-Instruct-v0.2'
-  # - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
-  # - '01-ai/Yi-6B-Chat'
-  # - 'openbmb/MiniCPM-1B-sft-bf16'
-  # - 'openbmb/MiniCPM-2B-sft-bf16'
-  # - 'Qwen/Qwen1.5-7B-Chat'
-  # - 'Qwen/Qwen2-1.5B-Instruct'
-  # - 'Qwen/Qwen2-7B-Instruct'
-  # - 'microsoft/Phi-3-mini-4k-instruct'
-  # - 'microsoft/Phi-3-mini-128k-instruct'
+  - 'THUDM/chatglm3-6b'
+  - 'THUDM/glm-4-9b-chat'
+  - 'baichuan-inc/Baichuan2-7B-Chat'
+  - 'baichuan-inc/Baichuan2-13B-Chat'
+  - 'meta-llama/Llama-2-7b-chat-hf'
+  - 'meta-llama/Llama-2-13b-chat-hf'
+  - 'meta-llama/Meta-Llama-3-8B-Instruct'
+  - 'mistralai/Mistral-7B-Instruct-v0.2'
+  - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
+  - '01-ai/Yi-6B-Chat'
+  - 'openbmb/MiniCPM-1B-sft-bf16'
+  - 'openbmb/MiniCPM-2B-sft-bf16'
+  - 'Qwen/Qwen1.5-7B-Chat'
+  - 'Qwen/Qwen2-1.5B-Instruct'
+  - 'Qwen/Qwen2-7B-Instruct'
+  - 'microsoft/Phi-3-mini-4k-instruct'
+  - 'microsoft/Phi-3-mini-128k-instruct'
   - 'microsoft/phi-3-vision-128k-instruct'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit_443.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit_443.yaml
index e9df748f127..031085730e6 100644
--- a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit_443.yaml
+++ b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit_443.yaml
@@ -1,6 +1,6 @@
 repo_id:
-  # - 'google/gemma-2-2b-it'
-  # - 'google/gemma-2-9b-it'
+  - 'google/gemma-2-2b-it'
+  - 'google/gemma-2-9b-it'
   - 'meta-llama/Llama-3.1-8B-Instruct'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
diff --git a/python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16.yaml b/python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16.yaml
index 119f0269c94..bf5fc1e978b 100644
--- a/python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16.yaml
+++ b/python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16.yaml
@@ -1,22 +1,22 @@
 repo_id:
-  # - 'THUDM/chatglm3-6b'
-  # - 'THUDM/glm-4-9b-chat'
-  # - 'baichuan-inc/Baichuan2-7B-Chat'
-  # - 'baichuan-inc/Baichuan2-13B-Chat'
-  # - 'meta-llama/Llama-2-7b-chat-hf'
-  # - 'meta-llama/Llama-2-13b-chat-hf'
-  # - 'meta-llama/Meta-Llama-3-8B-Instruct'
-  # - 'mistralai/Mistral-7B-Instruct-v0.2'
-  # - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
-  # - '01-ai/Yi-6B-Chat'
-  # - 'openbmb/MiniCPM-1B-sft-bf16'
-  # - 'openbmb/MiniCPM-2B-sft-bf16'
-  # - 'Qwen/Qwen1.5-7B-Chat'
-  # - 'Qwen/Qwen2-1.5B-Instruct'
-  # - 'Qwen/Qwen2-7B-Instruct'
-  # - 'microsoft/Phi-3-mini-4k-instruct'
-  # - 'microsoft/Phi-3-mini-128k-instruct'
-  # - 'microsoft/phi-3-vision-128k-instruct'
+  - 'THUDM/chatglm3-6b'
+  - 'THUDM/glm-4-9b-chat'
+  - 'baichuan-inc/Baichuan2-7B-Chat'
+  - 'baichuan-inc/Baichuan2-13B-Chat'
+  - 'meta-llama/Llama-2-7b-chat-hf'
+  - 'meta-llama/Llama-2-13b-chat-hf'
+  - 'meta-llama/Meta-Llama-3-8B-Instruct'
+  - 'mistralai/Mistral-7B-Instruct-v0.2'
+  - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
+  - '01-ai/Yi-6B-Chat'
+  - 'openbmb/MiniCPM-1B-sft-bf16'
+  - 'openbmb/MiniCPM-2B-sft-bf16'
+  - 'Qwen/Qwen1.5-7B-Chat'
+  - 'Qwen/Qwen2-1.5B-Instruct'
+  - 'Qwen/Qwen2-7B-Instruct'
+  - 'microsoft/Phi-3-mini-4k-instruct'
+  - 'microsoft/Phi-3-mini-128k-instruct'
+  - 'microsoft/phi-3-vision-128k-instruct'
   - 'openbmb/MiniCPM-V-2_6'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
diff --git a/python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_443.yaml b/python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_443.yaml
index 04834213904..975d0a0232c 100644
--- a/python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_443.yaml
+++ b/python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_443.yaml
@@ -1,6 +1,6 @@
 repo_id:
-  # - 'google/gemma-2-2b-it'
-  # - 'google/gemma-2-9b-it'
+  - 'google/gemma-2-2b-it'
+  - 'google/gemma-2-9b-it'
   - 'meta-llama/Llama-3.1-8B-Instruct'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
diff --git a/python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16.yaml b/python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16.yaml
index 50cc69678fd..60202594cba 100644
--- a/python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16.yaml
+++ b/python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16.yaml
@@ -1,21 +1,21 @@
 repo_id:
-  # - 'THUDM/chatglm3-6b'
-  # - 'THUDM/glm-4-9b-chat'
-  # - 'baichuan-inc/Baichuan2-7B-Chat'
-  # - 'meta-llama/Llama-2-7b-chat-hf'
-  # - 'meta-llama/Llama-2-13b-chat-hf'
-  # - 'meta-llama/Meta-Llama-3-8B-Instruct'
-  # - 'mistralai/Mistral-7B-Instruct-v0.2'
-  # - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
-  # - '01-ai/Yi-6B-Chat'
-  # - 'openbmb/MiniCPM-1B-sft-bf16'
-  # - 'openbmb/MiniCPM-2B-sft-bf16'
-  # - 'Qwen/Qwen1.5-7B-Chat'
-  # - 'Qwen/Qwen2-1.5B-Instruct'
-  # - 'Qwen/Qwen2-7B-Instruct'
-  # - 'microsoft/Phi-3-mini-4k-instruct'
-  # - 'microsoft/Phi-3-mini-128k-instruct'
-  # - 'microsoft/phi-3-vision-128k-instruct'
+  - 'THUDM/chatglm3-6b'
+  - 'THUDM/glm-4-9b-chat'
+  - 'baichuan-inc/Baichuan2-7B-Chat'
+  - 'meta-llama/Llama-2-7b-chat-hf'
+  - 'meta-llama/Llama-2-13b-chat-hf'
+  - 'meta-llama/Meta-Llama-3-8B-Instruct'
+  - 'mistralai/Mistral-7B-Instruct-v0.2'
+  - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
+  - '01-ai/Yi-6B-Chat'
+  - 'openbmb/MiniCPM-1B-sft-bf16'
+  - 'openbmb/MiniCPM-2B-sft-bf16'
+  - 'Qwen/Qwen1.5-7B-Chat'
+  - 'Qwen/Qwen2-1.5B-Instruct'
+  - 'Qwen/Qwen2-7B-Instruct'
+  - 'microsoft/Phi-3-mini-4k-instruct'
+  - 'microsoft/Phi-3-mini-128k-instruct'
+  - 'microsoft/phi-3-vision-128k-instruct'
   - 'openbmb/MiniCPM-V-2_6'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
diff --git a/python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16_443.yaml b/python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16_443.yaml
index 1c44882fd1a..9127111b066 100644
--- a/python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16_443.yaml
+++ b/python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16_443.yaml
@@ -1,5 +1,5 @@
 repo_id:
-  # - 'google/gemma-2-2b-it'
+  - 'google/gemma-2-2b-it'
   # - 'google/gemma-2-9b-it'
   - 'meta-llama/Llama-3.1-8B-Instruct'
 local_model_hub: 'path to your local model hub'
diff --git a/python/llm/test/benchmark/igpu-perf/32-32_int4_fp16.yaml b/python/llm/test/benchmark/igpu-perf/32-32_int4_fp16.yaml
index 063ff9a6ddf..e70178744a3 100644
--- a/python/llm/test/benchmark/igpu-perf/32-32_int4_fp16.yaml
+++ b/python/llm/test/benchmark/igpu-perf/32-32_int4_fp16.yaml
@@ -1,22 +1,22 @@
 repo_id:
-  # - 'THUDM/chatglm3-6b'
-  # - 'THUDM/glm-4-9b-chat'
-  # - 'baichuan-inc/Baichuan2-7B-Chat'
-  # - 'baichuan-inc/Baichuan2-13B-Chat'
-  # - 'meta-llama/Llama-2-7b-chat-hf'
-  # - 'meta-llama/Llama-2-13b-chat-hf'
-  # - 'meta-llama/Meta-Llama-3-8B-Instruct'
-  # - 'mistralai/Mistral-7B-Instruct-v0.2'
-  # - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
-  # - '01-ai/Yi-6B-Chat'
-  # - 'openbmb/MiniCPM-1B-sft-bf16'
-  # - 'openbmb/MiniCPM-2B-sft-bf16'
-  # - 'Qwen/Qwen1.5-7B-Chat'
-  # - 'Qwen/Qwen2-1.5B-Instruct'
-  # - 'Qwen/Qwen2-7B-Instruct'
-  # - 'microsoft/Phi-3-mini-4k-instruct'
-  # - 'microsoft/Phi-3-mini-128k-instruct'
-  # - 'microsoft/phi-3-vision-128k-instruct'
+  - 'THUDM/chatglm3-6b'
+  - 'THUDM/glm-4-9b-chat'
+  - 'baichuan-inc/Baichuan2-7B-Chat'
+  - 'baichuan-inc/Baichuan2-13B-Chat'
+  - 'meta-llama/Llama-2-7b-chat-hf'
+  - 'meta-llama/Llama-2-13b-chat-hf'
+  - 'meta-llama/Meta-Llama-3-8B-Instruct'
+  - 'mistralai/Mistral-7B-Instruct-v0.2'
+  - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
+  - '01-ai/Yi-6B-Chat'
+  - 'openbmb/MiniCPM-1B-sft-bf16'
+  - 'openbmb/MiniCPM-2B-sft-bf16'
+  - 'Qwen/Qwen1.5-7B-Chat'
+  - 'Qwen/Qwen2-1.5B-Instruct'
+  - 'Qwen/Qwen2-7B-Instruct'
+  - 'microsoft/Phi-3-mini-4k-instruct'
+  - 'microsoft/Phi-3-mini-128k-instruct'
+  - 'microsoft/phi-3-vision-128k-instruct'
   - 'openbmb/MiniCPM-V-2_6'
 local_model_hub: 'path to your local model hub'
 warm_up: 3
diff --git a/python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_443.yaml b/python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_443.yaml
index d4111a367bd..45a7809fb11 100644
--- a/python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_443.yaml
+++ b/python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_443.yaml
@@ -1,6 +1,6 @@
 repo_id:
-  # - 'google/gemma-2-2b-it'
-  # - 'google/gemma-2-9b-it'
+  - 'google/gemma-2-2b-it'
+  - 'google/gemma-2-9b-it'
   - 'meta-llama/Llama-3.1-8B-Instruct'
 local_model_hub: 'path to your local model hub'
 warm_up: 3
diff --git a/python/llm/test/benchmark/igpu-perf/4096-512_int4_fp16.yaml b/python/llm/test/benchmark/igpu-perf/4096-512_int4_fp16.yaml
index 97549150c5f..514037a7380 100644
--- a/python/llm/test/benchmark/igpu-perf/4096-512_int4_fp16.yaml
+++ b/python/llm/test/benchmark/igpu-perf/4096-512_int4_fp16.yaml
@@ -1,21 +1,21 @@
 repo_id:
-  # - 'THUDM/chatglm3-6b'
-  # - 'THUDM/glm-4-9b-chat'
-  # - 'baichuan-inc/Baichuan2-7B-Chat'
-  # - 'meta-llama/Llama-2-7b-chat-hf'
-  # - 'meta-llama/Llama-2-13b-chat-hf'
-  # - 'meta-llama/Meta-Llama-3-8B-Instruct'
-  # - 'mistralai/Mistral-7B-Instruct-v0.2'
-  # - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
-  # - '01-ai/Yi-6B-Chat'
-  # - 'openbmb/MiniCPM-1B-sft-bf16'
-  # - 'openbmb/MiniCPM-2B-sft-bf16'
-  # - 'Qwen/Qwen1.5-7B-Chat'
-  # - 'Qwen/Qwen2-1.5B-Instruct'
-  # - 'Qwen/Qwen2-7B-Instruct'
-  # - 'microsoft/Phi-3-mini-4k-instruct'
-  # - 'microsoft/Phi-3-mini-128k-instruct'
-  # - 'microsoft/phi-3-vision-128k-instruct'
+  - 'THUDM/chatglm3-6b'
+  - 'THUDM/glm-4-9b-chat'
+  - 'baichuan-inc/Baichuan2-7B-Chat'
+  - 'meta-llama/Llama-2-7b-chat-hf'
+  - 'meta-llama/Llama-2-13b-chat-hf'
+  - 'meta-llama/Meta-Llama-3-8B-Instruct'
+  - 'mistralai/Mistral-7B-Instruct-v0.2'
+  - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
+  - '01-ai/Yi-6B-Chat'
+  - 'openbmb/MiniCPM-1B-sft-bf16'
+  - 'openbmb/MiniCPM-2B-sft-bf16'
+  - 'Qwen/Qwen1.5-7B-Chat'
+  - 'Qwen/Qwen2-1.5B-Instruct'
+  - 'Qwen/Qwen2-7B-Instruct'
+  - 'microsoft/Phi-3-mini-4k-instruct'
+  - 'microsoft/Phi-3-mini-128k-instruct'
+  - 'microsoft/phi-3-vision-128k-instruct'
   - 'openbmb/MiniCPM-V-2_6'
 local_model_hub: 'path to your local model hub'
 warm_up: 1
diff --git a/python/llm/test/benchmark/igpu-perf/4096-512_int4_fp16_443.yaml b/python/llm/test/benchmark/igpu-perf/4096-512_int4_fp16_443.yaml
index b8338733178..8579e3c95a2 100644
--- a/python/llm/test/benchmark/igpu-perf/4096-512_int4_fp16_443.yaml
+++ b/python/llm/test/benchmark/igpu-perf/4096-512_int4_fp16_443.yaml
@@ -1,5 +1,5 @@
 repo_id:
-  # - 'google/gemma-2-2b-it'
+  - 'google/gemma-2-2b-it'
   # - 'google/gemma-2-9b-it'
   - 'meta-llama/Llama-3.1-8B-Instruct'
 local_model_hub: 'path to your local model hub'