intel · Oscilloscope98 · Oct 15, 2024 · Oct 15, 2024 · Oct 15, 2024
diff --git a/.github/workflows/llm_performance_tests.yml b/.github/workflows/llm_performance_tests.yml
@@ -662,7 +662,7 @@ jobs:
           move ..\..\..\test\benchmark\igpu-perf\32-32_int4_fp16.yaml config.yaml
           set PYTHONIOENCODING=utf-8
           python run.py >> %CSV_SAVE_PATH%\32-32_int4_fp16\log\%LOG_FILE% 2>&1
-          if %ERRORLEVEL% neq 0 if %ERRORLEVEL% neq -1073740791 (exit /b 1)
+          if %ERRORLEVEL% neq 0 (exit /b 1)
           python ..\..\..\test\benchmark\igpu-perf\check_csv_results.py --yaml-file config.yaml --suffix test1
           if %ERRORLEVEL% neq 0 (exit /b 1)
 

diff --git a/python/llm/test/benchmark/igpu-perf/1024-128.yaml b/python/llm/test/benchmark/igpu-perf/1024-128.yaml
@@ -1,23 +1,23 @@
 repo_id:
-  # - 'THUDM/chatglm3-6b'
-  # - 'THUDM/glm-4-9b-chat'
-  # - 'baichuan-inc/Baichuan2-7B-Chat'
-  # - 'baichuan-inc/Baichuan2-13B-Chat'
-  # - 'meta-llama/Llama-2-7b-chat-hf'
-  # - 'meta-llama/Llama-2-13b-chat-hf'
-  # - 'meta-llama/Meta-Llama-3-8B-Instruct'
-  # - 'mistralai/Mistral-7B-Instruct-v0.2'
-  # - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
-  # - 'RWKV/v5-Eagle-7B-HF'
-  # - '01-ai/Yi-6B-Chat'
-  # - 'openbmb/MiniCPM-1B-sft-bf16'
-  # - 'openbmb/MiniCPM-2B-sft-bf16'
-  # - 'Qwen/Qwen1.5-7B-Chat'
-  # - 'Qwen/Qwen2-1.5B-Instruct'
-  # - 'Qwen/Qwen2-7B-Instruct'
-  # - 'microsoft/Phi-3-mini-4k-instruct'
-  # - 'microsoft/Phi-3-mini-128k-instruct'
-  # - 'microsoft/phi-3-vision-128k-instruct'
+  - 'THUDM/chatglm3-6b'
+  - 'THUDM/glm-4-9b-chat'
+  - 'baichuan-inc/Baichuan2-7B-Chat'
+  - 'baichuan-inc/Baichuan2-13B-Chat'
+  - 'meta-llama/Llama-2-7b-chat-hf'
+  - 'meta-llama/Llama-2-13b-chat-hf'
+  - 'meta-llama/Meta-Llama-3-8B-Instruct'
+  - 'mistralai/Mistral-7B-Instruct-v0.2'
+  - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
+  - 'RWKV/v5-Eagle-7B-HF'
+  - '01-ai/Yi-6B-Chat'
+  - 'openbmb/MiniCPM-1B-sft-bf16'
+  - 'openbmb/MiniCPM-2B-sft-bf16'
+  - 'Qwen/Qwen1.5-7B-Chat'
+  - 'Qwen/Qwen2-1.5B-Instruct'
+  - 'Qwen/Qwen2-7B-Instruct'
+  - 'microsoft/Phi-3-mini-4k-instruct'
+  - 'microsoft/Phi-3-mini-128k-instruct'
+  - 'microsoft/phi-3-vision-128k-instruct'
   - 'openbmb/MiniCPM-V-2_6'
 local_model_hub: 'path to your local model hub'
 warm_up: 1

diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_443.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_443.yaml
@@ -1,6 +1,6 @@
 repo_id:
-  # - 'google/gemma-2-2b-it'
-  # - 'google/gemma-2-9b-it'
+  - 'google/gemma-2-2b-it'
+  - 'google/gemma-2-9b-it'
   - 'meta-llama/Llama-3.1-8B-Instruct'
 local_model_hub: 'path to your local model hub'
 warm_up: 1

diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16.yaml
@@ -1,22 +1,22 @@
 repo_id:
-  # - 'THUDM/chatglm3-6b'
-  # - 'THUDM/glm-4-9b-chat'
-  # - 'baichuan-inc/Baichuan2-7B-Chat'
-  # - 'baichuan-inc/Baichuan2-13B-Chat'
-  # - 'meta-llama/Llama-2-7b-chat-hf'
-  # - 'meta-llama/Llama-2-13b-chat-hf'
-  # - 'meta-llama/Meta-Llama-3-8B-Instruct'
-  # - 'mistralai/Mistral-7B-Instruct-v0.2'
-  # - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
-  # - '01-ai/Yi-6B-Chat'
-  # - 'openbmb/MiniCPM-1B-sft-bf16'
-  # - 'openbmb/MiniCPM-2B-sft-bf16'
-  # - 'Qwen/Qwen1.5-7B-Chat'
-  # - 'Qwen/Qwen2-1.5B-Instruct'
-  # - 'Qwen/Qwen2-7B-Instruct'
-  # - 'microsoft/Phi-3-mini-4k-instruct'
-  # - 'microsoft/Phi-3-mini-128k-instruct'
-  # - 'microsoft/phi-3-vision-128k-instruct'
+  - 'THUDM/chatglm3-6b'
+  - 'THUDM/glm-4-9b-chat'
+  - 'baichuan-inc/Baichuan2-7B-Chat'
+  - 'baichuan-inc/Baichuan2-13B-Chat'
+  - 'meta-llama/Llama-2-7b-chat-hf'
+  - 'meta-llama/Llama-2-13b-chat-hf'
+  - 'meta-llama/Meta-Llama-3-8B-Instruct'
+  - 'mistralai/Mistral-7B-Instruct-v0.2'
+  - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
+  - '01-ai/Yi-6B-Chat'
+  - 'openbmb/MiniCPM-1B-sft-bf16'
+  - 'openbmb/MiniCPM-2B-sft-bf16'
+  - 'Qwen/Qwen1.5-7B-Chat'
+  - 'Qwen/Qwen2-1.5B-Instruct'
+  - 'Qwen/Qwen2-7B-Instruct'
+  - 'microsoft/Phi-3-mini-4k-instruct'
+  - 'microsoft/Phi-3-mini-128k-instruct'
+  - 'microsoft/phi-3-vision-128k-instruct'
   - 'openbmb/MiniCPM-V-2_6'
 local_model_hub: 'path to your local model hub'
 warm_up: 1

diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_443.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_443.yaml
@@ -1,6 +1,6 @@
 repo_id:
-  # - 'google/gemma-2-2b-it'
-  # - 'google/gemma-2-9b-it'
+  - 'google/gemma-2-2b-it'
+  - 'google/gemma-2-9b-it'
   - 'meta-llama/Llama-3.1-8B-Instruct'
 local_model_hub: 'path to your local model hub'
 warm_up: 1

diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit.yaml
@@ -1,21 +1,21 @@
 repo_id:
-  # - 'THUDM/chatglm3-6b'
-  # - 'THUDM/glm-4-9b-chat'
-  # - 'baichuan-inc/Baichuan2-7B-Chat'
-  # - 'baichuan-inc/Baichuan2-13B-Chat'
-  # - 'meta-llama/Llama-2-7b-chat-hf'
-  # - 'meta-llama/Llama-2-13b-chat-hf'
-  # - 'meta-llama/Meta-Llama-3-8B-Instruct'
-  # - 'mistralai/Mistral-7B-Instruct-v0.2'
-  # - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
-  # - '01-ai/Yi-6B-Chat'
-  # - 'openbmb/MiniCPM-1B-sft-bf16'
-  # - 'openbmb/MiniCPM-2B-sft-bf16'
-  # - 'Qwen/Qwen1.5-7B-Chat'
-  # - 'Qwen/Qwen2-1.5B-Instruct'
-  # - 'Qwen/Qwen2-7B-Instruct'
-  # - 'microsoft/Phi-3-mini-4k-instruct'
-  # - 'microsoft/Phi-3-mini-128k-instruct'
+  - 'THUDM/chatglm3-6b'
+  - 'THUDM/glm-4-9b-chat'
+  - 'baichuan-inc/Baichuan2-7B-Chat'
+  - 'baichuan-inc/Baichuan2-13B-Chat'
+  - 'meta-llama/Llama-2-7b-chat-hf'
+  - 'meta-llama/Llama-2-13b-chat-hf'
+  - 'meta-llama/Meta-Llama-3-8B-Instruct'
+  - 'mistralai/Mistral-7B-Instruct-v0.2'
+  - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
+  - '01-ai/Yi-6B-Chat'
+  - 'openbmb/MiniCPM-1B-sft-bf16'
+  - 'openbmb/MiniCPM-2B-sft-bf16'
+  - 'Qwen/Qwen1.5-7B-Chat'
+  - 'Qwen/Qwen2-1.5B-Instruct'
+  - 'Qwen/Qwen2-7B-Instruct'
+  - 'microsoft/Phi-3-mini-4k-instruct'
+  - 'microsoft/Phi-3-mini-128k-instruct'
   - 'microsoft/phi-3-vision-128k-instruct'
 local_model_hub: 'path to your local model hub'
 warm_up: 1

diff --git a/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit_443.yaml b/python/llm/test/benchmark/igpu-perf/1024-128_int4_fp16_loadlowbit_443.yaml
@@ -1,6 +1,6 @@
 repo_id:
-  # - 'google/gemma-2-2b-it'
-  # - 'google/gemma-2-9b-it'
+  - 'google/gemma-2-2b-it'
+  - 'google/gemma-2-9b-it'
   - 'meta-llama/Llama-3.1-8B-Instruct'
 local_model_hub: 'path to your local model hub'
 warm_up: 1

diff --git a/python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16.yaml b/python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16.yaml
@@ -1,22 +1,22 @@
 repo_id:
-  # - 'THUDM/chatglm3-6b'
-  # - 'THUDM/glm-4-9b-chat'
-  # - 'baichuan-inc/Baichuan2-7B-Chat'
-  # - 'baichuan-inc/Baichuan2-13B-Chat'
-  # - 'meta-llama/Llama-2-7b-chat-hf'
-  # - 'meta-llama/Llama-2-13b-chat-hf'
-  # - 'meta-llama/Meta-Llama-3-8B-Instruct'
-  # - 'mistralai/Mistral-7B-Instruct-v0.2'
-  # - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
-  # - '01-ai/Yi-6B-Chat'
-  # - 'openbmb/MiniCPM-1B-sft-bf16'
-  # - 'openbmb/MiniCPM-2B-sft-bf16'
-  # - 'Qwen/Qwen1.5-7B-Chat'
-  # - 'Qwen/Qwen2-1.5B-Instruct'
-  # - 'Qwen/Qwen2-7B-Instruct'
-  # - 'microsoft/Phi-3-mini-4k-instruct'
-  # - 'microsoft/Phi-3-mini-128k-instruct'
-  # - 'microsoft/phi-3-vision-128k-instruct'
+  - 'THUDM/chatglm3-6b'
+  - 'THUDM/glm-4-9b-chat'
+  - 'baichuan-inc/Baichuan2-7B-Chat'
+  - 'baichuan-inc/Baichuan2-13B-Chat'
+  - 'meta-llama/Llama-2-7b-chat-hf'
+  - 'meta-llama/Llama-2-13b-chat-hf'
+  - 'meta-llama/Meta-Llama-3-8B-Instruct'
+  - 'mistralai/Mistral-7B-Instruct-v0.2'
+  - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
+  - '01-ai/Yi-6B-Chat'
+  - 'openbmb/MiniCPM-1B-sft-bf16'
+  - 'openbmb/MiniCPM-2B-sft-bf16'
+  - 'Qwen/Qwen1.5-7B-Chat'
+  - 'Qwen/Qwen2-1.5B-Instruct'
+  - 'Qwen/Qwen2-7B-Instruct'
+  - 'microsoft/Phi-3-mini-4k-instruct'
+  - 'microsoft/Phi-3-mini-128k-instruct'
+  - 'microsoft/phi-3-vision-128k-instruct'
   - 'openbmb/MiniCPM-V-2_6'
 local_model_hub: 'path to your local model hub'
 warm_up: 1

diff --git a/python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_443.yaml b/python/llm/test/benchmark/igpu-perf/2048-256_int4_fp16_443.yaml
@@ -1,6 +1,6 @@
 repo_id:
-  # - 'google/gemma-2-2b-it'
-  # - 'google/gemma-2-9b-it'
+  - 'google/gemma-2-2b-it'
+  - 'google/gemma-2-9b-it'
   - 'meta-llama/Llama-3.1-8B-Instruct'
 local_model_hub: 'path to your local model hub'
 warm_up: 1

diff --git a/python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16.yaml b/python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16.yaml
@@ -1,21 +1,21 @@
 repo_id:
-  # - 'THUDM/chatglm3-6b'
-  # - 'THUDM/glm-4-9b-chat'
-  # - 'baichuan-inc/Baichuan2-7B-Chat'
-  # - 'meta-llama/Llama-2-7b-chat-hf'
-  # - 'meta-llama/Llama-2-13b-chat-hf'
-  # - 'meta-llama/Meta-Llama-3-8B-Instruct'
-  # - 'mistralai/Mistral-7B-Instruct-v0.2'
-  # - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
-  # - '01-ai/Yi-6B-Chat'
-  # - 'openbmb/MiniCPM-1B-sft-bf16'
-  # - 'openbmb/MiniCPM-2B-sft-bf16'
-  # - 'Qwen/Qwen1.5-7B-Chat'
-  # - 'Qwen/Qwen2-1.5B-Instruct'
-  # - 'Qwen/Qwen2-7B-Instruct'
-  # - 'microsoft/Phi-3-mini-4k-instruct'
-  # - 'microsoft/Phi-3-mini-128k-instruct'
-  # - 'microsoft/phi-3-vision-128k-instruct'
+  - 'THUDM/chatglm3-6b'
+  - 'THUDM/glm-4-9b-chat'
+  - 'baichuan-inc/Baichuan2-7B-Chat'
+  - 'meta-llama/Llama-2-7b-chat-hf'
+  - 'meta-llama/Llama-2-13b-chat-hf'
+  - 'meta-llama/Meta-Llama-3-8B-Instruct'
+  - 'mistralai/Mistral-7B-Instruct-v0.2'
+  - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
+  - '01-ai/Yi-6B-Chat'
+  - 'openbmb/MiniCPM-1B-sft-bf16'
+  - 'openbmb/MiniCPM-2B-sft-bf16'
+  - 'Qwen/Qwen1.5-7B-Chat'
+  - 'Qwen/Qwen2-1.5B-Instruct'
+  - 'Qwen/Qwen2-7B-Instruct'
+  - 'microsoft/Phi-3-mini-4k-instruct'
+  - 'microsoft/Phi-3-mini-128k-instruct'
+  - 'microsoft/phi-3-vision-128k-instruct'
   - 'openbmb/MiniCPM-V-2_6'
 local_model_hub: 'path to your local model hub'
 warm_up: 1

diff --git a/python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16_443.yaml b/python/llm/test/benchmark/igpu-perf/3072-384_int4_fp16_443.yaml
@@ -1,5 +1,5 @@
 repo_id:
-  # - 'google/gemma-2-2b-it'
+  - 'google/gemma-2-2b-it'
   # - 'google/gemma-2-9b-it'
   - 'meta-llama/Llama-3.1-8B-Instruct'
 local_model_hub: 'path to your local model hub'

diff --git a/python/llm/test/benchmark/igpu-perf/32-32_int4_fp16.yaml b/python/llm/test/benchmark/igpu-perf/32-32_int4_fp16.yaml
@@ -1,22 +1,22 @@
 repo_id:
-  # - 'THUDM/chatglm3-6b'
-  # - 'THUDM/glm-4-9b-chat'
-  # - 'baichuan-inc/Baichuan2-7B-Chat'
-  # - 'baichuan-inc/Baichuan2-13B-Chat'
-  # - 'meta-llama/Llama-2-7b-chat-hf'
-  # - 'meta-llama/Llama-2-13b-chat-hf'
-  # - 'meta-llama/Meta-Llama-3-8B-Instruct'
-  # - 'mistralai/Mistral-7B-Instruct-v0.2'
-  # - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
-  # - '01-ai/Yi-6B-Chat'
-  # - 'openbmb/MiniCPM-1B-sft-bf16'
-  # - 'openbmb/MiniCPM-2B-sft-bf16'
-  # - 'Qwen/Qwen1.5-7B-Chat'
-  # - 'Qwen/Qwen2-1.5B-Instruct'
-  # - 'Qwen/Qwen2-7B-Instruct'
-  # - 'microsoft/Phi-3-mini-4k-instruct'
-  # - 'microsoft/Phi-3-mini-128k-instruct'
-  # - 'microsoft/phi-3-vision-128k-instruct'
+  - 'THUDM/chatglm3-6b'
+  - 'THUDM/glm-4-9b-chat'
+  - 'baichuan-inc/Baichuan2-7B-Chat'
+  - 'baichuan-inc/Baichuan2-13B-Chat'
+  - 'meta-llama/Llama-2-7b-chat-hf'
+  - 'meta-llama/Llama-2-13b-chat-hf'
+  - 'meta-llama/Meta-Llama-3-8B-Instruct'
+  - 'mistralai/Mistral-7B-Instruct-v0.2'
+  - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
+  - '01-ai/Yi-6B-Chat'
+  - 'openbmb/MiniCPM-1B-sft-bf16'
+  - 'openbmb/MiniCPM-2B-sft-bf16'
+  - 'Qwen/Qwen1.5-7B-Chat'
+  - 'Qwen/Qwen2-1.5B-Instruct'
+  - 'Qwen/Qwen2-7B-Instruct'
+  - 'microsoft/Phi-3-mini-4k-instruct'
+  - 'microsoft/Phi-3-mini-128k-instruct'
+  - 'microsoft/phi-3-vision-128k-instruct'
   - 'openbmb/MiniCPM-V-2_6'
 local_model_hub: 'path to your local model hub'
 warm_up: 3

diff --git a/python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_443.yaml b/python/llm/test/benchmark/igpu-perf/32-32_int4_fp16_443.yaml
@@ -1,6 +1,6 @@
 repo_id:
-  # - 'google/gemma-2-2b-it'
-  # - 'google/gemma-2-9b-it'
+  - 'google/gemma-2-2b-it'
+  - 'google/gemma-2-9b-it'
   - 'meta-llama/Llama-3.1-8B-Instruct'
 local_model_hub: 'path to your local model hub'
 warm_up: 3

diff --git a/python/llm/test/benchmark/igpu-perf/4096-512_int4_fp16.yaml b/python/llm/test/benchmark/igpu-perf/4096-512_int4_fp16.yaml
@@ -1,21 +1,21 @@
 repo_id:
-  # - 'THUDM/chatglm3-6b'
-  # - 'THUDM/glm-4-9b-chat'
-  # - 'baichuan-inc/Baichuan2-7B-Chat'
-  # - 'meta-llama/Llama-2-7b-chat-hf'
-  # - 'meta-llama/Llama-2-13b-chat-hf'
-  # - 'meta-llama/Meta-Llama-3-8B-Instruct'
-  # - 'mistralai/Mistral-7B-Instruct-v0.2'
-  # - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
-  # - '01-ai/Yi-6B-Chat'
-  # - 'openbmb/MiniCPM-1B-sft-bf16'
-  # - 'openbmb/MiniCPM-2B-sft-bf16'
-  # - 'Qwen/Qwen1.5-7B-Chat'
-  # - 'Qwen/Qwen2-1.5B-Instruct'
-  # - 'Qwen/Qwen2-7B-Instruct'
-  # - 'microsoft/Phi-3-mini-4k-instruct'
-  # - 'microsoft/Phi-3-mini-128k-instruct'
-  # - 'microsoft/phi-3-vision-128k-instruct'
+  - 'THUDM/chatglm3-6b'
+  - 'THUDM/glm-4-9b-chat'
+  - 'baichuan-inc/Baichuan2-7B-Chat'
+  - 'meta-llama/Llama-2-7b-chat-hf'
+  - 'meta-llama/Llama-2-13b-chat-hf'
+  - 'meta-llama/Meta-Llama-3-8B-Instruct'
+  - 'mistralai/Mistral-7B-Instruct-v0.2'
+  - 'deepseek-ai/deepseek-coder-7b-instruct-v1.5'
+  - '01-ai/Yi-6B-Chat'
+  - 'openbmb/MiniCPM-1B-sft-bf16'
+  - 'openbmb/MiniCPM-2B-sft-bf16'
+  - 'Qwen/Qwen1.5-7B-Chat'
+  - 'Qwen/Qwen2-1.5B-Instruct'
+  - 'Qwen/Qwen2-7B-Instruct'
+  - 'microsoft/Phi-3-mini-4k-instruct'
+  - 'microsoft/Phi-3-mini-128k-instruct'
+  - 'microsoft/phi-3-vision-128k-instruct'
   - 'openbmb/MiniCPM-V-2_6'
 local_model_hub: 'path to your local model hub'
 warm_up: 1

diff --git a/python/llm/test/benchmark/igpu-perf/4096-512_int4_fp16_443.yaml b/python/llm/test/benchmark/igpu-perf/4096-512_int4_fp16_443.yaml
@@ -1,5 +1,5 @@
 repo_id:
-  # - 'google/gemma-2-2b-it'
+  - 'google/gemma-2-2b-it'
   # - 'google/gemma-2-9b-it'
   - 'meta-llama/Llama-3.1-8B-Instruct'
 local_model_hub: 'path to your local model hub'