Commit d5ee4f3

add configs with their models (#421)
* add configs with their models
* fix tests
* doc update
* doc update
* fix path
1 parent: 2b19f45

20 files changed: +342 −378 lines
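The thrust of the change: each model config moves out of the central `lighteval.models.model_config` module and into the module of the model it configures. A minimal before/after sketch of the import-path migration, assembled from the diffs below:

```python
# Before this commit: configs were centralized in lighteval.models.model_config.
# from lighteval.models.model_config import BaseModelConfig, VLLMModelConfig

# After this commit: each config is imported from its model's own module.
from lighteval.models.transformers.base_model import BaseModelConfig
from lighteval.models.vllm.vllm_model import VLLMModelConfig
```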

docs/source/_toctree.yml

Lines changed: 1 addition & 3 deletions
@@ -33,9 +33,7 @@
     - local: package_reference/evaluation_tracker
       title: EvaluationTracker
     - local: package_reference/models
-      title: Models
-    - local: package_reference/model_config
-      title: ModelConfig
+      title: Models and ModelConfigs
     - local: package_reference/pipeline
       title: Pipeline
   title: Main classes

docs/source/package_reference/model_config.mdx

Lines changed: 0 additions & 10 deletions
This file was deleted.

docs/source/package_reference/models.mdx

Lines changed: 23 additions & 9 deletions
@@ -4,24 +4,38 @@
 ### LightevalModel
 [[autodoc]] models.abstract_model.LightevalModel
 
+
 ## Accelerate and Transformers Models
 ### BaseModel
-[[autodoc]] models.base_model.BaseModel
+[[autodoc]] models.transformers.base_model.BaseModelConfig
+[[autodoc]] models.transformers.base_model.BaseModel
+
 ### AdapterModel
-[[autodoc]] models.adapter_model.AdapterModel
+[[autodoc]] models.transformers.adapter_model.AdapterModelConfig
+[[autodoc]] models.transformers.adapter_model.AdapterModel
+
 ### DeltaModel
-[[autodoc]] models.delta_model.DeltaModel
+[[autodoc]] models.transformers.delta_model.DeltaModelConfig
+[[autodoc]] models.transformers.delta_model.DeltaModel
 
-## Inference Endpoints and TGI Models
+## Endpoints-based Models
 ### InferenceEndpointModel
-[[autodoc]] models.endpoint_model.InferenceEndpointModel
-### ModelClient
-[[autodoc]] models.tgi_model.ModelClient
+[[autodoc]] models.endpoints.endpoint_model.InferenceEndpointModelConfig
+[[autodoc]] models.endpoints.endpoint_model.InferenceModelConfig
+[[autodoc]] models.endpoints.endpoint_model.InferenceEndpointModel
+
+### TGI ModelClient
+[[autodoc]] models.endpoints.tgi_model.TGIModelConfig
+[[autodoc]] models.endpoints.tgi_model.ModelClient
+
+### Open AI Models
+[[autodoc]] models.endpoints.openai_model.OpenAIClient
 
 ## Nanotron Model
 ### NanotronLightevalModel
-[[autodoc]] models.nanotron_model.NanotronLightevalModel
+[[autodoc]] models.nanotron.nanotron_model.NanotronLightevalModel
 
 ## VLLM Model
 ### VLLMModel
-[[autodoc]] models.vllm_model.VLLMModel
+[[autodoc]] models.vllm.vllm_model.VLLMModelConfig
+[[autodoc]] models.vllm.vllm_model.VLLMModel

src/lighteval/main_accelerate.py

Lines changed: 3 additions & 1 deletion
@@ -107,7 +107,9 @@ def accelerate( # noqa C901
     from accelerate import Accelerator, InitProcessGroupKwargs
 
     from lighteval.logging.evaluation_tracker import EvaluationTracker
-    from lighteval.models.model_config import AdapterModelConfig, BaseModelConfig, BitsAndBytesConfig, DeltaModelConfig
+    from lighteval.models.transformers.adapter_model import AdapterModelConfig
+    from lighteval.models.transformers.base_model import BaseModelConfig, BitsAndBytesConfig
+    from lighteval.models.transformers.delta_model import DeltaModelConfig
     from lighteval.pipeline import EnvConfig, ParallelismManager, Pipeline, PipelineParameters
 
     accelerator = Accelerator(kwargs_handlers=[InitProcessGroupKwargs(timeout=timedelta(seconds=3000))])

src/lighteval/main_endpoint.py

Lines changed: 1 addition & 1 deletion
@@ -201,7 +201,7 @@ def inference_endpoint(
     import yaml
 
     from lighteval.logging.evaluation_tracker import EvaluationTracker
-    from lighteval.models.model_config import (
+    from lighteval.models.endpoints.endpoint_model import (
         InferenceEndpointModelConfig,
     )
     from lighteval.pipeline import EnvConfig, ParallelismManager, Pipeline, PipelineParameters

src/lighteval/main_vllm.py

Lines changed: 1 addition & 1 deletion
@@ -89,7 +89,7 @@ def vllm(
     Evaluate models using vllm as backend.
     """
     from lighteval.logging.evaluation_tracker import EvaluationTracker
-    from lighteval.models.model_config import VLLMModelConfig
+    from lighteval.models.vllm.vllm_model import VLLMModelConfig
     from lighteval.pipeline import EnvConfig, ParallelismManager, Pipeline, PipelineParameters
 
     TOKEN = os.getenv("HF_TOKEN")

src/lighteval/models/dummy_model.py renamed to src/lighteval/models/dummy/dummy_model.py

Lines changed: 6 additions & 1 deletion
@@ -23,12 +23,12 @@
 # inspired by https://github.com/EleutherAI/lm-evaluation-harness/blob/main/lm_eval/models/dummy.py
 
 import random
+from dataclasses import dataclass
 from typing import Optional
 
 from transformers import AutoTokenizer
 
 from lighteval.models.abstract_model import LightevalModel, ModelInfo
-from lighteval.models.model_config import DummyModelConfig
 from lighteval.models.model_output import GenerativeResponse, LoglikelihoodResponse, LoglikelihoodSingleTokenResponse
 from lighteval.tasks.requests import (
     GreedyUntilRequest,
@@ -39,6 +39,11 @@
 from lighteval.utils.utils import EnvConfig
 
 
+@dataclass
+class DummyModelConfig:
+    seed: int = 42
+
+
 class DummyModel(LightevalModel):
     """Dummy model to generate random baselines."""
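With the config now co-located with the model, a quick sketch of the new dataclass in use (the values are illustrative, not taken from the diff):

```python
from lighteval.models.dummy.dummy_model import DummyModelConfig

# seed defaults to 42; override it to vary the random baseline.
config = DummyModelConfig(seed=1234)
print(config.seed)  # 1234
```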

src/lighteval/models/endpoint_model.py renamed to src/lighteval/models/endpoints/endpoint_model.py

Lines changed: 55 additions & 2 deletions
@@ -24,7 +24,8 @@
 import logging
 import re
 import time
-from typing import Coroutine, List, Optional, Union
+from dataclasses import dataclass
+from typing import Coroutine, Dict, List, Optional, Union
 
 import requests
 import torch
@@ -47,7 +48,6 @@
 
 from lighteval.data import GenerativeTaskDataset, LoglikelihoodDataset
 from lighteval.models.abstract_model import LightevalModel, ModelInfo
-from lighteval.models.model_config import InferenceEndpointModelConfig, InferenceModelConfig
 from lighteval.models.model_output import GenerativeResponse, LoglikelihoodResponse, LoglikelihoodSingleTokenResponse
 from lighteval.tasks.requests import (
     GreedyUntilRequest,
@@ -74,6 +74,59 @@
 ]
 
 
+@dataclass
+class InferenceModelConfig:
+    model: str
+    add_special_tokens: bool = True
+
+
+@dataclass
+class InferenceEndpointModelConfig:
+    endpoint_name: str = None
+    model_name: str = None
+    should_reuse_existing: bool = False
+    accelerator: str = "gpu"
+    model_dtype: str = None  # if empty, we use the default
+    vendor: str = "aws"
+    region: str = "us-east-1"  # this region has the most hardware options available
+    instance_size: str = None  # if none, we autoscale
+    instance_type: str = None  # if none, we autoscale
+    framework: str = "pytorch"
+    endpoint_type: str = "protected"
+    add_special_tokens: bool = True
+    revision: str = "main"
+    namespace: str = None  # the namespace under which to launch the endpoint; defaults to the current user's namespace
+    image_url: str = None
+    env_vars: dict = None
+
+    def __post_init__(self):
+        # xor: raise if exactly one of instance_size/instance_type is set
+        if (self.instance_size is None) ^ (self.instance_type is None):
+            raise ValueError(
+                "When creating an inference endpoint, you need to specify explicitly both instance_type and instance_size, or none of them for autoscaling."
+            )
+
+        if not (self.endpoint_name is None) ^ (self.model_name is None):
+            raise ValueError("You need to set either endpoint_name or model_name (but not both).")
+
+    def get_dtype_args(self) -> Dict[str, str]:
+        if self.model_dtype is None:
+            return {}
+        model_dtype = self.model_dtype.lower()
+        if model_dtype in ["awq", "eetq", "gptq"]:
+            return {"QUANTIZE": model_dtype}
+        if model_dtype == "8bit":
+            return {"QUANTIZE": "bitsandbytes"}
+        if model_dtype == "4bit":
+            return {"QUANTIZE": "bitsandbytes-nf4"}
+        if model_dtype in ["bfloat16", "float16"]:
+            return {"DTYPE": model_dtype}
+        return {}
+
+    def get_custom_env_vars(self) -> Dict[str, str]:
+        return {k: str(v) for k, v in self.env_vars.items()} if self.env_vars else {}
+
+
 class InferenceEndpointModel(LightevalModel):
     """InferenceEndpointModels can be used both with the free inference client, or with inference
     endpoints, which will use text-generation-inference to deploy your model for the duration of the evaluation.
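A sketch of how the relocated `InferenceEndpointModelConfig` behaves, following the `__post_init__` and `get_dtype_args` logic above; the model and instance names are hypothetical placeholders:

```python
from lighteval.models.endpoints.endpoint_model import InferenceEndpointModelConfig

# Valid: model_name set (endpoint_name unset) and neither instance field set -> autoscaling.
config = InferenceEndpointModelConfig(model_name="my-org/my-model", model_dtype="4bit")
print(config.get_dtype_args())  # {'QUANTIZE': 'bitsandbytes-nf4'}

# Invalid: setting instance_type without instance_size trips the XOR check in __post_init__.
try:
    InferenceEndpointModelConfig(model_name="my-org/my-model", instance_type="g5.2xlarge")
except ValueError as err:
    print(err)
```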

src/lighteval/models/openai_model.py renamed to src/lighteval/models/endpoints/openai_model.py

Lines changed: 7 additions & 1 deletion
@@ -24,13 +24,14 @@
 import os
 import time
 from concurrent.futures import ThreadPoolExecutor
+from dataclasses import dataclass
 from typing import Optional
 
 from tqdm import tqdm
 
 from lighteval.data import GenerativeTaskDataset, LoglikelihoodDataset
 from lighteval.models.abstract_model import LightevalModel
-from lighteval.models.endpoint_model import ModelInfo
+from lighteval.models.endpoints.endpoint_model import ModelInfo
 from lighteval.models.model_output import (
     GenerativeResponse,
     LoglikelihoodResponse,
@@ -58,6 +59,11 @@
 logging.getLogger("httpx").setLevel(logging.ERROR)
 
 
+@dataclass
+class OpenAIModelConfig:
+    model: str
+
+
 class OpenAIClient(LightevalModel):
     _DEFAULT_MAX_LENGTH: int = 4096
 
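`OpenAIModelConfig` carries only the model name for now; a trivial construction sketch (the model string is an arbitrary example):

```python
from lighteval.models.endpoints.openai_model import OpenAIModelConfig

config = OpenAIModelConfig(model="gpt-4o-mini")  # any OpenAI model identifier
```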

src/lighteval/models/tgi_model.py renamed to src/lighteval/models/endpoints/tgi_model.py

Lines changed: 9 additions & 1 deletion
@@ -21,13 +21,14 @@
 # SOFTWARE.
 
 import asyncio
+from dataclasses import dataclass
 from typing import Coroutine, Optional
 
 import requests
 from huggingface_hub import TextGenerationInputGrammarType, TextGenerationOutput
 from transformers import AutoTokenizer
 
-from lighteval.models.endpoint_model import InferenceEndpointModel, ModelInfo
+from lighteval.models.endpoints.endpoint_model import InferenceEndpointModel, ModelInfo
 from lighteval.utils.imports import NO_TGI_ERROR_MSG, is_tgi_available
 
 
@@ -44,6 +45,13 @@ def divide_chunks(array, n):
         yield array[i : i + n]
 
 
+@dataclass
+class TGIModelConfig:
+    inference_server_address: str
+    inference_server_auth: str
+    model_id: str
+
+
 # inherit from InferenceEndpointModel instead of LightevalModel since they both use the same interface, and only overwrite
 # the client functions, since they use a different client.
 class ModelClient(InferenceEndpointModel):
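`TGIModelConfig` is likewise a plain dataclass with three required fields; a hypothetical construction against a local TGI server:

```python
from lighteval.models.endpoints.tgi_model import TGIModelConfig

config = TGIModelConfig(
    inference_server_address="http://localhost:8080",  # hypothetical local TGI endpoint
    inference_server_auth="hf_xxx",                    # auth token, if the server needs one
    model_id="my-org/my-model",                        # hypothetical model id
)
```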
