Merged
Commits
50 commits
17783b2
Added inference using litellm.
JoelNiklaus Nov 7, 2024
9e92150
Add Udmurt (udm) translation literals (#381)
codemurt Nov 8, 2024
30a624c
This PR adds translation literals for the Belarusian language. (#382)
Kryuski Nov 8, 2024
6e6fed6
fix: cache directory variable (#378)
NazimHAli Nov 8, 2024
d1d4c69
greedy_until() fix (#344)
vsabolcec Nov 8, 2024
f69811f
Fixed some params in completion call to enable more model providers.
JoelNiklaus Nov 11, 2024
dabb4a7
Added diskcache.
JoelNiklaus Nov 13, 2024
65f759c
Merge branch 'main' into add_litellm_inference
JoelNiklaus Nov 20, 2024
f74afd4
Merge branch 'main' into add_litellm_inference
JoelNiklaus Nov 22, 2024
88a9838
Fix issue for openai evaluation.
JoelNiklaus Nov 25, 2024
02ed461
Added support for stop sequences and generation size.
JoelNiklaus Nov 26, 2024
34596c2
Merge branch 'main' into add_litellm_inference
JoelNiklaus Nov 26, 2024
190738f
Fixed issue with too many concurrent calls to APIs.
JoelNiklaus Nov 27, 2024
2bb1917
Merge branch 'main' into add_litellm_inference
clefourrier Nov 28, 2024
81e4404
Merge branch 'main' into add_litellm_inference
JoelNiklaus Dec 4, 2024
ebdd900
Merge branch 'main' into add_litellm_inference
NathanHB Dec 5, 2024
251e181
few fixes
NathanHB Dec 6, 2024
47b1888
Fixed issues with stop_sequence, max_completion_tokens and system_pro…
JoelNiklaus Dec 9, 2024
20a1191
Merge branch 'main' into add_litellm_inference
JoelNiklaus Dec 9, 2024
ade8f0c
Revert weird change to __main__.py.
JoelNiklaus Dec 9, 2024
a2587d6
Made configuration simpler.
JoelNiklaus Dec 9, 2024
7c0856e
Merge branch 'main' into add_litellm_inference
JoelNiklaus Dec 12, 2024
932fd2c
Fixed import issues.
JoelNiklaus Dec 12, 2024
8fc9b13
Merge branch 'main' into add_litellm_inference
NathanHB Dec 16, 2024
45d6d1d
fix import location
NathanHB Dec 16, 2024
2a23836
Merge branch 'add_litellm_inference' of github.com:JoelNiklaus/lighte…
NathanHB Dec 16, 2024
cca1446
Merge branch 'main' into add_litellm_inference
JoelNiklaus Dec 16, 2024
1a10351
Enabled passing through system prompt to the models in the requests.
JoelNiklaus Dec 16, 2024
ff6d5de
Fixed some bugs.
JoelNiklaus Dec 17, 2024
8d831b8
Merge branch 'main' into add_litellm_inference
JoelNiklaus Dec 17, 2024
5115403
Made litellm inference robust to content management errors.
JoelNiklaus Dec 17, 2024
78789c1
allow better message management for litellm
NathanHB Dec 17, 2024
3ebff6c
Merge branch 'main' into add_litellm_inference
NathanHB Dec 17, 2024
be77b15
allow system prompt to be passed to litellm models
NathanHB Dec 17, 2024
21d6112
Merge branch 'main' into add_litellm_inference
JoelNiklaus Dec 17, 2024
d045d92
use system prompt from the request and use litellm encode function as…
NathanHB Dec 18, 2024
f1ed682
fixes from review
NathanHB Dec 18, 2024
ec306fd
Merge branch 'add_litellm_inference' of github.com:JoelNiklaus/lighte…
NathanHB Dec 18, 2024
bae4506
fix tests
NathanHB Dec 18, 2024
6b0cb60
fix tests
NathanHB Dec 18, 2024
c826b0e
Merge branch 'main' into add_litellm_inference
JoelNiklaus Dec 18, 2024
a6747f4
remove unnecessary doc
NathanHB Dec 19, 2024
5554787
Merge branch 'add_litellm_inference' of github.com:JoelNiklaus/lighte…
NathanHB Dec 19, 2024
5b2b72d
Update src/lighteval/models/litellm_model.py
NathanHB Dec 19, 2024
0265a74
Update src/lighteval/models/litellm_model.py
NathanHB Dec 19, 2024
4fa8311
Merge branch 'main' into add_litellm_inference
NathanHB Dec 19, 2024
86dd849
Support retrying of empty cached model responses.
JoelNiklaus Dec 21, 2024
db983e3
Merge branch 'main' into add_litellm_inference
JoelNiklaus Dec 22, 2024
221d5d5
Fixed error when stop sequence is None.
JoelNiklaus Dec 22, 2024
81f02ca
Added support for litellm as judge backend.
JoelNiklaus Dec 22, 2024
37 changes: 31 additions & 6 deletions src/lighteval/__main__.py
@@ -38,9 +38,22 @@
logging_config = dict( # noqa C408
version=1,
formatters={
"c": {
"json": {
"()": "lighteval.logger.JSONFormatter",
"fmt_keys": {
"level": "levelname",
"message": "message",
"timestamp": "timestamp",
"logger": "name",
"module": "module",
"function": "funcName",
"line": "lineno",
"thread_name": "threadName",
},
},
"colored": {
"()": colorlog.ColoredFormatter,
"format": "[%(asctime)s] [%(log_color)s%(levelname)8s%(reset)s]: %(message)s (%(filename)s:%(lineno)s)",
"format": "[%(asctime)s] [%(log_color)s%(levelname)8s%(reset)s]: %(message)s",
"log_colors": {
"DEBUG": "cyan",
"INFO": "green",
@@ -50,10 +63,22 @@
},
},
},
handlers={"h": {"class": "logging.StreamHandler", "formatter": "c", "level": logging.INFO}},
root={
"handlers": ["h"],
"level": logging.INFO,
handlers={
"stdout": {"class": "logging.StreamHandler", "formatter": "colored", "level": logging.INFO},
"file": {
"class": "logging.handlers.RotatingFileHandler",
"formatter": "json",
"level": logging.INFO,
"filename": "lighteval.log",
"maxBytes": 10485760,
"backupCount": 2,
},
},
loggers={
"root": {
"handlers": ["stdout", "file"],
"level": logging.INFO,
}
},
)
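The "json" formatter above points at lighteval.logger.JSONFormatter, which is not part of this diff. A minimal sketch of what such a fmt_keys-driven formatter could look like (hypothetical, for illustration only; the real implementation may differ):

import datetime
import json
import logging


class JSONFormatter(logging.Formatter):
    """Render each LogRecord as a JSON object, mapping output keys to record attributes via fmt_keys."""

    def __init__(self, fmt_keys=None):
        super().__init__()
        self.fmt_keys = fmt_keys or {}

    def format(self, record: logging.LogRecord) -> str:
        # "message" and "timestamp" are derived; everything else is read straight off the record.
        derived = {
            "message": record.getMessage(),
            "timestamp": datetime.datetime.fromtimestamp(
                record.created, tz=datetime.timezone.utc
            ).isoformat(),
        }
        payload = {
            key: derived.get(attr, getattr(record, attr, None))
            for key, attr in self.fmt_keys.items()
        }
        return json.dumps(payload, default=str)

With logging.config.dictConfig, the "()" key names the factory to instantiate and the remaining keys of the formatter entry (here fmt_keys) are passed to it as keyword arguments.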

108 changes: 108 additions & 0 deletions src/lighteval/main_endpoint.py
@@ -390,3 +390,111 @@ def tgi(
pipeline.save_and_push_results()

return results


@app.command(rich_help_panel="Evaluation Backends")
def litellm(
# === general ===
provider: Annotated[str, Argument(help="Provider to route requests through via LiteLLM.")],
model: Annotated[str, Argument(help="Model name to evaluate via LiteLLM.")],
tasks: Annotated[str, Argument(help="Comma-separated list of tasks to evaluate on.")],
# === Common parameters ===
use_chat_template: Annotated[
bool, Option(help="Use chat template for evaluation.", rich_help_panel=HELP_PANNEL_NAME_4)
] = False,
system_prompt: Annotated[
Optional[str], Option(help="Use system prompt for evaluation.", rich_help_panel=HELP_PANNEL_NAME_4)
] = None,
dataset_loading_processes: Annotated[
int, Option(help="Number of processes to use for dataset loading.", rich_help_panel=HELP_PANNEL_NAME_1)
] = 1,
custom_tasks: Annotated[
Optional[str], Option(help="Path to custom tasks directory.", rich_help_panel=HELP_PANNEL_NAME_1)
] = None,
cache_dir: Annotated[
str, Option(help="Cache directory for datasets and models.", rich_help_panel=HELP_PANNEL_NAME_1)
] = CACHE_DIR,
num_fewshot_seeds: Annotated[
int, Option(help="Number of seeds to use for few-shot evaluation.", rich_help_panel=HELP_PANNEL_NAME_1)
] = 1,
# === saving ===
output_dir: Annotated[
str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANNEL_NAME_2)
] = "results",
push_to_hub: Annotated[
bool, Option(help="Push results to the huggingface hub.", rich_help_panel=HELP_PANNEL_NAME_2)
] = False,
push_to_tensorboard: Annotated[
bool, Option(help="Push results to tensorboard.", rich_help_panel=HELP_PANNEL_NAME_2)
] = False,
public_run: Annotated[
bool, Option(help="Push results and details to a public repo.", rich_help_panel=HELP_PANNEL_NAME_2)
] = False,
results_org: Annotated[
Optional[str], Option(help="Organization to push results to.", rich_help_panel=HELP_PANNEL_NAME_2)
] = None,
save_details: Annotated[
bool, Option(help="Save detailed, sample per sample, results.", rich_help_panel=HELP_PANNEL_NAME_2)
] = False,
# === debug ===
max_samples: Annotated[
Optional[int], Option(help="Maximum number of samples to evaluate on.", rich_help_panel=HELP_PANNEL_NAME_3)
] = None,
override_batch_size: Annotated[
int, Option(help="Override batch size for evaluation.", rich_help_panel=HELP_PANNEL_NAME_3)
] = -1,
job_id: Annotated[
int, Option(help="Optional job id for future reference.", rich_help_panel=HELP_PANNEL_NAME_3)
] = 0,
):
"""
Evaluate models using LiteLLM as backend.
"""

from lighteval.logging.evaluation_tracker import EvaluationTracker
from lighteval.models.model_config import LiteLLMModelConfig
from lighteval.pipeline import EnvConfig, ParallelismManager, Pipeline, PipelineParameters

env_config = EnvConfig(token=TOKEN, cache_dir=cache_dir)
evaluation_tracker = EvaluationTracker(
output_dir=output_dir,
save_details=save_details,
push_to_hub=push_to_hub,
push_to_tensorboard=push_to_tensorboard,
public=public_run,
hub_results_org=results_org,
)

# TODO (nathan): better handling of model_args
parallelism_manager = ParallelismManager.NONE

model_config = LiteLLMModelConfig(provider=provider, model=model)

pipeline_params = PipelineParameters(
launcher_type=parallelism_manager,
env_config=env_config,
job_id=job_id,
dataset_loading_processes=dataset_loading_processes,
custom_tasks_directory=custom_tasks,
override_batch_size=override_batch_size,
num_fewshot_seeds=num_fewshot_seeds,
max_samples=max_samples,
use_chat_template=use_chat_template,
system_prompt=system_prompt,
)
pipeline = Pipeline(
tasks=tasks,
pipeline_parameters=pipeline_params,
evaluation_tracker=evaluation_tracker,
model_config=model_config,
)

pipeline.evaluate()

pipeline.show_results()

results = pipeline.get_results()

pipeline.save_and_push_results()

return results
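For reference, a minimal programmatic sketch of what this command wires together (provider, model, and task strings below are placeholders, and some constructor defaults are assumed rather than taken from this diff):

from lighteval.logging.evaluation_tracker import EvaluationTracker
from lighteval.models.model_config import LiteLLMModelConfig
from lighteval.pipeline import EnvConfig, ParallelismManager, Pipeline, PipelineParameters

# Placeholder values; in the CLI these come from the typer arguments above.
env_config = EnvConfig(token=None, cache_dir="~/.cache/huggingface")
evaluation_tracker = EvaluationTracker(output_dir="results", save_details=False)
model_config = LiteLLMModelConfig(provider="openai", model="gpt-4o-mini")

pipeline_params = PipelineParameters(
    launcher_type=ParallelismManager.NONE,
    env_config=env_config,
    use_chat_template=True,
)
pipeline = Pipeline(
    tasks="leaderboard|truthfulqa:mc|0|0",  # placeholder task string
    pipeline_parameters=pipeline_params,
    evaluation_tracker=evaluation_tracker,
    model_config=model_config,
)
pipeline.evaluate()
pipeline.show_results()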
30 changes: 12 additions & 18 deletions src/lighteval/models/litellm_model.py
@@ -20,6 +20,7 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import logging
import os
import time
from concurrent.futures import ThreadPoolExecutor
@@ -29,7 +30,6 @@
from transformers import AutoTokenizer

from lighteval.data import GenerativeTaskDataset
from lighteval.logging.hierarchical_logger import hlog_warn
from lighteval.models.abstract_model import LightevalModel
from lighteval.models.endpoint_model import ModelInfo
from lighteval.models.model_output import (
@@ -46,14 +46,14 @@
from lighteval.utils.imports import is_litellm_available


if is_litellm_available():
import logging
logger = logging.getLogger(__name__)

if is_litellm_available():
import litellm
from litellm.caching.caching import Cache

logging.getLogger("litellm").setLevel(logging.ERROR)
logging.getLogger("httpx").setLevel(logging.ERROR)
logging.getLogger("LiteLLM").setLevel(logging.WARNING)
logging.getLogger("LiteLLM").handlers.clear()

litellm.cache = Cache(type="disk")

@@ -84,6 +84,7 @@ def __init__(self, config, env_config) -> None:
self._tokenizer = AutoTokenizer.from_pretrained("gpt2") # Use a dummy tokenizer for compatibility
self.pairwise_tokenization = False
litellm.drop_params = True
litellm.verbose = True

def __call_api(self, prompt, return_logits, max_new_tokens, num_samples, stop_sequence, generation_size):
for attempt in range(self.API_MAX_RETRY):
@@ -98,25 +99,18 @@ def __call_api(self, prompt, return_logits, max_new_tokens, num_samples, stop_se
temperature=self.TEMPERATURE,
top_p=self.TOP_P,
stop=["\n"] if stop_sequence is None else stop_sequence,
max_completion_tokens=generation_size if generation_size > 0 else None,
# max_completion_tokens=generation_size if generation_size > 0 else None,
caching=True,
)
return response
except litellm.exceptions.RateLimitError:
if attempt == self.API_MAX_RETRY - 1:
raise
except Exception as e:
wait_time = min(64, self.API_RETRY_SLEEP * (2**attempt)) # Exponential backoff with max 64s
hlog_warn(
f"Rate limit hit. Waiting {wait_time} seconds before retry {attempt + 1}/{self.API_MAX_RETRY}"
logger.warning(
f"Error in API call: {e}, waiting {wait_time} seconds before retry {attempt + 1}/{self.API_MAX_RETRY}"
)
time.sleep(wait_time)
except Exception as e:
hlog_warn(f"{type(e), e}")
if attempt == self.API_MAX_RETRY - 1:
raise
wait_time = self.API_RETRY_SLEEP * (self.API_RETRY_MULTIPLIER**attempt)
hlog_warn(f"Retrying in {wait_time} seconds")
time.sleep(wait_time)

logger.error(f"API call failed after {self.API_MAX_RETRY} attempts, skipping entry.")

def __call_api_parallel(
self,
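Stripped of class state, the retry logic in __call_api above boils down to the following pattern (a sketch; the retry constants are class attributes not shown in this diff, so the values here are assumptions):

import logging
import time

import litellm

logger = logging.getLogger(__name__)

API_MAX_RETRY = 5    # assumed value; defined on the model class, not shown in this diff
API_RETRY_SLEEP = 3  # assumed value


def completion_with_backoff(**completion_kwargs):
    for attempt in range(API_MAX_RETRY):
        try:
            # caching=True lets litellm serve repeated prompts from the disk cache configured above.
            return litellm.completion(caching=True, **completion_kwargs)
        except Exception as e:
            wait_time = min(64, API_RETRY_SLEEP * (2**attempt))  # exponential backoff capped at 64s
            logger.warning(
                f"Error in API call: {e}, waiting {wait_time} seconds before retry {attempt + 1}/{API_MAX_RETRY}"
            )
            time.sleep(wait_time)
    logger.error(f"API call failed after {API_MAX_RETRY} attempts, skipping entry.")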
2 changes: 1 addition & 1 deletion src/lighteval/models/model_config.py
@@ -317,4 +317,4 @@ def get_dtype_args(self) -> Dict[str, str]:
return {}

def get_custom_env_vars(self) -> Dict[str, str]:
return {k: str(v) for k, v in self.env_vars.items()} if self.env_vars else {}
return {k: str(v) for k, v in self.env_vars.items()} if self.env_vars else {}
6 changes: 6 additions & 0 deletions src/lighteval/models/vllm_model.py
@@ -54,6 +54,12 @@
from vllm import LLM, SamplingParams
from vllm.distributed.parallel_state import destroy_distributed_environment, destroy_model_parallel
from vllm.transformers_utils.tokenizer import get_tokenizer

logging.getLogger("vllm").propagate = True
logging.getLogger("vllm").handlers.clear()

logging.getLogger("ray").propagate = True
logging.getLogger("ray").handlers.clear()
else:
LLM = None
SamplingParams = None
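The two blocks above apply the same routing pattern: drop the library's own handlers and let its records propagate up to the root handlers configured in __main__.py. As a standalone sketch:

import logging

for name in ("vllm", "ray"):
    third_party_logger = logging.getLogger(name)
    third_party_logger.handlers.clear()  # drop the library's own handlers
    third_party_logger.propagate = True  # bubble records up to the root handlers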
9 changes: 4 additions & 5 deletions src/lighteval/utils/imports.py
@@ -65,7 +65,9 @@ def is_tensorboardX_available() -> bool:
return importlib.util.find_spec("tensorboardX") is not None


NO_TENSORBOARDX_WARN_MSG = "You are trying to log using tensorboardX, which is not installed. Please install it using pip. Skipping."
NO_TENSORBOARDX_WARN_MSG = (
"You are trying to log using tensorboardX, which is not installed. Please install it using pip. Skipping."
)


def is_openai_available() -> bool:
@@ -83,10 +85,7 @@ def is_litellm_available() -> bool:


def is_vllm_available() -> bool:
return (
importlib.util.find_spec("vllm") is not None
and importlib.util.find_spec("ray") is not None
)
return importlib.util.find_spec("vllm") is not None and importlib.util.find_spec("ray") is not None


NO_VLLM_ERROR_MSG = "You are trying to use an VLLM model, for which you need `vllm` and `ray`, which are not available in your environment. Please install them using pip, `pip install vllm ray`."
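A typical call-site for these availability helpers pairs the check with its error message; a minimal sketch:

from lighteval.utils.imports import NO_VLLM_ERROR_MSG, is_vllm_available

if not is_vllm_available():
    raise ImportError(NO_VLLM_ERROR_MSG)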