1 change: 0 additions & 1 deletion examples/model_configs/base_model.yaml
@@ -1,5 +1,4 @@
 model:
-  type: "base" # can be base, tgi, or endpoint
   base_params:
     model_args: "pretrained=HuggingFaceH4/zephyr-7b-beta,revision=main" # pretrained=model_name,trust_remote_code=boolean,revision=revision_to_use,model_parallel=True ...
     dtype: "bfloat16"
1 change: 0 additions & 1 deletion examples/model_configs/endpoint_model.yaml
@@ -1,5 +1,4 @@
 model:
-  type: "endpoint" # can be base, tgi, or endpoint
   base_params:
     endpoint_name: "llama-2-7B-lighteval" # needs to be lower case without special characters
     model: "meta-llama/Llama-2-7b-hf"
1 change: 0 additions & 1 deletion examples/model_configs/peft_model.yaml
@@ -1,5 +1,4 @@
 model:
-  type: "base"
   base_params:
     model_args: "pretrained=predibase/customer_support,revision=main" # pretrained=model_name,trust_remote_code=boolean,revision=revision_to_use,model_parallel=True ... For a PEFT model, the pretrained model should be the one trained with PEFT and the base model below will contain the original model on which the adapters will be applied.
     dtype: "4bit" # Specifying the model to be loaded in 4 bit uses BitsAndBytesConfig. The other option is to use "8bit" quantization.
1 change: 0 additions & 1 deletion examples/model_configs/quantized_model.yaml
@@ -1,5 +1,4 @@
 model:
-  type: "base"
   base_params:
     model_args: "pretrained=HuggingFaceH4/zephyr-7b-beta,revision=main" # pretrained=model_name,trust_remote_code=boolean,revision=revision_to_use,model_parallel=True ...
     dtype: "4bit" # Specifying the model to be loaded in 4 bit uses BitsAndBytesConfig. The other option is to use "8bit" quantization.
1 change: 0 additions & 1 deletion examples/model_configs/tgi_model.yaml
@@ -1,5 +1,4 @@
 model:
-  type: "tgi" # can be base, tgi, or endpoint
   instance:
     inference_server_address: ""
     inference_server_auth: null
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -74,7 +74,7 @@ dependencies = [
     "sentencepiece>=0.1.99",
     "protobuf==3.20.*", # pinned for sentencepiece compat
     "pycountry",
-    "fsspec>=2023.12.2",
+    "fsspec>=2024.10.0",
 ]
 
 [project.optional-dependencies]
@@ -112,4 +112,4 @@ Issues = "https://github.com/huggingface/lighteval/issues"
 # Changelog = "https://github.com/huggingface/lighteval/blob/master/CHANGELOG.md"
 
 [project.scripts]
-lighteval = "lighteval.__main__:cli_evaluate"
+lighteval = "lighteval.__main__:app"
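
The console script now points at the module-level `app` object instead of the `cli_evaluate` function. This works because a `typer.Typer` instance is itself callable. A minimal sketch of the shape such an entry point expects, with a toy `hello` command assumed purely for illustration:

import typer

app = typer.Typer()


@app.command()
def hello(name: str = "world"):
    # Toy command standing in for lighteval's real subcommands.
    print(f"Hello, {name}!")


if __name__ == "__main__":
    app()  # the same callable the console script invokes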
97 changes: 25 additions & 72 deletions src/lighteval/__main__.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-
 # MIT License
 
 # Copyright (c) 2024 Taratra D. RAHARISON and The HuggingFace Team
@@ -22,81 +20,36 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
+import typer
+
-import argparse
-import os
-from dataclasses import asdict
-from pprint import pformat
-
-from lighteval.parsers import parser_accelerate, parser_baseline, parser_nanotron, parser_utils_tasks
-from lighteval.tasks.registry import Registry, taskinfo_selector
-
-
-CACHE_DIR = os.getenv("HF_HOME")
-
-
-def cli_evaluate():  # noqa: C901
-    parser = argparse.ArgumentParser(description="CLI tool for lighteval, a lightweight framework for LLM evaluation")
-    subparsers = parser.add_subparsers(help="help for subcommand", dest="subcommand")
-
-    # Subparser for the "accelerate" command
-    parser_a = subparsers.add_parser("accelerate", help="use accelerate and transformers as backend for evaluation.")
-    parser_accelerate(parser_a)
-
-    # Subparser for the "nanotron" command
-    parser_b = subparsers.add_parser("nanotron", help="use nanotron as backend for evaluation.")
-    parser_nanotron(parser_b)
-
-    parser_c = subparsers.add_parser("baseline", help="compute baseline for a task")
-    parser_baseline(parser_c)
-
-    # Subparser for task utils functions
-    parser_d = subparsers.add_parser("tasks", help="display information about available tasks and samples.")
-    parser_utils_tasks(parser_d)
-
-    args = parser.parse_args()
-
-    if args.subcommand == "accelerate":
-        from lighteval.main_accelerate import main as main_accelerate
-
-        main_accelerate(args)
-
-    elif args.subcommand == "nanotron":
-        from lighteval.main_nanotron import main as main_nanotron
-
-        main_nanotron(args.checkpoint_config_path, args.lighteval_config_path, args.cache_dir)
+import lighteval.main_accelerate
+import lighteval.main_baseline
+import lighteval.main_endpoint
+import lighteval.main_nanotron
+import lighteval.main_tasks
+import lighteval.main_vllm
 
-    elif args.subcommand == "baseline":
-        from lighteval.main_baseline import main as main_baseline
-
-        main_baseline(args)
+app = typer.Typer()
 
-    elif args.subcommand == "tasks":
-        registry = Registry(cache_dir=args.cache_dir, custom_tasks=args.custom_tasks)
-        if args.list:
-            registry.print_all_tasks()
-
-        if args.inspect:
-            print(f"Loading the tasks dataset to cache folder: {args.cache_dir}")
-            print(
-                "All examples will be displayed without few shot, as few shot sample construction requires loading a model and using its tokenizer. "
-            )
-            # Loading task
-            task_names_list, _ = taskinfo_selector(args.inspect, task_registry=registry)
-            task_dict = registry.get_task_dict(task_names_list)
-            for name, task in task_dict.items():
-                print("-" * 10, name, "-" * 10)
-                if args.show_config:
-                    print("-" * 10, "CONFIG")
-                    task.cfg.print()
-                for ix, sample in enumerate(task.eval_docs()[: int(args.num_samples)]):
-                    if ix == 0:
-                        print("-" * 10, "SAMPLES")
-                    print(f"-- sample {ix} --")
-                    print(pformat(asdict(sample), indent=1))
-    else:
-        print("You did not provide any argument. Exiting")
+app.command(rich_help_panel="Evaluation Backends")(lighteval.main_accelerate.accelerate)
+app.command(rich_help_panel="Evaluation Utils")(lighteval.main_baseline.baseline)
+app.command(rich_help_panel="Evaluation Backends")(lighteval.main_nanotron.nanotron)
+app.command(rich_help_panel="Evaluation Backends")(lighteval.main_vllm.vllm)
+app.add_typer(
+    lighteval.main_endpoint.app,
+    name="endpoint",
+    rich_help_panel="Evaluation Backends",
+    help="Evaluate models using some endpoint (tgi, inference endpoint, openai) as backend.",
+)
+app.add_typer(
+    lighteval.main_tasks.app,
+    name="tasks",
+    rich_help_panel="Utils",
+    help="List or inspect tasks.",
+)
 
 
 if __name__ == "__main__":
-    cli_evaluate()
+    app()
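
A note on the registration style above: `app.command(...)` returns a decorator, so it can be applied to an already-imported function rather than used with `@` syntax at the definition site. This keeps all CLI wiring in `__main__.py` while the command logic lives in the backend modules. A self-contained sketch (the `accelerate` signature here is invented for illustration):

import typer

app = typer.Typer()


def accelerate(model_args: str, tasks: str = "all"):
    # Stand-in for lighteval.main_accelerate.accelerate.
    print(f"Evaluating {model_args} on {tasks}")


# Equivalent to decorating `accelerate` with @app.command(...) at its definition.
app.command(rich_help_panel="Evaluation Backends")(accelerate)

if __name__ == "__main__":
    app()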
18 changes: 2 additions & 16 deletions src/lighteval/logging/hierarchical_logger.py
@@ -26,24 +26,10 @@
 from logging import Logger
 from typing import Any, Callable
 
-from lighteval.utils.imports import is_accelerate_available, is_nanotron_available
-
-
-if is_nanotron_available():
-    from nanotron.logging import get_logger
-
-    logger = get_logger(__name__, log_level="INFO")
-elif is_accelerate_available():
-    from accelerate import Accelerator, InitProcessGroupKwargs
-    from accelerate.logging import get_logger
-    from colorama import Fore, Style
-
-    # We must init the accelerator before using the logger
-    accelerator = Accelerator(kwargs_handlers=[InitProcessGroupKwargs(timeout=timedelta(seconds=3000))])
-    logger = get_logger(__name__, log_level="INFO")
-else:
-    logger = Logger(__name__, level="INFO")
-
+from colorama import Fore, Style
+
+logger = Logger(__name__, level="INFO")
 
 
 class HierarchicalLogger:
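
Design note on the simplification above: rather than conditionally building a nanotron or accelerate logger at import time, the module now instantiates a plain stdlib `Logger` directly. One caveat, based on stdlib behavior rather than anything stated in this diff: a directly constructed `Logger` has no handlers attached, so records fall through to logging's last-resort stderr handler unless one is added:

from logging import Logger, StreamHandler

logger = Logger(__name__, level="INFO")  # accepts a level name as well as an int
logger.addHandler(StreamHandler())  # without this, records hit the last-resort handler
logger.info("hierarchical logger ready")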