Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Setting up eval pipeline #1

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion main.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,4 +96,4 @@ def main():


if __name__ == "__main__":
main()
main()
90 changes: 90 additions & 0 deletions main_eval.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import argparse
import fnmatch
import json
import logging
from pathlib import Path
from typing import Optional, Union

import wandb
import yaml
from pydantic import BaseModel

from lm_eval import tasks, evaluator

logging.getLogger("openai").setLevel(logging.WARNING)


def load_config(path: Union[str, Path]) -> dict:
    """Parse the YAML config file at *path* and return its contents.

    Args:
        path: Filesystem path to a YAML file.

    Returns:
        The parsed YAML document (typically a dict of config keys).

    Raises:
        yaml.YAMLError: If the file is not valid YAML. The error is printed
            and then re-raised; the original code swallowed it and implicitly
            returned None, which made the caller fail later with a confusing
            ``EvalPipelineConfig(**None)`` TypeError far from the real cause.
    """
    with open(path, "r") as stream:
        try:
            return yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            print(exc)
            raise


class EvalPipelineConfig(BaseModel):
    """Validated settings for one lm-eval run, loaded from a YAML file.

    Fields mirror the keyword arguments of ``evaluator.simple_evaluate``
    plus Weights & Biases logging options.
    """

    # NOTE: the original used e.g. ``tasks: str = None``, which relies on
    # pydantic v1's deprecated implicit-Optional behavior and is rejected by
    # pydantic v2; Optional[...] is explicit and compatible with both.
    model: str  # lm-eval model identifier passed to simple_evaluate
    model_args: str = ""  # comma-separated model argument string
    tasks: Optional[str] = None  # comma-separated task patterns; None = all tasks
    num_fewshot: int = 0
    batch_size: Optional[int] = None
    device: Optional[str] = None
    limit: Optional[int] = None  # cap on examples per task, for quick runs
    decontamination_ngrams_path: Optional[str] = None
    check_integrity: bool = False
    wandb_log: bool = False  # if True, wandb_project and wandb_run_name are required
    wandb_project: Optional[str] = None
    wandb_run_name: Optional[str] = None


def pattern_match(patterns, source_list):
    """Return the values of *source_list* matching at least one glob pattern.

    Args:
        patterns: Iterable of fnmatch-style glob patterns (e.g. ``"lambada*"``).
        source_list: Candidate names to filter.

    Returns:
        A deduplicated list of matching names (order is unspecified).
    """
    matched = {
        name
        for glob in patterns
        for name in fnmatch.filter(source_list, glob)
    }
    return list(matched)


def main(config_path: str) -> None:
    """Run the lm-eval harness with settings from a YAML config file.

    Args:
        config_path: Path to a YAML file whose keys match EvalPipelineConfig.

    Raises:
        ValueError: If ``wandb_log`` is enabled without both ``wandb_project``
            and ``wandb_run_name``.
    """
    raw_config = load_config(config_path)
    args = EvalPipelineConfig(**raw_config)

    if args.wandb_log:
        # Raise explicitly instead of using `assert`, which is stripped when
        # Python runs with -O and would let a half-configured run proceed.
        if args.wandb_project is None or args.wandb_run_name is None:
            raise ValueError(
                "wandb_log=True requires both wandb_project and wandb_run_name"
            )
        # Pass a plain dict so wandb records each config field individually;
        # passing the pydantic model itself is not dict-like in v1.
        wandb.init(
            project=args.wandb_project,
            name=args.wandb_run_name,
            config=args.dict(),
        )

    if args.tasks is None:
        task_names = tasks.ALL_TASKS
    else:
        task_names = pattern_match(args.tasks.split(","), tasks.ALL_TASKS)

    print(f"Selected Tasks: {task_names}")

    results = evaluator.simple_evaluate(
        model=args.model,
        model_args=args.model_args,
        tasks=task_names,
        num_fewshot=args.num_fewshot,
        batch_size=args.batch_size,
        device=args.device,
        limit=args.limit,
        decontamination_ngrams_path=args.decontamination_ngrams_path,
        check_integrity=args.check_integrity,
    )

    dumped = json.dumps(results, indent=2)
    print(dumped)

    if args.wandb_log:
        # TODO: where is "filter" coming from?
        for task, metrics in results["results"].items():
            # Log under the bare task name; split() drops any suffix that
            # lm-eval appends after whitespace in the result key.
            wandb.log({task.split()[0]: metrics})


if __name__ == "__main__":
    # Parse the single positional CLI argument and hand it to main().
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("config_path", help="The full path to the YAML config file.")
    cli_args = arg_parser.parse_args()
    main(cli_args.config_path)
2 changes: 2 additions & 0 deletions setup.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
"omegaconf>=2.2",
"pybind11>=2.6.2",
"pycountry",
"pydantic",
"pytablewriter",
"rouge-score>=0.0.4",
"sacrebleu==1.5.0",
Expand All @@ -36,6 +37,7 @@
"torch>=1.7",
"tqdm-multiprocess",
"transformers>=4.1",
"wandb",
"zstandard",
],
extras_require={
Expand Down