Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Setting up eval pipeline #1

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 39 additions & 48 deletions main.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -2,46 +2,38 @@
import json
import logging
import fnmatch
import wandb
from pathlib import Path
from typing import Union
import yaml
from pydantic import BaseModel

from lm_eval import tasks, evaluator

logging.getLogger("openai").setLevel(logging.WARNING)


class MultiChoice:
def __init__(self, choices):
self.choices = choices
def load_config(path: Union[str, Path]):
    """Read the YAML file at ``path`` and return its parsed contents.

    Args:
        path: Location of the YAML config file.

    Returns:
        Whatever ``yaml.safe_load`` produces for the document.

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the file is not valid YAML. The error is logged
            and re-raised rather than swallowed, so callers never receive
            an implicit ``None`` in place of a config.
    """
    # Decode explicitly as UTF-8 so parsing does not depend on the locale.
    with open(path, "r", encoding="utf-8") as stream:
        try:
            return yaml.safe_load(stream)
        except yaml.YAMLError:
            logging.exception("Failed to parse YAML config %s", path)
            raise

# Simple wildcard support (linux filename patterns)
def __contains__(self, values):
for value in values.split(","):
if len(fnmatch.filter(self.choices, value)) == 0:
return False

return True

def __iter__(self):
for choice in self.choices:
yield choice


def parse_args():
    """Parse the evaluation options from the process command line.

    Returns:
        The namespace produced by ``ArgumentParser.parse_args()``.
    """
    # (flag, add_argument keyword arguments), registered in this order.
    option_table = [
        ("--model", dict(required=True)),
        ("--model_args", dict(default="")),
        # Membership of the given value is decided by MultiChoice over
        # tasks.ALL_TASKS.
        ("--tasks", dict(default=None, choices=MultiChoice(tasks.ALL_TASKS))),
        ("--provide_description", dict(action="store_true")),
        ("--num_fewshot", dict(type=int, default=0)),
        ("--batch_size", dict(type=int, default=None)),
        ("--device", dict(type=str, default=None)),
        ("--output_path", dict(default=None)),
        ("--limit", dict(type=int, default=None)),
        ("--no_cache", dict(action="store_true")),
        ("--decontamination_ngrams_path", dict(default=None)),
        ("--description_dict_path", dict(default=None)),
        ("--check_integrity", dict(action="store_true")),
    ]

    cli = argparse.ArgumentParser()
    for flag, options in option_table:
        cli.add_argument(flag, **options)
    return cli.parse_args()
class EvalPipelineConfig(BaseModel):
    """Settings for one evaluation run, validated from the YAML config.

    Only ``model`` is required; every other field has a default. Fields
    whose default is ``None`` are annotated as ``Union[T, None]`` so that an
    explicit ``null`` in the config validates the same way as omitting the
    key.
    """

    model: str
    model_args: str = ""
    # None makes main() fall back to tasks.ALL_TASKS.
    # NOTE(review): presumably a comma-separated list of task name patterns,
    # like the former --tasks flag -- confirm against the code that reads it.
    tasks: Union[str, None] = None
    num_fewshot: int = 0
    batch_size: Union[int, None] = None
    device: Union[str, None] = None
    limit: Union[int, None] = None
    decontamination_ngrams_path: Union[str, None] = None
    check_integrity: bool = False
    # When wandb_log is true, main() requires both wandb_project and
    # wandb_run_name to be set before calling wandb.init.
    wandb_log: bool = False
    wandb_project: Union[str, None] = None
    wandb_run_name: Union[str, None] = None


# Returns a list containing all values of the source_list that
Expand All @@ -54,13 +46,14 @@ def pattern_match(patterns, source_list):
return list(task_names)


def main():
args = parse_args()
def main(config_path: str) -> None:

if args.limit:
print(
"WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT."
)
raw_config = load_config(config_path)
args = EvalPipelineConfig(**raw_config)

if args.wandb_log:
assert (args.wandb_project is not None) and (args.wandb_run_name is not None)
wandb.init(project=args.wandb_project, name=args.wandb_run_name, config=args)

if args.tasks is None:
task_names = tasks.ALL_TASKS
Expand All @@ -84,16 +77,14 @@ def main():
dumped = json.dumps(results, indent=2)
print(dumped)

if args.output_path:
with open(args.output_path, "w") as f:
f.write(dumped)

print(
f"{args.model} ({args.model_args}), limit: {args.limit}, provide_description: {args.provide_description}, "
f"num_fewshot: {args.num_fewshot}, batch_size: {args.batch_size}"
)
print(evaluator.make_table(results))
if args.wandb_log:
# TODO: where is "filter" coming from?
for task, metrics in results["results"].items():
wandb.log({task.split()[0]: metrics})


if __name__ == "__main__":
main()
parser = argparse.ArgumentParser()
parser.add_argument("config_path", help="The full path to the YAML config file.")
args = parser.parse_args()
main(args.config_path)
2 changes: 2 additions & 0 deletions setup.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
"omegaconf>=2.2",
"pybind11>=2.6.2",
"pycountry",
"pydantic",
"pytablewriter",
"rouge-score>=0.0.4",
"sacrebleu==1.5.0",
Expand All @@ -36,6 +37,7 @@
"torch>=1.7",
"tqdm-multiprocess",
"transformers>=4.1",
"wandb",
"zstandard",
],
extras_require={
Expand Down