Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions mason.py
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,8 @@ def remove_arg_from_list(lst: List[str], item: str, remove_value: bool = False):
dataset_config_hashes.append(dataset_config_hash)
stderr = result.stderr
return_code = result.returncode
if return_code != 0:
raise Exception(f"Error code {return_code} when creating cached dataset")
console.log("✅✅✅ Finished running the caching command")

if file in OPEN_INSTRUCT_RESUMABLES and idx != -1 and len(args.auto_checkpoint_state_dir) > 0:
Expand Down Expand Up @@ -706,8 +708,13 @@ def remove_arg_from_list(lst: List[str], item: str, remove_value: bool = False):
break

commit_hash = get_commit_hash(model_name_or_path, model_revision, "config.json", "model")
download_from_hf(model_name_or_path, model_revision) # first download the model
path = download_from_hf(model_name_or_path, model_revision) # then get the path
if os.path.exists(model_name_or_path):
path = model_name_or_path
model_name_or_path = os.path.basename(model_name_or_path)
console.log(f"Local model is already downloaded, using path basename as model name {model_name_or_path}, note that commit hash is {commit_hash}")
else:
download_from_hf(model_name_or_path, model_revision) # first download the model
path = download_from_hf(model_name_or_path, model_revision) # then get the path
gs_saved_path = f"gs://ai2-llm/post-training/deletable_cache_models/{model_name_or_path}/{commit_hash}"
gs_folder = gs_folder_exists(gs_saved_path) # race condition exists, but it's fine since we are launching mason sequentially
if not gs_folder:
Expand Down