From 99558a50d1df3c8161476259a2b621dd3cba998b Mon Sep 17 00:00:00 2001 From: Bethany Connolly Date: Mon, 3 Apr 2023 17:01:04 +0000 Subject: [PATCH 1/6] working on eval pipeline --- .../stability-cluster/env_creation_eval.sh | 29 ++++++++++++++++ .../scripts/stability-cluster/run_eval.sh | 34 +++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 experiments/scripts/stability-cluster/env_creation_eval.sh create mode 100644 experiments/scripts/stability-cluster/run_eval.sh diff --git a/experiments/scripts/stability-cluster/env_creation_eval.sh b/experiments/scripts/stability-cluster/env_creation_eval.sh new file mode 100644 index 000000000..a9efa77dd --- /dev/null +++ b/experiments/scripts/stability-cluster/env_creation_eval.sh @@ -0,0 +1,29 @@ +#! /bin/bash +### This script creates a conda environment for chemnlp +### The first arg ($1) is the prefix directory where the environment is saved +### The second arg ($2) is the directory to use when building the environment + +## Must already have miniconda installed! +export CONDA_ENV_PATH=/fsx/proj-chemnlp/$1/conda/env/chemnlp-standard +export PYTHON_VER=3.8 +CUDA_VERSION=11.7 +CONDA_BASE=$(conda info --base) + +## ensure we can use activate syntax in slurm scripts +source $CONDA_BASE/etc/profile.d/conda.sh + +# Create Python environment through conda +if [ -d "${CONDA_ENV_PATH}" ]; then rm -Rf ${CONDA_ENV_PATH}; fi +conda create --force --prefix ${CONDA_ENV_PATH} python=${PYTHON_VER} -y +conda activate ${CONDA_ENV_PATH} + +# Python requirements +## cd into your directory inside of proj-chemnlp +cd /fsx/proj-chemnlp/$2 + +## clone + submodules (ok if exists) +[ ! -d 'chemnlp' ] && git clone --recurse-submodules --remote-submodules git@github.com:OpenBioML/chemnlp.git + +## install +# conda install -y pytorch torchvision torchaudio cudatoolkit=${CUDA_VERSION} -c pytorch -c conda-forge +pip install chemnlp/lm-eval2 diff --git a/experiments/scripts/stability-cluster/run_eval.sh b/experiments/scripts/stability-cluster/run_eval.sh new file mode 100644 index 000000000..6c25cbb8c --- /dev/null +++ b/experiments/scripts/stability-cluster/run_eval.sh @@ -0,0 +1,34 @@ +#! /bin/bash +#SBATCH --job-name="chemtest" +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=12 +# #SBATCH --gres=gpu:2 +#SBATCH --output=/fsx/proj-chemnlp/experiments/logs/testing_beth.out +#SBATCH --error=/fsx/proj-chemnlp/experiments/logs/testing_beth.err +#SBATCH --open-mode=append +#SBATCH --account=chemnlp +#SBATCH --partition=g40 +#SBATCH --exclusive +# #SBATCH --nodelist=ip-26-0-128-[46,48,85,93-94,101,106,111,123,136,142-143,168-169,175,183,189,211,215,223,231,244],ip-26-0-129-[0-1,4,6,11,45,48,60,81-82,84-85,94,105],ip-26-0-130-[183,193],ip-26-0-131-[4-5,38,51,77,85,89,107-108,111-112,130,143,150-152,168,182-183,188],ip-26-0-132-[130,139,141-142,149,154,184],ip-26-0-133-[159-160,226,242],ip-26-0-134-[0,26-27,43,52,61],ip-26-0-137-[92,94,97,102,115-116,121,124,139,168,175],ip-26-0-139-[191,200,214,216,218,226,229,235,237,241,246],ip-26-0-142-[106,125,144,146,166,184,186,198,204,217,235,237,246,251,254],ip-26-0-143-[30,39,46,53,61,66,145,164,171,175,180,206,225,230,235,250],ip-26-0-129-122,ip-26-0-130-[12-13,19,116,127,132,134,147-148,150,163-164],ip-26-0-131-[239-240,244,247],ip-26-0-132-[7,10,21,37,93,98,107,118],ip-26-0-133-[67,76,81,89,111,115,126,131-133,140,145,148,151],ip-26-0-134-[66,76,83,90-91,105,120,134,141,157,201,219,226-227,248,254],ip-26-0-135-[1,4,22,49,55,64,67,110,118,163,173,184,186,190,192-193,204,208,219,242,255],ip-26-0-136-13,ip-26-0-137-[176,184,196,212,214,240],ip-26-0-138-[3,13,51,62,66,69,71,79,93,101,159,166,171,178,186,188,208,213],ip-26-0-141-[140,146,157,161,166,178,217,228,247],ip-26-0-142-[3,13,21,24,29,33,36,38,41,45,49,67,71,103],ip-26-0-143-[111,121],ip-26-0-128-146,ip-26-0-137-76 + +### This script runs lm_eval2 experiments +### The first arg ($1) is the prefix directory where the environment is saved +### The second arg ($2) is the directory to use when building the environment +### The third arg ($3) is the name of the eval config.yaml file + +set -ex # allow for exiting based on non-0 codes + +# set workdir +CHEMNLP_PATH=/fsx/proj-chemnlp/$1/chemnlp + +# create environment +source $CHEMNLP_PATH/experiments/scripts/stability-cluster/env_creation_eval.sh $1 $2 + +# trigger run +cd $CHEMNLP_PATH/lm-eval2 +python main.py \ + --model hf-causal \ + --model_args pretrained=EleutherAI/pythia-160m \ + --tasks arc_challenge \ + --device 0 \ \ No newline at end of file From 41264ee155d4c16ae5857f28adf28f914a3747c7 Mon Sep 17 00:00:00 2001 From: Bethany Connolly Date: Tue, 4 Apr 2023 12:06:01 +0000 Subject: [PATCH 2/6] scripts for running eval pipeline --- experiments/scripts/stability-cluster/env_creation_eval.sh | 2 +- experiments/scripts/stability-cluster/run_eval.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/experiments/scripts/stability-cluster/env_creation_eval.sh b/experiments/scripts/stability-cluster/env_creation_eval.sh index a9efa77dd..8289ef8d2 100644 --- a/experiments/scripts/stability-cluster/env_creation_eval.sh +++ b/experiments/scripts/stability-cluster/env_creation_eval.sh @@ -25,5 +25,5 @@ cd /fsx/proj-chemnlp/$2 [ ! -d 'chemnlp' ] && git clone --recurse-submodules --remote-submodules git@github.com:OpenBioML/chemnlp.git ## install -# conda install -y pytorch torchvision torchaudio cudatoolkit=${CUDA_VERSION} -c pytorch -c conda-forge +conda install pytorch torchvision torchaudio pytorch-cuda=${CUDA_VERSION} -c pytorch -c nvidia --verbose pip install chemnlp/lm-eval2 diff --git a/experiments/scripts/stability-cluster/run_eval.sh b/experiments/scripts/stability-cluster/run_eval.sh index 6c25cbb8c..b6d3e6f27 100644 --- a/experiments/scripts/stability-cluster/run_eval.sh +++ b/experiments/scripts/stability-cluster/run_eval.sh @@ -30,5 +30,5 @@ cd $CHEMNLP_PATH/lm-eval2 python main.py \ --model hf-causal \ --model_args pretrained=EleutherAI/pythia-160m \ - --tasks arc_challenge \ + --tasks arc_easy \ --device 0 \ \ No newline at end of file From ae71192f22aeff10849e2fae598298033a6a715e Mon Sep 17 00:00:00 2001 From: Bethany Connolly Date: Wed, 5 Apr 2023 09:36:56 +0000 Subject: [PATCH 3/6] updated scripts --- experiments/scripts/stability-cluster/env_creation_eval.sh | 2 +- experiments/scripts/stability-cluster/run_eval.sh | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/experiments/scripts/stability-cluster/env_creation_eval.sh b/experiments/scripts/stability-cluster/env_creation_eval.sh index 8289ef8d2..74dfbef8e 100644 --- a/experiments/scripts/stability-cluster/env_creation_eval.sh +++ b/experiments/scripts/stability-cluster/env_creation_eval.sh @@ -26,4 +26,4 @@ cd /fsx/proj-chemnlp/$2 ## install conda install pytorch torchvision torchaudio pytorch-cuda=${CUDA_VERSION} -c pytorch -c nvidia --verbose -pip install chemnlp/lm-eval2 +pip install -e chemnlp/lm-eval2 diff --git a/experiments/scripts/stability-cluster/run_eval.sh b/experiments/scripts/stability-cluster/run_eval.sh index b6d3e6f27..3f61f6904 100644 --- a/experiments/scripts/stability-cluster/run_eval.sh +++ b/experiments/scripts/stability-cluster/run_eval.sh @@ -31,4 +31,7 @@ python main.py \ --model hf-causal \ --model_args pretrained=EleutherAI/pythia-160m \ --tasks arc_easy \ - --device 0 \ \ No newline at end of file + --device 0 \ + --wandb_log True \ + --wandb_project LLCheM \ + --wandb_run_name testing_eval_pipeline_3 \ \ No newline at end of file From 128d6e65ed971232e5808fbef845e5d52a566cde Mon Sep 17 00:00:00 2001 From: Bethany Connolly Date: Wed, 5 Apr 2023 10:30:41 +0000 Subject: [PATCH 4/6] ran precommit on scripts --- experiments/scripts/stability-cluster/env_creation_eval.sh | 2 +- experiments/scripts/stability-cluster/run_eval.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/experiments/scripts/stability-cluster/env_creation_eval.sh b/experiments/scripts/stability-cluster/env_creation_eval.sh index 74dfbef8e..c654520ad 100644 --- a/experiments/scripts/stability-cluster/env_creation_eval.sh +++ b/experiments/scripts/stability-cluster/env_creation_eval.sh @@ -24,6 +24,6 @@ cd /fsx/proj-chemnlp/$2 ## clone + submodules (ok if exists) [ ! -d 'chemnlp' ] && git clone --recurse-submodules --remote-submodules git@github.com:OpenBioML/chemnlp.git -## install +## install conda install pytorch torchvision torchaudio pytorch-cuda=${CUDA_VERSION} -c pytorch -c nvidia --verbose pip install -e chemnlp/lm-eval2 diff --git a/experiments/scripts/stability-cluster/run_eval.sh b/experiments/scripts/stability-cluster/run_eval.sh index 3f61f6904..2701987a7 100644 --- a/experiments/scripts/stability-cluster/run_eval.sh +++ b/experiments/scripts/stability-cluster/run_eval.sh @@ -15,7 +15,7 @@ ### This script runs lm_eval2 experiments ### The first arg ($1) is the prefix directory where the environment is saved ### The second arg ($2) is the directory to use when building the environment -### The third arg ($3) is the name of the eval config.yaml file +### The third arg ($3) is the name of the eval config.yaml file set -ex # allow for exiting based on non-0 codes @@ -34,4 +34,4 @@ python main.py \ --device 0 \ --wandb_log True \ --wandb_project LLCheM \ - --wandb_run_name testing_eval_pipeline_3 \ \ No newline at end of file + --wandb_run_name testing_eval_pipeline_3 \ From edaa598455d6e8d411ff3a7dd593bcf36c1c4e0e Mon Sep 17 00:00:00 2001 From: Bethany Connolly Date: Wed, 5 Apr 2023 13:41:12 +0000 Subject: [PATCH 5/6] eval script updated to take .yaml args as input --- experiments/scripts/stability-cluster/run_eval.sh | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/experiments/scripts/stability-cluster/run_eval.sh b/experiments/scripts/stability-cluster/run_eval.sh index 2701987a7..4b2a3d90d 100644 --- a/experiments/scripts/stability-cluster/run_eval.sh +++ b/experiments/scripts/stability-cluster/run_eval.sh @@ -4,8 +4,8 @@ #SBATCH --ntasks-per-node=1 #SBATCH --cpus-per-task=12 # #SBATCH --gres=gpu:2 -#SBATCH --output=/fsx/proj-chemnlp/experiments/logs/testing_beth.out -#SBATCH --error=/fsx/proj-chemnlp/experiments/logs/testing_beth.err +#SBATCH --output=/fsx/proj-chemnlp/experiments/logs/testing_%j.out +#SBATCH --error=/fsx/proj-chemnlp/experiments/logs/testing_%j.err #SBATCH --open-mode=append #SBATCH --account=chemnlp #SBATCH --partition=g40 @@ -27,11 +27,4 @@ source $CHEMNLP_PATH/experiments/scripts/stability-cluster/env_creation_eval.sh # trigger run cd $CHEMNLP_PATH/lm-eval2 -python main.py \ - --model hf-causal \ - --model_args pretrained=EleutherAI/pythia-160m \ - --tasks arc_easy \ - --device 0 \ - --wandb_log True \ - --wandb_project LLCheM \ - --wandb_run_name testing_eval_pipeline_3 \ +python main.py $3 From 78ebba262a106924e2365d5d8839dbf965af0ce1 Mon Sep 17 00:00:00 2001 From: Bethany Connolly Date: Tue, 11 Apr 2023 12:50:25 +0000 Subject: [PATCH 6/6] removed --remote_submodules tag from env script --- experiments/scripts/stability-cluster/env_creation_eval.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experiments/scripts/stability-cluster/env_creation_eval.sh b/experiments/scripts/stability-cluster/env_creation_eval.sh index c654520ad..b41f1e4ac 100644 --- a/experiments/scripts/stability-cluster/env_creation_eval.sh +++ b/experiments/scripts/stability-cluster/env_creation_eval.sh @@ -22,7 +22,7 @@ conda activate ${CONDA_ENV_PATH} cd /fsx/proj-chemnlp/$2 ## clone + submodules (ok if exists) -[ ! -d 'chemnlp' ] && git clone --recurse-submodules --remote-submodules git@github.com:OpenBioML/chemnlp.git +[ ! -d 'chemnlp' ] && git clone --recurse-submodules git@github.com:OpenBioML/chemnlp.git ## install conda install pytorch torchvision torchaudio pytorch-cuda=${CUDA_VERSION} -c pytorch -c nvidia --verbose