Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 68 additions & 51 deletions tests/auto/rt_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ def parse_args_in():
parser = argparse.ArgumentParser()

# Setup Input Arguments
choices = ['hera.intel', 'orion.intel', 'gaea.intel', 'jet.intel', 'wcoss_dell_p3']
parser.add_argument('-m', '--machine', help='Machine and Compiler combination', required=True, choices=choices, type=str)
choices = ['hera', 'orion', 'gaea', 'jet', 'wcoss_dell_p3']
parser.add_argument('-m', '--machine', help='Machine name', required=True, choices=choices, type=str)
parser.add_argument('-w', '--workdir', help='Working directory', required=True, type=str)

# Get Arguments
Expand Down Expand Up @@ -73,15 +73,19 @@ def input_data(args):

def match_label_with_action(machine, actions, label):
''' Match the label that initiates a job with an action in the dict'''
# <machine>-<compiler>-<test> i.e. hera-gnu-RT
# RT = full regression test suite
logger = logging.getLogger('MATCH_LABEL_WITH_ACTIONS')
split_label = label.name.split('-')

if len(split_label) != 3: return False
if not re.match(split_label[0], 'Auto'): return False
if not re.match(split_label[2], machine['name'].split('.')[0]): return False
action_match = next((action for action in actions if re.match(action['name'], split_label[1])), False)

return action_match
if len(split_label) != 3: return False, False #Make sure it has three parts
if not re.match(split_label[0], machine['name']): return False, False #First check machine name matches
compiler = split_label[1]
if not str(compiler) in ["intel", "gnu"]: return False, False
action_match = next((action for action in actions if re.match(action['name'], split_label[2])), False)
action_match["command"] = f'export RT_COMPILER="{compiler}" && {action_match["command"]}'
if split_label[2] == "RT" and compiler == "gnu":
action_match["command"] = f'{action_match["command"]} -l rt_gnu.conf'
return compiler, action_match


def get_preqs_with_actions(repos, machine, ghinterface_obj, actions):
Expand All @@ -92,9 +96,10 @@ def get_preqs_with_actions(repos, machine, ghinterface_obj, actions):
preq_labels = [{'preq': pr, 'label': label} for pr in each_pr for label in pr.get_labels()]

for i, pr_label in enumerate(preq_labels):
match = match_label_with_action(machine, actions, pr_label['label'])
compiler, match = match_label_with_action(machine, actions, pr_label['label'])
if match:
preq_labels[i]['action'] = match
preq_labels[i]['compiler'] = compiler
else:
preq_labels[i] = False

Expand Down Expand Up @@ -130,8 +135,20 @@ def remove_pr_label(self):
self.logger.info(f'Removing Label: {self.preq_dict["label"]}')
self.preq_dict['preq'].remove_from_labels(self.preq_dict['label'])

def send_log_name_as_comment(self):
def check_label_before_job_start(self):
    ''' Check that the triggering label is still on the pull request

    The label list is fetched again right before the job starts, which
    matters when several jobs are processed one after another.

    Returns:
        The label object whose name equals
        <machine>-<compiler>-<action name>, or False when it is not present.
    '''
    label_to_check = (f'{self.machine["name"]}-{self.preq_dict["compiler"]}'
                      f'-{self.preq_dict["action"]["name"]}')
    labels = self.preq_dict['preq'].get_labels()
    # Exact comparison: label names are arbitrary text, so using them as regex
    # patterns would let a label called "hera" match "hera-intel-RT" and would
    # fail outright on names containing regex metacharacters.
    label_match = next(
        (label for label in labels if label.name == label_to_check), False)

    return label_match


def send_log_name_as_comment(self, log_filename):
logger = logging.getLogger('JOB/SEND_LOG_NAME_AS_COMMENT')

#Remove LAST MONTHS LOGS
logger.info('Removing last months logs (if any)')
last_month = datetime.date.today().replace(day=1) - datetime.timedelta(days=1)
rm_command = [[f'rm rt_auto_*_{last_month.strftime("%Y%m")}*.log', os.getcwd()]]
Expand All @@ -141,24 +158,16 @@ def send_log_name_as_comment(self):
except Exception as e:
logger.warning(f'"{rm_command}" failed with error:{e}')

new_log_name = f'rt_auto_{self.machine["name"]}_'\
f'{datetime.datetime.now().strftime("%Y%m%d%H%M%S")}.log'
cp_command = [[f'cp rt_auto.log {new_log_name}', os.getcwd()]]
logger.info(f'Running "{cp_command}"')
# Add log information to PR.
comment_text = f'Log Name:{log_filename}\n'\
f'Log Location:{os.getcwd()}\n'\
'Logs are kept for one month'
try:
self.run_commands(cp_command)
self.preq_dict['preq'].create_issue_comment(comment_text)
except Exception as e:
logger.warning('Renaming rt_auto failed')
logger.warning('Creating comment with log location failed with:{e}')
else:
comment_text = f'Log Name:{new_log_name}\n'\
f'Log Location:{os.getcwd()}\n'\
'Logs are kept for one month'
try:
self.preq_dict['preq'].create_issue_comment(comment_text)
except Exception as e:
logger.warning('Creating comment with log location failed with:{e}')
else:
logger.info(f'{comment_text}')
logger.info(f'{comment_text}')

def run_commands(self, commands_with_cwd):
logger = logging.getLogger('JOB/RUN_COMMANDS')
Expand Down Expand Up @@ -212,9 +221,12 @@ def clone_pr_repo(self):
def run_function(self):
''' Run the command associted with the label used to initiate this job '''
logger = logging.getLogger('JOB/RUN_FUNCTION')
compiler = self.preq_dict['compiler']
logger.info(f'Compiler being used for command is {compiler}')
command = self.preq_dict["action"]["command"]
try:
logger.info(f'Running: "{self.preq_dict["action"]["command"]}" in "{self.pr_repo_loc}"')
output = subprocess.Popen(self.preq_dict['action']['command'], cwd=self.pr_repo_loc, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
logger.info(f'Running: "{command}" in "{self.pr_repo_loc}"')
output = subprocess.Popen(command, cwd=self.pr_repo_loc, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
out,err = output.communicate()
out = [] if not out else out.decode('utf8').split('\n')
err = [] if not err else err.decode('utf8').split('\n')
Expand All @@ -225,7 +237,13 @@ def run_function(self):
assert(e)
else:
if output.returncode != 0:
logger.critical(f'{self.preq_dict["action"]["command"]} Failed')
comment_text = f'rt.sh failed \n'\
f'machine: {self.machine["name"]} \n'\
f'compiler: {self.preq_dict["compiler"]}\n'\
f'STDOUT: {out} \n'\
f'STDERR: {err}'
self.preq_dict['preq'].create_issue_comment(comment_text)
logger.critical(f'{command} Failed')
[logger.critical(f'stdout: {item}') for item in out if not None]
[logger.critical(f'stderr: {eitem}') for eitem in err if not None]
else:
Expand All @@ -245,28 +263,27 @@ def run_function(self):
def move_rt_logs(self):
    ''' This is the callback function associated with the "RT" command

    Looks for tests/RegressionTests_<machine>.<compiler>.log inside the
    cloned pull request and, when present, hands a pull/add/commit/push
    command sequence to self.run_commands.

    Raises:
        FileNotFoundError: the regression test log does not exist.
    '''
    logger = logging.getLogger('JOB/MOVE_RT_LOGS')
    compiler = self.preq_dict["compiler"]
    rt_log = f'tests/RegressionTests_{self.machine["name"]}.{compiler}.log'
    filepath = f'{self.pr_repo_loc}/{rt_log}'

    if not os.path.exists(filepath):
        # Name the compiler actually used; the log is no longer Intel-only.
        message = f'Could not find {compiler} RT log: {filepath}'
        logger.critical(message)
        raise FileNotFoundError(message)

    # Every entry is [command, working directory].
    move_rt_commands = [
        [f'git pull --ff-only origin {self.branch}', self.pr_repo_loc],
        [f'git add {rt_log}', self.pr_repo_loc],
        [f'git commit -m "Auto: Add RT Log file: {rt_log} skip-ci"', self.pr_repo_loc],
        ['sleep 10', self.pr_repo_loc],
        [f'git push origin {self.branch}', self.pr_repo_loc]
    ]
    self.run_commands(move_rt_commands)

def main():

# handle logging
log_path = os.getcwd()
log_filename = 'rt_auto.log'
log_filename = f'rt_auto_{datetime.datetime.now().strftime("%Y%m%d%H%M%S")}.log'
# Please don't run the following on cron with level=logging.DEBUG
# as it exposes the GH API Token
# Only set it to DEBUG while debugging
Expand All @@ -288,26 +305,26 @@ def main():
# get all pull requests from the GitHub object
logger.info('Getting all pull requests, labels and actions applicable to this machine.')
preq_dict = get_preqs_with_actions(repos, machine, ghinterface_obj, actions)

# add Job objects and run them
logger.info('Adding all jobs to an object list and running them.')
jobs = [Job(pullreq, ghinterface_obj, machine) for pullreq in preq_dict]
for job in jobs:
logger.info(f'Starting Job: {job}')
try:
logger.info('Calling remove_pr_label')
job.remove_pr_label()
logger.info('Calling clone_pr_repo')
job.clone_pr_repo()
logger.info('Calling run_function')
job.run_function()
logger.info('Calling remove_pr_dir')
job.remove_pr_dir()
logger.info('Calling send_log_name_as_comment')
job.send_log_name_as_comment()
except Exception as e:
logger.critical(e)
assert(e)
if job.check_label_before_job_start():
try:
logger.info('Calling remove_pr_label')
job.remove_pr_label()
logger.info('Calling clone_pr_repo')
job.clone_pr_repo()
logger.info('Calling run_function')
job.run_function()
logger.info('Calling remove_pr_dir')
# job.remove_pr_dir()
# logger.info('Calling send_log_name_as_comment')
job.send_log_name_as_comment(log_filename)
except Exception as e:
logger.critical(e)
assert(e)

logger.info('Script Finished')

Expand Down
36 changes: 22 additions & 14 deletions tests/auto/rt_auto.sh
Original file line number Diff line number Diff line change
@@ -1,45 +1,53 @@
#!/bin/bash --login
# Entry point for the automated regression tests: loads the GitHub API token,
# selects the per-machine Python environment and work directory from the login
# host name, then starts rt_auto.py.
set -eux

if [ -f "accesstoken.sh" ]; then
  # The token file must be private to its owner.
  if [ "$(stat -c "%a" "accesstoken.sh")" == "600" ]; then
    echo "Sourcing accesstoken.sh"
    # Switch command tracing off while sourcing, otherwise "set -x" would
    # print the exported token into the job log.
    set +x
    source ./accesstoken.sh
    set -x
  else
    echo "accesstoken.sh permissions NEED to be set to 600 before starting"
    exit 1
  fi
else
  echo "Please create accesstoken.sh (600) with the following content"
  echo ""
  echo "export ghapitoken=<GitHub API Token Here>"
  exit 1
fi

if [[ $HOSTNAME == hfe* ]]; then
  MACHINE_NAME=hera
  WORKDIR=/scratch1/NCEPDEV/nems/Brian.Curtis/test
  export PATH=/scratch1/NCEPDEV/nems/emc.nemspara/soft/miniconda3/bin:$PATH
  export PYTHONPATH=/scratch1/NCEPDEV/nems/emc.nemspara/soft/miniconda3/lib/python3.8/site-packages
elif [[ $HOSTNAME == Orion-login-* ]]; then
  MACHINE_NAME=orion
  WORKDIR=/work/noaa/nems/bcurtis/test
  export PATH=/work/noaa/nems/emc.nemspara/soft/miniconda3/bin:$PATH
  export PYTHONPATH=/work/noaa/nems/emc.nemspara/soft/miniconda3/lib/python3.8/site-packages
elif [[ $HOSTNAME == fe* ]]; then
  MACHINE_NAME=jet
  WORKDIR=/lfs4/HFIP/h-nems/Brian.Curtis/test
  export ACCNR="h-nems"
  export PATH=/lfs4/HFIP/hfv3gfs/software/miniconda3/4.8.3/envs/ufs-weather-model/bin:/lfs4/HFIP/hfv3gfs/software/miniconda3/4.8.3/bin:$PATH
  export PYTHONPATH=/lfs4/HFIP/hfv3gfs/software/miniconda3/4.8.3/envs/ufs-weather-model/lib/python3.8/site-packages:/lfs4/HFIP/hfv3gfs/software/miniconda3/4.8.3/lib/python3.8/site-packages
elif [[ $HOSTNAME == gaea* ]]; then
  MACHINE_NAME=gaea
  WORKDIR=/lustre/f2/pdata/ncep/Brian.Curtis/test
  export LOADEDMODULES="$LOADEDMODULES"
  export ACCNR="nggps_emc" # This applies to Brian.Curtis, may need change later
  export PATH=/lustre/f2/pdata/esrl/gsd/contrib/miniconda3/4.8.3/envs/ufs-weather-model/bin:$PATH
  export PYTHONPATH=/lustre/f2/pdata/esrl/gsd/contrib/miniconda3/4.8.3/lib/python3.8/site-packages
elif [[ $HOSTNAME == cheyenne* ]]; then
  # NOTE(review): the --machine choices visible in rt_auto.py are hera, orion,
  # gaea, jet and wcoss_dell_p3; "cheyenne" has to be accepted there as well
  # for this branch to get past argument parsing.
  MACHINE_NAME=cheyenne
  WORKDIR=/glade/work/heinzell/fv3/ufs-weather-model/auto-rt
  export ACCNR="P48503002"
  export PATH=/glade/p/ral/jntp/tools/miniconda3/4.8.3/envs/ufs-weather-model/bin:/glade/p/ral/jntp/tools/miniconda3/4.8.3/bin:$PATH
  export PYTHONPATH=/glade/p/ral/jntp/tools/miniconda3/4.8.3/envs/ufs-weather-model/lib/python3.8/site-packages:/glade/p/ral/jntp/tools/miniconda3/4.8.3/lib/python3.8/site-packages
else
  echo "No Python Path for this machine. automated RT not starting"
  exit 1
fi

python rt_auto.py -m "$MACHINE_NAME" -w "$WORKDIR"

exit 0