From 6e21e47f72ee055be5baa4f8d98304aef66d0925 Mon Sep 17 00:00:00 2001
From: asyatrhl <123384000+asyatrhl@users.noreply.github.com>
Date: Wed, 2 Aug 2023 22:55:35 +0300
Subject: [PATCH] Add framework for automated regression tests (#232)

* More details in the documentation (Regression.md)
---
 .github/workflows/test.yml       |  98 ++++++++++++++
 docs/Regression.md               |  37 ++++++
 regression/create_onnx_script.py | 128 ++++++++++++++++++
 regression/create_test_script.py | 100 ++++++++++++++
 regression/last_dev.py           | 152 ++++++++++++++++++++++
 regression/log_comparison.py     | 215 +++++++++++++++++++++++++++++++
 regression/pass_fail.py          | 107 +++++++++++++++
 regression/paths.yaml            |  33 +++++
 regression/test_config.yaml      |  94 ++++++++++++++
 requirements-cu11.txt            |   1 +
 requirements-win-cu11.txt        |   1 +
 requirements.txt                 |   1 +
 train.py                         |   3 +
 13 files changed, 970 insertions(+)
 create mode 100644 .github/workflows/test.yml
 create mode 100644 docs/Regression.md
 create mode 100644 regression/create_onnx_script.py
 create mode 100644 regression/create_test_script.py
 create mode 100644 regression/last_dev.py
 create mode 100644 regression/log_comparison.py
 create mode 100644 regression/pass_fail.py
 create mode 100644 regression/paths.yaml
 create mode 100644 regression/test_config.yaml

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
new file mode 100644
index 000000000..26a5295b6
--- /dev/null
+++ b/.github/workflows/test.yml
@@ -0,0 +1,98 @@
+name: auto-testing
+on:
+  pull_request:
+    branches:
+      - develop
+
+jobs:
+  build:
+    runs-on: self-hosted
+    timeout-minutes: 345600
+    steps:
+      - name: Checkout last-dev
+        uses: actions/checkout@v2
+        with:
+          repository: MaximIntegratedAI/ai8x-training
+          ref: develop
+          submodules: recursive
+      - name: Setup Pyenv and Install Dependencies
+        uses: gabrielfalcao/pyenv-action@v13
+        with:
+          default: 3.8.11
+      - name: Create Venv
+        run: |
+          pyenv local 3.8.11
+          python3 -m venv venv --prompt ai8x-training
+      - name: Activate Venv
+        run: source venv/bin/activate
+      - name: Install Dependencies
+        run: |
+          pip3 install -U pip wheel setuptools
+          pip3 install -r requirements-cu11.txt
+      - name: Last Develop Check
+        run: python ./regression/last_dev.py --testconf ./regression/test_config.yaml --testpaths ./regression/paths.yaml
+
+  new-code:
+    runs-on: self-hosted
+    needs: [build]
+    timeout-minutes: 345600
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          repository: MaximIntegratedAI/ai8x-training
+          ref: develop
+          submodules: recursive
+      - name: Setup Pyenv and Install Dependencies
+        uses: gabrielfalcao/pyenv-action@v13
+        with:
+          default: 3.8.11
+      - name: Create Venv
+        run: |
+          pyenv local 3.8.11
+          python3 -m venv venv --prompt ai8x-training
+      - name: Activate Venv
+        run: source venv/bin/activate
+      - name: Install Dependencies
+        run: |
+          pip3 install -U pip wheel setuptools
+          pip3 install -r requirements-cu11.txt
+      - name: Create Test Script
+        run: python ./regression/create_test_script.py --testconf ./regression/test_config.yaml --testpaths ./regression/paths.yaml
+      - name: Run Training Scripts
+        run: bash /home/test/actions-runner/_work/ai8x-training/ai8x-training/scripts/output_file.sh
+      - name: Save Log Files
+        run: cp -r /home/test/actions-runner/_work/ai8x-training/ai8x-training/logs/ /home/test/max7800x/test_logs/$(date +%Y-%m-%d_%H-%M-%S)
+      - name: Save Test Scripts
+        run: cp -r /home/test/actions-runner/_work/ai8x-training/ai8x-training/scripts/output_file.sh /home/test/max7800x/test_scripts/
+      - name: Create and run ONNX script
+        run: python ./regression/create_onnx_script.py --testconf ./regression/test_config.yaml --testpaths ./regression/paths.yaml
+
+  test-results:
+    runs-on: self-hosted
+    needs: [new-code]
+    timeout-minutes: 345600
+    steps:
+      - uses: actions/checkout@v2
+        name: Checkout Test Codes
+        with:
+          repository: MaximIntegratedAI/ai8x-training
+          ref: develop
+          submodules: recursive
+      - name: Setup Pyenv and Install Dependencies
+        uses: gabrielfalcao/pyenv-action@v13
+        with:
+          default: 3.8.11
+      - name: Create Venv
+        run: |
+          pyenv local 3.8.11
+          python3 -m venv venv --prompt ai8x-training
+      - name: Activate Venv
+        run: source venv/bin/activate
+      - name: Install Dependencies
+        run: |
+          pip3 install -U pip wheel setuptools
+          pip3 install -r requirements-cu11.txt
+      - name: Log Diff
+        run: python ./regression/log_comparison.py --testconf ./regression/test_config.yaml --testpaths ./regression/paths.yaml
+      - name: Test Results
+        run: python ./regression/pass_fail.py --testconf ./regression/test_config.yaml --testpaths ./regression/paths.yaml
diff --git a/docs/Regression.md b/docs/Regression.md
new file mode 100644
index 000000000..cd298576a
--- /dev/null
+++ b/docs/Regression.md
@@ -0,0 +1,37 @@
+# Regression Test
+
+The regression test for the `ai8x-training` repository runs whenever a pull request is opened against the `develop` branch of `MaximIntegratedAI/ai8x-training`; the pull request triggers the `test.yml` GitHub Actions workflow.
+
+## Last Tested Code
+
+`last_dev.py` generates the log files for the last tested code. These log files serve as the baseline against which the newly pushed code is compared, in order to detect significant changes in the trained values. The last tested code is tracked by its commit hash.
+
+## Creating Test Scripts
+
+The sample training scripts are located under the `scripts` path. To create the training scripts for the regression test, `regression/create_test_script.py` rewrites these scripts with modified epoch numbers, which keeps the duration of the test under control. The epoch number is defined in `regression/test_config.yaml` for each model/dataset combination; since the sizes of the models and datasets vary, a different epoch number can be defined for each combination in order to create a healthy test. If a new training script is added, the epoch number and threshold values must be defined in the `regression/test_config.yaml` file for the relevant model.
+
+## Comparing Log Files
+
+After the test scripts for the newly pushed code have run, the resulting log files are saved and compared to the log files of the last tested code by `regression/log_comparison.py`, and the comparison results are saved.
+
+## Pass-Fail Decision
+
+The test success criterion is that the percentage change of the compared metric does not drop below the threshold defined in `regression/test_config.yaml`; for example, a threshold of `-5` allows the metric to drop by at most 5% (a worked example is given under Configuration below). If all the training scripts pass the test, `pass_fail.py` completes with success. Otherwise, it fails and exits.
+
+## ONNX Export
+
+Scripts for ONNX export are created and run by `create_onnx_script.py` when `Onnx_Status: True` is configured in `regression/test_config.yaml`. If it is set to `False`, ONNX export is skipped.
+
+## Configuration
+
+In `regression/test_config.yaml`, the `Onnx_Status` and `Qat_Test` settings should be set to `True` when ONNX export or QAT tests using `policies/qat_policy.yaml` are desired. When `Qat_Test` is set to `False`, QAT is performed according to the main training script. All threshold values and test epoch numbers for each model/dataset combination are also configured in this file, as shown in the examples below.
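+
+For example, a new model/dataset combination is registered with an entry like the one below. The structure mirrors the existing entries in `regression/test_config.yaml`; the `MyNewDataset` and `mynewmodel` names are placeholders:
+
+```yaml
+MyNewDataset:        # dataset name, as passed to train.py via --dataset (placeholder)
+  datapath: /data_ssd
+  mynewmodel:        # model name, as passed to train.py via --model (placeholder)
+    threshold: -5    # fail if the compared metric drops by more than 5%
+    epoch: 15        # shortened epoch count used for the test run
+```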
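+
+The pass/fail check applied to these thresholds reduces to a relative percentage change. The following is a minimal sketch of the comparison performed by `regression/log_comparison.py` and `regression/pass_fail.py`; it is illustrative only (the real scripts parse the training log files, and the helper name below is not part of the code):
+
+```python
+def metric_diff_percent(old: float, new: float) -> float:
+    """Relative change (%) of the new run's metric vs. the last tested run."""
+    return (new - old) / old * 100
+
+
+# With a threshold of -5, Top1 (or mAP) may drop by at most 5% relative to the
+# baseline; pass_fail.py fails a model when the change falls below the threshold.
+THRESHOLD = -5.0
+
+assert metric_diff_percent(92.0, 91.0) >= THRESHOLD      # -1.09%: pass
+assert not metric_diff_percent(92.0, 85.0) >= THRESHOLD  # -7.61%: fail
+```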
+
+In order to set up the test on a new system, `regression/paths.yaml` needs to be configured accordingly.
+
+## Setting Up Regression Test
+
+### GitHub Actions
+
+GitHub Actions is a continuous integration (CI) and continuous deployment (CD) platform provided by GitHub that allows developers to automate tasks, workflows, and processes directly within their repositories. A workflow is an automated process, defined in a YAML file, that runs one or more jobs in a repository.
+
+In this project, the `test.yml` workflow is triggered when a pull request is opened against the `develop` branch of the `MaximIntegratedAI/ai8x-training` repository. This workflow contains and runs the jobs and steps required for the regression test. A self-hosted GitHub runner is used to run the regression test actions in this workflow. To install a runner, go to Settings -> Actions -> Runners -> New self-hosted runner on GitHub. To learn more about GitHub Actions, see [GitHub Actions](https://docs.github.com/en/actions/quickstart).
+
+After installing and configuring a GitHub runner in your local environment, configure it to start as a service at system startup to ensure that the self-hosted runner runs continuously and automatically. You can find more information about systemd services at [Systemd Services](https://linuxhandbook.com/create-systemd-services/).
diff --git a/regression/create_onnx_script.py b/regression/create_onnx_script.py
new file mode 100644
index 000000000..d8564b2e1
--- /dev/null
+++ b/regression/create_onnx_script.py
@@ -0,0 +1,128 @@
+###################################################################################################
+#
+# Copyright © 2023 Analog Devices, Inc. All Rights Reserved.
+# This software is proprietary and confidential to Analog Devices, Inc. and its licensors.
+#
+###################################################################################################
+"""
+Create ONNX bash scripts for the test
+"""
+import argparse
+import datetime
+import os
+import subprocess
+import sys
+
+import yaml
+
+
+def joining(lst):
+    """
+    Join list based on the ' ' delimiter
+    """
+    joined_str = ' '.join(lst)
+    return joined_str
+
+
+def time_stamp():
+    """
+    Return the current time stamp as a string
+    """
+    time = str(datetime.datetime.now())
+    time = time.replace(' ', '.')
+    time = time.replace(':', '.')
+    return time
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--testconf', help='Enter the config file for the test', required=True)
+parser.add_argument('--testpaths', help='Enter the paths for the test', required=True)
+args = parser.parse_args()
+yaml_path = args.testconf
+test_path = args.testpaths
+
+# Open the YAML file
+with open(yaml_path, 'r', encoding='utf-8') as yaml_file:
+    # Load the YAML content into a Python dictionary
+    config = yaml.safe_load(yaml_file)
+
+with open(test_path, 'r', encoding='utf-8') as path_file:
+    # Load the YAML content into a Python dictionary
+    pathconfig = yaml.safe_load(path_file)
+
+if not config["Onnx_Status"]:
+    # ONNX export is disabled in the test configuration; exit without an error
+    # so that the workflow step succeeds and the remaining jobs can continue
+    sys.exit(0)
+
+folder_path = pathconfig["folder_path"]
+output_file_path = pathconfig["output_file_path_onnx"]
+train_path = pathconfig["train_path"]
+
+logs_list = os.path.join(folder_path, sorted(os.listdir(folder_path))[-1])
+
+models = []
+datasets = []
+devices = []
+model_paths = []
+bias = []
+tar_names = []
+
+
+with open(output_file_path, "w", encoding='utf-8') as onnx_scripts:
+    with open(train_path, "r", encoding='utf-8') as input_file:
+        contents = input_file.read()
+        lines = contents.split("#!/bin/sh ")
+        lines = lines[1:]
+        contents_t = contents.split()
+
+    j = [i+1 for i in range(len(contents_t)) if contents_t[i] == '--model']
+    for index in j:
+        models.append(contents_t[index])
+
+    j = [i+1 for i in range(len(contents_t)) if contents_t[i] == '--dataset']
+    for index in j:
+        datasets.append(contents_t[index])
+
+    j = [i+1 for i in range(len(contents_t)) if contents_t[i] == '--device']
+    for index in j:
+        devices.append(contents_t[index])
+
+    for line in lines:
+        if "--use-bias" in line:
+            bias.append("--use-bias")
+        else:
+            bias.append("")
+
+    for file_p in sorted(os.listdir(logs_list)):
+        temp_path = os.path.join(logs_list, file_p)
+        for temp_file in sorted(os.listdir(temp_path)):
+            if temp_file.endswith("_checkpoint.pth.tar"):
+                temp = os.path.join(temp_path, temp_file)
+                model_paths.append(temp)
+                tar_names.append(temp_file)
+
+    for model, dataset, bias_value, device_name in zip(models, datasets, bias, devices):
+        for tar in model_paths:
+            element = tar.split('-')
+            modelsearch = element[-4][3:]
+            datasearch = element[-3].split('_')[0]
+            if datasearch == dataset.split('_')[0] and modelsearch == model:
+                # model_paths.remove(tar)
+                tar_path = tar
+                timestamp = time_stamp()
+                temp = (
+                    f"python train.py "
+                    f"--model {model} "
+                    f"--dataset {dataset} "
+                    f"--evaluate "
+                    f"--exp-load-weights-from {tar_path} "
+                    f"--device {device_name} "
+                    f"--summary onnx "
+                    f"--summary-filename {model}_{dataset}_{timestamp}_onnx "
+                    f"{bias_value}\n"
+                )
+                onnx_scripts.write(temp)
+cmd_command = "bash " + output_file_path
+
+subprocess.run(cmd_command, shell=True, check=True)
diff --git a/regression/create_test_script.py b/regression/create_test_script.py
new file mode 100644
index 000000000..48f7cb6be
--- /dev/null
+++ b/regression/create_test_script.py
@@ -0,0 +1,100 @@
+###################################################################################################
+#
+# Copyright © 2023 Analog Devices, Inc. All Rights Reserved.
+# This software is proprietary and confidential to Analog Devices, Inc. and its licensors.
+#
+###################################################################################################
+"""
+Create training bash scripts for test
+"""
+import argparse
+import os
+
+import yaml
+
+
+def joining(lst):
+    """
+    Join list based on the ' ' delimiter
+    """
+    join_str = ' '.join(lst)
+    return join_str
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--testconf', help='Enter the config file for the test', required=True)
+parser.add_argument('--testpaths', help='Enter the paths for the test', required=True)
+args = parser.parse_args()
+yaml_path = args.testconf
+test_path = args.testpaths
+
+# Open the YAML file
+with open(yaml_path, 'r', encoding='utf-8') as yaml_file:
+    # Load the YAML content into a Python dictionary
+    config = yaml.safe_load(yaml_file)
+
+with open(test_path, 'r', encoding='utf-8') as path_file:
+    # Load the YAML content into a Python dictionary
+    pathconfig = yaml.safe_load(path_file)
+
+# Folder containing the files to be concatenated
+script_path = pathconfig["script_path"]
+# Output file name and path
+output_file_path = pathconfig["output_file_path"]
+
+# global log_file_names
+log_file_names = []
+
+# Loop through all files in the folder
+with open(output_file_path, "w", encoding='utf-8') as output_file:
+    for filename in os.listdir(script_path):
+        # Check if the file is a text file
+        if filename.startswith("train"):
+            # Open the file and read its contents
+            with open(os.path.join(script_path, filename), encoding='utf-8') as input_file:
+                contents = input_file.read()
+
+            temp = contents.split()
+            temp.insert(1, "\n")
+            i = temp.index('--epochs')
+            j = temp.index('--model')
+            k = temp.index('--dataset')
+
+            if config["Qat_Test"]:
+                if '--qat-policy' in temp:
+                    x = temp.index('--qat-policy')
+                    temp[x+1] = "policies/qat_policy.yaml"
+                else:
+                    temp.insert(-1, ' --qat-policy policies/qat_policy.yaml')
+
+            log_model = temp[j+1]
+            log_data = temp[k+1]
+
+            if log_model == "ai87imageneteffnetv2":
+                num = temp.index("--batch-size")
+                temp[num+1] = "128"
+
+            log_name = temp[j+1] + '-' + temp[k+1]
+            log_file_names.append(filename[:-3])
+
+            if log_data == "FaceID":
+                continue
+
+            if log_data == "VGGFace2_FaceDetection":
+                continue
+
+            temp[i+1] = str(config[log_data][log_model]["epoch"])
+
+            if '--deterministic' not in temp:
+                temp.insert(-1, '--deterministic')
+
+            temp.insert(-1, '--name ' + log_name)
+
+            data_name = temp[k+1]
+            if data_name in config and "datapath" in config[data_name]:
+                path_data = config[log_data]["datapath"]
+                temp.insert(-1, '--data ' + path_data)
+
+            temp.append("\n")
+            contents = joining(temp)
+            output_file.write(contents)
diff --git a/regression/last_dev.py b/regression/last_dev.py
new file mode 100644
index 000000000..76b78e92d
--- /dev/null
+++ b/regression/last_dev.py
@@ -0,0 +1,152 @@
+###################################################################################################
+#
+# Copyright © 2023 Analog Devices, Inc. All Rights Reserved.
+# This software is proprietary and confidential to Analog Devices, Inc. and its licensors.
+#
+###################################################################################################
+"""
+Create log files from the last tested develop code as the baseline for the test
+"""
+import argparse
+import datetime
+import os
+import subprocess
+
+import git
+import yaml
+from git.exc import InvalidGitRepositoryError
+
+
+def joining(lst):
+    """
+    Join based on the ' ' delimiter
+    """
+    join_str = ' '.join(lst)
+    return join_str
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--testconf', help='Enter the config file for the test', required=True)
+parser.add_argument('--testpaths', help='Enter the paths for the test', required=True)
+args = parser.parse_args()
+yaml_path = args.testconf
+test_path = args.testpaths
+
+# Open the YAML file
+with open(yaml_path, 'r', encoding='utf-8') as yaml_file:
+    # Load the YAML content into a Python dictionary
+    config = yaml.safe_load(yaml_file)
+
+with open(test_path, 'r', encoding='utf-8') as path_file:
+    # Load the YAML content into a Python dictionary
+    pathconfig = yaml.safe_load(path_file)
+
+# Folder containing the files to be concatenated
+script_path = pathconfig["script_path_dev"]
+# Output file name and path
+output_file_path = pathconfig["output_file_path_dev"]
+
+# global log_file_names
+log_file_names = []
+
+
+def dev_scripts(script_pth, output_file_pth):
+    """
+    Create training scripts for the last developed code
+    """
+    with open(output_file_pth, "w", encoding='utf-8') as output_file:
+        for filename in os.listdir(script_pth):
+            # Check if the file is a text file
+            if filename.startswith("train"):
+                # Open the file and read its contents
+                with open(os.path.join(script_pth, filename), encoding='utf-8') as input_file:
+                    contents = input_file.read()
+
+                temp = contents.split()
+                temp.insert(1, "\n")
+                i = temp.index('--epochs')
+                j = temp.index('--model')
+                k = temp.index('--dataset')
+
+                if '--qat-policy' in temp:
+                    x = temp.index('--qat-policy')
+                    temp[x+1] = "policies/qat_policy.yaml"
+                else:
+                    temp.insert(-1, ' --qat-policy policies/qat_policy.yaml')
+
+                log_model = temp[j+1]
+                log_data = temp[k+1]
+
+                if log_model == "ai87imageneteffnetv2":
+                    num = temp.index("--batch-size")
+                    temp[num+1] = "128"
+
+                log_name = temp[j+1] + '-' + temp[k+1]
+                log_file_names.append(filename[:-3])
+
+                if log_data == "FaceID":
+                    continue
+                if log_data == "VGGFace2_FaceDetection":
+                    continue
+                if log_data == "ai85tinierssdface":
+                    continue
+
+                temp[i+1] = str(config[log_data][log_model]["epoch"])
+
+                if '--deterministic' not in temp:
+                    temp.insert(-1, '--deterministic')
+
+                temp.insert(-1, '--name ' + log_name)
+
+                data_name = temp[k+1]
+                if data_name in config and "datapath" in config[data_name]:
+                    path_data = config[log_data]["datapath"]
+                    temp.insert(-1, '--data ' + path_data)
+
+                temp.append("\n")
+                contents = joining(temp)
+                output_file.write(contents)
+
+
+def dev_checkout():
+    """
+    Checkout the last developed code
+    """
+    repo_url = "https://github.com/MaximIntegratedAI/ai8x-training.git"
+    local_path = pathconfig['local_path']
+
+    try:
+        repo = git.Repo(local_path)
+    except InvalidGitRepositoryError:
+        repo = git.Repo.clone_from(repo_url, local_path, branch="develop", recursive=True)
+
+    commit_hash = repo.heads.develop.object.hexsha
+    commit_num_path = pathconfig['commit_num_path']
+
+    try:
+        with open(commit_num_path, "r", encoding='utf-8') as file:
+            saved_commit_hash = file.read().strip()
+    except FileNotFoundError:
+        saved_commit_hash = ""
+
+    if commit_hash != saved_commit_hash:
+        with open(commit_num_path, "w", encoding='utf-8') as file:
+            file.write(commit_hash)
+        repo.remotes.origin.pull("develop")
+
+        dev_scripts(script_path, output_file_path)
+        cmd_command = "bash " + output_file_path
+        subprocess.run(cmd_command, shell=True, check=True)
+
+        path_command = "cd " + local_path
+        subprocess.run(path_command, shell=True, check=True)
+
+        source_path = pathconfig["source_path"]
+        destination_path = os.path.join(
+            pathconfig["destination_path"],
+            datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+        )
+        subprocess.run(['mv', source_path, destination_path], check=True)
+
+
+dev_checkout()
diff --git a/regression/log_comparison.py b/regression/log_comparison.py
new file mode 100644
index 000000000..41c4fcab9
--- /dev/null
+++ b/regression/log_comparison.py
@@ -0,0 +1,215 @@
+###################################################################################################
+#
+# Copyright © 2023 Analog Devices, Inc. All Rights Reserved.
+# This software is proprietary and confidential to Analog Devices, Inc. and its licensors.
+#
+###################################################################################################
+"""
+Compare the log files of the pulled code and the last developed code
+"""
+import argparse
+import datetime
+import os
+import sys
+
+import yaml
+from tabulate import tabulate
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--testconf', help='Enter the config file for the test', required=True)
+parser.add_argument('--testpaths', help='Enter the paths for the test', required=True)
+args = parser.parse_args()
+yaml_path = args.testconf
+test_path = args.testpaths
+
+# Open the YAML file
+with open(yaml_path, 'r', encoding='utf-8') as yaml_file:
+    # Load the YAML content into a Python dictionary
+    config = yaml.safe_load(yaml_file)
+
+with open(test_path, 'r', encoding='utf-8') as path_file:
+    # Load the YAML content into a Python dictionary
+    pathconfig = yaml.safe_load(path_file)
+
+
+def compare_logs(old_log, new_log, output_name, output_pth):
+    """
+    Compute the Top1/Top5 (or mAP) percentage differences between the log files
+    of the pulled code and the last developed code
+    """
+    header = ["Epoch number", "Top1 Diff(%)", "Top5 Diff(%)"]
+    header_map = ["Epoch number", "mAP Diff(%)"]
+
+    word = 'Best'
+    word2 = 'Top1'
+    word3 = 'mAP'
+    ex_list = [False]
+
+    with open(new_log, 'r', encoding='utf-8') as f2:
+        file2_content = f2.read()
+        log_name = new_log.split('/')[-1].split('___')[0]
+
+    if word2 not in file2_content and word3 not in file2_content:
+        print(f"\033[31m\u2718\033[0m {log_name} does not have any trained results."
+ " There is an error in training.") + ex_list.append(True) + + if all(ex_list): + print("\033[31m Cancelling github actions.") + sys.exit(1) + + with open(old_log, 'r', encoding='utf-8') as f1, open(new_log, 'r', encoding='utf-8') as f2: + file1_content = f1.readlines() + file2_content = f2.readlines() + + log1_list = [] + log2_list = [] + mAP_list1 = [] + mAP_list2 = [] + + word = 'Best' + word2 = 'Top1' + word3 = 'mAP' + map_value = False + + for line in file1_content: + if word in line and word2 in line: + lst = line.split() + log1_list.append(lst[5:]) + map_value = False + elif word in line and word3 in line: + lst = line.split() + mAP_list1.append(lst[5:7]) + map_value = True + + for line in file2_content: + if word in line and word2 in line: + lst = line.split() + log2_list.append(lst[5:]) + map_value = False + elif word in line and word3 in line: + lst = line.split() + mAP_list2.append(lst[5:7]) + map_value = True + + epoch_num_top = min(len(log1_list), len(log2_list)) + epoch_num_map = min(len(mAP_list1), len(mAP_list2)) + + log1_list = log1_list[:epoch_num_top] + log2_list = log2_list[:epoch_num_top] + mAP_list1 = mAP_list1[:epoch_num_map] + mAP_list2 = mAP_list2[:epoch_num_map] + + top1 = [] + map_list = [] + + if not map_value: + i = 0 + for (list1, list2) in zip(log1_list, log2_list): + if float(list1[1]) == 0: + print("Top1 value of " + output_name + " is 0.00.") + list1[1] = 0.000001 + i = i+1 + if '[Top1:' in list2: + top1_diff = ((float(list2[1])-float(list1[1]))/float(list1[1]))*100 + top1.append([i]) + top1[i-1].append(top1_diff) + + if 'Top5:' in list2: + top5_diff = ((float(list2[3])-float(list1[3]))/float(list1[1]))*100 + top1[i-1].append(top5_diff) + + output_path_2 = os.path.join(output_pth, (output_name + '.txt')) + with open(output_path_2, "w", encoding='utf-8') as output_file: + output_file.write(tabulate(top1, headers=header)) + + if map_value: + i = 0 + for (map1, map2) in zip(mAP_list1, mAP_list2): + if float(map1[1]) == 0: + print("Map value of " + output_name + " is 0.00.") + map1[1] = 0.000001 + i = i+1 + if '[mAP:' in map2: + map_diff = ((float(map2[1])-float(map1[1]))/float(map1[1]))*100 + map_list.append([i]) + map_list[i-1].append(map_diff) + + output_path_2 = os.path.join(output_pth, (output_name + '.txt')) + with open(output_path_2, "w", encoding='utf-8') as output_file: + output_file.write(tabulate(map_list, headers=header_map)) + return map_value + + +def log_path_list(path): + """ + Create log names + """ + lst = [] + for file in sorted(os.listdir(path)): + lst.append(file.split("___")[0]) + return lst + + +log_new = pathconfig["log_new"] +log_old = pathconfig["log_old"] +script_path = pathconfig["script_path_log"] + +time = str(datetime.datetime.now()) +time = time.replace(' ', '.') +time = time.replace(':', '.') +output_path = pathconfig["output_path"] + '/' + str(time) + +os.mkdir(output_path) + +loglist = sorted(os.listdir(log_new)) +loglist_old = sorted(os.listdir(log_old)) +old_logs_path = log_old + loglist_old[-1] +new_logs_path = log_new + loglist[-1] + +new_log_list = log_path_list(new_logs_path) +old_log_list = log_path_list(old_logs_path) + +with open(script_path, 'r', encoding='utf-8') as f: + scripts_t = f.read() + scripts = scripts_t.split(' ') +name_indices = [i+1 for i, x in enumerate(scripts) if x == "--name"] +values = [scripts[j] for j in name_indices] + +ex_list2 = [False] +for log in values: + if log not in new_log_list: + print(f"\033[31m\u2718\033[0m {log} does not have any trained log file." 
+ " There is an error in training.") + ex_list2.append(True) + +if all(ex_list2): + print("\033[31m Cancelling github actions.") + sys.exit(1) + +not_found_model = [] +map_value_list = {} + +for files_new in sorted(os.listdir(new_logs_path)): + files_new_temp = files_new.split("___")[0] + if files_new_temp not in old_log_list: + not_found_model.append(files_new_temp + " not found in last developed log files.") + for files_old in sorted(os.listdir(old_logs_path)): + files_old_temp = files_old.split("___")[0] + if files_old_temp == files_new_temp: + + old_path = os.path.join(old_logs_path, files_old) + new_path = os.path.join(new_logs_path, files_new) + + old_files = sorted(os.listdir(old_path)) + new_files = sorted(os.listdir(new_path)) + + old_log_file = [file for file in old_files if file.endswith(".log")][0] + new_log_file = [file for file in new_files if file.endswith(".log")][0] + + old_path_log = os.path.join(old_path, old_log_file) + new_path_log = os.path.join(new_path, new_log_file) + + map_value_list[files_new_temp] = compare_logs( + old_path_log, new_path_log, files_new, output_path + ) + break diff --git a/regression/pass_fail.py b/regression/pass_fail.py new file mode 100644 index 000000000..b7ca91f97 --- /dev/null +++ b/regression/pass_fail.py @@ -0,0 +1,107 @@ +################################################################################################### +# +# Copyright © 2023 Analog Devices, Inc. All Rights Reserved. +# This software is proprietary and confidential to Analog Devices, Inc. and its licensors. +# +################################################################################################### +""" +Check the test results +""" +import argparse +import os +import sys + +import yaml +from log_comparison import map_value_list, not_found_model + +parser = argparse.ArgumentParser() +parser.add_argument('--testconf', help='Enter the config file for the test', required=True) +parser.add_argument('--testpaths', help='Enter the paths for the test', required=True) +args = parser.parse_args() +yaml_path = args.testconf +test_path = args.testpaths + +# Open the YAML file +with open(yaml_path, 'r', encoding='utf-8') as yaml_file: + # Load the YAML content into a Python dictionary + config = yaml.safe_load(yaml_file) + +with open(test_path, 'r', encoding='utf-8') as path_file: + # Load the YAML content into a Python dictionary + pathconfig = yaml.safe_load(path_file) + +log_path = pathconfig["log_path"] +log_path = os.path.join(log_path, sorted(os.listdir(log_path))[-1]) + + +def check_top_value(diff_file, threshold, map_value): + """ + Compare Top1 value with threshold + """ + if not map_value: + with open(diff_file, 'r', encoding='utf-8') as f: + model_name = diff_file.split('/')[-1].split('___')[0] + # Read all lines in the diff_file + lines = f.readlines() + # Extract the last line and convert it to a float + top1 = lines[-1].split() + try: + epoch_num = int(top1[0]) + except ValueError: + print(f"\033[31m\u2718\033[0m Test failed for {model_name}: " + f"Cannot convert {top1[0]} to an epoch number.") + return False + top1_diff = float(top1[1]) + + if top1_diff < threshold: + print(f"\033[31m\u2718\033[0m Test failed for {model_name} since" + f" Top1 value changed {top1_diff} % at {epoch_num}th epoch.") + return False + print(f"\033[32m\u2714\033[0m Test passed for {model_name} since" + f" Top1 value changed {top1_diff} % at {epoch_num}th epoch.") + return True + + with open(diff_file, 'r', encoding='utf-8') as f: + model_name = 
+        # Read all lines in the diff_file
+        lines = f.readlines()
+        # Extract the last line and convert it to a float
+        top1 = lines[-1].split()
+        try:
+            epoch_num = int(top1[0])
+        except ValueError:
+            print(f"\033[31m\u2718\033[0m Test failed for {model_name}: "
+                  f"Cannot convert {top1[0]} to an epoch number.")
+            return False
+        top1_diff = float(top1[1])
+        # top5_diff = float(top1[2])
+
+        if top1_diff < threshold:
+            print(f"\033[31m\u2718\033[0m Test failed for {model_name} since"
+                  f" mAP value changed {top1_diff} % at {epoch_num}th epoch.")
+            return False
+        print(f"\033[32m\u2714\033[0m Test passed for {model_name} since"
+              f" mAP value changed {top1_diff} % at {epoch_num}th epoch.")
+        return True
+
+
+passing = []
+for item in not_found_model:
+    print("\033[93m\u26A0\033[0m " + "Warning: " + item)
+
+for logs in sorted(os.listdir(log_path)):
+    log_name = (logs.split("___"))[0]
+    log_model = log_name.split("-")[0]
+    log_data = log_name.split("-")[1]
+
+    if log_data in config and log_model in config[log_data]:
+        threshold_temp = float(config[log_data][log_model]['threshold'])
+    else:
+        threshold_temp = 0
+    logs = os.path.join(log_path, str(logs))
+    map_val = map_value_list[log_name]
+    passing.append(check_top_value(logs, threshold_temp, map_val))
+
+if not all(passing):
+    print("\033[31mNot all tests passed. Cancelling GitHub Actions.")
+    sys.exit(1)
diff --git a/regression/paths.yaml b/regression/paths.yaml
new file mode 100644
index 000000000..a30a2cfc9
--- /dev/null
+++ b/regression/paths.yaml
@@ -0,0 +1,33 @@
+---
+# create_onnx_script.py
+
+folder_path: /home/test/max7800x/test_logs/
+output_file_path_onnx: ./scripts/onnx_scripts.sh
+train_path: /home/test/max7800x/test_scripts/output_file.sh
+
+
+# create_test_script.py
+
+script_path: ./scripts
+output_file_path: ./scripts/output_file.sh
+
+
+# last_dev.py
+
+script_path_dev: /home/test/max7800x/last_developed/last_dev_source/scripts
+output_file_path_dev: /home/test/max7800x/last_developed/dev_scripts/last_dev_train.sh
+source_path: /home/test/actions-runner/_work/ai8x-training/ai8x-training/logs/
+destination_path: /home/test/max7800x/last_developed/dev_logs/
+local_path: /home/test/max7800x/last_developed/last_dev_source/
+commit_num_path: /home/test/max7800x/last_developed/commit_number.txt
+
+# log_comparison.py
+
+log_new: /home/test/max7800x/test_logs/
+log_old: /home/test/max7800x/last_developed/dev_logs/
+script_path_log: /home/test/max7800x/test_scripts/output_file.sh
+output_path: /home/test/max7800x/log_diff/
+
+# pass_fail.py
+
+log_path: /home/test/max7800x/log_diff
diff --git a/regression/test_config.yaml b/regression/test_config.yaml
new file mode 100644
index 000000000..acbc62aae
--- /dev/null
+++ b/regression/test_config.yaml
@@ -0,0 +1,94 @@
+---
+Onnx_Status: true
+Qat_Test: false
+AISegment_352:
+  datapath: /data_ssd
+  ai85unetlarge:
+    threshold: -5
+    epoch: 15
+CamVid_s352_c3:
+  datapath: /data_ssd
+  ai85unetlarge:
+    threshold: -5
+    epoch: 15
+cats_vs_dogs:
+  datapath: /data_ssd
+  ai85cdnet:
+    threshold: -5
+    epoch: 15
+CIFAR10:
+  datapath: "/data_ssd"
+  ai85nascifarnet:
+    threshold: -5
+    epoch: 15
+  ai85net6:
+    threshold: -5
+    epoch: 15
+  ai85squeezenet:
+    threshold: -5
+    epoch: 15
+CIFAR100:
+  datapath: "/data_ssd"
+  ai85simplenet:
+    threshold: -5
+    epoch: 15
+  ai85nascifarnet:
+    threshold: -5
+    epoch: 15
+  ai87effnetv2:
+    threshold: -5
+    epoch: 15
+  ai87netmobilenetv2cifar100_m0_5:
+    threshold: -5
+    epoch: 15
+  ai87netmobilenetv2cifar100_m0_75:
+    threshold: -5
+    epoch: 15
+  ai85ressimplenet:
+    threshold: -5
+    epoch: 15
+  ai85simplenetwide2x:
+    threshold: -5
+    epoch: 15
+ImageNet:
+  datapath: "/data_ssd"
+  ai87imageneteffnetv2:
+    threshold: -5
+    epoch: 13
+KWS_20:
+  datapath: "/data_ssd"
+  ai85kws20net:
+    threshold: -5
+    epoch: 15
+  ai85kws20netv2:
+    threshold: -5
+    epoch: 15
+  ai85kws20netv3:
+    threshold: -5
+    epoch: 15
+  ai87kws20netv3:
+    threshold: -5
+    epoch: 15
+MNIST:
+  datapath: "/data_ssd"
+  ai85netextrasmall:
+    threshold: -5
+    epoch: 15
+  ai85net5:
+    threshold: -500
+    epoch: 15
+SVHN_74:
+  datapath: "/data_ssd"
+  ai85tinierssd:
+    threshold: -500
+    epoch: 15
+PascalVOC_2007_2012_256_320_augmented:
+  datapath: "/data_ssd"
+  ai87fpndetector:
+    threshold: -500
+    epoch: 15
+Kinetics400:
+  datapath: "/data_ssd"
+  ai85actiontcn:
+    threshold: -500
+    epoch: 15
diff --git a/requirements-cu11.txt b/requirements-cu11.txt
index 8c7657312..35a5f5749 100644
--- a/requirements-cu11.txt
+++ b/requirements-cu11.txt
@@ -19,4 +19,5 @@ pycocotools==2.0.6
 albumentations>=1.3.0
 pytube>=12.1.3
 pyffmpeg==2.0
+GitPython>=3.1.18
 -e distiller
diff --git a/requirements-win-cu11.txt b/requirements-win-cu11.txt
index e950fafc4..ffad18fd4 100644
--- a/requirements-win-cu11.txt
+++ b/requirements-win-cu11.txt
@@ -19,4 +19,5 @@ pycocotools==2.0.6
 albumentations>=1.3.0
 pytube>=12.1.3
 pyffmpeg==2.0
+GitPython>=3.1.18
 -e distiller
diff --git a/requirements.txt b/requirements.txt
index 6e57a4364..381913204 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -19,4 +19,5 @@ pycocotools==2.0.6
 albumentations>=1.3.0
 pytube>=12.1.3
 pyffmpeg==2.0
+GitPython>=3.1.18
 -e distiller
diff --git a/train.py b/train.py
index 7932bae86..3b3d8b664 100644
--- a/train.py
+++ b/train.py
@@ -495,6 +495,9 @@ def main():
     for epoch in range(start_epoch, ending_epoch):
         # pylint: disable=unsubscriptable-object
         if qat_policy is not None and epoch > 0 and epoch == qat_policy['start_epoch']:
+            msglogger.info('\n')
+            msglogger.info('Initiating quantization aware training (QAT)...')
+
             # Fuse the BN parameters into conv layers before Quantization Aware Training (QAT)
             ai8x.fuse_bn_layers(model)