-
Notifications
You must be signed in to change notification settings - Fork 3.1k
/
Copy pathsubmit_groupwise_azureml_pytest.py
179 lines (162 loc) · 5.12 KB
/
submit_groupwise_azureml_pytest.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
# Copyright (c) Recommenders contributors.
# Licensed under the MIT License.
"""
This Python script sets up an environment on AzureML and submits a
script to it to run pytest. It is usually intended to be used as
part of a DevOps pipeline which runs testing on a GitHub repo, but it
can also be used from the command line.
Many parameters have default values, and some are expected to be passed
in from either the DevOps pipeline or the command line.
If calling from the command line, there are some parameters you must pass in
for your job to run.
Args:
See parse_args() below for more details.
Example:
Usually, this script is run by a DevOps pipeline. It can also be
run from the command line.
>>> python tests/ci/submit_groupwise_azureml_pytest.py \
--subid '12345678-9012-3456-abcd-123456789012' ...
"""
import argparse
import logging
from aml_utils import (
correct_resource_name,
create_or_start_compute,
get_client,
get_or_create_environment,
run_tests,
)
def parse_args(argv=None):
    """Parse the command-line options used to submit a pytest run to AzureML.

    Every option has a default except ``--sha``, which is ``None`` when it is
    not supplied, so the script can be started with only the options that
    differ from the defaults.

    Args:
        argv (list[str] | None): Argument strings to parse. When ``None``
            (the default) argparse reads them from ``sys.argv[1:]``.

    Returns:
        argparse.Namespace: The parsed options. ``maxnodes`` is an ``int``;
        all other values are strings (or ``None`` for ``sha``).
    """
    parser = argparse.ArgumentParser(description="Process some inputs")
    parser.add_argument(
        "--sha",
        help="the commit triggering the workflow",
    )
    parser.add_argument(
        "--script",
        default="tests/ci/azureml_tests/run_groupwise_pytest.py",
        help="Path of script to run pytest",
    )
    parser.add_argument(
        "--maxnodes",
        # Without an explicit type, a value given on the command line would
        # be a str while the default is an int.
        type=int,
        default=4,
        help="Maximum number of nodes for the run",
    )
    parser.add_argument(
        "--testgroup",
        default="group_criteo",
        help="Test Group",
    )
    parser.add_argument(
        "--rg",
        default="recommender",
        help="Azure Resource Group",
    )
    parser.add_argument(
        "--ws",
        default="RecoWS",
        help="AzureML workspace name",
    )
    parser.add_argument(
        "--cluster",
        default="azuremlcompute",
        help="AzureML cluster name",
    )
    parser.add_argument(
        "--vmsize",
        default="STANDARD_D3_V2",
        help="VM size",
    )
    parser.add_argument(
        "--subid",
        default="123456",
        help="Azure Subscription ID",
    )
    parser.add_argument(
        "--expname",
        default="persistentAzureML",
        help="Experiment name on AzureML",
    )
    parser.add_argument(
        "--envname",
        default="recommenders",
        help="Environment name on AzureML",
    )
    parser.add_argument(
        "--conda-openjdk-version",
        default="21",
        help="Conda OpenJDK package version",
    )
    parser.add_argument(
        "--python-version",
        default="3.11",
        help="Python version",
    )
    parser.add_argument(
        "--testkind",
        default="unit",
        help="Test kind - nightly or unit",
    )
    return parser.parse_args(argv)
if __name__ == "__main__":
    # The logging module's default threshold is WARNING, so the info
    # messages below would be dropped unless logging is configured.
    # basicConfig() installs a stream handler on the root logger only when
    # none is present; raising the level on this script's logger alone keeps
    # other libraries' loggers at their existing verbosity.
    logging.basicConfig()
    logger = logging.getLogger("submit_groupwise_azureml_pytest.py")
    logger.setLevel(logging.INFO)

    args = parse_args()

    logger.info("Setting up workspace %s", args.ws)
    client = get_client(
        subscription_id=args.subid,
        resource_group=args.rg,
        workspace_name=args.ws,
    )

    logger.info("Setting up compute %s", args.cluster)
    create_or_start_compute(
        client=client, name=args.cluster, size=args.vmsize, max_instances=args.maxnodes
    )

    # TODO: Unlike Azure DevOps pipelines, GitHub Actions only has simple
    #       string functions like startsWith() and contains().  And AzureML
    #       only accepts simple names that do not contain '.' and '/'.
    #       correct_resource_name() is used to replace '.' and '/' with '_'
    #       which makes names in the workflow and on AzureML inconsistent.
    #       For example, a name
    #       * in the workflow
    #         recommenders-unit-group_cpu_001-python3.8-c8adeafabc011b549f875dc145313ffbe3fc53a8
    #       * on AzureML
    #         recommenders-unit-group_cpu_001-python3_8-c8adeafabc011b549f875dc145313ffbe3fc53a8
    environment_name = correct_resource_name(args.envname)
    logger.info("Setting up environment %s", environment_name)
    get_or_create_environment(
        client=client,
        environment_name=environment_name,
        # GPU and Spark support are selected by substring match on the
        # test group name.
        use_gpu="gpu" in args.testgroup,
        use_spark="spark" in args.testgroup,
        conda_openjdk_version=args.conda_openjdk_version,
        python_version=args.python_version,
    )

    # The experiment name goes through the same name correction as the
    # environment name.
    experiment_name = correct_resource_name(args.expname)
    logger.info("Running experiment %s", experiment_name)
    run_tests(
        client=client,
        compute=args.cluster,
        environment_name=environment_name,
        experiment_name=experiment_name,
        script=args.script,
        testgroup=args.testgroup,
        testkind=args.testkind,
    )