Skip to content
This repository was archived by the owner on Feb 10, 2021. It is now read-only.

Commit 17b8757

Browse files
committed
RNN-T loadgen program tidy-up.
1 parent a590c73 commit 17b8757

File tree

4 files changed

+32
-100
lines changed

4 files changed

+32
-100
lines changed

cmdgen/benchmark.speech-recognition-loadgen/.cm/meta.json

+6-6
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,13 @@
2828
"build_map": {
2929
"mode": {
3030
"accuracy": {
31-
"ck_env": "--cmd_key=accuracy-bench",
31+
"ck_env": "--cmd_key=accuracy",
3232
"output_file": "accuracy.txt",
3333
"recommended_dataset_size": "<<<full_dataset_size>>>",
3434
"recommended_verbosity": "1"
3535
},
3636
"performance": {
37-
"ck_env": "--cmd_key=performance-bench",
37+
"ck_env": "--cmd_key=performance",
3838
"output_file": "mlperf_log_summary.txt",
3939
"recommended_dataset_size": "<<<performance_dataset_size>>>",
4040
"recommended_verbosity": "0"
@@ -55,16 +55,16 @@
5555
},
5656
"scenario": {
5757
"multistream": {
58-
"ck_env": "--env.CK_RNNT_SCENARIO=MultiStream"
58+
"ck_env": "--env.CK_LOADGEN_SCENARIO=MultiStream"
5959
},
6060
"offline": {
61-
"ck_env": "--env.CK_RNNT_SCENARIO=Offline"
61+
"ck_env": "--env.CK_LOADGEN_SCENARIO=Offline"
6262
},
6363
"singlestream": {
64-
"ck_env": "--env.CK_RNNT_SCENARIO=SingleStream"
64+
"ck_env": "--env.CK_LOADGEN_SCENARIO=SingleStream"
6565
},
6666
"server": {
67-
"ck_env": "--env.CK_RNNT_SCENARIO=Server"
67+
"ck_env": "--env.CK_LOADGEN_SCENARIO=Server"
6868
}
6969
},
7070
"sut": {

program/speech-recognition-pytorch-loadgen/.cm/meta.json

+11-28
Original file line numberDiff line numberDiff line change
@@ -11,36 +11,19 @@
1111
"ignore_return_code": "no",
1212
"run_time": {
1313
"pre_process_via_ck": {
14-
"script_name": "preprocess"
15-
},
16-
"run_cmd_main": "PYTHONPATH=$PYTHONPATH:$<<CK_ENV_MLPERF_INFERENCE>>$/speech_recognition/rnnt/pytorch $<<CK_ENV_COMPILER_PYTHON_FILE>>$ $<<CK_ENV_MLPERF_INFERENCE>>$/speech_recognition/rnnt/run.py --dataset_dir $<<CK_ENV_DATASET_AUDIO_PREPROCESSED_DIR>>$/../ --manifest $<<CK_ENV_DATASET_AUDIO_PREPROCESSED_DIR>>$/wav-list.json --pytorch_config_toml $<<CK_ENV_MLPERF_INFERENCE>>$/speech_recognition/rnnt/pytorch/configs/rnnt.toml --pytorch_checkpoint $<<CK_ENV_MODEL_PYTORCH>>$/rnnt.pt --log_dir $<<PWD>>$ --scenario $<<CK_RNNT_SCENARIO>>$ --backend $<<CK_RNNT_BACKEND>>$ --accuracy"
17-
}
18-
},
19-
"performance": {
20-
"ignore_return_code": "no",
21-
"run_time": {
22-
"pre_process_via_ck": {
23-
"script_name": "preprocess"
24-
},
25-
"run_cmd_main": "PYTHONPATH=$PYTHONPATH:$<<CK_ENV_MLPERF_INFERENCE>>$/speech_recognition/rnnt/pytorch $<<CK_ENV_COMPILER_PYTHON_FILE>>$ $<<CK_ENV_MLPERF_INFERENCE>>$/speech_recognition/rnnt/run.py --dataset_dir $<<CK_ENV_DATASET_AUDIO_PREPROCESSED_DIR>>$/../ --manifest $<<CK_ENV_DATASET_AUDIO_PREPROCESSED_DIR>>$/wav-list.json --pytorch_config_toml $<<CK_ENV_MLPERF_INFERENCE>>$/speech_recognition/rnnt/pytorch/configs/rnnt.toml --pytorch_checkpoint $<<CK_ENV_MODEL_PYTORCH>>$/rnnt.pt --log_dir $<<PWD>>$ --scenario $<<CK_RNNT_SCENARIO>>$ --backend $<<CK_RNNT_BACKEND>>$"
26-
}
27-
},
28-
"instr": {
29-
"ignore_return_code": "no",
30-
"run_time": {
31-
"fine_grain_timer_file": "tmp-ck-timer.json",
32-
"pre_process_via_ck": {
33-
"script_name": "preprocess"
14+
"data_uoa": "speech-recognition",
15+
"module_uoa": "script",
16+
"script_name": "loadgen_preprocess"
3417
},
3518
"post_process_via_ck": {
3619
"data_uoa": "speech-recognition",
3720
"module_uoa": "script",
3821
"script_name": "loadgen_postprocess"
3922
},
40-
"run_cmd_main": "$PYTHONPATH=$PYTHONPATH:$<<CK_ENV_MLPERF_INFERENCE>>$/speech_recognition/rnnt/pytorch <<CK_ENV_COMPILER_PYTHON_FILE>>$ $<<CK_ENV_MLPERF_INFERENCE>>$/speech_recognition/rnnt/run.py --dataset_dir $<<CK_ENV_DATASET_AUDIO_PREPROCESSED_DIR>>$/../ --manifest $<<CK_ENV_DATASET_AUDIO_PREPROCESSED_DIR>>$/wav-list.json --pytorch_config_toml $<<CK_ENV_MLPERF_INFERENCE>>$/speech_recognition/rnnt/pytorch/configs/rnnt.toml --pytorch_checkpoint $<<CK_ENV_MODEL_PYTORCH>>$/rnnt.pt --log_dir $<<PWD>>$ --scenario $<<CK_RNNT_SCENARIO>>$ --backend $<<CK_RNNT_BACKEND>>$ --instr"
23+
"run_cmd_main": "env ; $<<CK_ENV_COMPILER_PYTHON_FILE>>$ $<<CK_ENV_MLPERF_INFERENCE>>$/speech_recognition/rnnt/run.py --dataset_dir $<<CK_ENV_DATASET_AUDIO_PREPROCESSED_DIR>>$/../ --manifest $<<CK_ENV_DATASET_AUDIO_PREPROCESSED_DIR>>$/wav-list.json --pytorch_config_toml $<<CK_ENV_MLPERF_INFERENCE>>$/speech_recognition/rnnt/pytorch/configs/rnnt.toml --pytorch_checkpoint $<<CK_ENV_MODEL_PYTORCH>>$/rnnt.pt --log_dir $<<PWD>>$ --scenario $<<CK_LOADGEN_SCENARIO>>$ --backend $<<CK_RNNT_BACKEND>>$ --accuracy --mlperf_conf $<<CK_ENV_MLPERF_INFERENCE_MLPERF_CONF>>$ --user_conf $<<CK_LOADGEN_USER_CONF>>$"
4124
}
4225
},
43-
"accuracy-bench": {
26+
"performance": {
4427
"ignore_return_code": "no",
4528
"run_time": {
4629
"pre_process_via_ck": {
@@ -53,23 +36,22 @@
5336
"module_uoa": "script",
5437
"script_name": "loadgen_postprocess"
5538
},
56-
"run_cmd_main": "PYTHONPATH=$PYTHONPATH:$<<CK_ENV_MLPERF_INFERENCE>>$/speech_recognition/rnnt/pytorch $<<CK_ENV_COMPILER_PYTHON_FILE>>$ $<<CK_ENV_MLPERF_INFERENCE>>$/speech_recognition/rnnt/run.py --dataset_dir $<<CK_ENV_DATASET_AUDIO_PREPROCESSED_DIR>>$/../ --manifest $<<CK_ENV_DATASET_AUDIO_PREPROCESSED_DIR>>$/wav-list.json --pytorch_config_toml $<<CK_ENV_MLPERF_INFERENCE>>$/speech_recognition/rnnt/pytorch/configs/rnnt.toml --pytorch_checkpoint $<<CK_ENV_MODEL_PYTORCH>>$/rnnt.pt --log_dir $<<PWD>>$ --scenario $<<CK_RNNT_SCENARIO>>$ --backend $<<CK_RNNT_BACKEND>>$ --accuracy --mlperf_conf $<<CK_ENV_MLPERF_INFERENCE_MLPERF_CONF>>$ --user_conf $<<CK_LOADGEN_USER_CONF>>$"
39+
"run_cmd_main": "$<<CK_ENV_COMPILER_PYTHON_FILE>>$ $<<CK_ENV_MLPERF_INFERENCE>>$/speech_recognition/rnnt/run.py --dataset_dir $<<CK_ENV_DATASET_AUDIO_PREPROCESSED_DIR>>$/../ --manifest $<<CK_ENV_DATASET_AUDIO_PREPROCESSED_DIR>>$/wav-list.json --pytorch_config_toml $<<CK_ENV_MLPERF_INFERENCE>>$/speech_recognition/rnnt/pytorch/configs/rnnt.toml --pytorch_checkpoint $<<CK_ENV_MODEL_PYTORCH>>$/rnnt.pt --log_dir $<<PWD>>$ --scenario $<<CK_LOADGEN_SCENARIO>>$ --backend $<<CK_RNNT_BACKEND>>$ --mlperf_conf $<<CK_ENV_MLPERF_INFERENCE_MLPERF_CONF>>$ --user_conf $<<CK_LOADGEN_USER_CONF>>$"
5740
}
5841
},
59-
"performance-bench": {
42+
"instr": {
6043
"ignore_return_code": "no",
6144
"run_time": {
45+
"fine_grain_timer_file": "tmp-ck-timer.json",
6246
"pre_process_via_ck": {
63-
"data_uoa": "speech-recognition",
64-
"module_uoa": "script",
6547
"script_name": "loadgen_preprocess"
6648
},
6749
"post_process_via_ck": {
6850
"data_uoa": "speech-recognition",
6951
"module_uoa": "script",
7052
"script_name": "loadgen_postprocess"
7153
},
72-
"run_cmd_main": "PYTHONPATH=$PYTHONPATH:$<<CK_ENV_MLPERF_INFERENCE>>$/speech_recognition/rnnt/pytorch $<<CK_ENV_COMPILER_PYTHON_FILE>>$ $<<CK_ENV_MLPERF_INFERENCE>>$/speech_recognition/rnnt/run.py --dataset_dir $<<CK_ENV_DATASET_AUDIO_PREPROCESSED_DIR>>$/../ --manifest $<<CK_ENV_DATASET_AUDIO_PREPROCESSED_DIR>>$/wav-list.json --pytorch_config_toml $<<CK_ENV_MLPERF_INFERENCE>>$/speech_recognition/rnnt/pytorch/configs/rnnt.toml --pytorch_checkpoint $<<CK_ENV_MODEL_PYTORCH>>$/rnnt.pt --log_dir $<<PWD>>$ --scenario $<<CK_RNNT_SCENARIO>>$ --backend $<<CK_RNNT_BACKEND>>$ --mlperf_conf $<<CK_ENV_MLPERF_INFERENCE_MLPERF_CONF>>$ --user_conf $<<CK_LOADGEN_USER_CONF>>$"
54+
"run_cmd_main": "$<<CK_ENV_COMPILER_PYTHON_FILE>>$ $<<CK_ENV_MLPERF_INFERENCE>>$/speech_recognition/rnnt/run.py --dataset_dir $<<CK_ENV_DATASET_AUDIO_PREPROCESSED_DIR>>$/../ --manifest $<<CK_ENV_DATASET_AUDIO_PREPROCESSED_DIR>>$/wav-list.json --pytorch_config_toml $<<CK_ENV_MLPERF_INFERENCE>>$/speech_recognition/rnnt/pytorch/configs/rnnt.toml --pytorch_checkpoint $<<CK_ENV_MODEL_PYTORCH>>$/rnnt.pt --log_dir $<<PWD>>$ --scenario $<<CK_LOADGEN_SCENARIO>>$ --backend $<<CK_RNNT_BACKEND>>$ --instr"
7355
}
7456
}
7557
},
@@ -246,14 +228,15 @@
246228
"run_vars": {
247229
"CK_RNNT_DATASET": "dev-clean",
248230
"CK_RNNT_BACKEND": "pytorch",
249-
"CK_RNNT_SCENARIO": "Offline",
250231
"CK_RNNT_PRE_BACKEND": "pytorch",
251232
"CK_RNNT_PRE": "orig",
252233
"CK_RNNT_POST_BACKEND": "pytorch",
253234
"CK_RNNT_POST": "orig",
254235
"CK_RNNT_DEC_BACKEND": "pytorch",
255236
"CK_RNNT_DEC": "orig",
237+
"CK_LOADGEN_SCENARIO": "Offline",
256238
"CK_LOADGEN_USER_CONF": "user.conf",
239+
"CK_ENV_MLPERF_INFERENCE_MLPERF_CONF": "$<<CK_ENV_MLPERF_INFERENCE>>$/mlperf.conf",
257240
"CK_MLPERF_PRE_USER_CONF_AND_AUDIT_CONFIG": "YES"
258241
},
259242
"tags": [

program/speech-recognition-pytorch-loadgen/preprocess.py

-27
This file was deleted.

script/speech-recognition/loadgen_preprocess.py

+15-39
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def dep_env(dep, var): return i['deps'][dep]['dict']['env'].get(var)
4545

4646
model_name = "rnnt"
4747
print('\n-=-=-=-=-= Generating user.conf for model "{}" ...'.format(model_name))
48-
scenario = "-"
48+
scenario = env['CK_LOADGEN_SCENARIO']
4949
user_conf_rel_path = env['CK_LOADGEN_USER_CONF']
5050
user_conf = []
5151
env_to_conf = {
@@ -90,42 +90,7 @@ def dep_env(dep, var): return i['deps'][dep]['dict']['env'].get(var)
9090
return {'return':0}
9191

9292

93-
def schindler(i):
94-
print('\n-=-=-=-=-= Generating a list of files to be processed...')
95-
def has_env(var): return var in i['env']
96-
def my_env(var): return i['env'].get(var)
97-
def dep_env(dep, var): return i['deps'][dep]['dict']['env'].get(var)
98-
def has_dep_env(dep, var): return var in i['deps'][dep]['dict']['env']
99-
100-
image_list_filename = dep_env('images', 'CK_ENV_DATASET_IMAGENET_PREPROCESSED_SUBSET_FOF')
101-
source_dir = dep_env('images', 'CK_ENV_DATASET_IMAGENET_PREPROCESSED_DIR')
102-
preprocessed_ext = dep_env('images', 'CK_ENV_DATASET_IMAGENET_PREPROCESSED_NEW_EXTENSION')
103-
104-
image_count = int(my_env('CK_LOADGEN_DATASET_SIZE')) if has_env('CK_LOADGEN_DATASET_SIZE') else int(my_env('CK_BATCH_SIZE')) * int(my_env('CK_BATCH_COUNT'))
105-
images_offset = int(my_env('CK_SKIP_IMAGES') or '0')
106-
107-
sorted_filenames = [filename for filename in sorted(os.listdir(source_dir)) if filename.lower().endswith('.' + preprocessed_ext) ]
108-
109-
selected_filenames = sorted_filenames[images_offset:images_offset+image_count] if image_count else sorted_filenames[images_offset:]
110-
111-
selected_var_paths = [ os.path.join("$<<CK_ENV_DATASET_IMAGENET_PREPROCESSED_DIR>>$", filename) for filename in selected_filenames ]
112-
113-
with open(image_list_filename, 'w') as f:
114-
for filename in selected_filenames:
115-
f.write(filename + '\n')
116-
117-
print('=-=-=-=-=- done.\n')
118-
119-
return {
120-
'return': 0,
121-
'new_env': {},
122-
'run_input_files': [ '$<<>>$' + image_list_filename ] + selected_var_paths,
123-
'run_output_files': [],
124-
}
125-
126-
127-
# This preprocessing subroutine is a combination of several sequential processes,
128-
# but the only non-trivial output is the output of schindler(), so we call it last:
93+
# This preprocessing subroutine is a combination of several sequential processes.
12994
#
13095
def ck_preprocess(i):
13196
env=i['env']
@@ -138,8 +103,19 @@ def ck_preprocess(i):
138103
if env.get('CK_MLPERF_PRE_USER_CONF_AND_AUDIT_CONFIG','').lower() in ('yes', 'on', 'true', '1'):
139104
ret_dict = user_conf_and_audit_config(i)
140105

141-
if env.get('CK_MLPERF_PRE_SCHINDLER','').lower() in ('yes', 'on', 'true', '1'):
142-
ret_dict = schindler(i)
106+
def dep_env(dep, var): return i['deps'][dep]['dict']['env'].get(var)
107+
108+
inferencepath = dep_env('mlperf-inference', 'CK_ENV_MLPERF_INFERENCE')
109+
110+
try:
111+
pythonpath = os.environ['PYTHONPATH'] + ":"
112+
except KeyError:
113+
pythonpath = ""
114+
115+
os.environ['PYTHONPATH'] = pythonpath + \
116+
os.path.join(inferencepath,"speech_recognition/rnnt") + ":" + \
117+
os.path.join(inferencepath,"speech_recognition/rnnt/pytorch")
118+
143119

144120
return ret_dict
145121

0 commit comments

Comments
 (0)