Skip to content
This repository has been archived by the owner on Sep 18, 2024. It is now read-only.

add doc and fix bug in experiment resume/view #3524

Merged
merged 2 commits into from
Apr 16, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/en_US/Tutorial/HowToLaunchFromPython.rst
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,12 @@ Please refer to `example usage <./python_api_connect.rst>`__ and code file :gith

.. Note:: You can use ``stop()`` to stop the experiment when connecting to an existing experiment.

Resume/View and Manage a Stopped Experiment
-------------------------------------------

You can use ``Experiment.resume()`` and ``Experiment.view()`` to resume and view a stopped experiment; these functions behave like ``nnictl resume`` and ``nnictl view``.
If you want to manage the experiment, set ``wait_completion`` to ``False`` and the functions will return an ``Experiment`` instance. For more parameters, please refer to the API section below.

API
---

Expand Down
15 changes: 14 additions & 1 deletion nni/experiment/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,8 +202,15 @@ def resume(cls, experiment_id: str, port: int, wait_completion: bool = True, deb
----------
experiment_id
The stopped experiment id.
port
The port of web UI.
wait_completion
If true, run in the foreground. If false, run in the background.
debug
Whether to start in debug mode.
"""
experiment = Experiment()
experiment.id = experiment_id
experiment.mode = 'resume'
if wait_completion:
experiment.run(port, debug)
Expand All @@ -212,16 +219,22 @@ def resume(cls, experiment_id: str, port: int, wait_completion: bool = True, deb
return experiment

@classmethod
def view(cls, experiment_id: str, port: int, wait_completion: bool = True, debug: bool = False):
def view(cls, experiment_id: str, port: int, wait_completion: bool = True):
"""
View a stopped experiment.

Parameters
----------
experiment_id
The stopped experiment id.
port
The port of web UI.
wait_completion
If true, run in the foreground. If false, run in the background.
"""
debug = False
experiment = Experiment()
experiment.id = experiment_id
experiment.mode = 'view'
if wait_completion:
experiment.run(port, debug)
Expand Down
7 changes: 3 additions & 4 deletions nni/experiment/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,9 @@ def start_experiment(exp_id: str, config: ExperimentConfig, port: int, debug: bo
_check_rest_server(port)
platform = 'hybrid' if isinstance(config.training_service, list) else config.training_service.platform
_save_experiment_information(exp_id, port, start_time, platform,
config.experiment_name, proc.pid, config.experiment_working_directory)
if mode != 'view':
_logger.info('Setting up...')
rest.post(port, '/experiment', config.json())
config.experiment_name, proc.pid, str(config.experiment_working_directory))
_logger.info('Setting up...')
rest.post(port, '/experiment', config.json())
return proc

except Exception as e:
Expand Down
2 changes: 1 addition & 1 deletion nni/experiment/rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def request(method: str, port: Optional[int], api: str, data: Any = None) -> Any
if not resp.ok:
_logger.error('rest request %s %s failed: %s %s', method.upper(), url, resp.status_code, resp.text)
resp.raise_for_status()
if method.lower() in ['get', 'post']:
if method.lower() in ['get', 'post'] and len(resp.content) > 0:
return resp.json()

def get(port: Optional[int], api: str) -> Any:
Expand Down
8 changes: 4 additions & 4 deletions nni/tools/nnictl/config_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Licensed under the MIT license.

import os
import json
import json_tricks
import shutil
import sqlite3
import time
Expand Down Expand Up @@ -92,7 +92,7 @@ def refresh_config(self):
'''refresh to get latest config'''
sql = 'select params from ExperimentProfile where id=? order by revision DESC'
args = (self.experiment_id,)
self.config = config_v0_to_v1(json.loads(self.conn.cursor().execute(sql, args).fetchone()[0]))
self.config = config_v0_to_v1(json_tricks.loads(self.conn.cursor().execute(sql, args).fetchone()[0]))

def get_config(self):
'''get a value according to key'''
Expand Down Expand Up @@ -155,7 +155,7 @@ def write_file(self):
'''save config to local file'''
try:
with open(self.experiment_file, 'w') as file:
json.dump(self.experiments, file, indent=4)
json_tricks.dump(self.experiments, file, indent=4)
except IOError as error:
print('Error:', error)
return ''
Expand All @@ -165,7 +165,7 @@ def read_file(self):
if os.path.exists(self.experiment_file):
try:
with open(self.experiment_file, 'r') as file:
return json.load(file)
return json_tricks.load(file)
except ValueError:
return {}
return {}
2 changes: 1 addition & 1 deletion ts/nni_manager/core/nnimanager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -198,9 +198,9 @@ class NNIManager implements Manager {
}

public async resumeExperiment(readonly: boolean): Promise<void> {
this.log.info(`Resuming experiment: ${this.experimentProfile.id}`);
//Fetch back the experiment profile
const experimentId: string = getExperimentId();
this.log.info(`Resuming experiment: ${experimentId}`);
this.experimentProfile = await this.dataStore.getExperimentProfile(experimentId);
this.readonly = readonly;
if (readonly) {
Expand Down