Skip to content

Commit

Permalink
Replace Pandas with Tabulate in summary (#95)
Browse files Browse the repository at this point in the history
Replace usage of the pandas library with the smaller,
pure-Python tabulate package to reduce build
size and download time.

[ committed by @MattToast ]
[ reviewed by @Spartee ]
  • Loading branch information
Sam Partee authored Dec 9, 2021
2 parents ab0207d + bd9eaf5 commit b091fc1
Show file tree
Hide file tree
Showing 15 changed files with 86 additions and 55 deletions.
2 changes: 1 addition & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ numpy>=1.18.2
toml>=0.10.1
tqdm>=4.50.2
psutil>=5.7.2
pandas>=1.1.3
tabulate>=0.8.9
black>=20.8b1
isort>=5.6.4
pylint>=2.6.0
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
psutil>=5.7.2
coloredlogs==10.0
pandas>=1.1.3
tabulate>=0.8.9
smartredis>=0.1.1
redis==3.5.3
redis-py-cluster==2.1.3
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ include_package_data = True
install_requires =
psutil>=5.7.2
coloredlogs==10.0
pandas>=1.1.3
tabulate>=0.8.9
smartredis>=0.1.1
redis-py-cluster==2.1.3
redis==3.5.3
Expand Down
9 changes: 3 additions & 6 deletions smartsim/entity/ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def __init__(
:rtype: ``Ensemble``
"""
self.params = init_default({}, params, dict)
self.params_as_args = init_default({}, params_as_args, (list,str))
self.params_as_args = init_default({}, params_as_args, (list, str))
self._key_prefixing_enabled = True
self.batch_settings = init_default({}, batch_settings, BatchSettings)
self.run_settings = init_default({}, run_settings, RunSettings)
Expand All @@ -113,9 +113,7 @@ def _initialize_entities(self, **kwargs):
param_names, params = self._read_model_parameters()

# Compute all combinations of model parameters and arguments
all_model_params = strategy(
param_names, params, **kwargs
)
all_model_params = strategy(param_names, params, **kwargs)
if not isinstance(all_model_params, list):
raise UserStrategyError(strategy)

Expand Down Expand Up @@ -171,7 +169,6 @@ def _initialize_entities(self, **kwargs):
else:
logger.info("Empty ensemble created for batch launch")


def add_model(self, model):
"""Add a model to this ensemble
Expand Down Expand Up @@ -300,4 +297,4 @@ def _read_model_parameters(self):
"Incorrect type for ensemble parameters\n"
+ "Must be list, int, or string."
)
return param_names, parameters
return param_names, parameters
16 changes: 10 additions & 6 deletions smartsim/entity/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from smartsim.error.errors import SSConfigError

from ..error import EntityExistsError
from ..utils.helpers import cat_arg_and_value, init_default
from .entity import SmartSimEntity
Expand Down Expand Up @@ -119,15 +120,18 @@ def attach_generator_files(self, to_copy=None, to_symlink=None, to_configure=Non
self.files = EntityFiles(to_configure, to_copy, to_symlink)

def params_to_args(self):
"""Convert parameters to command line arguments and update run settings.
"""
"""Convert parameters to command line arguments and update run settings."""
for param in self.params_as_args:
if not param in self.params:
raise SSConfigError(f"Tried to convert {param} to command line argument " +
f"for Model {self.name}, but its value was not found in model params")
raise SSConfigError(
f"Tried to convert {param} to command line argument "
+ f"for Model {self.name}, but its value was not found in model params"
)
if self.run_settings is None:
raise SSConfigError(f"Tried to configure command line parameter for Model {self.name}, " +
"but no RunSettings are set.")
raise SSConfigError(
f"Tried to configure command line parameter for Model {self.name}, "
+ "but no RunSettings are set."
)
self.run_settings.add_exe_args(cat_arg_and_value(param, self.params[param]))

def __eq__(self, other):
Expand Down
59 changes: 32 additions & 27 deletions smartsim/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from os import getcwd
from pprint import pformat

import pandas as pd
from tabulate import tabulate
from tqdm import trange

from .control import Controller, Manifest
Expand Down Expand Up @@ -461,41 +461,46 @@ def reconnect_orchestrator(self, checkpoint):
logger.error(e)
raise

def summary(self):
def summary(self, format="github"):
"""Return a summary of the ``Experiment``
The summary will show each instance that has been
launched and completed in this ``Experiment``
:return: pandas Dataframe of ``Experiment`` history
:rtype: pd.DataFrame
:param format: the style in which the summary table is formatted,
for a full list of styles see:
https://github.com/astanin/python-tabulate#table-format,
defaults to "github"
:type format: str, optional
:return: tabulate string of ``Experiment`` history
:rtype: str
"""
index = 0
df = pd.DataFrame(
columns=[
"Name",
"Entity-Type",
"JobID",
"RunID",
"Time",
"Status",
"Returncode",
]
)
values = []
headers = [
"Name",
"Entity-Type",
"JobID",
"RunID",
"Time",
"Status",
"Returncode",
]

# TODO should this include running jobs?
for job in self._control._jobs.completed.values():
for run in range(job.history.runs + 1):
df.loc[index] = [
job.entity.name,
job.entity.type,
job.history.jids[run],
run,
job.history.job_times[run],
job.history.statuses[run],
job.history.returns[run],
]
index += 1
return df
values.append(
[
job.entity.name,
job.entity.type,
job.history.jids[run],
run,
job.history.job_times[run],
job.history.statuses[run],
job.history.returns[run],
]
)
return tabulate(values, headers, showindex=True, tablefmt=format)

def _launch_summary(self, manifest):
"""Experiment pre-launch summary of entities that will be launched
Expand Down
1 change: 0 additions & 1 deletion tests/backends/run_sklearn_onnx.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

from smartredis import Client


Expand Down
2 changes: 1 addition & 1 deletion tests/backends/run_tf.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import os

import numpy as np
from smartredis import Client
from tensorflow import keras

from smartredis import Client
from smartsim.tf import freeze_model


Expand Down
1 change: 0 additions & 1 deletion tests/backends/run_torch.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

from smartredis import Client


Expand Down
12 changes: 7 additions & 5 deletions tests/on_wlm/test_simple_entity_launch.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,17 +77,19 @@ def test_summary(fileutils, wlmutils):
assert exp.get_status(bad)[0] == constants.STATUS_FAILED
assert exp.get_status(sleep)[0] == constants.STATUS_COMPLETED

summary_df = exp.summary()
print(summary_df)
row = summary_df.loc[0]
summary_str = exp.summary(format="plain")
print(summary_str)

rows = [s.split() for s in summary_str.split("\n")]
headers = ["Index"] + rows.pop(0)

row = dict(zip(headers, rows[0]))
assert sleep.name == row["Name"]
assert sleep.type == row["Entity-Type"]
assert 0 == int(row["RunID"])
assert 0 == int(row["Returncode"])

row_1 = summary_df.loc[1]

row_1 = dict(zip(headers, rows[1]))
assert bad.name == row_1["Name"]
assert bad.type == row_1["Entity-Type"]
assert 0 == int(row_1["RunID"])
Expand Down
1 change: 0 additions & 1 deletion tests/test_configs/smartredis/consumer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import numpy as np
import torch
import torch.nn as nn

from smartredis import Client

if __name__ == "__main__":
Expand Down
1 change: 0 additions & 1 deletion tests/test_configs/smartredis/producer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import numpy as np
import torch
import torch.nn as nn

from smartredis import Client


Expand Down
6 changes: 5 additions & 1 deletion tests/test_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,11 @@ def test_arg_and_model_params_step():
rs_copy = deepcopy(rs)
rs_orig_args = rs_copy.exe_args
ensemble = Ensemble(
"step", params, params_as_args=["H", "g_param"], run_settings=rs_copy, perm_strat="step"
"step",
params,
params_as_args=["H", "g_param"],
run_settings=rs_copy,
perm_strat="step",
)
assert len(ensemble) == 2

Expand Down
24 changes: 24 additions & 0 deletions tests/test_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,3 +99,27 @@ def test_poll(fileutils):
exp.start(model, block=False)
exp.poll(interval=1)
exp.stop(model)


def test_summary(fileutils):
exp_name = "test_exp_summary"
exp = Experiment(exp_name)
test_dir = fileutils.make_test_dir(exp_name)
m = exp.create_model(
"model", path=test_dir, run_settings=RunSettings("echo", "Hello")
)
exp.start(m)
summary_str = exp.summary(format="plain")
print(summary_str)

summary_lines = summary_str.split("\n")
assert 2 == len(summary_lines)

headers, values = [s.split() for s in summary_lines]
headers = ["Index"] + headers

row = dict(zip(headers, values))
assert m.name == row["Name"]
assert m.type == row["Entity-Type"]
assert 0 == int(row["RunID"])
assert 0 == int(row["Returncode"])
3 changes: 1 addition & 2 deletions tests/test_smartredis.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,8 @@

shouldrun = True
try:
import torch

import smartredis
import torch
except ImportError:
shouldrun = False

Expand Down

0 comments on commit b091fc1

Please sign in to comment.