Skip to content

Commit

Permalink
Merge branch 'feature-py-sdk-create-exp-generateName' of github.com:b…
Browse files Browse the repository at this point in the history
…harathk005/katib into feature-py-sdk-create-exp-generateName
  • Loading branch information
bharathk005 committed Apr 2, 2024
2 parents c583c68 + 9e24312 commit a4e4b62
Show file tree
Hide file tree
Showing 37 changed files with 377 additions and 128 deletions.
22 changes: 21 additions & 1 deletion .github/workflows/test-python.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,27 @@ jobs:
- name: Setup Python
uses: actions/setup-python@v4
with:
python-version: 3.9
python-version: 3.11

- name: Run Python test
run: make pytest

# The skopt service doesn't work properly with Python 3.11,
# so we need to run its test with Python 3.9.
# TODO (tenzen-y): Once we stop supporting skopt, we can remove this test.
# REF: https://github.com/kubeflow/katib/issues/2280
test-skopt:
name: Test Skopt
runs-on: ubuntu-22.04

steps:
- name: Check out code
uses: actions/checkout@v3

- name: Setup Python
uses: actions/setup-python@v4
with:
python-version: 3.9

- name: Run Python test
run: make pytest-skopt
13 changes: 11 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,6 @@ update-boilerplate:
prepare-pytest:
pip install --prefer-binary -r test/unit/v1beta1/requirements.txt
pip install --prefer-binary -r cmd/suggestion/hyperopt/v1beta1/requirements.txt
pip install --prefer-binary -r cmd/suggestion/skopt/v1beta1/requirements.txt
pip install --prefer-binary -r cmd/suggestion/optuna/v1beta1/requirements.txt
pip install --prefer-binary -r cmd/suggestion/hyperband/v1beta1/requirements.txt
pip install --prefer-binary -r cmd/suggestion/nas/enas/v1beta1/requirements.txt
Expand All @@ -176,6 +175,16 @@ ifeq ("$(wildcard $(TEST_TENSORFLOW_EVENT_FILE_PATH))", "")
endif

pytest: prepare-pytest prepare-pytest-testdata
PYTHONPATH=$(PYTHONPATH) pytest ./test/unit/v1beta1/suggestion
PYTHONPATH=$(PYTHONPATH) pytest ./test/unit/v1beta1/suggestion --ignore=./test/unit/v1beta1/suggestion/test_skopt_service.py
PYTHONPATH=$(PYTHONPATH) pytest ./test/unit/v1beta1/earlystopping
PYTHONPATH=$(PYTHONPATH) pytest ./test/unit/v1beta1/metricscollector

# The skopt service doesn't work properly with Python 3.11,
# so we need to run its test with Python 3.9.
# TODO (tenzen-y): Once we stop supporting skopt, we can remove this test.
# REF: https://github.com/kubeflow/katib/issues/2280
# Install the unit-test and skopt requirements, then run only the skopt suggestion service test.
pytest-skopt:
# NOTE(review): six is installed explicitly; presumably it is needed by the skopt stack but not declared in its requirements - confirm.
pip install six
pip install --prefer-binary -r test/unit/v1beta1/requirements.txt
pip install --prefer-binary -r cmd/suggestion/skopt/v1beta1/requirements.txt
PYTHONPATH=$(PYTHONPATH) pytest ./test/unit/v1beta1/suggestion/test_skopt_service.py
2 changes: 1 addition & 1 deletion cmd/earlystopping/medianstop/v1beta1/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.10-slim
FROM python:3.11-slim

ARG TARGETARCH
ENV TARGET_DIR /opt/katib
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.10-slim
FROM python:3.11-slim

ARG TARGETARCH
ENV TARGET_DIR /opt/katib
Expand Down
2 changes: 1 addition & 1 deletion cmd/suggestion/hyperband/v1beta1/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.10-slim
FROM python:3.11-slim

ARG TARGETARCH
ENV TARGET_DIR /opt/katib
Expand Down
2 changes: 1 addition & 1 deletion cmd/suggestion/hyperband/v1beta1/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
grpcio>=1.41.1
cloudpickle==0.5.6
numpy>=1.20.0
numpy>=1.25.2
scikit-learn>=0.24.0
scipy>=1.5.4
forestci==0.3
Expand Down
2 changes: 1 addition & 1 deletion cmd/suggestion/hyperopt/v1beta1/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.10-slim
FROM python:3.11-slim

ARG TARGETARCH
ENV TARGET_DIR /opt/katib
Expand Down
2 changes: 1 addition & 1 deletion cmd/suggestion/hyperopt/v1beta1/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
grpcio>=1.41.1
cloudpickle==0.5.6
numpy>=1.20.0
numpy>=1.25.2
scikit-learn>=0.24.0
scipy>=1.5.4
forestci==0.3
Expand Down
2 changes: 1 addition & 1 deletion cmd/suggestion/nas/darts/v1beta1/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.10-slim
FROM python:3.11-slim

ARG TARGETARCH
ENV TARGET_DIR /opt/katib
Expand Down
2 changes: 1 addition & 1 deletion cmd/suggestion/nas/enas/v1beta1/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.10-slim
FROM python:3.11-slim

ARG TARGETARCH
ENV TARGET_DIR /opt/katib
Expand Down
2 changes: 1 addition & 1 deletion cmd/suggestion/optuna/v1beta1/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.10-slim
FROM python:3.11-slim

ARG TARGETARCH
ENV TARGET_DIR /opt/katib
Expand Down
2 changes: 1 addition & 1 deletion cmd/suggestion/pbt/v1beta1/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.10-slim
FROM python:3.11-slim

ARG TARGETARCH
ENV TARGET_DIR /opt/katib
Expand Down
2 changes: 1 addition & 1 deletion cmd/suggestion/pbt/v1beta1/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
grpcio>=1.41.1
protobuf>=3.19.5, <=3.20.3
googleapis-common-protos==1.53.0
numpy==1.22.2
numpy==1.25.2
2 changes: 1 addition & 1 deletion docs/developer-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ see the following user guides:
- [Docker](https://docs.docker.com/) (20.10 or later)
- [Docker Buildx](https://docs.docker.com/build/buildx/) (0.8.0 or later)
- [Java](https://docs.oracle.com/javase/8/docs/technotes/guides/install/install_overview.html) (8 or later)
- [Python](https://www.python.org/) (3.10 or later)
- [Python](https://www.python.org/) (3.11 or later)
- [kustomize](https://kustomize.io/) (4.0.5 or later)

## Build from source code
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
FROM python:3.10-slim
FROM python:3.11-slim

ENV TARGET_DIR /opt/darts-cnn-cifar10

ADD examples/v1beta1/trial-images/darts-cnn-cifar10 ${TARGET_DIR}

WORKDIR ${TARGET_DIR}

RUN pip install --prefer-binary --no-cache-dir torch==2.2.1 torchvision==0.17.1
RUN pip install --prefer-binary --no-cache-dir -r requirements.txt
RUN chgrp -R 0 ${TARGET_DIR} \
&& chmod -R g+rwX ${TARGET_DIR}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# We need to use the nvcr.io/nvidia/pytorch image as a base image to support both linux/amd64 and linux/arm64 platforms.
# PyTorch=1.13.0, cuda=11.8.0
# Ref: https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-22-11.html#rel-22-11
FROM nvcr.io/nvidia/pytorch:22.11-py3
# PyTorch=2.2.0, cuda=12.3.2
# Ref: https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-01.html#rel-24-01
FROM nvcr.io/nvidia/pytorch:24.01-py3

ENV TARGET_DIR /opt/darts-cnn-cifar10

Expand Down
18 changes: 13 additions & 5 deletions examples/v1beta1/trial-images/darts-cnn-cifar10/architect.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,12 @@ class Architect():
"""Architect controls the architecture of a cell by computing gradients of the alphas.
"""

def __init__(self, model, w_momentum, w_weight_decay):
def __init__(self, model, w_momentum, w_weight_decay, device):
self.model = model
self.v_model = copy.deepcopy(model)
self.w_momentum = w_momentum
self.w_weight_decay = w_weight_decay
self.device = device

def virtual_step(self, train_x, train_y, xi, w_optim):
"""
Expand All @@ -43,17 +44,21 @@ def virtual_step(self, train_x, train_y, xi, w_optim):
# Forward and calculate loss
# Loss for train with w. L_train(w)
loss = self.model.loss(train_x, train_y)

# Compute gradient
gradients = torch.autograd.grad(loss, self.model.getWeights())

# Do virtual step (Update gradient)
# Below operations do not need gradient tracking
with torch.no_grad():
# The optimizer state is keyed by the parameter object itself, not by its value,
# so the original network weights have to be iterated as well.
for w, vw, g in zip(self.model.getWeights(), self.v_model.getWeights(), gradients):
m = w_optim.state[w].get("momentum_buffer", 0.) * self.w_momentum
vw.copy_(w - torch.FloatTensor(xi) * (m + g + self.w_weight_decay * w))
if(self.device == 'cuda'):
vw.copy_(w - torch.cuda.FloatTensor(xi) * (m + g + self.w_weight_decay * w))
elif(self.device == 'cpu'):
vw.copy_(w - torch.FloatTensor(xi) * (m + g + self.w_weight_decay * w))

# Sync alphas
for a, va in zip(self.model.getAlphas(), self.v_model.getAlphas()):
Expand All @@ -71,7 +76,7 @@ def unrolled_backward(self, train_x, train_y, valid_x, valid_y, xi, w_optim):
# Calculate unrolled loss
# Loss for validation with w'. L_valid(w')
loss = self.v_model.loss(valid_x, valid_y)

# Calculate gradient
v_alphas = tuple(self.v_model.getAlphas())
v_weights = tuple(self.v_model.getWeights())
Expand All @@ -85,7 +90,10 @@ def unrolled_backward(self, train_x, train_y, valid_x, valid_y, xi, w_optim):
# Update final gradient = dalpha - xi * hessian
with torch.no_grad():
for alpha, da, h in zip(self.model.getAlphas(), dalpha, hessian):
alpha.grad = da - torch.FloatTensor(xi) * h
if(self.device == 'cuda'):
    # Build the xi scaling tensor on the GPU so it matches da and h.
    alpha.grad = da - torch.cuda.FloatTensor(xi) * h
elif(self.device == 'cpu'):
    # The CPU legacy tensor constructor lives directly on the torch module
    # (torch.FloatTensor), exactly as virtual_step uses it; there is no
    # torch.cpu.FloatTensor, so the previous spelling failed on CPU-only runs.
    alpha.grad = da - torch.FloatTensor(xi) * h

def compute_hessian(self, dws, train_x, train_y):
"""
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1 @@
torch==1.13.1
torchvision==0.14.1
Pillow>=9.1.1
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def main():
num_epochs,
eta_min=w_lr_min)

architect = Architect(model, w_momentum, w_weight_decay)
architect = Architect(model, w_momentum, w_weight_decay, device)

# Start training
best_top1 = 0.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.10-slim
FROM python:3.11-slim

ARG TARGETARCH
ENV TARGET_DIR /opt/enas-cnn-cifar10
Expand Down
3 changes: 2 additions & 1 deletion examples/v1beta1/trial-images/pytorch-mnist/Dockerfile.cpu
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
FROM python:3.10-slim
FROM python:3.11-slim

ADD examples/v1beta1/trial-images/pytorch-mnist /opt/pytorch-mnist

WORKDIR /opt/pytorch-mnist

# Add folder for the logs.
RUN mkdir /katib
RUN pip install --prefer-binary --no-cache-dir torch==2.2.1 torchvision==0.17.1
RUN pip install --prefer-binary --no-cache-dir -r requirements.txt

RUN chgrp -R 0 /opt/pytorch-mnist \
Expand Down
6 changes: 3 additions & 3 deletions examples/v1beta1/trial-images/pytorch-mnist/Dockerfile.gpu
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# We need to use the nvcr.io/nvidia/pytorch image as a base image to support both linux/amd64 and linux/arm64 platforms.
# PyTorch=1.13.0, cuda=11.8.0
# Ref: https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-22-11.html#rel-22-11
FROM nvcr.io/nvidia/pytorch:22.11-py3
# PyTorch=2.2.0, cuda=12.3.2
# Ref: https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-01.html#rel-24-01
FROM nvcr.io/nvidia/pytorch:24.01-py3

ADD examples/v1beta1/trial-images/pytorch-mnist /opt/pytorch-mnist

Expand Down
2 changes: 0 additions & 2 deletions examples/v1beta1/trial-images/pytorch-mnist/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,2 @@
cloudml-hypertune==0.1.0.dev6
torch==1.13.1
torchvision==0.14.1
Pillow>=9.1.1
2 changes: 1 addition & 1 deletion examples/v1beta1/trial-images/simple-pbt/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.10-slim
FROM python:3.11-slim

ADD examples/v1beta1/trial-images/simple-pbt /opt/pbt

Expand Down
2 changes: 1 addition & 1 deletion examples/v1beta1/trial-images/simple-pbt/requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
numpy==1.22.2
numpy==1.25.2
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.10-slim
FROM python:3.11-slim

ARG TARGETARCH

Expand Down
16 changes: 8 additions & 8 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ require (
github.com/shirou/gopsutil/v3 v3.22.5
github.com/spf13/viper v1.9.0
github.com/tidwall/gjson v1.14.1
golang.org/x/net v0.10.0
google.golang.org/grpc v1.55.0
golang.org/x/net v0.17.0
google.golang.org/grpc v1.56.3
k8s.io/api v0.27.4
k8s.io/apimachinery v0.27.4
k8s.io/client-go v0.27.4
Expand Down Expand Up @@ -69,7 +69,7 @@ require (
github.com/dimchansky/utfbom v1.1.1 // indirect
github.com/docker/cli v24.0.0+incompatible // indirect
github.com/docker/distribution v2.8.2+incompatible // indirect
github.com/docker/docker v24.0.0+incompatible // indirect
github.com/docker/docker v24.0.9+incompatible // indirect
github.com/docker/docker-credential-helpers v0.7.0 // indirect
github.com/emicklei/go-restful/v3 v3.10.2 // indirect
github.com/evanphx/json-patch v4.12.0+incompatible // indirect
Expand Down Expand Up @@ -126,20 +126,20 @@ require (
go.uber.org/atomic v1.7.0 // indirect
go.uber.org/multierr v1.6.0 // indirect
go.uber.org/zap v1.24.0 // indirect
golang.org/x/crypto v0.9.0 // indirect
golang.org/x/crypto v0.17.0 // indirect
golang.org/x/mod v0.10.0 // indirect
golang.org/x/oauth2 v0.8.0 // indirect
golang.org/x/sync v0.2.0 // indirect
golang.org/x/sys v0.8.0 // indirect
golang.org/x/term v0.8.0 // indirect
golang.org/x/text v0.9.0 // indirect
golang.org/x/sys v0.15.0 // indirect
golang.org/x/term v0.15.0 // indirect
golang.org/x/text v0.14.0 // indirect
golang.org/x/time v0.3.0 // indirect
golang.org/x/tools v0.9.1 // indirect
gomodules.xyz/jsonpatch/v2 v2.3.0 // indirect
gonum.org/v1/gonum v0.8.2 // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1 // indirect
google.golang.org/protobuf v1.30.0 // indirect
google.golang.org/protobuf v1.33.0 // indirect
gopkg.in/fsnotify/fsnotify.v1 v1.4.7 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/ini.v1 v1.63.2 // indirect
Expand Down
Loading

0 comments on commit a4e4b62

Please sign in to comment.