You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
I am trying to run a very basic Kubeflow Pipeline with an MNIST classifier:
import kfp
from kfp import dsl
from kfp.dsl import Dataset, Input, Model, Output
import torch
import torchvision
@dsl.component(base_image='python:3.11', packages_to_install=['torch', 'torchvision'])
def load_training_data(train_dataset: Output[Dataset], test_dataset: Output[Dataset]):
    """Download the MNIST train/test splits and write them to output artifacts.

    Args:
        train_dataset: Output artifact; the training split is serialized with
            ``torch.save`` to ``train_dataset.path``.
        test_dataset: Output artifact; the test split is serialized with
            ``torch.save`` to ``test_dataset.path``.
    """
    # KFP lightweight components are executed as standalone functions inside
    # the container, so module-level imports of the pipeline file are NOT
    # available here. Everything the body needs must be imported locally.
    import torch
    import torchvision

    # Load MNIST dataset (downloaded into the container's ./data directory).
    train_dataset_obj = torchvision.datasets.MNIST(
        root='./data',
        train=True,
        download=True,
        transform=torchvision.transforms.ToTensor(),
    )
    test_dataset_obj = torchvision.datasets.MNIST(
        root='./data',
        train=False,
        download=True,
        transform=torchvision.transforms.ToTensor(),
    )

    # Save datasets: the whole dataset objects are pickled, so the consumer
    # must load them with ``weights_only=False`` and have torchvision installed.
    torch.save(train_dataset_obj, train_dataset.path)
    torch.save(test_dataset_obj, test_dataset.path)
@dsl.component(base_image='python:3.11', packages_to_install=['torch', 'torchvision'])
def train_model(train_dataset: Input[Dataset], test_dataset: Input[Dataset], model: Output[Model]):
    """Train a small fully connected classifier and save its state dict.

    Args:
        train_dataset: Input artifact holding the pickled training dataset.
        test_dataset: Input artifact holding the pickled test dataset.
        model: Output artifact; the trained model's ``state_dict`` is written
            to ``model.path`` with ``torch.save``.
    """
    # KFP lightweight components run in isolation from the pipeline file, so
    # the import has to live inside the function body.
    import torch

    # Load datasets. The artifacts are full pickled dataset objects, not plain
    # tensors, so the restricted unpickler (the default since torch 2.6)
    # would reject them; weights_only=False is required.
    # NOTE: this runs arbitrary pickle code; acceptable only because the
    # artifacts are produced by this pipeline's own upstream step.
    train_dataset_obj = torch.load(train_dataset.path, weights_only=False)
    test_dataset_obj = torch.load(test_dataset.path, weights_only=False)

    # Define a simple neural network
    model_obj = torch.nn.Sequential(
        torch.nn.Flatten(),
        torch.nn.Linear(28 * 28, 128),
        torch.nn.ReLU(),
        torch.nn.Linear(128, 10),
    )

    # Train the model
    optimizer = torch.optim.Adam(model_obj.parameters())
    criterion = torch.nn.CrossEntropyLoss()
    train_loader = torch.utils.data.DataLoader(train_dataset_obj, batch_size=64, shuffle=True)
    for _epoch in range(1):  # Train for 1 epoch
        for inputs, targets in train_loader:
            outputs = model_obj(inputs)
            loss = criterion(outputs, targets)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # Evaluate the model
    test_loader = torch.utils.data.DataLoader(test_dataset_obj, batch_size=64, shuffle=False)
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            outputs = model_obj(inputs)
            # Index of the largest logit is the predicted class.
            predicted = outputs.argmax(dim=1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total:.2f}%')

    # Save the trained model (weights only, not the module object)
    torch.save(model_obj.state_dict(), model.path)
@dsl.component(base_image='python:3.11', packages_to_install=['torch', 'torchvision'])
def save_trained_model(model: Input[Model]):
    """Load the trained state dict from the input artifact and re-save it.

    Args:
        model: Input artifact whose ``path`` points at a state dict written
            with ``torch.save``.
    """
    # KFP lightweight components run in isolation from the pipeline file, so
    # the import has to live inside the function body.
    import torch

    # Load the trained model
    model_state_dict = torch.load(model.path)

    # Save the trained model to a file.
    # NOTE(review): this relative path is resolved inside the step's container;
    # presumably the file is lost when the pod exits. Confirm whether an
    # Output[Model] artifact or mounted volume is intended instead.
    torch.save(model_state_dict, 'mnist_model.pth')
@dsl.pipeline(name='MNIST Training Pipeline')
def mnist_pipeline():
    """Wire the three steps together: load data -> train -> save model."""
    data_step = load_training_data()
    data_outputs = data_step.outputs

    # Training consumes both dataset artifacts produced by the loading step.
    training_step = train_model(
        train_dataset=data_outputs['train_dataset'],
        test_dataset=data_outputs['test_dataset'],
    )

    # Final step receives the trained model artifact.
    save_trained_model(model=training_step.outputs['model'])
if __name__ == '__main__':
    # Compile the pipeline definition to a YAML package on disk.
    pipeline_compiler = kfp.compiler.Compiler()
    pipeline_compiler.compile(mnist_pipeline, 'mnist_pipeline.yaml')

    # Upload the compiled package using the default client configuration.
    client = kfp.Client()
    upload_kwargs = {
        'pipeline_package_path': 'mnist_pipeline.yaml',
        'pipeline_name': 'MNIST Training Pipeline 3',
        'description': 'A pipeline to train a model on the MNIST dataset',
    }
    pipeline_info = client.upload_pipeline(**upload_kwargs)
But when I start a "run" with a pipeline (in UI), unfortunately it's stuck on the first component (load_training_data).
Can you give me some ideas about what I am doing wrong? I suppose it is something related to the data download, but I am not an expert in Kubeflow.
reacted with thumbs up emoji reacted with thumbs down emoji reacted with laugh emoji reacted with hooray emoji reacted with confused emoji reacted with heart emoji reacted with rocket emoji reacted with eyes emoji
-
I am trying to run a very basic Kubeflow Pipeline with an MNIST classifier:
But when I start a "run" with a pipeline (in UI), unfortunately it's stuck on the first component (load_training_data).
Can you give me some ideas about what I am doing wrong? I suppose it is something related to the data download, but I am not an expert in Kubeflow.
Additional Info:
Beta Was this translation helpful? Give feedback.
All reactions