Skip to content

Commit

Permalink
[feat][python] Support MLflow task in python api (#11962)
Browse files Browse the repository at this point in the history
  • Loading branch information
jieguangzhou authored Sep 18, 2022
1 parent c24ad9a commit ad683c3
Show file tree
Hide file tree
Showing 8 changed files with 712 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ In this section
sub_process

sagemaker
mlflow
openmldb
pytorch
dvc
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
.. http://www.apache.org/licenses/LICENSE-2.0
.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
MLflow
=========


An example of the MLflow task type and a deeper look at its **PyDolphinScheduler** API.

Example
-------

.. literalinclude:: ../../../src/pydolphinscheduler/examples/task_mlflow_example.py
:start-after: [start workflow_declare]
:end-before: [end workflow_declare]

Dive Into
---------

.. automodule:: pydolphinscheduler.tasks.mlflow


YAML file example
-----------------

.. literalinclude:: ../../../examples/yaml_define/mlflow.yaml
:start-after: # under the License.
:language: yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.


# Define variable `mlflow_tracking_uri`
# The `&mlflow_tracking_uri` anchor lets every task below reuse this value
# through the `*mlflow_tracking_uri` alias instead of repeating the URL.
mlflow_tracking_uri: &mlflow_tracking_uri "http://127.0.0.1:5000"

# Define the workflow
workflow:
name: "MLflow"

# Define the tasks under the workflow
# The six tasks form three train -> deploy pairs; each deploy task lists its
# training task in `deps`.
tasks:
# Pair 1, training: task type MLFlowProjectsCustom pointed at a project
# inside the mlflow GitHub repository.
- name: train_xgboost_native
task_type: MLFlowProjectsCustom
repository: https://github.com/mlflow/mlflow#examples/xgboost/xgboost_native
mlflow_tracking_uri: *mlflow_tracking_uri
parameters: -P learning_rate=0.2 -P colsample_bytree=0.8 -P subsample=0.9
experiment_name: xgboost


# Pair 1, deployment: deploy_mode MLFLOW, declared to depend on
# train_xgboost_native.
- name: deploy_mlflow
deps: [train_xgboost_native]
task_type: MLflowModels
model_uri: models:/xgboost_native/Production
mlflow_tracking_uri: *mlflow_tracking_uri
deploy_mode: MLFLOW
port: 7001

# Pair 2, training: task type MLFlowProjectsAutoML with automl_tool flaml.
# `parameters` is a single string of `key=value` entries joined by `;`.
- name: train_automl
task_type: MLFlowProjectsAutoML
mlflow_tracking_uri: *mlflow_tracking_uri
parameters: time_budget=30;estimator_list=['lgbm']
experiment_name: automl_iris
model_name: iris_A
automl_tool: flaml
data_path: /data/examples/iris

# Pair 2, deployment: deploy_mode DOCKER; model_uri refers to the `iris_A`
# model name used by train_automl.
- name: deploy_docker
task_type: MLflowModels
deps: [train_automl]
model_uri: models:/iris_A/Production
mlflow_tracking_uri: *mlflow_tracking_uri
deploy_mode: DOCKER
port: 7002

# Pair 3, training: task type MLFlowProjectsBasicAlgorithm with algorithm
# lightgbm. `search_params` uses the same `;`-joined string form as
# `parameters`, with list-like values.
- name: train_basic_algorithm
task_type: MLFlowProjectsBasicAlgorithm
mlflow_tracking_uri: *mlflow_tracking_uri
parameters: n_estimators=200;learning_rate=0.2
experiment_name: basic_algorithm_iris
model_name: iris_B
algorithm: lightgbm
data_path: /data/examples/iris
search_params: max_depth=[5, 10];n_estimators=[100, 200]


# Pair 3, deployment: deploy_mode is the two-word value `DOCKER COMPOSE`
# (with a space); model_uri refers to the `iris_B` model name used by
# train_basic_algorithm.
- name: deploy_docker_compose
task_type: MLflowModels
deps: [train_basic_algorithm]
model_uri: models:/iris_B/Production
mlflow_tracking_uri: *mlflow_tracking_uri
deploy_mode: DOCKER COMPOSE
port: 7003
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ class TaskType(str):
SPARK = "SPARK"
MR = "MR"
SAGEMAKER = "SAGEMAKER"
MLFLOW = "MLFLOW"
OPENMLDB = "OPENMLDB"
PYTORCH = "PYTORCH"
DVC = "DVC"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# [start workflow_declare]
"""An example workflow for task mlflow."""

from pydolphinscheduler.core.process_definition import ProcessDefinition
from pydolphinscheduler.tasks.mlflow import (
MLflowDeployType,
MLflowModels,
MLFlowProjectsAutoML,
MLFlowProjectsBasicAlgorithm,
MLFlowProjectsCustom,
)

# MLflow tracking URI handed to every task declared below.
mlflow_tracking_uri = "http://127.0.0.1:5000"

with ProcessDefinition(name="task_mlflow_example", tenant="tenant_exists") as pd:

    # ------------------------------------------------------------------
    # Pair 1: train from a custom MLflow project, deploy with MLFLOW.
    # ------------------------------------------------------------------
    train_custom = MLFlowProjectsCustom(
        name="train_xgboost_native",
        mlflow_tracking_uri=mlflow_tracking_uri,
        experiment_name="xgboost",
        repository="https://github.com/mlflow/mlflow#examples/xgboost/xgboost_native",
        parameters="-P learning_rate=0.2 -P colsample_bytree=0.8 -P subsample=0.9",
    )

    deploy_mlflow = MLflowModels(
        name="deploy_mlflow",
        mlflow_tracking_uri=mlflow_tracking_uri,
        model_uri="models:/xgboost_native/Production",
        deploy_mode=MLflowDeployType.MLFLOW,
        port=7001,
    )

    # The deploy task is declared downstream of its training task.
    train_custom >> deploy_mlflow

    # ------------------------------------------------------------------
    # Pair 2: train with AutoML (flaml), deploy with DOCKER.
    # ------------------------------------------------------------------
    train_automl = MLFlowProjectsAutoML(
        name="train_automl",
        mlflow_tracking_uri=mlflow_tracking_uri,
        experiment_name="automl_iris",
        model_name="iris_A",
        automl_tool="flaml",
        data_path="/data/examples/iris",
        parameters="time_budget=30;estimator_list=['lgbm']",
    )

    deploy_docker = MLflowModels(
        name="deploy_docker",
        mlflow_tracking_uri=mlflow_tracking_uri,
        model_uri="models:/iris_A/Production",
        deploy_mode=MLflowDeployType.DOCKER,
        port=7002,
    )

    train_automl >> deploy_docker

    # ------------------------------------------------------------------
    # Pair 3: train with a basic algorithm (lightgbm), deploy with
    # DOCKER COMPOSE.
    # ------------------------------------------------------------------
    train_basic_algorithm = MLFlowProjectsBasicAlgorithm(
        name="train_basic_algorithm",
        mlflow_tracking_uri=mlflow_tracking_uri,
        experiment_name="basic_algorithm_iris",
        model_name="iris_B",
        algorithm="lightgbm",
        data_path="/data/examples/iris",
        parameters="n_estimators=200;learning_rate=0.2",
        search_params="max_depth=[5, 10];n_estimators=[100, 200]",
    )

    deploy_docker_compose = MLflowModels(
        name="deploy_docker_compose",
        mlflow_tracking_uri=mlflow_tracking_uri,
        model_uri="models:/iris_B/Production",
        deploy_mode=MLflowDeployType.DOCKER_COMPOSE,
        port=7003,
    )

    train_basic_algorithm >> deploy_docker_compose

    # All six tasks and their three dependencies are declared; submit the
    # workflow definition.
    pd.submit()

# [end workflow_declare]
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,12 @@
from pydolphinscheduler.tasks.flink import Flink
from pydolphinscheduler.tasks.http import Http
from pydolphinscheduler.tasks.map_reduce import MR
from pydolphinscheduler.tasks.mlflow import (
MLflowModels,
MLFlowProjectsAutoML,
MLFlowProjectsBasicAlgorithm,
MLFlowProjectsCustom,
)
from pydolphinscheduler.tasks.openmldb import OpenMLDB
from pydolphinscheduler.tasks.procedure import Procedure
from pydolphinscheduler.tasks.python import Python
Expand All @@ -47,6 +53,10 @@
"Http",
"MR",
"OpenMLDB",
"MLFlowProjectsBasicAlgorithm",
"MLFlowProjectsCustom",
"MLFlowProjectsAutoML",
"MLflowModels",
"Procedure",
"Python",
"Pytorch",
Expand Down
Loading

0 comments on commit ad683c3

Please sign in to comment.