diff --git a/.make.versions b/.make.versions index 3328b5a9a..f2d1ccc1b 100644 --- a/.make.versions +++ b/.make.versions @@ -33,7 +33,7 @@ FILTER_SPARK_VERSION=0.2.0$(RELEASE_VERSION_SUFFIX) NOOP_PYTHON_VERSION=0.9.0$(RELEASE_VERSION_SUFFIX) NOOP_RAY_VERSION=0.9.0$(RELEASE_VERSION_SUFFIX) NOOP_SPARK_VERSION=0.2.0$(RELEASE_VERSION_SUFFIX) -PROFILER_VERSION=0.2.0$(RELEASE_VERSION_SUFFIX) +PROFILER_RAY_VERSION=0.2.0$(RELEASE_VERSION_SUFFIX) RESIZE_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX) diff --git a/scripts/kind/populate_minio.sh b/scripts/kind/populate_minio.sh index 074470b0f..490d80a3b 100755 --- a/scripts/kind/populate_minio.sh +++ b/scripts/kind/populate_minio.sh @@ -26,4 +26,5 @@ mc cp --recursive ${REPOROOT}/transforms/universal/fdedup/ray/test-data/input/ k mc cp --recursive ${REPOROOT}/transforms/universal/filter/ray/test-data/input/ kfp/test/filter/input mc cp --recursive ${REPOROOT}/transforms/universal/noop/ray/test-data/input/ kfp/test/noop/input mc cp --recursive ${REPOROOT}/transforms/universal/tokenization/ray/test-data/ds01/input/ kfp/test/tokenization/ds01/input +mc cp --recursive ${REPOROOT}/transforms/universal/profiler/ray/test-data/input/ kfp/test/profiler/input diff --git a/transforms/universal/profiler/kfp_ray/Makefile b/transforms/universal/profiler/kfp_ray/Makefile index 30739f491..85150ab57 100644 --- a/transforms/universal/profiler/kfp_ray/Makefile +++ b/transforms/universal/profiler/kfp_ray/Makefile @@ -29,24 +29,22 @@ image:: load-image:: -set-versions: workflow-reconcile-requirements - .PHONY: workflow-build workflow-build: workflow-venv $(MAKE) $(YAML_WF) .PHONY: workflow-test workflow-test: workflow-build - $(MAKE) .transforms_workflows.test-pipeline TRANSFORM_SRC=${SRC_DIR} PIPELINE_FILE=profiler_wf.yaml + $(MAKE) .workflows.test-pipeline TRANSFORM_SRC=${SRC_DIR} PIPELINE_FILE=profiler_wf.yaml .PHONY: workflow-upload workflow-upload: workflow-build @for file in $(YAML_WF); do \ - $(MAKE) .transforms_workflows.upload-pipeline PIPELINE_FILE=$$file; \ + $(MAKE) .workflows.upload-pipeline PIPELINE_FILE=$$file; \ done -.PHONY: workflow-reconcile-requirements -workflow-reconcile-requirements: +.PHONY: set-versions +set-versions: @for file in $(PYTHON_WF); do \ - $(MAKE) .transforms_workflows.reconcile-requirements PIPELINE_FILE=$$file; \ + $(MAKE) .workflows.set-versions PIPELINE_FILE=$$file; \ done diff --git a/transforms/universal/profiler/kfp_ray/profiler_wf.py b/transforms/universal/profiler/kfp_ray/profiler_wf.py index 73fa2c881..292230adf 100644 --- a/transforms/universal/profiler/kfp_ray/profiler_wf.py +++ b/transforms/universal/profiler/kfp_ray/profiler_wf.py @@ -18,7 +18,7 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/profiler-ray:0.4.0.dev6" +task_image = "quay.io/dataprep1/data-prep-kit/profiler-ray:0.2.0.dev6" # the name of the job script EXEC_SCRIPT_NAME: str = "profiler_transform_ray.py" @@ -72,9 +72,10 @@ def profiler( # Ray cluster ray_name: str = "profiler-kfp-ray", # name of Ray cluster - ray_head_options: str = '{"cpu": 1, "memory": 4, "image_pull_secret": "", "image": "' + task_image + '" }', + # Add image_pull_secret and image_pull_policy to ray workers if needed + ray_head_options: str = '{"cpu": 1, "memory": 4, "image": "' + task_image + '" }', ray_worker_options: str = '{"replicas": 2, "max_replicas": 2, "min_replicas": 2, "cpu": 2, "memory": 4, ' - '"image_pull_secret": "", "image": "' + task_image + '"}', + '"image": "' + task_image + '"}', server_url: str = "http://kuberay-apiserver-service.kuberay.svc.cluster.local:8888", # data access. checkpointing is not supported by dedup data_s3_config: str = "{'input_folder': 'test/profiler/input/', 'output_folder': 'test/profiler/output'}", diff --git a/transforms/universal/profiler/kfp_ray/src/profiler_compute_execution_params.py b/transforms/universal/profiler/kfp_ray/src/profiler_compute_execution_params.py index 1ff699148..74cd6456a 100644 --- a/transforms/universal/profiler/kfp_ray/src/profiler_compute_execution_params.py +++ b/transforms/universal/profiler/kfp_ray/src/profiler_compute_execution_params.py @@ -46,7 +46,7 @@ def profiler_compute_execution_params( from data_processing.data_access import DataAccessS3 from data_processing.utils import GB, KB - from workflow_support.runtime_utils import KFPUtils + from runtime_utils import KFPUtils EXECUTION_OF_KB_DOC = 0.00025 @@ -116,7 +116,7 @@ def profiler_compute_execution_params( "runtime_pipeline_id": runtime_pipeline_id, "runtime_job_id": runtime_job_id, "runtime_code_location": runtime_code_location, - "aggregator_aggregator_cpu": aggregator_cpu, - "aggregator_num_aggregators": n_aggregators, - "aggregator_doc_column": doc_column, + "profiler_aggregator_cpu": aggregator_cpu, + "profiler_num_aggregators": n_aggregators, + "profiler_doc_column": doc_column, } diff --git a/transforms/universal/profiler/ray/Makefile b/transforms/universal/profiler/ray/Makefile index cca7c833e..01a117b5f 100644 --- a/transforms/universal/profiler/ray/Makefile +++ b/transforms/universal/profiler/ray/Makefile @@ -8,7 +8,7 @@ include $(REPOROOT)/transforms/.make.transforms TRANSFORM_NAME=profiler # $(REPOROOT)/.make.versions file contains the versions -DOCKER_IMAGE_VERSION=${PROFILER_VERSION} +DOCKER_IMAGE_VERSION=${PROFILER_RAY_VERSION} venv:: .transforms.ray-venv