Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Minor fixes to profiler workflow #308

Merged
merged 4 commits into from
Jun 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .make.versions
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ FILTER_SPARK_VERSION=0.2.0$(RELEASE_VERSION_SUFFIX)
NOOP_PYTHON_VERSION=0.9.0$(RELEASE_VERSION_SUFFIX)
NOOP_RAY_VERSION=0.9.0$(RELEASE_VERSION_SUFFIX)
NOOP_SPARK_VERSION=0.2.0$(RELEASE_VERSION_SUFFIX)
PROFILER_VERSION=0.2.0$(RELEASE_VERSION_SUFFIX)
PROFILER_RAY_VERSION=0.2.0$(RELEASE_VERSION_SUFFIX)

RESIZE_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX)

Expand Down
1 change: 1 addition & 0 deletions scripts/kind/populate_minio.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,5 @@ mc cp --recursive ${REPOROOT}/transforms/universal/fdedup/ray/test-data/input/ k
mc cp --recursive ${REPOROOT}/transforms/universal/filter/ray/test-data/input/ kfp/test/filter/input
mc cp --recursive ${REPOROOT}/transforms/universal/noop/ray/test-data/input/ kfp/test/noop/input
mc cp --recursive ${REPOROOT}/transforms/universal/tokenization/ray/test-data/ds01/input/ kfp/test/tokenization/ds01/input
mc cp --recursive ${REPOROOT}/transforms/universal/profiler/ray/test-data/input/ kfp/test/profiler/input

12 changes: 5 additions & 7 deletions transforms/universal/profiler/kfp_ray/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -29,24 +29,22 @@ image::

load-image::

set-versions: workflow-reconcile-requirements

.PHONY: workflow-build
workflow-build: workflow-venv
$(MAKE) $(YAML_WF)

.PHONY: workflow-test
workflow-test: workflow-build
$(MAKE) .transforms_workflows.test-pipeline TRANSFORM_SRC=${SRC_DIR} PIPELINE_FILE=profiler_wf.yaml
$(MAKE) .workflows.test-pipeline TRANSFORM_SRC=${SRC_DIR} PIPELINE_FILE=profiler_wf.yaml

.PHONY: workflow-upload
workflow-upload: workflow-build
@for file in $(YAML_WF); do \
$(MAKE) .transforms_workflows.upload-pipeline PIPELINE_FILE=$$file; \
$(MAKE) .workflows.upload-pipeline PIPELINE_FILE=$$file; \
done

.PHONY: workflow-reconcile-requirements
workflow-reconcile-requirements:
.PHONY: set-versions
set-versions:
@for file in $(PYTHON_WF); do \
$(MAKE) .transforms_workflows.reconcile-requirements PIPELINE_FILE=$$file; \
$(MAKE) .workflows.set-versions PIPELINE_FILE=$$file; \
done
7 changes: 4 additions & 3 deletions transforms/universal/profiler/kfp_ray/profiler_wf.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils


task_image = "quay.io/dataprep1/data-prep-kit/profiler-ray:0.4.0.dev6"
task_image = "quay.io/dataprep1/data-prep-kit/profiler-ray:0.2.0.dev6"

# the name of the job script
EXEC_SCRIPT_NAME: str = "profiler_transform_ray.py"
Expand Down Expand Up @@ -72,9 +72,10 @@
def profiler(
# Ray cluster
ray_name: str = "profiler-kfp-ray", # name of Ray cluster
ray_head_options: str = '{"cpu": 1, "memory": 4, "image_pull_secret": "", "image": "' + task_image + '" }',
# Add image_pull_secret and image_pull_policy to ray workers if needed
ray_head_options: str = '{"cpu": 1, "memory": 4, "image": "' + task_image + '" }',
ray_worker_options: str = '{"replicas": 2, "max_replicas": 2, "min_replicas": 2, "cpu": 2, "memory": 4, '
'"image_pull_secret": "", "image": "' + task_image + '"}',
'"image": "' + task_image + '"}',
server_url: str = "http://kuberay-apiserver-service.kuberay.svc.cluster.local:8888",
# data access. checkpointing is not supported by profiler
data_s3_config: str = "{'input_folder': 'test/profiler/input/', 'output_folder': 'test/profiler/output'}",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def profiler_compute_execution_params(

from data_processing.data_access import DataAccessS3
from data_processing.utils import GB, KB
from workflow_support.runtime_utils import KFPUtils
from runtime_utils import KFPUtils

EXECUTION_OF_KB_DOC = 0.00025

Expand Down Expand Up @@ -116,7 +116,7 @@ def profiler_compute_execution_params(
"runtime_pipeline_id": runtime_pipeline_id,
"runtime_job_id": runtime_job_id,
"runtime_code_location": runtime_code_location,
"aggregator_aggregator_cpu": aggregator_cpu,
"aggregator_num_aggregators": n_aggregators,
"aggregator_doc_column": doc_column,
"profiler_aggregator_cpu": aggregator_cpu,
"profiler_num_aggregators": n_aggregators,
"profiler_doc_column": doc_column,
}
2 changes: 1 addition & 1 deletion transforms/universal/profiler/ray/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ include $(REPOROOT)/transforms/.make.transforms

TRANSFORM_NAME=profiler
# $(REPOROOT)/.make.versions file contains the versions
DOCKER_IMAGE_VERSION=${PROFILER_VERSION}
DOCKER_IMAGE_VERSION=${PROFILER_RAY_VERSION}

venv:: .transforms.ray-venv

Expand Down