Skip to content

Ray version update #305

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jun 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions data-processing-lib/ray/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ authors = [
]
dependencies = [
"data-prep-toolkit==0.2.0.dev6",
"ray[default]==2.9.3",
"ray[default]==2.24.0",
# These two are to fix security issues identified by quay.io
"fastapi>=0.109.1",
"pillow>=10.2.0",
"fastapi>=0.110.2",
"pillow>=10.3.0",
]

[project_urls]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def orchestrate(

logger = get_logger(__name__)
start_ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
start_time = time.time()
logger.info(f"orchestrator started at {start_ts}")
try:
# create data access
Expand Down Expand Up @@ -134,7 +135,7 @@ def orchestrate(
"job_input_params": runtime_config.get_transform_metadata()
| data_access_factory.get_input_params()
| preprocessing_params.get_input_params(),
"execution_stats": resources,
"execution_stats": resources | {"execution time, min": (time.time() - start_time)/60, },
"job_output_stats": stats,
}
logger.debug(f"Saving job metadata: {metadata}.")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,23 @@ def __init__(self, params: dict[str, Any]):
from ray.util.metrics import Counter

super().__init__()
self.data_write_counter = Counter("data_written", "Total data written bytes")
self.data_read_counter = Counter("data_read", "Total data read bytes")
self.source_files_counter = Counter("source_files_processed", "Total source files processed")
self.result_files_counter = Counter("result_files_written", "Total result files written")
self.source_documents_counter = Counter("source_documents_processed", "Total source document processed")
self.result_documents_counter = Counter("result_documents_written", "Total result documents written")
self.empty_table_counter = Counter("empty_tables", "Total empty tables read")
self.failed_read_counter = Counter("failed_read_files", "Total read failed files")
self.failed_write_counter = Counter("failed_write_files", "Total write failed files")
self.transform_exceptions_counter = Counter("transform_exceptions", "Transform exception occurred")
self.data_retries_counter = Counter("data_access_retries", "Data access retries")
self.data_write_counter = Counter(name="data_written", description="Total data written bytes")
self.data_read_counter = Counter(name="data_read", description="Total data read bytes")
self.source_files_counter = Counter(name="source_files_processed", description="Total source files processed")
self.result_files_counter = Counter(name="result_files_written", description="Total result files written")
self.source_documents_counter = Counter(
name="source_documents_processed", description="Total source document processed"
)
self.result_documents_counter = Counter(
name="result_documents_written", description="Total result documents written"
)
self.empty_table_counter = Counter(name="empty_tables", description="Total empty tables read")
self.failed_read_counter = Counter(name="failed_read_files", description="Total read failed files")
self.failed_write_counter = Counter(name="failed_write_files", description="Total write failed files")
self.transform_exceptions_counter = Counter(
name="transform_exceptions", description="Transform exception occurred"
)
self.data_retries_counter = Counter(name="data_access_retries", description="Data access retries")

def add_stats(self, stats=dict[str, Any]) -> None:
"""
Expand Down
2 changes: 1 addition & 1 deletion kfp/kfp_ray_components/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM docker.io/rayproject/ray:2.9.3-py310
FROM docker.io/rayproject/ray:2.24.0-py310
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to add a make rule to set the Ray base image and its version.

Copy link
Contributor Author

@blublinsky blublinsky Jun 20, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are welcome to open another PR once this one is merged. We actually discussed this with @revit13 while we were debugging this.


# install libraries
COPY requirements.txt requirements.txt
Expand Down
2 changes: 1 addition & 1 deletion kfp/kfp_ray_components/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ endif

.PHONY: .lib-src-image
.lib-src-image::
$(MAKE) .defaults.copy-lib LIB_PATH=$(DPK_RAY_LIB_DIR) LIB_NAME=data-processing-lib-ray
$(MAKE) .defaults.copy-lib LIB_PATH=$(DPK_PYTHON_LIB_DIR) LIB_NAME=data-processing-lib-python
$(MAKE) .defaults.copy-lib LIB_PATH=$(DPK_RAY_LIB_DIR) LIB_NAME=data-processing-lib-ray
$(MAKE) .defaults.copy-lib LIB_PATH=$(REPOROOT)/kfp/kfp_support_lib/shared_workflow_support LIB_NAME=shared_workflow_support_lib
$(MAKE) .defaults.copy-lib LIB_PATH=$(REPOROOT)/kfp/kfp_support_lib/$(WORKFLOW_SUPPORT_LIB) LIB_NAME=workflow_support_lib
$(MAKE) .defaults.image
Expand Down
5 changes: 3 additions & 2 deletions kfp/kfp_support_lib/kfp_v1_workflow_support/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,8 @@ clean::
set-versions: .check-env
@# Help: Copy the Makefile distribution version into the pyproject.toml
sed -i.back 's/^version[ ]*=.*/version = "'${DPK_LIB_KFP_VERSION}'"/' pyproject.toml
sed -i.back 's/data_prep_toolkit==[0-9].*/data_prep_toolkit==${DPK_LIB_VERSION}",/' pyproject.toml
sed -i.back 's/data_prep_toolkit_kfp_shared==[0-9].*/data_prep_toolkit_kfp_shared==${DPK_LIB_KFP_SHARED}",/' pyproject.toml
sed -i.back 's/kfp==[0-9].*/kfp==${KFP_v1}",/' pyproject.toml
sed -i.back 's/ray=[0-9].*/ray==${RAY}",/' pyproject.toml

build:: set-versions venv
ifeq ($(KFPv2), 1)
Expand Down Expand Up @@ -60,8 +59,10 @@ else
$(PYTHON) -m venv venv
. ${VENV_ACTIVATE}; \
cd ../../../data-processing-lib/python && make set-versions && cd -; \
cd ../../../data-processing-lib/ray && make set-versions && cd -; \
pip install --upgrade pip; \
pip install -e ../../../data-processing-lib/python; \
pip install -e ../../../data-processing-lib/ray; \
cd ../shared_workflow_support && make set-versions && cd -; \
pip install -e ../shared_workflow_support; \
pip install -e .; \
Expand Down
4 changes: 1 addition & 3 deletions kfp/kfp_support_lib/kfp_v1_workflow_support/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,7 @@ authors = [
]
dependencies = [
"kfp==1.8.22",
"ray==2.9.3",
"requests",
"data_prep_toolkit==0.2.0.dev6",
"data_prep_toolkit_kfp_shared==0.2.0.dev6",
]

[build-system]
Expand Down
5 changes: 3 additions & 2 deletions kfp/kfp_support_lib/kfp_v2_workflow_support/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,8 @@ clean::
set-versions: .check-env
@# Help: Copy the Makefile distribution version into the pyproject.toml
sed -i.back 's/^version[ ]*=.*/version = "'${DPK_LIB_KFP_VERSION_v2}'"/' pyproject.toml
sed -i.back 's/data_prep_toolkit==[0-9].*/data_prep_toolkit==${DPK_LIB_VERSION}",/' pyproject.toml
sed -i.back 's/data_prep_toolkit_kfp_shared==[0-9].*/data_prep_toolkit_kfp_shared==${DPK_LIB_KFP_SHARED}",/' pyproject.toml
sed -i.back 's/kfp==[0-9].*/kfp==${KFP_v2}",/' pyproject.toml
sed -i.back 's/ray=[0-9].*/ray==${RAY}",/' pyproject.toml

build:: set-versions venv
ifneq ($(KFPv2), 1)
Expand Down Expand Up @@ -60,8 +59,10 @@ else
$(PYTHON) -m venv venv
. ${VENV_ACTIVATE}; \
cd ../../../data-processing-lib/python && make set-versions && cd -; \
cd ../../../data-processing-lib/ray && make set-versions && cd -; \
pip install --upgrade pip; \
pip install -e ../../../data-processing-lib/python; \
pip install -e ../../../data-processing-lib/ray; \
cd ../shared_workflow_support && make set-versions && cd -; \
pip install -e ../shared_workflow_support; \
pip install -e .; \
Expand Down
4 changes: 1 addition & 3 deletions kfp/kfp_support_lib/kfp_v2_workflow_support/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,7 @@ authors = [
dependencies = [
"kfp==2.7.0",
"kfp-kubernetes==1.2.0",
"ray==2.9.3",
"requests",
"data_prep_toolkit_ray==0.2.0.dev6",
"data_prep_toolkit_kfp_shared==0.2.0.dev6",
]

[build-system]
Expand Down
3 changes: 2 additions & 1 deletion kfp/kfp_support_lib/shared_workflow_support/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ set-versions: .check-env
@# Help: Copy the Makefile distribution version into the pyproject.toml
sed -i.back 's/^version[ ]*=.*/version = "'${DPK_LIB_KFP_SHARED}'"/' pyproject.toml
sed -i.back 's/data_prep_toolkit_ray==[0-9].*/data_prep_toolkit_ray==${DPK_LIB_VERSION}",/' pyproject.toml
sed -i.back 's/ray=[0-9].*/ray==${RAY}",/' pyproject.toml

build:: set-versions venv
@# Help: Build the distribution for publishing to a pypi
Expand All @@ -52,6 +51,8 @@ venv:: pyproject.toml .check-env
pip install --upgrade pip; \
cd ../../../data-processing-lib/python && make set-versions && cd -; \
pip install -e ../../../data-processing-lib/python; \
cd ../../../data-processing-lib/ray && make set-versions && cd -; \
pip install -e ../../../data-processing-lib/ray; \
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't you want to run set-versions there?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

version of what?

pip install -e .; \
pip install pytest pytest-cov

Expand Down
3 changes: 1 addition & 2 deletions kfp/kfp_support_lib/shared_workflow_support/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,9 @@ authors = [
{ name = "Revital Eres", email = "[email protected]" },
]
dependencies = [
"ray==2.9.3",
"requests",
"kubernetes",
"data_prep_toolkit==0.2.0.dev6",
"data_prep_toolkit_ray==0.2.0.dev6",
]

[build-system]
Expand Down
1 change: 0 additions & 1 deletion kfp/requirements.env
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
RAY=2.9.3
KFP_v2=2.7.0
KFP_v1=1.8.22

Expand Down
2 changes: 1 addition & 1 deletion tools/ingest2parquet/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM docker.io/rayproject/ray:2.9.3-py310
FROM docker.io/rayproject/ray:2.24.0-py310

# install pytest
RUN pip install --no-cache-dir pytest
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/code_quality/ray/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM docker.io/rayproject/ray:2.9.3-py310
FROM docker.io/rayproject/ray:2.24.0-py310

# install pytest
RUN pip install --no-cache-dir pytest
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/ingest_2_parquet/ray/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM docker.io/rayproject/ray:2.9.3-py310
FROM docker.io/rayproject/ray:2.24.0-py310

# install pytest
RUN pip install --no-cache-dir pytest
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/malware/ray/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM docker.io/rayproject/ray:2.9.3-py310 AS base
FROM docker.io/rayproject/ray:2.24.0-py310 AS base

USER root
RUN apt -y update \
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/proglang_select/ray/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM docker.io/rayproject/ray:2.9.3-py310
FROM docker.io/rayproject/ray:2.24.0-py310

# install pytest
RUN pip install --no-cache-dir pytest
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@ def get_test_transform_fixtures(self) -> list[tuple]:
"lang_id_model_url": "facebook/fasttext-language-identification",
"lang_id_content_column_name": "text",
}
basedir = "../test-data"
basedir = os.path.abspath(os.path.join(os.path.dirname(__file__), basedir))
basedir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../test-data"))
fixtures = []
launcher = PythonTransformLauncher(LangIdentificationPythonTransformConfiguration())
fixtures.append((launcher, cli_params, basedir + "/input", basedir + "/expected"))
Expand Down
2 changes: 1 addition & 1 deletion transforms/language/lang_id/ray/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM docker.io/rayproject/ray:2.9.3-py310
FROM docker.io/rayproject/ray:2.24.0-py310

# install pytest
RUN pip install --no-cache-dir pytest
Expand Down
2 changes: 1 addition & 1 deletion transforms/universal/doc_id/ray/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM docker.io/rayproject/ray:2.9.3-py310
FROM docker.io/rayproject/ray:2.24.0-py310

# install pytest
RUN pip install --no-cache-dir pytest
Expand Down
2 changes: 1 addition & 1 deletion transforms/universal/ededup/ray/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM docker.io/rayproject/ray:2.9.3-py310
FROM docker.io/rayproject/ray:2.24.0-py310

# install pytest
RUN pip install --no-cache-dir pytest
Expand Down
2 changes: 1 addition & 1 deletion transforms/universal/fdedup/ray/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM docker.io/rayproject/ray:2.9.3-py310
FROM docker.io/rayproject/ray:2.24.0-py310

# install pytest
RUN pip install --no-cache-dir pytest
Expand Down
2 changes: 1 addition & 1 deletion transforms/universal/filter/ray/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM docker.io/rayproject/ray:2.9.3-py310
FROM docker.io/rayproject/ray:2.24.0-py310

# install pytest
RUN pip install --no-cache-dir pytest
Expand Down
2 changes: 1 addition & 1 deletion transforms/universal/noop/ray/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM docker.io/rayproject/ray:2.9.3-py310
FROM docker.io/rayproject/ray:2.24.0-py310

# install pytest
RUN pip install --no-cache-dir pytest
Expand Down
2 changes: 1 addition & 1 deletion transforms/universal/profiler/ray/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM docker.io/rayproject/ray:2.9.3-py310
FROM docker.io/rayproject/ray:2.24.0-py310

# install pytest
RUN pip install --no-cache-dir pytest
Expand Down
2 changes: 1 addition & 1 deletion transforms/universal/tokenization/ray/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM docker.io/rayproject/ray:2.9.3-py310
FROM docker.io/rayproject/ray:2.24.0-py310

# install pytest
RUN pip install --no-cache-dir pytest
Expand Down