-
Notifications
You must be signed in to change notification settings - Fork 134
Add dedicated fields for job reruns: run_group_id and rerun_from_job_id
#1512
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
c369f4e
54615cb
54076e7
9826083
2d94f54
fc60f82
a2dead6
3f8b982
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -466,6 +466,8 @@ def create_job( | |||||||||||||||||||||||||||||||||||||
| python_version: str | None = None, | ||||||||||||||||||||||||||||||||||||||
| params: dict[str, str] | None = None, | ||||||||||||||||||||||||||||||||||||||
| parent_job_id: str | None = None, | ||||||||||||||||||||||||||||||||||||||
| rerun_from_job_id: str | None = None, | ||||||||||||||||||||||||||||||||||||||
| run_group_id: str | None = None, | ||||||||||||||||||||||||||||||||||||||
| ) -> str: | ||||||||||||||||||||||||||||||||||||||
| """ | ||||||||||||||||||||||||||||||||||||||
| Creates a new job. | ||||||||||||||||||||||||||||||||||||||
|
|
@@ -1835,7 +1837,11 @@ def _jobs_columns() -> "list[SchemaItem]": | |||||||||||||||||||||||||||||||||||||
| Column("params", JSON, nullable=False), | ||||||||||||||||||||||||||||||||||||||
| Column("metrics", JSON, nullable=False), | ||||||||||||||||||||||||||||||||||||||
| Column("parent_job_id", Text, nullable=True), | ||||||||||||||||||||||||||||||||||||||
| Column("rerun_from_job_id", Text, nullable=True), | ||||||||||||||||||||||||||||||||||||||
| Column("run_group_id", Text, nullable=True), | ||||||||||||||||||||||||||||||||||||||
| Index("idx_jobs_parent_job_id", "parent_job_id"), | ||||||||||||||||||||||||||||||||||||||
| Index("idx_jobs_rerun_from_job_id", "rerun_from_job_id"), | ||||||||||||||||||||||||||||||||||||||
| Index("idx_jobs_run_group_id", "run_group_id"), | ||||||||||||||||||||||||||||||||||||||
| ] | ||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||
| @cached_property | ||||||||||||||||||||||||||||||||||||||
|
|
@@ -1896,13 +1902,29 @@ def create_job( | |||||||||||||||||||||||||||||||||||||
| python_version: str | None = None, | ||||||||||||||||||||||||||||||||||||||
| params: dict[str, str] | None = None, | ||||||||||||||||||||||||||||||||||||||
| parent_job_id: str | None = None, | ||||||||||||||||||||||||||||||||||||||
| rerun_from_job_id: str | None = None, | ||||||||||||||||||||||||||||||||||||||
| run_group_id: str | None = None, | ||||||||||||||||||||||||||||||||||||||
| conn: Any = None, | ||||||||||||||||||||||||||||||||||||||
| ) -> str: | ||||||||||||||||||||||||||||||||||||||
| """ | ||||||||||||||||||||||||||||||||||||||
| Creates a new job. | ||||||||||||||||||||||||||||||||||||||
| Returns the job id. | ||||||||||||||||||||||||||||||||||||||
| """ | ||||||||||||||||||||||||||||||||||||||
| job_id = str(uuid4()) | ||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||
| # Validate run_group_id and rerun_from_job_id consistency | ||||||||||||||||||||||||||||||||||||||
| if rerun_from_job_id: | ||||||||||||||||||||||||||||||||||||||
| # Rerun job: run_group_id must be provided by caller | ||||||||||||||||||||||||||||||||||||||
| assert run_group_id is not None, ( | ||||||||||||||||||||||||||||||||||||||
| "run_group_id must be provided when rerun_from_job_id is set" | ||||||||||||||||||||||||||||||||||||||
| ) | ||||||||||||||||||||||||||||||||||||||
| else: | ||||||||||||||||||||||||||||||||||||||
| # First job: run_group_id should not be provided (we set it here) | ||||||||||||||||||||||||||||||||||||||
| assert run_group_id is None, ( | ||||||||||||||||||||||||||||||||||||||
| "run_group_id should not be provided when rerun_from_job_id is not set" | ||||||||||||||||||||||||||||||||||||||
| ) | ||||||||||||||||||||||||||||||||||||||
|
Comment on lines
+1918
to
+1925
|
||||||||||||||||||||||||||||||||||||||
| assert run_group_id is not None, ( | |
| "run_group_id must be provided when rerun_from_job_id is set" | |
| ) | |
| else: | |
| # First job: run_group_id should not be provided (we set it here) | |
| assert run_group_id is None, ( | |
| "run_group_id should not be provided when rerun_from_job_id is not set" | |
| ) | |
| if run_group_id is None: | |
| raise ValueError( | |
| "run_group_id must be provided when rerun_from_job_id is set" | |
| ) | |
| else: | |
| # First job: run_group_id should not be provided (we set it here) | |
| if run_group_id is not None: | |
| raise ValueError( | |
| "run_group_id should not be provided when rerun_from_job_id is not set" | |
| ) |
Copilot
AI
Dec 18, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The safety check that filters by run_group_id could have issues with NULL values from legacy jobs created before this PR. In SQL, NULL == NULL evaluates to NULL (not TRUE), which means the recursive traversal might not work correctly for jobs with NULL run_group_id values.
Consider adding a check at the beginning of the function to handle the case where the starting job has a NULL run_group_id, or document that this function requires jobs to have non-NULL run_group_id values. For example, you could either:
- Return an empty list if the initial job's run_group_id is NULL
- Skip the run_group_id filter if the initial job's run_group_id is NULL (though this would reduce safety)
- Use a coalesce or IS NOT DISTINCT FROM comparison that handles NULLs properly
| & ( | |
| self._jobs.c.run_group_id | |
| == cast( | |
| ancestors_cte.c.run_group_id, self._jobs.c.run_group_id.type | |
| ) | |
| ), # Safety: only traverse within same run group | |
| & self._jobs.c.run_group_id.isnot_distinct_from( | |
| cast( | |
| ancestors_cte.c.run_group_id, | |
| self._jobs.c.run_group_id.type, | |
| ) | |
| ), # Safety: only traverse within same run group (handles NULLs) |
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -154,7 +154,7 @@ def get_or_create_job(self) -> "Job": | |||||
| script = str(uuid4()) | ||||||
| python_version = f"{sys.version_info.major}.{sys.version_info.minor}" | ||||||
|
|
||||||
| # try to find the parent job | ||||||
| # try to find the parent job for checkpoint/rerun chain | ||||||
|
||||||
| # try to find the parent job for checkpoint/rerun chain | |
| # try to find the previous job in the checkpoint/rerun chain |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -117,7 +117,7 @@ def test_get_or_create_links_to_parent(test_session, patch_argv, monkeypatch): | |
| session2 = Session(catalog=test_session.catalog) | ||
| job2 = session2.get_or_create_job() | ||
|
|
||
| assert job2.parent_job_id == job1.id | ||
| assert job2.rerun_from_job_id == job1.id | ||
|
||
|
|
||
|
|
||
| def test_nested_sessions_share_same_job(test_session, patch_argv, monkeypatch): | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The docstring should be expanded to document the new parameters
rerun_from_job_idandrun_group_id, especially their relationship and the validation requirements. Consider adding documentation that explains:rerun_from_job_id: The ID of the job that this job is rerunning from (None for initial jobs)run_group_id: The ID that groups all jobs in a rerun chain (automatically set to the job's own ID for initial jobs, must be provided when rerun_from_job_id is set)