
Commit 401712f

Authored and committed by kaxil and romsharon98
Remove deprecated SubDags (apache#41390)
This PR removes SubDags in favor of TaskGroups for Airflow 3.0. SubDags have been removed from the following locations:

- CLI
- API
- ``SubDagOperator``

This removal marks the end of SubDag support across all interfaces. Users should transition to using TaskGroups as a more efficient and maintainable alternative.

---------

Co-authored-by: Brent Bovenzi <[email protected]>
1 parent c6fe296 commit 401712f
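For users migrating off ``SubDagOperator``, the replacement pattern is a ``TaskGroup`` defined inline in the parent DAG rather than a separate child DAG. A minimal sketch follows; the DAG id, task ids and schedule are illustrative and not part of this commit:

import pendulum

from airflow.models.dag import DAG
from airflow.operators.empty import EmptyOperator
from airflow.utils.task_group import TaskGroup

with DAG(
    dag_id="example_taskgroup_migration",
    start_date=pendulum.datetime(2024, 1, 1, tz="UTC"),
    schedule=None,
) as dag:
    start = EmptyOperator(task_id="start")

    # Where a SubDagOperator used to point at a separate child DAG,
    # a TaskGroup nests the same tasks directly inside the parent DAG.
    with TaskGroup(group_id="section_1") as section_1:
        task_a = EmptyOperator(task_id="task_a")
        task_b = EmptyOperator(task_id="task_b")
        task_a >> task_b

    end = EmptyOperator(task_id="end")

    start >> section_1 >> end

Because a TaskGroup is a grouping construct rather than a separate DAG, its tasks run inside the parent DAG's own runs, which is what allows the changes below to drop subdag-specific handling from the CLI, the REST API and the ORM queries.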

File tree: 88 files changed, +1397 −3918 lines changed


airflow/api/common/delete_dag.py

+3 −23

@@ -22,11 +22,11 @@
 import logging
 from typing import TYPE_CHECKING
 
-from sqlalchemy import and_, delete, or_, select
+from sqlalchemy import delete, select
 
 from airflow import models
 from airflow.exceptions import AirflowException, DagNotFound
-from airflow.models import DagModel, TaskFail
+from airflow.models import DagModel
 from airflow.models.errors import ParseImportError
 from airflow.models.serialized_dag import SerializedDagModel
 from airflow.utils.db import get_sqla_model_classes
@@ -64,18 +64,6 @@ def delete_dag(dag_id: str, keep_records_in_log: bool = True, session: Session =
     if dag is None:
         raise DagNotFound(f"Dag id {dag_id} not found")
 
-    # deleting a DAG should also delete all of its subdags
-    dags_to_delete_query = session.execute(
-        select(DagModel.dag_id).where(
-            or_(
-                DagModel.dag_id == dag_id,
-                and_(DagModel.dag_id.like(f"{dag_id}.%"), DagModel.is_subdag),
-            )
-        )
-    )
-
-    dags_to_delete = [dag_id for (dag_id,) in dags_to_delete_query]
-
     # Scheduler removes DAGs without files from serialized_dag table every dag_dir_list_interval.
     # There may be a lag, so explicitly removes serialized DAG here.
     if SerializedDagModel.has_dag(dag_id=dag_id, session=session):
@@ -86,15 +74,7 @@ def delete_dag(dag_id: str, keep_records_in_log: bool = True, session: Session =
     for model in get_sqla_model_classes():
         if hasattr(model, "dag_id") and (not keep_records_in_log or model.__name__ != "Log"):
             count += session.execute(
-                delete(model)
-                .where(model.dag_id.in_(dags_to_delete))
-                .execution_options(synchronize_session="fetch")
-            ).rowcount
-    if dag.is_subdag:
-        parent_dag_id, task_id = dag_id.rsplit(".", 1)
-        for model in TaskFail, models.TaskInstance:
-            count += session.execute(
-                delete(model).where(model.dag_id == parent_dag_id, model.task_id == task_id)
+                delete(model).where(model.dag_id == dag_id).execution_options(synchronize_session="fetch")
             ).rowcount
 
     # Delete entries in Import Errors table for a deleted DAG
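For readers less familiar with the SQLAlchemy 2.0-style statements used above, here is a minimal, self-contained sketch of the simplified per-model delete. The toy model and in-memory SQLite engine are made up for illustration; none of these names come from Airflow:

from sqlalchemy import String, create_engine, delete
from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column


class Base(DeclarativeBase):
    pass


class TaskRecord(Base):  # stand-in for any ORM model carrying a dag_id column
    __tablename__ = "task_record"
    id: Mapped[int] = mapped_column(primary_key=True)
    dag_id: Mapped[str] = mapped_column(String(250))


engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add_all([TaskRecord(dag_id="example_dag"), TaskRecord(dag_id="other_dag")])
    session.flush()

    # One equality filter per DAG; the old OR over "<dag_id>.%" subdag ids is gone.
    count = session.execute(
        delete(TaskRecord)
        .where(TaskRecord.dag_id == "example_dag")
        .execution_options(synchronize_session="fetch")
    ).rowcount
    print(count)  # 1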

airflow/api/common/mark_tasks.py

+3 −103

@@ -26,12 +26,10 @@
 
 from airflow.models.dagrun import DagRun
 from airflow.models.taskinstance import TaskInstance
-from airflow.operators.subdag import SubDagOperator
 from airflow.utils import timezone
 from airflow.utils.helpers import exactly_one
 from airflow.utils.session import NEW_SESSION, provide_session
 from airflow.utils.state import DagRunState, State, TaskInstanceState
-from airflow.utils.types import DagRunType
 
 if TYPE_CHECKING:
     from datetime import datetime
@@ -40,6 +38,7 @@
 
     from airflow.models.dag import DAG
     from airflow.models.operator import Operator
+    from airflow.utils.types import DagRunType
 
 
 class _DagRunInfo(NamedTuple):
@@ -101,14 +100,14 @@ def set_state(
     Can set state for future tasks (calculated from run_id) and retroactively
     for past tasks. Will verify integrity of past dag runs in order to create
     tasks that did not exist. It will not create dag runs that are missing
-    on the schedule (but it will, as for subdag, dag runs if needed).
+    on the schedule.
 
     :param tasks: the iterable of tasks or (task, map_index) tuples from which to work.
         ``task.dag`` needs to be set
     :param run_id: the run_id of the dagrun to start looking from
     :param execution_date: the execution date from which to start looking (deprecated)
     :param upstream: Mark all parents (upstream tasks)
-    :param downstream: Mark all siblings (downstream tasks) of task_id, including SubDags
+    :param downstream: Mark all siblings (downstream tasks) of task_id
     :param future: Mark all future tasks on the interval of the dag up until
         last execution date.
     :param past: Retroactively mark all tasks starting from start_date of the DAG
@@ -140,54 +139,20 @@ def set_state(
 
     dag_run_ids = get_run_ids(dag, run_id, future, past, session=session)
     task_id_map_index_list = list(find_task_relatives(tasks, downstream, upstream))
-    task_ids = [task_id if isinstance(task_id, str) else task_id[0] for task_id in task_id_map_index_list]
-
-    confirmed_infos = list(_iter_existing_dag_run_infos(dag, dag_run_ids, session=session))
-    confirmed_dates = [info.logical_date for info in confirmed_infos]
-
-    sub_dag_run_ids = (
-        list(
-            _iter_subdag_run_ids(dag, session, DagRunState(state), task_ids, commit, confirmed_infos),
-        )
-        if not state == TaskInstanceState.SKIPPED
-        else []
-    )
-
     # now look for the task instances that are affected
 
     qry_dag = get_all_dag_task_query(dag, session, state, task_id_map_index_list, dag_run_ids)
 
     if commit:
         tis_altered = session.scalars(qry_dag.with_for_update()).all()
-        if sub_dag_run_ids:
-            qry_sub_dag = all_subdag_tasks_query(sub_dag_run_ids, session, state, confirmed_dates)
-            tis_altered += session.scalars(qry_sub_dag.with_for_update()).all()
         for task_instance in tis_altered:
             task_instance.set_state(state, session=session)
         session.flush()
     else:
         tis_altered = session.scalars(qry_dag).all()
-        if sub_dag_run_ids:
-            qry_sub_dag = all_subdag_tasks_query(sub_dag_run_ids, session, state, confirmed_dates)
-            tis_altered += session.scalars(qry_sub_dag).all()
     return tis_altered
 
 
-def all_subdag_tasks_query(
-    sub_dag_run_ids: list[str],
-    session: SASession,
-    state: TaskInstanceState,
-    confirmed_dates: Iterable[datetime],
-):
-    """Get *all* tasks of the sub dags."""
-    qry_sub_dag = (
-        select(TaskInstance)
-        .where(TaskInstance.dag_id.in_(sub_dag_run_ids), TaskInstance.execution_date.in_(confirmed_dates))
-        .where(or_(TaskInstance.state.is_(None), TaskInstance.state != state))
-    )
-    return qry_sub_dag
-
-
 def get_all_dag_task_query(
     dag: DAG,
     session: SASession,
@@ -208,71 +173,6 @@ def get_all_dag_task_query(
     return qry_dag
 
 
-def _iter_subdag_run_ids(
-    dag: DAG,
-    session: SASession,
-    state: DagRunState,
-    task_ids: list[str],
-    commit: bool,
-    confirmed_infos: Iterable[_DagRunInfo],
-) -> Iterator[str]:
-    """
-    Go through subdag operators and create dag runs.
-
-    We only work within the scope of the subdag. A subdag does not propagate to
-    its parent DAG, but parent propagates to subdags.
-    """
-    dags = [dag]
-    while dags:
-        current_dag = dags.pop()
-        for task_id in task_ids:
-            if not current_dag.has_task(task_id):
-                continue
-
-            current_task = current_dag.get_task(task_id)
-            if isinstance(current_task, SubDagOperator) or current_task.task_type == "SubDagOperator":
-                # this works as a kind of integrity check
-                # it creates missing dag runs for subdag operators,
-                # maybe this should be moved to dagrun.verify_integrity
-                if TYPE_CHECKING:
-                    assert current_task.subdag
-                dag_runs = _create_dagruns(
-                    current_task.subdag,
-                    infos=confirmed_infos,
-                    state=DagRunState.RUNNING,
-                    run_type=DagRunType.BACKFILL_JOB,
-                )
-
-                verify_dagruns(dag_runs, commit, state, session, current_task)
-
-                dags.append(current_task.subdag)
-                yield current_task.subdag.dag_id
-
-
-def verify_dagruns(
-    dag_runs: Iterable[DagRun],
-    commit: bool,
-    state: DagRunState,
-    session: SASession,
-    current_task: Operator,
-):
-    """
-    Verify integrity of dag_runs.
-
-    :param dag_runs: dag runs to verify
-    :param commit: whether dag runs state should be updated
-    :param state: state of the dag_run to set if commit is True
-    :param session: session to use
-    :param current_task: current task
-    """
-    for dag_run in dag_runs:
-        dag_run.dag = current_task.subdag
-        dag_run.verify_integrity()
-        if commit:
-            dag_run.state = state
-            session.merge(dag_run)
-
-
 def _iter_existing_dag_run_infos(dag: DAG, run_ids: list[str], session: SASession) -> Iterator[_DagRunInfo]:
     for dag_run in DagRun.find(dag_id=dag.dag_id, run_id=run_ids, session=session):
         dag_run.dag = dag

airflow/api/common/trigger_dag.py

+13 −17

@@ -43,7 +43,7 @@ def _trigger_dag(
     conf: dict | str | None = None,
     execution_date: datetime | None = None,
     replace_microseconds: bool = True,
-) -> list[DagRun | None]:
+) -> DagRun | None:
     """
     Triggers DAG run.
 
@@ -90,21 +90,17 @@
     if conf:
         run_conf = conf if isinstance(conf, dict) else json.loads(conf)
 
-    dag_runs = []
-    dags_to_run = [dag, *dag.subdags]
-    for _dag in dags_to_run:
-        dag_run = _dag.create_dagrun(
-            run_id=run_id,
-            execution_date=execution_date,
-            state=DagRunState.QUEUED,
-            conf=run_conf,
-            external_trigger=True,
-            dag_hash=dag_bag.dags_hash.get(dag_id),
-            data_interval=data_interval,
-        )
-        dag_runs.append(dag_run)
+    dag_run = dag.create_dagrun(
+        run_id=run_id,
+        execution_date=execution_date,
+        state=DagRunState.QUEUED,
+        conf=run_conf,
+        external_trigger=True,
+        dag_hash=dag_bag.dags_hash.get(dag_id),
+        data_interval=data_interval,
+    )
 
-    return dag_runs
+    return dag_run
 
 
 @internal_api_call
@@ -133,7 +129,7 @@ def trigger_dag(
         raise DagNotFound(f"Dag id {dag_id} not found in DagModel")
 
     dagbag = DagBag(dag_folder=dag_model.fileloc, read_dags_from_db=True)
-    triggers = _trigger_dag(
+    dr = _trigger_dag(
         dag_id=dag_id,
         dag_bag=dagbag,
         run_id=run_id,
@@ -142,4 +138,4 @@
         replace_microseconds=replace_microseconds,
    )
 
-    return triggers[0] if triggers else None
+    return dr if dr else None
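With the subdag fan-out gone, ``_trigger_dag`` creates exactly one run and ``trigger_dag`` hands back a single ``DagRun`` (or ``None``) instead of unpacking a list. A rough usage sketch; the dag id and conf are placeholders and assume the DAG is already parsed into the metadata DB:

from airflow.api.common.trigger_dag import trigger_dag

# trigger_dag now returns the single DagRun it created, or None.
dag_run = trigger_dag(
    dag_id="example_dag",
    conf={"triggered_by": "ad-hoc-script"},
    replace_microseconds=True,
)
if dag_run is not None:
    print(dag_run.run_id, dag_run.state)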

airflow/api_connexion/endpoints/dag_endpoint.py

+3 −4

@@ -106,7 +106,7 @@ def get_dags(
 ) -> APIResponse:
     """Get all DAGs."""
     allowed_attrs = ["dag_id"]
-    dags_query = select(DagModel).where(~DagModel.is_subdag)
+    dags_query = select(DagModel)
     if only_active:
         dags_query = dags_query.where(DagModel.is_active)
     if paused is not None:
@@ -179,10 +179,9 @@ def patch_dags(limit, session, offset=0, only_active=True, tags=None, dag_id_pat
         update_mask = update_mask[0]
         patch_body_[update_mask] = patch_body[update_mask]
         patch_body = patch_body_
+    dags_query = select(DagModel)
     if only_active:
-        dags_query = select(DagModel).where(~DagModel.is_subdag, DagModel.is_active)
-    else:
-        dags_query = select(DagModel).where(~DagModel.is_subdag)
+        dags_query = dags_query.where(DagModel.is_active)
 
     if dag_id_pattern == "~":
         dag_id_pattern = "%"

airflow/api_connexion/endpoints/dag_run_endpoint.py

+0 −4

@@ -425,8 +425,6 @@ def clear_dag_run(*, dag_id: str, dag_run_id: str, session: Session = NEW_SESSIO
         start_date=start_date,
         end_date=end_date,
         task_ids=None,
-        include_subdags=True,
-        include_parentdag=True,
         only_failed=False,
         dry_run=True,
     )
@@ -438,8 +436,6 @@ def clear_dag_run(*, dag_id: str, dag_run_id: str, session: Session = NEW_SESSIO
         start_date=start_date,
         end_date=end_date,
         task_ids=None,
-        include_subdags=True,
-        include_parentdag=True,
         only_failed=False,
     )
     dag_run = session.execute(select(DagRun).where(DagRun.id == dag_run.id)).scalar_one()

airflow/api_connexion/openapi/v1.yaml

+0 −17

@@ -3106,11 +3106,6 @@ components:
             Human centric display text for the DAG.
 
             *New in version 2.9.0*
-        root_dag_id:
-          type: string
-          readOnly: true
-          nullable: true
-          description: If the DAG is SubDAG then it is the top level DAG identifier. Otherwise, null.
         is_paused:
           type: boolean
           nullable: true
@@ -3125,10 +3120,6 @@ components:
           nullable: true
          readOnly: true
           type: boolean
-        is_subdag:
-          description: Whether the DAG is SubDAG.
-          type: boolean
-          readOnly: true
         last_parsed_time:
           type: string
           format: date-time
@@ -4903,14 +4894,6 @@ components:
           type: boolean
           default: false
 
-        include_subdags:
-          description: Clear tasks in subdags and clear external tasks indicated by ExternalTaskMarker.
-          type: boolean
-
-        include_parentdag:
-          description: Clear tasks in the parent dag of the subdag.
-          type: boolean
-
         reset_dag_runs:
           description: Set state of DAG runs to RUNNING.
           type: boolean

airflow/api_connexion/schemas/dag_schema.py

+0 −2

@@ -51,10 +51,8 @@ class Meta:
 
     dag_id = auto_field(dump_only=True)
     dag_display_name = fields.String(attribute="dag_display_name", dump_only=True)
-    root_dag_id = auto_field(dump_only=True)
     is_paused = auto_field()
     is_active = auto_field(dump_only=True)
-    is_subdag = auto_field(dump_only=True)
     last_parsed_time = auto_field(dump_only=True)
     last_pickled = auto_field(dump_only=True)
     last_expired = auto_field(dump_only=True)

airflow/api_connexion/schemas/task_instance_schema.py

+0 −2

@@ -177,8 +177,6 @@ class ClearTaskInstanceFormSchema(Schema):
     end_date = fields.DateTime(load_default=None, validate=validate_istimezone)
     only_failed = fields.Boolean(load_default=True)
     only_running = fields.Boolean(load_default=False)
-    include_subdags = fields.Boolean(load_default=False)
-    include_parentdag = fields.Boolean(load_default=False)
     reset_dag_runs = fields.Boolean(load_default=False)
     task_ids = fields.List(fields.String(), validate=validate.Length(min=1))
     dag_run_id = fields.Str(load_default=None)
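On the REST side, the practical effect of dropping these form fields is that ``include_subdags`` and ``include_parentdag`` simply disappear from the ``clearTaskInstances`` request body. A hedged client sketch using the ``requests`` library; host, credentials, dag and task ids are placeholders:

import requests

response = requests.post(
    "http://localhost:8080/api/v1/dags/example_dag/clearTaskInstances",
    auth=("admin", "admin"),  # assumes basic auth is enabled on the webserver
    json={
        # The payload no longer accepts include_subdags / include_parentdag.
        "dry_run": True,
        "only_failed": False,
        "reset_dag_runs": False,
        "task_ids": ["task_a"],
    },
)
response.raise_for_status()
print(response.json())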

airflow/api_connexion/schemas/task_schema.py

+0 −2

@@ -26,7 +26,6 @@
     TimeDeltaSchema,
     WeightRuleField,
 )
-from airflow.api_connexion.schemas.dag_schema import DAGSchema
 from airflow.models.mappedoperator import MappedOperator
 
 if TYPE_CHECKING:
@@ -61,7 +60,6 @@ class TaskSchema(Schema):
     ui_color = ColorField(dump_only=True)
     ui_fgcolor = ColorField(dump_only=True)
     template_fields = fields.List(fields.String(), dump_only=True)
-    sub_dag = fields.Nested(DAGSchema, dump_only=True)
     downstream_task_ids = fields.List(fields.String(), dump_only=True)
     params = fields.Method("_get_params", dump_only=True)
     is_mapped = fields.Method("_get_is_mapped", dump_only=True)
