-
Notifications
You must be signed in to change notification settings - Fork 16.4k
Add deferrable mode in Redshift delete cluster #30244
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,6 +26,7 @@ | |
| from airflow.providers.amazon.aws.triggers.redshift_cluster import ( | ||
| RedshiftCreateClusterSnapshotTrigger, | ||
| RedshiftCreateClusterTrigger, | ||
| RedshiftDeleteClusterTrigger, | ||
| RedshiftPauseClusterTrigger, | ||
| RedshiftResumeClusterTrigger, | ||
| ) | ||
|
|
@@ -629,6 +630,8 @@ class RedshiftDeleteClusterOperator(BaseOperator): | |
| The default value is ``True`` | ||
| :param aws_conn_id: aws connection to use | ||
| :param poll_interval: Time (in seconds) to wait between two consecutive calls to check cluster state | ||
| :param deferrable: Run operator in the deferrable mode. | ||
| :param max_attempts: (Deferrable mode only) The maximum number of attempts to be made | ||
pankajastro marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| """ | ||
|
|
||
| template_fields: Sequence[str] = ("cluster_identifier",) | ||
|
|
@@ -643,7 +646,9 @@ def __init__( | |
| final_cluster_snapshot_identifier: str | None = None, | ||
| wait_for_completion: bool = True, | ||
| aws_conn_id: str = "aws_default", | ||
| poll_interval: float = 30.0, | ||
| poll_interval: int = 30, | ||
| deferrable: bool = False, | ||
| max_attempts: int = 30, | ||
| **kwargs, | ||
| ): | ||
| super().__init__(**kwargs) | ||
|
|
@@ -658,8 +663,12 @@ def __init__( | |
| self._attempts = 10 | ||
| self._attempt_interval = 15 | ||
| self.redshift_hook = RedshiftHook(aws_conn_id=aws_conn_id) | ||
| self.aws_conn_id = aws_conn_id | ||
| self.deferrable = deferrable | ||
| self.max_attempts = max_attempts | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. How is this different to the existing `self._attempts`?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. `self._attempts`: we added this because sometimes Redshift goes into an unexpected state which does not allow us to perform other operations on the cluster, so this param allows us to wait for the right cluster state. |
||
|
|
||
| def execute(self, context: Context): | ||
|
|
||
| while self._attempts >= 1: | ||
| try: | ||
| self.redshift_hook.delete_cluster( | ||
|
|
@@ -676,10 +685,26 @@ def execute(self, context: Context): | |
| time.sleep(self._attempt_interval) | ||
| else: | ||
| raise | ||
|
|
||
| if self.wait_for_completion: | ||
| if self.deferrable: | ||
| self.defer( | ||
| timeout=timedelta(seconds=self.max_attempts * self.poll_interval + 60), | ||
| trigger=RedshiftDeleteClusterTrigger( | ||
| cluster_identifier=self.cluster_identifier, | ||
| poll_interval=self.poll_interval, | ||
| max_attempts=self.max_attempts, | ||
| aws_conn_id=self.aws_conn_id, | ||
| ), | ||
| method_name="execute_complete", | ||
| ) | ||
| elif self.wait_for_completion: | ||
| waiter = self.redshift_hook.get_conn().get_waiter("cluster_deleted") | ||
| waiter.wait( | ||
| ClusterIdentifier=self.cluster_identifier, | ||
| WaiterConfig={"Delay": self.poll_interval, "MaxAttempts": 30}, | ||
| WaiterConfig={"Delay": self.poll_interval, "MaxAttempts": self.max_attempts}, | ||
| ) | ||
|
|
||
| def execute_complete(self, context, event=None): | ||
| if event["status"] != "success": | ||
| raise AirflowException(f"Error deleting cluster: {event}") | ||
| else: | ||
| self.log.info("Cluster deleted successfully") | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -357,3 +357,78 @@ async def run(self): | |
| ) | ||
| else: | ||
| yield TriggerEvent({"status": "success", "message": "Cluster resumed"}) | ||
|
|
||
|
|
||
| class RedshiftDeleteClusterTrigger(BaseTrigger): | ||
| """ | ||
| Trigger for RedshiftDeleteClusterOperator | ||
|
|
||
| :param cluster_identifier: A unique identifier for the cluster. | ||
| :param max_attempts: The maximum number of attempts to be made. | ||
| :param aws_conn_id: The Airflow connection used for AWS credentials. | ||
| :param poll_interval: The amount of time in seconds to wait between attempts. | ||
| """ | ||
|
|
||
| def __init__( | ||
| self, | ||
| cluster_identifier: str, | ||
| max_attempts: int = 30, | ||
| aws_conn_id: str = "aws_default", | ||
| poll_interval: int = 30, | ||
| ): | ||
| super().__init__() | ||
| self.cluster_identifier = cluster_identifier | ||
| self.max_attempts = max_attempts | ||
| self.aws_conn_id = aws_conn_id | ||
| self.poll_interval = poll_interval | ||
|
|
||
| def serialize(self) -> tuple[str, dict[str, Any]]: | ||
| return ( | ||
| "airflow.providers.amazon.aws.triggers.redshift_cluster.RedshiftDeleteClusterTrigger", | ||
| { | ||
| "cluster_identifier": self.cluster_identifier, | ||
| "max_attempts": self.max_attempts, | ||
| "aws_conn_id": self.aws_conn_id, | ||
| "poll_interval": self.poll_interval, | ||
| }, | ||
| ) | ||
|
|
||
| @cached_property | ||
| def hook(self): | ||
| return RedshiftHook(aws_conn_id=self.aws_conn_id) | ||
|
||
|
|
||
| async def run(self) -> AsyncIterator[TriggerEvent]: | ||
| async with self.hook.async_conn as client: | ||
| attempt = 0 | ||
| waiter = client.get_waiter("cluster_deleted") | ||
| while attempt < self.max_attempts: | ||
| attempt = attempt + 1 | ||
| try: | ||
| await waiter.wait( | ||
| ClusterIdentifier=self.cluster_identifier, | ||
| WaiterConfig={ | ||
| "Delay": self.poll_interval, | ||
| "MaxAttempts": 1, | ||
hussein-awala marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| }, | ||
| ) | ||
| break | ||
| except WaiterError as error: | ||
| if "terminal failure" in str(error): | ||
| yield TriggerEvent( | ||
| {"status": "failure", "message": f"Delete Cluster Failed: {error}"} | ||
| ) | ||
| break | ||
| self.log.info( | ||
| "Cluster status is %s. Retrying attempt %s/%s", | ||
| error.last_response["Clusters"][0]["ClusterStatus"], | ||
| attempt, | ||
| self.max_attempts, | ||
| ) | ||
| await asyncio.sleep(int(self.poll_interval)) | ||
|
|
||
| if attempt >= self.max_attempts: | ||
| yield TriggerEvent( | ||
| {"status": "failure", "message": "Delete Cluster Failed - max attempts reached."} | ||
| ) | ||
| else: | ||
| yield TriggerEvent({"status": "success", "message": "Cluster deleted."}) | ||
Uh oh!
There was an error while loading. Please reload this page.