diff --git a/.github/ISSUE_TEMPLATE/airflow_providers_bug_report.yml b/.github/ISSUE_TEMPLATE/airflow_providers_bug_report.yml index 77514e2e4812e..7e644736784ee 100644 --- a/.github/ISSUE_TEMPLATE/airflow_providers_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/airflow_providers_bug_report.yml @@ -34,6 +34,7 @@ body: - apache-hdfs - apache-hive - apache-impala + - apache-kafka - apache-kylin - apache-livy - apache-pig diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 94a86bbea0d5c..aeee676f22dea 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1271,6 +1271,11 @@ jobs: breeze testing integration-tests --integration trino --integration kerberos breeze stop if: needs.build-info.outputs.runs-on != 'self-hosted' + - name: "Integration Tests Postgres: Kafka" + run: | + breeze testing integration-tests --integration kafka + breeze stop + if: needs.build-info.outputs.runs-on != 'self-hosted' - name: "Integration Tests Postgres: all-testable" run: breeze testing integration-tests --integration all-testable if: needs.build-info.outputs.runs-on == 'self-hosted' diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4a152a6c1dd7a..1c2da5a25a701 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -488,6 +488,7 @@ repos: ^docs/README.rst$| ^docs/apache-airflow-providers-amazon/secrets-backends/aws-ssm-parameter-store.rst$| ^docs/apache-airflow-providers-apache-hdfs/connections.rst$| + ^docs/apache-airflow-providers-apache-kafka/connections/kafka.rst$| ^docs/apache-airflow-providers-google/operators/cloud/kubernetes_engine.rst$| ^docs/apache-airflow-providers-microsoft-azure/connections/azure_cosmos.rst$| ^docs/conf.py$| diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index e3e566c1ebe4b..2ec24a426d330 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -610,18 +610,18 @@ This is the full list of those extras: ..
START EXTRAS HERE aiobotocore, airbyte, alibaba, all, all_dbs, amazon, apache.atlas, apache.beam, apache.cassandra, -apache.drill, apache.druid, apache.flink, apache.hdfs, apache.hive, apache.impala, apache.kylin, -apache.livy, apache.pig, apache.pinot, apache.spark, apache.sqoop, apache.webhdfs, arangodb, asana, -async, atlas, atlassian.jira, aws, azure, cassandra, celery, cgroups, cloudant, cncf.kubernetes, -common.sql, crypto, dask, databricks, datadog, dbt.cloud, deprecated_api, devel, devel_all, -devel_ci, devel_hadoop, dingding, discord, doc, doc_gen, docker, druid, elasticsearch, exasol, -facebook, ftp, gcp, gcp_api, github, github_enterprise, google, google_auth, grpc, hashicorp, hdfs, -hive, http, imap, influxdb, jdbc, jenkins, kerberos, kubernetes, ldap, leveldb, microsoft.azure, -microsoft.mssql, microsoft.psrp, microsoft.winrm, mongo, mssql, mysql, neo4j, odbc, openfaas, -openlineage, opsgenie, oracle, otel, pagerduty, pandas, papermill, password, pinot, plexus, -postgres, presto, qds, qubole, rabbitmq, redis, s3, salesforce, samba, segment, sendgrid, sentry, -sftp, singularity, slack, smtp, snowflake, spark, sqlite, ssh, statsd, tableau, tabular, telegram, -trino, vertica, virtualenv, webhdfs, winrm, zendesk +apache.drill, apache.druid, apache.flink, apache.hdfs, apache.hive, apache.impala, apache.kafka, +apache.kylin, apache.livy, apache.pig, apache.pinot, apache.spark, apache.sqoop, apache.webhdfs, +arangodb, asana, async, atlas, atlassian.jira, aws, azure, cassandra, celery, cgroups, cloudant, +cncf.kubernetes, common.sql, crypto, dask, databricks, datadog, dbt.cloud, deprecated_api, devel, +devel_all, devel_ci, devel_hadoop, dingding, discord, doc, doc_gen, docker, druid, elasticsearch, +exasol, facebook, ftp, gcp, gcp_api, github, github_enterprise, google, google_auth, grpc, +hashicorp, hdfs, hive, http, imap, influxdb, jdbc, jenkins, kerberos, kubernetes, ldap, leveldb, +microsoft.azure, microsoft.mssql, microsoft.psrp, microsoft.winrm, mongo, mssql, mysql, neo4j, odbc, +openfaas, openlineage, opsgenie, oracle, otel, pagerduty, pandas, papermill, password, pinot, +plexus, postgres, presto, qds, qubole, rabbitmq, redis, s3, salesforce, samba, segment, sendgrid, +sentry, sftp, singularity, slack, smtp, snowflake, spark, sqlite, ssh, statsd, tableau, tabular, +telegram, trino, vertica, virtualenv, webhdfs, winrm, zendesk .. 
END EXTRAS HERE Provider packages diff --git a/INSTALL b/INSTALL index 1ca1b78eb570a..7fc9d1097cce4 100644 --- a/INSTALL +++ b/INSTALL @@ -95,18 +95,18 @@ The list of available extras: # START EXTRAS HERE aiobotocore, airbyte, alibaba, all, all_dbs, amazon, apache.atlas, apache.beam, apache.cassandra, -apache.drill, apache.druid, apache.flink, apache.hdfs, apache.hive, apache.impala, apache.kylin, -apache.livy, apache.pig, apache.pinot, apache.spark, apache.sqoop, apache.webhdfs, arangodb, asana, -async, atlas, atlassian.jira, aws, azure, cassandra, celery, cgroups, cloudant, cncf.kubernetes, -common.sql, crypto, dask, databricks, datadog, dbt.cloud, deprecated_api, devel, devel_all, -devel_ci, devel_hadoop, dingding, discord, doc, doc_gen, docker, druid, elasticsearch, exasol, -facebook, ftp, gcp, gcp_api, github, github_enterprise, google, google_auth, grpc, hashicorp, hdfs, -hive, http, imap, influxdb, jdbc, jenkins, kerberos, kubernetes, ldap, leveldb, microsoft.azure, -microsoft.mssql, microsoft.psrp, microsoft.winrm, mongo, mssql, mysql, neo4j, odbc, openfaas, -openlineage, opsgenie, oracle, otel, pagerduty, pandas, papermill, password, pinot, plexus, -postgres, presto, qds, qubole, rabbitmq, redis, s3, salesforce, samba, segment, sendgrid, sentry, -sftp, singularity, slack, smtp, snowflake, spark, sqlite, ssh, statsd, tableau, tabular, telegram, -trino, vertica, virtualenv, webhdfs, winrm, zendesk +apache.drill, apache.druid, apache.flink, apache.hdfs, apache.hive, apache.impala, apache.kafka, +apache.kylin, apache.livy, apache.pig, apache.pinot, apache.spark, apache.sqoop, apache.webhdfs, +arangodb, asana, async, atlas, atlassian.jira, aws, azure, cassandra, celery, cgroups, cloudant, +cncf.kubernetes, common.sql, crypto, dask, databricks, datadog, dbt.cloud, deprecated_api, devel, +devel_all, devel_ci, devel_hadoop, dingding, discord, doc, doc_gen, docker, druid, elasticsearch, +exasol, facebook, ftp, gcp, gcp_api, github, github_enterprise, google, google_auth, grpc, +hashicorp, hdfs, hive, http, imap, influxdb, jdbc, jenkins, kerberos, kubernetes, ldap, leveldb, +microsoft.azure, microsoft.mssql, microsoft.psrp, microsoft.winrm, mongo, mssql, mysql, neo4j, odbc, +openfaas, openlineage, opsgenie, oracle, otel, pagerduty, pandas, papermill, password, pinot, +plexus, postgres, presto, qds, qubole, rabbitmq, redis, s3, salesforce, samba, segment, sendgrid, +sentry, sftp, singularity, slack, smtp, snowflake, spark, sqlite, ssh, statsd, tableau, tabular, +telegram, trino, vertica, virtualenv, webhdfs, winrm, zendesk # END EXTRAS HERE # For installing Airflow in development environments - see CONTRIBUTING.rst diff --git a/airflow/provider.yaml.schema.json b/airflow/provider.yaml.schema.json index fcb8851830d01..d961d672b0340 100644 --- a/airflow/provider.yaml.schema.json +++ b/airflow/provider.yaml.schema.json @@ -78,6 +78,7 @@ "gcp", "gmp", "google", + "kafka", "protocol", "service", "software", diff --git a/airflow/providers/apache/kafka/CHANGELOG.rst b/airflow/providers/apache/kafka/CHANGELOG.rst new file mode 100644 index 0000000000000..cef7dda80708a --- /dev/null +++ b/airflow/providers/apache/kafka/CHANGELOG.rst @@ -0,0 +1,25 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + +Changelog +--------- + +1.0.0 +..... + +Initial version of the provider. diff --git a/airflow/providers/apache/kafka/__init__.py b/airflow/providers/apache/kafka/__init__.py new file mode 100644 index 0000000000000..217e5db960782 --- /dev/null +++ b/airflow/providers/apache/kafka/__init__.py @@ -0,0 +1,17 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/airflow/providers/apache/kafka/hooks/__init__.py b/airflow/providers/apache/kafka/hooks/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/airflow/providers/apache/kafka/hooks/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/airflow/providers/apache/kafka/hooks/base.py b/airflow/providers/apache/kafka/hooks/base.py new file mode 100644 index 0000000000000..bd3a2d3cc74b8 --- /dev/null +++ b/airflow/providers/apache/kafka/hooks/base.py @@ -0,0 +1,79 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +from typing import Any + +from confluent_kafka.admin import AdminClient + +from airflow.compat.functools import cached_property +from airflow.hooks.base import BaseHook + + +class KafkaBaseHook(BaseHook): + """ + A base hook for interacting with Apache Kafka. + + :param kafka_config_id: The connection object to use, defaults to "kafka_default" + """ + + conn_name_attr = "kafka_config_id" + default_conn_name = "kafka_default" + conn_type = "kafka" + hook_name = "Apache Kafka" + + def __init__(self, kafka_config_id=default_conn_name, *args, **kwargs): + """Initialize our Base""" + super().__init__() + self.kafka_config_id = kafka_config_id + self.get_conn + + @staticmethod + def get_ui_field_behaviour() -> dict[str, Any]: + """Returns custom field behaviour""" + return { + "hidden_fields": ["schema", "login", "password", "port", "host"], + "relabeling": {"extra": "Config Dict"}, + "placeholders": { + "extra": '{"bootstrap.servers": "localhost:9092"}', + }, + } + + def _get_client(self, config): + raise NotImplementedError + + @cached_property + def get_conn(self) -> Any: + """Get the configuration object""" + config = self.get_connection(self.kafka_config_id).extra_dejson + + if not (config.get("bootstrap.servers", None)): + raise ValueError("config['bootstrap.servers'] must be provided.") + + return self._get_client(config) + + def test_connection(self) -> tuple[bool, str]: + """Test Connectivity from the UI""" + try: + config = self.get_connection(self.kafka_config_id).extra_dejson + # AdminClient takes only the config dict; the timeout belongs to list_topics(). + t = AdminClient(config).list_topics(timeout=10) + if t: + return True, "Connection successful." + except Exception as e: + return False, str(e) + + return False, "Failed to establish connection." diff --git a/airflow/providers/apache/kafka/hooks/client.py b/airflow/providers/apache/kafka/hooks/client.py new file mode 100644 index 0000000000000..7613bfab220c0 --- /dev/null +++ b/airflow/providers/apache/kafka/hooks/client.py @@ -0,0 +1,63 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License.
+from __future__ import annotations + +from typing import Any, Sequence + +from confluent_kafka import KafkaException +from confluent_kafka.admin import AdminClient, NewTopic + +from airflow.providers.apache.kafka.hooks.base import KafkaBaseHook + + +class KafkaAdminClientHook(KafkaBaseHook): + """ + A hook for interacting with the Kafka cluster + + :param kafka_config_id: The connection object to use, defaults to "kafka_default" + """ + + def __init__(self, kafka_config_id=KafkaBaseHook.default_conn_name) -> None: + super().__init__(kafka_config_id=kafka_config_id) + + def _get_client(self, config) -> AdminClient: + return AdminClient(config) + + def create_topic( + self, + topics: Sequence[Sequence[Any]], + ) -> None: + """Create one or more topics + + :param topics: a list of topics to create, each including the number of partitions for the topic + and the replication factor. Format: [("topic_name", number_of_partitions, replication_factor)] + """ + admin_client = self.get_conn + + new_topics = [NewTopic(t[0], num_partitions=t[1], replication_factor=t[2]) for t in topics] + + futures = admin_client.create_topics(new_topics) + + for t, f in futures.items(): + try: + f.result() + self.log.info("The topic %s has been created.", t) + except KafkaException as e: + # KafkaError.name() is a method, not an attribute. + if e.args[0].name() == "TOPIC_ALREADY_EXISTS": + self.log.warning("The topic %s already exists.", t) + else: + raise diff --git a/airflow/providers/apache/kafka/hooks/consume.py b/airflow/providers/apache/kafka/hooks/consume.py new file mode 100644 index 0000000000000..9ab0361067125 --- /dev/null +++ b/airflow/providers/apache/kafka/hooks/consume.py @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +from typing import Sequence + +from confluent_kafka import Consumer + +from airflow.providers.apache.kafka.hooks.base import KafkaBaseHook + + +class KafkaConsumerHook(KafkaBaseHook): + """ + A hook for creating a Kafka Consumer + + :param kafka_config_id: The connection object to use, defaults to "kafka_default" + :param topics: A list of topics to subscribe to.
+ """ + + def __init__(self, topics: Sequence[str], kafka_config_id=KafkaBaseHook.default_conn_name) -> None: + + super().__init__(kafka_config_id=kafka_config_id) + self.topics = topics + + def _get_client(self, config) -> Consumer: + return Consumer(config) + + def get_consumer(self) -> Consumer: + """Returns a Consumer that has been subscribed to topics.""" + consumer = self.get_conn + consumer.subscribe(self.topics) + + return consumer diff --git a/airflow/providers/apache/kafka/hooks/produce.py b/airflow/providers/apache/kafka/hooks/produce.py new file mode 100644 index 0000000000000..7e3a5bcf6e2ca --- /dev/null +++ b/airflow/providers/apache/kafka/hooks/produce.py @@ -0,0 +1,42 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +from confluent_kafka import Producer + +from airflow.providers.apache.kafka.hooks.base import KafkaBaseHook + + +class KafkaProducerHook(KafkaBaseHook): + """ + A hook for creating a Kafka Producer + + :param kafka_config_id: The connection object to use, defaults to "kafka_default" + """ + + def __init__(self, kafka_config_id=KafkaBaseHook.default_conn_name) -> None: + super().__init__(kafka_config_id=kafka_config_id) + + def _get_client(self, config) -> Producer: + return Producer(config) + + def get_producer(self) -> Producer: + """Returns a producer object for sending messages to Kafka""" + producer = self.get_conn + + self.log.info("Producer %s", producer) + return producer diff --git a/airflow/providers/apache/kafka/operators/__init__.py b/airflow/providers/apache/kafka/operators/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/airflow/providers/apache/kafka/operators/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
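A minimal sketch of how the hooks above compose, assuming a ``kafka_default`` connection whose extra contains ``{"bootstrap.servers": "localhost:9092"}`` (plus a ``group.id`` for the consumer) and an illustrative topic name::

    from airflow.providers.apache.kafka.hooks.client import KafkaAdminClientHook
    from airflow.providers.apache.kafka.hooks.consume import KafkaConsumerHook
    from airflow.providers.apache.kafka.hooks.produce import KafkaProducerHook

    # Create an illustrative topic: (name, number of partitions, replication factor).
    KafkaAdminClientHook(kafka_config_id="kafka_default").create_topic(topics=[("test_topic", 1, 1)])

    # Publish a single record, then flush so delivery finishes before reading.
    producer = KafkaProducerHook(kafka_config_id="kafka_default").get_producer()
    producer.produce("test_topic", key="1", value="hello")
    producer.flush()

    # Read it back; consume() returns at most num_messages within the timeout (seconds).
    consumer = KafkaConsumerHook(topics=["test_topic"], kafka_config_id="kafka_default").get_consumer()
    messages = consumer.consume(num_messages=1, timeout=5)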
diff --git a/airflow/providers/apache/kafka/operators/consume.py b/airflow/providers/apache/kafka/operators/consume.py new file mode 100644 index 0000000000000..02c9db6556df6 --- /dev/null +++ b/airflow/providers/apache/kafka/operators/consume.py @@ -0,0 +1,187 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +from functools import partial +from typing import Any, Callable, Sequence + +from airflow.exceptions import AirflowException +from airflow.models import BaseOperator +from airflow.providers.apache.kafka.hooks.consume import KafkaConsumerHook +from airflow.utils.module_loading import import_string + +VALID_COMMIT_CADENCE = {"never", "end_of_batch", "end_of_operator"} + + +class ConsumeFromTopicOperator(BaseOperator): + """An operator that consumes from one or more Kafka topics and processes the messages. + + The operator creates a Kafka consumer that reads a batch of messages from the cluster and processes them + using the user-supplied callable function. The consumer will continue to read in batches until it reaches + the end of the log or a maximum number of messages has been read. + + :param kafka_config_id: The connection object to use, defaults to "kafka_default" + :param topics: A list of topics or regex patterns the consumer should subscribe to. + :param apply_function: The function that should be applied to messages fetched one at a time, given + either as a callable or as a dotted-path string (module path and function name delimited by a `.`) + :param apply_function_batch: The function that should be applied to a batch of messages fetched. Cannot + be used with `apply_function`. Intended for transactional workloads where an expensive task might + be called before or after operations on the messages are taken. + :param apply_function_args: Additional arguments that should be applied to the callable, defaults to None + :param apply_function_kwargs: Additional keyword arguments that should be applied to the callable, + defaults to None + :param commit_cadence: When consumers should commit offsets ("never", "end_of_batch", "end_of_operator"), + defaults to "end_of_operator"; + if end_of_operator, commit() is called based on the max_messages arg. Commits are made after the + operator has applied the apply_function to the maximum number of messages it will process. + if end_of_batch, commit() is called based on the max_batch_size arg. Commits are made after each + batch has been processed by the apply_function for all messages in the batch. + if never, close() is called without calling the commit() method. + :param max_messages: The maximum total number of messages an operator should read from Kafka, + defaults to None implying read to the end of the topic.
+ :param max_batch_size: The maximum number of messages a consumer should read when polling, + defaults to 1000 + :param poll_timeout: How long the Kafka consumer should wait before determining no more messages are + available, defaults to 60 + + .. seealso:: + For more information on how to use this operator, take a look at the guide: + :ref:`howto/operator:ConsumeFromTopicOperator` + """ + + BLUE = "#ffefeb" + ui_color = BLUE + template_fields = ( + "topics", + "apply_function", + "apply_function_args", + "apply_function_kwargs", + "kafka_config_id", + ) + + def __init__( + self, + topics: str | Sequence[str], + kafka_config_id: str = "kafka_default", + apply_function: Callable[..., Any] | str | None = None, + apply_function_batch: Callable[..., Any] | str | None = None, + apply_function_args: Sequence[Any] | None = None, + apply_function_kwargs: dict[Any, Any] | None = None, + commit_cadence: str | None = "end_of_operator", + max_messages: int | None = None, + max_batch_size: int = 1000, + poll_timeout: float = 60, + **kwargs: Any, + ) -> None: + + super().__init__(**kwargs) + + self.topics = topics + self.apply_function = apply_function + self.apply_function_batch = apply_function_batch + self.apply_function_args = apply_function_args or () + self.apply_function_kwargs = apply_function_kwargs or {} + self.kafka_config_id = kafka_config_id + self.commit_cadence = commit_cadence + self.max_messages = max_messages or True + self.max_batch_size = max_batch_size + self.poll_timeout = poll_timeout + + if self.max_messages is True: + self.read_to_end = True + else: + self.read_to_end = False + + if self.commit_cadence not in VALID_COMMIT_CADENCE: + raise AirflowException( + f"commit_cadence must be one of {VALID_COMMIT_CADENCE}. Got {self.commit_cadence}" + ) + + if self.max_messages and self.max_batch_size > self.max_messages: + self.log.warning( + "max_batch_size (%s) > max_messages (%s). Setting max_messages to %s ", + self.max_batch_size, + self.max_messages, + self.max_batch_size, + ) + + if self.commit_cadence == "never": + self.commit_cadence = None + + if apply_function and apply_function_batch: + raise AirflowException( + "One of apply_function or apply_function_batch must be supplied, not both." + ) + + def execute(self, context) -> Any: + + consumer = KafkaConsumerHook(topics=self.topics, kafka_config_id=self.kafka_config_id).get_consumer() + + if isinstance(self.apply_function, str): + self.apply_function = import_string(self.apply_function) + + if isinstance(self.apply_function_batch, str): + self.apply_function_batch = import_string(self.apply_function_batch) + + if self.apply_function: + apply_callable = partial( + self.apply_function, *self.apply_function_args, **self.apply_function_kwargs # type: ignore + ) + + if self.apply_function_batch: + apply_callable = partial( + self.apply_function_batch, # type: ignore + *self.apply_function_args, + **self.apply_function_kwargs, + ) + + messages_left = self.max_messages + + while self.read_to_end or ( + messages_left > 0 + ): # bool(True > 0) == True in the case where self.max_messages isn't set by the user + + if not isinstance(messages_left, bool): + batch_size = self.max_batch_size if messages_left > self.max_batch_size else messages_left + else: + batch_size = self.max_batch_size + + msgs = consumer.consume(num_messages=batch_size, timeout=self.poll_timeout) + messages_left -= len(msgs) + + if not msgs: # No messages + messages_left is being used. + self.log.info("Reached end of log. 
Exiting.") + break + + if self.apply_function: + for m in msgs: + apply_callable(m) + + if self.apply_function_batch: + apply_callable(msgs) + + if self.commit_cadence == "end_of_batch": + self.log.info("committing offset at %s", self.commit_cadence) + consumer.commit() + + if self.commit_cadence: + self.log.info("committing offset at %s", self.commit_cadence) + consumer.commit() + + consumer.close() + + return diff --git a/airflow/providers/apache/kafka/operators/produce.py b/airflow/providers/apache/kafka/operators/produce.py new file mode 100644 index 0000000000000..c6a05436aa5ac --- /dev/null +++ b/airflow/providers/apache/kafka/operators/produce.py @@ -0,0 +1,130 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import logging +from functools import partial +from typing import Any, Callable, Sequence + +from airflow.exceptions import AirflowException +from airflow.models import BaseOperator +from airflow.providers.apache.kafka.hooks.produce import KafkaProducerHook +from airflow.utils.module_loading import import_string + +local_logger = logging.getLogger("airflow") + + +def acked(err, msg): + if err is not None: + local_logger.error(f"Failed to deliver message: {err}") + else: + local_logger.info( + f"Produced record to topic {msg.topic()} partition [{msg.partition()}] @ offset {msg.offset()}" + ) + + +class ProduceToTopicOperator(BaseOperator): + """An operator that produces messages to a Kafka topic + + Registers a producer to a Kafka topic and publishes messages to the log. + + :param kafka_config_id: The connection object to use, defaults to "kafka_default" + :param topic: The topic the producer should produce to + :param producer_function: The function that generates key/value pairs as messages for production + :param producer_function_args: Additional arguments to be applied to the producer callable, + defaults to None + :param producer_function_kwargs: Additional keyword arguments to be applied to the producer callable, + defaults to None + :param delivery_callback: The callback to apply after delivery (or failure) of a message, defaults to None + :param synchronous: If writes to Kafka should be fully synchronous, defaults to True + :param poll_timeout: How long of a delay should be applied when calling poll after production to Kafka, + defaults to 0 + :raises AirflowException: If ``topic`` or ``producer_function`` is not provided. + + ..
seealso:: + For more information on how to use this operator, take a look at the guide: + :ref:`howto/operator:ProduceToTopicOperator` + """ + + template_fields = ( + "topic", + "producer_function", + "producer_function_args", + "producer_function_kwargs", + "kafka_config_id", + ) + + def __init__( + self, + topic: str, + producer_function: str | Callable[..., Any], + kafka_config_id: str = "kafka_default", + producer_function_args: Sequence[Any] | None = None, + producer_function_kwargs: dict[Any, Any] | None = None, + delivery_callback: str | None = None, + synchronous: bool = True, + poll_timeout: float = 0, + **kwargs: Any, + ) -> None: + + super().__init__(**kwargs) + + if delivery_callback: + dc = import_string(delivery_callback) + else: + dc = acked + + self.kafka_config_id = kafka_config_id + self.topic = topic + self.producer_function = producer_function + self.producer_function_args = producer_function_args or () + self.producer_function_kwargs = producer_function_kwargs or {} + self.delivery_callback = dc + self.synchronous = synchronous + self.poll_timeout = poll_timeout + + if not (self.topic and self.producer_function): + raise AirflowException( + "topic and producer_function must be provided. Got topic=" + f"{self.topic} and producer_function={self.producer_function}" + ) + + return + + def execute(self, context) -> None: + + # Get producer and callable + producer = KafkaProducerHook(kafka_config_id=self.kafka_config_id).get_producer() + + if isinstance(self.producer_function, str): + self.producer_function = import_string(self.producer_function) + + producer_callable = partial( + self.producer_function, # type: ignore + *self.producer_function_args, + **self.producer_function_kwargs, + ) + + # For each returned k/v from the callable: publish and flush if needed. + for k, v in producer_callable(): + producer.produce(self.topic, key=k, value=v, on_delivery=self.delivery_callback) + producer.poll(self.poll_timeout) + if self.synchronous: + producer.flush() + + producer.flush() diff --git a/airflow/providers/apache/kafka/provider.yaml b/airflow/providers/apache/kafka/provider.yaml new file mode 100644 index 0000000000000..6b3fdad1ab38a --- /dev/null +++ b/airflow/providers/apache/kafka/provider.yaml @@ -0,0 +1,65 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License.
+ +--- +package-name: apache-airflow-providers-apache-kafka +name: Apache Kafka + +suspended: false +description: | + `Apache Kafka <https://kafka.apache.org/>`__ +versions: + - 1.0.0 + +dependencies: + - apache-airflow>=2.3.0 + - asgiref + - confluent-kafka>=1.8.2 + +integrations: + - integration-name: Apache Kafka + external-doc-url: https://kafka.apache.org/ + logo: /integration-logos/apache/kafka.svg + tags: [apache] + +operators: + - integration-name: Apache Kafka + python-modules: + - airflow.providers.apache.kafka.operators.consume + - airflow.providers.apache.kafka.operators.produce + +hooks: + - integration-name: Apache Kafka + python-modules: + - airflow.providers.apache.kafka.hooks.base + - airflow.providers.apache.kafka.hooks.client + - airflow.providers.apache.kafka.hooks.consume + - airflow.providers.apache.kafka.hooks.produce + +sensors: + - integration-name: Apache Kafka + python-modules: + - airflow.providers.apache.kafka.sensors.kafka + +triggers: + - integration-name: Apache Kafka + python-modules: + - airflow.providers.apache.kafka.triggers.await_message + +connection-types: + - hook-class-name: airflow.providers.apache.kafka.hooks.base.KafkaBaseHook + connection-type: kafka diff --git a/airflow/providers/apache/kafka/sensors/__init__.py b/airflow/providers/apache/kafka/sensors/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/airflow/providers/apache/kafka/sensors/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/airflow/providers/apache/kafka/sensors/kafka.py b/airflow/providers/apache/kafka/sensors/kafka.py new file mode 100644 index 0000000000000..747c01ddc5aa7 --- /dev/null +++ b/airflow/providers/apache/kafka/sensors/kafka.py @@ -0,0 +1,219 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License.
+from __future__ import annotations + +from typing import Any, Callable, Sequence + +from airflow.models import BaseOperator +from airflow.providers.apache.kafka.triggers.await_message import AwaitMessageTrigger + +VALID_COMMIT_CADENCE = {"never", "end_of_batch", "end_of_operator"} + + +class AwaitMessageSensor(BaseOperator): + """An Airflow sensor that defers until a specific message is published to Kafka. + + The sensor creates a consumer that reads the Kafka log until it encounters a positive event. + + The behavior of the consumer for this trigger is as follows: + - poll the Kafka topics for a message + - if no message returned, sleep + - process the message with provided callable and commit the message offset + - if callable returns any data, raise a TriggerEvent with the return data + - else continue to next message + - return event (as default xcom or specific xcom key) + + + :param kafka_config_id: The connection object to use, defaults to "kafka_default" + :param topics: Topics (or topic regex) to use for reading from + :param apply_function: The function to apply to messages to determine if an event occurred. As a dot + notation string. + :param apply_function_args: Arguments to be applied to the processing function, + defaults to None + :param apply_function_kwargs: Keyword arguments to be applied to the processing function, + defaults to None + :param poll_timeout: How long the kafka consumer should wait for a message to arrive from the kafka + cluster, defaults to 1 + :param poll_interval: How long the kafka consumer should sleep after reaching the end of the Kafka log, + defaults to 5 + :param xcom_push_key: the name of a key to push the returned message to, defaults to None + + + """ + + BLUE = "#ffefeb" + ui_color = BLUE + + template_fields = ( + "topics", + "apply_function", + "apply_function_args", + "apply_function_kwargs", + "kafka_config_id", + ) + + def __init__( + self, + topics: Sequence[str], + apply_function: str, + kafka_config_id: str = "kafka_default", + apply_function_args: Sequence[Any] | None = None, + apply_function_kwargs: dict[Any, Any] | None = None, + poll_timeout: float = 1, + poll_interval: float = 5, + xcom_push_key=None, + **kwargs: Any, + ) -> None: + + super().__init__(**kwargs) + + self.topics = topics + self.apply_function = apply_function + self.apply_function_args = apply_function_args + self.apply_function_kwargs = apply_function_kwargs + self.kafka_config_id = kafka_config_id + self.poll_timeout = poll_timeout + self.poll_interval = poll_interval + self.xcom_push_key = xcom_push_key + + def execute(self, context) -> Any: + + self.defer( + trigger=AwaitMessageTrigger( + topics=self.topics, + apply_function=self.apply_function, + apply_function_args=self.apply_function_args, + apply_function_kwargs=self.apply_function_kwargs, + kafka_config_id=self.kafka_config_id, + poll_timeout=self.poll_timeout, + poll_interval=self.poll_interval, + ), + method_name="execute_complete", + ) + + def execute_complete(self, context, event=None): + if self.xcom_push_key: + self.xcom_push(context, key=self.xcom_push_key, value=event) + return event + + +class AwaitMessageTriggerFunctionSensor(BaseOperator): + """An Airflow sensor that defers until a specific message is published to + Kafka, then triggers a registered function, and goes back to waiting for + a message. + + + The behavior of the consumer for this trigger is as follows: + - poll the Kafka topics for a message + - if no message returned, sleep + - process the message with provided callable and commit the message offset + - if callable returns any data, raise a TriggerEvent with the return data + - else continue to next message + - return event (as default xcom or specific xcom key) + + + :param kafka_config_id: The connection object to use, defaults to "kafka_default" + :param topics: Topics (or topic regex) to use for reading from + :param apply_function: The function to apply to messages to determine if an event occurred. As a dot + notation string. + :param event_triggered_function: The callable to trigger once the apply_function encounters a + positive event. + :param apply_function_args: Arguments to be applied to the processing function, defaults to None + :param apply_function_kwargs: Keyword arguments to be applied to the processing function, + defaults to None + :param poll_timeout: How long the kafka consumer should wait for a message to arrive from the kafka + cluster, defaults to 1 + :param poll_interval: How long the kafka consumer should sleep after reaching the end of the Kafka log, + defaults to 5 + + + """ + + BLUE = "#ffefeb" + ui_color = BLUE + + template_fields = ( + "topics", + "apply_function", + "apply_function_args", + "apply_function_kwargs", + "kafka_config_id", + ) + + def __init__( + self, + topics: Sequence[str], + apply_function: str, + event_triggered_function: Callable, + kafka_config_id: str = "kafka_default", + apply_function_args: Sequence[Any] | None = None, + apply_function_kwargs: dict[Any, Any] | None = None, + poll_timeout: float = 1, + poll_interval: float = 5, + **kwargs: Any, + ) -> None: + + super().__init__(**kwargs) + + self.topics = topics + self.apply_function = apply_function + self.apply_function_args = apply_function_args + self.apply_function_kwargs = apply_function_kwargs + self.kafka_config_id = kafka_config_id + self.poll_timeout = poll_timeout + self.poll_interval = poll_interval + self.event_triggered_function = event_triggered_function + + if not callable(self.event_triggered_function): + raise TypeError( + "parameter event_triggered_function is expected to be of type callable, " + f"got {type(event_triggered_function)}" + ) + + def execute(self, context, event=None) -> Any: + + self.defer( + trigger=AwaitMessageTrigger( + topics=self.topics, + apply_function=self.apply_function, + apply_function_args=self.apply_function_args, + apply_function_kwargs=self.apply_function_kwargs, + kafka_config_id=self.kafka_config_id, + poll_timeout=self.poll_timeout, + poll_interval=self.poll_interval, + ), + method_name="execute_complete", + ) + + return event + + def execute_complete(self, context, event=None): + + self.event_triggered_function(event, **context) + + self.defer( + trigger=AwaitMessageTrigger( + topics=self.topics, + apply_function=self.apply_function, + apply_function_args=self.apply_function_args, + apply_function_kwargs=self.apply_function_kwargs, + kafka_config_id=self.kafka_config_id, + poll_timeout=self.poll_timeout, + poll_interval=self.poll_interval, + ), + method_name="execute_complete", + ) diff --git a/airflow/providers/apache/kafka/triggers/__init__.py b/airflow/providers/apache/kafka/triggers/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/airflow/providers/apache/kafka/triggers/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more
contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/airflow/providers/apache/kafka/triggers/await_message.py b/airflow/providers/apache/kafka/triggers/await_message.py new file mode 100644 index 0000000000000..7e7021f54be70 --- /dev/null +++ b/airflow/providers/apache/kafka/triggers/await_message.py @@ -0,0 +1,118 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import asyncio +from functools import partial +from typing import Any, Sequence + +from asgiref.sync import sync_to_async + +from airflow import AirflowException +from airflow.providers.apache.kafka.hooks.consume import KafkaConsumerHook +from airflow.triggers.base import BaseTrigger, TriggerEvent +from airflow.utils.module_loading import import_string + + +class AwaitMessageTrigger(BaseTrigger): + """A trigger that waits for a message matching specific criteria to arrive in Kafka + + The behavior of the consumer of this trigger is as follows: + - poll the Kafka topics for a message, if no message returned, sleep + - process the message with provided callable and commit the message offset: + + - if callable returns any data, raise a TriggerEvent with the return data + + - else continue to next message + + + :param kafka_config_id: The connection object to use, defaults to "kafka_default" + :param topics: The topic (or topic regex) that should be searched for messages + :param apply_function: the location of the function to apply to messages for determination of matching + criteria. 
(In python dot notation as a string) + :param apply_function_args: A set of arguments to apply to the callable, defaults to None + :param apply_function_kwargs: A set of keyword arguments to apply to the callable, + defaults to None + :param poll_timeout: How long the Kafka client should wait before returning from a poll request to + Kafka (seconds), defaults to 1 + :param poll_interval: How long the trigger should sleep after reaching the end of the Kafka log + (seconds), defaults to 5 + + """ + + def __init__( + self, + topics: Sequence[str], + apply_function: str, + kafka_config_id: str = "kafka_default", + apply_function_args: Sequence[Any] | None = None, + apply_function_kwargs: dict[Any, Any] | None = None, + poll_timeout: float = 1, + poll_interval: float = 5, + ) -> None: + + self.topics = topics + self.apply_function = apply_function + self.apply_function_args = apply_function_args or () + self.apply_function_kwargs = apply_function_kwargs or {} + self.kafka_config_id = kafka_config_id + self.poll_timeout = poll_timeout + self.poll_interval = poll_interval + + def serialize(self) -> tuple[str, dict[str, Any]]: + return ( + "airflow.providers.apache.kafka.triggers.await_message.AwaitMessageTrigger", + { + "topics": self.topics, + "apply_function": self.apply_function, + "apply_function_args": self.apply_function_args, + "apply_function_kwargs": self.apply_function_kwargs, + "kafka_config_id": self.kafka_config_id, + "poll_timeout": self.poll_timeout, + "poll_interval": self.poll_interval, + }, + ) + + async def run(self): + consumer_hook = KafkaConsumerHook(topics=self.topics, kafka_config_id=self.kafka_config_id) + + async_get_consumer = sync_to_async(consumer_hook.get_consumer) + consumer = await async_get_consumer() + + async_poll = sync_to_async(consumer.poll) + async_commit = sync_to_async(consumer.commit) + + processing_call = import_string(self.apply_function) + processing_call = partial(processing_call, *self.apply_function_args, **self.apply_function_kwargs) + async_message_process = sync_to_async(processing_call) + while True: + + message = await async_poll(self.poll_timeout) + + if message is None: + continue + elif message.error(): + raise AirflowException(f"Error: {message.error()}") + else: + + rv = await async_message_process(message) + if rv: + await async_commit(asynchronous=False) + yield TriggerEvent(rv) + else: + await async_commit(asynchronous=False) + await asyncio.sleep(self.poll_interval) diff --git a/airflow/utils/db.py b/airflow/utils/db.py index 99f63598fbb26..358496e44453c 100644 --- a/airflow/utils/db.py +++ b/airflow/utils/db.py @@ -19,6 +19,7 @@ import contextlib import enum +import json import logging import os import sys @@ -39,7 +40,7 @@ from airflow.utils import helpers # TODO: remove create_session once we decide to break backward compatibility -from airflow.utils.session import NEW_SESSION, create_session, provide_session # noqa: F401 +from airflow.utils.session import NEW_SESSION, provide_session if TYPE_CHECKING: from alembic.runtime.environment import EnvironmentContext @@ -364,6 +365,14 @@ def create_default_connections(session: Session = NEW_SESSION): session, ) merge_conn(Connection(conn_id="impala_default", conn_type="impala", host="localhost", port=21050)) + merge_conn( + Connection( + conn_id="kafka_default", + conn_type="kafka", + extra=json.dumps({"bootstrap.servers": "broker:29092"}), + ), + session, + ) merge_conn( Connection( conn_id="kubernetes_default", diff --git
a/dev/breeze/src/airflow_breeze/global_constants.py b/dev/breeze/src/airflow_breeze/global_constants.py index 22088120c843e..49cbf6be606ce 100644 --- a/dev/breeze/src/airflow_breeze/global_constants.py +++ b/dev/breeze/src/airflow_breeze/global_constants.py @@ -42,14 +42,7 @@ ALLOWED_BACKENDS = ["sqlite", "mysql", "postgres", "mssql"] ALLOWED_PROD_BACKENDS = ["mysql", "postgres", "mssql"] DEFAULT_BACKEND = ALLOWED_BACKENDS[0] -TESTABLE_INTEGRATIONS = [ - "cassandra", - "celery", - "kerberos", - "mongo", - "pinot", - "trino", -] +TESTABLE_INTEGRATIONS = ["cassandra", "celery", "kerberos", "mongo", "pinot", "trino", "kafka"] OTHER_INTEGRATIONS = ["statsd"] ALL_INTEGRATIONS = sorted( [ diff --git a/docs/apache-airflow-providers-apache-kafka/commits.rst b/docs/apache-airflow-providers-apache-kafka/commits.rst new file mode 100644 index 0000000000000..252b3943f5c61 --- /dev/null +++ b/docs/apache-airflow-providers-apache-kafka/commits.rst @@ -0,0 +1,32 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +Package apache-airflow-providers-apache-kafka +------------------------------------------------------ + +`Kafka <https://kafka.apache.org/>`__ + + +This is a detailed commit list of changes for versions of the provider package: ``apache.kafka``. +For the high-level changelog, see :doc:`package information including changelog <index>`. + + +1.0.0 +..... + +Initial release of this provider. diff --git a/docs/apache-airflow-providers-apache-kafka/connections/kafka.rst b/docs/apache-airflow-providers-apache-kafka/connections/kafka.rst new file mode 100644 index 0000000000000..831653c3dc887 --- /dev/null +++ b/docs/apache-airflow-providers-apache-kafka/connections/kafka.rst @@ -0,0 +1,43 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +.. _howto/connection:kafka + +Apache Kafka Connection +======================== + +The Apache Kafka connection type configures a connection to Apache Kafka via the ``confluent-kafka`` Python package. + +..
|Kafka Connection| image:: kafka_connection.png + :width: 400 + :alt: Kafka Connection Screenshot + + +Default Connection IDs +---------------------- + +Kafka hooks and operators use ``kafka_default`` by default; this connection is very minimal and should not be assumed useful for more than the most trivial testing. + +Configuring the Connection +-------------------------- + +Connections are configured as a JSON-serializable string provided to the ``extra`` field. A full list of parameters +is described in the `Confluent Kafka python library `_. + +If you are defining the Airflow connection from the Airflow UI, the ``extra`` field will be renamed to ``Config Dict``. + +Most operators and hooks will check that, at a minimum, the ``bootstrap.servers`` key exists and has a value set in order to be considered valid. diff --git a/docs/apache-airflow-providers-apache-kafka/connections/kafka_connection.png b/docs/apache-airflow-providers-apache-kafka/connections/kafka_connection.png new file mode 100644 index 0000000000000..bd90be0a9b918 Binary files /dev/null and b/docs/apache-airflow-providers-apache-kafka/connections/kafka_connection.png differ diff --git a/docs/apache-airflow-providers-apache-kafka/hooks.rst b/docs/apache-airflow-providers-apache-kafka/hooks.rst new file mode 100644 index 0000000000000..a50f6cbb9f00b --- /dev/null +++ b/docs/apache-airflow-providers-apache-kafka/hooks.rst @@ -0,0 +1,70 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + +Apache Kafka Hooks +================== + +.. _howto/hook:KafkaHook: + +KafkaHook +------------------------ + +A base hook for interacting with Apache Kafka. Use this hook as a base class when creating your own Kafka hooks. +For parameter definitions take a look at :class:`~airflow.providers.apache.kafka.hooks.base.KafkaBaseHook`. + + +.. _howto/hook:KafkaAdminClientHook: + +KafkaAdminClientHook +------------------------ + +A hook for interacting with an Apache Kafka cluster. +For parameter definitions take a look at :class:`~airflow.providers.apache.kafka.hooks.client.KafkaAdminClientHook`. + +Reference +""""""""" + +For further information, look at `Apache Kafka Admin config documentation `_. + + +.. _howto/hook:KafkaConsumerHook: + +KafkaConsumerHook +------------------------ + +A hook for creating a Kafka Consumer. This hook is used by the ``ConsumeFromTopicOperator`` and the ``AwaitMessageTrigger``. +For parameter definitions take a look at :class:`~airflow.providers.apache.kafka.hooks.consume.KafkaConsumerHook`. + +Reference +""""""""" + +For further information, look at `Apache Kafka Consumer documentation `_. + + +.. _howto/hook:KafkaProducerHook: + +KafkaProducerHook +------------------------ + +A hook for creating a Kafka producer. This hook is used by the ``ProduceToTopicOperator``.
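+A minimal usage sketch, assuming the default ``kafka_default`` connection points at a reachable broker (the topic name is illustrative):
+
+.. code-block:: python
+
+    from airflow.providers.apache.kafka.hooks.produce import KafkaProducerHook
+
+    producer = KafkaProducerHook(kafka_config_id="kafka_default").get_producer()
+    producer.produce("test_topic", key="1", value="hello")
+    producer.flush()  # block until outstanding deliveries are acknowledged
+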
+
+Reference
+"""""""""
+
+For further information, look at the `Apache Kafka Producer documentation <https://kafka.apache.org/documentation/#producerconfigs>`_.
diff --git a/docs/apache-airflow-providers-apache-kafka/index.rst b/docs/apache-airflow-providers-apache-kafka/index.rst
new file mode 100644
index 0000000000000..4a89f1c71cdf6
--- /dev/null
+++ b/docs/apache-airflow-providers-apache-kafka/index.rst
@@ -0,0 +1,94 @@
+ .. Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements. See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership. The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License. You may obtain a copy of the License at
+
+ ..   http://www.apache.org/licenses/LICENSE-2.0
+
+ .. Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied. See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+``apache-airflow-providers-apache-kafka``
+=========================================
+
+Content
+-------
+
+.. toctree::
+    :maxdepth: 1
+    :caption: Guides
+
+    Connection <connections/kafka>
+    Hooks <hooks>
+    Operators <operators/index>
+    Sensors <sensors>
+    Triggers <triggers>
+
+
+.. toctree::
+    :maxdepth: 1
+    :caption: References
+
+    Python API <_api/airflow/providers/apache/kafka/index>
+
+.. toctree::
+    :hidden:
+    :caption: System tests
+
+    System Tests <_api/tests/system/providers/apache/kafka/index>
+
+
+.. toctree::
+    :maxdepth: 1
+    :caption: Resources
+
+    Example DAGs <https://github.com/apache/airflow/tree/main/tests/system/providers/apache/kafka>
+    PyPI Repository <https://pypi.org/project/apache-airflow-providers-apache-kafka/>
+    Installing from sources <installing-providers-from-sources>
+
+.. toctree::
+    :maxdepth: 1
+    :caption: Commits
+
+    Detailed list of commits <commits>
+
+
+Package apache-airflow-providers-apache-kafka
+---------------------------------------------
+
+`Apache Kafka <https://kafka.apache.org/>`__
+
+
+Release: 1.0.0
+
+Provider package
+----------------
+
+This is a provider package for the ``apache.kafka`` provider. All classes for this provider package
+are in the ``airflow.providers.apache.kafka`` python package.
+
+Installation
+------------
+
+You can install this package on top of an existing Airflow 2 installation (see ``Requirements`` below
+for the minimum Airflow version supported) via ``pip install apache-airflow-providers-apache-kafka``.
+
+Requirements
+------------
+
+=================== ==================
+PIP package         Version required
+=================== ==================
+``apache-airflow``  ``>=2.3.0``
+``confluent-kafka`` ``>=1.8.2``
+=================== ==================
+
+.. include:: ../../airflow/providers/apache/kafka/CHANGELOG.rst
diff --git a/docs/apache-airflow-providers-apache-kafka/installing-providers-from-sources.rst b/docs/apache-airflow-providers-apache-kafka/installing-providers-from-sources.rst
new file mode 100644
index 0000000000000..1c90205d15b3a
--- /dev/null
+++ b/docs/apache-airflow-providers-apache-kafka/installing-providers-from-sources.rst
@@ -0,0 +1,18 @@
+ .. Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements. See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership. The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License. You may obtain a copy of the License at
+
+ ..   http://www.apache.org/licenses/LICENSE-2.0
+
+ .. Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied. See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+.. include:: ../installing-providers-from-sources.rst
diff --git a/docs/apache-airflow-providers-apache-kafka/operators/index.rst b/docs/apache-airflow-providers-apache-kafka/operators/index.rst
new file mode 100644
index 0000000000000..2ec6133a0e1b0
--- /dev/null
+++ b/docs/apache-airflow-providers-apache-kafka/operators/index.rst
@@ -0,0 +1,72 @@
+ .. Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements. See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership. The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License. You may obtain a copy of the License at
+
+ ..   http://www.apache.org/licenses/LICENSE-2.0
+
+ .. Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied. See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+
+Apache Kafka Operators
+======================
+
+.. _howto/operator:ConsumeFromTopicOperator:
+
+ConsumeFromTopicOperator
+------------------------
+
+An operator that consumes messages from one or more Kafka topics and processes them.
+The operator creates a Kafka consumer that reads a batch of messages from the cluster and processes them
+using the user-supplied callable function. The consumer will continue to read in batches until it reaches
+the end of the log or a maximum number of messages has been read.
+
+For parameter definitions take a look at :class:`~airflow.providers.apache.kafka.operators.consume.ConsumeFromTopicOperator`.
+
+
+Using the operator
+""""""""""""""""""
+
+.. exampleinclude:: /../../tests/system/providers/apache/kafka/example_dag_hello_kafka.py
+    :language: python
+    :dedent: 4
+    :start-after: [START howto_operator_consume_from_topic]
+    :end-before: [END howto_operator_consume_from_topic]
+
+
+Reference
+"""""""""
+
+For further information, see the `Apache Kafka Consumer documentation <https://kafka.apache.org/documentation/#consumerconfigs>`_.
+
+
+.. _howto/operator:ProduceToTopicOperator:
+
+ProduceToTopicOperator
+----------------------
+
+An operator that produces messages to a Kafka topic.
+The operator creates a Kafka producer and publishes the key/value pairs returned by the user-supplied
+callable function to the topic, one message per pair.
+
+For parameter definitions take a look at :class:`~airflow.providers.apache.kafka.operators.produce.ProduceToTopicOperator`.
+
+Using the operator
+""""""""""""""""""
+
+.. exampleinclude:: /../../tests/system/providers/apache/kafka/example_dag_hello_kafka.py
+    :language: python
+    :dedent: 4
+    :start-after: [START howto_operator_produce_to_topic]
+    :end-before: [END howto_operator_produce_to_topic]
+
+
+Reference
+"""""""""
+
+For further information, see the `Apache Kafka Producer documentation <https://kafka.apache.org/documentation/#producerconfigs>`_.
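+
+For a self-contained illustration, a minimal sketch of a produce/consume pair; it assumes a ``kafka_default``
+connection and an existing ``test_topic``, and the parameter names used here (``kafka_config_id``,
+``producer_function``, ``apply_function``, ``max_messages``) should be verified against the API reference above:
+
+.. code-block:: python
+
+    import pendulum
+
+    from airflow import DAG
+    from airflow.providers.apache.kafka.operators.consume import ConsumeFromTopicOperator
+    from airflow.providers.apache.kafka.operators.produce import ProduceToTopicOperator
+
+
+    def _producer_function():
+        # Each yielded (key, value) pair becomes one message on the topic.
+        for i in range(5):
+            yield (f"key-{i}", f"value-{i}")
+
+
+    def _apply_function(message):
+        # Called once per consumed message.
+        print(message.key(), message.value())
+
+
+    with DAG(
+        dag_id="kafka_round_trip",
+        start_date=pendulum.datetime(2023, 1, 1),
+        schedule_interval=None,
+    ):
+        produce = ProduceToTopicOperator(
+            task_id="produce",
+            kafka_config_id="kafka_default",
+            topic="test_topic",
+            producer_function=_producer_function,
+        )
+        consume = ConsumeFromTopicOperator(
+            task_id="consume",
+            kafka_config_id="kafka_default",
+            topics=["test_topic"],
+            apply_function=_apply_function,
+            max_messages=5,
+        )
+        produce >> consume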
diff --git a/docs/apache-airflow-providers-apache-kafka/sensors.rst b/docs/apache-airflow-providers-apache-kafka/sensors.rst
new file mode 100644
index 0000000000000..82b560f84d44e
--- /dev/null
+++ b/docs/apache-airflow-providers-apache-kafka/sensors.rst
@@ -0,0 +1,75 @@
+ .. Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements. See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership. The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License. You may obtain a copy of the License at
+
+ ..   http://www.apache.org/licenses/LICENSE-2.0
+
+ .. Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied. See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+
+Apache Kafka Sensors
+====================
+
+
+.. _howto/sensor:AwaitMessageSensor:
+
+AwaitMessageSensor
+------------------
+
+A sensor that defers until a specific message is published to a Kafka topic.
+The sensor will create a consumer reading messages from a Kafka topic until a message fulfilling the criteria
+defined in the ``apply_function`` parameter is found.
+
+For parameter definitions take a look at :class:`~airflow.providers.apache.kafka.sensors.kafka.AwaitMessageSensor`.
+
+Using the sensor
+""""""""""""""""
+
+
+.. exampleinclude:: /../../tests/system/providers/apache/kafka/example_dag_hello_kafka.py
+    :language: python
+    :dedent: 4
+    :start-after: [START howto_sensor_await_message]
+    :end-before: [END howto_sensor_await_message]
+
+
+Reference
+"""""""""
+
+For further information, see the `Apache Kafka Consumer documentation <https://kafka.apache.org/documentation/#consumerconfigs>`_.
+
+
+.. _howto/sensor:AwaitMessageTriggerFunctionSensor:
+
+AwaitMessageTriggerFunctionSensor
+---------------------------------
+
+Similar to the ``AwaitMessageSensor`` above, this sensor will defer until it consumes a message from a Kafka topic
+fulfilling the criteria of its ``apply_function``. Once a positive event is encountered, the
+``AwaitMessageTriggerFunctionSensor`` will trigger a callable provided to ``event_triggered_function``.
+
+For parameter definitions take a look at :class:`~airflow.providers.apache.kafka.sensors.kafka.AwaitMessageTriggerFunctionSensor`.
+
+Using the sensor
+""""""""""""""""
+
+.. exampleinclude:: /../../tests/system/providers/apache/kafka/example_dag_event_listener.py
+    :language: python
+    :dedent: 4
+    :start-after: [START howto_sensor_await_message_trigger_function]
+    :end-before: [END howto_sensor_await_message_trigger_function]
+
+
+Reference
+"""""""""
+
+For further information, see the `Apache Kafka Consumer documentation <https://kafka.apache.org/documentation/#consumerconfigs>`_.
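+
+As an illustration, a minimal sketch of the deferrable sensor; because the ``apply_function`` is re-imported by the
+triggerer process, it is passed as a dotted-path string, and the module path below is a placeholder:
+
+.. code-block:: python
+
+    from airflow.providers.apache.kafka.sensors.kafka import AwaitMessageSensor
+
+    wait_for_message = AwaitMessageSensor(
+        task_id="wait_for_message",
+        kafka_config_id="kafka_default",
+        topics=["test_topic"],
+        # Must return truthy data for the TriggerEvent to fire; placeholder module path:
+        apply_function="my_project.kafka_listeners.check_message",
+    )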
diff --git a/docs/apache-airflow-providers-apache-kafka/triggers.rst b/docs/apache-airflow-providers-apache-kafka/triggers.rst
new file mode 100644
index 0000000000000..14c74bcdf768b
--- /dev/null
+++ b/docs/apache-airflow-providers-apache-kafka/triggers.rst
@@ -0,0 +1,30 @@
+ .. Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements. See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership. The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License. You may obtain a copy of the License at
+
+ ..   http://www.apache.org/licenses/LICENSE-2.0
+
+ .. Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied. See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+
+Apache Kafka Triggers
+=====================
+
+.. _howto/triggers:AwaitMessageTrigger:
+
+AwaitMessageTrigger
+-------------------
+
+The ``AwaitMessageTrigger`` is a trigger that will consume messages polled from a Kafka topic and process them with a
+provided callable. If the callable returns any data, a ``TriggerEvent`` is raised.
+
+For parameter definitions take a look at :class:`~airflow.providers.apache.kafka.triggers.await_message.AwaitMessageTrigger`.
diff --git a/docs/apache-airflow/extra-packages-ref.rst b/docs/apache-airflow/extra-packages-ref.rst
index 017ad927caa2b..4477afdc1ae59 100644
--- a/docs/apache-airflow/extra-packages-ref.rst
+++ b/docs/apache-airflow/extra-packages-ref.rst
@@ -134,6 +134,8 @@ custom bash/python providers).
 +---------------------+-----------------------------------------------------+------------------------------------------------+
 | apache.impala       | ``pip install 'apache-airflow[apache.impala]'``     | All Impala related operators & hooks           |
 +---------------------+-----------------------------------------------------+------------------------------------------------+
+| apache.kafka        | ``pip install 'apache-airflow[apache.kafka]'``      | All Kafka related operators & hooks            |
++---------------------+-----------------------------------------------------+------------------------------------------------+
 | apache.kylin        | ``pip install 'apache-airflow[apache.kylin]'``      | All Kylin related operators & hooks            |
 +---------------------+-----------------------------------------------------+------------------------------------------------+
 | apache.livy         | ``pip install 'apache-airflow[apache.livy]'``       | All Livy related operators, hooks & sensors    |
diff --git a/docs/integration-logos/apache/kafka.svg b/docs/integration-logos/apache/kafka.svg
new file mode 100644
index 0000000000000..bdf6af9b89fed
--- /dev/null
+++ b/docs/integration-logos/apache/kafka.svg
@@ -0,0 +1,22 @@
+[SVG markup lost in extraction; the new file adds the "Apache Kafka logo" vector image used for the integration.]
diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt
index 3ffe00f60994d..38bac06ca8c1d 100644
--- a/docs/spelling_wordlist.txt
+++ b/docs/spelling_wordlist.txt
@@ -812,6 +812,8 @@ jthomas
 Jupyter
 jupyter
 jupytercmd
+Kafka
+kafka
 Kalibrr
 Kamil
 KEDA
diff --git a/generated/provider_dependencies.json b/generated/provider_dependencies.json
index 8e0b5846cbd71..79e919591815e 100644
--- a/generated/provider_dependencies.json
+++ b/generated/provider_dependencies.json
@@ -126,6 +126,14 @@
       "common.sql"
     ]
   },
+  "apache.kafka": {
+    "deps": [
+      "apache-airflow>=2.3.0",
+      "asgiref",
+      "confluent-kafka>=1.8.2"
+    ],
+    "cross-providers-deps": []
+  },
   "apache.kylin": {
     "deps": [
       "apache-airflow>=2.3.0",
diff --git a/images/breeze/output-commands-hash.txt b/images/breeze/output-commands-hash.txt
index c9922142bd2f7..7eab1f50984e6 100644
--- a/images/breeze/output-commands-hash.txt
+++ b/images/breeze/output-commands-hash.txt
@@ -1,8 +1,8 @@
 # This file is
automatically generated by pre-commit. If you have a conflict with this file # Please do not solve it but run `breeze setup regenerate-command-images`. # This command should fix the conflict and regenerate help images that you have conflict with. -main:83de6a9bf2b1afecd1f9ce4cd0493733 -build-docs:d449f8ee7b20545a2c7c46ad65226f94 +main:3b0efd589fb61236e9fb1e4422de78c9 +build-docs:3b89efaf5551b1782227cd382c019990 ci:fix-ownership:fee2c9ec9ef19686792002ae054fecdd ci:free-space:47234aa0a60b0efd84972e6e797379f8 ci:get-workflow-info:01ee34c33ad62fa5dc33e0ac8773223f @@ -36,16 +36,16 @@ prod-image:verify:31bc5efada1d70a0a31990025db1a093 prod-image:79bd4cc9de03ab7e1d75f025d75eee46 release-management:create-minor-branch:6a01066dce15e09fb269a8385626657c release-management:generate-constraints:ae30d6ad49a1b2c15b61cb29080fd957 -release-management:generate-issue-content-providers:767a85195f6e686df63b8f8ea0fb7142 +release-management:generate-issue-content-providers:421c1b186818a6251c16f7f3b7807292 release-management:install-provider-packages:5838b06b78e3c5c6e8380024867a1a8d release-management:prepare-airflow-package:3ac14ea6d2b09614959c0ec4fd564789 -release-management:prepare-provider-documentation:8dab0cba3d0bf3d36ec60d71c8c23d50 -release-management:prepare-provider-packages:9d803d0eb5f55d1a178fff2f7951eec8 +release-management:prepare-provider-documentation:b48d9c8af27d5e110364ed2454d23959 +release-management:prepare-provider-packages:cf41c33c6d6121efef1f1d97333e8710 release-management:release-prod-images:c9bc40938e0efad49e51ef66e83f9527 release-management:start-rc-process:6aafbaceabd7b67b9a1af4c2f59abc4c release-management:start-release:acb384d86e02ff5fde1bf971897be17c release-management:verify-provider-packages:566c60fb1bfdc5ed7c4be590736891b2 -release-management:878586136f3d17ecc38e63d969eb3d79 +release-management:a6d6b27d8705294bd55cd1281b05f4c8 setup:autocomplete:03343478bf1d0cf9c101d454cdb63b68 setup:check-all-params-in-groups:c3aca085350fc09451a6d502be9ee821 setup:config:3ffcd35dd24b486ddf1d08b797e3d017 @@ -53,12 +53,12 @@ setup:regenerate-command-images:aaf263095a037d2271640513d8c156fe setup:self-upgrade:d02f70c7a230eae3463ceec2056b63fa setup:version:123b462a421884dc2320ffc5e54b2478 setup:26f37743534e14f5aad5300aad920301 -shell:d77f43d3faadfce6c332beae1cf46d1c -start-airflow:5e8460ac38f8e9ea2a0ac7e248fd7bc9 +shell:bd3e004a92ebcec8feb40fc5cd95872d +start-airflow:ee5066f1420a489864b48bc4e5e472da static-checks:543f0c776d0f198e80a0f75058445bb2 stop:e5aa686b4e53707ced4039d8414d5cd6 testing:docker-compose-tests:b86c044b24138af0659a05ed6331576c testing:helm-tests:936cf28fd84ce4ff5113795fdae9624b -testing:integration-tests:225ddb6243cce5fc64f4824b87adfd98 -testing:tests:b96f54a7e08986e2309af33141099e8d -testing:8d1f02ebc1119bdf93e027a4f291237f +testing:integration-tests:7865b62e9418ddb749511f8a801a49c2 +testing:tests:d301440c82391f9c21c29e7a45efd3b9 +testing:db7a6fc196906d4ead598d63b094c72f diff --git a/images/breeze/output-commands.svg b/images/breeze/output-commands.svg index 784715a8cce83..d1ce7944df5bd 100644 --- a/images/breeze/output-commands.svg +++ b/images/breeze/output-commands.svg @@ -35,8 +35,8 @@ .breeze-help-r1 { fill: #c5c8c6;font-weight: bold } .breeze-help-r2 { fill: #c5c8c6 } .breeze-help-r3 { fill: #d0b344;font-weight: bold } -.breeze-help-r4 { fill: #868887 } -.breeze-help-r5 { fill: #68a0b3;font-weight: bold } +.breeze-help-r4 { fill: #68a0b3;font-weight: bold } +.breeze-help-r5 { fill: #868887 } .breeze-help-r6 { fill: #98a84b;font-weight: bold } .breeze-help-r7 { fill: #8d7b39 } 
@@ -190,50 +190,50 @@ -Usage: breeze [OPTIONS] COMMAND [ARGS]... +Usage: breeze [OPTIONSCOMMAND [ARGS]... -╭─ Basic flags â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® -│--python-pPython major/minor version used in Airflow image for images.(>3.7< | 3.8 | 3.9 | 3.10)│ -│[default: 3.7]                                              â”‚ -│--backend-bDatabase backend to use.(>sqlite< | mysql | postgres | mssql)[default: sqlite]│ -│--postgres-version-PVersion of Postgres used.(>11< | 12 | 13 | 14 | 15)[default: 11]│ -│--mysql-version-MVersion of MySQL used.(>5.7< | 8)[default: 5.7]│ -│--mssql-version-SVersion of MsSQL used.(>2017-latest< | 2019-latest)[default: 2017-latest]│ -│--integrationIntegration(s) to enable when running (can be more than one).                            â”‚ -│(all | all-testable | cassandra | celery | kerberos | mongo | otel | pinot | statsd |    â”‚ -│statsd | trino)                                                                          â”‚ -│--forward-credentials-fForward local credentials to container when running.│ -│--db-reset-dReset DB when entering the container.│ -│--max-timeMaximum time that the command should take - if it takes longer, the command will fail.│ -│(INTEGER RANGE)                                                                       â”‚ -│--github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow]│ -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Common options â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® -│--verbose-vPrint verbose information about performed steps.│ -│--dry-run-DIf dry-run is set, commands are only printed, not executed.│ -│--answer-aForce answer to questions.(y | n | q | yes | no | quit)│ -│--help-hShow this message and exit.│ -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Basic developer commands â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® -│start-airflow     Enter breeze environment and starts all Airflow components in the tmux session. Compile assets  â”‚ -│if contents of www directory changed.                                                           â”‚ -│static-checks     Run static checks.                                                                              â”‚ -│build-docs        Build documentation in the container.                                                           â”‚ -│stop              Stop running breeze environment.                                                                â”‚ -│shell             Enter breeze environment. this is the default command use when no other is selected.            
â”‚ -│exec              Joins the interactive shell of running airflow container.                                       â”‚ -│compile-www-assetsCompiles www assets.                                                                            â”‚ -│cleanup           Cleans the cache of parameters, docker cache and optionally built CI/PROD images.               â”‚ -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Advanced command groups â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® -│testing                Tools that developers can use to run tests                                                 â”‚ -│ci-image               Tools that developers can use to manually manage CI images                                 â”‚ -│k8s                    Tools that developers use to run Kubernetes tests                                          â”‚ -│prod-image             Tools that developers can use to manually manage PROD images                               â”‚ -│setup                  Tools that developers can use to configure Breeze                                          â”‚ -│release-management     Tools that release managers can use to prepare and manage Airflow releases                 â”‚ -│ci                     Tools that CI workflows use to cleanup/manage CI environment                               â”‚ -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Basic flags â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® +│--python-pPython major/minor version used in Airflow image for images.(>3.7< | 3.8 | 3.9 | 3.10)│ +│[default: 3.7]                                              â”‚ +│--backend-bDatabase backend to use.(>sqlite< | mysql | postgres | mssql)[default: sqlite]│ +│--postgres-version-PVersion of Postgres used.(>11< | 12 | 13 | 14 | 15)[default: 11]│ +│--mysql-version-MVersion of MySQL used.(>5.7< | 8)[default: 5.7]│ +│--mssql-version-SVersion of MsSQL used.(>2017-latest< | 2019-latest)[default: 2017-latest]│ +│--integrationIntegration(s) to enable when running (can be more than one).                            
â”‚ +│(all | all-testable | cassandra | celery | kafka | kerberos | mongo | otel | pinot |     â”‚ +│statsd | statsd | trino)                                                                 â”‚ +│--forward-credentials-fForward local credentials to container when running.│ +│--db-reset-dReset DB when entering the container.│ +│--max-timeMaximum time that the command should take - if it takes longer, the command will fail.│ +│(INTEGER RANGE)                                                                       â”‚ +│--github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow]│ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Common options â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® +│--verbose-vPrint verbose information about performed steps.│ +│--dry-run-DIf dry-run is set, commands are only printed, not executed.│ +│--answer-aForce answer to questions.(y | n | q | yes | no | quit)│ +│--help-hShow this message and exit.│ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Basic developer commands â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® +│start-airflow     Enter breeze environment and starts all Airflow components in the tmux session. Compile assets  â”‚ +│if contents of www directory changed.                                                           â”‚ +│static-checks     Run static checks.                                                                              â”‚ +│build-docs        Build documentation in the container.                                                           â”‚ +│stop              Stop running breeze environment.                                                                â”‚ +│shell             Enter breeze environment. this is the default command use when no other is selected.            â”‚ +│exec              Joins the interactive shell of running airflow container.                                       â”‚ +│compile-www-assetsCompiles www assets.                                                                            â”‚ +│cleanup           Cleans the cache of parameters, docker cache and optionally built CI/PROD images.               
â”‚ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Advanced command groups â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® +│testing                Tools that developers can use to run tests                                                 â”‚ +│ci-image               Tools that developers can use to manually manage CI images                                 â”‚ +│k8s                    Tools that developers use to run Kubernetes tests                                          â”‚ +│prod-image             Tools that developers can use to manually manage PROD images                               â”‚ +│setup                  Tools that developers can use to configure Breeze                                          â”‚ +│release-management     Tools that release managers can use to prepare and manage Airflow releases                 â”‚ +│ci                     Tools that CI workflows use to cleanup/manage CI environment                               â”‚ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ diff --git a/images/breeze/output_build-docs.svg b/images/breeze/output_build-docs.svg index d8c26d13745ed..9ff15eefc79d0 100644 --- a/images/breeze/output_build-docs.svg +++ b/images/breeze/output_build-docs.svg @@ -1,4 +1,4 @@ - + - + @@ -225,9 +225,12 @@ + + + - Command: build-docs + Command: build-docs @@ -238,66 +241,67 @@ -Usage: breeze build-docs [OPTIONS] +Usage: breeze build-docs [OPTIONS] Build documentation in the container. -╭─ Doc flags â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® -│--docs-only-dOnly build documentation.│ -│--spellcheck-only-sOnly run spell checking.│ -│--clean-buildClean inventories of Inter-Sphinx documentation and generated APIs and sphinx artifacts    â”‚ -│before the build - useful for a clean build.                                               â”‚ -│--for-productionBuilds documentation for official release i.e. all links point to stable version. Implies  â”‚ -│--clean-build│ -│--one-pass-onlyBuilds documentation in one pass only. This is useful for debugging sphinx errors.│ -│--package-filterList of packages to consider.                                                              
â”‚ -│(apache-airflow | apache-airflow-providers-airbyte | apache-airflow-providers-alibaba |    â”‚ -│apache-airflow-providers-amazon | apache-airflow-providers-apache-beam |                   â”‚ -│apache-airflow-providers-apache-cassandra | apache-airflow-providers-apache-drill |        â”‚ -│apache-airflow-providers-apache-druid | apache-airflow-providers-apache-flink |            â”‚ -│apache-airflow-providers-apache-hdfs | apache-airflow-providers-apache-hive |              â”‚ -│apache-airflow-providers-apache-impala | apache-airflow-providers-apache-kylin |           â”‚ -│apache-airflow-providers-apache-livy | apache-airflow-providers-apache-pig |               â”‚ -│apache-airflow-providers-apache-pinot | apache-airflow-providers-apache-spark |            â”‚ -│apache-airflow-providers-apache-sqoop | apache-airflow-providers-arangodb |                â”‚ -│apache-airflow-providers-asana | apache-airflow-providers-atlassian-jira |                 â”‚ -│apache-airflow-providers-celery | apache-airflow-providers-cloudant |                      â”‚ -│apache-airflow-providers-cncf-kubernetes | apache-airflow-providers-common-sql |           â”‚ -│apache-airflow-providers-databricks | apache-airflow-providers-datadog |                   â”‚ -│apache-airflow-providers-dbt-cloud | apache-airflow-providers-dingding |                   â”‚ -│apache-airflow-providers-discord | apache-airflow-providers-docker |                       â”‚ -│apache-airflow-providers-elasticsearch | apache-airflow-providers-exasol |                 â”‚ -│apache-airflow-providers-facebook | apache-airflow-providers-ftp |                         â”‚ -│apache-airflow-providers-github | apache-airflow-providers-google |                        â”‚ -│apache-airflow-providers-grpc | apache-airflow-providers-hashicorp |                       â”‚ -│apache-airflow-providers-http | apache-airflow-providers-imap |                            â”‚ -│apache-airflow-providers-influxdb | apache-airflow-providers-jdbc |                        â”‚ -│apache-airflow-providers-jenkins | apache-airflow-providers-microsoft-azure |              â”‚ -│apache-airflow-providers-microsoft-mssql | apache-airflow-providers-microsoft-psrp |       â”‚ -│apache-airflow-providers-microsoft-winrm | apache-airflow-providers-mongo |                â”‚ -│apache-airflow-providers-mysql | apache-airflow-providers-neo4j |                          â”‚ -│apache-airflow-providers-odbc | apache-airflow-providers-openfaas |                        â”‚ -│apache-airflow-providers-openlineage | apache-airflow-providers-opsgenie |                 â”‚ -│apache-airflow-providers-oracle | apache-airflow-providers-pagerduty |                     â”‚ -│apache-airflow-providers-papermill | apache-airflow-providers-plexus |                     â”‚ -│apache-airflow-providers-postgres | apache-airflow-providers-presto |                      â”‚ -│apache-airflow-providers-qubole | apache-airflow-providers-redis |                         â”‚ -│apache-airflow-providers-salesforce | apache-airflow-providers-samba |                     â”‚ -│apache-airflow-providers-segment | apache-airflow-providers-sendgrid |                     â”‚ -│apache-airflow-providers-sftp | apache-airflow-providers-singularity |                     â”‚ -│apache-airflow-providers-slack | apache-airflow-providers-smtp |                           â”‚ -│apache-airflow-providers-snowflake | apache-airflow-providers-sqlite |                     â”‚ -│apache-airflow-providers-ssh | 
apache-airflow-providers-tableau |                          â”‚ -│apache-airflow-providers-tabular | apache-airflow-providers-telegram |                     â”‚ -│apache-airflow-providers-trino | apache-airflow-providers-vertica |                        â”‚ -│apache-airflow-providers-zendesk | docker-stack | helm-chart)                              â”‚ -│--github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow]│ -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Common options â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® -│--verbose-vPrint verbose information about performed steps.│ -│--dry-run-DIf dry-run is set, commands are only printed, not executed.│ -│--help-hShow this message and exit.│ -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Doc flags â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® +│--docs-only-dOnly build documentation.│ +│--spellcheck-only-sOnly run spell checking.│ +│--clean-buildClean inventories of Inter-Sphinx documentation and generated APIs and sphinx artifacts    â”‚ +│before the build - useful for a clean build.                                               â”‚ +│--for-productionBuilds documentation for official release i.e. all links point to stable version. Implies  â”‚ +│--clean-build│ +│--one-pass-onlyBuilds documentation in one pass only. This is useful for debugging sphinx errors.│ +│--package-filterList of packages to consider.                                                              
â”‚ +│(apache-airflow | apache-airflow-providers-airbyte | apache-airflow-providers-alibaba |    â”‚ +│apache-airflow-providers-amazon | apache-airflow-providers-apache-beam |                   â”‚ +│apache-airflow-providers-apache-cassandra | apache-airflow-providers-apache-drill |        â”‚ +│apache-airflow-providers-apache-druid | apache-airflow-providers-apache-flink |            â”‚ +│apache-airflow-providers-apache-hdfs | apache-airflow-providers-apache-hive |              â”‚ +│apache-airflow-providers-apache-impala | apache-airflow-providers-apache-kafka |           â”‚ +│apache-airflow-providers-apache-kylin | apache-airflow-providers-apache-livy |             â”‚ +│apache-airflow-providers-apache-pig | apache-airflow-providers-apache-pinot |              â”‚ +│apache-airflow-providers-apache-spark | apache-airflow-providers-apache-sqoop |            â”‚ +│apache-airflow-providers-arangodb | apache-airflow-providers-asana |                       â”‚ +│apache-airflow-providers-atlassian-jira | apache-airflow-providers-celery |                â”‚ +│apache-airflow-providers-cloudant | apache-airflow-providers-cncf-kubernetes |             â”‚ +│apache-airflow-providers-common-sql | apache-airflow-providers-databricks |                â”‚ +│apache-airflow-providers-datadog | apache-airflow-providers-dbt-cloud |                    â”‚ +│apache-airflow-providers-dingding | apache-airflow-providers-discord |                     â”‚ +│apache-airflow-providers-docker | apache-airflow-providers-elasticsearch |                 â”‚ +│apache-airflow-providers-exasol | apache-airflow-providers-facebook |                      â”‚ +│apache-airflow-providers-ftp | apache-airflow-providers-github |                           â”‚ +│apache-airflow-providers-google | apache-airflow-providers-grpc |                          â”‚ +│apache-airflow-providers-hashicorp | apache-airflow-providers-http |                       â”‚ +│apache-airflow-providers-imap | apache-airflow-providers-influxdb |                        â”‚ +│apache-airflow-providers-jdbc | apache-airflow-providers-jenkins |                         â”‚ +│apache-airflow-providers-microsoft-azure | apache-airflow-providers-microsoft-mssql |      â”‚ +│apache-airflow-providers-microsoft-psrp | apache-airflow-providers-microsoft-winrm |       â”‚ +│apache-airflow-providers-mongo | apache-airflow-providers-mysql |                          â”‚ +│apache-airflow-providers-neo4j | apache-airflow-providers-odbc |                           â”‚ +│apache-airflow-providers-openfaas | apache-airflow-providers-openlineage |                 â”‚ +│apache-airflow-providers-opsgenie | apache-airflow-providers-oracle |                      â”‚ +│apache-airflow-providers-pagerduty | apache-airflow-providers-papermill |                  â”‚ +│apache-airflow-providers-plexus | apache-airflow-providers-postgres |                      â”‚ +│apache-airflow-providers-presto | apache-airflow-providers-qubole |                        â”‚ +│apache-airflow-providers-redis | apache-airflow-providers-salesforce |                     â”‚ +│apache-airflow-providers-samba | apache-airflow-providers-segment |                        â”‚ +│apache-airflow-providers-sendgrid | apache-airflow-providers-sftp |                        â”‚ +│apache-airflow-providers-singularity | apache-airflow-providers-slack |                    â”‚ +│apache-airflow-providers-smtp | apache-airflow-providers-snowflake |                       â”‚ +│apache-airflow-providers-sqlite | 
apache-airflow-providers-ssh |                           â”‚ +│apache-airflow-providers-tableau | apache-airflow-providers-tabular |                      â”‚ +│apache-airflow-providers-telegram | apache-airflow-providers-trino |                       â”‚ +│apache-airflow-providers-vertica | apache-airflow-providers-zendesk | docker-stack |       â”‚ +│helm-chart)                                                                                â”‚ +│--github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow]│ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Common options â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® +│--verbose-vPrint verbose information about performed steps.│ +│--dry-run-DIf dry-run is set, commands are only printed, not executed.│ +│--help-hShow this message and exit.│ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ diff --git a/images/breeze/output_release-management_generate-issue-content-providers.svg b/images/breeze/output_release-management_generate-issue-content-providers.svg index de00e33814b9f..e446c2c62c2db 100644 --- a/images/breeze/output_release-management_generate-issue-content-providers.svg +++ b/images/breeze/output_release-management_generate-issue-content-providers.svg @@ -35,8 +35,8 @@ .breeze-release-management-generate-issue-content-providers-r1 { fill: #c5c8c6;font-weight: bold } .breeze-release-management-generate-issue-content-providers-r2 { fill: #c5c8c6 } .breeze-release-management-generate-issue-content-providers-r3 { fill: #d0b344;font-weight: bold } -.breeze-release-management-generate-issue-content-providers-r4 { fill: #868887 } -.breeze-release-management-generate-issue-content-providers-r5 { fill: #68a0b3;font-weight: bold } +.breeze-release-management-generate-issue-content-providers-r4 { fill: #68a0b3;font-weight: bold } +.breeze-release-management-generate-issue-content-providers-r5 { fill: #868887 } .breeze-release-management-generate-issue-content-providers-r6 { fill: #8d7b39 } .breeze-release-management-generate-issue-content-providers-r7 { fill: #98a84b;font-weight: bold } @@ -163,41 +163,41 @@ -Usage: breeze release-management generate-issue-content-providers [OPTIONS] [airbyte | alibaba | amazon | apache.beam +Usage: breeze release-management generate-issue-content-providers [OPTIONS] [airbyte | alibaba | amazon | apache.beam                                                                   | apache.cassandra | apache.drill | apache.druid |                                                                   apache.flink | apache.hdfs | apache.hive | -                                                                  apache.impala | apache.kylin | apache.livy | -                                                                  apache.pig | apache.pinot | apache.spark | -                                                                  apache.sqoop | arangodb | asana | atlassian.jira | -                                                                  celery | cloudant | cncf.kubernetes | common.sql | -                                                                  databricks | datadog | 
dbt.cloud | dingding | -                                                                  discord | docker | elasticsearch | exasol | facebook -                                                                  | ftp | github | google | grpc | hashicorp | http | -                                                                  imap | influxdb | jdbc | jenkins | microsoft.azure | -                                                                  microsoft.mssql | microsoft.psrp | microsoft.winrm | -                                                                  mongo | mysql | neo4j | odbc | openfaas | -                                                                  openlineage | opsgenie | oracle | pagerduty | -                                                                  papermill | plexus | postgres | presto | qubole | -                                                                  redis | salesforce | samba | segment | sendgrid | -                                                                  sftp | singularity | slack | smtp | snowflake | -                                                                  sqlite | ssh | tableau | tabular | telegram | trino -                                                                  | vertica | zendesk]... +                                                                  apache.impala | apache.kafka | apache.kylin | +                                                                  apache.livy | apache.pig | apache.pinot | +                                                                  apache.spark | apache.sqoop | arangodb | asana | +                                                                  atlassian.jira | celery | cloudant | cncf.kubernetes +                                                                  | common.sql | databricks | datadog | dbt.cloud | +                                                                  dingding | discord | docker | elasticsearch | exasol +                                                                  | facebook | ftp | github | google | grpc | +                                                                  hashicorp | http | imap | influxdb | jdbc | jenkins +                                                                  | microsoft.azure | microsoft.mssql | microsoft.psrp +                                                                  | microsoft.winrm | mongo | mysql | neo4j | odbc | +                                                                  openfaas | openlineage | opsgenie | oracle | +                                                                  pagerduty | papermill | plexus | postgres | presto | +                                                                  qubole | redis | salesforce | samba | segment | +                                                                  sendgrid | sftp | singularity | slack | smtp | +                                                                  snowflake | sqlite | ssh | tableau | tabular | +                                                                  telegram | trino | vertica | zendesk]... Generates content for issue to test the release. -╭─ Generate issue content flags â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® -│--github-tokenGitHub token used to authenticate. 
You can set omit it if you have GITHUB_TOKEN env     â”‚ -│variable set. Can be generated with:                                                    â”‚ -│https://github.com/settings/tokens/new?description=Read%20sssues&scopes=repo:status     â”‚ -│(TEXT)                                                                                  â”‚ -│--suffixSuffix to add to the version prepared(TEXT)│ -│--only-available-in-distOnly consider package ids with packages prepared in the dist folder│ -│--excluded-pr-listComa-separated list of PRs to exclude from the issue.(TEXT)│ -│--disable-progressDisable progress bar│ -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Common options â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® -│--help-hShow this message and exit.│ -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Generate issue content flags â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® +│--github-tokenGitHub token used to authenticate. You can set omit it if you have GITHUB_TOKEN env     â”‚ +│variable set. Can be generated with:                                                    â”‚ +│https://github.com/settings/tokens/new?description=Read%20sssues&scopes=repo:status     â”‚ +│(TEXT)                                                                                  â”‚ +│--suffixSuffix to add to the version prepared(TEXT)│ +│--only-available-in-distOnly consider package ids with packages prepared in the dist folder│ +│--excluded-pr-listComa-separated list of PRs to exclude from the issue.(TEXT)│ +│--disable-progressDisable progress bar│ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Common options â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® +│--help-hShow this message and exit.│ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ diff --git a/images/breeze/output_release-management_prepare-provider-documentation.svg b/images/breeze/output_release-management_prepare-provider-documentation.svg index 90af2e162884e..64ea91aaaca47 100644 --- a/images/breeze/output_release-management_prepare-provider-documentation.svg +++ b/images/breeze/output_release-management_prepare-provider-documentation.svg @@ -35,8 +35,8 @@ .breeze-release-management-prepare-provider-documentation-r1 { fill: #c5c8c6;font-weight: bold } .breeze-release-management-prepare-provider-documentation-r2 { fill: #c5c8c6 } .breeze-release-management-prepare-provider-documentation-r3 { fill: #d0b344;font-weight: bold } -.breeze-release-management-prepare-provider-documentation-r4 { fill: #868887 } 
-.breeze-release-management-prepare-provider-documentation-r5 { fill: #68a0b3;font-weight: bold } +.breeze-release-management-prepare-provider-documentation-r4 { fill: #68a0b3;font-weight: bold } +.breeze-release-management-prepare-provider-documentation-r5 { fill: #868887 } .breeze-release-management-prepare-provider-documentation-r6 { fill: #98a84b;font-weight: bold } .breeze-release-management-prepare-provider-documentation-r7 { fill: #8d7b39 } @@ -154,12 +154,12 @@ -Usage: breeze release-management prepare-provider-documentation [OPTIONS] [airbyte | alibaba | amazon | apache.beam | +Usage: breeze release-management prepare-provider-documentation [OPTIONS] [airbyte | alibaba | amazon | apache.beam |                                                                 apache.cassandra | apache.drill | apache.druid |                                                                 apache.flink | apache.hdfs | apache.hive | -                                                                apache.impala | apache.kylin | apache.livy | -                                                                apache.pig | apache.pinot | apache.spark | -                                                                apache.sqoop | arangodb | asana | atlassian.jira | +                                                                apache.impala | apache.kafka | apache.kylin | +                                                                apache.livy | apache.pig | apache.pinot | apache.spark +                                                                | apache.sqoop | arangodb | asana | atlassian.jira |                                                                 celery | cloudant | cncf.kubernetes | common.sql |                                                                 databricks | datadog | dbt.cloud | dingding | discord                                                                 | docker | elasticsearch | exasol | facebook | ftp | @@ -173,19 +173,19 @@                                                                 slack | smtp | snowflake | sqlite | ssh | tableau |                                                                 tabular | telegram | trino | vertica | zendesk]... -Prepare CHANGELOG, README and COMMITS information for providers. +Prepare CHANGELOGREADME and COMMITS information for providers. -╭─ Provider documentation preparation flags â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® -│--debugDrop user in shell instead of running the command. 
Useful for debugging.│ -│--github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow]│ -│--base-branch(TEXT)│ -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Common options â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® -│--verbose-vPrint verbose information about performed steps.│ -│--dry-run-DIf dry-run is set, commands are only printed, not executed.│ -│--answer-aForce answer to questions.(y | n | q | yes | no | quit)│ -│--help-hShow this message and exit.│ -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Provider documentation preparation flags â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® +│--debugDrop user in shell instead of running the command. Useful for debugging.│ +│--github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow]│ +│--base-branch(TEXT)│ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Common options â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â•® +│--verbose-vPrint verbose information about performed steps.│ +│--dry-run-DIf dry-run is set, commands are only printed, not executed.│ +│--answer-aForce answer to questions.(y | n | q | yes | no | quit)│ +│--help-hShow this message and exit.│ +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ diff --git a/images/breeze/output_release-management_prepare-provider-packages.svg b/images/breeze/output_release-management_prepare-provider-packages.svg index 0a165ee969c62..654822c9eb6de 100644 --- a/images/breeze/output_release-management_prepare-provider-packages.svg +++ b/images/breeze/output_release-management_prepare-provider-packages.svg @@ -35,8 +35,8 @@ .breeze-release-management-prepare-provider-packages-r1 { fill: #c5c8c6;font-weight: bold } .breeze-release-management-prepare-provider-packages-r2 { fill: #c5c8c6 } .breeze-release-management-prepare-provider-packages-r3 { fill: #d0b344;font-weight: bold } -.breeze-release-management-prepare-provider-packages-r4 { fill: #868887 } -.breeze-release-management-prepare-provider-packages-r5 { fill: #68a0b3;font-weight: bold } +.breeze-release-management-prepare-provider-packages-r4 { fill: #68a0b3;font-weight: bold } +.breeze-release-management-prepare-provider-packages-r5 { fill: #868887 } .breeze-release-management-prepare-provider-packages-r6 { fill: #8d7b39 } .breeze-release-management-prepare-provider-packages-r7 { fill: #98a84b;font-weight: bold } @@ -154,38 +154,38 @@ -Usage: breeze release-management prepare-provider-packages [OPTIONS] [airbyte | alibaba | amazon | apache.beam | +Usage: breeze 
release-management prepare-provider-packages [OPTIONS] [airbyte | alibaba | amazon | apache.beam |                                                            apache.cassandra | apache.drill | apache.druid |                                                            apache.flink | apache.hdfs | apache.hive | apache.impala | -                                                           apache.kylin | apache.livy | apache.pig | apache.pinot | -                                                           apache.spark | apache.sqoop | arangodb | asana | -                                                           atlassian.jira | celery | cloudant | cncf.kubernetes | -                                                           common.sql | databricks | datadog | dbt.cloud | dingding | -                                                           discord | docker | elasticsearch | exasol | facebook | ftp -                                                           | github | google | grpc | hashicorp | http | imap | -                                                           influxdb | jdbc | jenkins | microsoft.azure | -                                                           microsoft.mssql | microsoft.psrp | microsoft.winrm | mongo -                                                           | mysql | neo4j | odbc | openfaas | openlineage | opsgenie -                                                           | oracle | pagerduty | papermill | plexus | postgres | -                                                           presto | qubole | redis | salesforce | samba | segment | -                                                           sendgrid | sftp | singularity | slack | smtp | snowflake | -                                                           sqlite | ssh | tableau | tabular | telegram | trino | -                                                           vertica | zendesk]... +                                                           apache.kafka | apache.kylin | apache.livy | apache.pig | +                                                           apache.pinot | apache.spark | apache.sqoop | arangodb | +                                                           asana | atlassian.jira | celery | cloudant | +                                                           cncf.kubernetes | common.sql | databricks | datadog | +                                                           dbt.cloud | dingding | discord | docker | elasticsearch | +                                                           exasol | facebook | ftp | github | google | grpc | +                                                           hashicorp | http | imap | influxdb | jdbc | jenkins | +                                                           microsoft.azure | microsoft.mssql | microsoft.psrp | +                                                           microsoft.winrm | mongo | mysql | neo4j | odbc | openfaas | +                                                           openlineage | opsgenie | oracle | pagerduty | papermill | +                                                           plexus | postgres | presto | qubole | redis | salesforce | +                                                           samba | segment | sendgrid | sftp | singularity | slack | +                                                           smtp | snowflake | sqlite | ssh | tableau | tabular | +                                                           telegram | trino | vertica | zendesk]... Prepare sdist/whl packages of Airflow Providers. 
-╭─ Package flags ──────────────────────────────────────────────────────────────────────────────────────────────────────╮
-│--package-formatFormat of packages.(wheel | sdist | both)[default: wheel]│
-│--version-suffix-for-pypiVersion suffix used for PyPI packages (alpha, beta, rc1, etc.).(TEXT)│
-│--package-list-fileRead list of packages from text file (one package per line).(FILENAME)│
-│--debugDrop user in shell instead of running the command. Useful for debugging.│
-│--github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow]│
-╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮
-│--verbose-vPrint verbose information about performed steps.│
-│--dry-run-DIf dry-run is set, commands are only printed, not executed.│
-│--help-hShow this message and exit.│
-╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
+╭─ Package flags ──────────────────────────────────────────────────────────────────────────────────────────────────────╮
+│--package-formatFormat of packages.(wheel | sdist | both)[default: wheel]│
+│--version-suffix-for-pypiVersion suffix used for PyPI packages (alpha, beta, rc1, etc.).(TEXT)│
+│--package-list-fileRead list of packages from text file (one package per line).(FILENAME)│
+│--debugDrop user in shell instead of running the command. Useful for debugging.│
+│--github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow]│
+╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
+╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮
+│--verbose-vPrint verbose information about performed steps.│
+│--dry-run-DIf dry-run is set, commands are only printed, not executed.│
+│--help-hShow this message and exit.│
+╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
diff --git a/images/breeze/output_setup.svg b/images/breeze/output_setup.svg
index 65185a603f97a..c313db50bab0c 100644
--- a/images/breeze/output_setup.svg
+++ b/images/breeze/output_setup.svg
@@ -35,8 +35,8 @@
 .breeze-setup-r1 { fill: #c5c8c6;font-weight: bold }
 .breeze-setup-r2 { fill: #c5c8c6 }
 .breeze-setup-r3 { fill: #d0b344;font-weight: bold }
-.breeze-setup-r4 { fill: #868887 }
-.breeze-setup-r5 { fill: #68a0b3;font-weight: bold }
+.breeze-setup-r4 { fill: #68a0b3;font-weight: bold }
+.breeze-setup-r5 { fill: #868887 }
 .breeze-setup-r6 { fill: #98a84b;font-weight: bold }
@@ -102,21 +102,21 @@
-Usage: breeze setup [OPTIONS] COMMAND [ARGS]...
+Usage: breeze setup [OPTIONS] COMMAND [ARGS]...

 Tools that developers can use to configure Breeze

-╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮
-│--help-hShow this message and exit.│
-╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────────────────────────╮
-│autocomplete                     Enables autocompletion of breeze commands.                                       │
-│check-all-params-in-groups       Check that all parameters are put in groups.                                     │
-│config                           Show/update configuration (Python, Backend, Cheatsheet, ASCIIART).               │
-│regenerate-command-images        Regenerate breeze command images.                                                │
-│self-upgrade                     Self upgrade Breeze.                                                             │
-│version                          Print information about version of apache-airflow-breeze.                        │
-╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
+╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮
+│--help-hShow this message and exit.│
+╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
+╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────────────────────────╮
+│autocomplete                     Enables autocompletion of breeze commands.                                       │
+│check-all-params-in-groups       Check that all parameters are put in groups.                                     │
+│config                           Show/update configuration (Python, Backend, Cheatsheet, ASCIIART).               │
+│regenerate-command-images        Regenerate breeze command images.                                                │
+│self-upgrade                     Self upgrade Breeze.                                                             │
+│version                          Print information about version of apache-airflow-breeze.                        │
+╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
diff --git a/images/breeze/output_setup_check-all-params-in-groups.svg b/images/breeze/output_setup_check-all-params-in-groups.svg
index 1ed51d446efa8..096285f97fb7c 100644
--- a/images/breeze/output_setup_check-all-params-in-groups.svg
+++ b/images/breeze/output_setup_check-all-params-in-groups.svg
@@ -35,8 +35,8 @@
 .breeze-setup-check-all-params-in-groups-r1 { fill: #c5c8c6;font-weight: bold }
 .breeze-setup-check-all-params-in-groups-r2 { fill: #c5c8c6 }
 .breeze-setup-check-all-params-in-groups-r3 { fill: #d0b344;font-weight: bold }
-.breeze-setup-check-all-params-in-groups-r4 { fill: #868887 }
-.breeze-setup-check-all-params-in-groups-r5 { fill: #68a0b3;font-weight: bold }
+.breeze-setup-check-all-params-in-groups-r4 { fill: #68a0b3;font-weight: bold }
+.breeze-setup-check-all-params-in-groups-r5 { fill: #868887 }
 .breeze-setup-check-all-params-in-groups-r6 { fill: #8d7b39 }
 .breeze-setup-check-all-params-in-groups-r7 { fill: #98a84b;font-weight: bold }
@@ -139,33 +139,33 @@
-Usage: breeze setup check-all-params-in-groups [OPTIONS]
+Usage: breeze setup check-all-params-in-groups [OPTIONS]

 Check that all parameters are put in groups.
-╭─ Check all params in groups flags ─────────────────────────────────────────────────────────────────────────────────╮
-│--commandCommand(s) to regenerate images for (optional, might be repeated)                                      │
-│(main | build-docs | ci:fix-ownership | ci:free-space | ci:get-workflow-info | ci:resource-check |     │
-│ci:selective-check | ci | ci-image:build | ci-image:pull | ci-image:verify | ci-image | cleanup |      │
-│compile-www-assets | exec | k8s:build-k8s-image | k8s:configure-cluster | k8s:create-cluster |         │
-│k8s:delete-cluster | k8s:deploy-airflow | k8s:k9s | k8s:logs | k8s:run-complete-tests | k8s:setup-env |│
-│k8s:shell | k8s:status | k8s:tests | k8s:upload-k8s-image | k8s | prod-image:build | prod-image:pull | │
-│prod-image:verify | prod-image | release-management:create-minor-branch |                              │
-│release-management:generate-constraints | release-management:generate-issue-content-providers |        │
-│release-management:install-provider-packages | release-management:prepare-airflow-package |            │
-│release-management:prepare-provider-documentation | release-management:prepare-provider-packages |     │
-│release-management:release-prod-images | release-management:start-rc-process |                         │
-│release-management:start-release | release-management:verify-provider-packages | release-management |  │
-│setup:autocomplete | setup:check-all-params-in-groups | setup:config | setup:regenerate-command-images │
-│| setup:self-upgrade | setup:version | setup | shell | start-airflow | static-checks | stop |          │
-│testing:docker-compose-tests | testing:helm-tests | testing:integration-tests | testing:tests |        │
-│testing)                                                                                               │
-╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮
-│--verbose-vPrint verbose information about performed steps.│
-│--dry-run-DIf dry-run is set, commands are only printed, not executed.│
-│--help-hShow this message and exit.│
-╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
+╭─ Check all params in groups flags ─────────────────────────────────────────────────────────────────────────────────╮
+│--commandCommand(s) to regenerate images for (optional, might be repeated)                                      │
+│(main | build-docs | ci:fix-ownership | ci:free-space | ci:get-workflow-info | ci:resource-check |     │
+│ci:selective-check | ci | ci-image:build | ci-image:pull | ci-image:verify | ci-image | cleanup |      │
+│compile-www-assets | exec | k8s:build-k8s-image | k8s:configure-cluster | k8s:create-cluster |         │
+│k8s:delete-cluster | k8s:deploy-airflow | k8s:k9s | k8s:logs | k8s:run-complete-tests | k8s:setup-env |│
+│k8s:shell | k8s:status | k8s:tests | k8s:upload-k8s-image | k8s | prod-image:build | prod-image:pull | │
+│prod-image:verify | prod-image | release-management:create-minor-branch |                              │
+│release-management:generate-constraints | release-management:generate-issue-content-providers |        │
+│release-management:install-provider-packages | release-management:prepare-airflow-package |            │
+│release-management:prepare-provider-documentation | release-management:prepare-provider-packages |     │
+│release-management:release-prod-images | release-management:start-rc-process |                         │
+│release-management:start-release | release-management:verify-provider-packages | release-management |  │
+│setup:autocomplete | setup:check-all-params-in-groups | setup:config | setup:regenerate-command-images │
+│| setup:self-upgrade | setup:version | setup | shell | start-airflow | static-checks | stop |          │
+│testing:docker-compose-tests | testing:helm-tests | testing:integration-tests | testing:tests |        │
+│testing)                                                                                               │
+╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
+╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮
+│--verbose-vPrint verbose information about performed steps.│
+│--dry-run-DIf dry-run is set, commands are only printed, not executed.│
+│--help-hShow this message and exit.│
+╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
diff --git a/images/breeze/output_setup_regenerate-command-images.svg b/images/breeze/output_setup_regenerate-command-images.svg
index 5bb38f1c124b7..b8bcafe739900 100644
--- a/images/breeze/output_setup_regenerate-command-images.svg
+++ b/images/breeze/output_setup_regenerate-command-images.svg
@@ -35,8 +35,8 @@
 .breeze-setup-regenerate-command-images-r1 { fill: #c5c8c6;font-weight: bold }
 .breeze-setup-regenerate-command-images-r2 { fill: #c5c8c6 }
 .breeze-setup-regenerate-command-images-r3 { fill: #d0b344;font-weight: bold }
-.breeze-setup-regenerate-command-images-r4 { fill: #868887 }
-.breeze-setup-regenerate-command-images-r5 { fill: #68a0b3;font-weight: bold }
+.breeze-setup-regenerate-command-images-r4 { fill: #68a0b3;font-weight: bold }
+.breeze-setup-regenerate-command-images-r5 { fill: #868887 }
 .breeze-setup-regenerate-command-images-r6 { fill: #8d7b39 }
 .breeze-setup-regenerate-command-images-r7 { fill: #98a84b;font-weight: bold }
@@ -148,36 +148,36 @@
-Usage: breeze setup regenerate-command-images [OPTIONS]
+Usage: breeze setup regenerate-command-images [OPTIONS]

 Regenerate breeze command images.
-╭─ Image regeneration option ────────────────────────────────────────────────────────────────────────────────────────╮
-│--forceForces regeneration of all images│
-│--commandCommand(s) to regenerate images for (optional, might be repeated)                                   │
-│(main | build-docs | ci:fix-ownership | ci:free-space | ci:get-workflow-info | ci:resource-check |  │
-│ci:selective-check | ci | ci-image:build | ci-image:pull | ci-image:verify | ci-image | cleanup |   │
-│compile-www-assets | exec | k8s:build-k8s-image | k8s:configure-cluster | k8s:create-cluster |      │
-│k8s:delete-cluster | k8s:deploy-airflow | k8s:k9s | k8s:logs | k8s:run-complete-tests |             │
-│k8s:setup-env | k8s:shell | k8s:status | k8s:tests | k8s:upload-k8s-image | k8s | prod-image:build |│
-│prod-image:pull | prod-image:verify | prod-image | release-management:create-minor-branch |         │
-│release-management:generate-constraints | release-management:generate-issue-content-providers |     │
-│release-management:install-provider-packages | release-management:prepare-airflow-package |         │
-│release-management:prepare-provider-documentation | release-management:prepare-provider-packages |  │
-│release-management:release-prod-images | release-management:start-rc-process |                      │
-│release-management:start-release | release-management:verify-provider-packages | release-management │
-│| setup:autocomplete | setup:check-all-params-in-groups | setup:config |                            │
-│setup:regenerate-command-images | setup:self-upgrade | setup:version | setup | shell | start-airflow│
-│| static-checks | stop | testing:docker-compose-tests | testing:helm-tests |                        │
-│testing:integration-tests | testing:tests | testing)                                                │
-│--check-onlyOnly check if some images need to be regenerated. Return 0 if no need or 1 if needed. Cannot be used│
-│together with --command flag or --force.                                                            │
-╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
-╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮
-│--verbose-vPrint verbose information about performed steps.│
-│--dry-run-DIf dry-run is set, commands are only printed, not executed.│
-│--help-hShow this message and exit.│
-╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
+╭─ Image regeneration option ────────────────────────────────────────────────────────────────────────────────────────╮
+│--forceForces regeneration of all images│
+│--commandCommand(s) to regenerate images for (optional, might be repeated)                                   │
+│(main | build-docs | ci:fix-ownership | ci:free-space | ci:get-workflow-info | ci:resource-check |  │
+│ci:selective-check | ci | ci-image:build | ci-image:pull | ci-image:verify | ci-image | cleanup |   │
+│compile-www-assets | exec | k8s:build-k8s-image | k8s:configure-cluster | k8s:create-cluster |      │
+│k8s:delete-cluster | k8s:deploy-airflow | k8s:k9s | k8s:logs | k8s:run-complete-tests |             │
+│k8s:setup-env | k8s:shell | k8s:status | k8s:tests | k8s:upload-k8s-image | k8s | prod-image:build |│
+│prod-image:pull | prod-image:verify | prod-image | release-management:create-minor-branch |         │
+│release-management:generate-constraints | release-management:generate-issue-content-providers |     │
+│release-management:install-provider-packages | release-management:prepare-airflow-package |         │
+│release-management:prepare-provider-documentation | release-management:prepare-provider-packages |  │
+│release-management:release-prod-images | release-management:start-rc-process |                      │
+│release-management:start-release | release-management:verify-provider-packages | release-management │
+│| setup:autocomplete | setup:check-all-params-in-groups | setup:config |                            │
+│setup:regenerate-command-images | setup:self-upgrade | setup:version | setup | shell | start-airflow│
+│| static-checks | stop | testing:docker-compose-tests | testing:helm-tests |                        │
+│testing:integration-tests | testing:tests | testing)                                                │
+│--check-onlyOnly check if some images need to be regenerated. Return 0 if no need or 1 if needed. Cannot be used│
+│together with --command flag or --force.                                                            │
+╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
+╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮
+│--verbose-vPrint verbose information about performed steps.│
+│--dry-run-DIf dry-run is set, commands are only printed, not executed.│
+│--help-hShow this message and exit.│
+╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
diff --git a/images/breeze/output_shell.svg b/images/breeze/output_shell.svg
index 4cec938becd85..dc84cf0b2194c 100644
--- a/images/breeze/output_shell.svg
+++ b/images/breeze/output_shell.svg
@@ -235,8 +235,8 @@
 │--mysql-version-MVersion of MySQL used.(>5.7< | 8)[default: 5.7]│
 │--mssql-version-SVersion of MsSQL used.(>2017-latest< | 2019-latest)[default: 2017-latest]│
 │--integrationIntegration(s) to enable when running (can be more than one).                            │
-│(all | all-testable | cassandra | celery | kerberos | mongo | otel | pinot | statsd |    │
-│statsd | trino)                                                                          │
+│(all | all-testable | cassandra | celery | kafka | kerberos | mongo | otel | pinot |     │
+│statsd | statsd | trino)                                                                 │
 │--forward-credentials-fForward local credentials to container when running.│
 │--db-reset-dReset DB when entering the container.│
 │--github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow]│
diff --git a/images/breeze/output_start-airflow.svg b/images/breeze/output_start-airflow.svg
index ccb0221884d54..9a562e9eb99e1 100644
--- a/images/breeze/output_start-airflow.svg
+++ b/images/breeze/output_start-airflow.svg
@@ -249,8 +249,8 @@
 │--mysql-version-MVersion of MySQL used.(>5.7< | 8)[default: 5.7]│
 │--mssql-version-SVersion of MsSQL used.(>2017-latest< | 2019-latest)[default: 2017-latest]│
 │--integrationIntegration(s) to enable when running (can be more than one).                       │
-│(all | all-testable | cassandra | celery | kerberos | mongo | otel | pinot | statsd │
-│| statsd | trino)                                                                   │
+│(all | all-testable | cassandra | celery | kafka | kerberos | mongo | otel | pinot |│
+│statsd | statsd | trino)                                                            │
 │--forward-credentials-fForward local credentials to container when running.│
 │--db-reset-dReset DB when entering the container.│
 │--github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow]│
diff --git a/images/breeze/output_testing_integration-tests.svg b/images/breeze/output_testing_integration-tests.svg
index 56270fc9386be..71de0e33361a8 100644
--- a/images/breeze/output_testing_integration-tests.svg
+++ b/images/breeze/output_testing_integration-tests.svg
@@ -163,8 +163,8 @@
 ╭─ Basic flag for integration tests command ───────────────────────────────────────────────────────────────────────────╮
 │--integrationIntegration(s) to enable when running (can be more than one).                               │
-│(all | all-testable | cassandra | celery | kerberos | mongo | otel | pinot | statsd | statsd│
-│| trino)                                                                                    │
+│(all | all-testable | cassandra | celery | kafka | kerberos | mongo | otel | pinot | statsd │
+│| statsd | trino)                                                                           │
 │--test-timeoutTest timeout. Set the pytest setup, execution and teardown timeouts to this value│
 │(INTEGER RANGE)                                                                  │
 │[default: 60; x>=0]                                                              │
diff --git a/images/breeze/output_testing_tests.svg b/images/breeze/output_testing_tests.svg
index d303079df20f7..bc51ed0235a9c 100644
--- a/images/breeze/output_testing_tests.svg
+++ b/images/breeze/output_testing_tests.svg
@@ -229,8 +229,8 @@
 │--mysql-version-MVersion of MySQL used.(>5.7< | 8)[default: 5.7]│
 │--mssql-version-SVersion of MsSQL used.(>2017-latest< | 2019-latest)[default: 2017-latest]│
 │--integrationIntegration(s) to enable when running (can be more than one).                               │
-│(all | all-testable | cassandra | celery | kerberos | mongo | otel | pinot | statsd | statsd│
-│| trino)                                                                                    │
+│(all | all-testable | cassandra | celery | kafka | kerberos | mongo | otel | pinot | statsd │
+│| statsd | trino)                                                                           │
 ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
 ╭─ Options for parallel test commands ─────────────────────────────────────────────────────────────────────────────────╮
 │--run-in-parallelRun the operation in parallel on all or selected subset of Python versions.│
diff --git a/scripts/ci/docker-compose/integration-kafka.yml b/scripts/ci/docker-compose/integration-kafka.yml
new file mode 100644
index 0000000000000..3f89a941434e2
--- /dev/null
+++ b/scripts/ci/docker-compose/integration-kafka.yml
@@ -0,0 +1,63 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+---
+version: "3.7"
+services:
+  broker:
+    image: confluentinc/cp-kafka:7.3.0
+    hostname: broker
+    container_name: broker
+    ports:
+      - "9092:9092"
+      - "9101:9101"
+    environment:
+      KAFKA_BROKER_ID: 1
+      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: >
+        CONTROLLER:PLAINTEXT,
+        PLAINTEXT:PLAINTEXT,
+        PLAINTEXT_HOST:PLAINTEXT
+      KAFKA_ADVERTISED_LISTENERS: >
+        PLAINTEXT://broker:29092,
+        PLAINTEXT_HOST://localhost:9092
+      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
+      KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 0
+      KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: 1
+      KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1
+      KAFKA_JMX_PORT: 9101
+      KAFKA_JMX_HOSTNAME: localhost
+      KAFKA_PROCESS_ROLES: 'broker,controller'
+      KAFKA_NODE_ID: 1
+      KAFKA_CONTROLLER_QUORUM_VOTERS: '1@broker:29093'
+      KAFKA_LISTENERS: >
+        PLAINTEXT://broker:29092,
+        CONTROLLER://broker:29093,
+        PLAINTEXT_HOST://0.0.0.0:9092
+      KAFKA_INTER_BROKER_LISTENER_NAME: 'PLAINTEXT'
+      KAFKA_CONTROLLER_LISTENER_NAMES: 'CONTROLLER'
+      KAFKA_LOG_DIRS: '/tmp/kraft-combined-logs'
+    volumes:
+      - ./kafka/update_run.sh:/tmp/update_run.sh
+    command: >
+      bash -c 'if [ ! -f /tmp/update_run.sh ];
+      then echo "ERROR: update_run.sh not mounted?"
+      && exit 1 ; else /tmp/update_run.sh && /etc/confluent/docker/run ; fi'
+
+  airflow:
+    environment:
+      - INTEGRATION_KAFKA=true
+    depends_on:
+      - broker
diff --git a/scripts/ci/docker-compose/kafka/update_run.sh b/scripts/ci/docker-compose/kafka/update_run.sh
new file mode 100755
index 0000000000000..d12bdb332b3d3
--- /dev/null
+++ b/scripts/ci/docker-compose/kafka/update_run.sh
@@ -0,0 +1,27 @@
+#!/bin/sh
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Docker workaround: Remove check for KAFKA_ZOOKEEPER_CONNECT parameter
+sed -i '/KAFKA_ZOOKEEPER_CONNECT/d' /etc/confluent/docker/configure
+
+# Docker workaround: Ignore cub zk-ready
+sed -i 's/cub zk-ready/echo ignore zk-ready/' /etc/confluent/docker/ensure
+
+# KRaft required step: Format the storage directory with a new cluster ID
+echo "kafka-storage format --ignore-formatted --cluster-id=$(kafka-storage random-uuid) -c /etc/kafka/kafka.properties" >> /etc/confluent/docker/ensure
diff --git a/scripts/in_container/check_environment.sh b/scripts/in_container/check_environment.sh
index 8e6b975965205..a81145a3901e2 100755
--- a/scripts/in_container/check_environment.sh
+++ b/scripts/in_container/check_environment.sh
@@ -160,6 +160,9 @@ if [[ ${INTEGRATION_PINOT} == "true" ]]; then
     CMD="curl --max-time 1 -X GET 'http://pinot:8000/health' -H 'accept: text/plain' | grep OK"
     check_service "Pinot (Broker API)" "${CMD}" 50
 fi
+if [[ ${INTEGRATION_KAFKA} == "true" ]]; then
+    check_service "Kafka Cluster" "run_nc broker 9092" 50
+fi
 if [[ ${EXIT_CODE} != 0 ]]; then
     echo
diff --git a/scripts/in_container/verify_providers.py b/scripts/in_container/verify_providers.py
index dd09e2f7c213a..fa15300c633f5 100755
--- a/scripts/in_container/verify_providers.py
+++ b/scripts/in_container/verify_providers.py
@@ -552,6 +552,7 @@ def get_package_class_summary(
         unexpected_class_name_patterns=ALL_PATTERNS - {OPERATORS_PATTERN},
         exclude_class_type=BaseSensorOperator,
         false_positive_class_names={
+            "ProduceToTopicOperator",
             "CloudVisionAddProductToProductSetOperator",
             "CloudDataTransferServiceGCSToGCSOperator",
             "CloudDataTransferServiceS3ToGCSOperator",
diff --git a/setup.py b/setup.py
index d6e540cc9800b..ff816de0b7abe 100644
--- a/setup.py
+++ b/setup.py
@@ -382,6 +382,7 @@ def write_version(filename: str = str(AIRFLOW_SOURCES_ROOT / "airflow" / "git_ve
     "pytest-capture-warnings",
     "pytest-cov",
     "pytest-instafail",
+    "pytest-mock",
     "pytest-rerunfailures",
     "pytest-timeouts",
     "pytest-xdist",
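Aside: the compose file above advertises two listeners, PLAINTEXT://broker:29092 for clients inside the docker network and PLAINTEXT_HOST://localhost:9092 for clients on the host. A minimal smoke test for the broker (a sketch, not part of this change; swap the bootstrap address depending on where it runs):

    from confluent_kafka.admin import AdminClient

    # Inside the Breeze/docker network; from the host, use "localhost:9092".
    admin = AdminClient({"bootstrap.servers": "broker:29092"})

    # list_topics() forces a metadata round-trip, so it fails fast if the
    # KRaft broker is not up yet.
    metadata = admin.list_topics(timeout=10)
    print(sorted(metadata.topics))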
diff --git a/tests/integration/providers/apache/kafka/__init__.py b/tests/integration/providers/apache/kafka/__init__.py
new file mode 100644
index 0000000000000..13a83393a9124
--- /dev/null
+++ b/tests/integration/providers/apache/kafka/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/tests/integration/providers/apache/kafka/hooks/__init__.py b/tests/integration/providers/apache/kafka/hooks/__init__.py
new file mode 100644
index 0000000000000..13a83393a9124
--- /dev/null
+++ b/tests/integration/providers/apache/kafka/hooks/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/tests/integration/providers/apache/kafka/hooks/test_admin_client.py b/tests/integration/providers/apache/kafka/hooks/test_admin_client.py
new file mode 100644
index 0000000000000..1200a96565be5
--- /dev/null
+++ b/tests/integration/providers/apache/kafka/hooks/test_admin_client.py
@@ -0,0 +1,51 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+import json
+
+import pytest
+
+from airflow.models import Connection
+from airflow.providers.apache.kafka.hooks.client import KafkaAdminClientHook
+from airflow.utils import db
+
+client_config = {"socket.timeout.ms": 1000, "bootstrap.servers": "broker:29092"}
+
+
+@pytest.mark.integration("kafka")
+class TestKafkaAdminClientHook:
+    def setup_method(self):
+        db.merge_conn(
+            Connection(
+                conn_id="kafka_d",
+                conn_type="kafka",
+                extra=json.dumps(client_config),
+            )
+        )
+
+    def test_hook(self):
+        """test the creation of topics"""
+
+        # Standard Init
+        hook = KafkaAdminClientHook(kafka_config_id="kafka_d")
+        hook.create_topic(topics=[("test_1", 1, 1), ("test_2", 1, 1)])
+
+        kadmin = hook.get_conn
+        t = kadmin.list_topics(timeout=10).topics
+        assert t.get("test_2")
diff --git a/tests/integration/providers/apache/kafka/hooks/test_consumer.py b/tests/integration/providers/apache/kafka/hooks/test_consumer.py
new file mode 100644
index 0000000000000..1134f5526576e
--- /dev/null
+++ b/tests/integration/providers/apache/kafka/hooks/test_consumer.py
@@ -0,0 +1,70 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+import json
+
+import pytest
+from confluent_kafka import Producer
+
+from airflow.models import Connection
+
+# Import Hook
+from airflow.providers.apache.kafka.hooks.consume import KafkaConsumerHook
+from airflow.utils import db
+
+TOPIC = "consumer_hook_test_1"
+
+config = {
+    "bootstrap.servers": "broker:29092",
+    "group.id": "hook.consumer.integration.test",
+    "enable.auto.commit": False,
+    "auto.offset.reset": "beginning",
+}
+
+
+@pytest.mark.integration("kafka")
+class TestConsumerHook:
+    """
+    Test consumer hook.
+    """
+
+    def setup_method(self):
+        db.merge_conn(
+            Connection(
+                conn_id="kafka_d",
+                conn_type="kafka",
+                extra=json.dumps(config),
+            )
+        )
+
+    def test_consume_messages(self):
+        """test consuming a message from a topic"""
+
+        # Standard Init
+        p = Producer(**{"bootstrap.servers": "broker:29092"})
+        p.produce(TOPIC, "test_message")
+        assert len(p) == 1
+        x = p.flush()
+        assert x == 0
+
+        c = KafkaConsumerHook([TOPIC], kafka_config_id="kafka_d")
+        consumer = c.get_consumer()
+
+        msg = consumer.consume()
+
+        assert msg[0].value() == b"test_message"
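Aside: the test configuration above sets enable.auto.commit to False, so offsets only advance when the caller commits them explicitly after processing. A sketch of that manual-commit pattern using confluent-kafka directly (topic and group names here are illustrative only):

    from confluent_kafka import Consumer

    consumer = Consumer(
        {
            "bootstrap.servers": "broker:29092",
            "group.id": "example.manual.commit",
            "enable.auto.commit": False,
            "auto.offset.reset": "beginning",
        }
    )
    consumer.subscribe(["consumer_hook_test_1"])
    try:
        msg = consumer.poll(10.0)
        if msg is not None and msg.error() is None:
            print(msg.value())
            consumer.commit(msg)  # commit only after successful processing
    finally:
        consumer.close()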
diff --git a/tests/integration/providers/apache/kafka/hooks/test_producer.py b/tests/integration/providers/apache/kafka/hooks/test_producer.py
new file mode 100644
index 0000000000000..663a965eb91c3
--- /dev/null
+++ b/tests/integration/providers/apache/kafka/hooks/test_producer.py
@@ -0,0 +1,67 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+import json
+import logging
+
+import pytest
+
+from airflow.models import Connection
+from airflow.providers.apache.kafka.hooks.produce import KafkaProducerHook
+from airflow.utils import db
+
+log = logging.getLogger(__name__)
+config = {"bootstrap.servers": "broker:29092", "group.id": "hook.producer.integration.test"}
+
+
+@pytest.mark.integration("kafka")
+class TestProducerHook:
+    """
+    Test producer hook.
+    """
+
+    def setup_method(self):
+        db.merge_conn(
+            Connection(
+                conn_id="kafka_default",
+                conn_type="kafka",
+                extra=json.dumps(config),
+            )
+        )
+
+    def test_produce(self):
+        """test producer hook functionality"""
+
+        topic = "producer_hook_integration_test"
+
+        def acked(err, msg):
+            if err is not None:
+                raise Exception(f"{err}")
+            else:
+                assert msg.topic() == topic
+                assert msg.partition() == 0
+                assert msg.offset() == 0
+
+        # Standard Init
+        p_hook = KafkaProducerHook(kafka_config_id="kafka_default")
+
+        producer = p_hook.get_producer()
+
+        producer.produce(topic, key="p1", value="p2", on_delivery=acked)
+        producer.poll(0)
+        producer.flush()
diff --git a/tests/integration/providers/apache/kafka/operators/__init__.py b/tests/integration/providers/apache/kafka/operators/__init__.py
new file mode 100644
index 0000000000000..13a83393a9124
--- /dev/null
+++ b/tests/integration/providers/apache/kafka/operators/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/tests/integration/providers/apache/kafka/operators/test_consume.py b/tests/integration/providers/apache/kafka/operators/test_consume.py
new file mode 100644
index 0000000000000..240b02f9be1ad
--- /dev/null
+++ b/tests/integration/providers/apache/kafka/operators/test_consume.py
@@ -0,0 +1,147 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+from __future__ import annotations
+
+import json
+import logging
+from typing import Any
+
+import pytest
+from confluent_kafka import Producer
+
+from airflow.models import Connection
+
+# Import Operator
+from airflow.providers.apache.kafka.operators.consume import ConsumeFromTopicOperator
+from airflow.utils import db
+
+log = logging.getLogger(__name__)
+
+
+def _batch_tester(messages, test_string=None):
+    assert test_string
+    assert len(messages) == 10
+
+    for x in messages:
+        assert x.value() == bytes(test_string, "utf-8")
+
+
+def _basic_message_tester(message, test=None) -> Any:
+    """a function that tests the message received"""
+
+    assert test
+    assert message.value() == bytes(test, "utf-8")
+
+
+@pytest.mark.integration("kafka")
+class TestConsumeFromTopic:
+    """
+    test ConsumeFromTopicOperator
+    """
+
+    def setup_method(self):
+
+        for num in (1, 2, 3):
+            db.merge_conn(
+                Connection(
+                    conn_id=f"operator.consumer.test.integration.test_{num}",
+                    conn_type="kafka",
+                    extra=json.dumps(
+                        {
+                            "socket.timeout.ms": 10,
+                            "bootstrap.servers": "localhost:9092",
+                            "group.id": f"operator.consumer.test.integration.test_{num}",
+                            "enable.auto.commit": False,
+                            "auto.offset.reset": "beginning",
+                        }
+                    ),
+                )
+            )
+
+    def test_consumer_operator_test_1(self):
+        """test consumer works with string import"""
+
+        TOPIC = "operator.consumer.test.integration.test_1"
+
+        p = Producer(**{"bootstrap.servers": "broker:29092"})
+        p.produce(TOPIC, TOPIC)
+        assert len(p) == 1
+        x = p.flush()
+        assert x == 0
+
+        operator = ConsumeFromTopicOperator(
+            kafka_config_id=TOPIC,
+            topics=[TOPIC],
+            apply_function="tests.integration.providers.apache.kafka.operators.test_consume._basic_message_tester",
+            apply_function_kwargs={"test": TOPIC},
+            task_id="test",
+            poll_timeout=0.0001,
+        )
+
+        x = operator.execute(context={})
+
+    def test_consumer_operator_test_2(self):
+        """test consumer works with direct binding"""
+
+        TOPIC = "operator.consumer.test.integration.test_2"
+
+        p = Producer(**{"bootstrap.servers": "broker:29092"})
+        p.produce(TOPIC, TOPIC)
+        assert len(p) == 1
+        x = p.flush()
+        assert x == 0
+
+        operator = ConsumeFromTopicOperator(
+            kafka_config_id=TOPIC,
+            topics=[TOPIC],
+            apply_function=_basic_message_tester,
+            apply_function_kwargs={"test": TOPIC},
+            task_id="test",
+            poll_timeout=0.0001,
+        )
+
+        x = operator.execute(context={})
+
+    def test_consumer_operator_test_3(self):
+        """test consumer works in batch mode"""
+
+        TOPIC = "operator.consumer.test.integration.test_3"
+
+        p = Producer(**{"bootstrap.servers": "broker:29092"})
+
+        for x in range(20):
+            p.produce(TOPIC, TOPIC)
+
+        assert len(p) == 20
+        x = p.flush()
+        assert x == 0
+
+        operator = ConsumeFromTopicOperator(
+            kafka_config_id=TOPIC,
+            topics=[TOPIC],
+            apply_function=_batch_tester,
+            apply_function_kwargs={"test_string": TOPIC},
+            task_id="test",
+            poll_timeout=0.0001,
+            commit_cadence="end_of_batch",
+            max_messages=30,
+            max_batch_size=10,
+        )
+
+        x = operator.execute(context={})
diff --git a/tests/integration/providers/apache/kafka/operators/test_produce.py b/tests/integration/providers/apache/kafka/operators/test_produce.py
new file mode 100644
index 0000000000000..f74fa9e18e4be
--- /dev/null
+++ b/tests/integration/providers/apache/kafka/operators/test_produce.py
@@ -0,0 +1,112 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+import json
+import logging
+
+import pytest
+from confluent_kafka import Consumer
+
+from airflow.models import Connection
+from airflow.providers.apache.kafka.operators.produce import ProduceToTopicOperator
+from airflow.utils import db
+
+log = logging.getLogger(__name__)
+
+
+def _producer_function():
+    for i in range(20):
+        yield (json.dumps(i), json.dumps(i + 1))
+
+
+@pytest.mark.integration("kafka")
+class TestProduceToTopic:
+    """
+    test ProduceToTopicOperator
+    """
+
+    def setup_method(self):
+        db.merge_conn(
+            Connection(
+                conn_id="kafka_default",
+                conn_type="kafka",
+                extra=json.dumps(
+                    {
+                        "socket.timeout.ms": 10,
+                        "message.timeout.ms": 10,
+                        "bootstrap.servers": "broker:29092",
+                    }
+                ),
+            )
+        )
+
+    def test_producer_operator_test_1(self):
+
+        GROUP = "operator.producer.test.integration.test_1"
+        TOPIC = "operator.producer.test.integration.test_1"
+
+        t = ProduceToTopicOperator(
+            kafka_config_id="kafka_default",
+            task_id="produce_to_topic",
+            topic=TOPIC,
+            producer_function="tests.integration.providers.apache.kafka.operators.test_produce._producer_function",
+        )
+
+        t.execute(context={})
+
+        config = {
+            "bootstrap.servers": "broker:29092",
+            "group.id": GROUP,
+            "enable.auto.commit": False,
+            "auto.offset.reset": "beginning",
+        }
+
+        c = Consumer(config)
+        c.subscribe([TOPIC])
+        msg = c.consume()
+
+        assert msg[0].key() == b"0"
+        assert msg[0].value() == b"1"
+
+    def test_producer_operator_test_2(self):
+
+        GROUP = "operator.producer.test.integration.test_2"
+        TOPIC = "operator.producer.test.integration.test_2"
+
+        t = ProduceToTopicOperator(
+            kafka_config_id="kafka_default",
+            task_id="produce_to_topic",
+            topic=TOPIC,
+            producer_function=_producer_function,
+        )
+
+        t.execute(context={})
+
+        config = {
+            "bootstrap.servers": "broker:29092",
+            "group.id": GROUP,
+            "enable.auto.commit": False,
+            "auto.offset.reset": "beginning",
+        }
+
+        c = Consumer(config)
+        c.subscribe([TOPIC])
+        msg = c.consume()
+
+        assert msg[0].key() == b"0"
+        assert msg[0].value() == b"1"
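Aside: the two operators exercised above are the user-facing surface of this provider. A sketch of how they might be wired together in a DAG (not part of this PR; the dag id, topic, and connection names are illustrative, and a "kafka_default" Connection with bootstrap.servers in its extra is assumed, mirroring the test setup):

    from __future__ import annotations

    import json
    from datetime import datetime

    from airflow import DAG
    from airflow.providers.apache.kafka.operators.consume import ConsumeFromTopicOperator
    from airflow.providers.apache.kafka.operators.produce import ProduceToTopicOperator


    def _producer_function():
        # Yield (key, value) pairs, as in the integration tests above.
        for i in range(5):
            yield (json.dumps(i), json.dumps(i * 2))


    def _consumer_function(message):
        print(message.value())


    with DAG(dag_id="kafka_example", start_date=datetime(2023, 1, 1), schedule=None) as dag:
        produce = ProduceToTopicOperator(
            task_id="produce",
            kafka_config_id="kafka_default",
            topic="example_topic",
            producer_function=_producer_function,
        )
        consume = ConsumeFromTopicOperator(
            task_id="consume",
            kafka_config_id="kafka_default",
            topics=["example_topic"],
            apply_function=_consumer_function,
            poll_timeout=10,
            max_messages=5,
        )
        produce >> consume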
diff --git a/tests/integration/providers/apache/kafka/sensors/__init__.py b/tests/integration/providers/apache/kafka/sensors/__init__.py
new file mode 100644
index 0000000000000..13a83393a9124
--- /dev/null
+++ b/tests/integration/providers/apache/kafka/sensors/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/tests/integration/providers/apache/kafka/triggers/__init__.py b/tests/integration/providers/apache/kafka/triggers/__init__.py
new file mode 100644
index 0000000000000..13a83393a9124
--- /dev/null
+++ b/tests/integration/providers/apache/kafka/triggers/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/tests/integration/providers/apache/kafka/triggers/test_await_message.py b/tests/integration/providers/apache/kafka/triggers/test_await_message.py
new file mode 100644
index 0000000000000..a3808cdb47713
--- /dev/null
+++ b/tests/integration/providers/apache/kafka/triggers/test_await_message.py
@@ -0,0 +1,90 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+import json
+
+import pytest
+from confluent_kafka import Producer
+
+from airflow.models import Connection
+from airflow.providers.apache.kafka.triggers.await_message import AwaitMessageTrigger
+from airflow.utils import db
+
+GROUP = "trigger.await_message.test.integration.test_1"
+TOPIC = "trigger.await_message.test.integration.test_1"
+
+
+def _apply_function(message):
+    if message.value() == bytes(TOPIC, "utf-8"):
+        return message
+
+
+@pytest.mark.integration("kafka")
+class TestTrigger:
+    def setup_method(self):
+
+        for num in [1]:
+            db.merge_conn(
+                Connection(
+                    conn_id=f"trigger.await_message.test.integration.test_{num}",
+                    conn_type="kafka",
+                    extra=json.dumps(
+                        {
+                            "socket.timeout.ms": 10,
+                            "bootstrap.servers": "broker:29092",
+                            "group.id": f"trigger.await_message.test.integration.test_{num}",
+                            "enable.auto.commit": False,
+                            "auto.offset.reset": "beginning",
+                        }
+                    ),
+                )
+            )
+
+    @pytest.mark.asyncio
+    async def test_trigger_await_message_test_1(self):
+        """
+        Await message waits for a message that returns truthy
+        """
+
+        TOPIC = "trigger.await_message.test.integration.test_1"
+
+        p = Producer(**{"bootstrap.servers": "broker:29092"})
+
+        for x in range(20):
+            p.produce(TOPIC, "not_this")
+
+        p.produce(TOPIC, TOPIC)
+
+        assert len(p) == 21
+        x = p.flush()
+        assert x == 0
+
+        trigger = AwaitMessageTrigger(
+            topics=[TOPIC],
+            apply_function="tests.integration.providers.apache.kafka.triggers.test_await_message._apply_function",
+            apply_function_args=None,
+            apply_function_kwargs=None,
+            kafka_config_id="trigger.await_message.test.integration.test_1",
+            poll_timeout=0,
+            poll_interval=1,
+        )
+
+        generator = trigger.run()
+        actual = await generator.__anext__()
+
+        assert actual.payload.value() == bytes(TOPIC, "utf-8")
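Aside: the trigger above is normally fired from a deferred task, but it can also be driven directly for experimentation. A sketch (topic, connection, and dotted-path names are illustrative, and a matching Connection must already exist):

    import asyncio

    from airflow.providers.apache.kafka.triggers.await_message import AwaitMessageTrigger


    async def main():
        trigger = AwaitMessageTrigger(
            topics=["my_topic"],
            apply_function="my_package.my_module.my_check",  # dotted path, resolved at runtime
            apply_function_args=None,
            apply_function_kwargs=None,
            kafka_config_id="kafka_default",
            poll_timeout=1,
            poll_interval=5,
        )
        # run() is an async generator; it yields a TriggerEvent whose payload
        # is the first message for which apply_function returned truthy.
        event = await trigger.run().__anext__()
        print(event.payload)


    asyncio.run(main())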
diff --git a/tests/providers/apache/kafka/__init__.py b/tests/providers/apache/kafka/__init__.py
new file mode 100644
index 0000000000000..13a83393a9124
--- /dev/null
+++ b/tests/providers/apache/kafka/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/tests/providers/apache/kafka/hooks/__init__.py b/tests/providers/apache/kafka/hooks/__init__.py
new file mode 100644
index 0000000000000..13a83393a9124
--- /dev/null
+++ b/tests/providers/apache/kafka/hooks/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/tests/providers/apache/kafka/hooks/test_admin_client.py b/tests/providers/apache/kafka/hooks/test_admin_client.py
new file mode 100644
index 0000000000000..651b5a20b6604
--- /dev/null
+++ b/tests/providers/apache/kafka/hooks/test_admin_client.py
@@ -0,0 +1,74 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+import json
+import logging
+
+import pytest
+from confluent_kafka.admin import AdminClient
+
+from airflow.models import Connection
+from airflow.providers.apache.kafka.hooks.client import KafkaAdminClientHook
+from airflow.utils import db
+
+log = logging.getLogger(__name__)
+
+
+class TestSampleHook:
+    """
+    Test Admin Client Hook.
+    """
+
+    def setup_method(self):
+        db.merge_conn(
+            Connection(
+                conn_id="kafka_d",
+                conn_type="kafka",
+                extra=json.dumps(
+                    {"socket.timeout.ms": 10, "bootstrap.servers": "localhost:9092", "group.id": "test_group"}
+                ),
+            )
+        )
+
+        db.merge_conn(
+            Connection(
+                conn_id="kafka_bad",
+                conn_type="kafka",
+                extra=json.dumps({"socket.timeout.ms": 10}),
+            )
+        )
+
+    def test_init(self):
+        """test initialization of AdminClientHook"""
+
+        # Standard Init
+        KafkaAdminClientHook(kafka_config_id="kafka_d")
+
+        # Not Enough Args
+        with pytest.raises(ValueError):
+            KafkaAdminClientHook(kafka_config_id="kafka_bad")
+
+    def test_get_conn(self):
+        """test get_conn"""
+
+        # Standard Init
+        k = KafkaAdminClientHook(kafka_config_id="kafka_d")
+
+        c = k.get_conn
+
+        assert isinstance(c, AdminClient)
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+import json
+
+import pytest
+
+from airflow.models import Connection
+
+# Import Hook
+from airflow.providers.apache.kafka.hooks.consume import KafkaConsumerHook
+from airflow.utils import db
+
+
+class TestConsumerHook:
+    """
+    Test consumer hook.
+    """
+
+    def setup_method(self):
+        db.merge_conn(
+            Connection(
+                conn_id="kafka_d",
+                conn_type="kafka",
+                extra=json.dumps(
+                    {"socket.timeout.ms": 10, "bootstrap.servers": "localhost:9092", "group.id": "test_group"}
+                ),
+            )
+        )
+
+        db.merge_conn(
+            Connection(
+                conn_id="kafka_bad",
+                conn_type="kafka",
+                extra=json.dumps({}),
+            )
+        )
+
+    def test_init(self):
+        """test initialization of KafkaConsumerHook"""
+
+        # Standard Init
+        KafkaConsumerHook(["test_1"], kafka_config_id="kafka_d")
+
+        # Not Enough Args
+        with pytest.raises(ValueError):
+            KafkaConsumerHook(["test_1"], kafka_config_id="kafka_bad")
diff --git a/tests/providers/apache/kafka/hooks/test_producer.py b/tests/providers/apache/kafka/hooks/test_producer.py
new file mode 100644
index 0000000000000..65c827b41cc93
--- /dev/null
+++ b/tests/providers/apache/kafka/hooks/test_producer.py
@@ -0,0 +1,63 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+import json
+import logging
+
+import pytest
+
+from airflow.models import Connection
+from airflow.providers.apache.kafka.hooks.produce import KafkaProducerHook
+from airflow.utils import db
+
+log = logging.getLogger(__name__)
+
+
+class TestProducerHook:
+    """
+    Test the producer hook.
+ """ + + def setup_method(self): + db.merge_conn( + Connection( + conn_id="kafka_d", + conn_type="kafka", + extra=json.dumps( + {"socket.timeout.ms": 10, "bootstrap.servers": "localhost:9092", "group.id": "test_group"} + ), + ) + ) + + db.merge_conn( + Connection( + conn_id="kafka_bad", + conn_type="kafka", + extra=json.dumps({}), + ) + ) + + def test_init(self): + """test initialization of AdminClientHook""" + + # Standard Init + KafkaProducerHook(kafka_config_id="kafka_d") + + # Not Enough Args + with pytest.raises(ValueError): + KafkaProducerHook(kafka_config_id="kafka_bad") diff --git a/tests/providers/apache/kafka/operators/__init__.py b/tests/providers/apache/kafka/operators/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/tests/providers/apache/kafka/operators/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/tests/providers/apache/kafka/operators/test_consume.py b/tests/providers/apache/kafka/operators/test_consume.py new file mode 100644 index 0000000000000..178419052699c --- /dev/null +++ b/tests/providers/apache/kafka/operators/test_consume.py @@ -0,0 +1,81 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+from __future__ import annotations
+
+import json
+import logging
+from typing import Any
+
+from airflow.models import Connection
+
+# Import Operator
+from airflow.providers.apache.kafka.operators.consume import ConsumeFromTopicOperator
+from airflow.utils import db
+
+log = logging.getLogger(__name__)
+
+
+def _no_op(*args, **kwargs) -> Any:
+    """A no-op function that returns its arguments
+
+    :return: whatever was passed in
+    :rtype: Any
+    """
+    return args, kwargs
+
+
+class TestConsumeFromTopic:
+    """
+    Test ConsumeFromTopicOperator
+    """
+
+    def setup_method(self):
+        db.merge_conn(
+            Connection(
+                conn_id="kafka_d",
+                conn_type="kafka",
+                extra=json.dumps(
+                    {"socket.timeout.ms": 10, "bootstrap.servers": "localhost:9092", "group.id": "test_group"}
+                ),
+            )
+        )
+
+    def test_operator(self):
+
+        operator = ConsumeFromTopicOperator(
+            kafka_config_id="kafka_d",
+            topics=["test"],
+            apply_function="tests.providers.apache.kafka.operators.test_consume._no_op",
+            task_id="test",
+            poll_timeout=0.0001,
+        )
+
+        # execute the operator (this is essentially a no-op as no broker is set up)
+        operator.execute(context={})
+
+    def test_operator_callable(self):
+
+        operator = ConsumeFromTopicOperator(
+            kafka_config_id="kafka_d",
+            topics=["test"],
+            apply_function=_no_op,
+            task_id="test",
+            poll_timeout=0.0001,
+        )
+
+        # execute the operator (this is essentially a no-op as no broker is set up)
+        operator.execute(context={})
diff --git a/tests/providers/apache/kafka/operators/test_produce.py b/tests/providers/apache/kafka/operators/test_produce.py
new file mode 100644
index 0000000000000..46797365e8588
--- /dev/null
+++ b/tests/providers/apache/kafka/operators/test_produce.py
@@ -0,0 +1,85 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
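+# NOTE (editorial sketch, not part of the provider API): a producer_function
+# for ProduceToTopicOperator returns or yields an iterable of (key, value)
+# tuples, e.g. a hypothetical
+#
+#     def produce():
+#         yield (b"key", b"value")
+#
+# _simple_producer below follows the same shape, returning a single pair.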
+from __future__ import annotations
+
+import json
+import logging
+from typing import Any
+
+from airflow.models import Connection
+from airflow.providers.apache.kafka.operators.produce import ProduceToTopicOperator
+from airflow.utils import db
+
+log = logging.getLogger(__name__)
+
+
+def _simple_producer(key, value) -> list[tuple[Any, Any]]:
+    """A simple producer function that returns the key/value passed
+    in for production via ProduceToTopicOperator
+
+    :param key: the key for the message
+    :param value: the value for the message
+    :return: the key/value pair for production via the operator
+    :rtype: list[tuple[Any, Any]]
+    """
+    return [(key, value)]
+
+
+class TestProduceToTopic:
+    """
+    Test ProduceToTopicOperator
+    """
+
+    def setup_method(self):
+        db.merge_conn(
+            Connection(
+                conn_id="kafka_d",
+                conn_type="kafka",
+                extra=json.dumps(
+                    {
+                        "socket.timeout.ms": 10,
+                        "message.timeout.ms": 10,
+                        "bootstrap.servers": "localhost:9092",
+                        "group.id": "test_group",
+                    }
+                ),
+            )
+        )
+
+    def test_operator_string(self):
+        operator = ProduceToTopicOperator(
+            kafka_config_id="kafka_d",
+            topic="test_1",
+            producer_function="tests.providers.apache.kafka.operators.test_produce._simple_producer",
+            producer_function_args=(b"test", b"test"),
+            task_id="test",
+            synchronous=False,
+        )
+
+        operator.execute(context={})
+
+    def test_operator_callable(self):
+        operator = ProduceToTopicOperator(
+            kafka_config_id="kafka_d",
+            topic="test_1",
+            producer_function=_simple_producer,
+            producer_function_args=(b"test", b"test"),
+            task_id="test",
+            synchronous=False,
+        )
+
+        operator.execute(context={})
diff --git a/tests/providers/apache/kafka/sensors/__init__.py b/tests/providers/apache/kafka/sensors/__init__.py
new file mode 100644
index 0000000000000..13a83393a9124
--- /dev/null
+++ b/tests/providers/apache/kafka/sensors/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/tests/providers/apache/kafka/sensors/test_kafka.py b/tests/providers/apache/kafka/sensors/test_kafka.py
new file mode 100644
index 0000000000000..e2c6d8ae7c4ef
--- /dev/null
+++ b/tests/providers/apache/kafka/sensors/test_kafka.py
@@ -0,0 +1,93 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import annotations
+
+import json
+import logging
+
+import pytest
+
+from airflow.exceptions import TaskDeferred
+from airflow.models import Connection
+from airflow.providers.apache.kafka.sensors.kafka import AwaitMessageSensor, AwaitMessageTriggerFunctionSensor
+from airflow.utils import db
+
+log = logging.getLogger(__name__)
+
+
+def _return_true(message):
+    return True
+
+
+class TestSensors:
+    """
+    Test Sensors
+    """
+
+    def setup_method(self):
+        db.merge_conn(
+            Connection(
+                conn_id="kafka_d",
+                conn_type="kafka",
+                extra=json.dumps(
+                    {"socket.timeout.ms": 10, "bootstrap.servers": "localhost:9092", "group.id": "test_group"}
+                ),
+            )
+        )
+
+    def test_await_message_good(self):
+        sensor = AwaitMessageSensor(
+            kafka_config_id="kafka_d", topics=["test"], task_id="test", apply_function=_return_true
+        )
+
+        # execute() defers the task
+        with pytest.raises(TaskDeferred):
+            sensor.execute(context={})
+
+    def test_await_execute_complete(self):
+        sensor = AwaitMessageSensor(
+            kafka_config_id="kafka_d", topics=["test"], task_id="test", apply_function=_return_true
+        )
+
+        assert "test" == sensor.execute_complete(context={}, event="test")
+
+    def test_await_message_trigger_event(self):
+        sensor = AwaitMessageTriggerFunctionSensor(
+            kafka_config_id="kafka_d",
+            topics=["test"],
+            task_id="test",
+            apply_function=_return_true,
+            event_triggered_function=_return_true,
+        )
+
+        # execute() immediately defers the task
+        with pytest.raises(TaskDeferred):
+            sensor.execute(context={})
+
+    def test_await_message_trigger_event_execute_complete(self):
+        sensor = AwaitMessageTriggerFunctionSensor(
+            kafka_config_id="kafka_d",
+            topics=["test"],
+            task_id="test",
+            apply_function=_return_true,
+            event_triggered_function=_return_true,
+        )
+
+        # execute_complete() runs the event_triggered_function and defers again
+        with pytest.raises(TaskDeferred):
+            sensor.execute_complete(context={})
diff --git a/tests/providers/apache/kafka/triggers/__init__.py b/tests/providers/apache/kafka/triggers/__init__.py
new file mode 100644
index 0000000000000..13a83393a9124
--- /dev/null
+++ b/tests/providers/apache/kafka/triggers/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/tests/providers/apache/kafka/triggers/test_await_message.py b/tests/providers/apache/kafka/triggers/test_await_message.py
new file mode 100644
index 0000000000000..1e0ad24a59203
--- /dev/null
+++ b/tests/providers/apache/kafka/triggers/test_await_message.py
@@ -0,0 +1,129 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+import asyncio
+import json
+
+import pytest
+
+from airflow.models import Connection
+from airflow.providers.apache.kafka.hooks.consume import KafkaConsumerHook
+from airflow.providers.apache.kafka.triggers.await_message import AwaitMessageTrigger
+from airflow.utils import db
+
+
+def apply_function_false(message):
+    return False
+
+
+def apply_function_true(message):
+    return True
+
+
+class MockedMessage:
+    def __init__(*args, **kwargs):
+        pass
+
+    def error(*args, **kwargs):
+        return False
+
+
+class MockedConsumer:
+    def __init__(*args, **kwargs) -> None:
+        pass
+
+    def poll(*args, **kwargs):
+        return MockedMessage()
+
+    def commit(*args, **kwargs):
+        return True
+
+
+class TestTrigger:
+    def setup_method(self):
+        db.merge_conn(
+            Connection(
+                conn_id="kafka_d",
+                conn_type="kafka",
+                extra=json.dumps(
+                    {"socket.timeout.ms": 10, "bootstrap.servers": "localhost:9092", "group.id": "test_group"}
+                ),
+            )
+        )
+
+    def test_trigger_serialization(self):
+        trigger = AwaitMessageTrigger(
+            kafka_config_id="kafka_d",
+            apply_function="test.noop",
+            topics=["noop"],
+            apply_function_args=[1, 2],
+            apply_function_kwargs=dict(one=1, two=2),
+            poll_timeout=10,
+            poll_interval=5,
+        )
+
+        assert isinstance(trigger, AwaitMessageTrigger)
+
+        classpath, kwargs = trigger.serialize()
+
+        assert classpath == "airflow.providers.apache.kafka.triggers.await_message.AwaitMessageTrigger"
+        assert kwargs == dict(
+            kafka_config_id="kafka_d",
+            apply_function="test.noop",
+            topics=["noop"],
+            apply_function_args=[1, 2],
+            apply_function_kwargs=dict(one=1, two=2),
+            poll_timeout=10,
+            poll_interval=5,
+        )
+
+    @pytest.mark.asyncio
+    async def test_trigger_run_good(self, mocker):
+
+        mocker.patch.object(KafkaConsumerHook, "get_consumer", return_value=MockedConsumer)
+
+        trigger = AwaitMessageTrigger(
+            kafka_config_id="kafka_d",
+            apply_function="tests.providers.apache.kafka.triggers.test_await_message.apply_function_true",
+            topics=["noop"],
+            poll_timeout=0.0001,
+            poll_interval=5,
+        )
+
+        task = asyncio.create_task(trigger.run().__anext__())
+        await asyncio.sleep(1.0)
+        assert task.done() is True
+        asyncio.get_event_loop().stop()
+
+    @pytest.mark.asyncio
+    async def test_trigger_run_bad(self, mocker):
+
+        mocker.patch.object(KafkaConsumerHook, "get_consumer", return_value=MockedConsumer)
+
+        trigger = AwaitMessageTrigger(
+            kafka_config_id="kafka_d",
apply_function="tests.providers.apache.kafka.triggers.test_await_message.apply_function_false", + topics=["noop"], + poll_timeout=0.0001, + poll_interval=5, + ) + + task = asyncio.create_task(trigger.run().__anext__()) + await asyncio.sleep(1.0) + assert task.done() is False + asyncio.get_event_loop().stop() diff --git a/tests/system/providers/apache/kafka/__init__.py b/tests/system/providers/apache/kafka/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/tests/system/providers/apache/kafka/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/tests/system/providers/apache/kafka/example_dag_event_listener.py b/tests/system/providers/apache/kafka/example_dag_event_listener.py new file mode 100644 index 0000000000000..c6d0df8d4af82 --- /dev/null +++ b/tests/system/providers/apache/kafka/example_dag_event_listener.py @@ -0,0 +1,136 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+
+# listener_dag_function.py
+from __future__ import annotations
+
+import json
+import random
+import string
+
+from pendulum import datetime
+
+from airflow import DAG
+
+# This is just for setting up connections in the demo - you should use standard
+# methods for setting these connections in production
+from airflow.operators.python import PythonOperator
+from airflow.operators.trigger_dagrun import TriggerDagRunOperator
+from airflow.providers.apache.kafka.operators.produce import ProduceToTopicOperator
+from airflow.providers.apache.kafka.sensors.kafka import AwaitMessageTriggerFunctionSensor
+
+# Connections needed for this example dag to finish
+# from airflow.models import Connection
+# from airflow.utils import db
+#
+# db.merge_conn(
+#     Connection(
+#         conn_id="fizz_buzz",
+#         conn_type="kafka",
+#         extra=json.dumps(
+#             {
+#                 "bootstrap.servers": "broker:29092",
+#                 "group.id": "fizz_buzz",
+#                 "enable.auto.commit": False,
+#                 "auto.offset.reset": "beginning",
+#             }
+#         ),
+#     )
+# )
+
+
+def _producer_function():
+    for i in range(50):
+        yield (json.dumps(i), json.dumps(i + 1))
+
+
+def _generate_uuid():
+    letters = string.ascii_lowercase
+    return "".join(random.choice(letters) for _ in range(6))
+
+
+with DAG(
+    dag_id="fizzbuzz-load-topic",
+    description="Load Data to fizz_buzz topic",
+    start_date=datetime(2022, 11, 1),
+    catchup=False,
+    tags=["fizz-buzz"],
+) as dag:
+
+    t1 = ProduceToTopicOperator(
+        kafka_config_id="fizz_buzz",
+        task_id="produce_to_topic",
+        topic="fizz_buzz",
+        producer_function=_producer_function,
+    )
+
+with DAG(
+    dag_id="fizzbuzz-listener-dag",
+    description="listen for messages where mod 3 or mod 5 is zero",
+    start_date=datetime(2022, 11, 1),
+    catchup=False,
+    tags=["fizz", "buzz"],
+):
+
+    def await_function(message):
+        val = json.loads(message.value())
+        print(f"Value in message is {val}")
+        if val % 3 == 0:
+            return val
+        if val % 5 == 0:
+            return val
+
+    def pick_downstream_dag(message, **context):
+        if message % 15 == 0:
+            print(f"encountered {message} - executing external dag!")
+            TriggerDagRunOperator(trigger_dag_id="fizz-buzz", task_id=f"{message}{_generate_uuid()}").execute(
+                context
+            )
+        else:
+            if message % 3 == 0:
+                print(f"encountered {message} FIZZ !")
+            if message % 5 == 0:
+                print(f"encountered {message} BUZZ !")
+
+    # [START howto_sensor_await_message_trigger_function]
+    listen_for_message = AwaitMessageTriggerFunctionSensor(
+        kafka_config_id="fizz_buzz",
+        task_id="listen_for_message",
+        topics=["fizz_buzz"],
+        apply_function="event_listener.await_function",
+        event_triggered_function=pick_downstream_dag,
+    )
+    # [END howto_sensor_await_message_trigger_function]
+
+with DAG(
+    dag_id="fizz-buzz",
+    description="Triggered when mod 15 is 0.",
+    start_date=datetime(2022, 11, 1),
+    catchup=False,
+    tags=["fizz-buzz"],
+):
+
+    def _fizz_buzz():
+        print("FIZZ BUZZ")
+
+    fizz_buzz_task = PythonOperator(task_id="fizz_buzz", python_callable=_fizz_buzz)
+
+
+from tests.system.utils import get_test_run  # noqa: E402
+
+# Needed to run the example DAG with pytest (see: tests/system/README.md#run_via_pytest)
+test_run = get_test_run(dag)
diff --git a/tests/system/providers/apache/kafka/example_dag_hello_kafka.py b/tests/system/providers/apache/kafka/example_dag_hello_kafka.py
new file mode 100644
index 0000000000000..83fb2d930aa64
--- /dev/null
+++ b/tests/system/providers/apache/kafka/example_dag_hello_kafka.py
@@ -0,0 +1,244 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+import functools
+import json
+import logging
+from datetime import datetime, timedelta
+
+from airflow import DAG
+
+# This is just for setting up connections in the demo - you should use standard
+# methods for setting these connections in production
+from airflow.operators.python import PythonOperator
+from airflow.providers.apache.kafka.operators.consume import ConsumeFromTopicOperator
+from airflow.providers.apache.kafka.operators.produce import ProduceToTopicOperator
+from airflow.providers.apache.kafka.sensors.kafka import AwaitMessageSensor
+
+# Connections needed for this example dag to finish
+# from airflow.models import Connection
+# from airflow.utils import db
+# db.merge_conn(
+#     Connection(
+#         conn_id="t1-3",
+#         conn_type="kafka",
+#         extra=json.dumps({"socket.timeout.ms": 10, "bootstrap.servers": "broker:29092"}),
+#     )
+# )
+
+# db.merge_conn(
+#     Connection(
+#         conn_id="t2",
+#         conn_type="kafka",
+#         extra=json.dumps(
+#             {
+#                 "bootstrap.servers": "broker:29092",
+#                 "group.id": "t2",
+#                 "enable.auto.commit": False,
+#                 "auto.offset.reset": "beginning",
+#             }
+#         ),
+#     )
+# )
+
+# db.merge_conn(
+#     Connection(
+#         conn_id="t4",
+#         conn_type="kafka",
+#         extra=json.dumps(
+#             {
+#                 "bootstrap.servers": "broker:29092",
+#                 "group.id": "t4",
+#                 "enable.auto.commit": False,
+#                 "auto.offset.reset": "beginning",
+#             }
+#         ),
+#     )
+# )
+
+# db.merge_conn(
+#     Connection(
+#         conn_id="t4b",
+#         conn_type="kafka",
+#         extra=json.dumps(
+#             {
+#                 "bootstrap.servers": "broker:29092",
+#                 "group.id": "t4b",
+#                 "enable.auto.commit": False,
+#                 "auto.offset.reset": "beginning",
+#             }
+#         ),
+#     )
+# )
+
+
+# db.merge_conn(
+#     Connection(
+#         conn_id="t5",
+#         conn_type="kafka",
+#         extra=json.dumps(
+#             {
+#                 "bootstrap.servers": "broker:29092",
+#                 "group.id": "t5",
+#                 "enable.auto.commit": False,
+#                 "auto.offset.reset": "beginning",
+#             }
+#         ),
+#     )
+# )
+
+
+default_args = {
+    "owner": "airflow",
+    "depends_on_past": False,
+    "email_on_failure": False,
+    "email_on_retry": False,
+    "retries": 1,
+    "retry_delay": timedelta(minutes=5),
+}
+
+
+def producer_function():
+    for i in range(20):
+        yield (json.dumps(i), json.dumps(i + 1))
+
+
+consumer_logger = logging.getLogger("airflow")
+
+
+def consumer_function(message, prefix=None):
+    key = json.loads(message.key())
+    value = json.loads(message.value())
+    consumer_logger.info(f"{prefix} {message.topic()} @ {message.offset()}; {key} : {value}")
+    return
+
+
+def consumer_function_batch(messages, prefix=None):
+    for message in messages:
+        key = json.loads(message.key())
+        value = json.loads(message.value())
+        consumer_logger.info(f"{prefix} {message.topic()} @ {message.offset()}; {key} : {value}")
+    return
+
+
+def await_function(message):
+    if json.loads(message.value()) % 5 == 0:
+        return f" Got the following message: {json.loads(message.value())}"
+
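+# Editorial note (added, not part of the original example): when
+# await_function returns a truthy value, the deferred AwaitMessageSensor (t5)
+# completes and the returned string is pushed to XCom under its xcom_push_key
+# ("retrieved_message"); returning None keeps the sensor waiting.
+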
+def hello_kafka():
+    print("Hello Kafka !")
+    return
+
+
+with DAG(
+    "kafka-example",
+    default_args=default_args,
+    description="Examples of Kafka Operators",
+    schedule_interval=timedelta(days=1),
+    start_date=datetime(2021, 1, 1),
+    catchup=False,
+    tags=["example"],
+) as dag:
+
+    # [START howto_operator_produce_to_topic]
+    t1 = ProduceToTopicOperator(
+        kafka_config_id="t1-3",
+        task_id="produce_to_topic",
+        topic="test_1",
+        producer_function="hello_kafka.producer_function",
+    )
+    # [END howto_operator_produce_to_topic]
+
+    t1.doc_md = "Takes a series of messages from a generator function and publishes them to the `test_1` topic of our kafka cluster."
+
+    # [START howto_operator_consume_from_topic]
+    t2 = ConsumeFromTopicOperator(
+        kafka_config_id="t2",
+        task_id="consume_from_topic",
+        topics=["test_1"],
+        apply_function="hello_kafka.consumer_function",
+        apply_function_kwargs={"prefix": "consumed:::"},
+        commit_cadence="end_of_batch",
+        max_messages=10,
+        max_batch_size=2,
+    )
+    # [END howto_operator_consume_from_topic]
+
+    t2.doc_md = "Reads a series of messages from the `test_1` topic, and processes them with a consumer function with a keyword argument."
+
+    t3 = ProduceToTopicOperator(
+        kafka_config_id="t1-3",
+        task_id="produce_to_topic_2",
+        topic="test_1",
+        producer_function=producer_function,
+    )
+
+    t3.doc_md = "Does the same thing as the t1 task, but passes the callable directly instead of using the string notation."
+
+    t4 = ConsumeFromTopicOperator(
+        kafka_config_id="t4",
+        task_id="consume_from_topic_2",
+        topics=["test_1"],
+        apply_function=functools.partial(consumer_function, prefix="consumed:::"),
+        commit_cadence="end_of_batch",
+        max_messages=30,
+        max_batch_size=10,
+    )
+
+    t4b = ConsumeFromTopicOperator(
+        kafka_config_id="t4b",
+        task_id="consume_from_topic_2_b",
+        topics=["test_1"],
+        apply_function_batch=functools.partial(consumer_function_batch, prefix="consumed:::"),
+        commit_cadence="end_of_batch",
+        max_messages=30,
+        max_batch_size=10,
+    )
+
+    t4.doc_md = "Does the same thing as the t2 task, but passes the callable directly instead of using the string notation."
+
+    # [START howto_sensor_await_message]
+    t5 = AwaitMessageSensor(
+        kafka_config_id="t5",
+        task_id="awaiting_message",
+        topics=["test_1"],
+        apply_function="hello_kafka.await_function",
+        xcom_push_key="retrieved_message",
+    )
+    # [END howto_sensor_await_message]
+
+    t5.doc_md = "A deferrable task. Reads the topic `test_1` until a message with a value divisible by 5 is encountered."
+
+    t6 = PythonOperator(task_id="hello_kafka", python_callable=hello_kafka)
+
+    t6.doc_md = "The task that is executed after the deferrable task returns for execution."
+
+    t1 >> t2
+    t3 >> [t4, t4b] >> t5 >> t6
+
+
+from tests.system.utils import get_test_run  # noqa: E402
+
+# Needed to run the example DAG with pytest (see: tests/system/README.md#run_via_pytest)
+test_run = get_test_run(dag)