diff --git a/airflow/utils/docs.py b/airflow/utils/docs.py
index 254b04c4b623e..8f0cd10d9b9e9 100644
--- a/airflow/utils/docs.py
+++ b/airflow/utils/docs.py
@@ -39,17 +39,30 @@ def get_docs_url(page: str | None = None) -> str:
     return result
 
 
+def get_project_url_from_metadata(provider_name: str):
+    """Return the Project-URL from metadata."""
+    return metadata.metadata(provider_name).get_all("Project-URL")
+
+
 def get_doc_url_for_provider(provider_name: str, provider_version: str) -> str:
     """Prepare link to Airflow Provider documentation."""
     try:
-        metadata_items = metadata.metadata(provider_name).get_all("Project-URL")
+        from urllib.parse import urlparse
+
+        metadata_items = get_project_url_from_metadata(provider_name)
         if isinstance(metadata_items, str):
             metadata_items = [metadata_items]
         if metadata_items:
             for item in metadata_items:
                 if item.lower().startswith("documentation"):
                     _, _, url = item.partition(",")
-                    if url:
-                        return url.strip()
+                    # Entries look like "Documentation, https://...": strip before parsing,
+                    # because older Python versions report an empty scheme for " https://...".
+                    url = url.strip()
+                    parsed_url = urlparse(url)
+                    # Only accept absolute http(s) URLs so that values such as
+                    # "javascript:..." are skipped and the default link is used instead.
+                    if parsed_url.scheme in ("http", "https") and parsed_url.netloc:
+                        return url
     except metadata.PackageNotFoundError:
         pass
diff --git a/tests/www/views/test_views.py b/tests/www/views/test_views.py
index 5cf425e99f4aa..4dcea471a5c75 100644
--- a/tests/www/views/test_views.py
+++ b/tests/www/views/test_views.py
@@ -32,6 +32,7 @@
     write_webserver_configuration_if_needed,
 )
 from airflow.plugins_manager import AirflowPlugin, EntryPointSource
+from airflow.utils.docs import get_doc_url_for_provider
 from airflow.utils.task_group import TaskGroup
 from airflow.www.views import (
     ProviderView,
@@ -180,6 +181,36 @@ def test__clean_description(admin_client, provider_description, expected):
     assert actual == expected
 
 
+@pytest.mark.parametrize(
+    "provider_name, project_url, expected",
+    [
+        (
+            "apache-airflow-providers-airbyte",
+            "Documentation, https://airflow.apache.org/docs/apache-airflow-providers-airbyte/3.8.1/",
+            "https://airflow.apache.org/docs/apache-airflow-providers-airbyte/3.8.1/",
+        ),
+        (
+            "apache-airflow-providers-amazon",
+            "Documentation, https://airflow.apache.org/docs/apache-airflow-providers-amazon/8.25.0/",
+            "https://airflow.apache.org/docs/apache-airflow-providers-amazon/8.25.0/",
+        ),
+        (
+            "apache-airflow-providers-apache-druid",
+            "Documentation, javascript:prompt(document.domain)",
+            # the default one is returned
+            "https://airflow.apache.org/docs/apache-airflow-providers-apache-druid/1.0.0/",
+        ),
+    ],
+)
+@patch("airflow.utils.docs.get_project_url_from_metadata")
+def test_get_doc_url_for_provider(
+    mock_get_project_url_from_metadata, admin_client, provider_name, project_url, expected
+):
+    mock_get_project_url_from_metadata.return_value = [project_url]
+    actual = get_doc_url_for_provider(provider_name, "1.0.0")
+    assert actual == expected
+
+
 def test_endpoint_should_not_be_unauthenticated(app):
     resp = app.test_client().get("/provider", follow_redirects=True)
     check_content_not_in_response("Providers", resp)