Skip to content

Commit

Permalink
Remove RAW HTML support from Trigger Form UI (apache#40029)
Browse files Browse the repository at this point in the history
* Remove RAW HTML support from Trigger Form UI

* Updates since main branch changed to 3.0.0

* Force really no HTML in code, fix unit tests

* Fix version in deprecation message
  • Loading branch information
jscheffl committed Aug 21, 2024
1 parent 86e12a9 commit 23e9716
Show file tree
Hide file tree
Showing 8 changed files with 20 additions and 168 deletions.
15 changes: 0 additions & 15 deletions airflow/config_templates/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2042,21 +2042,6 @@ webserver:
type: integer
example: "10"
default: "5"
allow_raw_html_descriptions:
description: |
A DAG author is able to provide any raw HTML into ``doc_md`` or params description in
``description_md`` for text formatting. This is including potentially unsafe javascript.
Displaying the DAG or trigger form in web UI provides the DAG author the potential to
inject malicious code into clients browsers. To ensure the web UI is safe by default,
raw HTML is disabled by default. If you trust your DAG authors, you can enable HTML
support in markdown by setting this option to ``True``.
This parameter also enables the deprecated fields ``description_html`` and
``custom_html_form`` in DAG params until the feature is removed in a future version.
version_added: 2.8.0
type: boolean
example: "False"
default: "False"
allowed_payload_size:
description: |
The maximum size of the request payload (in MB) that can be sent.
Expand Down
4 changes: 1 addition & 3 deletions airflow/www/templates/airflow/trigger.html
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,7 @@
: </label>
</td>
<td>
{% if "custom_html_form" in form_details.schema %}
{{ form_details.schema.custom_html_form | replace("{name}", "element_" + form_key) | replace("{value}", form_details.value) }}
{% elif "type" in form_details.schema and form_details.schema.type == "boolean" %}
{% if "type" in form_details.schema and form_details.schema.type == "boolean" %}
<label for="element_{{ form_key }}" control-label="">
<input class="switch-input" name="element_{{ form_key }}" id="element_{{ form_key }}" type="checkbox"
{%- if form_details.value %} checked="checked"{%- endif -%}/>
Expand Down
3 changes: 1 addition & 2 deletions airflow/www/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@
from sqlalchemy import delete, func, select, types
from sqlalchemy.ext.associationproxy import AssociationProxy

from airflow.configuration import conf
from airflow.exceptions import RemovedInAirflow3Warning
from airflow.models.dagrun import DagRun
from airflow.models.dagwarning import DagWarning
Expand Down Expand Up @@ -639,7 +638,7 @@ def json_render(obj, lexer):

def wrapped_markdown(s, css_class="rich_doc"):
"""Convert a Markdown string to HTML."""
md = MarkdownIt("gfm-like", {"html": conf.getboolean("webserver", "allow_raw_html_descriptions")})
md = MarkdownIt("gfm-like", {"html": False})
if s is None:
return None
s = textwrap.dedent(s)
Expand Down
43 changes: 0 additions & 43 deletions airflow/www/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -2059,8 +2059,6 @@ def trigger(self, dag_id: str, session: Session = NEW_SESSION):

# Prepare form fields with param struct details to render a proper form with schema information
form_fields = {}
allow_raw_html_descriptions = conf.getboolean("webserver", "allow_raw_html_descriptions")
form_trust_problems = []
for k, v in dag.params.items():
form_fields[k] = v.dump()
form_field: dict = form_fields[k]
Expand All @@ -2078,19 +2076,6 @@ def trigger(self, dag_id: str, session: Session = NEW_SESSION):
form_field_schema["type"] = ["array", "null"]
elif isinstance(form_field_value, dict):
form_field_schema["type"] = ["object", "null"]
# Mark HTML fields as safe if allowed
if allow_raw_html_descriptions:
if "description_html" in form_field_schema:
form_field["description"] = Markup(form_field_schema["description_html"])
if "custom_html_form" in form_field_schema:
form_field_schema["custom_html_form"] = Markup(form_field_schema["custom_html_form"])
else:
if "description_html" in form_field_schema and "description_md" not in form_field_schema:
form_trust_problems.append(f"Field {k} uses HTML description")
form_field["description"] = form_field_schema.pop("description_html")
if "custom_html_form" in form_field_schema:
form_trust_problems.append(f"Field {k} uses custom HTML form definition")
form_field_schema.pop("custom_html_form")
if "description_md" in form_field_schema:
form_field["description"] = wwwutils.wrapped_markdown(form_field_schema["description_md"])
# Check for default values and pre-populate
Expand All @@ -2110,34 +2095,6 @@ def trigger(self, dag_id: str, session: Session = NEW_SESSION):
)
else:
form_field["value"] = request.values.get(k)
if form_trust_problems:
flash(
Markup(
"At least one field in the trigger form uses a raw HTML form definition. This is not allowed for "
"security. Please switch to markdown description via <code>description_md</code>. "
"Raw HTML is deprecated and must be enabled via "
"<code>webserver.allow_raw_html_descriptions</code> configuration parameter. Using plain text "
"as fallback for these fields. "
f"<ul><li>{'</li><li>'.join(form_trust_problems)}</li></ul>"
),
"warning",
)
if allow_raw_html_descriptions and any("description_html" in p.schema for p in dag.params.values()):
flash(
Markup(
"The form params use raw HTML in <code>description_html</code> which is deprecated. "
"Please migrate to <code>description_md</code>."
),
"warning",
)
if allow_raw_html_descriptions and any("custom_html_form" in p.schema for p in dag.params.values()):
flash(
Markup(
"The form params use <code>custom_html_form</code> definition. "
"This is deprecated with Airflow 2.8.0 and will be removed in a future release."
),
"warning",
)

ui_fields_defined = any("const" not in f["schema"] for f in form_fields.values())
show_trigger_form_if_no_params = conf.getboolean("webserver", "show_trigger_form_if_no_params")
Expand Down
12 changes: 4 additions & 8 deletions docs/apache-airflow/core-concepts/params.rst
Original file line number Diff line number Diff line change
Expand Up @@ -388,14 +388,10 @@ For examples, please take a look at the two example DAGs provided: :ref:`Params
The trigger form can also be forced to be displayed also if no params are defined using the configuration switch
``webserver.show_trigger_form_if_no_params``.

.. versionchanged:: 2.8.0
By default custom HTML is not allowed to prevent injection of scripts or other malicious HTML code. If you trust your DAG authors
you can change the trust level of parameter descriptions to allow raw HTML by setting the configuration entry
``webserver.allow_raw_html_descriptions`` to ``True``. With the default setting all HTML will be displayed as plain text.
This relates to the previous feature to enable rich formatting with the attribute ``description_html`` which is now super-seeded
with the attribute ``description_md``.
Custom form elements using the attribute ``custom_html_form`` allow a DAG author to specify raw HTML form templates. These
custom HTML form elements are deprecated as of version 2.8.0.
.. versionchanged:: 3.0.0
By default custom HTML is not allowed to prevent injection of scripts or other malicious HTML code. The previous field named
``description_html`` is now super-seeded with the attribute ``description_md``. ``description_html`` is not supported anymore.
Custom form elements using the attribute ``custom_html_form`` was deprecated in version 2.8.0 and support was removed in 3.0.0.

Disabling Runtime Param Modification
------------------------------------
Expand Down
1 change: 1 addition & 0 deletions newsfragments/40029.significant.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Removed deprecated ``allow_raw_html_descriptions`` option from UI Trigger forms.
59 changes: 12 additions & 47 deletions tests/www/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@
wrapped_markdown,
)
from airflow.www.widgets import AirflowDateTimePickerROWidget, BS3TextAreaROWidget, BS3TextFieldROWidget
from tests.test_utils.config import conf_vars


class TestUtils:
Expand Down Expand Up @@ -504,53 +503,19 @@ def test_wrapped_markdown_with_nested_list(self):
== rendered
)

def test_wrapped_markdown_with_collapsible_section(self):
with conf_vars({("webserver", "allow_raw_html_descriptions"): "true"}):
rendered = wrapped_markdown(
"""
# A collapsible section with markdown
<details>
<summary>Click to expand!</summary>
## Heading
1. A numbered
2. list
* With some
* Sub bullets
</details>
"""
)

assert (
"""<div class="rich_doc" ><h1>A collapsible section with markdown</h1>
<details>
<summary>Click to expand!</summary>
<h2>Heading</h2>
<ol>
<li>A numbered</li>
<li>list
<ul>
<li>With some</li>
<li>Sub bullets</li>
</ul>
</li>
</ol>
</details>
</div>"""
== rendered
)

@pytest.mark.parametrize("allow_html", [False, True])
def test_wrapped_markdown_with_raw_html(self, allow_html):
with conf_vars({("webserver", "allow_raw_html_descriptions"): str(allow_html)}):
HTML = "test <code>raw HTML</code>"
rendered = wrapped_markdown(HTML)
if allow_html:
assert HTML in rendered
else:
from markupsafe import escape
@pytest.mark.parametrize(
"html",
[
"test <code>raw HTML</code>",
"hidden <script>alert(1)</script> nuggets.",
],
)
def test_wrapped_markdown_with_raw_html(self, html):
"""Ensure that HTML code is not ending-up in markdown but is always escaped."""
from markupsafe import escape

assert escape(HTML) in rendered
rendered = wrapped_markdown(html)
assert escape(html) in rendered

@pytest.mark.parametrize(
"dag_run,expected_val",
Expand Down
51 changes: 1 addition & 50 deletions tests/www/views/test_views_trigger_dag.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
from airflow.utils.types import DagRunType
from tests.test_utils.api_connexion_utils import create_test_client
from tests.test_utils.config import conf_vars
from tests.test_utils.www import check_content_in_response, check_content_not_in_response
from tests.test_utils.www import check_content_in_response

pytestmark = pytest.mark.db_test

Expand Down Expand Up @@ -273,55 +273,6 @@ def test_trigger_dag_params_render(admin_client, dag_maker, session, app, monkey
)


@pytest.mark.parametrize("allow_html", [False, True])
def test_trigger_dag_html_allow(admin_client, dag_maker, session, app, monkeypatch, allow_html):
"""
Test that HTML is escaped per default in description.
"""
from markupsafe import escape

DAG_ID = "params_dag"
HTML_DESCRIPTION1 = "HTML <code>raw code</code>."
HTML_DESCRIPTION2 = "HTML <code>in md text</code>."
expect_escape = not allow_html
with conf_vars({("webserver", "allow_raw_html_descriptions"): str(allow_html)}):
param1 = Param(
42,
description_html=HTML_DESCRIPTION1,
type="integer",
minimum=1,
maximum=100,
)
param2 = Param(
42,
description_md=HTML_DESCRIPTION2,
type="integer",
minimum=1,
maximum=100,
)
with monkeypatch.context() as m:
with dag_maker(
dag_id=DAG_ID, serialized=True, session=session, params={"param1": param1, "param2": param2}
):
EmptyOperator(task_id="task1")

m.setattr(app, "dag_bag", dag_maker.dagbag)
resp = admin_client.get(f"dags/{DAG_ID}/trigger")

if expect_escape:
check_content_in_response(escape(HTML_DESCRIPTION1), resp)
check_content_in_response(escape(HTML_DESCRIPTION2), resp)
check_content_in_response(
"At least one field in the trigger form uses a raw HTML form definition.", resp
)
else:
check_content_in_response(HTML_DESCRIPTION1, resp)
check_content_in_response(HTML_DESCRIPTION2, resp)
check_content_not_in_response(
"At least one field in the trigger form uses a raw HTML form definition.", resp
)


def test_trigger_endpoint_uses_existing_dagbag(admin_client):
"""
Test that Trigger Endpoint uses the DagBag already created in views.py
Expand Down

0 comments on commit 23e9716

Please sign in to comment.