From 8251b77599d535df067d2e822ba930d435c575fc Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Tue, 16 Apr 2024 12:41:55 -0500 Subject: [PATCH 1/2] feat: investigate file authenticity --- ietf/doc/forms.py | 22 +++++ ietf/doc/templatetags/ietf_filters.py | 30 ++++++ ietf/doc/tests.py | 136 +++++++++++++++++++++++++- ietf/doc/urls.py | 5 +- ietf/doc/utils.py | 27 +++++ ietf/doc/views_doc.py | 17 +++- ietf/templates/doc/investigate.html | 93 ++++++++++++++++++ 7 files changed, 326 insertions(+), 4 deletions(-) create mode 100644 ietf/templates/doc/investigate.html diff --git a/ietf/doc/forms.py b/ietf/doc/forms.py index 554451c564..5504d62160 100644 --- a/ietf/doc/forms.py +++ b/ietf/doc/forms.py @@ -266,3 +266,25 @@ def clean(self): @staticmethod def valid_resource_tags(): return ExtResourceName.objects.all().order_by('slug').values_list('slug', flat=True) + +class InvestigateForm(forms.Form): + name_fragment = forms.CharField( + label="File name or fragment to investigate", + required=True, + help_text=( + "Enter a filename such as draft-ietf-some-draft-00.txt or a fragment like draft-ietf-some-draft using at least 8 characters. The search will also work for files that are not necessarily drafts." + ), + ) + + def clean_name_fragment(self): + disallowed_characters = ["%", "/", "\\", "*"] + name_fragment = self.cleaned_data["name_fragment"] + # Manual inspection of the directories at the time of this writing shows + # looking for files with less than 8 characters in the name is not useful + # Requiring this will help protect against the secretariat unintentionally + # matching every draft. 
+ if len(name_fragment) < 8: + raise ValidationError("Please enter at least 8 characters") + if any([c in name_fragment for c in disallowed_characters]): + raise ValidationError(f"The following characters are disallowed: {', '.join(disallowed_characters)}") + return name_fragment diff --git a/ietf/doc/templatetags/ietf_filters.py b/ietf/doc/templatetags/ietf_filters.py index a791aad383..4c200a4700 100644 --- a/ietf/doc/templatetags/ietf_filters.py +++ b/ietf/doc/templatetags/ietf_filters.py @@ -4,6 +4,7 @@ import datetime import re +from pathlib import Path from urllib.parse import urljoin from zoneinfo import ZoneInfo @@ -899,3 +900,32 @@ def simple_history_delta_change_cnt(history): delta = history.diff_against(prev) return len(delta.changes) return 0 + +@register.filter +def mtime(path): + """Returns a datetime object representing mtime given a pathlib Path object""" + return datetime.datetime.fromtimestamp(path.stat().st_mtime).astimezone(ZoneInfo(settings.TIME_ZONE)) + +@register.filter +def url_for_path(path): + """Constructs a 'best' URL for web access to the given pathlib Path object. + + Assumes that the path is into the Internet-Draft archive or the proceedings. 
+ """ + if path.match(f"{settings.AGENDA_PATH}/**/*"): + return ( + f"https://www.ietf.org/proceedings/{path.relative_to(settings.AGENDA_PATH)}" + ) + elif any( + [ + pathdir in path.parents + for pathdir in [ + Path(settings.INTERNET_DRAFT_PATH), + Path(settings.INTERNET_DRAFT_ARCHIVE_DIR).parent, + Path(settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR), + ] + ] + ): + return f"{settings.IETF_ID_ARCHIVE_URL}{path.name}" + else: + return "#" diff --git a/ietf/doc/tests.py b/ietf/doc/tests.py index d4a6504213..4f5492e6cb 100644 --- a/ietf/doc/tests.py +++ b/ietf/doc/tests.py @@ -45,7 +45,7 @@ StatusChangeFactory, DocExtResourceFactory, RgDraftFactory, BcpFactory) from ietf.doc.forms import NotifyForm from ietf.doc.fields import SearchableDocumentsField -from ietf.doc.utils import create_ballot_if_not_open, uppercase_std_abbreviated_name, DraftAliasGenerator +from ietf.doc.utils import create_ballot_if_not_open, investigate_fragment, uppercase_std_abbreviated_name, DraftAliasGenerator from ietf.group.models import Group, Role from ietf.group.factories import GroupFactory, RoleFactory from ietf.ipr.factories import HolderIprDisclosureFactory @@ -3141,3 +3141,137 @@ def test_state_index(self): if not '-' in name: self.assertIn(name, content) +class InvestigateTests(TestCase): + settings_temp_path_overrides = TestCase.settings_temp_path_overrides + [ + "AGENDA_PATH", + # "INTERNET_DRAFT_PATH", + # "INTERNET_DRAFT_ARCHIVE_DIR", + # "INTERNET_ALL_DRAFTS_ARCHIVE_DIR", + ] + + def setUp(self): + super().setUp() + # Contort the draft archive dir temporary replacement + # to match the "collections" concept + archive_tmp_dir = Path(settings.INTERNET_DRAFT_ARCHIVE_DIR) + new_archive_dir = archive_tmp_dir / "draft-archive" + new_archive_dir.mkdir() + settings.INTERNET_DRAFT_ARCHIVE_DIR = str(new_archive_dir) + donated_personal_copy_dir = archive_tmp_dir / "donated-personal-copy" + donated_personal_copy_dir.mkdir() + meeting_dir = Path(settings.AGENDA_PATH) / "666" + meeting_dir.mkdir() 
+ all_archive_dir = Path(settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR) + repository_dir = Path(settings.INTERNET_DRAFT_PATH) + + for path in [repository_dir, all_archive_dir]: + (path / "draft-this-is-active-00.txt").touch() + for path in [new_archive_dir, all_archive_dir]: + (path / "draft-old-but-can-authenticate-00.txt").touch() + (path / "draft-has-mixed-provenance-01.txt").touch() + for path in [donated_personal_copy_dir, all_archive_dir]: + (path / "draft-donated-from-a-personal-collection-00.txt").touch() + (path / "draft-has-mixed-provenance-00.txt").touch() + (path / "draft-has-mixed-provenance-00.txt.Z").touch() + (all_archive_dir / "draft-this-should-not-be-possible-00.txt").touch() + (meeting_dir / "draft-this-predates-the-archive-00.txt").touch() + + def test_investigate_fragment(self): + + result = investigate_fragment("this-is-active") + self.assertEqual(len(result["can_verify"]), 1) + self.assertEqual(len(result["unverifiable_collections"]), 0) + self.assertEqual(len(result["unexpected"]), 0) + self.assertEqual( + list(result["can_verify"])[0].name, "draft-this-is-active-00.txt" + ) + + result = investigate_fragment("old-but-can") + self.assertEqual(len(result["can_verify"]), 1) + self.assertEqual(len(result["unverifiable_collections"]), 0) + self.assertEqual(len(result["unexpected"]), 0) + self.assertEqual( + list(result["can_verify"])[0].name, "draft-old-but-can-authenticate-00.txt" + ) + + result = investigate_fragment("predates") + self.assertEqual(len(result["can_verify"]), 1) + self.assertEqual(len(result["unverifiable_collections"]), 0) + self.assertEqual(len(result["unexpected"]), 0) + self.assertEqual( + list(result["can_verify"])[0].name, "draft-this-predates-the-archive-00.txt" + ) + + result = investigate_fragment("personal-collection") + self.assertEqual(len(result["can_verify"]), 0) + self.assertEqual(len(result["unverifiable_collections"]), 1) + self.assertEqual(len(result["unexpected"]), 0) + self.assertEqual( + 
list(result["unverifiable_collections"])[0].name, + "draft-donated-from-a-personal-collection-00.txt", + ) + + result = investigate_fragment("mixed-provenance") + self.assertEqual(len(result["can_verify"]), 1) + self.assertEqual(len(result["unverifiable_collections"]), 2) + self.assertEqual(len(result["unexpected"]), 0) + self.assertEqual( + list(result["can_verify"])[0].name, "draft-has-mixed-provenance-01.txt" + ) + self.assertEqual( + set([p.name for p in result["unverifiable_collections"]]), + set( + [ + "draft-has-mixed-provenance-00.txt", + "draft-has-mixed-provenance-00.txt.Z", + ] + ), + ) + + result = investigate_fragment("not-be-possible") + self.assertEqual(len(result["can_verify"]), 0) + self.assertEqual(len(result["unverifiable_collections"]), 0) + self.assertEqual(len(result["unexpected"]), 1) + self.assertEqual( + list(result["unexpected"])[0].name, + "draft-this-should-not-be-possible-00.txt", + ) + + def test_investigate(self): + url = urlreverse("ietf.doc.views_doc.investigate") + login_testing_unauthorized(self, "secretary", url) + r = self.client.get(url) + self.assertEqual(r.status_code, 200) + q = PyQuery(r.content) + self.assertEqual(len(q("form#investigate")), 1) + self.assertEqual(len(q("div#results")), 0) + r = self.client.post(url, dict(name_fragment="this-is-not-found")) + self.assertEqual(r.status_code, 200) + q = PyQuery(r.content) + self.assertEqual(len(q("div#results")), 1) + self.assertEqual(len(q("table#authenticated")), 0) + self.assertEqual(len(q("table#unverifiable")), 0) + self.assertEqual(len(q("table#unexpected")), 0) + r = self.client.post(url, dict(name_fragment="mixed-provenance")) + self.assertEqual(r.status_code, 200) + q = PyQuery(r.content) + self.assertEqual(len(q("div#results")), 1) + self.assertEqual(len(q("table#authenticated")), 1) + self.assertEqual(len(q("table#unverifiable")), 1) + self.assertEqual(len(q("table#unexpected")), 0) + r = self.client.post(url, dict(name_fragment="not-be-possible")) + 
self.assertEqual(r.status_code, 200) + q = PyQuery(r.content) + self.assertEqual(len(q("div#results")), 1) + self.assertEqual(len(q("table#authenticated")), 0) + self.assertEqual(len(q("table#unverifiable")), 0) + self.assertEqual(len(q("table#unexpected")), 1) + r = self.client.post(url, dict(name_fragment="short")) + self.assertEqual(r.status_code, 200) + q = PyQuery(r.content) + self.assertEqual(len(q("#id_name_fragment.is-invalid")), 1) + for char in ["*", "%", "/", "\\"]: + r = self.client.post(url, dict(name_fragment=f"bad{char}character")) + self.assertEqual(r.status_code, 200) + q = PyQuery(r.content) + self.assertEqual(len(q("#id_name_fragment.is-invalid")), 1) diff --git a/ietf/doc/urls.py b/ietf/doc/urls.py index d29fd9da14..7d930f7f40 100644 --- a/ietf/doc/urls.py +++ b/ietf/doc/urls.py @@ -66,6 +66,8 @@ r"^shepherdwriteup-template/(?P\w+)/?$", views_doc.document_shepherd_writeup_template, ), + url(r'^investigate/?$', views_doc.investigate), + url(r'^stats/newrevisiondocevent/?$', views_stats.chart_newrevisiondocevent), url(r'^stats/newrevisiondocevent/conf/?$', views_stats.chart_conf_newrevisiondocevent), @@ -179,7 +181,8 @@ url(r'^%(name)s/session/' % settings.URL_REGEXPS, include('ietf.doc.urls_material')), url(r'^(?P[A-Za-z0-9._+-]+)/session/', include(session_patterns)), url(r'^(?P[A-Za-z0-9\._\+\-]+)$', views_search.search_for_name), - # latest versions - keep old URLs alive during migration period + # rfcdiff - latest versions - keep old URLs alive during migration period url(r'^rfcdiff-latest-json/%(name)s(?:-%(rev)s)?(\.txt|\.html)?/?$' % settings.URL_REGEXPS, RedirectView.as_view(pattern_name='ietf.api.views.rfcdiff_latest_json', permanent=True)), url(r'^rfcdiff-latest-json/(?P[Rr][Ff][Cc] [0-9]+?)(\.txt|\.html)?/?$', RedirectView.as_view(pattern_name='ietf.api.views.rfcdiff_latest_json', permanent=True)), + # end of rfcdiff support URLs ] diff --git a/ietf/doc/utils.py b/ietf/doc/utils.py index 4872f41fc4..8ec3985b8b 100644 --- 
a/ietf/doc/utils.py +++ b/ietf/doc/utils.py @@ -13,6 +13,7 @@ from collections import defaultdict, namedtuple, Counter from dataclasses import dataclass +from pathlib import Path from typing import Iterator, Union from zoneinfo import ZoneInfo @@ -1382,3 +1383,29 @@ def __iter__(self) -> Iterator[tuple[str, list[str]]]: # .all = everything from above if all: yield alias + ".all", list(all) + +def investigate_fragment(name_fragment): + can_verify = set() + for root in [settings.INTERNET_DRAFT_PATH, settings.INTERNET_DRAFT_ARCHIVE_DIR]: + can_verify.update(list(Path(root).glob(f"*{name_fragment}*"))) + + can_verify.update(list(Path(settings.AGENDA_PATH).glob(f"**/*{name_fragment}*"))) + + # N.B. This reflects the assumption that the internet draft archive dir is in + # a directory with other collections (at /a/ietfdata/draft/collections as this is written) + unverifiable_collections = set( + Path(settings.INTERNET_DRAFT_ARCHIVE_DIR).parent.glob(f"**/*{name_fragment}*") + ) + unverifiable_collections.difference_update(can_verify) + + expected_names = set([p.name for p in can_verify.union(unverifiable_collections)]) + maybe_unexpected = list( + Path(settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR).glob(f"*{name_fragment}*") + ) + unexpected = [p for p in maybe_unexpected if p.name not in expected_names] + + return dict( + can_verify=can_verify, + unverifiable_collections=unverifiable_collections, + unexpected=unexpected, + ) diff --git a/ietf/doc/views_doc.py b/ietf/doc/views_doc.py index a94af21e61..021d5645d9 100644 --- a/ietf/doc/views_doc.py +++ b/ietf/doc/views_doc.py @@ -58,7 +58,7 @@ IESG_BALLOT_ACTIVE_STATES, STATUSCHANGE_RELATIONS, DocumentActionHolder, DocumentAuthor, RelatedDocument, RelatedDocHistory) from ietf.doc.utils import (augment_events_with_revision, - can_adopt_draft, can_unadopt_draft, get_chartering_type, get_tags_for_stream_id, + can_adopt_draft, can_unadopt_draft, get_chartering_type, get_tags_for_stream_id, investigate_fragment, 
needed_ballot_positions, nice_consensus, update_telechat, has_same_ballot, get_initial_notify, make_notify_changed_event, make_rev_history, default_consensus, add_events_message_info, get_unicode_document_content, @@ -72,7 +72,7 @@ role_required, is_individual_draft_author, can_request_rfc_publication) from ietf.name.models import StreamName, BallotPositionName from ietf.utils.history import find_history_active_at -from ietf.doc.forms import TelechatForm, NotifyForm, ActionHoldersForm, DocAuthorForm, DocAuthorChangeBasisForm +from ietf.doc.forms import InvestigateForm, TelechatForm, NotifyForm, ActionHoldersForm, DocAuthorForm, DocAuthorChangeBasisForm from ietf.doc.mails import email_comment, email_remind_action_holders from ietf.mailtrigger.utils import gather_relevant_expansions from ietf.meeting.models import Session, SessionPresentation @@ -2254,3 +2254,16 @@ def idnits2_state(request, name, rev=None): content_type="text/plain;charset=utf-8", ) +@role_required("Secretariat") +def investigate(request): + results = None + if request.method == "POST": + form = InvestigateForm(request.POST) + if form.is_valid(): + name_fragment = form.cleaned_data["name_fragment"] + results = investigate_fragment(name_fragment) + else: + form = InvestigateForm() + return render( + request, "doc/investigate.html", context=dict(form=form, results=results) + ) diff --git a/ietf/templates/doc/investigate.html b/ietf/templates/doc/investigate.html new file mode 100644 index 0000000000..80b004c838 --- /dev/null +++ b/ietf/templates/doc/investigate.html @@ -0,0 +1,93 @@ +{% extends "base.html" %} +{# Copyright The IETF Trust 2024, All Rights Reserved #} +{% load django_bootstrap5 ietf_filters origin static %} +{% block title %}Investigate{% endblock %} +{% block pagehead %} + +{% endblock %} +{% block content %} + {% origin %} +

Investigate

+
+ {% csrf_token %} + {% bootstrap_form form %} + +
+ {% if results %} +
+ {% if results.can_verify %} +

These can be authenticated

+ + + + + + + + + + {% for path in results.can_verify %} + {% with url=path|url_for_path %} + + {% endwith %} + {% endfor %} + +
NameLast Modified OnLink
{{path.name}}{{path|mtime|date:"DATETIME_FORMAT"}}{{url}}
+ {% else %} +

Nothing with this name fragment can be authenticated

+ {% endif %} +
+ {% if results.unverifiable_collections %} +

These are in the archive, but cannot be authenticated

+ + + + + + + + + + + {% for path in results.unverifiable_collections %} + {% with url=path|url_for_path %} + + + + + + + {% endwith %} + {% endfor %} + +
NameLast Modified OnLinkSource
{{path.name}}{{path|mtime|date:"DATETIME_FORMAT"}}{{url}}{{path}}
+ {% endif %} + {% if results.unexpected %} +

These are unexpected and we do not know what their origin is. These cannot be authenticated

+ + + + + + + + + + {% for path in results.unexpected %} + {% with url=path|url_for_path %} + + + + + + {% endwith %} + {% endfor %} + +
NameLast Modified OnLink
{{path.name}}{{path|mtime|date:"DATETIME_FORMAT"}}{{url}}
+ {% endif %} +
+ {% endif %} +{% endblock %} +{% block js %} + +{% endblock %} \ No newline at end of file From 46ae41fb63a619278edc2f6001e8b0338e91ea59 Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Tue, 23 Apr 2024 09:14:29 -0500 Subject: [PATCH 2/2] fix: use django-provided validation --- ietf/doc/forms.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ietf/doc/forms.py b/ietf/doc/forms.py index 5504d62160..f77b218318 100644 --- a/ietf/doc/forms.py +++ b/ietf/doc/forms.py @@ -274,6 +274,7 @@ class InvestigateForm(forms.Form): help_text=( "Enter a filename such as draft-ietf-some-draft-00.txt or a fragment like draft-ietf-some-draft using at least 8 characters. The search will also work for files that are not necessarily drafts." ), + min_length=8, ) def clean_name_fragment(self): @@ -283,8 +284,6 @@ def clean_name_fragment(self): # looking for files with less than 8 characters in the name is not useful # Requiring this will help protect against the secretariat unintentionally # matching every draft. - if len(name_fragment) < 8: - raise ValidationError("Please enter at least 8 characters") - if any([c in name_fragment for c in disallowed_characters]): + if any(c in name_fragment for c in disallowed_characters): raise ValidationError(f"The following characters are disallowed: {', '.join(disallowed_characters)}") return name_fragment