Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: investigate file authenticity #7331

Merged
merged 3 commits into from
Apr 23, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions ietf/doc/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,3 +266,25 @@ def clean(self):
@staticmethod
def valid_resource_tags():
    """Return the slug of every ExtResourceName, ordered by slug.

    The result is the flat ``values_list`` produced by the query, i.e. it
    yields bare slug values rather than one-element tuples.
    """
    names_by_slug = ExtResourceName.objects.all().order_by('slug')
    return names_by_slug.values_list('slug', flat=True)

class InvestigateForm(forms.Form):
    """Form that collects the file name (or name fragment) to investigate.

    The single field ``name_fragment`` must be at least 8 characters long and
    must not contain any of the characters ``%``, ``/``, ``\\`` or ``*``.
    """

    name_fragment = forms.CharField(
        label="File name or fragment to investigate",
        required=True,
        # Manual inspection of the directories at the time of this writing shows
        # looking for files with less than 8 characters in the name is not useful.
        # Requiring this will help protect against the secretariat unintentionally
        # matching every draft.
        # Declaring the limit on the field lets Django validate it and also
        # emits the HTML ``minlength`` attribute for client-side validation.
        min_length=8,
        # Keep the message users previously saw for too-short input.
        error_messages={"min_length": "Please enter at least 8 characters"},
        help_text=(
            "Enter a filename such as draft-ietf-some-draft-00.txt or a fragment like draft-ietf-some-draft using at least 8 characters. The search will also work for files that are not necessarily drafts."
        ),
    )

    def clean_name_fragment(self):
        """Reject fragments that contain a disallowed character.

        Returns the cleaned fragment unchanged when it is acceptable.
        Raises ValidationError listing the disallowed characters otherwise.
        The minimum length is enforced by the field itself, so this hook only
        runs for values that already passed that check.
        """
        disallowed_characters = ["%", "/", "\\", "*"]
        name_fragment = self.cleaned_data["name_fragment"]
        # A generator (rather than a list) lets any() stop at the first hit.
        if any(c in name_fragment for c in disallowed_characters):
            raise ValidationError(f"The following characters are disallowed: {', '.join(disallowed_characters)}")
        return name_fragment
30 changes: 30 additions & 0 deletions ietf/doc/templatetags/ietf_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import datetime
import re
from pathlib import Path
from urllib.parse import urljoin
from zoneinfo import ZoneInfo

Expand Down Expand Up @@ -899,3 +900,32 @@ def simple_history_delta_change_cnt(history):
delta = history.diff_against(prev)
return len(delta.changes)
return 0

@register.filter
def mtime(path):
    """Return the modification time of a pathlib Path as an aware datetime.

    The result is expressed in the time zone named by settings.TIME_ZONE.
    """
    modified_epoch = path.stat().st_mtime
    # fromtimestamp() without a tz yields a naive datetime in system local
    # time; astimezone() interprets it as such before converting.
    local_naive = datetime.datetime.fromtimestamp(modified_epoch)
    return local_naive.astimezone(ZoneInfo(settings.TIME_ZONE))

@register.filter
def url_for_path(path):
    """Construct a 'best' URL for web access to the given pathlib Path object.

    Assumes that the path is into the Internet-Draft archive or the proceedings.

    Returns a proceedings URL (the path relative to settings.AGENDA_PATH
    appended to https://www.ietf.org/proceedings/) for anything below
    settings.AGENDA_PATH; settings.IETF_ID_ARCHIVE_URL followed by the file
    name for anything below one of the draft directories; and "#" otherwise.
    """
    # PurePath.match() does not treat "**" as a recursive wildcard (it acts
    # like a single-level "*"), so test ancestry directly. This also covers
    # files nested more than one directory below AGENDA_PATH.
    if Path(settings.AGENDA_PATH) in path.parents:
        return (
            f"https://www.ietf.org/proceedings/{path.relative_to(settings.AGENDA_PATH)}"
        )

    draft_roots = (
        Path(settings.INTERNET_DRAFT_PATH),
        # The parent of the archive dir holds the other draft collections.
        Path(settings.INTERNET_DRAFT_ARCHIVE_DIR).parent,
        Path(settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR),
    )
    if any(root in path.parents for root in draft_roots):
        return f"{settings.IETF_ID_ARCHIVE_URL}{path.name}"

    return "#"
136 changes: 135 additions & 1 deletion ietf/doc/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
StatusChangeFactory, DocExtResourceFactory, RgDraftFactory, BcpFactory)
from ietf.doc.forms import NotifyForm
from ietf.doc.fields import SearchableDocumentsField
from ietf.doc.utils import create_ballot_if_not_open, uppercase_std_abbreviated_name, DraftAliasGenerator
from ietf.doc.utils import create_ballot_if_not_open, investigate_fragment, uppercase_std_abbreviated_name, DraftAliasGenerator
from ietf.group.models import Group, Role
from ietf.group.factories import GroupFactory, RoleFactory
from ietf.ipr.factories import HolderIprDisclosureFactory
Expand Down Expand Up @@ -3141,3 +3141,137 @@ def test_state_index(self):
if not '-' in name:
self.assertIn(name, content)

class InvestigateTests(TestCase):
    """Exercise investigate_fragment() and the investigate view.

    setUp() populates temporary draft, archive and agenda directories with
    empty files whose names encode the provenance each one represents.
    """

    # AGENDA_PATH is added to the base class's temporary path overrides.
    # NOTE(review): INTERNET_DRAFT_PATH, INTERNET_DRAFT_ARCHIVE_DIR and
    # INTERNET_ALL_DRAFTS_ARCHIVE_DIR were listed here but commented out in
    # the original - presumably the base TestCase already overrides them;
    # confirm.
    settings_temp_path_overrides = TestCase.settings_temp_path_overrides + [
        "AGENDA_PATH",
    ]

    def setUp(self):
        super().setUp()
        # Contort the draft archive dir temporary replacement to match the
        # "collections" concept: the temporary directory becomes the parent
        # of both the real archive and a donated collection.
        collections_dir = Path(settings.INTERNET_DRAFT_ARCHIVE_DIR)
        archive_dir = collections_dir / "draft-archive"
        donated_dir = collections_dir / "donated-personal-copy"
        meeting_dir = Path(settings.AGENDA_PATH) / "666"
        for new_dir in (archive_dir, donated_dir, meeting_dir):
            new_dir.mkdir()
        settings.INTERNET_DRAFT_ARCHIVE_DIR = str(archive_dir)

        all_drafts_dir = Path(settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR)
        active_dir = Path(settings.INTERNET_DRAFT_PATH)

        # (directories, file names): every name is created in every directory.
        placements = [
            (
                (active_dir, all_drafts_dir),
                ("draft-this-is-active-00.txt",),
            ),
            (
                (archive_dir, all_drafts_dir),
                (
                    "draft-old-but-can-authenticate-00.txt",
                    "draft-has-mixed-provenance-01.txt",
                ),
            ),
            (
                (donated_dir, all_drafts_dir),
                (
                    "draft-donated-from-a-personal-collection-00.txt",
                    "draft-has-mixed-provenance-00.txt",
                    "draft-has-mixed-provenance-00.txt.Z",
                ),
            ),
            (
                (all_drafts_dir,),
                ("draft-this-should-not-be-possible-00.txt",),
            ),
            (
                (meeting_dir,),
                ("draft-this-predates-the-archive-00.txt",),
            ),
        ]
        for directories, filenames in placements:
            for directory in directories:
                for filename in filenames:
                    (directory / filename).touch()

    def test_investigate_fragment(self):
        groups = ("can_verify", "unverifiable_collections", "unexpected")

        # Fragments that must produce exactly one hit, all in a single group.
        single_hits = [
            ("this-is-active", "can_verify", "draft-this-is-active-00.txt"),
            ("old-but-can", "can_verify", "draft-old-but-can-authenticate-00.txt"),
            ("predates", "can_verify", "draft-this-predates-the-archive-00.txt"),
            (
                "personal-collection",
                "unverifiable_collections",
                "draft-donated-from-a-personal-collection-00.txt",
            ),
            (
                "not-be-possible",
                "unexpected",
                "draft-this-should-not-be-possible-00.txt",
            ),
        ]
        for fragment, hit_group, filename in single_hits:
            result = investigate_fragment(fragment)
            for group in groups:
                self.assertEqual(len(result[group]), 1 if group == hit_group else 0)
            self.assertEqual(list(result[hit_group])[0].name, filename)

        # One fragment whose matches are split across two groups.
        result = investigate_fragment("mixed-provenance")
        self.assertEqual(len(result["can_verify"]), 1)
        self.assertEqual(len(result["unverifiable_collections"]), 2)
        self.assertEqual(len(result["unexpected"]), 0)
        self.assertEqual(
            list(result["can_verify"])[0].name, "draft-has-mixed-provenance-01.txt"
        )
        self.assertEqual(
            {p.name for p in result["unverifiable_collections"]},
            {
                "draft-has-mixed-provenance-00.txt",
                "draft-has-mixed-provenance-00.txt.Z",
            },
        )

    def test_investigate(self):
        url = urlreverse("ietf.doc.views_doc.investigate")
        login_testing_unauthorized(self, "secretary", url)

        def fetch(fragment=None):
            # GET the page, or POST the fragment to it; either way the
            # response must be a 200 and is returned parsed.
            if fragment is None:
                response = self.client.get(url)
            else:
                response = self.client.post(url, dict(name_fragment=fragment))
            self.assertEqual(response.status_code, 200)
            return PyQuery(response.content)

        # A plain GET shows the form and no results section.
        page = fetch()
        self.assertEqual(len(page("form#investigate")), 1)
        self.assertEqual(len(page("div#results")), 0)

        # (fragment, authenticated tables, unverifiable tables, unexpected tables)
        table_expectations = [
            ("this-is-not-found", 0, 0, 0),
            ("mixed-provenance", 1, 1, 0),
            ("not-be-possible", 0, 0, 1),
        ]
        for fragment, authenticated, unverifiable, unexpected in table_expectations:
            page = fetch(fragment)
            self.assertEqual(len(page("div#results")), 1)
            self.assertEqual(len(page("table#authenticated")), authenticated)
            self.assertEqual(len(page("table#unverifiable")), unverifiable)
            self.assertEqual(len(page("table#unexpected")), unexpected)

        # Too-short input and every disallowed character mark the field invalid.
        rejected = ["short"] + [
            f"bad{char}character" for char in ["*", "%", "/", "\\"]
        ]
        for fragment in rejected:
            page = fetch(fragment)
            self.assertEqual(len(page("#id_name_fragment.is-invalid")), 1)
5 changes: 4 additions & 1 deletion ietf/doc/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@
r"^shepherdwriteup-template/(?P<type>\w+)/?$",
views_doc.document_shepherd_writeup_template,
),
url(r'^investigate/?$', views_doc.investigate),


url(r'^stats/newrevisiondocevent/?$', views_stats.chart_newrevisiondocevent),
url(r'^stats/newrevisiondocevent/conf/?$', views_stats.chart_conf_newrevisiondocevent),
Expand Down Expand Up @@ -179,7 +181,8 @@
url(r'^%(name)s/session/' % settings.URL_REGEXPS, include('ietf.doc.urls_material')),
url(r'^(?P<name>[A-Za-z0-9._+-]+)/session/', include(session_patterns)),
url(r'^(?P<name>[A-Za-z0-9\._\+\-]+)$', views_search.search_for_name),
# latest versions - keep old URLs alive during migration period
# rfcdiff - latest versions - keep old URLs alive during migration period
url(r'^rfcdiff-latest-json/%(name)s(?:-%(rev)s)?(\.txt|\.html)?/?$' % settings.URL_REGEXPS, RedirectView.as_view(pattern_name='ietf.api.views.rfcdiff_latest_json', permanent=True)),
url(r'^rfcdiff-latest-json/(?P<name>[Rr][Ff][Cc] [0-9]+?)(\.txt|\.html)?/?$', RedirectView.as_view(pattern_name='ietf.api.views.rfcdiff_latest_json', permanent=True)),
# end of rfcdiff support URLs
]
27 changes: 27 additions & 0 deletions ietf/doc/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

from collections import defaultdict, namedtuple, Counter
from dataclasses import dataclass
from pathlib import Path
from typing import Iterator, Union
from zoneinfo import ZoneInfo

Expand Down Expand Up @@ -1382,3 +1383,29 @@ def __iter__(self) -> Iterator[tuple[str, list[str]]]:
# .all = everything from above
if all:
yield alias + ".all", list(all)

def investigate_fragment(name_fragment):
    """Find files whose names contain name_fragment, grouped by provenance.

    name_fragment is placed into the glob patterns as-is, so any glob
    metacharacters it contains are interpreted as wildcards.

    Returns a dict with three entries:
      can_verify: set of Paths found directly in settings.INTERNET_DRAFT_PATH
        or settings.INTERNET_DRAFT_ARCHIVE_DIR, or anywhere below
        settings.AGENDA_PATH.
      unverifiable_collections: set of Paths found anywhere below the parent
        of settings.INTERNET_DRAFT_ARCHIVE_DIR that are not in can_verify.
      unexpected: list of Paths found directly in
        settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR whose file name occurs in
        neither of the other two groups.
    """
    pattern = f"*{name_fragment}*"
    recursive_pattern = f"**/{pattern}"

    can_verify = set()
    for root in (settings.INTERNET_DRAFT_PATH, settings.INTERNET_DRAFT_ARCHIVE_DIR):
        can_verify.update(Path(root).glob(pattern))
    can_verify.update(Path(settings.AGENDA_PATH).glob(recursive_pattern))

    # N.B. This reflects the assumption that the internet draft archive dir is in
    # a directory with other collections (at /a/ietfdata/draft/collections as this is written)
    collections_root = Path(settings.INTERNET_DRAFT_ARCHIVE_DIR).parent
    unverifiable_collections = set(collections_root.glob(recursive_pattern))
    # The archive dir itself lives under collections_root, so drop what was
    # already classified as verifiable.
    unverifiable_collections.difference_update(can_verify)

    # Compare by file name only: the all-drafts archive holds copies under a
    # different directory than the places searched above.
    expected_names = {p.name for p in can_verify | unverifiable_collections}
    unexpected = [
        p
        for p in Path(settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR).glob(pattern)
        if p.name not in expected_names
    ]

    return dict(
        can_verify=can_verify,
        unverifiable_collections=unverifiable_collections,
        unexpected=unexpected,
    )
17 changes: 15 additions & 2 deletions ietf/doc/views_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
IESG_BALLOT_ACTIVE_STATES, STATUSCHANGE_RELATIONS, DocumentActionHolder, DocumentAuthor,
RelatedDocument, RelatedDocHistory)
from ietf.doc.utils import (augment_events_with_revision,
can_adopt_draft, can_unadopt_draft, get_chartering_type, get_tags_for_stream_id,
can_adopt_draft, can_unadopt_draft, get_chartering_type, get_tags_for_stream_id, investigate_fragment,
needed_ballot_positions, nice_consensus, update_telechat, has_same_ballot,
get_initial_notify, make_notify_changed_event, make_rev_history, default_consensus,
add_events_message_info, get_unicode_document_content,
Expand All @@ -72,7 +72,7 @@
role_required, is_individual_draft_author, can_request_rfc_publication)
from ietf.name.models import StreamName, BallotPositionName
from ietf.utils.history import find_history_active_at
from ietf.doc.forms import TelechatForm, NotifyForm, ActionHoldersForm, DocAuthorForm, DocAuthorChangeBasisForm
from ietf.doc.forms import InvestigateForm, TelechatForm, NotifyForm, ActionHoldersForm, DocAuthorForm, DocAuthorChangeBasisForm
from ietf.doc.mails import email_comment, email_remind_action_holders
from ietf.mailtrigger.utils import gather_relevant_expansions
from ietf.meeting.models import Session, SessionPresentation
Expand Down Expand Up @@ -2254,3 +2254,16 @@ def idnits2_state(request, name, rev=None):
content_type="text/plain;charset=utf-8",
)

@role_required("Secretariat")
def investigate(request):
    """Render the file investigation page.

    On GET an unbound InvestigateForm is shown and ``results`` is None. On
    POST the submitted form is validated; when valid, ``results`` holds the
    dict returned by investigate_fragment() for the cleaned fragment,
    otherwise it stays None and the bound form carries the errors.
    """
    if request.method != "POST":
        form = InvestigateForm()
        results = None
    else:
        form = InvestigateForm(request.POST)
        if form.is_valid():
            results = investigate_fragment(form.cleaned_data["name_fragment"])
        else:
            results = None
    template_context = dict(form=form, results=results)
    return render(request, "doc/investigate.html", context=template_context)
Loading
Loading