Skip to content

Commit

Permalink
feat: v3-era aware feed for rfcs (#5828)
Browse files Browse the repository at this point in the history
* feat: v3-era aware feed for rfcs

* chore: remove obviated comment

* test: improve rfc feed tests
  • Loading branch information
rjsparks authored Jun 16, 2023
1 parent ac65232 commit 3c016cc
Show file tree
Hide file tree
Showing 2 changed files with 174 additions and 62 deletions.
214 changes: 153 additions & 61 deletions ietf/doc/feeds.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
# Copyright The IETF Trust 2007-2020, All Rights Reserved
# -*- coding: utf-8 -*-

import debug # pyflakes:ignore

import datetime
import unicodedata

from django.contrib.syndication.views import Feed, FeedDoesNotExist
from django.utils.feedgenerator import Atom1Feed, Rss201rev2Feed
from django.urls import reverse as urlreverse
from django.template.defaultfilters import truncatewords, truncatewords_html, date as datefilter
from django.template.defaultfilters import linebreaks # type: ignore
from django.template.defaultfilters import (
truncatewords,
truncatewords_html,
date as datefilter,
)
from django.template.defaultfilters import linebreaks # type: ignore
from django.utils import timezone
from django.utils.html import strip_tags

Expand All @@ -21,12 +26,12 @@

def strip_control_characters(s):
    """Replace Unicode control / non-printing characters in a string.

    Every character whose Unicode general category starts with "C" is
    swapped for U+FFFD REPLACEMENT CHARACTER; all other characters are kept
    unchanged, so the result always has the same length as the input.

    Args:
        s: the text to sanitise.

    Returns:
        A new string with each category-"C" character replaced.
    """
    replacement_char = unicodedata.lookup("REPLACEMENT CHARACTER")
    return "".join(
        replacement_char if unicodedata.category(c).startswith("C") else c
        for c in s
    )


class DocumentChangesFeed(Feed):
feed_type = Atom1Feed

Expand All @@ -39,25 +44,37 @@ def title(self, obj):
def link(self, obj):
    """Return the URL of the history page for the document behind this feed.

    Raises:
        FeedDoesNotExist: if no document object was resolved (obj is None).
    """
    if obj is None:
        raise FeedDoesNotExist
    return urlreverse(
        "ietf.doc.views_doc.document_history",
        kwargs=dict(name=obj.canonical_name()),
    )

def subtitle(self, obj):
    """Return the feed subtitle, built from the document's display name."""
    return "History of change entries for %s." % obj.display_name()

def items(self, obj):
    """Return the document's events, newest first, for use as feed items.

    The queryset is ordered by descending time, then descending id, and
    pulls the related "by", "newrevisiondocevent" and "submissiondocevent"
    rows in the same query.
    """
    events = (
        obj.docevent_set.all()
        .order_by("-time", "-id")
        .select_related("by", "newrevisiondocevent", "submissiondocevent")
    )
    # Presumably attaches the `rev` attribute that item_title() reads;
    # the helper is defined outside this view -- confirm there.
    augment_events_with_revision(obj, events)
    return events

def item_title(self, item):
    """Return the entry title: "[author] description [rev. N]".

    The event description has its HTML tags stripped and is truncated to
    15 words; the finished title is then passed through
    strip_control_characters().
    """
    return strip_control_characters(
        "[%s] %s [rev. %s]"
        % (
            item.by,
            truncatewords(strip_tags(item.desc), 15),
            item.rev,
        )
    )

def item_description(self, item):
    """Return the entry body: the formatted event description, cut to 20 words.

    The description is run through format_textarea(), truncated with the
    HTML-aware truncatewords_html filter, and finally passed through
    strip_control_characters().
    """
    return strip_control_characters(
        truncatewords_html(format_textarea(item.desc), 20)
    )

def item_pubdate(self, item):
    """Return the event's timestamp as the entry's publication date."""
    return item.time
Expand All @@ -66,17 +83,28 @@ def item_author_name(self, item):
return str(item.by)

def item_link(self, item):
    """Return the document history URL with a fragment for this event.

    The fragment is "#history-<pk>", where <pk> is the event's primary key.
    """
    history_url = urlreverse(
        "ietf.doc.views_doc.document_history",
        kwargs=dict(name=item.doc.canonical_name()),
    )
    return history_url + "#history-%s" % item.pk


class InLastCallFeed(Feed):
title = "Documents in Last Call"
subtitle = "Announcements for documents in last call."
feed_type = Atom1Feed
author_name = 'IESG Secretary'
author_name = "IESG Secretary"
link = "/doc/iesg/last-call/"

def items(self):
docs = list(Document.objects.filter(type="draft", states=State.objects.get(type="draft-iesg", slug="lc")))
docs = list(
Document.objects.filter(
type="draft", states=State.objects.get(type="draft-iesg", slug="lc")
)
)
for d in docs:
d.lc_event = d.latest_event(LastCallDocEvent, type="sent_last_call")

Expand All @@ -86,99 +114,164 @@ def items(self):
return docs

def item_title(self, item):
    """Return "<name> (<start> - <expiry>)" for a document in last call.

    Both dates come from item.lc_event: the start is rendered with the
    "F j" date format and the expiry with "F j, Y".
    """
    return "%s (%s - %s)" % (
        item.name,
        datefilter(item.lc_event.time, "F j"),
        datefilter(item.lc_event.expires, "F j, Y"),
    )

def item_description(self, item):
    """Return the last-call event description with line breaks as HTML.

    The text is passed through the linebreaks filter and then through
    strip_control_characters().
    """
    return strip_control_characters(linebreaks(item.lc_event.desc))

def item_pubdate(self, item):
    """Return the time of the document's last-call event as the pubdate."""
    return item.lc_event.time


class Rss201WithNamespacesFeed(Rss201rev2Feed):
    """RSS 2.0.1 feed generator that also emits dcterms and media elements.

    The root element declares the `dcterms`, `media` and `xsi` XML
    namespaces, and each item may carry extra values (supplied through the
    feed's item_extra_kwargs) that are written as namespaced child elements.
    """

    def root_attributes(self):
        """Return the root element attributes plus the namespace declarations."""
        attrs = super().root_attributes()
        attrs["xmlns:dcterms"] = "http://purl.org/dc/terms/"
        attrs["xmlns:media"] = "http://search.yahoo.com/mrss/"
        attrs["xmlns:xsi"] = "http://www.w3.org/2001/XMLSchema-instance"
        return attrs

    def add_item_elements(self, handler, item):
        """Write the standard item elements, then the namespaced extras.

        Recognised optional keys of `item` (each skipped when missing or None):
          * dcterms_abstract / dcterms_accessRights / dcterms_format /
            dcterms_publisher -> <dcterms:...> elements
          * doi, doiuri -> <dcterms:identifier> elements
          * media_contents -> one <media:content> per entry; every entry
            needs "url" and "media_type" and may carry "is_format_of".
        """
        super().add_item_elements(handler, item)

        for element_name in ("abstract", "accessRights", "format", "publisher"):
            value = item.get("dcterms_%s" % element_name)
            if value is None:
                continue
            # Only the publisher element is tagged with an xsi:type.
            attrs = (
                {"xsi:type": "dcterms:local"} if element_name == "publisher" else {}
            )
            handler.addQuickElement("dcterms:%s" % element_name, value, attrs)

        if item.get("doi") is not None:
            handler.addQuickElement(
                "dcterms:identifier", item["doi"], {"xsi:type": "dcterms:doi"}
            )
        if item.get("doiuri") is not None:
            handler.addQuickElement(
                "dcterms:identifier", item["doiuri"], {"xsi:type": "dcterms:uri"}
            )

        # TODO: consider using media:group
        for media_content in item.get("media_contents") or ():
            handler.startElement(
                "media:content",
                {
                    "url": media_content["url"],
                    "type": media_content["media_type"],
                },
            )
            # Skip the relation when absent (or None) rather than emitting
            # an empty <dcterms:isFormatOf/> element.
            if media_content.get("is_format_of") is not None:
                handler.addQuickElement(
                    "dcterms:isFormatOf", media_content["is_format_of"]
                )
            handler.endElement("media:content")

class RfcFeed(Feed):
feed_type = Rss201WithNamespacesFeed
title = "RFCs"
author_name = "RFC Editor"
link = "https://www.rfc-editor.org/rfc-index2.html"

def get_object(self, request, year=None):
    """Record the optional year taken from the URL; items() reads it later.

    Returns None implicitly, so the feed has no per-object context.
    """
    # NOTE(review): the year is stored on the feed instance itself. If a
    # single instance is shared across concurrent requests this could race --
    # confirm how the feed is instantiated in the URLconf.
    self.year = year

def items(self):
    """Return the documents of recently (or yearly) published RFCs.

    When a year was captured by get_object(), every "published_rfc" event
    whose time falls within that year (boundaries taken in RPC_TZINFO) is
    used; otherwise only events from the last 8 days are. Events are
    ordered newest first, and each returned document gets a
    `publication_time` attribute holding its event's time.
    """
    published = DocEvent.objects.filter(type="published_rfc")
    if self.year:
        # Find published RFCs based on their official publication year
        year = int(self.year)
        start_of_year = datetime.datetime(year, 1, 1, tzinfo=RPC_TZINFO)
        start_of_next_year = datetime.datetime(year + 1, 1, 1, tzinfo=RPC_TZINFO)
        rfc_events = published.filter(
            time__gte=start_of_year,
            time__lt=start_of_next_year,
        )
    else:
        cutoff = timezone.now() - datetime.timedelta(days=8)
        rfc_events = published.filter(time__gte=cutoff)

    # select_related fetches each event's document in the same query
    # instead of issuing one extra query per event.
    rfc_events = rfc_events.select_related("doc").order_by("-time")

    docs = []
    for event in rfc_events:
        doc = event.doc
        doc.publication_time = event.time
        docs.append(doc)
    return docs

def item_title(self, item):
    """Return "<canonical name> : <title>" for the RFC."""
    return "%s : %s" % (item.canonical_name(), item.title)

def item_description(self, item):
    """Return the RFC's abstract as the item description."""
    return item.abstract

def item_link(self, item):
    """Return the rfc-editor.org info page URL for the RFC."""
    return "https://rfc-editor.org/info/%s" % item.canonical_name()

def item_pubdate(self, item):
    """Return the publication time that items() attached to the document."""
    return item.publication_time

def item_extra_kwargs(self, item):
    """Return the extra per-item values written by the feed generator.

    Adds the dcterms access-rights, format and publisher values, the DOI
    (bare and as a URI), and a "media_contents" list describing each
    available rendition of the RFC. Which renditions are listed depends on
    the RFC number:

      * below 8650: txt and html (unless the number is in the first
        exclusion set) and a "pdfrfc" PDF (unless it is in the second), each
        marked as a format of the RFC's info page;
      * 8650 and above: the XML source, plus txt, html and pdf, each marked
        as a format of that XML.
    """
    extra = super(RfcFeed, self).item_extra_kwargs(item)
    name = item.canonical_name()
    rfc_number = int(item.rfc_number())

    extra["dcterms_accessRights"] = "gratis"
    extra["dcterms_format"] = "text/html"

    media_contents = []
    if rfc_number < 8650:
        info_link = self.item_link(item)
        # NOTE(review): presumably the RFCs with no txt/html rendition --
        # confirm the list against the RFC Editor's holdings.
        if rfc_number not in {8, 9, 51, 418, 500, 530, 589}:
            for fmt, media_type in [("txt", "text/plain"), ("html", "text/html")]:
                media_contents.append(
                    {
                        "url": f"https://rfc-editor.org/rfc/{name}.{fmt}",
                        "media_type": media_type,
                        "is_format_of": info_link,
                    }
                )
        # NOTE(review): presumably the RFCs with no pdfrfc rendition -- confirm.
        if rfc_number not in {571, 587}:
            media_contents.append(
                {
                    "url": f"https://www.rfc-editor.org/rfc/pdfrfc/{name}.txt.pdf",
                    "media_type": "application/pdf",
                    "is_format_of": info_link,
                }
            )
    else:
        xml_url = f"https://www.rfc-editor.org/rfc/{name}.xml"
        media_contents.append(
            {
                "url": xml_url,
                "media_type": "application/rfc+xml",
            }
        )
        for fmt, media_type in [
            ("txt", "text/plain"),
            ("html", "text/html"),
            ("pdf", "application/pdf"),
        ]:
            media_contents.append(
                {
                    "url": f"https://rfc-editor.org/rfc/{name}.{fmt}",
                    "media_type": media_type,
                    "is_format_of": xml_url,
                }
            )
    extra["media_contents"] = media_contents

    doi = "10.17487/%s" % name.upper()
    extra["doi"] = doi
    extra["doiuri"] = "http://dx.doi.org/%s" % doi

    # R104 Publisher (Mandatory - but we need a string from them first)
    extra["dcterms_publisher"] = "rfc-editor.org"

    # TODO MAYBE (Optional stuff)
    # R108 License
    # R115 Creator/Contributor (which would we use?)
    # F305 Checksum (do they use it?) (or should we put the our digital signature in here somewhere?)
    # R118 Keyword

    return extra

22 changes: 21 additions & 1 deletion ietf/doc/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -1911,11 +1911,31 @@ def test_last_call_feed(self):
self.assertContains(r, doc.name)

def test_rfc_feed(self):
    """Check the media renditions advertised by the recent and yearly RFC feeds.

    A freshly published rfc9000 must list four renditions (XML, text, HTML
    and PDF); the low-numbered RFCs published in 2016 must list three
    (text, HTML and PDF).
    """
    rfc = WgRfcFactory(alias2__name="rfc9000")
    DocEventFactory(doc=rfc, type="published_rfc")
    r = self.client.get("/feed/rfc/")
    # assertTrue(status, 200) would treat 200 as the failure message and
    # pass for any truthy status; compare the values instead.
    self.assertEqual(r.status_code, 200)
    q = PyQuery(r.content[39:])  # Strip off the xml declaration
    self.assertEqual(len(q("item")), 1)
    item = q("item")[0]
    media_content = item.findall("{http://search.yahoo.com/mrss/}content")
    self.assertEqual(len(media_content), 4)
    types = {m.attrib["type"] for m in media_content}
    self.assertEqual(
        types,
        {"application/rfc+xml", "text/plain", "text/html", "application/pdf"},
    )

    rfcs_2016 = WgRfcFactory.create_batch(3)  # rfc numbers will be well below v3
    for old_rfc in rfcs_2016:
        e = DocEventFactory(doc=old_rfc, type="published_rfc")
        e.time = e.time.replace(year=2016)
        e.save()
    r = self.client.get("/feed/rfc/2016")
    self.assertEqual(r.status_code, 200)
    q = PyQuery(r.content[39:])  # Strip off the xml declaration
    self.assertEqual(len(q("item")), 3)
    item = q("item")[0]
    media_content = item.findall("{http://search.yahoo.com/mrss/}content")
    self.assertEqual(len(media_content), 3)
    types = {m.attrib["type"] for m in media_content}
    self.assertEqual(types, {"text/plain", "text/html", "application/pdf"})

def test_state_help(self):
url = urlreverse('ietf.doc.views_help.state_help', kwargs=dict(type="draft-iesg"))
Expand Down

0 comments on commit 3c016cc

Please sign in to comment.