From e01379635b1a6c2a27782c2a5aa3a08352c56007 Mon Sep 17 00:00:00 2001
From: Simon Hughesdon
Date: Thu, 5 Sep 2019 15:17:49 +0100
Subject: [PATCH 1/2] Switch guides to emit FAQ structured data

This potentially gives us more control over how the result is presented
in external search applications.

Related to https://github.com/alphagov/govuk_publishing_components/pull/1087
---
 app/views/content_items/guide.html.erb |  2 +-
 test/integration/guide_test.rb         | 10 ++++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/app/views/content_items/guide.html.erb b/app/views/content_items/guide.html.erb
index 5b19c4643..d1c30a3b9 100644
--- a/app/views/content_items/guide.html.erb
+++ b/app/views/content_items/guide.html.erb
@@ -1,6 +1,6 @@
 <% content_for :extra_head_content do %>
   <%= machine_readable_metadata(
-    schema: :article,
+    schema: :faq,
     canonical_url: @content_item.canonical_url
   ) %>
 <% end %>
diff --git a/test/integration/guide_test.rb b/test/integration/guide_test.rb
index 90c634f4a..227e18cc5 100644
--- a/test/integration/guide_test.rb
+++ b/test/integration/guide_test.rb
@@ -76,4 +76,14 @@ class GuideTest < ActionDispatch::IntegrationTest
 
     assert_has_component_title(title)
   end
+
+  test "guides show the FAQ page schema" do
+    setup_and_visit_content_item('guide')
+
+    schema_sections = page.find_all("script[type='application/ld+json']", visible: false)
+    schemas = schema_sections.map { |section| JSON.parse(section.text(:all)) }
+
+    faq_page_schema = schemas.detect { |schema| schema["@type"] == "FAQPage" }
+    assert_equal @content_item['title'], faq_page_schema["headline"]
+  end
 end

From 4efa9de908b0563d9272d43653c179a4a293b0b9 Mon Sep 17 00:00:00 2001
From: Simon Hughesdon
Date: Thu, 5 Sep 2019 15:54:23 +0100
Subject: [PATCH 2/2] Allow spidering

I only put this in two days ago :facepalm:

I need spidering to be allowed so that I can review structured data in
checkers such as https://search.google.com/structured-data/testing-tool/u/0/
and https://search.google.com/test/rich-results

This only affects review apps, as this robots.txt is not publicly
accessible in production-like environments.

We noindex, nofollow all the content pages in Heroku review apps, so I
think we're safe to allow robots to access these pages.
---
 public/robots.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/public/robots.txt b/public/robots.txt
index f6ca098a0..181e6f3ff 100644
--- a/public/robots.txt
+++ b/public/robots.txt
@@ -1,5 +1,5 @@
 # See http://www.robotstxt.org/robotstxt.html for documentation on how to use the robots.txt file
 #
 # To ban all spiders from the entire site uncomment the next two lines:
-User-agent: *
-Disallow: /
+# User-agent: *
+# Disallow: /
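
Note for reviewers: the `:faq` schema is rendered by the
machine_readable_metadata component in govuk_publishing_components, so
the exact output is defined there rather than in this repo. For context,
a schema.org FAQPage JSON-LD block has roughly this shape (the values
below are illustrative only, not the component's real output):

    <script type="application/ld+json">
      {
        "@context": "http://schema.org",
        "@type": "FAQPage",
        "headline": "Guide title",
        "mainEntity": [
          {
            "@type": "Question",
            "name": "Part title",
            "acceptedAnswer": {
              "@type": "Answer",
              "text": "Part body"
            }
          }
        ]
      }
    </script>

The integration test only asserts on "@type" and "headline", so it
should hold however the component chooses to structure the question and
answer entries.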
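
Note for reviewers: the "noindex, nofollow" behaviour mentioned in the
second commit message refers to the standard robots meta tag emitted on
content pages in Heroku review apps, along these lines (the exact
mechanism lives elsewhere in the app and is assumed here):

    <meta name="robots" content="noindex, nofollow">

Crawlers that honour this tag will neither index these pages nor follow
links from them, so un-disallowing the review app in robots.txt only
opens the pages up to the structured data checkers, not to search
indexing.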