diff --git a/kitsune/middleware.py b/kitsune/middleware.py index a1ec00508c3..be6c067086a 100644 --- a/kitsune/middleware.py +++ b/kitsune/middleware.py @@ -5,17 +5,12 @@ # # Flow: # -# 1. Check if the response is 404 and it's from a Lithium URL (/t5/*) +# 1. Checks if the response is 404 and it's from a Lithium URL (/t5/*) # 2. Checks cache, redirects if successful -# 3. Checks Lithium localized product_pages from LithiumRedirectionMiddleware.product_pages, redirects if successful -# 4. Fetches the page from Lithium and tries to extract title of the page. -# Use og:title instead of HTML Title because Lithium strips the later down to 50 chars or something. -# If it fails it returns 404 -# 5a. Try to find a Document with the same title, redirect if successful -# 5b. Try to find a Topic with the same title under a product. Product is also extracted by title. For non-English pages we need to do a reverse translation lookup from TOPICS -# 5c. Try to find a Product. For non-English pages we need to do a reverse translation lookup from PRODUCTS -# -# In all cases we store a successful redirect in the cache. +# 3. Checks popular Lithium pages from LithiumRedirectionMiddleware.extra_pages, +# redirects if successful +# 4. Checks Lithium localized product_pages from +# LithiumRedirectionMiddleware.product_pages, redirects if successful # In all cases we append ?cache=no in the redirect to bust the cache # All redirects are 302s # @@ -23,7 +18,6 @@ import hashlib import re -import urllib from django.core.cache import cache from django.conf import settings @@ -31,12 +25,8 @@ from django.utils.translation import pgettext from django.utils import translation -import requests -from raven.contrib.django.raven_compat.models import client as sentry_client - from kitsune.products.models import Product, Topic from kitsune.sumo.views import handle404 -from kitsune.wiki.models import Document # build a list of topics translated for reverse lookup @@ -79,18 +69,18 @@ class LithiumRedirectionMiddleware(): u'Mozilla-Support-Nederlands': '/nl/?cache=no', u'Apoio-da-Mozilla-Português': '/pt/?cache=no', u'Mozilla-Support-Svenska': '/sv/?cache=no', - 'Mozilla-\xe3\x82\xb5\xe3\x83\x9d\xe3\x83\xbc\xe3\x83\x88-\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e': '/ja/?cache=no', - 'Mozilla-\xe6\x8a\x80\xe8\xa1\x93\xe6\x94\xaf\xe6\x8f\xb4-\xe6\xad\xa3\xe9\xab\x94\xe4\xb8\xad\xe6\x96\x87-\xe7\xb9\x81\xe9\xab\x94': '/zh-TW/?cache=no', - 'Mozilla-\xeb\x8f\x84\xec\x9b\x80\xeb\xa7\x90-\xed\x95\x9c\xea\xb5\xad\xec\x96\xb4': '/ko/?cache=no', - 'Mozilla-\xe6\x8a\x80\xe6\x9c\xaf\xe6\x94\xaf\xe6\x8c\x81-\xe4\xb8\xad\xe6\x96\x87-\xe7\xae\x80\xe4\xbd\x93': '/zh-CN/?cache=no', - '\xd8\xaf\xd8\xb9\xd9\x85-\xd9\x81\xd9\x8e\xd9\x8a\xd9\x8e\xd8\xb1\xd9\x81\xd9\x8f\xd9\x83\xd8\xb3-\xd8\xb9\xd8\xb1\xd8\xa8\xd9\x8a': '/ar/?cache=no', + 'Mozilla-\xe3\x82\xb5\xe3\x83\x9d\xe3\x83\xbc\xe3\x83\x88-\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e': '/ja/?cache=no', # noqa + 'Mozilla-\xe6\x8a\x80\xe8\xa1\x93\xe6\x94\xaf\xe6\x8f\xb4-\xe6\xad\xa3\xe9\xab\x94\xe4\xb8\xad\xe6\x96\x87-\xe7\xb9\x81\xe9\xab\x94': '/zh-TW/?cache=no', # noqa + 'Mozilla-\xeb\x8f\x84\xec\x9b\x80\xeb\xa7\x90-\xed\x95\x9c\xea\xb5\xad\xec\x96\xb4': '/ko/?cache=no', # noqa + 'Mozilla-\xe6\x8a\x80\xe6\x9c\xaf\xe6\x94\xaf\xe6\x8c\x81-\xe4\xb8\xad\xe6\x96\x87-\xe7\xae\x80\xe4\xbd\x93': '/zh-CN/?cache=no', # noqa + '\xd8\xaf\xd8\xb9\xd9\x85-\xd9\x81\xd9\x8e\xd9\x8a\xd9\x8e\xd8\xb1\xd9\x81\xd9\x8f\xd9\x83\xd8\xb3-\xd8\xb9\xd8\xb1\xd8\xa8\xd9\x8a': '/ar/?cache=no', # noqa } extra_pages = { - '/t5/How-To/Cookies/ta-p/16348': '/kb/cookies-information-websites-store-on-your-computer', - '/t5/Fix-slowness-crashing-error/What-does-quot-Your-connection-is-not-secure-quot-mean/ta-p/30354': '/kb/what-does-your-connection-is-not-secure-mean', - '/t5/Firefox-for-Android/ct-p/Firefox-Android': '/products/mobile', - '/t5/Manage-preferences-and-add-ons/Disable-or-remove-Add-ons/ta-p/1000': '/kb/disable-or-remove-add-ons', + '/t5/How-To/Cookies/ta-p/16348': '/kb/cookies-information-websites-store-on-your-computer?cache=no', # noqa + '/t5/Fix-slowness-crashing-error/What-does-quot-Your-connection-is-not-secure-quot-mean/ta-p/30354': '/kb/what-does-your-connection-is-not-secure-mean?cache=no', # noqa + '/t5/Firefox-for-Android/ct-p/Firefox-Android': '/products/mobile?cache=no', + '/t5/Manage-preferences-and-add-ons/Disable-or-remove-Add-ons/ta-p/1000': '/kb/disable-or-remove-add-ons?cache=no', # noqa } def process_response(self, request, response): @@ -118,74 +108,5 @@ def process_response(self, request, response): if urlparts[2] in self.product_pages: return HttpResponseRedirect(self.product_pages[urlparts[2]]) - path = request.path_info - url = 'https://hwsfp35778.lithium.com' + path - try: - response = requests.get( - url, - timeout=6, - verify=False, - ) - except requests.exceptions.Timeout: - sentry_client.captureException() - return handle404(request) - - try: - response.raise_for_status() - except requests.exceptions.RequestException: - sentry_client.captureException() - return handle404(request) - else: - for content, prop in self.meta_regex.findall(response.content): - try: - content = content.strip().decode('utf-8') - prop = prop.strip().decode('utf-8') - except UnicodeError: - break - - if prop == 'og:title': - # order of lookup is important - try: - bar = Document.objects.get(title=content) - except (Document.DoesNotExist, Document.MultipleObjectsReturned): - pass - except: - sentry_client.captureException() - else: - url = bar.get_absolute_url() + '?cache=no' - cache.set(cache_key, url, None) - return HttpResponseRedirect(url) - - if '-' in content: - product, title = content.split('-', 1) - product = product.strip() - title = title.strip() - - title = TOPICS.get(title, title) - product = PRODUCTS.get(product, product) - - try: - bar = Topic.objects.get(title=title, product__title=product) - except (Topic.DoesNotExist, Topic.MultipleObjectsReturned): - pass - except: - sentry_client.captureException() - else: - url = bar.get_absolute_url() + '?cache=no' - cache.set(cache_key, url, None) - return HttpResponseRedirect(url) - - product = PRODUCTS.get(content, content) - try: - bar = Product.objects.get(title=product) - except (Product.DoesNotExist, Product.MultipleObjectsReturned): - pass - except: - sentry_client.captureException() - else: - url = bar.get_absolute_url() + '?cache=no' - cache.set(cache_key, url, None) - return HttpResponseRedirect(url) - # everything failed, kittens die, it's a 404 return handle404(request)