Skip to content

Commit

Permalink
Merge pull request #120 from arXiv/develop
Browse files Browse the repository at this point in the history
Pre-release merge for v0.15.6
  • Loading branch information
mhl10 authored Apr 19, 2019
2 parents 60ad7ee + d927472 commit c99dda7
Show file tree
Hide file tree
Showing 6 changed files with 125 additions and 215 deletions.
69 changes: 32 additions & 37 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion arxiv/base/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def register_blueprint(self: Flask, blueprint: Blueprint,

# Attach the external URL handler as a fallback for failed calls to
# url_for().
app.url_build_error_handlers.append(urls.external_url_handler)
urls.register_external_urls(app)

filters.register_filters(app)
context_processors.register_context_processors(app)
4 changes: 2 additions & 2 deletions arxiv/base/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,10 @@

ARXIV_BUSINESS_TZ = os.environ.get("ARXIV_BUSINESS_TZ", "US/Eastern")

BASE_VERSION = "0.15.5"
BASE_VERSION = "0.15.6"
"""The version of the arxiv-base package."""

APP_VERSION = "0.15.4"
APP_VERSION = "0.15.6"
"""The version of the base test app."""

"""
Expand Down
79 changes: 36 additions & 43 deletions arxiv/base/urls/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,83 +44,70 @@
This will build URLs with the ``https`` scheme by default. To use ``http``,
set ``EXTERNAL_URL_SCHEME = 'http'`` in your configuration.
Danger! Memory leaks lurk here
------------------------------
Earlier versions of this module built Werkzeug routing machinery (Rules, Maps,
etc) on the fly. This led to serious memory leaks. As of v0.15.6,
:class:`.Base` uses :func:`register_external_urls` to set up external URL
handling, which registers a single :class:`.MapAdapter` on a Flask app. This
adapter is in turn used by :func:`external_url_handler` on demand.
See ARXIVNG-2085.
"""

import sys
from typing import Dict, Any
from typing import Dict, Any, List
from urllib.parse import parse_qs
from werkzeug.urls import url_encode, url_parse, url_unparse
from werkzeug.routing import Map, Rule, BuildError
from flask import current_app
from werkzeug.routing import Map, Rule, BuildError, MapAdapter
from flask import current_app, g, Flask

from arxiv.base.exceptions import ConfigurationError
from arxiv.base.converter import ArXivConverter
from arxiv.base import logging
from arxiv.base import config as base_config

from .clickthrough import clickthrough_url
from .links import urlize, urlizer, url_for_doi

logger = logging.getLogger(__name__)


def _get_base_config() -> Any:
    """
    Load :mod:`arxiv.base.config` lazily, at call time.

    The config module reads its values from environment variables, some of
    which are set by SetEnv directives in Apache. Those variables are not set
    until application execution begins, so importing the module any earlier
    would leave it with incorrect values. For that reason the import lives
    inside this function rather than at module level.
    """
    from arxiv.base import config as lazy_config
    return lazy_config


def get_url_map() -> Map:
"""Build a :class:`werkzeug.routing.Map` from configured URLs."""
config = _get_base_config()

def build_adapter(app: Flask) -> MapAdapter:
"""Build a :class:`.MapAdapter` from configured URLs."""
# Get the base URLs (configured in this package).
configured_urls = {url[0]: url for url in config.URLS}
configured_urls = {url[0]: url for url in base_config.URLS}
# Privilege ARXIV_URLs set on the application config.
current_urls = current_app.config.get('URLS', [])
current_urls = app.config.get('URLS', [])
if current_urls:
configured_urls.update({url[0]: url for url in current_urls})

url_map = Map([
Rule(pattern, endpoint=name, host=host, build_only=True)
for name, pattern, host in configured_urls.values()
], converters={'arxiv': ArXivConverter}, host_matching=True)
return url_map


def external_url_for(endpoint: str, **values: Any) -> str:
    """
    Like :func:`flask.url_for`, but builds external URLs based on the config.

    The URL map returned by :func:`get_url_map` is bound to the host named by
    ``BASE_SERVER`` (default ``arxiv.org``) using the scheme named by
    ``EXTERNAL_URL_SCHEME`` (default ``https``); both settings are read from
    the current application's config. Any ``_external`` keyword is discarded,
    because the URL is always built with ``force_external=True``.
    """
    # ``_external`` is meaningless here; drop it so that it is not treated as
    # a URL parameter.
    values.pop('_external', None)
    url_map = get_url_map()
    settings = current_app.config
    bound = url_map.bind(
        settings.get('BASE_SERVER', 'arxiv.org'),
        url_scheme=settings.get('EXTERNAL_URL_SCHEME', 'https'),
    )
    built: str = bound.build(endpoint, values=values, force_external=True)
    return built
scheme = app.config.get('EXTERNAL_URL_SCHEME', 'https')
base_host = app.config.get('BASE_SERVER', 'arxiv.org')
adapter: MapAdapter = url_map.bind(base_host, url_scheme=scheme)
return adapter


def external_url_handler(err: BuildError, endpoint: str, values: Dict) -> str:
"""
Attempt to handle failed URL building with :func:`external_url_for`.
This gets attached to a Flask application via the
:meth:`flask.Flask.url_build_error_handlers` hook.
:func:`flask.Flask.url_build_error_handlers` hook.
"""
values.pop('_external')
try:
url = external_url_for(endpoint, **values)
except BuildError as e:
url: str = current_app.external_url_adapter.build(endpoint,
values=values,
force_external=True)
except BuildError:
# Re-raise the original BuildError, in context of original traceback.
exc_type, exc_value, tb = sys.exc_info()
if exc_value is err:
Expand All @@ -146,3 +133,9 @@ def canonical_url(id: str, version: int = 0) -> str:
if version:
return f'{scheme}://{host}/abs/{id}v{version}'
return f'{scheme}://{host}/abs/{id}'


def register_external_urls(app: Flask) -> None:
    """
    Register :func:`external_url_handler` on a Flask app.

    Builds one adapter with :func:`build_adapter` and stores it on the app as
    ``external_url_adapter``, then adds :func:`external_url_handler` to
    ``app.url_build_error_handlers``.

    Calling this more than once on the same app rebuilds the adapter from the
    app's current config, but does not register the handler a second time.
    """
    app.external_url_adapter = build_adapter(app)
    # Appending unconditionally would stack duplicate handlers if the app is
    # initialised repeatedly; each duplicate would retry the same failed
    # build for no benefit.
    if external_url_handler not in app.url_build_error_handlers:
        app.url_build_error_handlers.append(external_url_handler)
Loading

0 comments on commit c99dda7

Please sign in to comment.