-
-
Notifications
You must be signed in to change notification settings - Fork 3.6k
/
models.py
348 lines (293 loc) · 10.8 KB
/
models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
"""Django models for the redirects app."""
import re
import structlog
from django.db import models
from django.utils.translation import gettext
from django.utils.translation import gettext_lazy as _
from readthedocs.core.resolver import Resolver
from readthedocs.projects.models import Project
from readthedocs.projects.ordering import ProjectItemPositionManager
from readthedocs.redirects.constants import (
CLEAN_URL_TO_HTML_REDIRECT,
EXACT_REDIRECT,
HTML_TO_CLEAN_URL_REDIRECT,
HTTP_STATUS_CHOICES,
PAGE_REDIRECT,
TYPE_CHOICES,
)
from readthedocs.redirects.validators import validate_redirect
from .querysets import RedirectQuerySet
# Module-level structured logger; used by the ``Redirect`` methods for debug output.
log = structlog.get_logger(__name__)
# FIXME: this help_text message should be dynamic since "Absolute path" doesn't
# make sense for "Prefix Redirects" since the from URL is considered after the
# ``/$lang/$version/`` part. Also, there is a feature for the "Exact Redirects"
# that should be mentioned here: the usage of ``*``.
# Lazily-translated help text for the ``Redirect.from_url`` field.
# The string contains ``<b>`` markup.
from_url_helptext = _(
"Absolute path, excluding the domain. "
"Example: <b>/docs/</b> or <b>/install.html</b>",
)
# Lazily-translated help text for the ``Redirect.to_url`` field.
to_url_helptext = _(
"Absolute or relative URL. Example: <b>/tutorial/install.html</b>",
)
# Lazily-translated help text for the ``Redirect.redirect_type`` field.
redirect_type_helptext = _("The type of redirect you wish to use.")
class Redirect(models.Model):
    """
    A HTTP redirect associated with a Project.

    The ``redirect_type`` selects which ``redirect_<type>`` method is used
    by :meth:`get_redirect_path` to compute the destination.
    ``from_url``/``to_url`` are normalized on :meth:`save`, and are blanked
    for the clean-URL/HTML redirect types, which don't use them.
    """

    project = models.ForeignKey(
        Project,
        verbose_name=_("Project"),
        related_name="redirects",
        on_delete=models.CASCADE,
    )

    redirect_type = models.CharField(
        _("Redirect Type"),
        max_length=255,
        choices=TYPE_CHOICES,
        help_text=redirect_type_helptext,
    )

    from_url = models.CharField(
        _("From URL"),
        max_length=255,
        db_index=True,
        help_text=from_url_helptext,
        blank=True,
    )

    # Store the from_url without the ``*`` wildcard for easier and faster querying.
    # Recomputed on every ``save()``; ``None`` when ``from_url`` has no trailing ``*``.
    from_url_without_rest = models.CharField(
        max_length=255,
        db_index=True,
        help_text="Only for internal querying use",
        blank=True,
        null=True,
    )

    to_url = models.CharField(
        _("To URL"),
        max_length=255,
        db_index=True,
        help_text=to_url_helptext,
        blank=True,
    )

    force = models.BooleanField(
        _("Force redirect"),
        null=True,
        default=False,
        help_text=_("Apply the redirect even if the page exists."),
    )

    http_status = models.SmallIntegerField(
        _("HTTP status code"),
        choices=HTTP_STATUS_CHOICES,
        default=302,
    )

    enabled = models.BooleanField(
        _("Enabled"),
        default=True,
        null=True,
        help_text=_("Enable or disable the redirect."),
    )

    description = models.CharField(
        _("Description"),
        blank=True,
        null=True,
        max_length=255,
        default="",
    )

    position = models.PositiveIntegerField(
        _("Position"),
        default=0,
        help_text=_("Order of execution of the redirect."),
    )

    create_dt = models.DateTimeField(auto_now_add=True)
    update_dt = models.DateTimeField(auto_now=True)

    # Helper invoked from ``save()``/``delete()`` with this instance.
    # NOTE(review): presumably it keeps the ``position`` of sibling redirects
    # consistent; confirm in ``readthedocs.projects.ordering``.
    _position_manager = ProjectItemPositionManager(position_field_name="position")

    objects = RedirectQuerySet.as_manager()

    class Meta:
        verbose_name = _("redirect")
        verbose_name_plural = _("redirects")
        ordering = (
            "position",
            "-update_dt",
        )
        # TODO: add the project, position unique_together constraint once
        # all redirects have a position set.

    def save(self, *args, **kwargs):
        """
        Normalize the URL fields, update the position and persist the redirect.

        ``from_url_without_rest`` is always recomputed: it is only set when
        the normalized ``from_url`` ends with the ``*`` wildcard.
        """
        self.from_url_without_rest = None
        if self.redirect_type in [
            CLEAN_URL_TO_HTML_REDIRECT,
            HTML_TO_CLEAN_URL_REDIRECT,
        ]:
            # These redirects don't make use of the ``from_url``/``to_url`` fields.
            self.to_url = ""
            self.from_url = ""
        else:
            self.to_url = self.normalize_to_url(self.to_url)
            self.from_url = self.normalize_from_url(self.from_url)
            if self.from_url.endswith("*"):
                self.from_url_without_rest = self.from_url.removesuffix("*")

        self._position_manager.change_position_before_save(self)
        super().save(*args, **kwargs)

    def delete(self, *args, **kwargs):
        """
        Delete the redirect, then let the position manager react to it.

        :returns: Whatever ``Model.delete()`` returns, so callers get the
            same contract as with a model that doesn't override ``delete``.
        """
        result = super().delete(*args, **kwargs)
        self._position_manager.change_position_after_delete(self)
        return result

    def normalize_from_url(self, path):
        """
        Normalize from_url to be used for matching.

        Normalize the path to always start with one slash,
        and end without a slash, so we can match both,
        with and without a trailing slash.

        :param path: The raw ``from_url`` value.
        :returns: The normalized path (``"/"`` for an empty or all-slash input).
        """
        path = path.rstrip("/")
        path = "/" + path.lstrip("/")
        return path

    def normalize_to_url(self, path):
        """
        Normalize to_url to be used for redirecting.

        Normalize the path to always start with one slash,
        if the path is not an absolute URL.
        Otherwise, return the path as is.

        :param path: The raw ``to_url`` value.
        :returns: The absolute ``http(s)://`` URL unchanged,
            or the path with exactly one leading slash.
        """
        if re.match(r"^https?://", path):
            return path
        path = "/" + path.lstrip("/")
        return path

    def clean(self):
        """Validate the redirect by delegating to ``validate_redirect``."""
        validate_redirect(
            project=self.project,
            pk=self.pk,
            redirect_type=self.redirect_type,
            from_url=self.from_url,
            to_url=self.to_url,
        )

    @property
    def redirects_to_external_domain(self):
        """Check if the redirect is to an external domain."""
        return bool(re.match(r"^https?://", self.to_url))

    def __str__(self):
        """
        Return a human readable description of the redirect.

        Page and exact redirects show ``<type>: <from> -> <to>``;
        every other type shows the translated ``Redirect: <type>``.
        """
        type_display = self.get_redirect_type_display()
        if self.redirect_type in [PAGE_REDIRECT, EXACT_REDIRECT]:
            return f"{type_display}: {self.get_from_to_url_display()}"
        # Translate the template first and interpolate afterwards:
        # looking up the already formatted text would never match
        # the ``"Redirect: {}"`` message in the translation catalog.
        return gettext("Redirect: {}").format(type_display)

    def get_from_to_url_display(self):
        """
        Return ``"<from_url> -> <to_url>"`` for page and exact redirects.

        Other redirect types return an empty string.
        """
        if self.redirect_type in [PAGE_REDIRECT, EXACT_REDIRECT]:
            return f"{self.from_url} -> {self.to_url}"
        return ""

    def get_full_path(
        self, filename, language=None, version_slug=None, allow_crossdomain=False
    ):
        """
        Return a full path for a given filename.

        This will include version and language information. No protocol/domain
        is returned, unless ``allow_crossdomain`` is true and ``filename``
        is already an absolute ``http(s)://`` URL, which is returned as is.

        :param filename: The file to resolve, or an absolute URL.
        :param language: The language passed to the resolver.
        :param version_slug: The version slug passed to the resolver.
        :param allow_crossdomain: Whether absolute URLs are returned untouched.
        """
        # Handle explicit http redirects
        if allow_crossdomain and re.match(r"^https?://", filename):
            return filename

        return Resolver().resolve_path(
            project=self.project,
            language=language,
            version_slug=version_slug,
            filename=filename,
        )

    def get_redirect_path(self, filename, path=None, language=None, version_slug=None):
        """
        Resolve the redirect for the given filename.

        The work is dispatched to the ``redirect_<redirect_type>`` method
        of this instance.

        .. note::

           This method doesn't check if the current path matches ``from_url``,
           that should be done before calling this method
           using ``Redirect.objects.get_matching_redirect_with_path``.

        :param filename: The filename being served.
        :param path: The whole path from the request.
        :param language: The language of the project.
        :param version_slug: The slug of the current version.
        :raises AttributeError: If there is no ``redirect_<redirect_type>`` method.
        """
        method = getattr(self, f"redirect_{self.redirect_type}")
        return method(
            filename=filename, path=path, language=language, version_slug=version_slug
        )

    def _redirect_with_wildcard(self, current_path):
        """
        Return ``to_url`` with ``:splat`` expanded for wildcard redirects.

        When ``from_url`` ends with ``*``, the part of ``current_path`` after
        ``from_url_without_rest`` replaces every ``:splat`` in ``to_url``.
        Redirects without a wildcard return ``to_url`` unchanged.

        :returns: The destination, or ``None`` if following the redirect
            would cause an infinite redirect loop.
        """
        if self.from_url.endswith("*"):
            if self._will_cause_infinite_redirect(current_path):
                log.debug(
                    "Infinite redirect loop detected",
                    redirect=self,
                )
                return None

            # Everything matched by the ``*`` wildcard.
            splat = current_path[len(self.from_url_without_rest) :]
            to_url = self.to_url.replace(":splat", splat)
            return to_url
        return self.to_url

    def _will_cause_infinite_redirect(self, current_path):
        """
        Check if this redirect will cause an infinite redirect for the given path.

        We detect infinite redirects of the form:

          /dir/* -> /dir/subdir/:splat

        For example, /dir/test.html will redirect to /dir/subdir/test.html,
        and if the file doesn't exist, it will redirect to
        /dir/subdir/subdir/test.html and then to /dir/subdir/subdir/subdir/test.html and so on.

        We do this by checking if we will redirect to a subdirectory of the current path,
        and if the current path already starts with the path we will redirect to.
        """
        if self.from_url.endswith("*") and ":splat" in self.to_url:
            # Only the part of the destination before the first ``:splat`` matters.
            to_url_without_splat = self.to_url.split(":splat", maxsplit=1)[0]

            redirects_to_subpath = to_url_without_splat.startswith(
                self.from_url_without_rest
            )
            if redirects_to_subpath and current_path.startswith(to_url_without_splat):
                return True
        return False

    def redirect_page(self, filename, path, language=None, version_slug=None):
        """
        Resolve a page redirect, matching against ``filename``.

        The destination goes through :meth:`get_full_path`
        with cross-domain URLs allowed.

        :returns: The destination, or ``None`` if there is nowhere to redirect to.
        """
        log.debug("Redirecting...", redirect=self)
        to_url = self._redirect_with_wildcard(current_path=filename)
        if to_url:
            return self.get_full_path(
                filename=to_url,
                language=language,
                version_slug=version_slug,
                allow_crossdomain=True,
            )
        return None

    def redirect_exact(self, filename, path, language=None, version_slug=None):
        """
        Resolve an exact redirect, matching against the whole ``path``.

        The destination is returned as is, without going
        through :meth:`get_full_path`.
        """
        log.debug("Redirecting...", redirect=self)
        return self._redirect_with_wildcard(current_path=path)

    def redirect_clean_url_to_html(
        self, filename, path, language=None, version_slug=None
    ):
        """
        Redirect ``<name>/`` and ``<name>/index.html`` to ``<name>.html``.

        A filename that is just the suffix (e.g. ``/``) maps to ``index.html``.

        :returns: The resolved path, or ``None`` if ``filename``
            doesn't end with any of the known suffixes.
        """
        log.debug("Redirecting...", redirect=self)
        suffixes = ["/", "/index.html"]
        for suffix in suffixes:
            if filename.endswith(suffix):
                to = filename[: -len(suffix)]
                if not to:
                    to = "index.html"
                else:
                    to += ".html"
                return self.get_full_path(
                    filename=to,
                    language=language,
                    version_slug=version_slug,
                    allow_crossdomain=False,
                )
        return None

    def redirect_html_to_clean_url(
        self, filename, path, language=None, version_slug=None
    ):
        """
        Redirect ``<name>.html`` to ``<name>/``.

        The ``.html`` suffix is dropped if present,
        and a trailing slash is always appended.
        """
        log.debug("Redirecting...", redirect=self)
        to = filename.removesuffix(".html") + "/"
        return self.get_full_path(
            filename=to,
            language=language,
            version_slug=version_slug,
            allow_crossdomain=False,
        )