Skip to content

Commit e5a1ff3

Browse files
committed
Fix #1650 Replace search field with generated search vector field
1 parent 7c05b00 commit e5a1ff3

File tree

9 files changed

+77
-114
lines changed

9 files changed

+77
-114
lines changed

README.rst

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ Install and run locally from a virtual environment
8787
#. For docs (next step requires ``gettext``)::
8888

8989
python -m manage loaddata doc_releases
90-
python -m manage update_docs --update-index
90+
python -m manage update_docs
9191

9292
#. For dashboard:
9393

@@ -236,16 +236,8 @@ minified version of it to this directory.
236236
Documentation search
237237
--------------------
238238

239-
When running ``python -m manage update_docs --update-index`` to build all
240-
documents it will also automatically index every document it builds in the
241-
search engine as well. In case you've already built the documents and would like
242-
to reindex the search index, run the command::
243-
244-
python -m manage update_index
245-
246-
This is also the right command to run when you work on the search feature
247-
itself. You can pass the ``-d`` option to try to drop the search index
248-
first before indexing all the documents.
239+
When running ``python -m manage update_docs`` to build all documents it will
240+
also automatically index every document it builds in the search engine.
249241

250242
Updating metrics from production
251243
--------------------------------

docker-entrypoint.dev.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,5 @@ python -m manage loaddata doc_releases
1212
python -m manage loaddata dashboard_production_metrics
1313
# python -m manage loaddata dashboard_example_data
1414
python -m manage update_metrics
15-
#python -m manage update_index
1615

1716
exec "$@"

docs/management/commands/update_docs.py

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -47,13 +47,6 @@ def add_arguments(self, parser):
4747
action="store_true",
4848
help="Ask before building each version",
4949
)
50-
parser.add_argument(
51-
"--update-index",
52-
action="store_true",
53-
dest="update_index",
54-
default=False,
55-
help="Also update the search vector field.",
56-
)
5750
parser.add_argument(
5851
"--purge-cache",
5952
action="store_true",
@@ -98,25 +91,19 @@ def _get_doc_releases(self, versions, options):
9891

9992
def handle(self, *versions, **kwargs):
10093
self.verbosity = kwargs["verbosity"]
101-
self.update_index = kwargs["update_index"]
10294
self.purge_cache = kwargs["purge_cache"]
10395

10496
self.default_builders = ["json", "djangohtml"]
10597

10698
# Keep track of which Git sources have been updated, e.g.,
10799
# {'1.8': True} if the 1.8 docs updated.
108100
self.release_docs_changed = {}
109-
# Only update the index if some docs rebuild.
110-
self.update_index_required = False
111101

112102
for release in self._get_doc_releases(versions, kwargs):
113103
self.build_doc_release(
114104
release, force=kwargs["force"], interactive=kwargs["interactive"]
115105
)
116106

117-
if self.update_index_required:
118-
call_command("update_index", **{"verbosity": self.verbosity})
119-
120107
if self.purge_cache:
121108
changed_versions = {
122109
version
@@ -172,8 +159,6 @@ def build_doc_release(self, release, force=False, interactive=False):
172159
)
173160
return
174161

175-
self.update_index_required = self.update_index
176-
177162
source_dir = checkout_dir.joinpath("docs")
178163

179164
if release.lang != "en":
@@ -288,15 +273,6 @@ def zipfile_inclusion_filter(file_path):
288273
if release.is_default:
289274
self._setup_stable_symlink(release, built_dir)
290275

291-
#
292-
# Rebuild the imported document list and search index.
293-
#
294-
if not self.update_index:
295-
return
296-
297-
if self.verbosity >= 2:
298-
self.stdout.write(" reindexing...")
299-
300276
json_built_dir = parent_build_dir.joinpath("_built", "json")
301277
documents = gen_decoded_documents(json_built_dir)
302278
release.sync_to_db(documents)

docs/management/commands/update_index.py

Lines changed: 0 additions & 28 deletions
This file was deleted.

docs/migrations/0007_add_search_vector.py

Lines changed: 38 additions & 0 deletions
Large diffs are not rendered by default.

docs/models.py

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,11 @@
1616
)
1717
from django.core.cache import cache
1818
from django.db import models, transaction
19-
from django.db.models import Q
19+
from django.db.models import (
20+
Case,
21+
Q,
22+
When,
23+
)
2024
from django.db.models.fields.json import KeyTextTransform
2125
from django.utils.functional import cached_property
2226
from django.utils.html import strip_tags
@@ -27,10 +31,10 @@
2731
from . import utils
2832
from .search import (
2933
DEFAULT_TEXT_SEARCH_CONFIG,
30-
DOCUMENT_SEARCH_VECTOR,
3134
START_SEL,
3235
STOP_SEL,
3336
TSEARCH_CONFIG_LANGUAGES,
37+
get_document_search_vector,
3438
)
3539

3640

@@ -261,7 +265,7 @@ def search(self, query_text, release, document_category=None):
261265
search_query = SearchQuery(
262266
query_text, config=models.F("config"), search_type="websearch"
263267
)
264-
search_rank = SearchRank(models.F("search"), search_query)
268+
search_rank = SearchRank(models.F("search_vector"), search_query)
265269
search = partial(
266270
SearchHeadline,
267271
start_sel=START_SEL,
@@ -296,7 +300,7 @@ def search(self, query_text, release, document_category=None):
296300
)
297301
vector_qs = (
298302
base_qs.alias(rank=search_rank)
299-
.filter(search=search_query)
303+
.filter(search_vector=search_query)
300304
.order_by("-rank")
301305
)
302306
if not vector_qs:
@@ -314,22 +318,6 @@ def search(self, query_text, release, document_category=None):
314318
else:
315319
return self.none()
316320

317-
def search_reset(self):
318-
"""Set to null all not null Document's search vector fields."""
319-
return Document.objects.exclude(search=None).update(search=None)
320-
321-
def search_update(self):
322-
"""
323-
Update Document's search vector fields using the document definition.
324-
325-
This method don't index the module pages (since source code is hard to
326-
combine with full text search) and the big flattened index of the CBVs.
327-
"""
328-
return Document.objects.exclude(
329-
Q(path__startswith="_modules")
330-
| Q(path__startswith="ref/class-based-views/flattened-index")
331-
).update(search=DOCUMENT_SEARCH_VECTOR)
332-
333321

334322
class Document(models.Model):
335323
"""
@@ -344,8 +332,20 @@ class Document(models.Model):
344332
path = models.CharField(max_length=500)
345333
title = models.CharField(max_length=500)
346334
metadata = models.JSONField(default=dict)
347-
search = SearchVectorField(null=True, editable=False)
348-
config = models.SlugField(default=DEFAULT_TEXT_SEARCH_CONFIG)
335+
search_vector = models.GeneratedField(
336+
expression=Case(
337+
*[
338+
When(config=lang, then=get_document_search_vector(lang))
339+
for lang in TSEARCH_CONFIG_LANGUAGES.values()
340+
],
341+
default=get_document_search_vector(),
342+
),
343+
output_field=SearchVectorField(),
344+
db_persist=True,
345+
)
346+
config = models.SlugField(
347+
db_default=DEFAULT_TEXT_SEARCH_CONFIG, default=DEFAULT_TEXT_SEARCH_CONFIG
348+
)
349349

350350
objects = DocumentQuerySet.as_manager()
351351

@@ -354,7 +354,7 @@ class Meta:
354354
models.Index(
355355
fields=["release", "title"], name="document_release_title_idx"
356356
),
357-
GinIndex(fields=["search"]),
357+
GinIndex(fields=["search_vector"], name="document_search_vector_idx"),
358358
]
359359
unique_together = ("release", "path")
360360

docs/search.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from django.contrib.postgres.search import SearchVector
2-
from django.db.models import F, TextChoices
2+
from django.db.models import TextChoices
33
from django.db.models.fields.json import KeyTextTransform
44
from django.utils.translation import gettext_lazy as _
55

@@ -40,15 +40,17 @@
4040
# https://github.com/postgres/postgres/blob/REL_14_STABLE/src/bin/initdb/initdb.c#L2557
4141
DEFAULT_TEXT_SEARCH_CONFIG = "simple"
4242

43-
DOCUMENT_SEARCH_VECTOR = (
44-
SearchVector("title", weight="A", config=F("config"))
45-
+ SearchVector(KeyTextTransform("slug", "metadata"), weight="A", config=F("config"))
46-
+ SearchVector(KeyTextTransform("toc", "metadata"), weight="B", config=F("config"))
47-
+ SearchVector(KeyTextTransform("body", "metadata"), weight="C", config=F("config"))
48-
+ SearchVector(
49-
KeyTextTransform("parents", "metadata"), weight="D", config=F("config")
43+
44+
def get_document_search_vector(lang=DEFAULT_TEXT_SEARCH_CONFIG):
45+
"""Return the search vector with the proper language config."""
46+
return (
47+
SearchVector("title", weight="A", config=lang)
48+
+ SearchVector(KeyTextTransform("slug", "metadata"), weight="A", config=lang)
49+
+ SearchVector(KeyTextTransform("toc", "metadata"), weight="B", config=lang)
50+
+ SearchVector(KeyTextTransform("body", "metadata"), weight="C", config=lang)
51+
+ SearchVector(KeyTextTransform("parents", "metadata"), weight="D", config=lang)
5052
)
51-
)
53+
5254

5355
START_SEL = "<mark>"
5456
STOP_SEL = "</mark>"

docs/tests/test_models.py

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
from releases.models import Release
99

10-
from ..models import DOCUMENT_SEARCH_VECTOR, Document, DocumentRelease
10+
from ..models import Document, DocumentRelease
1111

1212

1313
class ModelsTests(TestCase):
@@ -350,9 +350,6 @@ def setUpTestData(cls):
350350
]
351351
Document.objects.bulk_create(Document(**doc) for doc in documents)
352352

353-
def setUp(self):
354-
Document.objects.search_update()
355-
356353
def test_search(self):
357354
expected_list = [
358355
(
@@ -415,29 +412,17 @@ def test_search_breadcrumbs(self):
415412
],
416413
)
417414

418-
def test_search_reset(self):
419-
self.assertEqual(Document.objects.exclude(search=None).count(), 6)
420-
self.assertEqual(Document.objects.search_reset(), 6)
421-
self.assertEqual(Document.objects.exclude(search=None).count(), 0)
422-
423-
def test_search_update(self):
424-
self.assertEqual(Document.objects.exclude(search=None).count(), 6)
425-
self.assertEqual(Document.objects.search_update(), 6)
426-
self.assertEqual(Document.objects.exclude(search=None).count(), 6)
427-
428415
def test_search_highlight_stemmed(self):
429416
# The issue only manifests itself when the default search config is not english
430417
with connection.cursor() as cursor:
431418
cursor.execute("SET default_text_search_config TO 'simple'", [])
432419

433-
doc = self.release.documents.create(
420+
self.release.documents.create(
434421
config="english",
435422
path="/",
436423
title="triaging tickets",
437424
metadata={"body": "text containing the word triaging", "breadcrumbs": []},
438425
)
439-
doc.search = DOCUMENT_SEARCH_VECTOR
440-
doc.save(update_fields=["search"])
441426

442427
self.assertQuerySetEqual(
443428
Document.objects.search("triaging", self.release),

docs/tests/test_views.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,6 @@ def test_code_links(self):
195195
Document.objects.bulk_create(
196196
[Document(**queryset_data), Document(**empty_page_data)]
197197
)
198-
Document.objects.search_update()
199198
base_url = reverse_with_host(
200199
"document-detail",
201200
host="docs",

0 commit comments

Comments
 (0)