Try to fix Elastic connection pooling issues #5763

Merged · 4 commits · Jun 4, 2019
19 changes: 16 additions & 3 deletions readthedocs/search/documents.py
@@ -3,6 +3,8 @@
 from django.conf import settings
 from django_elasticsearch_dsl import DocType, Index, fields
 
+from elasticsearch import Elasticsearch
+
 from readthedocs.projects.models import HTMLFile, Project
 from readthedocs.sphinx_domains.models import SphinxDomain
 
@@ -22,8 +24,19 @@
 log = logging.getLogger(__name__)
 
 
+class RTDDocTypeMixin:
+
+    def update(self, *args, **kwargs):
+        # Hack a fix to our broken connection pooling
+        # This creates a new connection on every request,
+        # but actually works :)
+        log.info('Hacking Elastic indexing to fix connection pooling')
+        self.using = Elasticsearch(**settings.ELASTICSEARCH_DSL['default'])
+        super().update(*args, **kwargs)
+
+
 @domain_index.doc_type
-class SphinxDomainDocument(DocType):
+class SphinxDomainDocument(RTDDocTypeMixin, DocType):
     project = fields.KeywordField(attr='project.slug')
     version = fields.KeywordField(attr='version.slug')
     role_name = fields.KeywordField(attr='role_name')
@@ -63,7 +76,7 @@ def get_queryset(self):
 
 
 @project_index.doc_type
-class ProjectDocument(DocType):
+class ProjectDocument(RTDDocTypeMixin, DocType):
 
     # Metadata
     url = fields.TextField(attr='get_absolute_url')
@@ -97,7 +110,7 @@ def faceted_search(cls, query, user, language=None):
 
 
 @page_index.doc_type
-class PageDocument(DocType):
+class PageDocument(RTDDocTypeMixin, DocType):
 
     # Metadata
     project = fields.KeywordField(attr='project.slug')
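Why assigning a client instance to `self.using` works: in elasticsearch_dsl, `connections.get_connection()` returns its argument as-is whenever it is not a string alias, so a freshly built `Elasticsearch` client bypasses the registry's cached (and here, misbehaving) pooled connection entirely. A minimal sketch of that mechanism, assuming elasticsearch_dsl 6.x behavior and a hypothetical `ELASTICSEARCH_DSL` value:

```python
from elasticsearch import Elasticsearch
from elasticsearch_dsl.connections import connections

# Hypothetical settings dict; the real value lives in Django settings
# as settings.ELASTICSEARCH_DSL['default'].
ELASTICSEARCH_DSL = {'default': {'hosts': 'localhost:9200'}}

# A string alias resolves through the shared connection registry --
# the pooled connection this PR is working around.
connections.configure(**ELASTICSEARCH_DSL)
pooled = connections.get_connection('default')

# A client instance is returned unchanged (elasticsearch_dsl 6.x), so
# `self.using = Elasticsearch(...)` makes every subsequent request go
# over a brand-new connection instead of the broken pooled one.
fresh = Elasticsearch(**ELASTICSEARCH_DSL['default'])
assert connections.get_connection(fresh) is fresh
```

The trade-off is exactly what the comment in the mixin admits: every indexing call now pays for a new connection, which is acceptable as a stopgap but not a long-term fix.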
9 changes: 9 additions & 0 deletions readthedocs/search/faceted_search.py
@@ -1,8 +1,11 @@
 import logging
 
+from elasticsearch import Elasticsearch
 from elasticsearch_dsl import FacetedSearch, TermsFacet
 from elasticsearch_dsl.query import Bool, SimpleQueryString
 
+from django.conf import settings
+
 from readthedocs.core.utils.extend import SettingsOverrideObject
 from readthedocs.search.documents import (
     PageDocument,
@@ -40,6 +43,12 @@ def __init__(self, user, **kwargs):
             if f in kwargs:
                 del kwargs[f]
 
+        # Hack a fix to our broken connection pooling
+        # This creates a new connection on every request,
+        # but actually works :)
+        log.info('Hacking Elastic to fix search connection pooling')
+        self.using = Elasticsearch(**settings.ELASTICSEARCH_DSL['default'])
+
         super().__init__(**kwargs)
 
     def query(self, search, query):
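Note the ordering: `self.using` is assigned before `super().__init__(**kwargs)` because `FacetedSearch.__init__` builds its underlying `Search` object immediately and passes `self.using` along, so a later assignment would be ignored. A hedged sketch of the same override expressed as a class attribute, with the subclass name, index, fields, and host all assumed for illustration rather than taken from this PR:

```python
from elasticsearch import Elasticsearch
from elasticsearch_dsl import FacetedSearch, TermsFacet

class PageSearch(FacetedSearch):
    """Hypothetical subclass for illustration only."""

    index = 'page'
    fields = ['title', 'content']
    facets = {'project': TermsFacet(field='project')}
    # `using` may be a connection alias or a concrete client; a client
    # set here (or in __init__ before super().__init__) sidesteps the
    # shared 'default' connection entirely.
    using = Elasticsearch(hosts='localhost:9200')  # assumed host

# The query string is positional; execute() runs the faceted search
# over the fresh client configured above.
results = PageSearch('connection pooling').execute()
for hit in results:
    print(hit.meta.score, hit.title)
```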
19 changes: 11 additions & 8 deletions readthedocs/search/signals.py
@@ -61,14 +61,17 @@ def remove_indexed_file(sender, instance_list, **kwargs):
 
     if version and commit:
         # Sanity check by deleting all old files not in this commit
-        log.info('Deleting old commits from search index')
-        document().search().filter(
-            'term', version=version.slug,
-        ).filter(
-            'term', project=version.project.slug,
-        ).exclude(
-            'term', commit=commit,
-        ).delete()
+        try:
+            log.info('Deleting old commits from search index')
+            document().search().filter(
+                'term', version=version.slug,
+            ).filter(
+                'term', project=version.project.slug,
+            ).exclude(
+                'term', commit=commit,
+            ).delete()
+        except Exception:
+            log.warning('Unable to delete a subset of files. Continuing.', exc_info=True)
 
 
 @receiver(post_save, sender=Project)
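The chained calls combine as version AND project AND NOT commit, and `.delete()` turns the query into a delete-by-query request, removing files indexed from earlier commits of the same version. A standalone sketch of the same chain, with the client, index name, and filter values assumed for illustration:

```python
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search

client = Elasticsearch(hosts='localhost:9200')  # assumed host

# Stale files: same project and version, but indexed from an older
# commit than the one just built. All values here are hypothetical.
stale = (
    Search(using=client, index='page')
    .filter('term', version='latest')
    .filter('term', project='my-project')
    .exclude('term', commit='2ff1a9e')
)
stale.delete()  # delete-by-query; removes every matching document
```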
6 changes: 5 additions & 1 deletion readthedocs/search/tasks.py
@@ -60,7 +60,11 @@ def delete_objects_in_es(app_label, model_name, document_class, objects_id):
     queryset = doc_obj.get_queryset()
     queryset = queryset.filter(id__in=objects_id)
     log.info("Deleting model: %s, '%s' objects", model.__name__, queryset.count())
-    doc_obj.update(queryset.iterator(), action='delete')
+    try:
+        # This is a common case that we should be handling a better way
+        doc_obj.update(queryset.iterator(), action='delete')
+    except Exception:
+        log.warning('Unable to delete a subset of files. Continuing.', exc_info=True)
 
 
 @app.task(queue='web')
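For context, `delete_objects_in_es` appears to run as a Celery task (its sibling below is decorated with `@app.task(queue='web')`), so callers dispatch it asynchronously with the arguments visible in the hunk header. A hypothetical dispatch, with every argument value assumed and the exact shape of `document_class` unverified:

```python
# Hypothetical invocation of the task patched above; assumes the
# function carries an @app.task(queue='web') decorator like the task
# that follows it in this file.
from readthedocs.search.tasks import delete_objects_in_es

delete_objects_in_es.delay(
    app_label='projects',           # Django app containing the model (assumed)
    model_name='HTMLFile',          # model whose documents are being removed (assumed)
    document_class='PageDocument',  # serializable reference to the DocType (assumed format)
    objects_id=[1, 2, 3],           # primary keys already deleted from the database
)
```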