Try to fix Elastic connection pooling issues #5763

Merged · 4 commits · Jun 4, 2019
19 changes: 16 additions & 3 deletions readthedocs/search/documents.py
@@ -3,6 +3,8 @@
 from django.conf import settings
 from django_elasticsearch_dsl import DocType, Index, fields
 
+from elasticsearch import Elasticsearch
+
 from readthedocs.projects.models import HTMLFile, Project
 from readthedocs.sphinx_domains.models import SphinxDomain
 
@@ -22,8 +24,19 @@
 log = logging.getLogger(__name__)
 
 
+class RTDDocTypeMixin:
+
+    def update(self, *args, **kwargs):
+        # Hack a fix to our broken connection pooling
+        # This creates a new connection on every request,
+        # but actually works :)
+        log.info('Hacking Elastic indexing to fix connection pooling')
+        self.using = Elasticsearch(**settings.ELASTICSEARCH_DSL['default'])
+        super().update(*args, **kwargs)
+
+
 @domain_index.doc_type
-class SphinxDomainDocument(DocType):
+class SphinxDomainDocument(RTDDocTypeMixin, DocType):
     project = fields.KeywordField(attr='project.slug')
     version = fields.KeywordField(attr='version.slug')
     role_name = fields.KeywordField(attr='role_name')
@@ -63,7 +76,7 @@ def get_queryset(self):
 
 
 @project_index.doc_type
-class ProjectDocument(DocType):
+class ProjectDocument(RTDDocTypeMixin, DocType):
 
     # Metadata
     url = fields.TextField(attr='get_absolute_url')
@@ -97,7 +110,7 @@ def faceted_search(cls, query, user, language=None):
 
 
 @page_index.doc_type
-class PageDocument(DocType):
+class PageDocument(RTDDocTypeMixin, DocType):
 
     # Metadata
     project = fields.KeywordField(attr='project.slug')
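Why assigning a client instance to `self.using` works: in elasticsearch_dsl, `connections.get_connection()` returns its argument as-is whenever it is not a string alias, so a freshly built `Elasticsearch` client bypasses the registry's cached (and here, misbehaving) pooled connection entirely. A minimal sketch of that mechanism, assuming elasticsearch_dsl 6.x behavior and a hypothetical `ELASTICSEARCH_DSL` value:

```python
from elasticsearch import Elasticsearch
from elasticsearch_dsl.connections import connections

# Hypothetical settings dict; the real value lives in Django settings
# as settings.ELASTICSEARCH_DSL['default'].
ELASTICSEARCH_DSL = {'default': {'hosts': 'localhost:9200'}}

# A string alias resolves through the shared connection registry --
# the pooled connection this PR is working around.
connections.configure(**ELASTICSEARCH_DSL)
pooled = connections.get_connection('default')

# A client instance is returned unchanged (elasticsearch_dsl 6.x), so
# `self.using = Elasticsearch(...)` makes every subsequent request go
# over a brand-new connection instead of the broken pooled one.
fresh = Elasticsearch(**ELASTICSEARCH_DSL['default'])
assert connections.get_connection(fresh) is fresh
```

The trade-off is exactly what the comment in the mixin admits: every indexing call now pays for a new connection, which is acceptable as a stopgap but not a long-term fix.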
9 changes: 9 additions & 0 deletions readthedocs/search/faceted_search.py
@@ -1,8 +1,11 @@
 import logging
 
+from elasticsearch import Elasticsearch
 from elasticsearch_dsl import FacetedSearch, TermsFacet
 from elasticsearch_dsl.query import Bool, SimpleQueryString
 
+from django.conf import settings
+
 from readthedocs.core.utils.extend import SettingsOverrideObject
 from readthedocs.search.documents import (
     PageDocument,
@@ -40,6 +43,12 @@ def __init__(self, user, **kwargs):
             if f in kwargs:
                 del kwargs[f]
 
+        # Hack a fix to our broken connection pooling
+        # This creates a new connection on every request,
+        # but actually works :)
+        log.info('Hacking Elastic to fix search connection pooling')
+        self.using = Elasticsearch(**settings.ELASTICSEARCH_DSL['default'])
+
         super().__init__(**kwargs)
 
     def query(self, search, query):
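Note the ordering: `self.using` is assigned before `super().__init__(**kwargs)` because `FacetedSearch.__init__` builds its underlying `Search` object immediately and passes `self.using` along, so a later assignment would be ignored. A hedged sketch of the same override expressed as a class attribute, with the subclass name, index, fields, and host all assumed for illustration rather than taken from this PR:

```python
from elasticsearch import Elasticsearch
from elasticsearch_dsl import FacetedSearch, TermsFacet

class PageSearch(FacetedSearch):
    """Hypothetical subclass for illustration only."""

    index = 'page'
    fields = ['title', 'content']
    facets = {'project': TermsFacet(field='project')}
    # `using` may be a connection alias or a concrete client; a client
    # set here (or in __init__ before super().__init__) sidesteps the
    # shared 'default' connection entirely.
    using = Elasticsearch(hosts='localhost:9200')  # assumed host

# The query string is positional; execute() runs the faceted search
# over the fresh client configured above.
results = PageSearch('connection pooling').execute()
for hit in results:
    print(hit.meta.score, hit.title)
```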
19 changes: 11 additions & 8 deletions readthedocs/search/signals.py
@@ -61,14 +61,17 @@ def remove_indexed_file(sender, instance_list, **kwargs):
 
     if version and commit:
         # Sanity check by deleting all old files not in this commit
-        log.info('Deleting old commits from search index')
-        document().search().filter(
-            'term', version=version.slug,
-        ).filter(
-            'term', project=version.project.slug,
-        ).exclude(
-            'term', commit=commit,
-        ).delete()
+        try:
+            log.info('Deleting old commits from search index')
+            document().search().filter(
+                'term', version=version.slug,
+            ).filter(
+                'term', project=version.project.slug,
+            ).exclude(
+                'term', commit=commit,
+            ).delete()
+        except Exception:
+            log.warning('Unable to delete a subset of files. Continuing.', exc_info=True)
 
 
 @receiver(post_save, sender=Project)
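The chained calls combine as version AND project AND NOT commit, and `.delete()` turns the query into a delete-by-query request, removing files indexed from earlier commits of the same version. A standalone sketch of the same chain, with the client, index name, and filter values assumed for illustration:

```python
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search

client = Elasticsearch(hosts='localhost:9200')  # assumed host

# Stale files: same project and version, but indexed from an older
# commit than the one just built. All values here are hypothetical.
stale = (
    Search(using=client, index='page')
    .filter('term', version='latest')
    .filter('term', project='my-project')
    .exclude('term', commit='2ff1a9e')
)
stale.delete()  # delete-by-query; removes every matching document
```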
6 changes: 5 additions & 1 deletion readthedocs/search/tasks.py
@@ -60,7 +60,11 @@ def delete_objects_in_es(app_label, model_name, document_class, objects_id):
     queryset = doc_obj.get_queryset()
     queryset = queryset.filter(id__in=objects_id)
     log.info("Deleting model: %s, '%s' objects", model.__name__, queryset.count())
-    doc_obj.update(queryset.iterator(), action='delete')
+    try:
+        # This is a common case that we should be handling a better way
+        doc_obj.update(queryset.iterator(), action='delete')
+    except Exception:
+        log.warning('Unable to delete a subset of files. Continuing.', exc_info=True)
 
 
 @app.task(queue='web')
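For context, `delete_objects_in_es` appears to run as a Celery task (its sibling below is decorated with `@app.task(queue='web')`), so callers dispatch it asynchronously with the arguments visible in the hunk header. A hypothetical dispatch, with every argument value assumed and the exact shape of `document_class` unverified:

```python
# Hypothetical invocation of the task patched above; assumes the
# function carries an @app.task(queue='web') decorator like the task
# that follows it in this file.
from readthedocs.search.tasks import delete_objects_in_es

delete_objects_in_es.delay(
    app_label='projects',           # Django app containing the model (assumed)
    model_name='HTMLFile',          # model whose documents are being removed (assumed)
    document_class='PageDocument',  # serializable reference to the DocType (assumed format)
    objects_id=[1, 2, 3],           # primary keys already deleted from the database
)
```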