Skip to content

Commit

Permalink
Merge pull request #4635 from safwanrahman/delete_old
Browse files Browse the repository at this point in the history
[Fix #4247] deleting old search code
  • Loading branch information
ericholscher authored Sep 17, 2018
2 parents 21fed3a + bf6ccbe commit 1417f86
Show file tree
Hide file tree
Showing 8 changed files with 2 additions and 956 deletions.
36 changes: 0 additions & 36 deletions readthedocs/projects/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,6 @@
from readthedocs.doc_builder.python_environments import Conda, Virtualenv
from readthedocs.projects.models import APIProject
from readthedocs.restapi.client import api as api_v2
from readthedocs.restapi.utils import index_search_request
from readthedocs.search.parse_json import process_all_json_files
from readthedocs.vcs_support import utils as vcs_support_utils
from readthedocs.worker import app
from .constants import LOG_TEMPLATE
Expand Down Expand Up @@ -902,40 +900,6 @@ def move_files(version_pk, hostname, html=False, localmedia=False, search=False,
Syncer.copy(from_path, to_path, host=hostname)


@app.task(queue='web')
def update_search(version_pk, commit, delete_non_commit_files=True):
    """
    Update the search index for one version's build output.

    :param version_pk: primary key of the ``Version`` to index
    :param commit: commit hash that produced the build output
    :param delete_non_commit_files: when True, remove indexed files that are
        not part of ``commit``
    """
    version = Version.objects.get(pk=version_pk)

    # Only Sphinx builds emit the JSON payload we know how to parse;
    # bail out early for anything else.
    if not version.project.is_type_sphinx:
        log.debug('Unknown documentation type: %s',
                  version.project.documentation_type)
        return

    page_list = process_all_json_files(version, build_dir=False)

    log_msg = ' '.join(page['path'] for page in page_list)
    log.info("(Search Index) Sending Data: %s [%s]", version.project.slug,
             log_msg)
    index_search_request(
        version=version,
        page_list=page_list,
        commit=commit,
        project_scale=0,
        page_scale=0,
        # Don't index sections to speed up indexing.
        # They aren't currently exposed anywhere.
        section=False,
        delete=delete_non_commit_files,
    )


@app.task(queue='web')
def symlink_project(project_pk):
project = Project.objects.get(pk=project_pk)
Expand Down
1 change: 0 additions & 1 deletion readthedocs/projects/views/public.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
from readthedocs.builds.views import BuildTriggerMixin
from readthedocs.projects.models import ImportedFile, Project
from readthedocs.search.documents import PageDocument
from readthedocs.search.indexes import PageIndex
from readthedocs.search.views import LOG_TEMPLATE

from .base import ProjectOnboardMixin
Expand Down
29 changes: 1 addition & 28 deletions readthedocs/restapi/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,7 @@
from rest_framework import routers

from readthedocs.constants import pattern_opts
from readthedocs.restapi import views
from readthedocs.restapi.views import (
core_views,
footer_views,
integrations,
search_views,
task_views,
)

from readthedocs.restapi.views import (core_views, footer_views, task_views, integrations)
from .views.model_views import (
BuildCommandViewSet,
BuildViewSet,
Expand Down Expand Up @@ -69,24 +61,6 @@
url(r'footer_html/', footer_views.footer_html, name='footer_html'),
]

# URL patterns for the search API endpoints: index updates, full-text
# search, and project- and section-scoped search.
search_urls = [
    url(
        r'index_search/',
        search_views.index_search,
        name='index_search',
    ),
    url(r'^search/$', views.search_views.search, name='api_search'),
    url(r'search/project/$',
        search_views.project_search,
        name='api_project_search',
    ),
    url(
        r'search/section/$',
        search_views.section_search,
        name='api_section_search',
    ),
]

task_urls = [
url(
r'jobs/status/(?P<task_id>[^/]+)/',
Expand Down Expand Up @@ -138,7 +112,6 @@

urlpatterns += function_urls
urlpatterns += task_urls
urlpatterns += search_urls
urlpatterns += integration_urls


Expand Down
115 changes: 0 additions & 115 deletions readthedocs/restapi/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from __future__ import (
absolute_import, division, print_function, unicode_literals)

import hashlib
import logging

from rest_framework.pagination import PageNumberPagination
Expand All @@ -13,7 +12,6 @@
NON_REPOSITORY_VERSIONS, STABLE,
STABLE_VERBOSE_NAME)
from readthedocs.builds.models import Version
from readthedocs.search.indexes import PageIndex, ProjectIndex, SectionIndex

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -154,119 +152,6 @@ def delete_versions(project, version_data):
return set()


def index_search_request(
        version, page_list, commit, project_scale, page_scale, section=True,
        delete=True):
    """
    Update search indexes with build output JSON.

    In order to keep sub-projects all indexed on the same shard, indexes will be
    updated using the parent project's slug as the routing value.

    :param version: the version whose pages are being indexed
    :param page_list: list of page dicts with ``path``, ``title``,
        ``headers``, ``content`` and ``sections`` keys
    :param commit: commit hash associated with this build
    :param project_scale: weight stored on the project document and added to
        each page's weight
    :param page_scale: weight added to each page/section document
    :param section: when True, also index each page's sections
    :param delete: when True, delete indexed pages whose stored commit does
        not match ``commit``
    """
    # TODO refactor this function
    # pylint: disable=too-many-locals
    project = version.project

    log_msg = ' '.join([page['path'] for page in page_list])
    log.info(
        'Updating search index: project=%s pages=[%s]',
        project.slug,
        log_msg,
    )

    # Index (upsert) the project-level document first.
    project_obj = ProjectIndex()
    project_obj.index_document(
        data={
            'id': project.pk,
            'name': project.name,
            'slug': project.slug,
            'description': project.description,
            'lang': project.language,
            'author': [user.username for user in project.users.all()],
            'url': project.get_absolute_url(),
            'tags': None,
            'weight': project_scale,
        })

    page_obj = PageIndex()
    section_obj = SectionIndex()
    index_list = []
    section_index_list = []
    # Route every write through the project's own slug plus each parent
    # (super-)project slug, so sub-projects share the parent's shard.
    routes = [project.slug]
    routes.extend([p.parent.slug for p in project.superprojects.all()])
    for page in page_list:
        log.debug('Indexing page: %s:%s', project.slug, page['path'])
        # Stable document id derived from project/version/path.
        to_hash = '-'.join([project.slug, version.slug, page['path']])
        page_id = hashlib.md5(to_hash.encode('utf-8')).hexdigest()
        index_list.append({
            'id': page_id,
            'project': project.slug,
            'version': version.slug,
            'path': page['path'],
            'title': page['title'],
            'headers': page['headers'],
            'content': page['content'],
            'taxonomy': None,
            'commit': commit,
            'weight': page_scale + project_scale,
        })
        if section:
            for sect in page['sections']:
                # Section ids additionally include the section's own id.
                id_to_hash = '-'.join([
                    project.slug,
                    version.slug,
                    page['path'],
                    sect['id'],
                ])
                section_index_list.append({
                    'id': (hashlib.md5(id_to_hash.encode('utf-8')).hexdigest()),
                    'project': project.slug,
                    'version': version.slug,
                    'path': page['path'],
                    'page_id': sect['id'],
                    'title': sect['title'],
                    'content': sect['content'],
                    'weight': page_scale,
                })
            # NOTE(review): section_index_list accumulates across the outer
            # page loop, so each page re-sends every earlier page's sections
            # here, parented to the *current* page_id. This looks unintended
            # (duplicate work and possibly wrong parents) — confirm before
            # reusing this code.
            for route in routes:
                section_obj.bulk_index(
                    section_index_list,
                    parent=page_id,
                    routing=route,
                )

    for route in routes:
        page_obj.bulk_index(index_list, parent=project.slug, routing=route)

    if delete:
        log.info('Deleting files not in commit: %s', commit)
        # TODO: AK Make sure this works
        # Match this project+version but exclude documents from the current
        # commit, i.e. delete pages that no longer exist in this build.
        delete_query = {
            'query': {
                'bool': {
                    'must': [
                        {
                            'term': {
                                'project': project.slug,
                            },
                        },
                        {
                            'term': {
                                'version': version.slug,
                            },
                        },
                    ],
                    'must_not': {
                        'term': {
                            'commit': commit,
                        },
                    },
                },
            },
        }
        page_obj.delete_document(body=delete_query)


class RemoteOrganizationPagination(PageNumberPagination):
page_size = 25

Expand Down
151 changes: 0 additions & 151 deletions readthedocs/restapi/views/search_views.py

This file was deleted.

Loading

0 comments on commit 1417f86

Please sign in to comment.