diff --git a/readthedocs/restapi/urls.py b/readthedocs/restapi/urls.py index 594d81882b2..39f9dcd7a58 100644 --- a/readthedocs/restapi/urls.py +++ b/readthedocs/restapi/urls.py @@ -49,13 +49,14 @@ url(r'index_search/', search_views.index_search, name='index_search'), - url(r'search/$', views.search_views.search, name='api_search'), + # url(r'search/$', views.search_views.search, name='api_search'), url(r'search/project/$', search_views.project_search, name='api_project_search'), url(r'search/section/$', search_views.section_search, name='api_section_search'), + url(r'^docsearch/$', PageSearchAPIView.as_view(), name='doc_search'), ] task_urls = [ @@ -86,15 +87,11 @@ name='api_webhook'), ] -api_search_urls = [ - url(r'^docsearch/$', PageSearchAPIView.as_view(), name='doc_search'), -] urlpatterns += function_urls urlpatterns += task_urls urlpatterns += search_urls urlpatterns += integration_urls -urlpatterns += api_search_urls try: from readthedocsext.donate.restapi.urls import urlpatterns as sustainability_urls diff --git a/readthedocs/search/api.py b/readthedocs/search/api.py index 2b2855618c0..8e72547d195 100644 --- a/readthedocs/search/api.py +++ b/readthedocs/search/api.py @@ -1,4 +1,6 @@ from rest_framework import generics +from rest_framework import exceptions +from rest_framework.exceptions import ValidationError from readthedocs.search.documents import PageDocument from readthedocs.search.filters import SearchFilterBackend @@ -19,6 +21,20 @@ def get_queryset(self): So for searching, its possible to return ``Search`` object instead of queryset. The ``filter_backends`` and ``pagination_class`` is compatible with ``Search`` """ + # Validate all the required params are there + self.validate_query_params() query = self.request.query_params.get('query', '') queryset = PageDocument.simple_search(query=query) return queryset + + def validate_query_params(self): + required_query_params = set(['query', 'project', 'version']) + request_params = set(self.request.query_params.keys()) + missing_params = required_query_params - request_params + if missing_params: + errors = [] + for param in missing_params: + e = {param: "This query param is required"} + errors.append(e) + + raise ValidationError(errors) diff --git a/readthedocs/search/documents.py b/readthedocs/search/documents.py index 575f4f83322..3cc70343c35 100644 --- a/readthedocs/search/documents.py +++ b/readthedocs/search/documents.py @@ -90,8 +90,9 @@ def faceted_search(cls, query, projects_list=None, versions_list=None, using=Non def simple_search(cls, query, using=None, index=None): es_search = cls.search(using=using, index=index) es_query = cls.get_es_query(query=query) + highlighted_fields = [f.split('^', 1)[0] for f in cls.search_fields] - es_search = es_search.query(es_query).highlight('content') + es_search = es_search.query(es_query).highlight(*highlighted_fields) return es_search @classmethod diff --git a/readthedocs/search/serializers.py b/readthedocs/search/serializers.py index 4327c07d0ae..dd3ad346eef 100644 --- a/readthedocs/search/serializers.py +++ b/readthedocs/search/serializers.py @@ -2,6 +2,8 @@ class PageSearchSerializer(serializers.Serializer): + project = serializers.CharField() + version = serializers.CharField() title = serializers.CharField() path = serializers.CharField() highlight = serializers.SerializerMethodField() diff --git a/readthedocs/search/tests/test_api.py b/readthedocs/search/tests/test_api.py index 4133fb5aa85..f02ab7b9e78 100644 --- a/readthedocs/search/tests/test_api.py +++ b/readthedocs/search/tests/test_api.py @@ -1,12 +1,15 @@ import pytest from django.core.urlresolvers import reverse +from django_dynamic_fixture import G +from readthedocs.builds.models import Version +from readthedocs.projects.models import HTMLFile from readthedocs.search.tests.utils import get_search_query_from_project_file @pytest.mark.django_db @pytest.mark.search -class TestPageSearch(object): +class TestDocumentSearch(object): url = reverse('doc_search') @pytest.mark.parametrize('data_type', ['content', 'headers', 'title']) @@ -16,149 +19,70 @@ def test_search_works(self, api_client, project, data_type, page_num): data_type=data_type) version = project.versions.all()[0] - url = reverse('doc_search') - resp = api_client.get(url, {'project': project.slug, 'version': version.slug, 'query': query}) - data = resp.data - assert len(data['results']) == 1 - - # @pytest.mark.parametrize('case', ['upper', 'lower', 'title']) - # def test_file_search_case_insensitive(self, client, project, case): - # """Check File search is case insensitive - # - # It tests with uppercase, lowercase and camelcase - # """ - # query_text = get_search_query_from_project_file(project_slug=project.slug) - # - # cased_query = getattr(query_text, case) - # query = cased_query() - # - # result, _ = self._get_search_result(url=self.url, client=client, - # search_params={'q': query, 'type': 'file'}) - # - # assert len(result) == 1 - # # Check the actual text is in the result, not the cased one - # assert query_text in result.text() - # - # def test_file_search_exact_match(self, client, project): - # """Check quoted query match exact phrase - # - # Making a query with quoted text like ``"foo bar"`` should match - # exactly ``foo bar`` phrase. - # """ - # - # # `Github` word is present both in `kuma` and `pipeline` files - # # But the phrase Github can is available only in kuma docs. - # # So search with this phrase to check - # query = r'"GitHub can"' - # - # result, _ = self._get_search_result(url=self.url, client=client, - # search_params={'q': query, 'type': 'file'}) - # - # assert len(result) == 1 - # - # def test_page_search_not_return_removed_page(self, client, project): - # """Check removed page are not in the search index""" - # query = get_search_query_from_project_file(project_slug=project.slug) - # # Make a query to check it returns result - # result, _ = self._get_search_result(url=self.url, client=client, - # search_params={'q': query, 'type': 'file'}) - # assert len(result) == 1 - # - # # Delete all the HTML files of the project - # HTMLFile.objects.filter(project=project).delete() - # # Run the query again and this time there should not be any result - # result, _ = self._get_search_result(url=self.url, client=client, - # search_params={'q': query, 'type': 'file'}) - # assert len(result) == 0 - # - # def test_file_search_show_projects(self, client, all_projects): - # """Test that search result page shows list of projects while searching for files""" - # - # # `Github` word is present both in `kuma` and `pipeline` files - # # so search with this phrase - # result, page = self._get_search_result(url=self.url, client=client, - # search_params={'q': 'GitHub', 'type': 'file'}) - # - # # There should be 2 search result - # assert len(result) == 2 - # - # # there should be 2 projects in the left side column - # content = page.find('.navigable .project-list') - # assert len(content) == 2 - # text = content.text() - # - # # kuma and pipeline should be there - # assert 'kuma' and 'pipeline' in text - # - # def test_file_search_filter_by_project(self, client): - # """Test that search result are filtered according to project""" - # - # # `Github` word is present both in `kuma` and `pipeline` files - # # so search with this phrase but filter through `kuma` project - # search_params = {'q': 'GitHub', 'type': 'file', 'project': 'kuma'} - # result, page = self._get_search_result(url=self.url, client=client, - # search_params=search_params) - # - # # There should be 1 search result as we have filtered - # assert len(result) == 1 - # content = page.find('.navigable .project-list') - # - # # kuma should should be there only - # assert 'kuma' in result.text() - # assert 'pipeline' not in result.text() - # - # # But there should be 2 projects in the left side column - # # as the query is present in both projects - # content = page.find('.navigable .project-list') - # if len(content) != 2: - # pytest.xfail("failing because currently all projects are not showing in project list") - # else: - # assert 'kuma' and 'pipeline' in content.text() - # - # @pytest.mark.xfail(reason="Versions are not showing correctly! Fixme while rewrite!") - # def test_file_search_show_versions(self, client, all_projects, es_index, settings): - # # override the settings to index all versions - # settings.INDEX_ONLY_LATEST = False - # - # project = all_projects[0] - # # Create some versions of the project - # versions = [G(Version, project=project) for _ in range(3)] - # - # query = get_search_query_from_project_file(project_slug=project.slug) - # - # result, page = self._get_search_result(url=self.url, client=client, - # search_params={'q': query, 'type': 'file'}) - # - # # There should be only one result because by default - # # only latest version result should be there - # assert len(result) == 1 - # - # content = page.find('.navigable .version-list') - # # There should be total 4 versions - # # one is latest, and other 3 that we created above - # assert len(content) == 4 - # - # project_versions = [v.slug for v in versions] + [LATEST] - # content_versions = [] - # for element in content: - # text = element.text_content() - # # strip and split to keep the version slug only - # slug = text.strip().split('\n')[0] - # content_versions.append(slug) - # - # assert sorted(project_versions) == sorted(content_versions) - # - # def test_file_search_subprojects(self, client, all_projects, es_index): - # """File search should return results from subprojects also""" - # project = all_projects[0] - # subproject = all_projects[1] - # # Add another project as subproject of the project - # project.add_subproject(subproject) - # - # # Now search with subproject content but explicitly filter by the parent project - # query = get_search_query_from_project_file(project_slug=subproject.slug) - # search_params = {'q': query, 'type': 'file', 'project': project.slug} - # result, page = self._get_search_result(url=self.url, client=client, - # search_params=search_params) - # - # assert len(result) == 1 \ No newline at end of file + search_params = {'project': project.slug, 'version': version.slug, 'query': query} + resp = api_client.get(self.url, search_params) + assert resp.status_code == 200 + + data = resp.data['results'] + assert len(data) == 1 + project_data = data[0] + assert project_data['project'] == project.slug + + # Check highlight return correct object + all_highlights = project_data['highlight'][data_type] + for highlight in all_highlights: + # Make it lower because our search is case insensitive + assert query.lower() in highlight.lower() + + def test_doc_search_filter_by_project(self, api_client): + """Test Doc search result are filtered according to project""" + + # `Github` word is present both in `kuma` and `pipeline` files + # so search with this phrase but filter through `kuma` project + search_params = {'query': 'GitHub', 'project': 'kuma', 'version': 'latest'} + resp = api_client.get(self.url, search_params) + assert resp.status_code == 200 + + data = resp.data['results'] + assert len(data) == 1 + assert data[0]['project'] == 'kuma' + + def test_doc_search_filter_by_version(self, api_client, project): + """Test Doc search result are filtered according to version""" + query = get_search_query_from_project_file(project_slug=project.slug) + latest_version = project.versions.all()[0] + # Create another version + dummy_version = G(Version, project=project) + # Create HTMLFile same as the latest version + latest_version_files = HTMLFile.objects.all().filter(version=latest_version) + for f in latest_version_files: + f.version = dummy_version + # Make primary key to None, so django will create new object + f.pk = None + f.save() + + search_params = {'query': query, 'project': project.slug, 'version': dummy_version.slug} + resp = api_client.get(self.url, search_params) + assert resp.status_code == 200 + + data = resp.data['results'] + assert len(data) == 1 + assert data[0]['project'] == project.slug + + def test_doc_search_subprojects(self, api_client, all_projects): + """Test Document search return results from subprojects also""" + project = all_projects[0] + subproject = all_projects[1] + version = project.versions.all()[0] + # Add another project as subproject of the project + project.add_subproject(subproject) + + # Now search with subproject content but explicitly filter by the parent project + query = get_search_query_from_project_file(project_slug=subproject.slug) + search_params = {'query': query, 'project': project.slug, 'version': version.slug} + resp = api_client.get(self.url, search_params) + assert resp.status_code == 200 + + data = resp.data['results'] + assert len(data) == 1 + assert data[0]['project'] == subproject.slug