Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP some work on integration tests #338

Open
wants to merge 1 commit into
base: ARXIVNG-3598-integration-test
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions Dockerfile-integration-test
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Runtime for running integration tests.
#
# Builds on the arxiv/search image, copies in the bulk indexing script and
# the integration-test driver, and runs the driver by default.
#
# NOTE(review): the usage example below appears to be carried over from the
# bulk-index image (it runs arxiv/search-index with a file argument) --
# confirm whether it still applies to this image.
#
# $ mkdir /tmp/to_index
# $ cp arxiv_id_dump.txt /tmp/to_index
# $ docker run -it --network=arxivsearch_es_stack \
# >     -v /tmp/to_index:/to_index \
# >     -e ELASTICSEARCH_SERVICE_HOST=elasticsearch \
# >     arxiv/search-index /to_index/arxiv_id_dump.txt
#
# See also ELASTICSEARCH_* and METADATA_ENDPOINT parameters, below.

FROM arxiv/search:0.5.5

ENV PATH="/opt/arxiv:${PATH}"

# COPY (not ADD): these are plain local files, no URL fetch or archive
# extraction is wanted. integration_test.sh must be copied because CMD runs it.
COPY bulk_index.py integration_test.sh /opt/arxiv/

WORKDIR /opt/arxiv/

ENV LC_ALL=en_US.utf8
ENV LANG=en_US.utf8
ENV LOGLEVEL=40
ENV FLASK_DEBUG=1
ENV FLASK_APP=/opt/arxiv/app.py

# Defaults; override with -e / compose `environment` at run time.
ENV ELASTICSEARCH_SERVICE_HOST=127.0.0.1
ENV ELASTICSEARCH_SERVICE_PORT=9200
ENV ELASTICSEARCH_SERVICE_PORT_9200_PROTO=http
ENV ELASTICSEARCH_USER=elastic
ENV ELASTICSEARCH_PASSWORD=changeme
ENV METADATA_ENDPOINT=https://localhost:8080/docmeta_bulk/
ENV METADATA_VERIFY_CERT=True

VOLUME /to_index

# Exec form so the script's exit status is the container's exit status;
# invoking through bash avoids depending on the file's executable bit.
CMD ["bash", "./integration_test.sh"]
85 changes: 85 additions & 0 deletions docker-compose-integration-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# Provides the Elasticsearch, indexing agent, and localstack services used
# by the integration tests, plus the integration-test runner itself.
# See README.md for instructions on use.
#
# docker-compose v3 lacks `extends`; if another extension mechanism is
# found, consider using it to share the environment that `agent` and
# `integration-test` currently duplicate.
---
version: "3.4"
services:
  elasticsearch:
    build:
      context: .
      dockerfile: Dockerfile-elasticsearch
    container_name: elasticsearch
    # environment: ['http.host=0.0.0.0', 'transport.host=127.0.0.1', 'ELASTIC_PASSWORD=${ELASTIC_PASSWORD}']
    environment:
      - "http.host=0.0.0.0"
      - "transport.host=127.0.0.1"
    # Published on the loopback interface only.
    ports:
      - "127.0.0.1:9200:9200"
      - "127.0.0.1:9300:9300"
    networks:
      - es_stack

  # kibana is not needed

  agent:
    build:
      context: .
      dockerfile: Dockerfile-agent
    container_name: agent
    environment:
      # Placeholder credentials; the Kinesis endpoint below is localstack.
      AWS_ACCESS_KEY_ID: "foo"
      AWS_SECRET_ACCESS_KEY: "bar"
      ELASTICSEARCH_SERVICE_HOST: "elasticsearch"
      ELASTICSEARCH_SERVICE_PORT: "9200"
      ELASTICSEARCH_SERVICE_PORT_9200_PROTO: "http"
      ELASTICSEARCH_USER: "elastic"
      ELASTICSEARCH_PASSWORD: "changeme"
      ELASTICSEARCH_VERIFY: "false"
      KINESIS_STREAM: "MetadataIsAvailable"
      KINESIS_SHARD_ID: "0"
      KINESIS_ENDPOINT: "https://localstack:4568"
      KINESIS_VERIFY: "false"
      KINESIS_START_TYPE: "TRIM_HORIZON"
      # Quoted so every environment value is a string.
      LOGLEVEL: "20"
    networks:
      - es_stack
    depends_on:
      - localstack
      - elasticsearch

  localstack:
    image: atlassianlabs/localstack
    container_name: localstack
    networks:
      - es_stack
    ports:
      - "5568:4568"
    environment:
      USE_SSL: "true"
      DEBUG: "true"

  integration-test:
    build:
      context: .
      dockerfile: Dockerfile
      target: search_web_app_test
    container_name: search-integration-test
    environment:
      # Mirrors the `agent` environment above; keep the two in sync.
      AWS_ACCESS_KEY_ID: "foo"
      AWS_SECRET_ACCESS_KEY: "bar"
      ELASTICSEARCH_SERVICE_HOST: "elasticsearch"
      ELASTICSEARCH_SERVICE_PORT: "9200"
      ELASTICSEARCH_SERVICE_PORT_9200_PROTO: "http"
      ELASTICSEARCH_USER: "elastic"
      ELASTICSEARCH_PASSWORD: "changeme"
      ELASTICSEARCH_VERIFY: "false"
      KINESIS_STREAM: "MetadataIsAvailable"
      KINESIS_SHARD_ID: "0"
      KINESIS_ENDPOINT: "https://localstack:4568"
      KINESIS_VERIFY: "false"
      KINESIS_START_TYPE: "TRIM_HORIZON"
      LOGLEVEL: "20"
    networks:
      - es_stack
    depends_on:
      - elasticsearch
      - agent

networks:
  es_stack: {}
13 changes: 13 additions & 0 deletions integration_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/bash
# Integration-test driver: creates the search index, loads the bulk
# dataset, then runs the integration test module.
#
# errexit: abort on the first failing step so a failed index build or load
# is reported instead of running the tests against a bad index.
set -o errexit

echo "Waiting for ES to start then creating index..."
FLASK_APP=app.py FLASK_DEBUG=1 pipenv run python create_index.py --wait
echo "ES started and index created"

echo "About to fill the index with the standard bulk dataset..."
FLASK_APP=app.py FLASK_DEBUG=1 pipenv run python bulk_index.py
echo "Done filling index."

echo "About to start integration tests"
# NOTE(review): nose2 is invoked directly while the python steps above go
# through `pipenv run` -- confirm nose2 is on PATH in the image.
WITH_INTEGRATION=True nose2 tests.integration.test_integration
# The original line lacked its closing quote, which made bash fail with an
# unterminated-string error after the tests had run.
echo "Done with integration tests."

144 changes: 144 additions & 0 deletions search/controllers/api/tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
"""Tests for the API search controller, :mod:`search.controllers.api`."""

from unittest import TestCase, mock
from datetime import date, datetime
from dateutil.relativedelta import relativedelta
from werkzeug import MultiDict
from werkzeug.exceptions import InternalServerError, BadRequest

from arxiv import status

from search.domain import Query, DateRange, FieldedSearchTerm, Classification,\
AdvancedQuery, DocumentSet
from search.controllers import api
from search.domain import api as api_domain
from search.services.index import IndexConnectionError, QueryError


class TestAPISearch(TestCase):
    """Exercise :func:`.api.search` with the index service mocked out."""

    @mock.patch(f'{api.__name__}.index')
    def test_no_params(self, mock_index):
        """An empty request succeeds and falls back to the default fields."""
        response, status_code, _ = api.search(MultiDict({}))

        self.assertEqual(status_code, status.HTTP_200_OK, "Returns 200 OK")
        self.assertIn("results", response, "Results are returned")
        self.assertIn("query", response, "Query object is returned")

        required = api_domain.get_required_fields()
        defaults = api_domain.get_default_extra_fields()
        included = set(response["query"].include_fields)
        self.assertEqual(included, set(required + defaults),
                         "Default set of fields is included")

    @mock.patch(f'{api.__name__}.index')
    def test_include_fields(self, mock_index):
        """Explicitly requested fields are added to the required ones."""
        requested = ['title', 'abstract', 'authors']
        response, status_code, _ = api.search(
            MultiDict({'include': requested})
        )

        self.assertEqual(status_code, status.HTTP_200_OK, "Returns 200 OK")
        self.assertIn("results", response, "Results are returned")
        self.assertIn("query", response, "Query object is returned")

        wanted = set(api_domain.get_required_fields() + requested)
        included = set(response["query"].include_fields)
        self.assertEqual(included, wanted, "Requested fields are included")

    @mock.patch(f'{api.__name__}.index')
    def test_group_primary_classification(self, mock_index):
        """A group identifier is passed to the index as a group."""
        group_id = 'grp_physics'
        request_args = MultiDict({'primary_classification': group_id})
        _, status_code, _ = api.search(request_args)

        self.assertEqual(status_code, status.HTTP_200_OK, "Returns 200 OK")
        # First positional argument of the search call is the query.
        positional, _ = mock_index.search.call_args
        classifications = positional[0].primary_classification
        self.assertEqual(len(classifications), 1)
        self.assertEqual(classifications[0],
                         Classification(group={'id': group_id}))

    @mock.patch(f'{api.__name__}.index')
    def test_archive_primary_classification(self, mock_index):
        """An archive identifier is passed to the index as an archive."""
        archive_id = 'physics'
        request_args = MultiDict({'primary_classification': archive_id})
        _, status_code, _ = api.search(request_args)

        self.assertEqual(status_code, status.HTTP_200_OK, "Returns 200 OK")
        positional, _ = mock_index.search.call_args
        classifications = positional[0].primary_classification
        self.assertEqual(len(classifications), 1)
        self.assertEqual(classifications[0],
                         Classification(archive={'id': archive_id}))

    @mock.patch(f'{api.__name__}.index')
    def test_archive_subsumed_classification(self, mock_index):
        """A subsumed archive yields two classifications on the query."""
        archive_id = 'chao-dyn'
        request_args = MultiDict({'primary_classification': archive_id})
        _, status_code, _ = api.search(request_args)

        self.assertEqual(status_code, status.HTTP_200_OK, "Returns 200 OK")
        positional, _ = mock_index.search.call_args
        classifications = positional[0].primary_classification
        self.assertEqual(len(classifications), 2)
        # The requested archive comes first, followed by nlin.CD.
        self.assertEqual(classifications[0],
                         Classification(archive={'id': archive_id}))
        self.assertEqual(classifications[1],
                         Classification(archive={'id': 'nlin.CD'}),
                         "The canonical archive is used instead")

    @mock.patch(f'{api.__name__}.index')
    def test_category_primary_classification(self, mock_index):
        """A category identifier is passed to the index as a category."""
        category_id = 'cs.DL'
        request_args = MultiDict({'primary_classification': category_id})
        _, status_code, _ = api.search(request_args)

        self.assertEqual(status_code, status.HTTP_200_OK, "Returns 200 OK")
        positional, _ = mock_index.search.call_args
        classifications = positional[0].primary_classification
        self.assertEqual(len(classifications), 1)
        self.assertEqual(classifications[0],
                         Classification(category={'id': category_id}))

    @mock.patch(f'{api.__name__}.index')
    def test_bad_classification(self, mock_index):
        """An unrecognised classification raises ``BadRequest``."""
        request_args = MultiDict({'primary_classification': 'nonsense'})
        with self.assertRaises(BadRequest):
            api.search(request_args)

    @mock.patch(f'{api.__name__}.index')
    def test_with_start_date(self, mock_index):
        """A start date alone is parsed and uses the default date type."""
        request_args = MultiDict({'start_date': '1999-01-02'})
        _, status_code, _ = api.search(request_args)

        self.assertEqual(status_code, status.HTTP_200_OK, "Returns 200 OK")
        positional, _ = mock_index.search.call_args
        sent_query = positional[0]
        self.assertIsNotNone(sent_query.date_range)

        start = sent_query.date_range.start_date
        self.assertEqual(start.year, 1999)
        self.assertEqual(start.month, 1)
        self.assertEqual(start.day, 2)
        self.assertEqual(sent_query.date_range.date_type,
                         DateRange.SUBMITTED_CURRENT,
                         "Submitted date of current version is the default")

    @mock.patch(f'{api.__name__}.index')
    def test_with_end_dates_and_type(self, mock_index):
        """An end date with an explicit date type is parsed and applied."""
        request_args = MultiDict({
            'end_date': '1999-01-02',
            'date_type': 'announced_date_first',
        })
        _, status_code, _ = api.search(request_args)

        self.assertEqual(status_code, status.HTTP_200_OK, "Returns 200 OK")
        positional, _ = mock_index.search.call_args
        sent_query = positional[0]
        self.assertIsNotNone(sent_query.date_range)

        end = sent_query.date_range.end_date
        self.assertEqual(end.year, 1999)
        self.assertEqual(end.month, 1)
        self.assertEqual(end.day, 2)

        self.assertEqual(sent_query.date_range.date_type,
                         DateRange.ANNOUNCED)
Loading