diff --git a/.vscode/launch.json b/.vscode/launch.json index 9ae029d6b..786b5f0a5 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -59,6 +59,26 @@ "/*": "*", "/./~/*": "${webRoot}/node_modules/*" } + }, + { + "name": "celery", + "type": "debugpy", + "request": "launch", + "cwd": "${workspaceFolder}/backend", + "env": { + "PYTHONPATH": "${workspaceFolder}/backend" + }, + "module": "celery", + "console": "integratedTerminal", + "args": [ + "-A", + "ianalyzer.celery", + "worker", + "--pool=solo", + "--concurrency=1", + "--events", + "--loglevel=info" + ] } ], "inputs": [ diff --git a/CITATION.cff b/CITATION.cff index a02132bfe..69b99051a 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -35,5 +35,5 @@ keywords: - elasticsearch - natural language processing license: MIT -version: 5.7.0 -date-released: '2024-06-5' +version: 5.9.0 +date-released: '2024-07-05' diff --git a/backend/Dockerfile b/backend/Dockerfile index 3548587c4..2c58b766e 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -3,7 +3,7 @@ FROM docker.io/library/python:3.9 # Setting this means stdout and stderr streams are sent to terminal in real time ENV PYTHONUNBUFFERED 1 # Get required libraries for xmlsec -RUN apt-get -y update +RUN apt-get -y update && apt-get -y upgrade RUN apt-get install -y pkg-config libxml2-dev libxmlsec1-dev libxmlsec1-openssl default-libmysqlclient-dev RUN pip install --upgrade pip diff --git a/backend/addcorpus/json_corpora/export_json.py b/backend/addcorpus/json_corpora/export_json.py index 6ba9c130a..5178590fc 100644 --- a/backend/addcorpus/json_corpora/export_json.py +++ b/backend/addcorpus/json_corpora/export_json.py @@ -6,7 +6,7 @@ def export_json_corpus(corpus: Corpus) -> Dict: config = corpus.configuration - data = {'name': corpus.name, 'id': corpus.pk } + data = {'name': corpus.name} data['meta'] = export_corpus_meta(config) data['source_data'] = export_corpus_source_data(config) options = export_corpus_options(config) diff --git 
a/backend/addcorpus/json_corpora/tests/test_export.py b/backend/addcorpus/json_corpora/tests/test_export.py index b1ee9f691..cee319467 100644 --- a/backend/addcorpus/json_corpora/tests/test_export.py +++ b/backend/addcorpus/json_corpora/tests/test_export.py @@ -2,10 +2,9 @@ from addcorpus.models import Corpus, Field from addcorpus.json_corpora.import_json import _parse_field -def test_corpus_export(json_mock_corpus: Corpus, json_corpus_data): +def test_corpus_export(json_mock_corpus: Corpus, json_corpus_definition): result = export_json_corpus(json_mock_corpus) - result.pop('id') - assert result == json_corpus_data + assert result == json_corpus_definition def test_field_export(any_field_json): imported = _parse_field(any_field_json) diff --git a/backend/addcorpus/json_corpora/tests/test_import.py b/backend/addcorpus/json_corpora/tests/test_import.py index b620ea749..6e165b19a 100644 --- a/backend/addcorpus/json_corpora/tests/test_import.py +++ b/backend/addcorpus/json_corpora/tests/test_import.py @@ -4,10 +4,15 @@ from addcorpus.serializers import CorpusJSONDefinitionSerializer from addcorpus.models import Corpus, CorpusConfiguration -def test_json_corpus_import(db, json_corpus_data): - Corpus.objects.all().delete() +def test_json_corpus_import(db, json_mock_corpus, json_corpus_definition): + json_mock_corpus.delete() - serializer = CorpusJSONDefinitionSerializer(data=json_corpus_data) + data = { + 'definition': json_corpus_definition, + 'active': True, + } + + serializer = CorpusJSONDefinitionSerializer(data=data) assert serializer.is_valid() corpus = serializer.create(serializer.validated_data) @@ -35,21 +40,29 @@ def test_json_corpus_import(db, json_corpus_data): assert line_field.display_type == 'text_content' -def test_serializer_representation(db, json_corpus_data): - Corpus.objects.all().delete() +def test_serializer_representation(db, json_mock_corpus, json_corpus_definition): + json_mock_corpus.delete() + + data = { + 'definition': json_corpus_definition, 
+ 'active': True, + } - serializer = CorpusJSONDefinitionSerializer(data=json_corpus_data) + serializer = CorpusJSONDefinitionSerializer(data=data) assert serializer.is_valid() corpus = serializer.create(serializer.validated_data) serialized = serializer.to_representation(corpus) - serialized.pop('id') - assert json_corpus_data == serialized + assert json_corpus_definition == serialized['definition'] -def test_serializer_update(db, json_corpus_data, json_mock_corpus: Corpus): +def test_serializer_update(db, json_corpus_definition, json_mock_corpus: Corpus): # edit description - json_corpus_data['meta']['description'] = 'A different description' - serializer = CorpusJSONDefinitionSerializer(data=json_corpus_data) + data = { + 'definition': json_corpus_definition, + 'active': True, + } + data['definition']['meta']['description'] = 'A different description' + serializer = CorpusJSONDefinitionSerializer(data=data) assert serializer.is_valid() serializer.update(json_mock_corpus, serializer.validated_data) corpus_config = CorpusConfiguration.objects.get(corpus=json_mock_corpus) @@ -57,8 +70,8 @@ def test_serializer_update(db, json_corpus_data, json_mock_corpus: Corpus): # remove a field assert Field.objects.filter(corpus_configuration__corpus=json_mock_corpus).count() == 2 - json_corpus_data['fields'] = json_corpus_data['fields'][:-1] - serializer = CorpusJSONDefinitionSerializer(data=json_corpus_data) + data['definition']['fields'] = data['definition']['fields'][:-1] + serializer = CorpusJSONDefinitionSerializer(data=data) assert serializer.is_valid() serializer.update(json_mock_corpus, serializer.validated_data) assert Field.objects.filter(corpus_configuration__corpus=json_mock_corpus).count() == 1 diff --git a/backend/addcorpus/json_corpora/tests/test_validate.py b/backend/addcorpus/json_corpora/tests/test_validate.py index 18ad89011..068c1af29 100644 --- a/backend/addcorpus/json_corpora/tests/test_validate.py +++ 
b/backend/addcorpus/json_corpora/tests/test_validate.py @@ -1,10 +1,10 @@ from addcorpus.json_corpora.validate import validate -def test_validate(json_corpus_data): - validate(json_corpus_data) +def test_validate(json_corpus_definition): + validate(json_corpus_definition) -def test_validate_subschema(json_corpus_data): - source_data = json_corpus_data['source_data'] +def test_validate_subschema(json_corpus_definition): + source_data = json_corpus_definition['source_data'] validate(source_data, 'properties', 'source_data') diff --git a/backend/addcorpus/migrations/0023_alter_corpusdocumentationpage_type_alter_field_name.py b/backend/addcorpus/migrations/0023_alter_corpusdocumentationpage_type_alter_field_name.py new file mode 100644 index 000000000..6d8be1bb4 --- /dev/null +++ b/backend/addcorpus/migrations/0023_alter_corpusdocumentationpage_type_alter_field_name.py @@ -0,0 +1,24 @@ +# Generated by Django 4.2.11 on 2024-07-05 16:30 + +import addcorpus.validation.creation +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('addcorpus', '0022_add_url_display_type'), + ] + + operations = [ + migrations.AlterField( + model_name='corpusdocumentationpage', + name='type', + field=models.CharField(choices=[('general', 'General information'), ('citation', 'Citation'), ('license', 'License'), ('terms_of_service', 'Terms of service'), ('wordmodels', 'Word models')], default='general', help_text='the type of documentation', max_length=16), + ), + migrations.AlterField( + model_name='field', + name='name', + field=models.SlugField(help_text='internal name for the field', max_length=126, validators=[addcorpus.validation.creation.validate_name_is_not_a_route_parameter, addcorpus.validation.creation.validate_name_has_no_ner_suffix]), + ), + ] diff --git a/backend/addcorpus/models.py b/backend/addcorpus/models.py index ca31d9912..2d6c8927a 100644 --- a/backend/addcorpus/models.py +++ b/backend/addcorpus/models.py @@ -4,8 +4,8 
@@ from addcorpus.validation.creation import ( validate_es_mapping, validate_field_language, validate_implication, validate_language_code, validate_mimetype, - validate_name_is_not_a_route_parameter, validate_search_filter, - validate_search_filter_with_mapping, + validate_name_is_not_a_route_parameter, validate_name_has_no_ner_suffix, + validate_search_filter, validate_search_filter_with_mapping, validate_searchable_field_has_full_text_search, validate_sort_configuration, validate_visualizations_with_mapping, validate_source_data_directory, @@ -21,6 +21,8 @@ from django.db import models from django.db.models.constraints import UniqueConstraint +from ianalyzer.elasticsearch import elasticsearch + MAX_LENGTH_NAME = 126 MAX_LENGTH_DESCRIPTION = 254 MAX_LENGTH_TITLE = 256 @@ -260,6 +262,20 @@ def clean(self): e ]) + @property + def has_named_entities(self): + client = elasticsearch(self.es_index) + try: + mapping = client.indices.get_mapping( + index=self.es_index) + fields = mapping[self.es_index].get( + 'mappings', {}).get('properties', {}).keys() + if any(field.endswith(':ner') for field in fields): + return True + except: + return False + return False + FIELD_DISPLAY_TYPES = [ ('text_content', 'text content'), @@ -293,7 +309,8 @@ def clean(self): class Field(models.Model): name = models.SlugField( max_length=MAX_LENGTH_NAME, - validators=[validate_name_is_not_a_route_parameter], + validators=[validate_name_is_not_a_route_parameter, + validate_name_has_no_ner_suffix], help_text='internal name for the field', ) corpus_configuration = models.ForeignKey( @@ -431,11 +448,12 @@ def clean(self): e ]) + class CorpusDocumentationPage(models.Model): class PageType(models.TextChoices): GENERAL = ('general', 'General information') CITATION = ('citation', 'Citation') - LICENSE = ('license', 'Licence') + LICENSE = ('license', 'License') TERMS_OF_SERVICE = ('terms_of_service', 'Terms of service') WORDMODELS = ('wordmodels', 'Word models') @@ -455,6 +473,9 @@ class 
PageType(models.TextChoices): help_text='markdown contents of the documentation' ) + def __str__(self): + return f'{self.corpus_configuration.corpus.name} - {self.type}' + class Meta: constraints = [ UniqueConstraint( diff --git a/backend/addcorpus/serializers.py b/backend/addcorpus/serializers.py index d6fb31db9..b350656a3 100644 --- a/backend/addcorpus/serializers.py +++ b/backend/addcorpus/serializers.py @@ -70,6 +70,7 @@ class CorpusConfigurationSerializer(serializers.ModelSerializer): languages = serializers.ListField(child=LanguageField()) category = PrettyChoiceField(choices=CATEGORIES) default_sort = NonEmptyJSONField() + has_named_entities = serializers.ReadOnlyField() class Meta: model = CorpusConfiguration @@ -89,6 +90,7 @@ class Meta: 'default_sort', 'language_field', 'fields', + 'has_named_entities', ] @@ -128,33 +130,47 @@ class Meta: fields = ['corpus_configuration', 'type', 'content'] -class CorpusJSONDefinitionSerializer(serializers.ModelSerializer): - class Meta: - model = Corpus - fields = '__all__' +class JSONDefinitionField(serializers.Field): + def get_attribute(self, instance: Corpus): + return instance - def to_representation(self, instance) -> Dict: - return export_json_corpus(instance) + def to_representation(self, value: Corpus) -> Dict: + return export_json_corpus(value) - def to_internal_value(self, data) -> Dict: + def to_internal_value(self, data: Dict) -> Dict: return import_json_corpus(data) + +class CorpusJSONDefinitionSerializer(serializers.ModelSerializer): + definition = JSONDefinitionField() + + class Meta: + model = Corpus + fields = ['id', 'active', 'definition'] + read_only_fields = ['id'] + def create(self, validated_data: Dict): - configuration_data = validated_data.pop('configuration') + definition_data = validated_data.get('definition') + configuration_data = definition_data.pop('configuration') fields_data = configuration_data.pop('fields') - corpus = Corpus.objects.create(**validated_data) + corpus = 
Corpus.objects.create(**definition_data) configuration = CorpusConfiguration.objects.create(corpus=corpus, **configuration_data) for field_data in fields_data: Field.objects.create(corpus_configuration=configuration, **field_data) + if validated_data.get('active') == True: + corpus.active = True + corpus.save() + return corpus def update(self, instance: Corpus, validated_data: Dict): - configuration_data = validated_data.pop('configuration') + definition_data = validated_data.get('definition') + configuration_data = definition_data.pop('configuration') fields_data = configuration_data.pop('fields') - corpus = Corpus(pk=instance.pk, **validated_data) + corpus = Corpus(pk=instance.pk, **definition_data) corpus.save() configuration, _ = CorpusConfiguration.objects.get_or_create(corpus=corpus) @@ -172,4 +188,8 @@ def update(self, instance: Corpus, validated_data: Dict): configuration.fields.exclude(name__in=(f['name'] for f in fields_data)).delete() + if validated_data.get('active') == True: + corpus.active = True + corpus.save() + return corpus diff --git a/backend/addcorpus/tests/test_corpus_views.py b/backend/addcorpus/tests/test_corpus_views.py index 9e1c3fc99..d0080c053 100644 --- a/backend/addcorpus/tests/test_corpus_views.py +++ b/backend/addcorpus/tests/test_corpus_views.py @@ -7,6 +7,7 @@ from addcorpus.python_corpora.save_corpus import load_and_save_all_corpora def test_no_corpora(db, settings, admin_client): + Corpus.objects.all().delete() settings.CORPORA = {} load_and_save_all_corpora() @@ -18,9 +19,14 @@ def test_no_corpora(db, settings, admin_client): def test_corpus_documentation_view(admin_client, basic_mock_corpus, settings): response = admin_client.get(f'/api/corpus/documentation/{basic_mock_corpus}/') assert response.status_code == 200 + pages = response.data + + # check that the pages are sorted in canonical order + page_types = [page['type'] for page in pages] + assert page_types == ['General information', 'Citation', 'License'] # should contain 
citation guidelines - citation_page = next(page for page in response.data if page['type'] == 'Citation') + citation_page = next(page for page in pages if page['type'] == 'Citation') # check that the page template is rendered with context content = citation_page['content'] @@ -84,7 +90,7 @@ def test_corpus_not_publication_ready(admin_client, basic_mock_corpus): response = admin_client.get('/api/corpus/') corpus = not any(c['name'] == basic_mock_corpus for c in response.data) -def test_corpus_edit_views(admin_client: Client, json_corpus_data: Dict, json_mock_corpus: Corpus): +def test_corpus_edit_views(admin_client: Client, json_corpus_definition: Dict, json_mock_corpus: Corpus): json_mock_corpus.delete() response = admin_client.get('/api/corpus/definitions/') @@ -93,7 +99,7 @@ def test_corpus_edit_views(admin_client: Client, json_corpus_data: Dict, json_mo response = admin_client.post( '/api/corpus/definitions/', - json_corpus_data, + {'definition': json_corpus_definition, 'active': True}, content_type='application/json', ) assert status.is_success(response.status_code) diff --git a/backend/addcorpus/validation/creation.py b/backend/addcorpus/validation/creation.py index 8b0a2666a..445a272fa 100644 --- a/backend/addcorpus/validation/creation.py +++ b/backend/addcorpus/validation/creation.py @@ -122,6 +122,13 @@ def validate_name_is_not_a_route_parameter(value): f'{value} cannot be used as a field name, because it is also a route parameter' ) + +def validate_name_has_no_ner_suffix(value): + if value.endswith(':ner'): + raise ValidationError( + f'{value} cannot be used as a field name: the suffix `:ner` is reserved for annotated_text fields' + ) + def mapping_can_be_searched(es_mapping): ''' Verify if a mapping is appropriate for searching diff --git a/backend/addcorpus/views.py b/backend/addcorpus/views.py index f495fd162..a32064011 100644 --- a/backend/addcorpus/views.py +++ b/backend/addcorpus/views.py @@ -42,22 +42,30 @@ def send_corpus_file(corpus='', subdir='', 
filename=''): return FileResponse(open(path, 'rb')) + class CorpusDocumentationPageViewset(viewsets.ModelViewSet): permission_classes = [IsAuthenticatedOrReadOnly, CorpusAccessPermission] serializer_class = CorpusDocumentationPageSerializer - def get_queryset(self): - corpus_name = corpus_name_from_request(self.request) - pages = CorpusDocumentationPage.objects.filter(corpus_configuration__corpus__name=corpus_name) - + @staticmethod + def get_relevant_pages(pages, corpus_name): # only include wordmodels documentation if models are present if Corpus.objects.get(name=corpus_name).has_python_definition: definition = load_corpus_definition(corpus_name) if definition.word_models_present: return pages - return pages.exclude(type=CorpusDocumentationPage.PageType.WORDMODELS) + def get_queryset(self): + corpus_name = corpus_name_from_request(self.request) + pages = CorpusDocumentationPage.objects.filter( + corpus_configuration__corpus__name=corpus_name) + relevant_pages = self.get_relevant_pages(pages, corpus_name) + canonical_order = [e.value for e in CorpusDocumentationPage.PageType] + + return sorted( + relevant_pages, key=lambda p: canonical_order.index(p.type)) + class CorpusImageView(APIView): ''' diff --git a/backend/conftest.py b/backend/conftest.py index 2e76c7939..0e5914319 100644 --- a/backend/conftest.py +++ b/backend/conftest.py @@ -197,16 +197,20 @@ def add_mock_python_corpora_to_db(db, media_dir): @pytest.fixture() -def json_corpus_data(): +def json_corpus_definition(): path = os.path.join(settings.BASE_DIR, 'corpora_test', 'basic', 'mock_corpus.json') with open(path) as f: return json.load(f) @pytest.fixture(autouse=True) -def json_mock_corpus(db, json_corpus_data) -> Corpus: +def json_mock_corpus(db, json_corpus_definition) -> Corpus: # add json mock corpora to the database at the start of each test - serializer = CorpusJSONDefinitionSerializer(data=json_corpus_data) + data = { + 'definition': json_corpus_definition, + 'active': True, + } + serializer = 
CorpusJSONDefinitionSerializer(data=data) assert serializer.is_valid() corpus = serializer.create(serializer.validated_data) diff --git a/backend/corpora/jewishmigration/jewishmigration.py b/backend/corpora/jewishmigration/jewishmigration.py index 24fa4486e..02fd3c8a3 100644 --- a/backend/corpora/jewishmigration/jewishmigration.py +++ b/backend/corpora/jewishmigration/jewishmigration.py @@ -48,9 +48,9 @@ class JewishMigration(PeacePortal, JSONCorpusDefinition): min_date = datetime(year=1, month=1, day=1) max_date = datetime(year=1800, month=12, day=31) - data_directory = getattr(settings, 'JMIG_DATA') - data_url = getattr(settings, 'JMIG_DATA_URL', - 'localhost:8100/api/records/') + data_directory = settings.JMIG_DATA_DIR + data_filepath = getattr(settings, 'JMIG_DATA', None) + data_url = getattr(settings, 'JMIG_DATA_URL', None) es_index = getattr(settings, 'JMIG_INDEX', 'jewishmigration') image = 'jewish_inscriptions.jpg' @@ -62,12 +62,12 @@ def sources(self, start, end): if self.data_url: response = requests.get(self.data_url) list_of_sources = response.json() - elif self.data_directory: - with open(self.data_directory, 'r') as f: + elif self.data_filepath: + with open(self.data_filepath, 'r') as f: list_of_sources = json.load(f) else: logging.getLogger('indexing').warning( - 'No data directory or URL provided.') + 'No data filepath or URL provided.') for source in list_of_sources: yield source diff --git a/backend/corpora/jewishmigration/test_jewishmigration.py b/backend/corpora/jewishmigration/test_jewishmigration.py index cec55f18a..d1dc7acdd 100644 --- a/backend/corpora/jewishmigration/test_jewishmigration.py +++ b/backend/corpora/jewishmigration/test_jewishmigration.py @@ -136,6 +136,7 @@ def jm_corpus_settings(settings): settings.CORPORA = { 'jewishmigration': os.path.join(here, 'jewishmigration.py') } + settings.JMIG_DATA_DIR = '/corpora' settings.JMIG_DATA = None settings.JMIG_DATA_URL = 'http://www.example.com' settings.JMIG_INDEX = 
'test-jewishmigration' diff --git a/backend/corpora/ublad/description/ublad.md b/backend/corpora/ublad/description/ublad.md new file mode 100644 index 000000000..197227486 --- /dev/null +++ b/backend/corpora/ublad/description/ublad.md @@ -0,0 +1,7 @@ +Op 5 september 1969 kreeg de Universiteit Utrecht voor het eerst een onafhankelijk blad: _U utrechtse universitaire reflexen_. Dit blad kwam voort uit een fusie van twee andere tijdschriften: _Sol Iustitiae_ dat voornamelijk gericht was op studenten en _Solaire Reflexen_ dat meer was bedoeld voor medewerkers. U utrechtse universitaire reflexen was bedoeld voor alle geledingen. + +In 1974 veranderde de naam in het _Ublad_. Dat bleef zo tot de universiteit besloot het papieren Ublad digitaal te maken. Onder luid protest verdween het papieren Ublad en ontstond in april 2010 _DUB_, het digitale universiteitsblad. + +Om alle informatie uit het verleden toegankelijk te maken, heeft het Centre for Digital Humanities samen met de Universiteitsbibliotheek de oude jaargangen gedigitaliseerd. In I-analyzer kunt u alle jaargangen van U utrechtse universitaire reflexen en het Ublad vinden en doorzoeken. + +Het onafhankelijke Ublad geeft een kleurrijk verslag van wat er speelde op de universiteit, de stad en het studentenleven door middel van artikelen, foto’s en cartoons. De afbeelding die is gebruikt voor OCR is voor elke pagina bijgevoegd zodat u altijd het originele bronmateriaal kunt raadplegen. 
diff --git a/backend/corpora/ublad/images/ublad.jpg b/backend/corpora/ublad/images/ublad.jpg new file mode 100644 index 000000000..0f9d3a87e Binary files /dev/null and b/backend/corpora/ublad/images/ublad.jpg differ diff --git a/backend/corpora/ublad/tests/test_ublad.py b/backend/corpora/ublad/tests/test_ublad.py new file mode 100644 index 000000000..fca9cc6c5 --- /dev/null +++ b/backend/corpora/ublad/tests/test_ublad.py @@ -0,0 +1,14 @@ +import locale +import pytest +from corpora.ublad.ublad import transform_date +import datetime + + +def test_transform_date(): + datestring = '6 september 2007' + goal_date = datetime.date(2007, 9, 6) + try: + date = transform_date(datestring) + except locale.Error: + pytest.skip('Dutch Locale not installed in environment') + assert date == str(goal_date) diff --git a/backend/corpora/ublad/ublad.py b/backend/corpora/ublad/ublad.py new file mode 100644 index 000000000..ec340eb9c --- /dev/null +++ b/backend/corpora/ublad/ublad.py @@ -0,0 +1,264 @@ +from datetime import datetime +import os +from os.path import join, splitext +import locale +import logging + +from django.conf import settings +from addcorpus.python_corpora.corpus import HTMLCorpusDefinition, FieldDefinition +from addcorpus.python_corpora.extract import FilterAttribute +from addcorpus.es_mappings import * +from addcorpus.python_corpora.filters import DateFilter +from addcorpus.es_settings import es_settings + + +from ianalyzer_readers.readers.html import HTMLReader +from ianalyzer_readers.readers.core import Field +from ianalyzer_readers.extract import html, Constant + +from bs4 import BeautifulSoup, Tag + +def transform_content(soup): + """ + Transforms the text contents of a page node (soup) into a string consisting + of blocks of text, foregoing the column structure of the OCR'ed material. 
+ """ + page_text = "" + for child in soup.children: + if isinstance(child, Tag) and 'ocr_carea' in child.get('class', []): + paragraph_text = "" + paragraph_list = child.get_text().split('\n') + for item in paragraph_list[1:]: + if not item: + pass + elif item.endswith('-'): + paragraph_text += item.strip('-') + else: + paragraph_text += item + ' ' + if paragraph_text: + page_text += paragraph_text + '\n\n' + return page_text + +def transform_date(date_string): + try: + locale.setlocale(locale.LC_ALL, 'nl_NL.UTF-8') + date = datetime.strptime(date_string, '%d %B %Y').strftime('%Y-%m-%d') + locale.setlocale(locale.LC_ALL, '') + return date + except ValueError: + logger.error("Unable to get date from {}".format(date_string)) + return None + + +logger = logging.getLogger('indexing') + +class UBlad(HTMLCorpusDefinition): + title = 'U-Blad' + description = 'The print editions of the Utrecht University paper from 1969 until 2010.' + description_page = 'ublad.md' + min_date = datetime(year=1969, month=1, day=1) + max_date = datetime(year=2010, month=12, day=31) + + data_directory = settings.UBLAD_DATA + es_index = getattr(settings, 'UBLAD_ES_INDEX', 'ublad') + image = 'ublad.jpg' + scan_image_type = 'image/jpeg' + allow_image_download = True + + document_context = { + 'context_fields': ['volume_id'], + 'sort_field': 'sequence', + 'sort_direction': 'asc', + 'context_display_name': 'volume' + } + + languages = ['nl'] + category = 'periodical' + + @property + def es_settings(self): + return es_settings(self.languages[:1], stopword_analysis=True, stemming_analysis=True) + + def sources(self, start=min_date, end=max_date): + for directory, _, filenames in os.walk(self.data_directory): + _body, tail = os.path.split(directory) + if '.snapshot' in _: + _.remove('.snapshot') + continue + for filename in filenames: + if filename != '.DS_Store': + full_path = join(directory, filename) + yield full_path, {'filename': filename} + + + fields = [ + FieldDefinition( + name = 'content', 
+ display_name='Content', + display_type='text_content', + description='Text content of the page, generated by OCR', + results_overview=True, + csv_core=True, + search_field_core=True, + visualizations=['ngram', 'wordcloud'], + es_mapping = main_content_mapping(True, True, True, 'nl'), + extractor= FilterAttribute(tag='div', + recursive=True, + multiple=False, + flatten=False, + extract_soup_func=transform_content, + attribute_filter={ + 'attribute': 'class', + 'value': 'ocr_page' + }) + ), + FieldDefinition( + name='pagenum', + display_name='Page number', + description='Page number', + csv_core=True, + es_mapping = int_mapping(), + extractor = FilterAttribute(tag='meta', attribute='content', attribute_filter={ + 'attribute': 'name', + 'value': 'pagenum' + } + ) + ), + FieldDefinition( + name='journal_title', + display_name='Publication Title', + description='Title of the publication', + extractor = FilterAttribute(tag='meta', attribute='content', attribute_filter={ + 'attribute': 'name', + 'value': 'journal_title' + } + ) + ), + FieldDefinition( + name='volume_id', + display_name='Volume ID', + description='Unique identifier for this volume', + hidden=True, + es_mapping=keyword_mapping(), + extractor = FilterAttribute(tag='meta', attribute='content', attribute_filter={ + 'attribute': 'name', + 'value': 'identifier_ocn' + } + ) + ), + FieldDefinition( + name='id', + display_name='Page ID', + description='Unique identifier for this page', + hidden=True, + extractor = FilterAttribute(tag='meta', attribute='content', attribute_filter={ + 'attribute': 'name', + 'value': 'identifier_indexid' + } + ) + ), + FieldDefinition( + name='edition', + display_name='Edition', + description='The number of the edition in this volume. 
Every year starts at 1.', + sortable=True, + es_mapping = keyword_mapping(), + extractor = FilterAttribute(tag='meta', attribute='content', attribute_filter={ + 'attribute': 'name', + 'value': 'aflevering' + } + ) + ), + FieldDefinition( + name='volume', + display_name='Volume', + sortable=True, + results_overview=True, + csv_core=True, + description='The volume number of this publication. There is one volume per year.', + es_mapping=keyword_mapping(), + extractor = FilterAttribute(tag='meta', attribute='content', attribute_filter={ + 'attribute': 'name', + 'value': 'yearstring' + } + ), + ), + FieldDefinition( + name='date', + display_name='Date', + description='The publication date of this edition', + es_mapping={'type': 'date', 'format': 'yyyy-MM-dd'}, + visualizations=['resultscount', 'termfrequency'], + sortable=True, + results_overview=True, + search_filter=DateFilter( + min_date, + max_date, + description=( + 'Accept only articles with publication date in this range.' + ) + ), + extractor = FilterAttribute(tag='meta', attribute='content', attribute_filter={ + 'attribute': 'name', + 'value': 'datestring', + }, + transform=transform_date + ) + ), + FieldDefinition( + name='repo_url', + display_name='Repository URL', + description='URL to the dSPACE repository entry of this volume', + es_mapping=keyword_mapping(), + display_type='url', + searchable=False, + extractor=FilterAttribute(tag='meta', attribute='content', attribute_filter={ + 'attribute': 'name', + 'value': 'link_repository' + } + ) + ), + FieldDefinition( + name='reader_url', + display_name='Reader URL', + description='URL to the UB reader view of this page', + es_mapping=keyword_mapping(), + display_type='url', + searchable=False, + extractor=FilterAttribute(tag='meta', attribute='content', attribute_filter={ + 'attribute': 'name', + 'value': 'link_objects_image' + } + ) + ), + FieldDefinition( + name='jpg_url', + display_name='Image URL', + description='URL to the jpg file of this page', + 
es_mapping=keyword_mapping(), + display_type='url', + searchable=False, + extractor=FilterAttribute(tag='meta', attribute='content', attribute_filter={ + 'attribute': 'name', + 'value': 'link_objects_jpg' + } + ) + ), + FieldDefinition( + name='worldcat_url', + display_name='Worldcat URL', + description='URL to the Worldcat entry of this volume', + es_mapping=keyword_mapping(), + display_type='url', + searchable=False, + extractor=FilterAttribute(tag='meta', attribute='content', attribute_filter={ + 'attribute': 'name', + 'value': 'link_worldcat' + } + ) + ) + ] + + def request_media(self, document, corpus_name): + image_list = [document['fieldValues']['jpg_url']] + return {'media': image_list} diff --git a/backend/corpora/utils_test.py b/backend/corpora/utils_test.py index 8dd27e233..ccc671df5 100644 --- a/backend/corpora/utils_test.py +++ b/backend/corpora/utils_test.py @@ -1,4 +1,5 @@ from addcorpus.python_corpora.save_corpus import load_and_save_all_corpora +from addcorpus.models import Corpus def corpus_from_api(client): ''' @@ -11,6 +12,7 @@ def corpus_from_api(client): useful when you have configured your settings with only one corpus. ''' + Corpus.objects.all().delete() load_and_save_all_corpora() response = client.get('/api/corpus/') diff --git a/backend/corpora_test/basic/license/license.md b/backend/corpora_test/basic/license/license.md new file mode 100644 index 000000000..4fcb91a18 --- /dev/null +++ b/backend/corpora_test/basic/license/license.md @@ -0,0 +1 @@ +Do whatever you please. 
diff --git a/backend/corpora_test/basic/mock_csv_corpus.py b/backend/corpora_test/basic/mock_csv_corpus.py index 84711f8a0..3b3d38360 100644 --- a/backend/corpora_test/basic/mock_csv_corpus.py +++ b/backend/corpora_test/basic/mock_csv_corpus.py @@ -25,6 +25,8 @@ class MockCSVCorpus(CSVCorpusDefinition): max_date = datetime.datetime(year=2022, month=12, day=31) data_directory = os.path.join(here, 'source_data') citation_page = 'citation.md' + license_page = 'license.md' + description_page = 'mock-csv-corpus.md' languages = ['en'] category = 'book' diff --git a/backend/corpora_test/media/media_mock_corpus.py b/backend/corpora_test/media/media_mock_corpus.py index e965ca660..6bd7be485 100644 --- a/backend/corpora_test/media/media_mock_corpus.py +++ b/backend/corpora_test/media/media_mock_corpus.py @@ -13,6 +13,8 @@ class MediaMockCorpus(MockCSVCorpus): data_directory = os.path.join(here, 'source_data') scan_image_type = 'image/png' citation_page = None + license_page = None + description_page = None def request_media(self, document, corpus_name): field_values = document['fieldValues'] diff --git a/backend/download/tasks.py b/backend/download/tasks.py index 875f4d5bc..ccc559065 100644 --- a/backend/download/tasks.py +++ b/backend/download/tasks.py @@ -44,19 +44,12 @@ def make_download(request_json, download_id, download_size=None): es_query = api_query_to_es_query(request_json, corpus_name) results, _total = es_download.scroll( corpus_name, es_query, download_size) + filepath = create_csv.search_results_csv( - results, request_json['fields'], query, download_id) + results, request_json['fields'], query.get_query_text(es_query), download_id) return filepath -def create_query(request_json): - """ - format the route of the search into a query string - """ - route = request_json.get('route') - return re.sub(r';|%\d+', '_', re.sub(r'\$', '', route.split('/')[2])) - - def try_download(tasks_func, download): ''' Try initialising a task chain for a download. 
Marks the download diff --git a/backend/download/tests/test_download_views.py b/backend/download/tests/test_download_views.py index ef1c6411a..7cbbd981c 100644 --- a/backend/download/tests/test_download_views.py +++ b/backend/download/tests/test_download_views.py @@ -246,3 +246,22 @@ def test_unauthenticated_download(db, client, basic_mock_corpus, basic_corpus_pu download_objects = Download.objects.all() assert download_objects.count() == 1 assert download_objects.first().user == None + +def test_query_text_in_csv(db, client, basic_mock_corpus, basic_corpus_public, index_basic_mock_corpus): + es_query = query.set_query_text(mock_match_all_query(), 'ghost') + download_request_json = { + 'corpus': basic_mock_corpus, + 'es_query': es_query, + 'fields': ['character', 'line'], + 'route': f"/search/{basic_mock_corpus}", + 'encoding': 'utf-8' + } + response = client.post('/api/download/search_results', + download_request_json, + content_type='application/json' + ) + assert status.is_success(response.status_code) + stream = read_file_response(response, 'utf-8') + reader = csv.DictReader(stream, delimiter=';') + row = next(reader) + assert row['query'] == 'ghost' diff --git a/backend/es/conftest.py b/backend/es/conftest.py index c9272b917..40d462e05 100644 --- a/backend/es/conftest.py +++ b/backend/es/conftest.py @@ -1,4 +1,6 @@ import pytest +from time import sleep + from django.contrib.auth.models import Group from addcorpus.python_corpora.load_corpus import load_corpus_definition @@ -17,6 +19,27 @@ def corpus_definition(mock_corpus): yield corpus +@pytest.fixture() +def es_ner_search_client(es_client, basic_mock_corpus, basic_corpus_public, index_basic_mock_corpus): + """ + Create and populate an index for the mock corpus in elasticsearch. + Returns an elastic search client for the mock corpus. 
+ """ + # add data from mock corpus + corpus = Corpus.objects.get(name=basic_mock_corpus) + es_client.indices.put_mapping(index=corpus.configuration.es_index, properties={ + "content:ner": {"type": "annotated_text"}}) + + es_client.index(index=corpus.configuration.es_index, document={ + 'id': 'my_identifier', + 'content': 'Guybrush Threepwood is looking for treasure on Monkey Island', + 'content:ner': '[Guybrush Threepwood](PER) is looking for treasure on [Monkey Island](LOC)'}) + + # ES is "near real time", so give it a second before we start searching the index + sleep(1) + yield es_client + + @pytest.fixture() def es_index_client(es_client, mock_corpus): """ diff --git a/backend/es/es_index.py b/backend/es/es_index.py index 23947c377..d89fad177 100644 --- a/backend/es/es_index.py +++ b/backend/es/es_index.py @@ -130,6 +130,7 @@ def populate(client: Elasticsearch, corpus: Corpus, start=None, end=None): corpus_server = settings.SERVERS[ settings.CORPUS_SERVER_NAMES.get(corpus_name, 'default')] + # Do bulk operation for success, info in es_helpers.streaming_bulk( client, diff --git a/backend/es/tests/test_named_entity_search.py b/backend/es/tests/test_named_entity_search.py new file mode 100644 index 000000000..1335c63b8 --- /dev/null +++ b/backend/es/tests/test_named_entity_search.py @@ -0,0 +1,48 @@ +from es.views import NamedEntitySearchView + + +def test_ner_search_view(es_ner_search_client, client): + route = '/api/es/mock-csv-corpus/my_identifier/named_entities' + response = client.get(route, content_type='application/json') + assert response.status_code == 200 + + +def test_construct_ner_query(): + viewset = NamedEntitySearchView() + fields = ['content:ner'] + query = viewset.construct_named_entity_query(fields, 'my_identifier') + expected = { + "bool": { + "must": [ + { + "term": { + "id": "my_identifier" + } + }, + { + "terms": { + "content:ner": ["LOC", "PER", "ORG", "MISC"] + } + } + ] + } + } + assert query == expected + + +def 
test_find_named_entity_fields(es_ner_search_client): + viewset = NamedEntitySearchView() + fields = viewset.find_named_entity_fields( + es_ner_search_client, 'test-basic-corpus') + assert len(fields) == 1 + assert fields[0] == 'content:ner' + + +def test_find_entities(): + viewset = NamedEntitySearchView() + text = '[Guybrush Threepwood](PER) is looking for treasure on [Monkey Island](LOC)' + output = viewset.find_entities(text) + expected = [{'entity': 'person', 'text': 'Guybrush Threepwood'}, + {'entity': 'flat', 'text': ' is looking for treasure on '}, + {'entity': 'location', 'text': 'Monkey Island'}] + assert output == expected diff --git a/backend/es/urls.py b/backend/es/urls.py index 62c389eae..f7ab60884 100644 --- a/backend/es/urls.py +++ b/backend/es/urls.py @@ -1,6 +1,8 @@ from django.urls import path -from es.views import * +from es.views import ForwardSearchView, NamedEntitySearchView urlpatterns = [ path('/_search', ForwardSearchView.as_view()), + path('//named_entities', + NamedEntitySearchView.as_view()) ] diff --git a/backend/es/views.py b/backend/es/views.py index ab7cadc2e..55e36ed7c 100644 --- a/backend/es/views.py +++ b/backend/es/views.py @@ -1,17 +1,19 @@ +import logging +import re + from django.utils import timezone from rest_framework.views import APIView from rest_framework.response import Response -from ianalyzer.elasticsearch import elasticsearch -from es.search import get_index, total_hits, hits -import logging -from rest_framework.permissions import IsAuthenticated from rest_framework.exceptions import APIException + from addcorpus.permissions import CorpusAccessPermission -from tag.permissions import CanSearchTags from api.save_query import should_save_query from addcorpus.models import Corpus from api.models import Query from api.api_query import api_query_to_es_query +from es.search import get_index, total_hits, hits +from ianalyzer.elasticsearch import elasticsearch +from tag.permissions import CanSearchTags logger = 
logging.getLogger(__name__) @@ -98,3 +100,80 @@ def _save_query_done(self, query, results): query.total_results = total_hits(results) query.transferred = len(hits(results)) query.save() + + +class NamedEntitySearchView(APIView): + ''' Construct a terms query for named entities, combined with a term query of the id + Perform search via Elasticsearch and reformat the output + ''' + entity_dict = { + 'PER': 'person', + 'LOC': 'location', + 'ORG': 'organization', + 'MISC': 'miscellaneous' + } + + permission_classes = [CorpusAccessPermission] + + def get(self, request, *args, **kwargs): + corpus_name = kwargs.get('corpus') + document_id = kwargs.get('id') + client = elasticsearch(corpus_name) + index = get_index(corpus_name) + fields = self.find_named_entity_fields(client, index) + query = self.construct_named_entity_query(fields, document_id) + response = client.search(index=index, query=query, fields=fields) + results = hits(response) + annotations = {} + response = {} + if len(results): + source = results[0]['_source'] + for field in fields: + text_with_entities = source.get(field) + annotations.update({field.replace(':ner', ''): self.find_entities( + text_with_entities)}) + return Response(annotations) + + def find_named_entity_fields(self, client, index: str) -> list[str]: + mapping = client.indices.get_mapping(index=index) + fields = mapping[index]['mappings']['properties'] + field_names = fields.keys() + return [name for name in field_names if name.endswith(':ner') and fields[name].get('type') == 'annotated_text'] + + def construct_named_entity_query(self, fields: list[str], document_id: str) -> dict: + return { + "bool": { + "must": [ + { + "term": { + "id": document_id + } + }, *self.add_terms(fields) + ] + } + } + + def add_terms(self, fields: list[str]) -> list[dict]: + return [ + { + "terms": { + field: ["LOC", "PER", "ORG", "MISC"] + } + } for field in fields + ] + + def find_entities(self, input_text: str) -> str: + # regex pattern to match annotations of 
format "[Wally](Person)" and split it into two groups + pattern = re.compile('(\[[^]]+\])(\([A-Z]+\))') + annotations = pattern.split(input_text) + output = [] + for index, annotation in enumerate(annotations): + if annotation.startswith('('): + continue + elif annotation.startswith('['): + output.append( + {'entity': self.entity_dict.get(annotations[index+1][1:-1]), 'text': annotation[1:-1]}) + else: + if annotation: + output.append({'entity': 'flat', 'text': annotation}) + return output diff --git a/backend/ianalyzer/settings.py b/backend/ianalyzer/settings.py index fae30ca3e..214ad7623 100644 --- a/backend/ianalyzer/settings.py +++ b/backend/ianalyzer/settings.py @@ -76,6 +76,8 @@ CORPUS_SERVER_NAMES = {} +CORPORA_LOCALES = {} + CORPORA = {} WORDCLOUD_LIMIT = 1000 diff --git a/backend/ianalyzer/settings_test.py b/backend/ianalyzer/settings_test.py index a3f013c2b..a9974627e 100644 --- a/backend/ianalyzer/settings_test.py +++ b/backend/ianalyzer/settings_test.py @@ -17,4 +17,6 @@ def test_corpus_path(*path): TIMES_DATA = os.path.join(BASE_DIR, 'addcorpus', 'python_corpora', 'tests') TIMES_ES_INDEX = 'times-test' +UBLAD_DATA = '' # necessary to make ublad test not fail + SERVERS['default']['index_prefix'] = 'test' diff --git a/backend/media/views.py b/backend/media/views.py index 25fa5f910..2ee55d162 100644 --- a/backend/media/views.py +++ b/backend/media/views.py @@ -19,7 +19,7 @@ class GetMediaView(APIView): Return the image/pdf of a document ''' - permission_classes = [IsAuthenticated, CorpusAccessPermission] + permission_classes = (CorpusAccessPermission,) def get(self, request, *args, **kwargs): corpus_name = corpus_name_from_request(request) @@ -60,7 +60,7 @@ class MediaMetadataView(APIView): Return metadata on the media for a document ''' - permission_classes = [IsAuthenticated, CorpusAccessPermission] + permission_classes = (CorpusAccessPermission,) def post(self, request, *args, **kwargs): corpus_name = corpus_name_from_request(request) diff --git 
a/backend/wordmodels/views.py b/backend/wordmodels/views.py index 81b2de6a1..1c4d28ac3 100644 --- a/backend/wordmodels/views.py +++ b/backend/wordmodels/views.py @@ -1,7 +1,5 @@ -from django.shortcuts import render from rest_framework.views import APIView from rest_framework.response import Response -from rest_framework.permissions import IsAuthenticated from addcorpus.permissions import CorpusAccessPermission, corpus_name_from_request from wordmodels import utils, visualisations from rest_framework.exceptions import APIException @@ -11,7 +9,7 @@ class RelatedWordsView(APIView): Get words with the highest similarity to the query term ''' - permission_classes = [IsAuthenticated, CorpusAccessPermission] + permission_classes = [CorpusAccessPermission] def post(self, request, *args, **kwargs): corpus = corpus_name_from_request(request) @@ -35,7 +33,7 @@ class SimilarityView(APIView): Get similarity between two query terms ''' - permission_classes = [IsAuthenticated, CorpusAccessPermission] + permission_classes = [CorpusAccessPermission] def get(self, request, *args, **kwargs): corpus = corpus_name_from_request(request) @@ -55,7 +53,7 @@ class WordInModelView(APIView): Check if a word has a vector in the model for a corpus ''' - permission_classes = [IsAuthenticated, CorpusAccessPermission] + permission_classes = [CorpusAccessPermission] def get(self, request, *args, **kwargs): corpus = corpus_name_from_request(request) diff --git a/documentation/First-time-setup.md b/documentation/First-time-setup.md index f58aee3c8..974a00256 100644 --- a/documentation/First-time-setup.md +++ b/documentation/First-time-setup.md @@ -73,11 +73,12 @@ The source files of a corpus are not included in this directory; ask another dev Note: database-only corpora are still in development and not yet recommended for first-time users. -To add a database-only corpus, you will need a JSON definition of the corpus, and a directory with (a sample of) the pre-processed source data. 
To retrieve a JSON definition from a running I-analyzer server, visit `/api/corpus/edit/` and copy the JSON of the corpus you want to import. +To add a database-only corpus, you will need a JSON definition of the corpus, and a directory with (a sample of) the pre-processed source data. To retrieve a JSON definition from a running I-analyzer server, log in as a staff user and visit `/corpus-definitions/`. Open the corpus you want to import and click "Download JSON". -1. Start up your I-analyzer server. Make a POST request to `localhost:8000/api/corpus/edit/` (you can use the browsable API for this) to import the JSON definition. -2. Visit the admin menu (`localhost:8000/admin`). Go to "corpus configurations" and select your corpus. In the "data directory" field, add the path to your source data directory. -3. Activate your python virutal environment. Then create an ElasticSearch index from the source files by running, e.g., `yarn django index dutchannualreports`, for indexing the Dutch Annual Reports corpus in a development environment. See [Indexing](documentation/Indexing-corpora.md) for more information. +1. Start up your I-analyzer server and log in as a staff user. Go to `localhost:4200/corpus-definitions/new`. Upload the JSON definition file and save. +2. Visit the admin menu (`localhost:4200/admin`). Go to "corpus configurations" and select your corpus. In the "data directory" field, add the path to your source data directory. +3. Activate your python virtual environment. Create an ElasticSearch index from the source files by running `yarn django index {corpusname}`. See [Indexing](documentation/Indexing-corpora.md) for more information. +4. Visit the admin menu again. Go to "corpora" and select the corpus. Set "active" to true and save. 
## Running a dev environment diff --git a/documentation/Writing-a-corpus-definition-in-JSON.md b/documentation/Writing-a-corpus-definition-in-JSON.md index 9f93a6a1d..31cc42312 100644 --- a/documentation/Writing-a-corpus-definition-in-JSON.md +++ b/documentation/Writing-a-corpus-definition-in-JSON.md @@ -6,7 +6,7 @@ The format is defined in [corpus.schema.json](/backend/addcorpus/schemas/corpus. ## Importing and exporting definitions -Currently, importing and exporting JSON definitions is only supported through the backend API. +You can import and export JSON definitions through the frontend. Visit `/corpus-definitions/` to do so. Some notes on importing and exporting JSON definitions: diff --git a/frontend/src/_utilities.scss b/frontend/src/_utilities.scss index d11b74d18..e1f728cf0 100644 --- a/frontend/src/_utilities.scss +++ b/frontend/src/_utilities.scss @@ -30,6 +30,11 @@ $section-padding: 3rem 1.5rem; $boxShadow: 0 2px 3px rgba(10, 10, 10, 0.1), 0 0 0 1px rgba(10, 10, 10, 0.1); $boxShadowHover: 0px 5px 3px rgba(10, 10, 10, 0.1), 0 0 0 1px $primary; +$entity-person: #303F9F; +$entity-location: #4e8f2d; +$entity-organization: #efb71d; +$entity-miscellaneous: #ee5986; + @import "bulma/sass/utilities/_all"; // based on the Bulma loader diff --git a/frontend/src/app/app.module.ts b/frontend/src/app/app.module.ts index af5cd851f..20c8c3e7c 100644 --- a/frontend/src/app/app.module.ts +++ b/frontend/src/app/app.module.ts @@ -17,6 +17,10 @@ import { import { AboutComponent } from './about/about.component'; import { AppComponent } from './app.component'; +import { CorpusDefinitionsModule } from './corpus-definitions/corpus-definitions.module'; +import { CreateDefinitionComponent } from './corpus-definitions/create-definition/create-definition.component'; +import { DefinitionsOverviewComponent } from './corpus-definitions/definitions-overview/definitions-overview.component'; +import { EditDefinitionComponent } from 
'./corpus-definitions/edit-definition/edit-definition.component'; import { CorpusModule } from './corpus-header/corpus.module'; import { CorpusInfoComponent } from './corpus-info/corpus-info.component'; import { CorpusSelectionModule } from './corpus-selection/corpus-selection.module'; @@ -131,6 +135,24 @@ export const appRoutes: Routes = [ component: TagOverviewComponent, canActivate: [LoggedOnGuard], }, + { + path: 'corpus-definitions', + canActivate: [LoggedOnGuard], + children: [ + { + path: 'new', + component: CreateDefinitionComponent, + }, + { + path: 'edit/:corpusID', + component: EditDefinitionComponent, + }, + { + path: '', + component: DefinitionsOverviewComponent, + }, + ] + }, { path: '', redirectTo: 'home', @@ -156,6 +178,7 @@ export const imports: any[] = [ SharedModule, // Feature Modules CorpusModule, + CorpusDefinitionsModule, CorpusSelectionModule, DialogModule, DocumentModule, diff --git a/frontend/src/app/common-test-bed.ts b/frontend/src/app/common-test-bed.ts index 25de1b887..888762621 100644 --- a/frontend/src/app/common-test-bed.ts +++ b/frontend/src/app/common-test-bed.ts @@ -2,6 +2,7 @@ import { TestBed } from '@angular/core/testing'; import { ElementRef } from '@angular/core'; import { RouterTestingModule } from '@angular/router/testing'; import { HttpClientModule } from '@angular/common/http'; +import {FontAwesomeTestingModule} from '@fortawesome/angular-fontawesome/testing'; import { appRoutes, declarations, imports, providers } from './app.module'; @@ -10,9 +11,12 @@ import { AuthServiceMock } from '../mock-data/auth'; import { CorpusServiceMock } from '../mock-data/corpus'; import { DialogServiceMock } from '../mock-data/dialog'; import { ElasticSearchServiceMock } from '../mock-data/elastic-search'; +import { EntityServiceMock } from '../mock-data/entity'; import { MockCorpusResponse } from '../mock-data/corpus-response'; import { SearchServiceMock } from '../mock-data/search'; -import { ApiService, AuthService, CorpusService, 
DialogService, ElasticSearchService, SearchService } from './services'; +import { ApiService, AuthService, CorpusService, DialogService, SearchService } from './services'; +import { ElasticSearchService } from './services/elastic-search.service'; +import { EntityService } from './services/entity.service'; import { WordmodelsService } from './services/wordmodels.service'; import { WordmodelsServiceMock } from '../mock-data/wordmodels'; import { VisualizationService } from './services/visualization.service'; @@ -25,6 +29,7 @@ import { SimpleStore } from './store/simple-store'; export const commonTestBed = () => { const filteredImports = imports.filter(value => !(value in [HttpClientModule])); filteredImports.push(RouterTestingModule.withRoutes(appRoutes)); + filteredImports.push(FontAwesomeTestingModule) const filteredProviders = providers.filter(provider => !( provider in [ApiService, CorpusService, DialogService, ElasticSearchService, SearchService])); filteredProviders.push( @@ -49,6 +54,10 @@ export const commonTestBed = () => { provide: ElasticSearchService, useValue: new ElasticSearchServiceMock(), }, + { + provide: EntityService, + useValue: new EntityServiceMock(), + }, { provide: ElementRef, useClass: MockElementRef, diff --git a/frontend/src/app/corpus-definitions/corpus-definitions.module.ts b/frontend/src/app/corpus-definitions/corpus-definitions.module.ts new file mode 100644 index 000000000..9bfd7a334 --- /dev/null +++ b/frontend/src/app/corpus-definitions/corpus-definitions.module.ts @@ -0,0 +1,26 @@ +import { NgModule } from '@angular/core'; +import { DefinitionsOverviewComponent } from './definitions-overview/definitions-overview.component'; +import { SharedModule } from '../shared/shared.module'; +import { CreateDefinitionComponent } from './create-definition/create-definition.component'; +import { EditDefinitionComponent } from './edit-definition/edit-definition.component'; +import { DefinitionJsonUploadComponent } from 
'./definition-json-upload/definition-json-upload.component'; + + + +@NgModule({ + declarations: [ + CreateDefinitionComponent, + DefinitionsOverviewComponent, + EditDefinitionComponent, + DefinitionJsonUploadComponent, + ], + exports: [ + CreateDefinitionComponent, + DefinitionsOverviewComponent, + EditDefinitionComponent, + ], + imports: [ + SharedModule + ] +}) +export class CorpusDefinitionsModule { } diff --git a/frontend/src/app/corpus-definitions/create-definition/create-definition.component.html b/frontend/src/app/corpus-definitions/create-definition/create-definition.component.html new file mode 100644 index 000000000..8894fa7c6 --- /dev/null +++ b/frontend/src/app/corpus-definitions/create-definition/create-definition.component.html @@ -0,0 +1,42 @@ +
+
+ + +

New corpus

+ +

+ Upload a JSON definition file to add it as a corpus. +

+ +
+
+ + +
+ +
+ +
+ +
+
+ Could not save corpus: {{error.message}} +
+
+
+
+
diff --git a/frontend/src/app/corpus-definitions/create-definition/create-definition.component.scss b/frontend/src/app/corpus-definitions/create-definition/create-definition.component.scss new file mode 100644 index 000000000..e69de29bb diff --git a/frontend/src/app/corpus-definitions/create-definition/create-definition.component.spec.ts b/frontend/src/app/corpus-definitions/create-definition/create-definition.component.spec.ts new file mode 100644 index 000000000..77a9c51c8 --- /dev/null +++ b/frontend/src/app/corpus-definitions/create-definition/create-definition.component.spec.ts @@ -0,0 +1,23 @@ +import { ComponentFixture, TestBed, waitForAsync } from '@angular/core/testing'; + +import { CreateDefinitionComponent } from './create-definition.component'; +import { commonTestBed } from '../../common-test-bed'; + +describe('CreateDefinitionComponent', () => { + let component: CreateDefinitionComponent; + let fixture: ComponentFixture; + + beforeEach(waitForAsync(() => { + commonTestBed().testingModule.compileComponents(); + })); + + beforeEach(() => { + fixture = TestBed.createComponent(CreateDefinitionComponent); + component = fixture.componentInstance; + fixture.detectChanges(); + }); + + it('should create', () => { + expect(component).toBeTruthy(); + }); +}); diff --git a/frontend/src/app/corpus-definitions/create-definition/create-definition.component.ts b/frontend/src/app/corpus-definitions/create-definition/create-definition.component.ts new file mode 100644 index 000000000..9d7e16469 --- /dev/null +++ b/frontend/src/app/corpus-definitions/create-definition/create-definition.component.ts @@ -0,0 +1,48 @@ +import { Component } from '@angular/core'; +import { actionIcons, formIcons } from '../../shared/icons'; +import { ApiService } from '../../services'; +import { APIEditableCorpus, CorpusDefinition } from '../../models/corpus-definition'; +import * as _ from 'lodash'; +import { Router } from '@angular/router'; +import { HttpErrorResponse } from 
'@angular/common/http'; +import { Subject } from 'rxjs'; + +@Component({ + selector: 'ia-create-definition', + templateUrl: './create-definition.component.html', + styleUrls: ['./create-definition.component.scss'], +}) +export class CreateDefinitionComponent { + actionIcons = actionIcons; + formIcons = formIcons; + + corpus: CorpusDefinition; + + error: Error; + + reset$ = new Subject(); + + constructor(private apiService: ApiService, private router: Router) { + this.corpus = new CorpusDefinition(apiService); + } + + onJSONUpload(data: any) { + this.corpus.setFromDefinition(data); + } + + submit() { + this.error = undefined; + this.corpus.save().subscribe( + (result: APIEditableCorpus) => { + this.router.navigate([ + '/corpus-definitions', + 'edit', + result.id, + ]); + }, + (err: HttpErrorResponse) => { + this.error = err; + } + ); + } +} diff --git a/frontend/src/app/corpus-definitions/definition-json-upload/definition-json-upload.component.html b/frontend/src/app/corpus-definitions/definition-json-upload/definition-json-upload.component.html new file mode 100644 index 000000000..b41f5b177 --- /dev/null +++ b/frontend/src/app/corpus-definitions/definition-json-upload/definition-json-upload.component.html @@ -0,0 +1,25 @@ +
+ +
+ +
+
+ Invalid file +
+
+ An error occurred when reading the file: {{error.message}} +
+
diff --git a/frontend/src/app/corpus-definitions/definition-json-upload/definition-json-upload.component.scss b/frontend/src/app/corpus-definitions/definition-json-upload/definition-json-upload.component.scss new file mode 100644 index 000000000..e69de29bb diff --git a/frontend/src/app/corpus-definitions/definition-json-upload/definition-json-upload.component.spec.ts b/frontend/src/app/corpus-definitions/definition-json-upload/definition-json-upload.component.spec.ts new file mode 100644 index 000000000..c837cb18c --- /dev/null +++ b/frontend/src/app/corpus-definitions/definition-json-upload/definition-json-upload.component.spec.ts @@ -0,0 +1,25 @@ +import { ComponentFixture, TestBed } from '@angular/core/testing'; + +import { DefinitionJsonUploadComponent } from './definition-json-upload.component'; + +describe('DefinitionJsonUploadComponent', () => { + let component: DefinitionJsonUploadComponent; + let fixture: ComponentFixture; + + beforeEach(async () => { + await TestBed.configureTestingModule({ + declarations: [ DefinitionJsonUploadComponent ] + }) + .compileComponents(); + }); + + beforeEach(() => { + fixture = TestBed.createComponent(DefinitionJsonUploadComponent); + component = fixture.componentInstance; + fixture.detectChanges(); + }); + + it('should create', () => { + expect(component).toBeTruthy(); + }); +}); diff --git a/frontend/src/app/corpus-definitions/definition-json-upload/definition-json-upload.component.ts b/frontend/src/app/corpus-definitions/definition-json-upload/definition-json-upload.component.ts new file mode 100644 index 000000000..4e91bf606 --- /dev/null +++ b/frontend/src/app/corpus-definitions/definition-json-upload/definition-json-upload.component.ts @@ -0,0 +1,65 @@ +import { Component, Input, OnChanges, OnDestroy, Output } from '@angular/core'; +import * as _ from 'lodash'; +import { BehaviorSubject, Observable, Subject, from, of } from 'rxjs'; +import { catchError, filter, switchMap, takeUntil, tap } from 'rxjs/operators'; +import 
{ actionIcons } from '../../shared/icons'; + +@Component({ + selector: 'ia-definition-json-upload', + templateUrl: './definition-json-upload.component.html', + styleUrls: ['./definition-json-upload.component.scss'] +}) +export class DefinitionJsonUploadComponent implements OnChanges, OnDestroy { + @Input() reset: Observable; + @Output() upload = new Subject(); + + actionIcons = actionIcons; + + file$: BehaviorSubject = new BehaviorSubject(undefined); + data$: Observable; + error$ = new Subject(); + + private inputChange$ = new Subject(); + private destroy$ = new Subject(); + + constructor() { + this.data$ = this.file$.pipe( + takeUntil(this.destroy$), + tap(() => this.error$.next(undefined)), + filter(_.negate(_.isUndefined)), + switchMap(file => + from( + file.text().then(text => JSON.parse(text)) + ).pipe(catchError((err: Error) => { + this.error$.next(err); + console.log(err.message); + return of(undefined); + })) + ), + ); + + this.data$.subscribe(data => { + this.upload.next(data); + }); + } + + ngOnChanges() { + this.inputChange$.next(); + this.reset?.pipe( + takeUntil(this.inputChange$), + takeUntil(this.destroy$), + ).subscribe(() => this.file$.next(undefined)); + } + + ngOnDestroy(): void { + this.destroy$.next(); + this.inputChange$.complete(); + this.destroy$.complete(); + } + + onJSONUpload(event: InputEvent) { + const files: File[] = event.target['files']; + const file = files ? _.first(files) : undefined; + this.file$.next(file); + } +} diff --git a/frontend/src/app/corpus-definitions/definitions-overview/definitions-overview.component.html b/frontend/src/app/corpus-definitions/definitions-overview/definitions-overview.component.html new file mode 100644 index 000000000..cd1aad704 --- /dev/null +++ b/frontend/src/app/corpus-definitions/definitions-overview/definitions-overview.component.html @@ -0,0 +1,60 @@ +
+
+ + +

Corpus definitions

+ + + +
+ + + + + + + + + + + + + +
CorpusActions
{{corpus.definition.meta.title}} + + + + + +   + + + + + +   + +
+
+
+
diff --git a/frontend/src/app/corpus-definitions/definitions-overview/definitions-overview.component.scss b/frontend/src/app/corpus-definitions/definitions-overview/definitions-overview.component.scss new file mode 100644 index 000000000..e69de29bb diff --git a/frontend/src/app/corpus-definitions/definitions-overview/definitions-overview.component.spec.ts b/frontend/src/app/corpus-definitions/definitions-overview/definitions-overview.component.spec.ts new file mode 100644 index 000000000..3893a65e7 --- /dev/null +++ b/frontend/src/app/corpus-definitions/definitions-overview/definitions-overview.component.spec.ts @@ -0,0 +1,23 @@ +import { ComponentFixture, TestBed, waitForAsync } from '@angular/core/testing'; + +import { DefinitionsOverviewComponent } from './definitions-overview.component'; +import { commonTestBed } from '../../common-test-bed'; + +describe('DefinitionsOverviewComponent', () => { + let component: DefinitionsOverviewComponent; + let fixture: ComponentFixture; + + beforeEach(waitForAsync(() => { + commonTestBed().testingModule.compileComponents(); + })); + + beforeEach(() => { + fixture = TestBed.createComponent(DefinitionsOverviewComponent); + component = fixture.componentInstance; + fixture.detectChanges(); + }); + + it('should create', () => { + expect(component).toBeTruthy(); + }); +}); diff --git a/frontend/src/app/corpus-definitions/definitions-overview/definitions-overview.component.ts b/frontend/src/app/corpus-definitions/definitions-overview/definitions-overview.component.ts new file mode 100644 index 000000000..399950560 --- /dev/null +++ b/frontend/src/app/corpus-definitions/definitions-overview/definitions-overview.component.ts @@ -0,0 +1,28 @@ +import { Component } from '@angular/core'; +import { actionIcons } from '../../shared/icons'; +import { ApiService} from '../../services'; +import { Observable } from 'rxjs'; +import { APIEditableCorpus } from '../../models/corpus-definition'; +import * as _ from 'lodash'; + +@Component({ + 
selector: 'ia-definitions-overview', + templateUrl: './definitions-overview.component.html', + styleUrls: ['./definitions-overview.component.scss'] +}) +export class DefinitionsOverviewComponent { + actionIcons = actionIcons; + + corpora$: Observable; + + constructor(private apiService: ApiService) { + this.corpora$ = this.apiService.corpusDefinitions(); + } + + delete(corpus: APIEditableCorpus) { + this.apiService.deleteCorpus(corpus.id).subscribe(response => { + console.log(response); + this.corpora$ = this.apiService.corpusDefinitions(); + }); + } +} diff --git a/frontend/src/app/corpus-definitions/edit-definition/edit-definition.component.html b/frontend/src/app/corpus-definitions/edit-definition/edit-definition.component.html new file mode 100644 index 000000000..601cff15f --- /dev/null +++ b/frontend/src/app/corpus-definitions/edit-definition/edit-definition.component.html @@ -0,0 +1,65 @@ +
+
+ + +

Edit corpus "{{corpus.definition?.meta.title || '...'}}"

+ +

+ You can download this corpus definition as a JSON file. +

+ +
+ +
+ +

+ You can update this corpus by uploading a new JSON definition for it. +

+ +
+
+ + +
+ +
+ +   + +
+ +
+
+ Could not save corpus: {{error.message}} +
+
+
+
+
diff --git a/frontend/src/app/corpus-definitions/edit-definition/edit-definition.component.scss b/frontend/src/app/corpus-definitions/edit-definition/edit-definition.component.scss new file mode 100644 index 000000000..e69de29bb diff --git a/frontend/src/app/corpus-definitions/edit-definition/edit-definition.component.spec.ts b/frontend/src/app/corpus-definitions/edit-definition/edit-definition.component.spec.ts new file mode 100644 index 000000000..42a4f7838 --- /dev/null +++ b/frontend/src/app/corpus-definitions/edit-definition/edit-definition.component.spec.ts @@ -0,0 +1,23 @@ +import { ComponentFixture, TestBed, waitForAsync } from '@angular/core/testing'; + +import { EditDefinitionComponent } from './edit-definition.component'; +import { commonTestBed } from '../../common-test-bed'; + +describe('EditDefinitionComponent', () => { + let component: EditDefinitionComponent; + let fixture: ComponentFixture; + + beforeEach(waitForAsync(() => { + commonTestBed().testingModule.compileComponents(); + })); + + beforeEach(() => { + fixture = TestBed.createComponent(EditDefinitionComponent); + component = fixture.componentInstance; + fixture.detectChanges(); + }); + + it('should create', () => { + expect(component).toBeTruthy(); + }); +}); diff --git a/frontend/src/app/corpus-definitions/edit-definition/edit-definition.component.ts b/frontend/src/app/corpus-definitions/edit-definition/edit-definition.component.ts new file mode 100644 index 000000000..262b26286 --- /dev/null +++ b/frontend/src/app/corpus-definitions/edit-definition/edit-definition.component.ts @@ -0,0 +1,56 @@ +import { Component, } from '@angular/core'; +import { actionIcons, formIcons } from '../../shared/icons'; +import { Subject } from 'rxjs'; +import { CorpusDefinition } from '../../models/corpus-definition'; +import { ApiService } from '../../services'; +import { ActivatedRoute } from '@angular/router'; +import * as _ from 'lodash'; +import { HttpErrorResponse } from '@angular/common/http'; + 
+@Component({ + selector: 'ia-edit-definition', + templateUrl: './edit-definition.component.html', + styleUrls: ['./edit-definition.component.scss'], +}) +export class EditDefinitionComponent { + actionIcons = actionIcons; + formIcons = formIcons; + + reset$: Subject = new Subject(); + + corpus: CorpusDefinition; + + error: Error; + + constructor( + private apiService: ApiService, + private route: ActivatedRoute, + + ) { + const id = parseInt(this.route.snapshot.params['corpusID'], 10); + this.corpus = new CorpusDefinition(this.apiService, id); + } + + downloadJSON() { + const data = this.corpus.toDefinition(); + const content = JSON.stringify(data, undefined, 4); + const blob = new Blob([content], { type: `application/json;charset=utf-8`, endings: 'native' }); + const filename = data.name + '.json'; + saveAs(blob, filename); + } + + onJSONUpload(data: any) { + this.corpus.setFromDefinition(data); + } + + submit() { + this.corpus.save().subscribe( + () => this.reset(), + (err: HttpErrorResponse) => this.error = err, + ); + } + + reset() { + this.reset$.next(); + } +} diff --git a/frontend/src/app/corpus-selection/corpus-selection.component.html b/frontend/src/app/corpus-selection/corpus-selection.component.html index cc90ff34a..a4c4fd1c2 100644 --- a/frontend/src/app/corpus-selection/corpus-selection.component.html +++ b/frontend/src/app/corpus-selection/corpus-selection.component.html @@ -1,11 +1,20 @@
+ + + Corpus definitions + +

Welcome

Select a corpus to search through

+
diff --git a/frontend/src/app/corpus-selection/corpus-selection.component.ts b/frontend/src/app/corpus-selection/corpus-selection.component.ts index 6bcdac5cb..f22a3dc87 100644 --- a/frontend/src/app/corpus-selection/corpus-selection.component.ts +++ b/frontend/src/app/corpus-selection/corpus-selection.component.ts @@ -1,6 +1,10 @@ import { Component, Input, OnInit } from '@angular/core'; import { Corpus } from '../models/corpus'; import * as _ from 'lodash'; +import { AuthService } from '../services'; +import { map } from 'rxjs/operators'; +import { Observable } from 'rxjs'; +import { actionIcons } from '../shared/icons'; @Component({ @@ -14,7 +18,15 @@ export class CorpusSelectionComponent implements OnInit { filteredItems: Corpus[]; - constructor() { } + showManageLink$: Observable; + + actionIcons = actionIcons; + + constructor(private authService: AuthService) { + this.showManageLink$ = this.authService.currentUser$.pipe( + map((user) => user?.isAdmin) + ); + } get displayItems(): Corpus[] { if (_.isUndefined(this.filteredItems)) { diff --git a/frontend/src/app/document-page/document-page.component.html b/frontend/src/app/document-page/document-page.component.html index 26ebf2ca6..8c2635901 100644 --- a/frontend/src/app/document-page/document-page.component.html +++ b/frontend/src/app/document-page/document-page.component.html @@ -1,7 +1,8 @@
-
+
+ Show named entities
@@ -14,7 +15,7 @@
- +
diff --git a/frontend/src/app/document-page/document-page.component.ts b/frontend/src/app/document-page/document-page.component.ts index 4d44a7d61..488accb81 100644 --- a/frontend/src/app/document-page/document-page.component.ts +++ b/frontend/src/app/document-page/document-page.component.ts @@ -19,10 +19,14 @@ export class DocumentPageComponent implements OnInit { documentId: string; document: FoundDocument; - documentNotFound: boolean; + documentFound: boolean; documentIcons = documentIcons; + showNEROption: boolean; + + showNamedEntities = false; + constructor( private corpusService: CorpusService, private elasticSearchService: ElasticSearchService, @@ -55,6 +59,7 @@ export class DocumentPageComponent implements OnInit { ]).subscribe(([params, corpus]) => { this.corpus = corpus; this.documentId = params['id']; + this.showNEROption = this.corpus.hasNamedEntities; this.getDocument(this.documentId); this.title.setTitle(pageTitle(`Document in ${corpus.title}`)); }); @@ -63,9 +68,13 @@ export class DocumentPageComponent implements OnInit { getDocument(id: string) { this.elasticSearchService.getDocumentById(id, this.corpus).then(document => { this.document = document; - this.documentNotFound = _.isUndefined(this.document); + this.documentFound = !_.isUndefined(this.document); }); } + toggleNER(active: boolean): void { + this.showNamedEntities = active; + } + } diff --git a/frontend/src/app/document-view/document-view.component.html b/frontend/src/app/document-view/document-view.component.html index 8f0fc9078..26c464270 100644 --- a/frontend/src/app/document-view/document-view.component.html +++ b/frontend/src/app/document-view/document-view.component.html @@ -9,7 +9,7 @@ - + Your tags @@ -21,9 +21,9 @@ + [innerHtml]="field | elasticsearchHighlight:document | paragraph"> - {{displayGeoPointField(field)}} + {{field | geoData:document}} {{document.fieldValue(field)}} @@ -38,10 +38,30 @@
-
+
+
+ + + {{textSegment.text}} + + + + {{textSegment.text | paragraph}} + + +
+ +
+
+
+ +
+
+
diff --git a/frontend/src/app/document-view/document-view.component.scss b/frontend/src/app/document-view/document-view.component.scss index 9fa3f278e..af60ed17b 100644 --- a/frontend/src/app/document-view/document-view.component.scss +++ b/frontend/src/app/document-view/document-view.component.scss @@ -8,5 +8,3 @@ table { color: $primary; } } - - \ No newline at end of file diff --git a/frontend/src/app/document-view/document-view.component.spec.ts b/frontend/src/app/document-view/document-view.component.spec.ts index f59858f51..9f1e799ee 100644 --- a/frontend/src/app/document-view/document-view.component.spec.ts +++ b/frontend/src/app/document-view/document-view.component.spec.ts @@ -20,10 +20,9 @@ describe('DocumentViewComponent', () => { fixture = TestBed.createComponent(DocumentViewComponent); component = fixture.componentInstance; component.corpus = _.merge({ - scanImageType: 'farout_image_type', - fields: [mockField] + scanImageType: 'farout_image_type' }, mockCorpus); - component.document = makeDocument({ great_field: 'Hello world!' 
}); + component.document = makeDocument({ great_field: 'Hello world!', speech: 'Wally was last seen in Paris' }); fixture.detectChanges(); }); @@ -31,11 +30,8 @@ describe('DocumentViewComponent', () => { expect(component).toBeTruthy(); }); - it('should render fields', async () => { - await fixture.whenStable(); - + it('should render fields', () => { expect(component.propertyFields).toEqual([mockField]); - const debug = fixture.debugElement.queryAll(By.css('[data-test-field-value]')); expect(debug.length).toEqual(1); // number of fields const element = debug[0].nativeElement; @@ -48,4 +44,14 @@ describe('DocumentViewComponent', () => { expect(debug[0].attributes['id']).toBe('tab-speech'); expect(debug[1].attributes['id']).toBe('tab-scan'); }); + + it('shows named entities if showEntities is true', async () => { + expect(fixture.debugElement.query(By.css('ia-entity-legend'))).toBeFalsy(); + component.showEntities = true; + fixture.detectChanges(); + await fixture.whenStable(); + fixture.detectChanges(); + expect(fixture.debugElement.query(By.css('ia-entity-legend'))).toBeTruthy(); + }); + }); diff --git a/frontend/src/app/document-view/document-view.component.ts b/frontend/src/app/document-view/document-view.component.ts index 81a35aa27..203c3d38b 100644 --- a/frontend/src/app/document-view/document-view.component.ts +++ b/frontend/src/app/document-view/document-view.component.ts @@ -3,7 +3,7 @@ import { Component, Input, OnChanges, SimpleChanges } from '@angular/core'; import { CorpusField, FoundDocument, Corpus, QueryModel } from '../models/index'; import { DocumentView } from '../models/document-page'; import * as _ from 'lodash'; -import { documentIcons } from '../shared/icons'; +import { documentIcons, entityIcons } from '../shared/icons'; @Component({ selector: 'ia-document-view', @@ -24,7 +24,11 @@ export class DocumentViewComponent implements OnChanges { @Input() public view: DocumentView; + @Input() + public showEntities: boolean; + documentIcons = 
documentIcons; + entityIcons = entityIcons; /** active tab on opening */ activeTab: string; @@ -75,62 +79,4 @@ export class DocumentViewComponent implements OnChanges { return field.mappingType === 'geo_point'; } - displayGeoPointField(field: CorpusField) { - let latitude = this.document.fieldValue(field)[field.name][1]; - let longitude = this.document.fieldValue(field)[field.name][0]; - // Round to 2 decimal places - latitude = Math.round(latitude * 100) / 100; - longitude = Math.round(longitude * 100) / 100; - return `Lat: ${latitude}; Lon: ${longitude}`; - } - - /** - * Checks if user has selected fields in the queryModel and whether current field is among them - * Used to check which fields need to be highlighted - */ - selectedFieldsContain(field: CorpusField) { - if (this.queryModel && this.queryModel.searchFields && this.queryModel.searchFields.includes(field)) { - return true; - } else if (this.queryModel && !this.queryModel.searchFields) { - return true; // if there are no selected fields, return true for all fields - } else { - return false; - } - } - - stripTags(htmlString: string){ - const parseHTML= new DOMParser().parseFromString(htmlString, 'text/html'); - return parseHTML.body.textContent || ''; - } - - formatInnerHtml(field: CorpusField) { - const fieldValue = this.document.fieldValues[field.name]; - - if (_.isEmpty(fieldValue)) { - return; - } - - const highlighted = this.highlightedInnerHtml(field); - return this.addParagraphTags(highlighted); - } - - - highlightedInnerHtml(field: CorpusField) { - let highlighted = this.document.fieldValues[field.name]; - if (this.document.highlight && this.document.highlight.hasOwnProperty(field.name) && - this.selectedFieldsContain(field)) { // only apply highlights to selected search fields - for (const highlight of this.document.highlight[field.name]) { - const stripped_highlight = this.stripTags(highlight); - highlighted = highlighted.replace(stripped_highlight, highlight); - } - return highlighted; - } else 
{ - return this.document.fieldValues[field.name]; - } - } - - addParagraphTags(content: string | string[]) { - const paragraphs = typeof content === 'string' ? content.split('\n') : content; - return paragraphs.map(p => `

${p}

`).join(' '); - } } diff --git a/frontend/src/app/document/document-popup/document-popup.component.html b/frontend/src/app/document/document-popup/document-popup.component.html index 6f379e664..705199abb 100644 --- a/frontend/src/app/document/document-popup/document-popup.component.html +++ b/frontend/src/app/document/document-popup/document-popup.component.html @@ -1,9 +1,12 @@ + [responsive]="true" [maximizable]="true" [dismissableMask]="true" [draggable]="true" [resizable]="false" [blockScroll]="true"> + + Document {{document.position}} of {{page.total}} + Show named entities + - +
diff --git a/frontend/src/app/document/document-popup/document-popup.component.spec.ts b/frontend/src/app/document/document-popup/document-popup.component.spec.ts index 74546db68..b497ceecf 100644 --- a/frontend/src/app/document/document-popup/document-popup.component.spec.ts +++ b/frontend/src/app/document/document-popup/document-popup.component.spec.ts @@ -1,7 +1,14 @@ -import { ComponentFixture, TestBed, waitForAsync } from '@angular/core/testing'; +import { ComponentFixture, TestBed, fakeAsync, tick, waitForAsync } from '@angular/core/testing'; +import { By } from '@angular/platform-browser'; import { DocumentPopupComponent } from './document-popup.component'; import { commonTestBed } from '../../common-test-bed'; +import { makeDocument } from '../../../mock-data/constructor-helpers'; +import { mockCorpus, mockCorpus2, mockField } from '../../../mock-data/corpus'; +import { DocumentPage } from '../../models/document-page'; +import { QueryModel } from '../../models'; +import { query } from '@angular/animations'; + describe('DocumentPopupComponent', () => { let component: DocumentPopupComponent; @@ -14,10 +21,27 @@ describe('DocumentPopupComponent', () => { beforeEach(() => { fixture = TestBed.createComponent(DocumentPopupComponent); component = fixture.componentInstance; + const document = makeDocument({ great_field: 'Hello world!' 
}); + component.document = document; + component.page = new DocumentPage([document], 1, [mockField]); + component.queryModel = new QueryModel(mockCorpus); fixture.detectChanges(); }); it('should create', () => { expect(component).toBeTruthy(); }); + + it('does not show the NER toggle for corpora without named entities', () => { + expect(fixture.debugElement.query(By.css('ia-toggle'))).toBeFalsy(); + }); + + it('shows the NER toggle for corpora with named entities', () => { + const setModel = component.queryModel; + const queryModel = new QueryModel(mockCorpus2); + component.queryModel = queryModel; + component.ngOnChanges({queryModel: {previousValue: setModel, currentValue: queryModel, firstChange: false, isFirstChange: null}}); + fixture.detectChanges(); + expect(fixture.debugElement.query(By.css('ia-toggle'))).toBeTruthy(); + }); }); diff --git a/frontend/src/app/document/document-popup/document-popup.component.ts b/frontend/src/app/document/document-popup/document-popup.component.ts index 98109ffa5..b41997c14 100644 --- a/frontend/src/app/document/document-popup/document-popup.component.ts +++ b/frontend/src/app/document/document-popup/document-popup.component.ts @@ -4,7 +4,7 @@ import { takeUntil } from 'rxjs/operators'; import * as _ from 'lodash'; import { FoundDocument, QueryModel } from '../../models'; import { Subject } from 'rxjs'; -import { documentIcons, actionIcons } from '../../shared/icons'; +import { documentIcons, actionIcons, corpusIcons } from '../../shared/icons'; @Component({ selector: 'ia-document-popup', @@ -23,6 +23,9 @@ export class DocumentPopupComponent implements OnChanges, OnDestroy { actionIcons = actionIcons; documentIcons = documentIcons; + showNamedEntities = false; + showNEROption = false; + private refresh$ = new Subject(); get documentPageLink(): string[] { @@ -38,6 +41,9 @@ export class DocumentPopupComponent implements OnChanges, OnDestroy { } ngOnChanges(changes: SimpleChanges): void { + if (changes.queryModel) { + 
this.showNEROption = this.queryModel.corpus.hasNamedEntities; + } if (changes.page) { this.refresh$.next(); this.focusUpdate(); @@ -63,4 +69,8 @@ export class DocumentPopupComponent implements OnChanges, OnDestroy { this.visible = false; } } + + toggleNER(active: boolean): void { + this.showNamedEntities = active; + } } diff --git a/frontend/src/app/document/document-preview/document-preview.component.html b/frontend/src/app/document/document-preview/document-preview.component.html index 87429833f..248e7a632 100644 --- a/frontend/src/app/document/document-preview/document-preview.component.html +++ b/frontend/src/app/document/document-preview/document-preview.component.html @@ -15,7 +15,7 @@ + [innerHtml]="document.fieldValue(field) | snippet"> diff --git a/frontend/src/app/document/document.module.ts b/frontend/src/app/document/document.module.ts index 8674a5677..67822bcc8 100644 --- a/frontend/src/app/document/document.module.ts +++ b/frontend/src/app/document/document.module.ts @@ -9,8 +9,8 @@ import { TagModule } from '../tag/tag.module'; import { DocumentPopupComponent } from './document-popup/document-popup.component'; import { DialogModule } from 'primeng/dialog'; import { DocumentPreviewComponent } from './document-preview/document-preview.component'; - - +import { EntityLegendComponent } from './entity-legend/entity-legend.component'; +import { ElasticsearchHighlightPipe, GeoDataPipe, ParagraphPipe, SnippetPipe } from '../shared/pipes'; @NgModule({ declarations: [ @@ -19,6 +19,11 @@ import { DocumentPreviewComponent } from './document-preview/document-preview.co SearchRelevanceComponent, DocumentPopupComponent, DocumentPreviewComponent, + EntityLegendComponent, + ElasticsearchHighlightPipe, + GeoDataPipe, + ParagraphPipe, + SnippetPipe ], imports: [ DialogModule, @@ -31,6 +36,7 @@ import { DocumentPreviewComponent } from './document-preview/document-preview.co DocumentViewComponent, DocumentPageComponent, DocumentPopupComponent, + EntityLegendComponent, 
SearchRelevanceComponent, ] }) diff --git a/frontend/src/app/document/entity-legend/entity-legend.component.html b/frontend/src/app/document/entity-legend/entity-legend.component.html new file mode 100644 index 000000000..41f9622f6 --- /dev/null +++ b/frontend/src/app/document/entity-legend/entity-legend.component.html @@ -0,0 +1,13 @@ +
+
    +
  • + + + {{entity.slice(0,1).toUpperCase() + entity.slice(1)}} + +
  • +
+ + No named entities were found in this text. + +
diff --git a/frontend/src/app/document/entity-legend/entity-legend.component.scss b/frontend/src/app/document/entity-legend/entity-legend.component.scss new file mode 100644 index 000000000..459455661 --- /dev/null +++ b/frontend/src/app/document/entity-legend/entity-legend.component.scss @@ -0,0 +1,8 @@ +ul { + list-style-type: none; + margin-left: 0; +} + +.no-entities-message { + opacity: 0.6; +} diff --git a/frontend/src/app/document/entity-legend/entity-legend.component.spec.ts b/frontend/src/app/document/entity-legend/entity-legend.component.spec.ts new file mode 100644 index 000000000..69c35776e --- /dev/null +++ b/frontend/src/app/document/entity-legend/entity-legend.component.spec.ts @@ -0,0 +1,25 @@ +import { ComponentFixture, TestBed } from '@angular/core/testing'; + +import { EntityLegendComponent } from './entity-legend.component'; + +describe('EntitiesComponent', () => { + let component: EntityLegendComponent; + let fixture: ComponentFixture; + + beforeEach(async () => { + await TestBed.configureTestingModule({ + declarations: [ EntityLegendComponent ] + }) + .compileComponents(); + }); + + beforeEach(() => { + fixture = TestBed.createComponent(EntityLegendComponent); + component = fixture.componentInstance; + fixture.detectChanges(); + }); + + it('should create', () => { + expect(component).toBeTruthy(); + }); +}); diff --git a/frontend/src/app/document/entity-legend/entity-legend.component.ts b/frontend/src/app/document/entity-legend/entity-legend.component.ts new file mode 100644 index 000000000..882fcd59c --- /dev/null +++ b/frontend/src/app/document/entity-legend/entity-legend.component.ts @@ -0,0 +1,28 @@ +import { Component, Input, OnChanges } from '@angular/core'; +import * as _ from 'lodash'; + +import { entityIcons } from '../../shared/icons'; +import { FieldEntities } from '../../models'; + +@Component({ + selector: 'ia-entity-legend', + templateUrl: './entity-legend.component.html', + styleUrls: ['./entity-legend.component.scss'] +}) 
+export class EntityLegendComponent implements OnChanges { + @Input() entityAnnotations: FieldEntities[]; + + public entityIcons = entityIcons; + public entities: string[]; + + constructor() { } + + ngOnChanges(): void { + if (!this.entityAnnotations) { + this.entities = null; + } else { + this.entities = _.uniq(this.entityAnnotations.map((item) => item.entity)).filter((value) => value !=='flat'); + } + } + +} diff --git a/frontend/src/app/manual/manual.module.ts b/frontend/src/app/manual/manual.module.ts index ac0d5b3fe..61404fcbd 100644 --- a/frontend/src/app/manual/manual.module.ts +++ b/frontend/src/app/manual/manual.module.ts @@ -4,7 +4,7 @@ import { ManualNavigationComponent } from './manual-navigation.component'; import { ManualComponent } from './manual.component'; import { AboutComponent } from '../about/about.component'; import { PrivacyComponent } from '../privacy/privacy.component'; - +import { RegexHighlightPipe } from '../shared/pipes'; @NgModule({ @@ -13,6 +13,7 @@ import { PrivacyComponent } from '../privacy/privacy.component'; ManualComponent, ManualNavigationComponent, PrivacyComponent, + RegexHighlightPipe ], imports: [ SharedModule diff --git a/frontend/src/app/models/corpus-definition.spec.ts b/frontend/src/app/models/corpus-definition.spec.ts new file mode 100644 index 000000000..234ea35ef --- /dev/null +++ b/frontend/src/app/models/corpus-definition.spec.ts @@ -0,0 +1,53 @@ +import { TestBed } from '@angular/core/testing'; +import { ApiService } from '../services'; +import { CorpusDefinition } from './corpus-definition'; +import { ApiServiceMock } from '../../mock-data/api'; +import { mockCorpusDefinition } from '../../mock-data/corpus-definition'; +import * as _ from 'lodash'; + + +describe('CorpusDefinition', () => { + let apiService: ApiService; + + beforeEach(() => { + TestBed.configureTestingModule({ + providers: [ + { provide: ApiService, useClass: ApiServiceMock } + ] + }); + apiService = TestBed.inject(ApiService); + }); + + 
it('should create for an existing corpus', () => { + const corpus = new CorpusDefinition(apiService, 1); + expect(corpus).toBeTruthy(); + expect(corpus.id).toBe(1); + }); + + it('should create for a new corpus', () => { + const corpus = new CorpusDefinition(apiService); + expect(corpus).toBeTruthy(); + expect(corpus.id).toBeUndefined(); + }); + + it('should save updates', () => { + const createSpy = spyOn(apiService, 'createCorpus').and.callThrough(); + const updateSpy = spyOn(apiService, 'updateCorpus').and.callThrough(); + + const corpus = new CorpusDefinition(apiService); + corpus.definition = mockCorpusDefinition; + corpus.save(); + + expect(createSpy).toHaveBeenCalledTimes(1); + expect(updateSpy).not.toHaveBeenCalled(); + expect(corpus.id).toBeDefined(); + + const newDefinition = _.set(_.cloneDeep(mockCorpusDefinition), ['meta', 'title'], 'Different title'); + corpus.setFromDefinition(newDefinition); + corpus.save(); + + expect(createSpy).toHaveBeenCalledTimes(1); + expect(updateSpy).toHaveBeenCalledTimes(1); + expect(corpus.definition.meta.title).toBe('Different title'); + }); +}); diff --git a/frontend/src/app/models/corpus-definition.ts b/frontend/src/app/models/corpus-definition.ts new file mode 100644 index 000000000..e5318fba9 --- /dev/null +++ b/frontend/src/app/models/corpus-definition.ts @@ -0,0 +1,125 @@ +import * as _ from 'lodash'; +import { ApiService } from '../services'; +import { BehaviorSubject, Observable } from 'rxjs'; +import { share } from 'rxjs/operators'; + +export interface APICorpusField { + name: string; + display_name: string; + description: string; + type: 'text_content'|'text_metadata'|'url'|'integer'|'float'|'date'|'boolean'|'geo_point'; + options: { + search: boolean; + filter: 'show'|'hide'|'none'; + preview: boolean; + visualize: boolean; + sort: boolean; + hidden: boolean; + }; + language?: string; + extract: { + column: string; + }; +} + +export interface APICorpusDefinition { + name: string; + meta: { + title: string; + 
category: string; + description: string; + languages: string[]; + date_range: { + min: string; + max: string; + }; + }; + source_data: { + type: 'csv'; + options?: { + delimiter?: ','|';'|'\t'; + }; + }; + fields: APICorpusField[]; + options?: { + language_field?: string; + document_context?: { + context_fields: string[]; + context_display_name: string; + sort?: { + field: string; + ascending: boolean; + }; + }; + default_sort?: { + field: string; + ascending: boolean; + }; + }; +}; + +export interface APIEditableCorpus { + id?: number; + active: boolean; + definition: APICorpusDefinition; +}; + +export class CorpusDefinition { + active = false; + loading$ = new BehaviorSubject(true); + + definition: APICorpusDefinition; + + + constructor(private apiService: ApiService, public id?: number) { + if (this.id) { + this.apiService.corpusDefinition(this.id).subscribe(result => + this.setFromAPIData(result) + ); + } else { + this.loading$.next(false); + } + } + + /** update the corpus state from a JSON definition */ + setFromDefinition(definition: APICorpusDefinition) { + this.definition = definition; + } + + /** return the JSON definition for the corpus state */ + toDefinition(): APICorpusDefinition { + return this.definition; + } + + /** whether the corpus definition contains all data necessary for saving */ + isComplete() { + return !_.isUndefined(this.definition); + } + + /** save the corpus state in the database */ + save(): Observable { + this.loading$.next(true); + const data = this.toAPIData(); + const request$ = this.id ? 
+ this.apiService.updateCorpus(this.id, data) : + this.apiService.createCorpus(data); + const result$ = request$.pipe(share()); + result$.subscribe(result => this.setFromAPIData(result)); + return result$; + } + + private toAPIData(): APIEditableCorpus { + return { + id: this.id, + active: this.active, + definition: this.toDefinition() + }; + } + + private setFromAPIData(result: APIEditableCorpus) { + this.id = result.id; + this.active = result.active; + this.setFromDefinition(result.definition); + this.loading$.next(false); + } +} diff --git a/frontend/src/app/models/corpus.ts b/frontend/src/app/models/corpus.ts index 4906e2966..4e58d42fe 100644 --- a/frontend/src/app/models/corpus.ts +++ b/frontend/src/app/models/corpus.ts @@ -29,6 +29,7 @@ export class Corpus { public wordModelsPresent: boolean, public languages: string[], public category: string, + public hasNamedEntities: boolean, public documentContext?: DocumentContext, public newHighlight?: boolean, public defaultSort?: SortState, diff --git a/frontend/src/app/models/found-document.spec.ts b/frontend/src/app/models/found-document.spec.ts index fa8452f6c..b286fa8ec 100644 --- a/frontend/src/app/models/found-document.spec.ts +++ b/frontend/src/app/models/found-document.spec.ts @@ -1,14 +1,17 @@ -import { TestBed, waitForAsync } from '@angular/core/testing'; +import { TestBed, fakeAsync, waitForAsync } from '@angular/core/testing'; +import * as _ from 'lodash'; +import { reduce, take } from 'rxjs/operators'; +import { Observable } from 'rxjs'; import { makeDocument } from '../../mock-data/constructor-helpers'; import { mockCorpus, mockCorpus3 } from '../../mock-data/corpus'; +import { EntityServiceMock } from '../../mock-data/entity'; +import { TagServiceMock, mockTags } from '../../mock-data/tag'; import { FoundDocument } from './found-document'; +import { EntityService } from '../services/entity.service'; import { TagService } from '../services/tag.service'; -import { TagServiceMock, mockTags } from 
'../../mock-data/tag'; -import * as _ from 'lodash'; -import { reduce, take } from 'rxjs/operators'; -import { Observable } from 'rxjs'; import { Tag } from './tag'; + const maxScore = 2.9113607; const mockResponse = { _index: 'troonredes', @@ -33,19 +36,20 @@ const mockResponse = { }; describe('FoundDocument', () => { - let tagService: TagService; + const mockTagService = new TagServiceMock() as any; + const mockEntityService = new EntityServiceMock() as any; beforeEach(() => { TestBed.configureTestingModule({ providers: [ - { provide: TagService, useValue: new TagServiceMock() } + { provide: TagService, useClass: TagServiceMock }, + { provide: EntityService, useClass: EntityServiceMock } ] }); - tagService = TestBed.inject(TagService); }); it('should construct from an elasticsearch response', () => { - const document = new FoundDocument(tagService, mockCorpus, mockResponse, maxScore); + const document = new FoundDocument(mockTagService, mockEntityService, mockCorpus, mockResponse, maxScore); expect(document.id).toBe('1994_troonrede'); expect(document.fieldValues['monarch']).toBe('Beatrix'); @@ -90,4 +94,5 @@ describe('FoundDocument', () => { ]); }); })); + }); diff --git a/frontend/src/app/models/found-document.ts b/frontend/src/app/models/found-document.ts index 96b40cead..6fcbcb134 100644 --- a/frontend/src/app/models/found-document.ts +++ b/frontend/src/app/models/found-document.ts @@ -1,11 +1,14 @@ import * as _ from 'lodash'; +import { map, mergeMap, shareReplay, take } from 'rxjs/operators'; + import { makeContextParams } from '../utils/document-context'; import { Corpus, CorpusField } from './corpus'; import { FieldValues, HighlightResult, SearchHit } from './elasticsearch'; import { Tag } from './tag'; import { Observable, Subject, merge, timer } from 'rxjs'; +import { EntityService } from '../services/entity.service'; import { TagService } from '../services/tag.service'; -import { map, mergeMap, shareReplay, take, tap } from 'rxjs/operators'; +import { 
FieldEntities } from './search-results'; export class FoundDocument { id: string; @@ -27,13 +30,16 @@ export class FoundDocument { /** tags created on the document */ tags$: Observable; + /** named entities associated with the document */ + entityAnnotations$: Observable<{[fieldName: string]: FieldEntities[]}>; private tagsChanged$ = new Subject(); constructor( private tagService: TagService, + private entityService: EntityService, public corpus: Corpus, hit: SearchHit, - maxScore: number = 1 + maxScore: number = 1, ) { this.id = hit._id; this.relevance = hit._score / maxScore; @@ -42,12 +48,19 @@ export class FoundDocument { const created$ = timer(0); // observable of the moment of construction (i.e. now) - // tags need to refreshed when the document is created, and + // tags need to be refreshed when the document is created, and // after each update + // shareReplay shares the value over all observers: + // add/removeTag, async pipe in document-tags.component template this.tags$ = merge(created$, this.tagsChanged$).pipe( mergeMap(() => this.fetchTags()), shareReplay(1), ); + + this.entityAnnotations$ = created$.pipe( + mergeMap(() => this.fetchAnnotatedEntities()), + shareReplay(1), + ); } /** @@ -112,6 +125,10 @@ export class FoundDocument { ); } + private fetchAnnotatedEntities(): Observable<{[fieldName: string]: FieldEntities[]}> { + return this.entityService.getDocumentEntities(this.corpus, this.id); + } + private setTags(tags: Tag[]): Observable { return this.tagService.setDocumentTags(this, tags); } @@ -119,4 +136,5 @@ export class FoundDocument { private fetchTags(): Observable { return this.tagService.getDocumentTags(this); } + } diff --git a/frontend/src/app/models/search-results.ts b/frontend/src/app/models/search-results.ts index ad1593a51..1eac5b1ff 100644 --- a/frontend/src/app/models/search-results.ts +++ b/frontend/src/app/models/search-results.ts @@ -83,6 +83,14 @@ export interface QueryFeedback { similarTerms?: string[]; } +export interface 
FieldEntities { + [entityType: string] : string +} + +export interface NamedEntitiesResult { + [fieldName: string]: FieldEntities[] +} + export interface TaskResult { task_ids: string[] }; export interface TaskSuccess { diff --git a/frontend/src/app/param/param-directive.ts b/frontend/src/app/param/param-directive.ts index cb15c0c81..364cb82f9 100644 --- a/frontend/src/app/param/param-directive.ts +++ b/frontend/src/app/param/param-directive.ts @@ -1,5 +1,5 @@ import { Directive, OnDestroy, OnInit } from '@angular/core'; -import { ActivatedRoute, Params, Router } from '@angular/router'; +import { ActivatedRoute, ParamMap, Params, Router } from '@angular/router'; import { Subscription } from 'rxjs'; import * as _ from 'lodash'; @@ -57,5 +57,5 @@ export abstract class ParamDirective implements OnDestroy, OnInit { abstract teardown(); - abstract setStateFromParams(params: Params); + abstract setStateFromParams(params: ParamMap); } diff --git a/frontend/src/app/search/index.ts b/frontend/src/app/search/index.ts index 11e8bc3bf..40905695a 100644 --- a/frontend/src/app/search/index.ts +++ b/frontend/src/app/search/index.ts @@ -1,4 +1,3 @@ -export * from './highlight.pipe'; export * from './search.component'; export * from './search-relevance.component'; export * from './search-results.component'; diff --git a/frontend/src/app/services/api.service.ts b/frontend/src/app/services/api.service.ts index f2d33bea7..b05bae0e7 100644 --- a/frontend/src/app/services/api.service.ts +++ b/frontend/src/app/services/api.service.ts @@ -31,6 +31,7 @@ import { } from '../models/index'; import { environment } from '../../environments/environment'; import * as _ from 'lodash'; +import { APICorpusDefinition, APIEditableCorpus } from '../models/corpus-definition'; interface SolisLoginResponse { success: boolean; @@ -234,6 +235,28 @@ export class ApiService { return this.http.get('/api/corpus/'); } + // Corpus definitions + + public corpusDefinitions(): Observable { + return 
this.http.get('/api/corpus/definitions/'); + } + + public corpusDefinition(corpusID: number): Observable { + return this.http.get(`/api/corpus/definitions/${corpusID}/`); + } + + public createCorpus(data: APIEditableCorpus): Observable { + return this.http.post('/api/corpus/definitions/', data); + } + + public updateCorpus(corpusID: number, data: APIEditableCorpus): Observable { + return this.http.put(`/api/corpus/definitions/${corpusID}/`, data); + } + + public deleteCorpus(corpusID: number): Observable { + return this.http.delete(`/api/corpus/definitions/${corpusID}/`); + } + // Tagging public userTags(): Observable { diff --git a/frontend/src/app/services/corpus.service.ts b/frontend/src/app/services/corpus.service.ts index 7092bd591..6c4e7ff6d 100644 --- a/frontend/src/app/services/corpus.service.ts +++ b/frontend/src/app/services/corpus.service.ts @@ -89,6 +89,7 @@ export class CorpusService { data.word_models_present, data.languages, data.category, + data.has_named_entities, this.parseDocumentContext(data.document_context, allFields), data.new_highlight, this.parseDefaultSort(data.default_sort, allFields), diff --git a/frontend/src/app/services/download.service.spec.ts b/frontend/src/app/services/download.service.spec.ts index add89bc77..308c65640 100644 --- a/frontend/src/app/services/download.service.spec.ts +++ b/frontend/src/app/services/download.service.spec.ts @@ -3,8 +3,6 @@ import { TestBed, inject } from '@angular/core/testing'; import { ApiService } from './api.service'; import { ApiServiceMock } from '../../mock-data/api'; import { DownloadService } from './download.service'; -import { ElasticSearchService } from './elastic-search.service'; -import { ElasticSearchServiceMock } from '../../mock-data/elastic-search'; import { mockCorpus, mockField } from '../../mock-data/corpus'; import { DownloadOptions, LimitedResultsDownloadParameters, QueryModel, SortState } from '../models'; diff --git a/frontend/src/app/services/elastic-search.service.spec.ts 
b/frontend/src/app/services/elastic-search.service.spec.ts index 53841cc52..e1ace87e8 100644 --- a/frontend/src/app/services/elastic-search.service.spec.ts +++ b/frontend/src/app/services/elastic-search.service.spec.ts @@ -3,9 +3,12 @@ import { HttpClientTestingModule, HttpTestingController } from '@angular/common/ import { ElasticSearchService, SearchResponse } from './elastic-search.service'; import { QueryModel } from '../models'; import { mockCorpus, mockField, mockField2 } from '../../mock-data/corpus'; -import { TagService } from './tag.service'; +import { EntityService } from './entity.service'; +import { EntityServiceMock } from '../../mock-data/entity'; import { TagServiceMock } from '../../mock-data/tag'; -import { Aggregator, TermsAggregator } from '../models/aggregation'; +import { TagService } from './tag.service'; +import { TermsAggregator } from '../models/aggregation'; + const mockResponse: SearchResponse = { took: 4, @@ -65,6 +68,7 @@ describe('ElasticSearchService', () => { TestBed.configureTestingModule({ providers: [ ElasticSearchService, + { provide: EntityService, useValue: new EntityServiceMock()}, { provide: TagService, useValue: new TagServiceMock() } ], imports: [ HttpClientTestingModule ] diff --git a/frontend/src/app/services/elastic-search.service.ts b/frontend/src/app/services/elastic-search.service.ts index 8c4df1822..defd2d261 100644 --- a/frontend/src/app/services/elastic-search.service.ts +++ b/frontend/src/app/services/elastic-search.service.ts @@ -6,18 +6,19 @@ import { FoundDocument, Corpus, QueryModel, SearchResults, SearchHit } from '../models/index'; +import { Aggregator } from '../models/aggregation'; import * as _ from 'lodash'; import { TagService } from './tag.service'; import { APIQuery } from '../models/search-requests'; import { PageResultsParameters } from '../models/page-results'; import { resultsParamsToAPIQuery } from '../utils/es-query'; -import { Aggregator } from '../models/aggregation'; +import { EntityService 
} from './entity.service'; @Injectable() export class ElasticSearchService { - constructor(private http: HttpClient, private tagService: TagService) { + constructor(private http: HttpClient, private entityService: EntityService, private tagService: TagService) { } getDocumentById(id: string, corpus: Corpus): Promise { @@ -64,6 +65,8 @@ export class ElasticSearchService { return this.parseResponse(queryModel.corpus, response); } + + /** * Execute an ElasticSearch query and return a dictionary containing the results. */ @@ -96,7 +99,7 @@ export class ElasticSearchService { * return the id, relevance and field values of a given document */ private hitToDocument(corpus: Corpus, hit: SearchHit, maxScore: number): FoundDocument { - return new FoundDocument(this.tagService, corpus, hit, maxScore); + return new FoundDocument(this.tagService, this.entityService, corpus, hit, maxScore); } } diff --git a/frontend/src/app/services/entity.service.ts b/frontend/src/app/services/entity.service.ts new file mode 100644 index 000000000..056e1e543 --- /dev/null +++ b/frontend/src/app/services/entity.service.ts @@ -0,0 +1,19 @@ +import { HttpClient } from '@angular/common/http'; +import { Injectable } from '@angular/core'; +import { Observable } from 'rxjs'; + +import { Corpus, NamedEntitiesResult } from '../models'; + +@Injectable({ + providedIn: 'root', +}) +export class EntityService { + + constructor(private http: HttpClient) { + } + + public getDocumentEntities(corpus: Corpus, id: string): Observable { + const url = `/api/es/${corpus.name}/${id}/named_entities`; + return this.http.get(url); + } +} diff --git a/frontend/src/app/services/highlight.service.spec.ts b/frontend/src/app/services/highlight.service.spec.ts index 2eb8aa049..0f50f0cfb 100644 --- a/frontend/src/app/services/highlight.service.spec.ts +++ b/frontend/src/app/services/highlight.service.spec.ts @@ -90,43 +90,6 @@ describe('HighlightService', () => { [13, 'في']]); }); - it('Should limit the length of hits using 
snippets', () => { - const text = generateSequence(0, 10000); - const remainingLength = (maxSnippetsLength - 4) * 0.5; - const leftLength = Math.ceil(remainingLength); - const rightLength = Math.floor(remainingLength); - const sequenceSnippetsLength = Math.ceil(leftLength / 5); - - const highlights = highlightService.highlight(text, '5000'); - const snippets = highlightService.snippets(highlights); - - const result = getHighlightedString(snippets); - const expected = getHighlightedString([ - { - substring: omissionString + generateSequence(5000 - sequenceSnippetsLength, 5000).slice(-leftLength + 1) + ' ', - isHit: false - }, - { - substring: '5000', - isHit: true - }, - { - substring: ' ' + generateSequence(5001, 5001 + sequenceSnippetsLength).substr(0, rightLength - 1) + omissionString, - isHit: false - }]); - - expect(result).toEqual(expected); - }); - - it('Should pass short snippets', () => { - const highlights = highlightService.highlight('hello world!', ''); - const snippets = highlightService.snippets(highlights); - expect(snippets).toEqual([{ - isHit: false, - substring: 'hello world!' - }]); - }); - it('Should highlight multiline text', () => { expectHighlights( // eslint-disable-next-line max-len diff --git a/frontend/src/app/services/highlight.service.ts b/frontend/src/app/services/highlight.service.ts index 4fdc2a4ea..e4bc31941 100644 --- a/frontend/src/app/services/highlight.service.ts +++ b/frontend/src/app/services/highlight.service.ts @@ -5,10 +5,7 @@ import { Injectable } from '@angular/core'; * a more scalable approach would need to be implemented if rendering many hits is required. */ const maxHits = 100; -/** - * The maximum number of snippets. - */ -const maxSnippetsCount = 7; + /** * The maximum character length of all the text snippets combined. 
*/ @@ -52,7 +49,6 @@ export class HighlightService { } let result: RegExpExecArray; - const parsedText: TextPart[] = []; let lastIndex = 0; for ( @@ -78,39 +74,6 @@ export class HighlightService { } } - /** - * Gets short snippets from the text part to give the user a short overview of the text content. - */ - public snippets(parts: IterableIterator): TextPart[] { - const snippets: TextPart[] = []; - for ( - let i = 0, next = parts.next(); - !next.done && i < maxSnippetsCount; - i++, next = parts.next() - ) { - snippets.push(next.value); - } - - const lengths = this.getSnippetLengths( - snippets.map((snippet) => snippet.substring.length), - maxSnippetsLength - ); - - snippets.forEach((part, index) => { - part.substring = this.cropSnippetText( - part.substring, - lengths[index], - index === snippets.length - 1 - ? 'left' - : index === 0 - ? 'right' - : 'middle' - ); - }); - - return snippets; - } - /** * Convert the query to a regular expression matching any hit in a string. * @@ -151,79 +114,6 @@ export class HighlightService { ); } - private getSnippetLengths( - actualLengths: number[], - maxTotalLength: number, - croppedSnippets = actualLengths.length - ): number[] { - const targetLengths: number[] = []; - let remainingCharacters = maxTotalLength; - const maxLength = Math.max( - 1, - Math.floor(maxTotalLength / croppedSnippets) - ); - - let remainingSnippets = 0; - - let i = 0; - for (; i < actualLengths.length && remainingCharacters > 0; i++) { - const actualLength = actualLengths[i]; - const targetLength = Math.min(actualLength, maxLength); - - remainingCharacters -= targetLength; - targetLengths[i] = targetLength; - - if (actualLength > targetLength) { - // only the cropped snippets could become longer - remainingSnippets++; - } - } - for (; i < actualLengths.length; i++) { - targetLengths[i] = 0; - } - - if (remainingCharacters && remainingSnippets) { - // if a snippet is shorter than the maximum snippet length, allow the remaining snippets to become longer - 
const additionalLengths = this.getSnippetLengths( - actualLengths.map( - (length, index) => length - targetLengths[index] - ), - remainingCharacters, - remainingSnippets - ); - return targetLengths.map( - (length, index) => length + additionalLengths[index] - ); - } - - return targetLengths; - } - - private cropSnippetText( - text: string, - maxLength: number, - location: 'left' | 'middle' | 'right' - ): string { - if (text.length <= maxLength) { - return text; - } - - switch (location) { - case 'left': - return text.substr(0, maxLength) + omissionString; - - case 'middle': - return ( - text.substr(0, maxLength / 2) + - omissionString + - text.substr(text.length - maxLength / 2) - ); - - case 'right': - return omissionString + text.slice(-maxLength); - } - } - /** * Get the word patterns match in a query. * diff --git a/frontend/src/app/services/search.service.spec.ts b/frontend/src/app/services/search.service.spec.ts index 523c16112..1facbb5f3 100644 --- a/frontend/src/app/services/search.service.spec.ts +++ b/frontend/src/app/services/search.service.spec.ts @@ -9,8 +9,6 @@ import { ElasticSearchServiceMock } from '../../mock-data/elastic-search'; import { QueryService } from './query.service'; import { SearchService } from './search.service'; import { SessionService } from './session.service'; -import { WordmodelsService } from './wordmodels.service'; -import { WordmodelsServiceMock } from '../../mock-data/wordmodels'; import { HttpClientTestingModule } from '@angular/common/http/testing'; import { QueryModel } from '../models'; import { mockCorpus } from '../../mock-data/corpus'; diff --git a/frontend/src/app/shared/icons.ts b/frontend/src/app/shared/icons.ts index 0d5dd8676..21b16626d 100644 --- a/frontend/src/app/shared/icons.ts +++ b/frontend/src/app/shared/icons.ts @@ -4,12 +4,12 @@ import { } from '@fortawesome/free-regular-svg-icons'; import { IconDefinition as SolidIconDefinition, - faAngleDown, faAngleUp, faArrowLeft, faArrowRight, faAt, faBook, 
faBookOpen, faChartColumn, - faCheck, faChevronDown, faChevronLeft, faChevronRight, faCog, faCogs, faDatabase, faDiagramProject, - faDownload, faEdit, faEnvelope, faEye, faFilter, faHistory, faImage, faInfo, faInfoCircle, faLink, faList, faLock, - faMinus, faPalette, faPlus, faQuestionCircle, faSearch, faSearchMinus, faSearchPlus, faSignOut, + faAngleDown, faAngleUp, faArrowLeft, faArrowRight, faAt, faBook, faBookmark, faBookOpen, faBuilding, faChartColumn, + faCheck, faChevronLeft, faChevronRight, faCog, faCogs, faDatabase, faDiagramProject, + faDownload, faEnvelope, faEye, faFilter, faHistory, faImage, faInfo, faInfoCircle, faLink, faList, faLocationDot, faLock, + faMinus, faPalette, faPencil, faPlus, faQuestionCircle, faSearch, faSearchMinus, faSearchPlus, faSignOut, faSortAlphaAsc, faSortAlphaDesc, faSortNumericAsc, faSortNumericDesc, faSquare, - faTable, faTags, faTimes, faTrashCan, faUndo, faUser + faTable, faTags, faTimes, faTrashCan, faUndo, faUpload, faUser } from '@fortawesome/free-solid-svg-icons'; type IconDefinition = SolidIconDefinition | RegularIconDefinition; @@ -40,6 +40,7 @@ export const actionIcons: Icons = { help: faInfoCircle, helpAlt: faQuestionCircle, download: faDownload, + upload: faUpload, config: faCog, email: faEnvelope, more: faPlus, @@ -52,7 +53,7 @@ export const actionIcons: Icons = { add: faPlus, remove: faTimes, delete: faTrashCan, - edit: faEdit, + edit: faPencil, view: faEye, }; @@ -108,3 +109,10 @@ export const documentIcons: Icons = { scanAlt: faNewspaper, context: faBookOpen, }; + +export const entityIcons: Icons = { + person: faUser, + location: faLocationDot, + organization: faBuilding, + miscellaneous: faBookmark, +} diff --git a/frontend/src/app/shared/pipes/elasticsearch-highlight.pipe.ts b/frontend/src/app/shared/pipes/elasticsearch-highlight.pipe.ts new file mode 100644 index 000000000..ae1d374da --- /dev/null +++ b/frontend/src/app/shared/pipes/elasticsearch-highlight.pipe.ts @@ -0,0 +1,43 @@ +import { Pipe, 
PipeTransform } from '@angular/core'; +import * as _ from 'lodash'; + +import { CorpusField, FoundDocument } from '../../models'; + +@Pipe({ + name: 'elasticsearchHighlight' +}) +export class ElasticsearchHighlightPipe implements PipeTransform { + + /** + * Transforms a text to display highlights fetched from Elasticsearch + * + * @param document a FoundDocument, containing the fetched highlights + */ + transform(field: CorpusField, document: FoundDocument) { + const fieldValue = document.fieldValues[field.name]; + + if (_.isEmpty(fieldValue)) { + return; + } + + const highlighted = this.highlightedInnerHtml(field, document); + return highlighted; + } + + highlightedInnerHtml(field: CorpusField, document: FoundDocument) { + let highlighted = document.fieldValues[field.name]; + if (document.highlight && document.highlight.hasOwnProperty(field.name)) { + for (const highlight of document.highlight[field.name]) { + const strippedHighlight = this.stripTags(highlight); + highlighted = highlighted.replace(strippedHighlight, highlight); + } + } + return highlighted; + } + + stripTags(htmlString: string){ + const parseHTML= new DOMParser().parseFromString(htmlString, 'text/html'); + return parseHTML.body.textContent || ''; + } + +} diff --git a/frontend/src/app/shared/pipes/geo-data.pipe.ts b/frontend/src/app/shared/pipes/geo-data.pipe.ts new file mode 100644 index 000000000..08d9df047 --- /dev/null +++ b/frontend/src/app/shared/pipes/geo-data.pipe.ts @@ -0,0 +1,25 @@ +import { Pipe, PipeTransform } from '@angular/core'; +import { DomSanitizer } from '@angular/platform-browser'; +import { CorpusField, FoundDocument } from '../../models'; +@Pipe({ + name: 'geoData' +}) +export class GeoDataPipe implements PipeTransform { + constructor(private sanitizer: DomSanitizer) { + } + + /** + * Transforms GeoJSON data + * + * @param document FoundDocument holding the actual data + */ + transform(field: CorpusField, document: FoundDocument) { + let latitude = 
document.fieldValue(field)[field.name][1]; + let longitude = document.fieldValue(field)[field.name][0]; + // Round to 2 decimal places + latitude = Math.round(latitude * 100) / 100; + longitude = Math.round(longitude * 100) / 100; + return `Lat: ${latitude}; Lon: ${longitude}`; + } + +} diff --git a/frontend/src/app/shared/pipes/index.ts b/frontend/src/app/shared/pipes/index.ts new file mode 100644 index 000000000..0bf4c0f5b --- /dev/null +++ b/frontend/src/app/shared/pipes/index.ts @@ -0,0 +1,5 @@ +export * from './elasticsearch-highlight.pipe'; +export * from './geo-data.pipe'; +export * from './paragraph.pipe'; +export * from './regex-highlight.pipe'; +export * from './snippet.pipe'; diff --git a/frontend/src/app/shared/pipes/paragraph.pipe.spec.ts b/frontend/src/app/shared/pipes/paragraph.pipe.spec.ts new file mode 100644 index 000000000..10f3ec4c0 --- /dev/null +++ b/frontend/src/app/shared/pipes/paragraph.pipe.spec.ts @@ -0,0 +1,8 @@ +import { ParagraphPipe } from './paragraph.pipe'; + +describe('ParagraphPipe', () => { + it('create an instance', () => { + const pipe = new ParagraphPipe(); + expect(pipe).toBeTruthy(); + }); +}); diff --git a/frontend/src/app/shared/pipes/paragraph.pipe.ts b/frontend/src/app/shared/pipes/paragraph.pipe.ts new file mode 100644 index 000000000..eebafc51a --- /dev/null +++ b/frontend/src/app/shared/pipes/paragraph.pipe.ts @@ -0,0 +1,23 @@ +import { Pipe, PipeTransform } from '@angular/core'; + + +@Pipe({ + name: 'paragraph', +}) +export class ParagraphPipe implements PipeTransform { + + transform(content: string | string[]): unknown { + const splitText = this.addParagraphTags(content); + return splitText; + } + + addParagraphTags(content: string | string[]) { + const paragraphs = typeof content === 'string' ? content.split('\n') : content; + if (!paragraphs || paragraphs.length === 1) { + return content; + } + return paragraphs.map(p => `

${p}

`).join(' '); + } + + +} diff --git a/frontend/src/app/search/highlight.pipe.ts b/frontend/src/app/shared/pipes/regex-highlight.pipe.ts similarity index 67% rename from frontend/src/app/search/highlight.pipe.ts rename to frontend/src/app/shared/pipes/regex-highlight.pipe.ts index 3ee84d437..595499503 100644 --- a/frontend/src/app/search/highlight.pipe.ts +++ b/frontend/src/app/shared/pipes/regex-highlight.pipe.ts @@ -1,24 +1,22 @@ import { Pipe, PipeTransform, SecurityContext } from '@angular/core'; import { DomSanitizer } from '@angular/platform-browser'; -import { HighlightService } from '../services/highlight.service'; +import { HighlightService } from '../../services/highlight.service'; @Pipe({ - name: 'highlight' + name: 'regexHighlight' }) -export class HighlightPipe implements PipeTransform { +export class RegexHighlightPipe implements PipeTransform { constructor(private sanitizer: DomSanitizer, private highlightService: HighlightService) { } /** * Transforms a text to highlight all the text matching the specified query. - * - * @param snippets Only show snippets. When this enabled, line breaks will also be replaced with -- */ - transform(text: string, query: string, snippets: boolean = false) { + transform(text: string, query: string) { const highlights = this.highlightService.highlight(text, query); - const parts = snippets ? this.highlightService.snippets(highlights) : Array.from(highlights); + const parts = Array.from(highlights); const highlightedText = parts.map(part => { - const sanitizedText = this.sanitizedLineBreaks(part.substring, snippets ? ' — ' : '
'); + const sanitizedText = this.sanitizedLineBreaks(part.substring, '
'); return part.isHit ? `${sanitizedText}` : sanitizedText; }).join(''); diff --git a/frontend/src/app/shared/pipes/slugify.pipe.spec.ts b/frontend/src/app/shared/pipes/slugify.pipe.spec.ts new file mode 100644 index 000000000..b016f08b7 --- /dev/null +++ b/frontend/src/app/shared/pipes/slugify.pipe.spec.ts @@ -0,0 +1,24 @@ +import { SlugifyPipe } from './slugify.pipe'; + +describe('SlugifyPipe', () => { + it('create an instance', () => { + const pipe = new SlugifyPipe(); + expect(pipe).toBeTruthy(); + }); + + it('slugifies strings', () => { + const pipe = new SlugifyPipe(); + const input = 'tab-General information'; + const output = pipe.transform(input); + const expected = 'tab-general-information'; + expect(output).toEqual(expected); + }); + + it('slugifies numbers', () => { + const pipe = new SlugifyPipe(); + const input = 1; + const output = pipe.transform(input); + const expected = '1'; + expect(output).toEqual(expected); + }); +}); diff --git a/frontend/src/app/shared/pipes/slugify.pipe.ts b/frontend/src/app/shared/pipes/slugify.pipe.ts new file mode 100644 index 000000000..cac63f35e --- /dev/null +++ b/frontend/src/app/shared/pipes/slugify.pipe.ts @@ -0,0 +1,16 @@ +import { Pipe, PipeTransform } from '@angular/core'; + +@Pipe({ name: 'slugify' }) +export class SlugifyPipe implements PipeTransform { + // via: https://gist.github.com/djabif/b8d21c4ebcef51db7a4a28ecf3d41846 + transform(input: string | number): string { + return input + .toString() + .toLowerCase() + .replace(/\s+/g, '-') // Replace spaces with - + .replace(/[^\w\-]+/g, '') // Remove all non-word chars + .replace(/\-\-+/g, '-') // Replace multiple - with single - + .replace(/^-+/, '') // Trim - from start of text + .replace(/-+$/, ''); // Trim - from end of text + } +} diff --git a/frontend/src/app/shared/pipes/snippet.pipe.ts b/frontend/src/app/shared/pipes/snippet.pipe.ts new file mode 100644 index 000000000..4634395cb --- /dev/null +++ b/frontend/src/app/shared/pipes/snippet.pipe.ts @@ -0,0 
+1,20 @@ +import { Pipe, PipeTransform } from '@angular/core'; +import { DomSanitizer } from '@angular/platform-browser'; +@Pipe({ + name: 'snippet' +}) +export class SnippetPipe implements PipeTransform { + constructor(private sanitizer: DomSanitizer) { + } + + /** + * Transforms a text to only show its leading characters with an ellipsis + * + * @param nCharacters Specifies how many leading characters should be displayed + */ + transform(text: string, nCharacters=100) { + const snippedText = text.slice(0, nCharacters).concat('...'); + return this.sanitizer.bypassSecurityTrustHtml(snippedText); + } + +} diff --git a/frontend/src/app/shared/shared.module.ts b/frontend/src/app/shared/shared.module.ts index 8f25fd406..ee400898a 100644 --- a/frontend/src/app/shared/shared.module.ts +++ b/frontend/src/app/shared/shared.module.ts @@ -12,20 +12,22 @@ import { BalloonDirective } from '../balloon.directive'; import { DatePickerComponent } from '../corpus-selection/corpus-filter/date-picker/date-picker.component'; import { ErrorComponent } from '../error/error.component'; import { ScrollToDirective } from '../scroll-to.directive'; -import { HighlightPipe } from '../search'; import { DropdownModule } from './dropdown/dropdown.module'; import { TabPanelDirective } from './tabs/tab-panel.directive'; import { TabsComponent } from './tabs/tabs.component'; +import { ToggleComponent } from './toggle/toggle.component'; +import { SlugifyPipe } from './pipes/slugify.pipe'; @NgModule({ declarations: [ DatePickerComponent, ErrorComponent, BalloonDirective, - HighlightPipe, ScrollToDirective, TabsComponent, TabPanelDirective, + ToggleComponent, + SlugifyPipe, ], exports: [ // shared components @@ -43,11 +45,14 @@ import { TabsComponent } from './tabs/tabs.component'; FormsModule, FontAwesomeModule, BalloonDirective, - HighlightPipe, HttpClientModule, HttpClientXsrfModule, RouterModule, TableModule, + ToggleComponent, + + // Shared pipes + SlugifyPipe, ], imports: [ 
BrowserAnimationsModule, @@ -64,7 +69,7 @@ import { TabsComponent } from './tabs/tabs.component'; headerName: 'X-CSRFToken', }), RouterModule, - ], + providers: [SlugifyPipe], }) export class SharedModule {} diff --git a/frontend/src/app/shared/tabs/tabs.component.spec.ts b/frontend/src/app/shared/tabs/tabs.component.spec.ts index d00cdd97e..24182bc1f 100644 --- a/frontend/src/app/shared/tabs/tabs.component.spec.ts +++ b/frontend/src/app/shared/tabs/tabs.component.spec.ts @@ -27,4 +27,13 @@ describe('TabsComponent', () => { it('should create', () => { expect(component).toBeTruthy(); }); + + it('should produce slug tab IDs', () => { + const tabIdNumerical = 1; + const tabIdString = 'General Information'; + expect(component.tabLinkId(tabIdNumerical)).toEqual('tab-1'); + expect(component.tabLinkId(tabIdString)).toEqual( + 'tab-general-information' + ); + }); }); diff --git a/frontend/src/app/shared/tabs/tabs.component.ts b/frontend/src/app/shared/tabs/tabs.component.ts index c18cf0e50..e8a25ba0d 100644 --- a/frontend/src/app/shared/tabs/tabs.component.ts +++ b/frontend/src/app/shared/tabs/tabs.component.ts @@ -6,6 +6,7 @@ import * as _ from 'lodash'; import { TabPanelDirective } from './tab-panel.directive'; import { IconDefinition } from '@fortawesome/free-solid-svg-icons'; import { modulo } from '../../utils/utils'; +import { SlugifyPipe } from '../pipes/slugify.pipe'; interface Tab { label: string; // display name @@ -28,7 +29,7 @@ export class TabsComponent implements AfterContentInit { tabs: Tab[]; - constructor() { } + constructor(private slugifyPipe: SlugifyPipe) {} ngAfterContentInit(): void { this.tabs = this.tabPanels.map(tabPanel => ({ @@ -74,6 +75,7 @@ export class TabsComponent implements AfterContentInit { } tabLinkId(tabId: string | number): string { - return `tab-${tabId}`; + const slugifiedId = this.slugifyPipe.transform(tabId); + return `tab-${slugifiedId}`; } } diff --git a/frontend/src/app/shared/toggle/toggle.component.html 
b/frontend/src/app/shared/toggle/toggle.component.html new file mode 100644 index 000000000..8b855a255 --- /dev/null +++ b/frontend/src/app/shared/toggle/toggle.component.html @@ -0,0 +1,4 @@ +
+ + +
diff --git a/frontend/src/app/shared/toggle/toggle.component.scss b/frontend/src/app/shared/toggle/toggle.component.scss new file mode 100644 index 000000000..7ca30a115 --- /dev/null +++ b/frontend/src/app/shared/toggle/toggle.component.scss @@ -0,0 +1,68 @@ +@import "../../../_utilities"; + +/* The switch - the box around the slider */ +.toggle-container { + position: absolute; + margin-left: .5rem; + margin-top: -.2rem; + display: inline-block; + width: 4rem; + height: 2rem; + pointer-events: none; + + /* Hide default HTML checkbox */ + input { + opacity: 0; + width: 100%; + height: 100%; + pointer-events: all; + } +} + + +/* The slider */ +.slider { + position: absolute; + cursor: pointer; + top: 0; + left: 0; + right: 0; + bottom: 0; + background-color: $highlight-color; + transition: .4s; + border-radius: 35px; + pointer-events: none; + &:before { + content: ""; + z-index: 20; + position: absolute; + height: 1.6rem; + width: 1.6rem; + left: .2rem; + bottom: .2rem; + background-color: white; + transition: .4s; + border-radius: 50%; + } +} + + + +input:checked { + + .slider { + background-color: $primary; + } + + + .slider:before { + transform: translateX(2rem); + -webkit-transform: translateX(2rem); + -moz-transform: translateX(2rem); + -ms-transform: translateX(2rem); + -o-transform: translateX(2rem); + } + + + .slider:after { + left: calc(100% - 5px); + transform: translateX(-100%); + } +} \ No newline at end of file diff --git a/frontend/src/app/shared/toggle/toggle.component.spec.ts b/frontend/src/app/shared/toggle/toggle.component.spec.ts new file mode 100644 index 000000000..301ab008b --- /dev/null +++ b/frontend/src/app/shared/toggle/toggle.component.spec.ts @@ -0,0 +1,25 @@ +import { ComponentFixture, TestBed } from '@angular/core/testing'; + +import { ToggleComponent } from './toggle.component'; + +describe('ToggleComponent', () => { + let component: ToggleComponent; + let fixture: ComponentFixture; + + beforeEach(async () => { + await 
TestBed.configureTestingModule({ + declarations: [ ToggleComponent ] + }) + .compileComponents(); + }); + + beforeEach(() => { + fixture = TestBed.createComponent(ToggleComponent); + component = fixture.componentInstance; + fixture.detectChanges(); + }); + + it('should create', () => { + expect(component).toBeTruthy(); + }); +}); diff --git a/frontend/src/app/shared/toggle/toggle.component.ts b/frontend/src/app/shared/toggle/toggle.component.ts new file mode 100644 index 000000000..13194c3c8 --- /dev/null +++ b/frontend/src/app/shared/toggle/toggle.component.ts @@ -0,0 +1,22 @@ +import { Component, EventEmitter, OnInit, Output } from '@angular/core'; + +@Component({ + selector: 'ia-toggle', + templateUrl: './toggle.component.html', + styleUrls: ['./toggle.component.scss'] +}) +export class ToggleComponent implements OnInit { + @Output() toggled = new EventEmitter(); + active = false; + + constructor() { } + + ngOnInit(): void { + } + + public toggleButton() { + this.active = !this.active; + this.toggled.emit(this.active); + } + +} diff --git a/frontend/src/app/visualization/barchart/barchart-options.component.ts b/frontend/src/app/visualization/barchart/barchart-options.component.ts index 307cdb822..5a339cdfd 100644 --- a/frontend/src/app/visualization/barchart/barchart-options.component.ts +++ b/frontend/src/app/visualization/barchart/barchart-options.component.ts @@ -1,5 +1,5 @@ import { Component, EventEmitter, Input, OnChanges, Output, SimpleChanges } from '@angular/core'; -import { ActivatedRoute, Params, Router } from '@angular/router'; +import { ActivatedRoute, ParamMap, Router } from '@angular/router'; import * as _ from 'lodash'; import { ParamDirective } from '../../param/param-directive'; import { Normalizer, ChartType, ChartParameters } from '../../models'; @@ -85,7 +85,7 @@ export class BarchartOptionsComponent teardown() {} - setStateFromParams(params: Params) { + setStateFromParams(params: ParamMap) { if (params.has('normalize')) { 
this.currentNormalizer = params.get('normalize') as Normalizer; } else { diff --git a/frontend/src/app/visualization/barchart/histogram.component.ts b/frontend/src/app/visualization/barchart/histogram.component.ts index a73f630fd..24f952f09 100644 --- a/frontend/src/app/visualization/barchart/histogram.component.ts +++ b/frontend/src/app/visualization/barchart/histogram.component.ts @@ -213,28 +213,33 @@ export class HistogramComponent formatDownload: this.formatDownloadValue, isOptional: 'relative_doc_count' !== valueKey, }, - { - key: 'match_count', - label: 'Token Frequency', - format: this.formatValue('raw'), - formatDownload: this.formatDownloadValue, - isOptional: 'match_count' !== valueKey, - }, - { - key: 'matches_by_doc_count', - label: 'Relative Frequency (documents)', - format: this.formatValue('documents'), - formatDownload: this.formatDownloadValue, - isOptional: 'matches_by_doc_count' !== valueKey, - }, - { - key: 'matches_by_token_count', - label: 'Relative Frequency (terms)', - format: this.formatValue('terms'), - formatDownload: this.formatDownloadValue, - isOptional: 'matches_by_token_count' !== valueKey, - }, ]; + if (this.frequencyMeasure == 'tokens') { + // Headers related to tokens should not be applied to document visualizations + this.tableHeaders = this.tableHeaders.concat([ + { + key: 'match_count', + label: 'Token Frequency', + format: this.formatValue('raw'), + formatDownload: this.formatDownloadValue, + isOptional: 'match_count' !== valueKey, + }, + { + key: 'matches_by_doc_count', + label: 'Relative Frequency (documents)', + format: this.formatValue('documents'), + formatDownload: this.formatDownloadValue, + isOptional: 'matches_by_doc_count' !== valueKey, + }, + { + key: 'matches_by_token_count', + label: 'Relative Frequency (terms)', + format: this.formatValue('terms'), + formatDownload: this.formatDownloadValue, + isOptional: 'matches_by_token_count' !== valueKey, + }, + ]); + } } } } diff --git 
a/frontend/src/app/visualization/barchart/term-comparison-editor/term-comparison-editor.component.ts b/frontend/src/app/visualization/barchart/term-comparison-editor/term-comparison-editor.component.ts index 12b0ccbfe..08679fae3 100644 --- a/frontend/src/app/visualization/barchart/term-comparison-editor/term-comparison-editor.component.ts +++ b/frontend/src/app/visualization/barchart/term-comparison-editor/term-comparison-editor.component.ts @@ -1,5 +1,5 @@ import { Component, EventEmitter, Input, OnChanges, Output, SimpleChanges } from '@angular/core'; -import { ActivatedRoute, Params, Router } from '@angular/router'; +import { ActivatedRoute, ParamMap, Router } from '@angular/router'; import { ParamDirective } from '../../../param/param-directive'; import { ParamService } from '../../../services'; @@ -44,7 +44,7 @@ export class TermComparisonEditorComponent teardown() {} - setStateFromParams(params: Params) { + setStateFromParams(params: ParamMap) { if (params.has('compareTerm')) { this.queries = params.getAll('compareTerm'); this.queriesChanged.emit(this.queries); diff --git a/frontend/src/app/visualization/ngram/ngram.component.html b/frontend/src/app/visualization/ngram/ngram.component.html index cc49268ee..c7c191b4f 100644 --- a/frontend/src/app/visualization/ngram/ngram.component.html +++ b/frontend/src/app/visualization/ngram/ngram.component.html @@ -52,7 +52,7 @@ (onChange)="onParameterChange('freqCompensation', $event.value)"> {{currentFreqCompensationOption?.label}}
- {{option.label}} diff --git a/frontend/src/app/word-models/related-words/related-words.component.ts b/frontend/src/app/word-models/related-words/related-words.component.ts index 52333dafb..963c16ab2 100644 --- a/frontend/src/app/word-models/related-words/related-words.component.ts +++ b/frontend/src/app/word-models/related-words/related-words.component.ts @@ -1,5 +1,5 @@ import { Component, EventEmitter, HostBinding, Input, OnChanges, Output, SimpleChanges } from '@angular/core'; -import { ActivatedRoute, Params, Router } from '@angular/router'; +import { ActivatedRoute, ParamMap, Params, Router } from '@angular/router'; import { BehaviorSubject } from 'rxjs'; import * as _ from 'lodash'; import { showLoading } from '../../utils/utils'; @@ -26,7 +26,7 @@ export class RelatedWordsComponent extends ParamDirective implements OnChanges { isLoading$ = new BehaviorSubject(false); - neighbours = 5; + neighbours: number = 5; timeIntervals: string[] = []; totalData: WordSimilarity[]; // similarities of overall nearest neighbours per time period @@ -60,8 +60,8 @@ export class RelatedWordsComponent extends ParamDirective implements OnChanges { teardown() {} - setStateFromParams(params: Params) { - this.neighbours = _.get(params, 'neighbours', 5); + setStateFromParams(params: ParamMap) { + this.neighbours = Number(params.get('neighbours')); } getData(): void { diff --git a/frontend/src/app/word-models/word-models.component.html b/frontend/src/app/word-models/word-models.component.html index f0ec4c6b0..ca5255730 100644 --- a/frontend/src/app/word-models/word-models.component.html +++ b/frontend/src/app/word-models/word-models.component.html @@ -1,6 +1,6 @@ -
+
diff --git a/frontend/src/app/word-models/word-models.component.ts b/frontend/src/app/word-models/word-models.component.ts index f1353b265..3951aa885 100644 --- a/frontend/src/app/word-models/word-models.component.ts +++ b/frontend/src/app/word-models/word-models.component.ts @@ -1,5 +1,5 @@ import { Component, ElementRef, HostListener, ViewChild } from '@angular/core'; -import { ActivatedRoute, Params, Router } from '@angular/router'; +import { ActivatedRoute, ParamMap, Router } from '@angular/router'; import * as _ from 'lodash'; import { Corpus, QueryFeedback, User, WordInModelResult } from '../models'; @@ -18,7 +18,6 @@ export class WordModelsComponent extends ParamDirective { public searchSection: ElementRef; public isScrolledDown: boolean; - user: User; corpus: Corpus; queryText: string; @@ -79,13 +78,12 @@ export class WordModelsComponent extends ParamDirective { } async initialize(): Promise { - this.user = await this.authService.getCurrentUserPromise(); this.corpusService.currentCorpus.subscribe(this.setCorpus.bind(this)); } teardown() {} - setStateFromParams(params: Params) { + setStateFromParams(params: ParamMap) { const queryFromParams = params.get('query'); if (queryFromParams !== this.activeQuery) { this.queryText = queryFromParams; @@ -101,7 +99,7 @@ export class WordModelsComponent extends ParamDirective { this.queryFeedback = { status: 'success' }; } if (params.has('show')) { - this.currentTab = params.get('show'); + this.currentTab = params.get('show') as 'relatedwords' | 'wordsimilarity'; } else { this.currentTab = 'relatedwords'; } diff --git a/frontend/src/mock-data/api.ts b/frontend/src/mock-data/api.ts index 37f3b7411..adaca1a90 100644 --- a/frontend/src/mock-data/api.ts +++ b/frontend/src/mock-data/api.ts @@ -4,6 +4,8 @@ import { takeUntil } from 'rxjs/operators'; import { mockUserResponse } from './user'; import { CorpusDocumentationPage, TaskResult, TasksOutcome } from '../app/models'; import { LimitedResultsDownloadParameters } from 
'../app/models/search-results'; +import { mockCorpusDefinition } from './corpus-definition'; +import { APIEditableCorpus } from '../app/models/corpus-definition'; export const fakeNgramResult = { words: [ @@ -93,4 +95,23 @@ export class ApiServiceMock { userTags() { return of([]); } + + corpusDefinitions(): Observable { + const data = [{ id: 1, active: false, definition: mockCorpusDefinition }]; + return of(data); + } + + corpusDefinition(id: number): Observable { + const data = { id, active: false, definition: mockCorpusDefinition }; + return of(data); + } + + createCorpus(data: APIEditableCorpus): Observable { + const result = _.merge({ id: 1 }, data); + return of(result); + } + + updateCorpus(_id: number, data: APIEditableCorpus): Observable { + return of(data); + } } diff --git a/frontend/src/mock-data/constructor-helpers.ts b/frontend/src/mock-data/constructor-helpers.ts index 21c4a1aa1..b369eb305 100644 --- a/frontend/src/mock-data/constructor-helpers.ts +++ b/frontend/src/mock-data/constructor-helpers.ts @@ -3,8 +3,10 @@ import { Corpus, FieldValues, FoundDocument, HighlightResult, SearchHit } from '../app/models'; import { mockCorpus } from './corpus'; import { TagServiceMock } from './tag'; +import { EntityServiceMock } from './entity'; const tagService = new TagServiceMock() as any; +const entityService = new EntityServiceMock() as any; export const makeDocument = ( fieldValues: FieldValues, @@ -16,6 +18,6 @@ export const makeDocument = ( const hit: SearchHit = { _id: id, _score: relevance, _source: fieldValues, highlight }; - return new FoundDocument(tagService, corpus, hit); + return new FoundDocument(tagService, entityService, corpus, hit); }; diff --git a/frontend/src/mock-data/corpus-definition.ts b/frontend/src/mock-data/corpus-definition.ts new file mode 100644 index 000000000..cf418c9ac --- /dev/null +++ b/frontend/src/mock-data/corpus-definition.ts @@ -0,0 +1,54 @@ +import { APICorpusDefinition } from '../app/models/corpus-definition'; + +export 
const mockCorpusDefinition: APICorpusDefinition = { + name: 'test', + meta: { + title: 'Test corpus', + description: 'JSON corpus definition for testing', + category: 'book', + date_range: { + min: '1800-01-01', + max: '1900-01-01' + }, + languages: ['en'], + }, + source_data: { + type: 'csv' + }, + fields: [ + { + name: 'content', + display_name: 'Content', + description: 'Main text content', + type: 'text_content', + options: { + search: true, + filter: 'none', + visualize: true, + preview: true, + sort: false, + hidden: false, + }, + extract: { + column: 'content' + } + }, + { + name: 'date', + display_name: 'Date', + description: 'Date on which the text was published', + type: 'date', + options: { + search: false, + filter: 'show', + visualize: true, + preview: true, + sort: true, + hidden: false, + }, + extract: { + column: 'date' + } + } + ] +}; diff --git a/frontend/src/mock-data/corpus.ts b/frontend/src/mock-data/corpus.ts index f1472315d..ae6e1d1eb 100644 --- a/frontend/src/mock-data/corpus.ts +++ b/frontend/src/mock-data/corpus.ts @@ -145,6 +145,7 @@ export const mockCorpus: Corpus = { scanImageType: 'pdf', allowImageDownload: false, wordModelsPresent: false, + hasNamedEntities: false, directDownloadLimit: 500, fields: [mockField, mockField2], languages: ['English'], @@ -163,6 +164,7 @@ export const mockCorpus2 = { scanImageType: 'pdf', allowImageDownload: false, wordModelsPresent: false, + hasNamedEntities: true, directDownloadLimit: 1000, fields: [mockField2], languages: ['English', 'French'], @@ -181,6 +183,7 @@ export const mockCorpus3: Corpus = { scanImageType: 'pdf', allowImageDownload: false, wordModelsPresent: false, + hasNamedEntities: false, directDownloadLimit: 2000, fields: [mockField, mockField2, mockField3, mockFieldDate, mockFieldMultipleChoice], languages: ['English'], diff --git a/frontend/src/mock-data/entity.ts b/frontend/src/mock-data/entity.ts new file mode 100644 index 000000000..9452852ec --- /dev/null +++ 
b/frontend/src/mock-data/entity.ts @@ -0,0 +1,12 @@ +import { of, Observable } from 'rxjs'; + +import { Corpus, NamedEntitiesResult } from '../app/models'; + +export class EntityServiceMock { + + public getDocumentEntities(corpus: Corpus, id: string): Observable { + return of({speech: [{entity: 'person', text: 'Wally'}, + {entity: 'flat', text: ' was last seen in '}, + {entity: 'location', text: 'Paris'}]}) + } +} diff --git a/frontend/src/mock-data/search.ts b/frontend/src/mock-data/search.ts index d5c4f233b..68d6d720b 100644 --- a/frontend/src/mock-data/search.ts +++ b/frontend/src/mock-data/search.ts @@ -3,7 +3,7 @@ import { SearchFilter } from '../app/models/field-filter'; import { Corpus, CorpusField, FoundDocument, QueryModel, SearchResults } from '../app/models/index'; import { mockCorpus } from './corpus'; import { TagServiceMock } from './tag'; -import { TagService } from '../app/services/tag.service'; +import { ElasticSearchServiceMock } from './elastic-search'; import { Aggregator } from '../app/models/aggregation'; export class SearchServiceMock { @@ -39,7 +39,8 @@ export class SearchServiceMock { loadResults(queryModel: QueryModel, resultsParams: PageResultsParameters): Promise { const doc = new FoundDocument( - new TagServiceMock() as unknown as TagService, + new TagServiceMock() as any, + new ElasticSearchServiceMock() as any, mockCorpus, { _id: 'test_1', diff --git a/frontend/src/styles.scss b/frontend/src/styles.scss index b6a8d5d8b..942fe4314 100644 --- a/frontend/src/styles.scss +++ b/frontend/src/styles.scss @@ -71,3 +71,31 @@ a.dropdown-item[disabled] { color: $grey; } } + +@mixin mark-entity($color) { + background-color: rgb(from $color r g b /.2); + border-bottom: .2em solid; + border-color: $color; + + .entity-icon, &.entity-icon { + padding-left: .3em; + padding-right: .3em; + color: $color; + } +} + +.entity-person { + @include mark-entity($entity-person); +} + +.entity-location { + @include mark-entity($entity-location); +} + 
+.entity-organization { + @include mark-entity($entity-organization); +} + +.entity-miscellaneous { + @include mark-entity($entity-miscellaneous); +} diff --git a/package.json b/package.json index 26128bab2..937dc3e55 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "i-analyzer", - "version": "5.7.0", + "version": "5.9.0", "license": "MIT", "scripts": { "postinstall": "yarn install-back && yarn install-front",