From 25ba54bbaee5fd89794067b2c34c6bea20e7fc16 Mon Sep 17 00:00:00 2001
From: Pablo Panero <pablo.panero@cern.ch>
Date: Wed, 13 Jul 2022 15:31:38 +0200
Subject: [PATCH] global: add support for custom fields and feature branches
 tests

Co-authored-by: Zacharias Zacharodimos <zacharias.zacharodimos@cern.ch>
---
 .github/workflows/tests-feature.yaml          |  79 ++++++++++++++
 .github/workflows/tests.yml                   |   4 +-
 .gitignore                                    |   2 +-
 .../contrib/affiliations/schema.py            |  26 +----
 .../contrib/funders/schema.py                 |  25 +----
 .../contrib/subjects/schema.py                |  25 +----
 .../records/systemfields/relations.py         |  53 +++++++++
 .../services/custom_fields/__init__.py        |  12 ++
 .../services/custom_fields/vocabulary.py      |  80 ++++++++++++++
 invenio_vocabularies/services/schema.py       |  62 ++++++++++-
 ...ents-devel.txt => requirements-feature.txt |   4 +-
 setup.cfg                                     |   4 +-
 tests/conftest.py                             |   2 +-
 tests/custom_fields/test_custom_fields.py     | 103 ++++++++++++++++++
 14 files changed, 412 insertions(+), 69 deletions(-)
 create mode 100644 .github/workflows/tests-feature.yaml
 create mode 100644 invenio_vocabularies/records/systemfields/relations.py
 create mode 100644 invenio_vocabularies/services/custom_fields/__init__.py
 create mode 100644 invenio_vocabularies/services/custom_fields/vocabulary.py
 rename requirements-devel.txt => requirements-feature.txt (61%)
 create mode 100644 tests/custom_fields/test_custom_fields.py

diff --git a/.github/workflows/tests-feature.yaml b/.github/workflows/tests-feature.yaml
new file mode 100644
index 00000000..ca6aae7a
--- /dev/null
+++ b/.github/workflows/tests-feature.yaml
@@ -0,0 +1,79 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of Invenio.
+# Copyright (C) 2022 CERN.
+#
+# Invenio is free software; you can redistribute it and/or modify it
+# under the terms of the MIT License; see LICENSE file for more details.
+
+name: Feature development CI
+
+on:
+  pull_request:
+    branches:
+      - "feature/**"
+  schedule:
+    # * is a special character in YAML so you have to quote this string
+    - cron: "0 3 * * 6"
+  workflow_dispatch:
+    inputs:
+      reason:
+        description: "Reason"
+        required: false
+        default: "Manual trigger"
+
+jobs:
+  Tests:
+    runs-on: ubuntu-20.04
+    strategy:
+      matrix:
+        python-version: [3.8] # for efficiency test only one specific python version
+        requirements-level: [pypi]
+        cache-service: [redis]
+        db-service: [postgresql13]
+        search-service: [elasticsearch7]
+        include:
+          - db-service: postgresql13
+            DB_EXTRAS: "postgresql"
+
+          - search-service: elasticsearch7
+            SEARCH_EXTRAS: "elasticsearch7"
+
+    env:
+      CACHE: ${{ matrix.cache-service }}
+      DB: ${{ matrix.db-service }}
+      SEARCH: ${{ matrix.search-service }}
+      EXTRAS: tests,${{ matrix.SEARCH_EXTRAS }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Generate dependencies
+        run: |
+          pip install wheel requirements-builder
+          requirements-builder -e "$EXTRAS" --level=${{ matrix.requirements-level }} setup.py > .${{ matrix.requirements-level }}-${{ matrix.python-version }}-requirements.txt
+
+      - name: Cache pip
+        uses: actions/cache@v2
+        with: # expressions cannot be nested in a string literal: build the file name with format()
+          path: ~/.cache/pip
+          key: ${{ runner.os }}-pip-${{ hashFiles(format('.{0}-{1}-requirements.txt', matrix.requirements-level, matrix.python-version)) }}
+
+      - name: Install dependencies
+        run: |
+
+          pip install -r .${{ matrix.requirements-level }}-${{ matrix.python-version }}-requirements.txt
+          pip install -r requirements-feature.txt # this file is used only when targeting a feature/* branch
+          pip install ".[$EXTRAS]"
+          pip freeze
+          docker --version
+          docker-compose --version
+
+      - name: Run tests
+        run: |
+          ./run-tests.sh
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 2c43145e..fa451e52 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -13,7 +13,9 @@ on:
   push:
     branches: master
   pull_request:
-    branches: master
+    branches:
+      - master
+      - "maint-**"
   schedule:
     # * is a special character in YAML so you have to quote this string
     - cron: "0 3 * * 6"
diff --git a/.gitignore b/.gitignore
index 16f27d13..6e47d266 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,7 +9,7 @@ __pycache__/
 .vscode/
 
 # DS_Store
-.DS_Store/
+*.DS_Store
 
 # C extensions
 *.so
diff --git a/invenio_vocabularies/contrib/affiliations/schema.py b/invenio_vocabularies/contrib/affiliations/schema.py
index 7a60023f..30d778a8 100644
--- a/invenio_vocabularies/contrib/affiliations/schema.py
+++ b/invenio_vocabularies/contrib/affiliations/schema.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2020-2021 CERN.
+# Copyright (C) 2020-2022 CERN.
 #
 # Invenio-Vocabularies is free software; you can redistribute it and/or
 # modify it under the terms of the MIT License; see LICENSE file for more
@@ -11,11 +11,11 @@
 from functools import partial
 
 from flask_babelex import lazy_gettext as _
-from marshmallow import Schema, ValidationError, fields, validates_schema
+from marshmallow import fields
 from marshmallow_utils.fields import IdentifierSet, SanitizedUnicode
 from marshmallow_utils.schemas import IdentifierSchema
 
-from ...services.schema import BaseVocabularySchema
+from ...services.schema import BaseVocabularySchema, ContribVocabularyRelationSchema
 from .config import affiliation_schemes
 
 
@@ -35,23 +35,9 @@ class AffiliationSchema(BaseVocabularySchema):
     name = SanitizedUnicode(required=True)
 
 
-class AffiliationRelationSchema(Schema):
+class AffiliationRelationSchema(ContribVocabularyRelationSchema):
     """Schema to define an optional affialiation relation in another schema."""
 
-    id = SanitizedUnicode()
+    ftf_name = "name"
+    parent_field_name = "affiliations"
     name = SanitizedUnicode()
-
-    @validates_schema
-    def validate_affiliation(self, data, **kwargs):
-        """Validates that either id either name are present."""
-        id_ = data.get("id")
-        name = data.get("name")
-        if id_:
-            data = {"id": id_}
-        elif name:
-            data = {"name": name}
-
-        if not id_ and not name:
-            raise ValidationError(
-                _("An existing id or a free text name must be present."), "affiliations"
-            )
diff --git a/invenio_vocabularies/contrib/funders/schema.py b/invenio_vocabularies/contrib/funders/schema.py
index 29026a13..9b176b4b 100644
--- a/invenio_vocabularies/contrib/funders/schema.py
+++ b/invenio_vocabularies/contrib/funders/schema.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2021 CERN.
+# Copyright (C) 2021-2022 CERN.
 #
 # Invenio-Vocabularies is free software; you can redistribute it and/or
 # modify it under the terms of the MIT License; see LICENSE file for more
@@ -12,7 +12,6 @@
 
 from flask_babelex import lazy_gettext as _
 from marshmallow import (
-    Schema,
     ValidationError,
     fields,
     post_load,
@@ -23,32 +22,18 @@
 from marshmallow_utils.fields import IdentifierSet, SanitizedUnicode
 from marshmallow_utils.schemas import IdentifierSchema
 
-from ...services.schema import BaseVocabularySchema
+from ...services.schema import BaseVocabularySchema, ContribVocabularyRelationSchema
 from .config import funder_schemes
 
 
-class FunderRelationSchema(Schema):
+class FunderRelationSchema(ContribVocabularyRelationSchema):
     """Funder schema."""
 
+    ftf_name = "name"
+    parent_field_name = "funder"
     name = SanitizedUnicode(
         validate=validate.Length(min=1, error=_("Name cannot be blank."))
     )
-    id = SanitizedUnicode()
-
-    @validates_schema
-    def validate_funder(self, data, **kwargs):
-        """Validates that either id either name are present."""
-        id_ = data.get("id")
-        name = data.get("name")
-        if id_:
-            data = {"id": id_}
-        elif name:
-            data = {"name": name}
-
-        if not id_ and not name:
-            raise ValidationError(
-                _("An existing id or a free text name must be present."), "funder"
-            )
 
 
 class FunderSchema(BaseVocabularySchema):
diff --git a/invenio_vocabularies/contrib/subjects/schema.py b/invenio_vocabularies/contrib/subjects/schema.py
index bb6f904b..688a219e 100644
--- a/invenio_vocabularies/contrib/subjects/schema.py
+++ b/invenio_vocabularies/contrib/subjects/schema.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 #
 # Copyright (C) 2021 Northwestern University.
-# Copyright (C) 2021 CERN.
+# Copyright (C) 2021-2022 CERN.
 #
 # Invenio-Vocabularies is free software; you can redistribute it and/or
 # modify it under the terms of the MIT License; see LICENSE file for more
@@ -10,10 +10,9 @@
 """Subjects schema."""
 
 from flask_babelex import lazy_gettext as _
-from marshmallow import Schema, ValidationError, validates_schema
 from marshmallow_utils.fields import SanitizedUnicode
 
-from ...services.schema import BaseVocabularySchema
+from ...services.schema import BaseVocabularySchema, ContribVocabularyRelationSchema
 
 
 class SubjectSchema(BaseVocabularySchema):
@@ -27,23 +26,9 @@ class SubjectSchema(BaseVocabularySchema):
     subject = SanitizedUnicode(required=True)
 
 
-class SubjectRelationSchema(Schema):
+class SubjectRelationSchema(ContribVocabularyRelationSchema):
     """Schema to define an optional subject relation in another schema."""
 
-    id = SanitizedUnicode()
+    ftf_name = "subject"
+    parent_field_name = "subjects"
     subject = SanitizedUnicode()
-
-    @validates_schema
-    def validate_subject(self, data, **kwargs):
-        """Validates that either id either name are present."""
-        id_ = data.get("id")
-        subject = data.get("subject")
-        if id_:
-            data = {"id": id_}
-        elif subject:
-            data = {"subject": subject}
-
-        if not id_ and not subject:
-            raise ValidationError(
-                _("An existing id or a free text subject must be present."), "subjects"
-            )
diff --git a/invenio_vocabularies/records/systemfields/relations.py b/invenio_vocabularies/records/systemfields/relations.py
new file mode 100644
index 00000000..75d1ac36
--- /dev/null
+++ b/invenio_vocabularies/records/systemfields/relations.py
@@ -0,0 +1,53 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2022 CERN.
+#
+# Invenio-Vocabularies is free software; you can redistribute it and/or
+# modify it under the terms of the MIT License; see LICENSE file for more
+# details.
+
+"""Relations system fields."""
+
+from flask import current_app
+from invenio_records.systemfields import RelationsField
+from werkzeug.local import LocalProxy
+
+from invenio_vocabularies.services.custom_fields import VocabularyCF
+
+from ..api import Vocabulary
+
+
+class CustomFieldsRelation(RelationsField):
+    """Relations field whose relations are defined by custom fields.
+
+    The relations are not declared statically: they are built from the custom
+    fields listed in the application config variable ``fields_var``, keeping
+    only the ones that define a ``relation_cls`` (e.g. vocabulary custom
+    fields).
+    """
+
+    def __init__(self, fields_var):
+        """Initialize the field with the name of the config variable to read."""
+        super().__init__()
+        self._fields_var = fields_var
+        # Proxied so that the config is only read when the relations are accessed.
+        self._fields = LocalProxy(lambda: self._load_custom_fields_relations())
+
+    def _load_custom_fields_relations(self):
+        """Build the relations of the configured custom fields, keyed by name."""
+        return {
+            cf.name: cf.relation_cls(
+                f"custom_fields.{cf.name}",
+                keys=cf.field_keys,
+                pid_field=Vocabulary.pid.with_type_ctx(cf.vocabulary_id),
+                cache_key=cf.vocabulary_id,
+            )
+            for cf in current_app.config.get(self._fields_var, {})
+            if getattr(cf, "relation_cls", None)
+        }
+
+    def __set__(self, instance, values):
+        """Reject assignments, the relations can only come from the config."""
+        raise ValueError(
+            f"This field can only be set through config ({self._fields_var})"
+        )
diff --git a/invenio_vocabularies/services/custom_fields/__init__.py b/invenio_vocabularies/services/custom_fields/__init__.py
new file mode 100644
index 00000000..1ce96f3c
--- /dev/null
+++ b/invenio_vocabularies/services/custom_fields/__init__.py
@@ -0,0 +1,12 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2022 CERN.
+#
+# Invenio-Vocabularies is free software; you can redistribute it and/or modify
+# it under the terms of the MIT License; see LICENSE file for more details.
+
+"""Custom Fields for InvenioRDM."""
+
+from .vocabulary import VocabularyCF
+
+__all__ = ("VocabularyCF",)  # must be a sequence of names, not a bare string
diff --git a/invenio_vocabularies/services/custom_fields/vocabulary.py b/invenio_vocabularies/services/custom_fields/vocabulary.py
new file mode 100644
index 00000000..55996fff
--- /dev/null
+++ b/invenio_vocabularies/services/custom_fields/vocabulary.py
@@ -0,0 +1,80 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2022 CERN.
+#
+# Invenio-Vocabularies is free software; you can redistribute it and/or modify
+# it under the terms of the MIT License; see LICENSE file for more details.
+
+"""Custom Fields for InvenioRDM."""
+
+from invenio_records_resources.records.systemfields import PIDListRelation, PIDRelation
+from invenio_records_resources.services.custom_fields.base import BaseCF
+from marshmallow import fields
+
+from ...proxies import current_service
+from ...resources.serializer import VocabularyL10NItemSchema
+from ...services.schema import VocabularyRelationSchema
+
+
+class VocabularyCF(BaseCF):
+    """Custom field whose value references a generic vocabulary record.
+
+    Provides the mapping, the marshmallow fields and the UI options
+    of a field backed by the vocabulary type ``vocabulary_id``.
+    """
+
+    field_keys = ["id", "props", "title", "icon"]
+    """Return field's keys for querying.
+
+    These keys are used to select which information to return from the
+    vocabulary that is queried.
+    """
+
+    def __init__(self, name, vocabulary_id, multiple=False, dump_options=True):
+        """Constructor.
+
+        :param name: name of the custom field.
+        :param vocabulary_id: type of the vocabulary the field points to.
+        :param multiple: if True the field holds a list of vocabulary items.
+        :param dump_options: if False ``options()`` returns None.
+        """
+        super().__init__(name)
+        self.vocabulary_id = vocabulary_id
+        self.multiple = multiple
+        self.dump_options = dump_options
+        self.relation_cls = PIDListRelation if multiple else PIDRelation
+
+    @property
+    def mapping(self):
+        """Return the mapping of the field."""
+        return {
+            "type": "object",
+            "properties": {
+                "@v": {"type": "keyword"},
+                "id": {"type": "keyword"},
+                "title": {"type": "object", "dynamic": True},
+            },
+        }
+
+    @property
+    def field(self):
+        """Marshmallow field used to load and dump the custom field value."""
+        return fields.Nested(VocabularyRelationSchema, many=self.multiple)
+
+    @property
+    def ui_field(self):
+        """Marshmallow UI field for vocabulary custom fields.
+
+        Used by the UIJSONSerializer to dump the field with localized strings.
+        """
+        return fields.Nested(VocabularyL10NItemSchema, many=self.multiple)
+
+    def options(self, identity):
+        """Return UI serialized vocabulary items (None if not dumped)."""
+        if not self.dump_options:
+            return None
+
+        vocabs = current_service.read_all(
+            identity, fields=self.field_keys, type=self.vocabulary_id
+        )
+        return [VocabularyL10NItemSchema().dump(vocab) for vocab in vocabs]
diff --git a/invenio_vocabularies/services/schema.py b/invenio_vocabularies/services/schema.py
index da513f82..06912d8a 100644
--- a/invenio_vocabularies/services/schema.py
+++ b/invenio_vocabularies/services/schema.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2020-2021 CERN.
+# Copyright (C) 2020-2022 CERN.
 #
 # Invenio-Vocabularies is free software; you can redistribute it and/or
 # modify it under the terms of the MIT License; see LICENSE file for more
@@ -8,8 +8,16 @@
 
 """Vocabulary service schema."""
 
+from flask_babelex import lazy_gettext as _
 from invenio_records_resources.services.records.schema import BaseRecordSchema
-from marshmallow import EXCLUDE, RAISE, Schema, fields, validate
+from marshmallow import (
+    Schema,
+    ValidationError,
+    fields,
+    pre_load,
+    validate,
+    validates_schema,
+)
 from marshmallow_utils.fields import SanitizedUnicode
 
 i18n_strings = fields.Dict(
@@ -20,6 +28,56 @@
 """Field definition for language aware strings."""
 
 
+class BaseVocabularyRelationSchema(Schema):
+    """Base vocabulary relation schema: a relation is a required ``id``."""
+
+    id = SanitizedUnicode(required=True)
+
+
+class VocabularyRelationSchema(BaseVocabularyRelationSchema):
+    """Relation schema for generic vocabularies (``title`` is dump-only)."""
+
+    title = fields.Dict(dump_only=True)
+
+    @pre_load
+    def clean(self, data, **kwargs):
+        """Strip the dump-only fields from the incoming data (in place).
+
+        This allows the output of a schema dump to be a valid input to a load.
+        """
+        if not isinstance(data, dict):
+            return data
+
+        dump_only = [key for key, fld in self.fields.items() if fld.dump_only]
+        for key in dump_only:
+            data.pop(key, None)
+        return data
+
+
+class ContribVocabularyRelationSchema(Schema):
+    """Base relation schema for contrib vocabularies.
+
+    Requires either the ``id`` of a record or the free text field ``ftf_name``.
+    """
+
+    id = SanitizedUnicode()
+    ftf_name = None  # free text field name
+    parent_field_name = None
+
+    @validates_schema
+    def validate_relation_schema(self, data, **kwargs):
+        """Validate that either the id or the free text field is present."""
+        # The msgid is kept constant (no f-string) so that it can be translated.
+        if not (data.get("id") or data.get(self.ftf_name)):
+            raise ValidationError(
+                _(
+                    "An existing id or a free text %(name)s must be present.",
+                    name=self.ftf_name,
+                ),
+                self.parent_field_name,
+            )
+
+
 class BaseVocabularySchema(BaseRecordSchema):
     """Base schema for vocabularies."""
 
diff --git a/requirements-devel.txt b/requirements-feature.txt
similarity index 61%
rename from requirements-devel.txt
rename to requirements-feature.txt
index 3a5124e9..bb3d33c9 100644
--- a/requirements-devel.txt
+++ b/requirements-feature.txt
@@ -1,7 +1,9 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2020 CERN.
+# Copyright (C) 2022 CERN.
 #
 # Invenio-Vocabularies is free software; you can redistribute it and/or
 # modify it under the terms of the MIT License; see LICENSE file for more
 # details.
+
+# This file is adding constraints to be used when a PR is targeting a feature/* branch
diff --git a/setup.cfg b/setup.cfg
index 4006e03a..21314397 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -27,7 +27,7 @@ packages = find:
 python_requires = >=3.7
 zip_safe = False
 install_requires =
-    invenio-records-resources>=0.20.0,<0.21.0
+    invenio-records-resources>=0.20.1,<0.21.0
     lxml>=4.5.0
     PyYAML>=5.4.1
 
@@ -38,8 +38,6 @@ tests =
     invenio-db[postgresql,mysql,versioning]>=1.0.14,<2.0.0
     pytest-invenio>=1.4.11
     Sphinx>=4.5
-elasticsearch6 =
-    invenio-search[elasticsearch6]>=1.4.2,<2.0
 elasticsearch7 =
     invenio-search[elasticsearch7]>=1.4.2,<2.0
 # Kept for backwards compatibility:
diff --git a/tests/conftest.py b/tests/conftest.py
index f68eccef..617f2c60 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -168,7 +168,7 @@ def example_record(db, identity, service, example_data):
 
 
 @pytest.fixture(scope="function")
-def lang_data_many(lang_type, lic_type, lang_data, service, identity):
+def lang_data_many(lang_type, lang_data, service, identity):
     """Create many language vocabulary."""
     lang_ids = ["fr", "tr", "gr", "ger", "es"]
     data = dict(lang_data)
diff --git a/tests/custom_fields/test_custom_fields.py b/tests/custom_fields/test_custom_fields.py
new file mode 100644
index 00000000..7d2262ef
--- /dev/null
+++ b/tests/custom_fields/test_custom_fields.py
@@ -0,0 +1,103 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of Invenio.
+# Copyright (C) 2022 CERN.
+#
+# Invenio is free software; you can redistribute it and/or modify it
+# under the terms of the MIT License; see LICENSE file for more details.
+
+"""Test custom fields."""
+
+import pytest
+from invenio_records_resources.records.systemfields import PIDListRelation, PIDRelation
+from marshmallow import Schema
+
+from invenio_vocabularies.services.custom_fields import VocabularyCF
+
+
+@pytest.fixture(scope="module")
+def vocabulary_cf():
+    return VocabularyCF("test", "test")
+
+
+@pytest.fixture(scope="module")
+def TestSchema(vocabulary_cf):
+    return Schema.from_dict({"test": vocabulary_cf.field})
+
+
+@pytest.fixture(scope="module")
+def TestUISchema(vocabulary_cf):
+    return Schema.from_dict({"test": vocabulary_cf.ui_field})
+
+
+def test_relation_cls(vocabulary_cf):
+    """The relation class depends on the ``multiple`` flag."""
+    list_cf = VocabularyCF("test", "test", multiple=True)
+    assert vocabulary_cf.relation_cls == PIDRelation
+    assert list_cf.relation_cls == PIDListRelation
+
+
+def test_cf_mapping(vocabulary_cf):
+    # this will be useful when implementing compatibility with OS
+    assert vocabulary_cf.mapping == {
+        "type": "object",
+        "properties": {
+            "@v": {"type": "keyword"},
+            "id": {"type": "keyword"},
+            "title": {"type": "object", "dynamic": True},
+        },
+    }
+
+
+def test_field_load(TestSchema):
+    vocab = {"test": {"id": "test", "title": {"en": "Test"}}}
+    assert TestSchema().load(vocab) == {"test": {"id": "test"}}
+
+
+def test_field_dump(TestSchema):
+    vocab = {"test": {"id": "test", "title": {"en": "Test"}}}
+    assert TestSchema().dump(vocab) == vocab  # dump keeps the dump-only title
+
+
+def test_ui_field_dump(app, TestUISchema):
+    # app is needed for babel local setup
+    # no load test since it's never used for loading
+    vocab = {
+        "test": {
+            "id": "test",
+            "title": {"en": "Test"},
+            "description": {"en": "Test description"},
+            "props": {"key": "value"},
+            "icon": "icon.png",
+            "tags": ["tag1", "tag2"],
+        }
+    }
+
+    expected_vocab = {
+        "test": {
+            "id": "test",
+            "title_l10n": "Test",
+            "description_l10n": "Test description",
+            "props": {"key": "value"},
+            "icon": "icon.png",
+            "tags": ["tag1", "tag2"],
+        }
+    }
+    assert TestUISchema().dump(vocab) == expected_vocab
+
+
+def test_options(lang_data_many, identity):
+    """Check the UI options dumped for the ``languages`` vocabulary.
+
+    One option is expected per language id listed below.
+    """
+    common = {
+        "title_l10n": "English",
+        "icon": "file-o",
+        "props": {"akey": "avalue"},
+    }
+    lang_ids = ["fr", "tr", "gr", "ger", "es"]
+    expected_options = [{"id": lang_id, **common} for lang_id in lang_ids]
+
+    cf = VocabularyCF("test", "languages")
+    assert cf.options(identity) == expected_options