Skip to content

Commit

Permalink
documents: delete $ref possibility on provisionActivity.places
Browse files Browse the repository at this point in the history
* Fixes conversion errors on some entity fields (removing the unauthorized
  `type` field).
* Removes the `$ref` field from the `provisionActivity.places` structure:
  `places` authorities are not yet supported by the MEF project, so no link
  can be made.
* Updates the alembic tree dependencies for scripts in the `US-entity`
  branch.

Co-authored-by: Renaud Michotte <[email protected]>
  • Loading branch information
zannkukai committed Apr 4, 2023
1 parent ef387c7 commit bd765eb
Show file tree
Hide file tree
Showing 22 changed files with 2,099 additions and 4,123 deletions.
4,684 changes: 1,567 additions & 3,117 deletions data/documents_big.json

Large diffs are not rendered by default.

1,177 changes: 395 additions & 782 deletions data/documents_small.json

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

# revision identifiers, used by Alembic.
revision = 'a710021979fe'
down_revision = '5f0b086e4b82'
down_revision = '8145a7cdef99'
branch_labels = ()
depends_on = None

Expand Down
3 changes: 0 additions & 3 deletions rero_ils/dojson/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -646,8 +646,6 @@ def build_place(self):
place['canton'] = self.cantons[0]
if self.country:
place['country'] = self.country
if place:
place['type'] = 'bf:Place'
if self.links_from_752:
place['identifiedBy'] = self.links_from_752[0]
return place
Expand All @@ -664,7 +662,6 @@ def default_provision_activity(self, result):
for i in range(1, len(self.links_from_752)):
place = {
'country': 'xx',
'type': 'bf:Place',
'identifiedBy': self.links_from_752[i]
}
places.append(place)
Expand Down
9 changes: 5 additions & 4 deletions rero_ils/modules/documents/dojson/contrib/jsontodc/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from flask_babelex import gettext as _

from rero_ils.modules.documents.extensions import TitleExtension
from rero_ils.modules.entities.models import EntityType
from rero_ils.modules.entities.utils import get_entity_localized_value


Expand Down Expand Up @@ -210,8 +211,8 @@ def json_to_relations(self, key, value):
def json_to_subject(self, key, value):
"""Get subject data."""
result = ''
subject_type = value.get('type')
if subject_type in ['bf:Person', 'bf:Organisation', 'bf:Place']:
_type = value.get('type')
if _type in [EntityType.PERSON, EntityType.ORGANISATION, EntityType.PLACE]:
# TODO: set the language
authorized_access_point = get_entity_localized_value(
entity=value,
Expand All @@ -222,13 +223,13 @@ def json_to_subject(self, key, value):
result = authorized_access_point
else:
result = value.get('preferred_name')
elif subject_type == 'bf:Work':
elif _type == EntityType.WORK:
work = []
creator = value.get('creator')
if creator:
work.append(creator)
work.append(value.get('title'))
result = '. - '.join(work)
elif subject_type in ['bf:Topic', 'bf:Temporal']:
elif _type in [EntityType.TOPIC, EntityType.TEMPORAL]:
result = value.get('term')
return result or None
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from rero_ils.modules.documents.utils import display_alternate_graphic_first
from rero_ils.modules.documents.views import create_title_responsibilites
from rero_ils.modules.entities.api import Entity
from rero_ils.modules.entities.models import EntityType
from rero_ils.modules.holdings.api import Holding, HoldingsSearch
from rero_ils.modules.items.api import Item, ItemsSearch
from rero_ils.modules.libraries.api import Library
Expand Down Expand Up @@ -596,7 +597,7 @@ def reverse_contribution(self, key, value):
break
result = {}
result = add_value(result, 'a', preferred_name)
if agent_type == 'bf:Person':
if agent_type == EntityType.PERSON:
tag = '7000_'
if ',' in preferred_name:
tag = '7001_'
Expand All @@ -611,7 +612,7 @@ def reverse_contribution(self, key, value):
date = f'{date_of_birth} - {date_of_death}'
if date != ' - ':
result = add_value(result, 'd', date)
elif agent_type == 'bf:Organisation':
elif agent_type == EntityType.ORGANISATION:
tag = '710__'
if agent.get('conference'):
tag = '711__'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def marc21_to_work_access_point(self, key, value):
if field_100 := marc21.get_fields('100'):
agent = {}
for blob_key, blob_value in field_100[0].get('subfields').items():
agent['type'] = 'bf:Person'
agent['type'] = EntityType.PERSON
if blob_key == 'a':
# numeration = not_repetitive(
# marc21.bib_id, marc21.bib_id, blob_key, blob_value, 'b')
Expand Down Expand Up @@ -616,10 +616,7 @@ def marc21_to_subjects_6XX(self, key, value):
ref = get_contribution_link(
marc21.bib_id, marc21.bib_id, cont_id, key)
if ref:
subject = {
'$ref': ref,
'type': data_type,
}
subject = {'$ref': ref}
if not subject.get('$ref'):
identifier = build_identifier(value)
if identifier:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
get_field_items, not_repetitive, re_identified, \
remove_trailing_punctuation
from rero_ils.modules.documents.utils import create_authorized_access_point
from rero_ils.modules.entities.models import EntityType

from ..utils import _CONTRIBUTION_ROLE, do_abbreviated_title, \
do_acquisition_terms_from_field_037, do_copyright_date, do_credits, \
Expand Down Expand Up @@ -126,10 +127,6 @@ def marc21_to_contribution(self, key, value):
if ref := get_contribution_link(
marc21.bib_id, marc21.rero_id, subfields_0[0], key):
agent['$ref'] = ref
if key[:3] in ['100', '700']:
agent['type'] = 'bf:Person'
elif key[:3] in ['710', '711']:
agent['type'] = 'bf:Organisation'

# we do not have a $ref
agent_data = {}
Expand All @@ -143,7 +140,7 @@ def marc21_to_contribution(self, key, value):

# 100|700 Person
if key[:3] in ['100', '700']:
agent_data['type'] = 'bf:Person'
agent_data['type'] = EntityType.PERSON
if value.get('b'):
numeration = not_repetitive(
marc21.bib_id, marc21.rero_id, key, value, 'b')
Expand Down Expand Up @@ -175,7 +172,7 @@ def marc21_to_contribution(self, key, value):
agent_data['identifiedBy'] = identifier

elif key[:3] in ['710', '711']:
agent_data['type'] = 'bf:Organisation'
agent_data['type'] = EntityType.ORGANISATION
agent_data['conference'] = key[:3] == '711'
if value.get('b'):
subordinate_units = [
Expand Down Expand Up @@ -503,16 +500,16 @@ def marc21_to_subjects(self, key, value):
subjects_imported : for 6xx having indicator 2 '0' or '2'
"""
type_per_tag = {
'600': 'bf:Person',
'610': 'bf:Organisation',
'611': 'bf:Organisation',
'600t': 'bf:Work',
'610t': 'bf:Work',
'611t': 'bf:Work',
'630': 'bf:Work',
'650': 'bf:Topic', # or bf:Temporal, changed by code
'651': 'bf:Place',
'655': 'bf:Topic'
'600': EntityType.PERSON,
'610': EntityType.ORGANISATION,
'611': EntityType.ORGANISATION,
'600t': EntityType.WORK,
'610t': EntityType.WORK,
'611t': EntityType.WORK,
'630': EntityType.WORK,
'650': EntityType.TOPIC, # or bf:Temporal, changed by code
'651': EntityType.PLACE,
'655': EntityType.TOPIC
}

source_per_indicator_2 = {
Expand All @@ -535,7 +532,7 @@ def marc21_to_subjects(self, key, value):
if tag_key == '650':
for subfield_a in subfields_a:
if subfield_a[0].isdigit():
data_type = 'bf:Temporal'
data_type = EntityType.TEMPORAL
break

subject = {
Expand Down Expand Up @@ -573,17 +570,13 @@ def marc21_to_subjects(self, key, value):
) + '. ' + subject['authorized_access_point']
field_key = 'genreForm' if tag_key == '655' else 'subjects'
subfields_0 = utils.force_list(value.get('0'))
if data_type in ['bf:Person', 'bf:Organisation'] and subfields_0:
ref = get_contribution_link(marc21.bib_id, marc21.rero_id,
subfields_0[0], key)
if ref:
subject = {
'$ref': ref,
'type': data_type,
}
if data_type in [EntityType.PERSON, EntityType.ORGANISATION] \
and subfields_0:
if ref := get_contribution_link(marc21.bib_id, marc21.rero_id,
subfields_0[0], key):
subject = {'$ref': ref}
if not subject.get('$ref'):
identifier = build_identifier(value)
if identifier:
if identifier := build_identifier(value):
subject['identifiedBy'] = identifier
if field_key != 'genreForm':
perform_subdivisions(subject, value)
Expand All @@ -594,9 +587,8 @@ def marc21_to_subjects(self, key, value):
self[field_key] = subjects

elif subfield_2 == 'rerovoc' or indicator_2 in ['0', '2']:
term_string = build_string_from_subfields(
value, 'abcdefghijklmnopqrstuw', ' - ')
if term_string:
if term_string := build_string_from_subfields(
value, 'abcdefghijklmnopqrstuw', ' - '):
source = 'rerovoc' if subfield_2 == 'rerovoc' \
else source_per_indicator_2[indicator_2]
subject_imported = {
Expand Down Expand Up @@ -628,7 +620,7 @@ def marc21_to_subjects_imported(self, key, value):
field_key = 'subjects_imported'
if subfields_2:
subfield_2 = subfields_2[0]
if match := contains_specific_voc_regexp.search(subfield_2):
if contains_specific_voc_regexp.search(subfield_2):
add_data_imported = False
if subfield_2 == 'chrero':
subfields_9 = utils.force_list(value.get('9'))
Expand All @@ -651,14 +643,14 @@ def marc21_to_subjects_imported(self, key, value):
value,
'abcdefghijklmnopqrstuvwxyz', ' - ')
data_imported = {
'type': 'bf:Topic',
'type': EntityType.TOPIC,
'source': subfield_2,
'authorized_access_point': term_string
}
elif term_string := build_string_from_subfields(
value, 'abcdefghijklmnopqrstuvwxyz', ' - '):
data_imported = {
'type': 'bf:Topic',
'type': EntityType.TOPIC,
'authorized_access_point': term_string
}
if data_imported:
Expand Down Expand Up @@ -727,7 +719,7 @@ def get_classif_type_and_subdivision_codes_from_980_2(subfield_2):
if tag == '980':
if subfield_2 and _CONTAINS_FACTUM_REGEXP.search(subfield_2):
subject = {
'type': 'bf:Person',
'type': EntityType.PERSON,
'authorized_access_point': subfield_a,
'source': 'Factum'
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from rero_ils.dojson.utils import ReroIlsMarc21Overdo, build_identifier, \
build_string_from_subfields, get_contribution_link, \
remove_trailing_punctuation
from rero_ils.modules.entities.models import EntityType

from ..utils import do_abbreviated_title, \
do_acquisition_terms_from_field_037, do_classification, do_contribution, \
Expand Down Expand Up @@ -320,16 +321,16 @@ def marc21_to_subjects_6XX(self, key, value):
subjects_imported : for 6xx having indicator 2 '0' or '2'
"""
type_per_tag = {
'600': 'bf:Person',
'610': 'bf:Organisation',
'611': 'bf:Organisation',
'600t': 'bf:Work',
'610t': 'bf:Work',
'611t': 'bf:Work',
'630': 'bf:Work',
'650': 'bf:Topic', # or bf:Temporal, changed by code
'651': 'bf:Place',
'655': 'bf:Topic'
'600': EntityType.PERSON,
'610': EntityType.ORGANISATION,
'611': EntityType.ORGANISATION,
'600t': EntityType.WORK,
'610t': EntityType.WORK,
'611t': EntityType.WORK,
'630': EntityType.WORK,
'650': EntityType.TOPIC, # or bf:Temporal, changed by code
'651': EntityType.PLACE,
'655': EntityType.TOPIC
}

conference_per_tag = {
Expand Down Expand Up @@ -362,7 +363,7 @@ def marc21_to_subjects_6XX(self, key, value):
if tag_key == '650':
for subfield_a in subfields_a:
if subfield_a[0].isdigit():
data_type = 'bf:Temporal'
data_type = EntityType.TEMPORAL
break

subject = {
Expand Down Expand Up @@ -398,7 +399,8 @@ def marc21_to_subjects_6XX(self, key, value):
value, subfield_code_per_tag[creator_tag_key]), '.', '.')
field_key = 'genreForm' if tag_key == '655' else config_field_key
subfields_0 = utils.force_list(value.get('0'))
if data_type in ['bf:Person', 'bf:Organisation'] and subfields_0:
if data_type in [EntityType.PERSON, EntityType.ORGANISATION] \
and subfields_0:
ref = get_contribution_link(
marc21.bib_id, marc21.bib_id, subfields_0[0], key)
if ref:
Expand All @@ -413,7 +415,7 @@ def marc21_to_subjects_6XX(self, key, value):
subfields_2 = utils.force_list(value.get('2'))

if identifier \
and data_type == 'bf:Topic' \
and data_type == EntityType.TOPIC \
and len(subfields_2) > 0 \
and subfields_2[0].lower() == 'rero':
identifier['type'] = 'RERO-RAMEAU'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

from rero_ils.dojson.utils import ReroIlsMarc21Overdo, \
build_string_from_subfields
from rero_ils.modules.entities.models import EntityType

from ..utils import do_abbreviated_title, \
do_acquisition_terms_from_field_037, do_classification, do_contribution, \
Expand Down Expand Up @@ -305,16 +306,16 @@ def marc21_to_subjects_6XX(self, key, value):
subjects_imported : for 6xx having indicator 2 '0' or '2'
"""
type_per_tag = {
'600': 'bf:Person',
'610': 'bf:Organisation',
'611': 'bf:Organisation',
'600t': 'bf:Work',
'610t': 'bf:Work',
'611t': 'bf:Work',
'630': 'bf:Work',
'650': 'bf:Topic', # or bf:Temporal, changed by code
'651': 'bf:Place',
'655': 'bf:Topic'
'600': EntityType.PERSON,
'610': EntityType.ORGANISATION,
'611': EntityType.ORGANISATION,
'600t': EntityType.WORK,
'610t': EntityType.WORK,
'611t': EntityType.WORK,
'630': EntityType.WORK,
'650': EntityType.TOPIC, # or bf:Temporal, changed by code
'651': EntityType.PLACE,
'655': EntityType.TOPIC
}

conference_per_tag = {
Expand Down
Loading

0 comments on commit bd765eb

Please sign in to comment.