Skip to content

Commit

Permalink
documents: delete $ref possibility on provisionActivity.places
Browse files Browse the repository at this point in the history
* Fixes conversion errors on some entity fields (removing the unauthorized
  `type` field).
* Removes the `$ref` field from the `provisionActivity.places` structure:
  `places` authorities are not yet supported by the MEF project, so no link
  can be made.
* Updates the alembic tree dependencies for the scripts in the `US-entity`
  branch.

Co-authored-by: Renaud Michotte <[email protected]>
  • Loading branch information
zannkukai committed Mar 29, 2023
1 parent 08b377b commit 8abe28a
Show file tree
Hide file tree
Showing 18 changed files with 2,045 additions and 4,074 deletions.
4,684 changes: 1,567 additions & 3,117 deletions data/documents_big.json

Large diffs are not rendered by default.

1,177 changes: 395 additions & 782 deletions data/documents_small.json

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

# revision identifiers, used by Alembic.
revision = 'a710021979fe'
down_revision = '5f0b086e4b82'
down_revision = '8145a7cdef99'
branch_labels = ()
depends_on = None

Expand Down
3 changes: 0 additions & 3 deletions rero_ils/dojson/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -646,8 +646,6 @@ def build_place(self):
place['canton'] = self.cantons[0]
if self.country:
place['country'] = self.country
if place:
place['type'] = 'bf:Place'
if self.links_from_752:
place['identifiedBy'] = self.links_from_752[0]
return place
Expand All @@ -664,7 +662,6 @@ def default_provision_activity(self, result):
for i in range(1, len(self.links_from_752)):
place = {
'country': 'xx',
'type': 'bf:Place',
'identifiedBy': self.links_from_752[i]
}
places.append(place)
Expand Down
9 changes: 5 additions & 4 deletions rero_ils/modules/documents/dojson/contrib/jsontodc/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from flask_babelex import gettext as _

from rero_ils.modules.documents.extensions import TitleExtension
from rero_ils.modules.entities.models import EntityType
from rero_ils.modules.entities.utils import get_entity_localized_value


Expand Down Expand Up @@ -210,8 +211,8 @@ def json_to_relations(self, key, value):
def json_to_subject(self, key, value):
"""Get subject data."""
result = ''
subject_type = value.get('type')
if subject_type in ['bf:Person', 'bf:Organisation', 'bf:Place']:
_type = value.get('type')
if _type in [EntityType.PERSON, EntityType.ORGANISATION, 'bf:Place']:
# TODO: set the language
authorized_access_point = get_entity_localized_value(
entity=value,
Expand All @@ -222,13 +223,13 @@ def json_to_subject(self, key, value):
result = authorized_access_point
else:
result = value.get('preferred_name')
elif subject_type == 'bf:Work':
elif _type == 'bf:Work':
work = []
creator = value.get('creator')
if creator:
work.append(creator)
work.append(value.get('title'))
result = '. - '.join(work)
elif subject_type in ['bf:Topic', 'bf:Temporal']:
elif _type in ['bf:Topic', 'bf:Temporal']:
result = value.get('term')
return result or None
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from rero_ils.modules.documents.utils import display_alternate_graphic_first
from rero_ils.modules.documents.views import create_title_responsibilites
from rero_ils.modules.entities.api import Entity
from rero_ils.modules.entities.models import EntityType
from rero_ils.modules.holdings.api import Holding, HoldingsSearch
from rero_ils.modules.items.api import Item, ItemsSearch
from rero_ils.modules.libraries.api import Library
Expand Down Expand Up @@ -596,7 +597,7 @@ def reverse_contribution(self, key, value):
break
result = {}
result = add_value(result, 'a', preferred_name)
if agent_type == 'bf:Person':
if agent_type == EntityType.PERSON:
tag = '7000_'
if ',' in preferred_name:
tag = '7001_'
Expand All @@ -611,7 +612,7 @@ def reverse_contribution(self, key, value):
date = f'{date_of_birth} - {date_of_death}'
if date != ' - ':
result = add_value(result, 'd', date)
elif agent_type == 'bf:Organisation':
elif agent_type == EntityType.ORGANISATION:
tag = '710__'
if agent.get('conference'):
tag = '711__'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def marc21_to_work_access_point(self, key, value):
if field_100 := marc21.get_fields('100'):
agent = {}
for blob_key, blob_value in field_100[0].get('subfields').items():
agent['type'] = 'bf:Person'
agent['type'] = EntityType.PERSON
if blob_key == 'a':
# numeration = not_repetitive(
# marc21.bib_id, marc21.bib_id, blob_key, blob_value, 'b')
Expand Down Expand Up @@ -616,10 +616,7 @@ def marc21_to_subjects_6XX(self, key, value):
ref = get_contribution_link(
marc21.bib_id, marc21.bib_id, cont_id, key)
if ref:
subject = {
'$ref': ref,
'type': data_type,
}
subject = {'$ref': ref}
if not subject.get('$ref'):
identifier = build_identifier(value)
if identifier:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
get_field_items, not_repetitive, re_identified, \
remove_trailing_punctuation
from rero_ils.modules.documents.utils import create_authorized_access_point
from rero_ils.modules.entities.models import EntityType

from ..utils import _CONTRIBUTION_ROLE, do_abbreviated_title, \
do_acquisition_terms_from_field_037, do_copyright_date, do_credits, \
Expand Down Expand Up @@ -126,10 +127,6 @@ def marc21_to_contribution(self, key, value):
if ref := get_contribution_link(
marc21.bib_id, marc21.rero_id, subfields_0[0], key):
agent['$ref'] = ref
if key[:3] in ['100', '700']:
agent['type'] = 'bf:Person'
elif key[:3] in ['710', '711']:
agent['type'] = 'bf:Organisation'

# we do not have a $ref
agent_data = {}
Expand All @@ -143,7 +140,7 @@ def marc21_to_contribution(self, key, value):

# 100|700 Person
if key[:3] in ['100', '700']:
agent_data['type'] = 'bf:Person'
agent_data['type'] = EntityType.PERSON
if value.get('b'):
numeration = not_repetitive(
marc21.bib_id, marc21.rero_id, key, value, 'b')
Expand Down Expand Up @@ -175,7 +172,7 @@ def marc21_to_contribution(self, key, value):
agent_data['identifiedBy'] = identifier

elif key[:3] in ['710', '711']:
agent_data['type'] = 'bf:Organisation'
agent_data['type'] = EntityType.ORGANISATION
agent_data['conference'] = key[:3] == '711'
if value.get('b'):
subordinate_units = [
Expand Down Expand Up @@ -503,9 +500,9 @@ def marc21_to_subjects(self, key, value):
subjects_imported : for 6xx having indicator 2 '0' or '2'
"""
type_per_tag = {
'600': 'bf:Person',
'610': 'bf:Organisation',
'611': 'bf:Organisation',
'600': EntityType.PERSON,
'610': EntityType.ORGANISATION,
'611': EntityType.ORGANISATION,
'600t': 'bf:Work',
'610t': 'bf:Work',
'611t': 'bf:Work',
Expand Down Expand Up @@ -573,17 +570,13 @@ def marc21_to_subjects(self, key, value):
) + '. ' + subject['authorized_access_point']
field_key = 'genreForm' if tag_key == '655' else 'subjects'
subfields_0 = utils.force_list(value.get('0'))
if data_type in ['bf:Person', 'bf:Organisation'] and subfields_0:
ref = get_contribution_link(marc21.bib_id, marc21.rero_id,
subfields_0[0], key)
if ref:
subject = {
'$ref': ref,
'type': data_type,
}
if data_type in [EntityType.PERSON, EntityType.ORGANISATION] \
and subfields_0:
if ref := get_contribution_link(marc21.bib_id, marc21.rero_id,
subfields_0[0], key):
subject = {'$ref': ref}
if not subject.get('$ref'):
identifier = build_identifier(value)
if identifier:
if identifier := build_identifier(value):
subject['identifiedBy'] = identifier
if field_key != 'genreForm':
perform_subdivisions(subject, value)
Expand All @@ -594,9 +587,8 @@ def marc21_to_subjects(self, key, value):
self[field_key] = subjects

elif subfield_2 == 'rerovoc' or indicator_2 in ['0', '2']:
term_string = build_string_from_subfields(
value, 'abcdefghijklmnopqrstuw', ' - ')
if term_string:
if term_string := build_string_from_subfields(
value, 'abcdefghijklmnopqrstuw', ' - '):
source = 'rerovoc' if subfield_2 == 'rerovoc' \
else source_per_indicator_2[indicator_2]
subject_imported = {
Expand Down Expand Up @@ -628,7 +620,7 @@ def marc21_to_subjects_imported(self, key, value):
field_key = 'subjects_imported'
if subfields_2:
subfield_2 = subfields_2[0]
if match := contains_specific_voc_regexp.search(subfield_2):
if contains_specific_voc_regexp.search(subfield_2):
add_data_imported = False
if subfield_2 == 'chrero':
subfields_9 = utils.force_list(value.get('9'))
Expand Down Expand Up @@ -727,7 +719,7 @@ def get_classif_type_and_subdivision_codes_from_980_2(subfield_2):
if tag == '980':
if subfield_2 and _CONTAINS_FACTUM_REGEXP.search(subfield_2):
subject = {
'type': 'bf:Person',
'type': EntityType.PERSON,
'authorized_access_point': subfield_a,
'source': 'Factum'
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from rero_ils.dojson.utils import ReroIlsMarc21Overdo, build_identifier, \
build_string_from_subfields, get_contribution_link, \
remove_trailing_punctuation
from rero_ils.modules.entities.models import EntityType

from ..utils import do_abbreviated_title, \
do_acquisition_terms_from_field_037, do_classification, do_contribution, \
Expand Down Expand Up @@ -320,9 +321,9 @@ def marc21_to_subjects_6XX(self, key, value):
subjects_imported : for 6xx having indicator 2 '0' or '2'
"""
type_per_tag = {
'600': 'bf:Person',
'610': 'bf:Organisation',
'611': 'bf:Organisation',
'600': EntityType.PERSON,
'610': EntityType.ORGANISATION,
'611': EntityType.ORGANISATION,
'600t': 'bf:Work',
'610t': 'bf:Work',
'611t': 'bf:Work',
Expand Down Expand Up @@ -398,7 +399,8 @@ def marc21_to_subjects_6XX(self, key, value):
value, subfield_code_per_tag[creator_tag_key]), '.', '.')
field_key = 'genreForm' if tag_key == '655' else config_field_key
subfields_0 = utils.force_list(value.get('0'))
if data_type in ['bf:Person', 'bf:Organisation'] and subfields_0:
if data_type in [EntityType.PERSON, EntityType.ORGANISATION] \
and subfields_0:
ref = get_contribution_link(
marc21.bib_id, marc21.bib_id, subfields_0[0], key)
if ref:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

from rero_ils.dojson.utils import ReroIlsMarc21Overdo, \
build_string_from_subfields
from rero_ils.modules.entities.models import EntityType

from ..utils import do_abbreviated_title, \
do_acquisition_terms_from_field_037, do_classification, do_contribution, \
Expand Down Expand Up @@ -305,9 +306,9 @@ def marc21_to_subjects_6XX(self, key, value):
subjects_imported : for 6xx having indicator 2 '0' or '2'
"""
type_per_tag = {
'600': 'bf:Person',
'610': 'bf:Organisation',
'611': 'bf:Organisation',
'600': EntityType.PERSON,
'610': EntityType.ORGANISATION,
'611': EntityType.ORGANISATION,
'600t': 'bf:Work',
'610t': 'bf:Work',
'611t': 'bf:Work',
Expand Down
26 changes: 8 additions & 18 deletions rero_ils/modules/documents/dojson/contrib/marc21tojson/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
get_contribution_link, get_field_items, get_field_link_data, \
not_repetitive, re_identified, remove_trailing_punctuation
from rero_ils.modules.documents.utils import create_authorized_access_point
from rero_ils.modules.entities.models import EntityType

_DOCUMENT_RELATION_PER_TAG = {
'770': 'supplement',
Expand Down Expand Up @@ -518,16 +519,11 @@ def build_agent(marc21, key, value):
"""Build agent."""
agent_data = {}
if value.get('a'):
name = not_repetitive(
marc21.bib_id, marc21.bib_id, key, value, 'a')
agent_data['preferred_name'] = remove_trailing_punctuation(name)
agent_data['preferred_name'] = remove_trailing_punctuation(
not_repetitive(marc21.bib_id, marc21.bib_id, key, value, 'a'))
# 100|700|240 Person
if key[:3] in ['100', '700']:
agent_data['type'] = 'bf:Person'
if value.get('a'):
name = not_repetitive(
marc21.bib_id, marc21.bib_id, key, value, 'a')
agent_data['preferred_name'] = remove_trailing_punctuation(name)
agent_data['type'] = EntityType.PERSON
if value.get('b'):
numeration = not_repetitive(
marc21.bib_id, marc21.bib_id, key, value, 'b')
Expand Down Expand Up @@ -559,7 +555,7 @@ def build_agent(marc21, key, value):

# 710|711 Organisation
if key[:3] in ['710', '711']:
agent_data['type'] = 'bf:Organisation'
agent_data['type'] = EntityType.ORGANISATION
agent_data['conference'] = key[:3] == '711'
if value.get('b'):
subordinate_units = [
Expand Down Expand Up @@ -604,15 +600,10 @@ def do_contribution(data, marc21, key, value):
return None

agent = {}

if value.get('0'):
if ref := get_contribution_link(marc21.bib_id, marc21.rero_id,
value.get('0'), key):
agent['$ref'] = ref
if key[:3] in ['100', '700']:
agent['type'] = 'bf:Person'
elif key[:3] in ['710', '711']:
agent['type'] = 'bf:Organisation'

# we do not have a $ref
if not agent.get('$ref') and value.get('a'):
Expand Down Expand Up @@ -821,8 +812,7 @@ def build_agent_data(code, label, index, link):
# parse the links, skipping the first one (already used by build_place)
for i in range(1, len(marc21.links_from_752)):
place = {
'country': 'xx',
'type': 'bf:Place',
'country': 'xx'
}
if marc21.links_from_752:
place['identifiedBy'] = marc21.links_from_752[i]
Expand Down Expand Up @@ -1634,7 +1624,7 @@ def do_work_access_point(marc21, key, value):
work_access_point = {}
if tag in ['700', '800'] and value.get('t'):
title_tag = 't'
agent['type'] = 'bf:Person'
agent['type'] = EntityType.PERSON
if value.get('a'):
preferred_name = not_repetitive(
marc21.bib_id, marc21.bib_id, key, value, 'a')
Expand All @@ -1660,7 +1650,7 @@ def do_work_access_point(marc21, key, value):
).rstrip('.')
elif tag == '710':
title_tag = 't'
agent['type'] = 'bf:Organisation'
agent['type'] = EntityType.ORGANISATION
agent['conference'] = False
if value.get('a'):
preferred_name = not_repetitive(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
remove_trailing_punctuation
from rero_ils.modules.documents.api import Document
from rero_ils.modules.documents.utils import create_authorized_access_point
from rero_ils.modules.entities.models import EntityType

_ISSUANCE_MAIN_TYPE_PER_BIB_LEVEL = {
'a': 'rdami:1001',
Expand Down Expand Up @@ -530,7 +531,7 @@ def unimarc_to_contribution(self, key, value):
"""
agent = {}
agent['preferred_name'] = ', '.join(utils.force_list(value.get('a', '')))
agent['type'] = 'bf:Person'
agent['type'] = EntityType.PERSON

if key[:3] in ['700', '701', '702', '703']:
if agent['preferred_name']:
Expand Down
Loading

0 comments on commit 8abe28a

Please sign in to comment.