Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

contributions: better update_contributions #2728

Merged
merged 1 commit into from
Mar 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion rero_ils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,7 @@ def _(x):
# Every week on Saturday at 22:22 UTC,
'enabled': False
},
'replace_idby-subjects': {
'replace-idby-subjects': {
'task': ('rero_ils.modules.documents.tasks.replace_idby_subjects'),
'schedule': crontab(minute=22, hour=22, day_of_week=6),
# Every week on Saturday at 22:22 UTC,
Expand Down
34 changes: 28 additions & 6 deletions rero_ils/modules/contributions/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,20 +239,29 @@ def get_authorized_access_point(self, language):
language=language
)

def documents_pids(self, with_subjects=True):
def _search_documents(self, with_subjects=True):
"""Get documents pids."""
search_filters = Q("term", contribution__agent__pid=self.pid)
if with_subjects:
subject_filters = Q("term", subjects__pid=self.pid) & \
Q("terms", subjects__type=['bf:Person', 'bf:Organisation'])
search_filters = search_filters | subject_filters

search = DocumentsSearch() \
.query('bool', filter=[search_filters]) \
.source('pid')
return DocumentsSearch() \
.query('bool', filter=[search_filters])

def documents_pids(self, with_subjects=True):
"""Get documents pids."""
search = self._search_documents(
with_subjects=with_subjects).source('pid')
return [hit.pid for hit in search.scan()]

def documents_ids(self, with_subjects=True):
"""Get documents ids."""
search = self._search_documents(
with_subjects=with_subjects).source('pid')
return [hit.meta.id for hit in search.scan()]

def update_online(self, dbcommit=False, reindex=False, verbose=False):
"""Update record online.

Expand All @@ -273,24 +282,37 @@ def update_online(self, dbcommit=False, reindex=False, verbose=False):
current_app.logger.warning(
f'UPDATE ONLINE {pid}: was deleted')
action = ContributionUpdateAction.ERROR
elif not metadata['sources']:
elif not metadata.get('sources'):
current_app.logger.warning(
f'UPDATE ONLINE {pid}: has no sources')
action = ContributionUpdateAction.ERROR
elif not metadata.get('type'):
current_app.logger.warning(
f'UPDATE ONLINE {pid}: has no type')
action = ContributionUpdateAction.ERROR
elif dict(self) != metadata:
action = ContributionUpdateAction.REPLACE
self.replace(data=metadata, dbcommit=dbcommit,
reindex=reindex)
if reindex:
indexer = DocumentsIndexer()
indexer.bulk_index(self.documents_pids())
indexer.bulk_index(self.documents_ids())
indexer.process_bulk_queue()
except Exception as err:
action = ContributionUpdateAction.ERROR
current_app.logger.warning(f'UPDATE ONLINE {pid}: {err}')
# TODO: find new MEF record
return action, self

def source_pids(self):
"""Get agents pids."""
sources = current_app.config.get('RERO_ILS_CONTRIBUTIONS_SOURCES', [])
pids = {}
for source in sources:
if source in self:
pids[source] = self[source]['pid']
return pids


class ContributionsIndexer(IlsRecordsIndexer):
"""Contribution indexing class."""
Expand Down
8 changes: 5 additions & 3 deletions rero_ils/modules/contributions/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,16 +100,18 @@ def update_contributions(pids=None, dbcommit=True, reindex=True, verbose=False,
pids = pids or Contribution.get_all_pids()
log = {}
error_pids = []
for pid in pids:
if verbose:
click.echo(f'Contribution update: {len(pids)}')
for idx, pid in enumerate(pids):
cont = Contribution.get_record_by_pid(pid)
msg, _ = cont.update_online(dbcommit=dbcommit, reindex=reindex,
verbose=verbose)
log.setdefault(msg, 0)
log[msg] += 1
if verbose and msg != ContributionUpdateAction.UPTODATE:
click.echo(f'{pid:>10}: {msg}')
click.echo(f'{idx:>10} mef:{pid:>10} {msg} {cont.source_pids()}')
if ContributionUpdateAction.ERROR:
error_pids.append(pid)
log[msg] += 1
if timestamp:
set_timestamp('update_contributions', **log)
return log, error_pids