Skip to content

Commit

Permalink
improve logging
Browse files Browse the repository at this point in the history
  • Loading branch information
ntarocco committed Oct 12, 2024
1 parent 8265e00 commit 057e55e
Show file tree
Hide file tree
Showing 12 changed files with 154 additions and 90 deletions.
19 changes: 15 additions & 4 deletions invenio_cern_sync/authz/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from flask import current_app

from ..errors import RequestError
from ..logging import log_info


def request_with_retries(
Expand Down Expand Up @@ -161,11 +162,17 @@ def get_identities(self, fields=IDENTITY_FIELDS, since=None):
query_params += [("field", value) for value in fields]
if since:
dt = datetime.fromisoformat(since)
ymd = dt.strftime("%Y-%m-%d")
query_params.append(("filter", f"modificationTime:gt:{ymd}T00:00:00Z"))
str_dt = dt.strftime("%Y-%m-%dT%H:%M:%SZ")
query_params.append(("filter", f"modificationTime:gt:{str_dt}"))
query_string = urlencode(query_params)

url_without_offset = f"{self.base_url}/api/v1.0/Identity?{query_string}"
log_info(
"authz-client",
dict(
action="get_identities", params=f"since: {since}, limit: {self.limit}"
),
)
return self._fetch_all(url_without_offset, headers)

def get_groups(self, fields=GROUPS_FIELDS, since=None):
Expand All @@ -189,9 +196,13 @@ def get_groups(self, fields=GROUPS_FIELDS, since=None):
query_params += [("field", value) for value in fields]
if since:
dt = datetime.fromisoformat(since)
ymd = dt.strftime("%Y-%m-%d")
query_params.append(("filter", f"modificationTime:gt:{ymd}T00:00:00Z"))
str_dt = dt.strftime("%Y-%m-%dT%H:%M:%SZ")
query_params.append(("filter", f"modificationTime:gt:{str_dt}"))
query_string = urlencode(query_params)

url_without_offset = f"{self.base_url}/api/v1.0/Group?{query_string}"
log_info(
"authz-client",
dict(action="get_groups", params=f"since: {since}, limit: {self.limit}"),
)
return self._fetch_all(url_without_offset, headers)
3 changes: 2 additions & 1 deletion invenio_cern_sync/authz/mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ def userprofile_mapper(cern_identity):
"""Map the CERN Identity fields to the Invenio user profile schema.
The returned dict structure must match the user profile schema defined via
the config ACCOUNTS_USER_PROFILE_SCHEMA."""
the config ACCOUNTS_USER_PROFILE_SCHEMA.
"""
return dict(
affiliations=cern_identity["instituteName"] or "",
department=cern_identity["cernDepartment"] or "",
Expand Down
28 changes: 22 additions & 6 deletions invenio_cern_sync/groups/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,12 @@ def _serialize_groups(groups):
def sync(**kwargs):
"""Sync CERN groups with local db."""
log_uuid = str(uuid.uuid4())
log_info(log_uuid, "groups_sync", dict(status="fetching-cern-groups"))
log_name = "groups-sync"
log_info(
log_name,
dict(action="fetching-cern-groups", status="started"),
log_uuid=log_uuid,
)
start_time = time.time()

overridden_params = kwargs.get("keycloak_service", dict())
Expand All @@ -41,15 +46,26 @@ def sync(**kwargs):
overridden_params = kwargs.get("groups", dict())
groups = authz_client.get_groups(**overridden_params)

log_info(log_uuid, "creating-updating-groups", dict(status="started"))
log_info(
log_name,
dict(action="fetching-cern-groups", status="completed"),
log_uuid=log_uuid,
)
log_info(
log_name,
dict(action="creating-updating-groups", status="started"),
log_uuid=log_uuid,
)
roles_ids = create_or_update_roles(_serialize_groups(groups))
log_info(
log_uuid,
"creating-updating-groups",
dict(status="completed", count=len(roles_ids)),
log_name,
dict(
action="creating-updating-groups", status="completed", count=len(roles_ids)
),
log_uuid=log_uuid,
)

total_time = time.time() - start_time
log_info(log_uuid, "groups_sync", dict(status="completed", time=total_time))
log_info(log_name, dict(status="completed", time=total_time), log_uuid=log_uuid)

return list(roles_ids)
5 changes: 4 additions & 1 deletion invenio_cern_sync/ldap/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@

"""Invenio-CERN-sync LDAP Client."""

import ldap
try:
import ldap
except ImportError:
ldap = None
from flask import current_app

BASE = "OU=Users,OU=Organic Units,DC=cern,DC=ch"
Expand Down
3 changes: 2 additions & 1 deletion invenio_cern_sync/ldap/mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ def userprofile_mapper(ldap_user):
"""Map the LDAP fields to the Invenio user profile schema.
The returned dict structure must match the user profile schema defined via
the config ACCOUNTS_USER_PROFILE_SCHEMA."""
the config ACCOUNTS_USER_PROFILE_SCHEMA.
"""
return dict(
affiliations=first_or_default(ldap_user, "cernInstituteName"),
department=first_or_default(ldap_user, "division"),
Expand Down
23 changes: 15 additions & 8 deletions invenio_cern_sync/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,34 @@
"""Invenio-CERN-sync logging."""

import json
import uuid

from flask import current_app


def _log(log_func, log_uuid, action, extra=dict()):
def _log(log_func, name, extra=dict(), log_uuid=None):
"""Format log."""
structured_msg = dict(name="sync_users", uuid=log_uuid, action=action, **extra)
uuid_ = log_uuid or str(uuid.uuid4())
structured_msg = dict(name=name, uuid=uuid_, **extra)
msg = json.dumps(structured_msg, sort_keys=True)
log_func(msg)


def log_info(log_uuid, action, extra=dict()):
def log_debug(name, extra=dict(), log_uuid=None):
"""Log debug."""
_log(current_app.logger.debug, name, extra=extra, log_uuid=log_uuid)


def log_info(name, extra=dict(), log_uuid=None):
"""Log info."""
_log(current_app.logger.info, log_uuid, action, extra)
_log(current_app.logger.info, name, extra=extra, log_uuid=log_uuid)


def log_warning(log_uuid, action, extra=dict()):
def log_warning(name, extra=dict(), log_uuid=None):
"""Log warning."""
_log(current_app.logger.warning, log_uuid, action, extra)
_log(current_app.logger.warning, name, extra=extra, log_uuid=log_uuid)


def log_error(log_uuid, action, extra=dict()):
def log_error(name, extra=dict(), log_uuid=None):
"""Log error."""
_log(current_app.logger.error, log_uuid, action, extra)
_log(current_app.logger.error, name, extra=extra, log_uuid=log_uuid)
48 changes: 29 additions & 19 deletions invenio_cern_sync/users/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

def _log_user_data_changed(
log_uuid,
log_name,
log_action,
ra_extra_data,
identity_id,
Expand All @@ -34,7 +35,7 @@ def _log_user_data_changed(
):
"""Log a warning about username/e-mail change."""
log_msg = f"Username/e-mail changed for UserIdentity.id #{identity_id}. Local DB username/e-mail: `{previous_username}` `{previous_email}`. New from CERN DB: `{new_username}` `{new_email}`."
log_warning(log_uuid, log_action, dict(msg=log_msg))
log_warning(log_name, dict(action=log_action, msg=log_msg), log_uuid=log_uuid)

# record this change in the RemoteAccount.extra_data
ra_extra_data.append(
Expand All @@ -52,6 +53,7 @@ def _log_user_data_changed(

def _log_identity_id_changed(
log_uuid,
log_name,
log_action,
ra_extra_data,
username,
Expand All @@ -61,7 +63,7 @@ def _log_identity_id_changed(
):
"""Log a warning about Identity Id change."""
log_msg = f"Identity Id changed for User `{username}` `{email}`. Previous UserIdentity.id in the local DB: `{previous_identity_id}` - New Identity Id from CERN DB: `{new_identity_id}`."
log_warning(log_uuid, log_action, dict(msg=log_msg))
log_warning(log_name, dict(action=log_action, msg=log_msg), log_uuid=log_uuid)

# record this change in the RemoteAccount.extra_data
ra_extra_data.append(
Expand All @@ -75,12 +77,12 @@ def _log_identity_id_changed(
return ra_extra_data


def _update_existing(users, serializer_fn, log_uuid):
def _update_existing(users, serializer_fn, log_uuid, log_name):
"""Update existing users and return a list of missing users to insert."""
missing = []
updated = set()
log_action = "updating_existing_users"
log_info(log_uuid, log_action, dict(status="started"))
log_action = "updating-existing-users"
log_info(log_name, dict(action=log_action, status="started"), log_uuid=log_uuid)

for invenio_user in serializer_fn(users):
user = user_identity = None
Expand Down Expand Up @@ -114,6 +116,7 @@ def _update_existing(users, serializer_fn, log_uuid):
)
ra_extra_data = _log_user_data_changed(
log_uuid,
log_name,
log_action,
ra_extra_data=_ra_extra_data,
identity_id=invenio_user["user_identity_id"],
Expand All @@ -134,6 +137,7 @@ def _update_existing(users, serializer_fn, log_uuid):
)
ra_extra_data = _log_identity_id_changed(
log_uuid,
log_name,
log_action,
ra_extra_data=_ra_extra_data,
username=invenio_user["username"],
Expand All @@ -154,17 +158,18 @@ def _update_existing(users, serializer_fn, log_uuid):

# persist changes before starting with the inserting of missing users
db.session.commit()
log_info(log_uuid, log_action, dict(status="completed", updated_count=len(updated)))
log_info(
log_name,
dict(action=log_action, status="completed", updated_count=len(updated)),
log_uuid=log_uuid,
)
return missing, updated


def _insert_missing(invenio_users, log_uuid):
def _insert_missing(invenio_users, log_uuid, log_name):
"""Insert users."""
log_info(
log_uuid,
"inserting_missing_users",
dict(status="started"),
)
log_action = "inserting-missing-users"
log_info(log_name, dict(action=log_action, status="started"), log_uuid=log_uuid)

inserted = set()
for invenio_user in invenio_users:
Expand All @@ -173,9 +178,9 @@ def _insert_missing(invenio_users, log_uuid):

db.session.commit()
log_info(
log_uuid,
"inserting_missing_users",
dict(status="completed", inserted_count=len(inserted)),
log_name,
dict(action=log_action, status="completed", inserted_count=len(inserted)),
log_uuid=log_uuid,
)
return inserted

Expand All @@ -188,7 +193,12 @@ def sync(method="AuthZ", **kwargs):
)

log_uuid = str(uuid.uuid4())
log_info(log_uuid, "users_sync", dict(status="fetching-cern-users", method=method))
log_name = "users-sync"
log_info(
log_name,
dict(action="fetching-cern-users", status="started", method=method),
log_uuid=log_uuid,
)
start_time = time.time()

if method == "AuthZ":
Expand All @@ -212,11 +222,11 @@ def sync(method="AuthZ", **kwargs):
)

missing_invenio_users, updated_ids = _update_existing(
users, serializer_fn, log_uuid
users, serializer_fn, log_uuid, log_name
)
inserted_ids = _insert_missing(missing_invenio_users, log_uuid)
inserted_ids = _insert_missing(missing_invenio_users, log_uuid, log_name)

total_time = time.time() - start_time
log_info(log_uuid, "users_sync", dict(status="completed", time=total_time))
log_info(log_name, dict(status="completed", time=total_time), log_uuid=log_uuid)

return list(updated_ids.union(inserted_ids))
7 changes: 4 additions & 3 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,15 @@ class CustomProfile(Schema):
"""A custom user profile schema that matches the default mapper."""

affiliations = fields.String()
cern_department = fields.String()
cern_group = fields.String()
cern_section = fields.String()
department = fields.String()
family_name = fields.String()
full_name = fields.String()
given_name = fields.String()
group = fields.String()
mailbox = fields.String()
orcid = fields.String()
person_id = fields.String()
section = fields.String()


@pytest.fixture(scope="module")
Expand Down
8 changes: 4 additions & 4 deletions tests/test_authz_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def test_get_identities(
authz_service = AuthZService(mock_keycloak_service, limit=1)
identities = list(authz_service.get_identities())
assert len(identities) == 1
assert identities[0][0]["upn"] == "jdoe"
assert identities[0]["upn"] == "jdoe"
mock_request_with_retries.assert_called()


Expand All @@ -125,7 +125,7 @@ def test_get_groups(
groups = list(authz_service.get_groups())

assert len(groups) == 1
assert groups[0][0]["groupIdentifier"] == "authorization-service-administrators"
assert groups[0]["groupIdentifier"] == "authorization-service-administrators"
mock_request_with_retries.assert_called()


Expand Down Expand Up @@ -180,7 +180,7 @@ def test_get_identities_empty(
authz_service = AuthZService(mock_keycloak_service, limit=1)
identities = list(authz_service.get_identities())

assert len(identities[0]) == 0
assert len(identities) == 0
mock_request_with_retries.assert_called()


Expand All @@ -198,5 +198,5 @@ def test_get_groups_empty(
authz_service = AuthZService(mock_keycloak_service, limit=1)
groups = list(authz_service.get_groups())

assert len(groups[0]) == 0
assert len(groups) == 0
mock_request_with_retries.assert_called()
9 changes: 5 additions & 4 deletions tests/test_authz_serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,14 @@ def test_serialize(app, cern_identities):
assert serialized_identities[i]["username"] == f"jdoe{i}"
assert serialized_identities[i]["user_profile"] == {
"affiliations": "CERN",
"cern_department": "IT",
"cern_group": "CA",
"cern_section": "IR",
"department": "IT",
"group": "CA",
"section": "IR",
"family_name": f"Doe {i}",
"full_name": f"John Doe {i}",
"given_name": "John",
"mailbox": "",
"mailbox": f"M31120{i}",
"orcid": f"0000-0002-2227-122{i}",
"person_id": f"1234{i}",
}
assert serialized_identities[i]["preferences"] == {"locale": "en"}
Expand Down
Loading

0 comments on commit 057e55e

Please sign in to comment.