Skip to content

Commit

Permalink
Move isbn lookup to identifier module. (PP-1948) (#2196)
Browse files Browse the repository at this point in the history
  • Loading branch information
tdilauro authored Nov 27, 2024
1 parent c741d34 commit 102e9f6
Show file tree
Hide file tree
Showing 5 changed files with 108 additions and 118 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,8 @@
from sqlalchemy.engine import Connection
from sqlalchemy.orm.session import Session

from palace.manager.sqlalchemy.model.identifier import Identifier
from palace.manager.sqlalchemy.model.time_tracking import (
_isbn_for_identifier,
_title_for_identifier,
)
from palace.manager.sqlalchemy.model.identifier import Identifier, isbn_for_identifier
from palace.manager.sqlalchemy.model.time_tracking import _title_for_identifier
from palace.manager.sqlalchemy.util import get_one

# revision identifiers, used by Alembic.
Expand Down Expand Up @@ -231,7 +228,7 @@ def update_summary_isbn_and_title(session: Session) -> None:
@cache
def cached_isbn_lookup(identifier: Identifier) -> str | None:
"""Given an identifier, return its ISBN."""
return _isbn_for_identifier(identifier)
return isbn_for_identifier(identifier)


@cache
Expand Down
30 changes: 30 additions & 0 deletions src/palace/manager/sqlalchemy/model/identifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -1271,3 +1271,33 @@ def equivalent_identifiers(
results[parent_identifier] = equivalent.identifier

return results


def isbn_for_identifier(identifier: Identifier | None) -> str | None:
"""Find the strongest ISBN match for the given identifier.
:param identifier: The identifier to match.
:return: The ISBN string associated with the identifier or None, if no match is found.
"""
if identifier is None:
return None

if identifier.type == Identifier.ISBN:
return identifier.identifier

# If our identifier is not an ISBN itself, we'll use our Recursive Equivalency
# mechanism to find the next best one that is, if available.
db = Session.object_session(identifier)
eq_subquery = db.query(RecursiveEquivalencyCache.identifier_id).filter(
RecursiveEquivalencyCache.parent_identifier_id == identifier.id
)
equivalent_identifiers = (
db.query(Identifier)
.filter(Identifier.id.in_(eq_subquery))
.filter(Identifier.type == Identifier.ISBN)
)

return next(
map(lambda id_: id_.identifier, equivalent_identifiers),
None,
)
39 changes: 3 additions & 36 deletions src/palace/manager/sqlalchemy/model/time_tracking.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

import datetime
from typing import TYPE_CHECKING, Any, cast
from typing import TYPE_CHECKING, Any

from sqlalchemy import (
Boolean,
Expand All @@ -17,10 +17,7 @@

from palace.manager.sqlalchemy.model.base import Base
from palace.manager.sqlalchemy.model.edition import Edition
from palace.manager.sqlalchemy.model.identifier import (
Identifier,
RecursiveEquivalencyCache,
)
from palace.manager.sqlalchemy.model.identifier import Identifier, isbn_for_identifier
from palace.manager.sqlalchemy.util import get_one_or_create
from palace.manager.util.datetime_helpers import minute_timestamp

Expand Down Expand Up @@ -215,7 +212,7 @@ def add(
if (not playtime.isbn or not playtime.title) and not identifier:
identifier, _ = Identifier.parse_urn(_db, identifier_str, autocreate=False)
if not playtime.isbn and identifier:
playtime.isbn = _isbn_for_identifier(identifier)
playtime.isbn = isbn_for_identifier(identifier)
if not playtime.title and identifier:
playtime.title = _title_for_identifier(identifier)

Expand Down Expand Up @@ -243,33 +240,3 @@ def _title_for_identifier(identifier: Identifier | None) -> str | None:
):
return edition.title
return None


def _isbn_for_identifier(identifier: Identifier | None) -> str | None:
"""Find the strongest ISBN match for the given identifier.
:param identifier: The identifier to match.
:return: The ISBN string associated with the identifier or None, if no match is found.
"""
if identifier is None:
return None

if identifier.type == Identifier.ISBN:
return cast(str, identifier.identifier)

# If our identifier is not an ISBN itself, we'll use our Recursive Equivalency
# mechanism to find the next best one that is, if available.
db = Session.object_session(identifier)
eq_subquery = db.query(RecursiveEquivalencyCache.identifier_id).filter(
RecursiveEquivalencyCache.parent_identifier_id == identifier.id
)
equivalent_identifiers = (
db.query(Identifier)
.filter(Identifier.id.in_(eq_subquery))
.filter(Identifier.type == Identifier.ISBN)
)

return next(
map(lambda id_: id_.identifier, equivalent_identifiers),
None,
)
72 changes: 72 additions & 0 deletions tests/manager/sqlalchemy/model/test_identifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
Identifier,
ProQuestIdentifierParser,
RecursiveEquivalencyCache,
isbn_for_identifier,
)
from palace.manager.sqlalchemy.model.resource import Hyperlink
from palace.manager.sqlalchemy.presentation import PresentationCalculationPolicy
Expand Down Expand Up @@ -860,6 +861,77 @@ def test_equivalent_identifiers(self, db: DatabaseTransactionFixture) -> None:
proquest_identfier: proquest_gutenberg,
}

@pytest.mark.parametrize(
"id_key, equivalents, expected_isbn",
[
# If the identifier is an ISBN, we will not use an equivalency.
[
"i1",
(("g1", "g2", 1), ("g2", "i1", 1), ("g1", "i2", 0.5)),
"080442957X",
],
[
"i2",
(("g1", "g2", 1), ("g2", "i1", 0.5), ("g1", "i2", 1)),
"9788175257665",
],
["i1", (("i1", "i2", 200),), "080442957X"],
["i2", (("i2", "i1", 200),), "9788175257665"],
# If identifier is not an ISBN, but has an equivalency that is, use the strongest match.
[
"g2",
(("g1", "g2", 1), ("g2", "i1", 1), ("g1", "i2", 0.5)),
"080442957X",
],
[
"g2",
(("g1", "g2", 1), ("g2", "i1", 0.5), ("g1", "i2", 1)),
"9788175257665",
],
# If we don't find an equivalent ISBN identifier, then we should get None.
["g2", (), None],
["g1", (("g1", "g2", 1),), None],
# If identifier is None, expect default value.
[None, (), None],
],
)
def test_isbn_for_identifier(
self,
db: DatabaseTransactionFixture,
id_key: str | None,
equivalents: tuple[tuple[str, str, int | float]],
expected_isbn: str,
):
ids: dict[str, Identifier] = {
"i1": db.identifier(
identifier_type=Identifier.ISBN, foreign_id="080442957X"
),
"i2": db.identifier(
identifier_type=Identifier.ISBN, foreign_id="9788175257665"
),
"g1": db.identifier(identifier_type=Identifier.GUTENBERG_ID),
"g2": db.identifier(identifier_type=Identifier.GUTENBERG_ID),
}
equivalencies = [
Equivalency(
input_id=ids[equivalent[0]].id,
output_id=ids[equivalent[1]].id,
strength=equivalent[2],
)
for equivalent in equivalents
]
test_identifier: Identifier | None = ids[id_key] if id_key is not None else None
if test_identifier is not None:
test_identifier.equivalencies = equivalencies

# We're using the RecursiveEquivalencyCache, so must refresh it.
EquivalentIdentifiersCoverageProvider(db.session).run()

# Act
result = isbn_for_identifier(test_identifier)
# Assert
assert result == expected_isbn


class TestProQuestIdentifierParser:
@pytest.mark.parametrize(
Expand Down
76 changes: 0 additions & 76 deletions tests/manager/sqlalchemy/model/test_time_tracking.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,9 @@
import pytest
from sqlalchemy.exc import IntegrityError

from palace.manager.core.equivalents_coverage import (
EquivalentIdentifiersCoverageProvider,
)
from palace.manager.sqlalchemy.model.identifier import Equivalency, Identifier
from palace.manager.sqlalchemy.model.time_tracking import (
PlaytimeEntry,
PlaytimeSummary,
_isbn_for_identifier,
_title_for_identifier,
)
from palace.manager.sqlalchemy.util import create
Expand Down Expand Up @@ -239,77 +234,6 @@ def test_cascades(self, db: DatabaseTransactionFixture):


class TestHelpers:
@pytest.mark.parametrize(
"id_key, equivalents, expected_isbn",
[
# If the identifier is an ISBN, we will not use an equivalency.
[
"i1",
(("g1", "g2", 1), ("g2", "i1", 1), ("g1", "i2", 0.5)),
"080442957X",
],
[
"i2",
(("g1", "g2", 1), ("g2", "i1", 0.5), ("g1", "i2", 1)),
"9788175257665",
],
["i1", (("i1", "i2", 200),), "080442957X"],
["i2", (("i2", "i1", 200),), "9788175257665"],
# If identifier is not an ISBN, but has an equivalency that is, use the strongest match.
[
"g2",
(("g1", "g2", 1), ("g2", "i1", 1), ("g1", "i2", 0.5)),
"080442957X",
],
[
"g2",
(("g1", "g2", 1), ("g2", "i1", 0.5), ("g1", "i2", 1)),
"9788175257665",
],
# If we don't find an equivalent ISBN identifier, then we should get None.
["g2", (), None],
["g1", (("g1", "g2", 1),), None],
# If identifier is None, expect default value.
[None, (), None],
],
)
def test__isbn_for_identifier(
self,
db: DatabaseTransactionFixture,
id_key: str | None,
equivalents: tuple[tuple[str, str, int | float]],
expected_isbn: str,
):
ids: dict[str, Identifier] = {
"i1": db.identifier(
identifier_type=Identifier.ISBN, foreign_id="080442957X"
),
"i2": db.identifier(
identifier_type=Identifier.ISBN, foreign_id="9788175257665"
),
"g1": db.identifier(identifier_type=Identifier.GUTENBERG_ID),
"g2": db.identifier(identifier_type=Identifier.GUTENBERG_ID),
}
equivalencies = [
Equivalency(
input_id=ids[equivalent[0]].id,
output_id=ids[equivalent[1]].id,
strength=equivalent[2],
)
for equivalent in equivalents
]
test_identifier: Identifier | None = ids[id_key] if id_key is not None else None
if test_identifier is not None:
test_identifier.equivalencies = equivalencies

# We're using the RecursiveEquivalencyCache, so must refresh it.
EquivalentIdentifiersCoverageProvider(db.session).run()

# Act
result = _isbn_for_identifier(test_identifier)
# Assert
assert result == expected_isbn

def test__title_for_identifier_multiple_editions(
self,
db: DatabaseTransactionFixture,
Expand Down

0 comments on commit 102e9f6

Please sign in to comment.