From 7d2581687bad6b3b716bcf28f7c0041dc2dbd4e2 Mon Sep 17 00:00:00 2001 From: John Marshall Date: Thu, 13 Jun 2024 11:10:49 +1200 Subject: [PATCH] Refactor all *.from_db() routines to use from_db_json() This wrapper calls json.loads() but also handles None (returning None), which enables the code at many call sites to be simplified. Removed some callers' `if isinstance(field, str): ...` code, which has the effect of newly disallowing field values that are already dicts. However we've verified that all *.from_db() calls have raw database outputs as their arguments, so such fields will be always be strings and IMHO giving a dict to from_db_json() is really a logic error that should be detected. In SequencingGroupInternal.from_db() added `pop(..., None)` so that a missing meta field is now accepted. The previous code suggests that having pop() produce KeyError here was unintended. The expected argument types for from_db_json() are listed in the definition, but we don't list its return type. The best we could say in general is `object` but most call sites expect `dict[str, str]` (or occasionally `list[str]`) due to the shape of their expected JSON. Specifying `object` would lead to mypy errors at these call sites. --- db/python/layers/web.py | 15 +++++++-------- db/python/tables/participant_phenotype.py | 8 ++++---- db/python/utils.py | 6 ++++-- models/models/analysis.py | 8 ++------ models/models/analysis_runner.py | 8 ++------ models/models/assay.py | 10 ++-------- models/models/audit_log.py | 9 +++------ models/models/cohort.py | 7 ++----- models/models/participant.py | 7 +++---- models/models/project.py | 4 ++-- models/models/sample.py | 15 ++++----------- models/models/sequencing_group.py | 6 ++---- 12 files changed, 37 insertions(+), 66 deletions(-) diff --git a/db/python/layers/web.py b/db/python/layers/web.py index 4366e52ed..5eb60a38e 100644 --- a/db/python/layers/web.py +++ b/db/python/layers/web.py @@ -1,7 +1,6 @@ # pylint: disable=too-many-locals, too-many-instance-attributes import asyncio import itertools -import json import re from collections import defaultdict from datetime import date @@ -15,7 +14,7 @@ from db.python.tables.base import DbBase from db.python.tables.project import ProjectPermissionsTable from db.python.tables.sequencing_group import SequencingGroupTable -from db.python.utils import escape_like_term +from db.python.utils import escape_like_term, from_db_json from models.models import ( AssayInternal, FamilySimpleInternal, @@ -114,7 +113,7 @@ def _project_summary_process_assay_rows_by_sample_id( AssayInternal( id=seq['id'], type=seq['type'], - meta=json.loads(seq['meta']), + meta=from_db_json(seq['meta']), sample_id=seq['sample_id'], ) for seq in assay_rows @@ -153,7 +152,7 @@ def _project_summary_process_sequencing_group_rows_by_sample_id( sg_id_to_sample_id[sg_id] = row['sample_id'] sg_by_id[sg_id] = NestedSequencingGroupInternal( id=sg_id, - meta=json.loads(row['meta']), + meta=from_db_json(row['meta']), type=row['type'], technology=row['technology'], platform=row['platform'], @@ -189,9 +188,9 @@ def _project_summary_process_sample_rows( smodels = [ NestedSampleInternal( id=s['id'], - external_ids=json.loads(s['external_ids']), + external_ids=from_db_json(s['external_ids']), type=s['type'], - meta=json.loads(s['meta']) or {}, + meta=from_db_json(s['meta']) or {}, created_date=str(sample_id_start_times.get(s['id'], '')), sequencing_groups=sg_models_by_sample_id.get(s['id'], []), non_sequencing_assays=filtered_assay_models_by_sid.get(s['id'], []), @@ -450,8 +449,8 @@ async def get_project_summary( pmodels.append( NestedParticipantInternal( id=p['id'], - external_ids=json.loads(p['external_ids']), - meta=json.loads(p['meta']), + external_ids=from_db_json(p['external_ids']), + meta=from_db_json(p['meta']), families=pid_to_families.get(p['id'], []), samples=list(smodels_by_pid.get(p['id'])), reported_sex=p['reported_sex'], diff --git a/db/python/tables/participant_phenotype.py b/db/python/tables/participant_phenotype.py index d50b5bbb7..ec32eb03f 100644 --- a/db/python/tables/participant_phenotype.py +++ b/db/python/tables/participant_phenotype.py @@ -1,8 +1,8 @@ -import json from collections import defaultdict from typing import Any, Dict, List, Tuple from db.python.tables.base import DbBase +from db.python.utils import from_db_json, to_db_json class ParticipantPhenotypeTable(DbBase): @@ -32,7 +32,7 @@ async def add_key_value_rows(self, rows: List[Tuple[int, str, Any]]) -> None: { 'participant_id': r[0], 'description': r[1], - 'value': json.dumps(r[2]), + 'value': to_db_json(r[2]), 'audit_log_id': audit_log_id, } for r in rows @@ -67,7 +67,7 @@ async def get_key_value_rows_for_participant_ids( pid = row['participant_id'] key = row['description'] value = row['value'] - formed_key_value_pairs[pid][key] = json.loads(value) + formed_key_value_pairs[pid][key] = from_db_json(value) return formed_key_value_pairs @@ -91,6 +91,6 @@ async def get_key_value_rows_for_all_participants( pid = row['participant_id'] key = row['description'] value = row['value'] - formed_key_value_pairs[pid][key] = json.loads(value) + formed_key_value_pairs[pid][key] = from_db_json(value) return formed_key_value_pairs diff --git a/db/python/utils.py b/db/python/utils.py index 909aa87b5..6c00f980a 100644 --- a/db/python/utils.py +++ b/db/python/utils.py @@ -431,8 +431,10 @@ def get_logger(): return _logger -def from_db_json(text): - """Convert DB's JSON text to Python object""" +def from_db_json(text: str | bytes | None): + """Convert database's JSON text to Python object""" + if text is None: + return None return json.loads(text) diff --git a/models/models/analysis.py b/models/models/analysis.py index b5d2646c6..b9b31febe 100644 --- a/models/models/analysis.py +++ b/models/models/analysis.py @@ -1,10 +1,10 @@ import enum -import json from datetime import date, datetime from typing import Any from pydantic import BaseModel +from db.python.utils import from_db_json from models.base import SMBase from models.enums import AnalysisStatus from models.utils.cohort_id_format import ( @@ -40,10 +40,6 @@ def from_db(**kwargs): analysis_type = kwargs.pop('type', None) status = kwargs.pop('status', None) timestamp_completed = kwargs.pop('timestamp_completed', None) - meta = kwargs.get('meta') - - if meta and isinstance(meta, str): - meta = json.loads(meta) if timestamp_completed and isinstance(timestamp_completed, str): timestamp_completed = datetime.fromisoformat(timestamp_completed) @@ -65,7 +61,7 @@ def from_db(**kwargs): output=kwargs.pop('output', []), timestamp_completed=timestamp_completed, project=kwargs.get('project'), - meta=meta, + meta=from_db_json(kwargs.get('meta')), active=bool(kwargs.get('active')), author=kwargs.get('author'), ) diff --git a/models/models/analysis_runner.py b/models/models/analysis_runner.py index 4dfd93b80..4d862347d 100644 --- a/models/models/analysis_runner.py +++ b/models/models/analysis_runner.py @@ -1,6 +1,6 @@ import datetime -import json +from db.python.utils import from_db_json from models.base import SMBase from models.models.project import ProjectId @@ -34,10 +34,6 @@ class AnalysisRunnerInternal(SMBase): @staticmethod def from_db(**kwargs): """Convert from db Record""" - meta = kwargs.pop('meta') - if meta: - meta = json.loads(meta) - _timestamp = kwargs.pop('timestamp') # if _timestamp: # _timestamp = datetime.datetime.fromisoformat(_timestamp) @@ -58,7 +54,7 @@ def from_db(**kwargs): hail_version=kwargs.pop('hail_version'), batch_url=kwargs.pop('batch_url'), submitting_user=kwargs.pop('submitting_user'), - meta=meta, + meta=from_db_json(kwargs.pop('meta')), audit_log_id=kwargs.pop('audit_log_id'), output_path=kwargs.pop('output_path'), ) diff --git a/models/models/assay.py b/models/models/assay.py index 139d60327..0c2f379f6 100644 --- a/models/models/assay.py +++ b/models/models/assay.py @@ -1,6 +1,6 @@ -import json from typing import Any +from db.python.utils import from_db_json from models.base import OpenApiGenNoneType, SMBase from models.utils.sample_id_format import sample_id_format, sample_id_transform_to_raw @@ -26,13 +26,7 @@ def __eq__(self, other): def from_db(d: dict): """Take DB mapping object, and return SampleSequencing""" meta = d.pop('meta', None) - - if meta: - if isinstance(meta, bytes): - meta = meta.decode() - if isinstance(meta, str): - meta = json.loads(meta) - return AssayInternal(meta=meta, **d) + return AssayInternal(meta=from_db_json(meta), **d) def to_external(self): """Convert to transport model""" diff --git a/models/models/audit_log.py b/models/models/audit_log.py index 20969dd50..487a1cbba 100644 --- a/models/models/audit_log.py +++ b/models/models/audit_log.py @@ -1,6 +1,6 @@ import datetime -import json +from db.python.utils import from_db_json from models.base import SMBase from models.models.project import ProjectId @@ -24,8 +24,5 @@ class AuditLogInternal(SMBase): @staticmethod def from_db(d: dict): """Take DB mapping object, and return SampleSequencing""" - meta = {} - if 'meta' in d: - meta = json.loads(d.pop('meta')) - - return AuditLogInternal(meta=meta, **d) + meta = d.pop('meta', None) + return AuditLogInternal(meta=from_db_json(meta) or {}, **d) diff --git a/models/models/cohort.py b/models/models/cohort.py index 391e7b793..6f1119644 100644 --- a/models/models/cohort.py +++ b/models/models/cohort.py @@ -1,5 +1,4 @@ -import json - +from db.python.utils import from_db_json from models.base import SMBase from models.models.project import ProjectId from models.utils.cohort_id_format import cohort_id_format @@ -92,9 +91,7 @@ def from_db(d: dict): _id = d.pop('id', None) name = d.pop('name', None) description = d.pop('description', None) - criteria = d.pop('criteria', None) - if criteria and isinstance(criteria, str): - criteria = json.loads(criteria) + criteria = from_db_json(d.pop('criteria', None)) project = d.pop('project', None) return CohortTemplateInternal( diff --git a/models/models/participant.py b/models/models/participant.py index 75b3fbfe4..34b3ea51a 100644 --- a/models/models/participant.py +++ b/models/models/participant.py @@ -1,5 +1,4 @@ -import json - +from db.python.utils import from_db_json from models.base import OpenApiGenNoneType, SMBase from models.models.family import FamilySimple, FamilySimpleInternal from models.models.project import ProjectId @@ -28,8 +27,8 @@ class ParticipantInternal(SMBase): def from_db(cls, data: dict): """Convert from db keys, mainly converting JSON-encoded fields""" for key in ['external_ids', 'meta']: - if key in data and isinstance(data[key], str): - data[key] = json.loads(data[key]) + if key in data: + data[key] = from_db_json(data[key]) return ParticipantInternal(**data) diff --git a/models/models/project.py b/models/models/project.py index 9ca19542f..b65146f5d 100644 --- a/models/models/project.py +++ b/models/models/project.py @@ -1,6 +1,6 @@ -import json from typing import Optional +from db.python.utils import from_db_json from models.base import SMBase ProjectId = int @@ -20,5 +20,5 @@ class Project(SMBase): def from_db(kwargs): """From DB row, with db keys""" kwargs = dict(kwargs) - kwargs['meta'] = json.loads(kwargs['meta']) if kwargs.get('meta') else {} + kwargs['meta'] = from_db_json(kwargs.get('meta')) or {} return Project(**kwargs) diff --git a/models/models/sample.py b/models/models/sample.py index 1499b6c6b..1a9d73461 100644 --- a/models/models/sample.py +++ b/models/models/sample.py @@ -1,5 +1,4 @@ -import json - +from db.python.utils import from_db_json from models.base import OpenApiGenNoneType, SMBase, parse_sql_bool from models.models.assay import Assay, AssayInternal, AssayUpsert, AssayUpsertInternal from models.models.sequencing_group import ( @@ -30,17 +29,11 @@ def from_db(d: dict): """ _id = d.pop('id', None) type_ = d.pop('type', None) - meta = d.pop('meta', None) + meta = from_db_json(d.pop('meta', None)) active = parse_sql_bool(d.pop('active', None)) - if meta: - if isinstance(meta, bytes): - meta = meta.decode() - if isinstance(meta, str): - meta = json.loads(meta) - - if 'external_ids' in d and isinstance(d['external_ids'], str): - d['external_ids'] = json.loads(d['external_ids']) + if 'external_ids' in d: + d['external_ids'] = from_db_json(d['external_ids']) return SampleInternal(id=_id, type=str(type_), meta=meta, active=active, **d) diff --git a/models/models/sequencing_group.py b/models/models/sequencing_group.py index ed1fe4817..075ca76af 100644 --- a/models/models/sequencing_group.py +++ b/models/models/sequencing_group.py @@ -1,6 +1,6 @@ -import json from typing import Any +from db.python.utils import from_db_json from models.base import OpenApiGenNoneType, SMBase from models.models.assay import Assay, AssayInternal, AssayUpsert, AssayUpsertInternal from models.utils.sample_id_format import sample_id_format, sample_id_transform_to_raw @@ -47,9 +47,7 @@ class SequencingGroupInternal(SMBase): @classmethod def from_db(cls, **kwargs): """From database model""" - meta = kwargs.pop('meta') - if meta and isinstance(meta, str): - meta = json.loads(meta) + meta = from_db_json(kwargs.pop('meta', None)) _archived = kwargs.pop('archived', None) if _archived is not None: