Skip to content

Commit

Permalink
Fix: Subgraph stitching query crashes BQ emulator (#5847)
Browse files Browse the repository at this point in the history
  • Loading branch information
hannes-ucsc committed Jan 17, 2024
1 parent 6645efe commit 0a5be74
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 14 deletions.
16 changes: 16 additions & 0 deletions UPGRADING.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,22 @@ reverted. This is all fairly informal and loosely defined. Hopefully we won't
have too many entries in this file.


#5847 Subgraph stitching query crashes BQ emulator
===================================================

Operator
~~~~~~~~

Manually perform a two-phase deployment of the ``shared`` component of every
main deployment. In a lower deployment, perform the first phase using the
``apply_keep_unused`` Makefile target just before pushing the PR branch to the
GitLab instance in that deployment. In a stable deployment (``prod``), perform
the first phase before pushing the merge commit to the GitLab instance in that
deployment. In lower and stable deployments, perform the second phase using the
``apply`` Makefile target after the merge commit has been successfully built
on the GitLab instance in that deployment.


#5046 Replace tinyquery with bigquery-emulator
==============================================

Expand Down
2 changes: 1 addition & 1 deletion src/azul/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1483,7 +1483,7 @@ def docker_images(self) -> dict[str, str]:
pycharm=f'docker.io/ucscgi/azul-pycharm:{self.docker_pycharm_version}',
elasticsearch=f'docker.io/ucscgi/azul-elasticsearch'
f':{self.docker_elasticsearch_version}',
bigquery_emulator='ghcr.io/goccy/bigquery-emulator:0.4.4',
bigquery_emulator='ghcr.io/hannes-ucsc/bigquery-emulator:azul',
# Updating any of the four images below additionally requires
# redeploying the `gitlab` TF component.
clamav='docker.io/clamav/clamav:1.2.1-24',
Expand Down
21 changes: 8 additions & 13 deletions test/indexer/test_tdr.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
BytesIO,
)
import json
import logging
from operator import (
attrgetter,
)
Expand Down Expand Up @@ -75,6 +76,7 @@
from azul.plugins.repository.tdr_hca import (
TDRBundleFQID,
TDRHCABundle,
log as plugin_log,
)
from azul.terra import (
TDRClient,
Expand Down Expand Up @@ -308,28 +310,21 @@ def _test_invalid_links(self, bundle: TDRHCABundle):
with self.assertRaises(RequirementError):
self._test_fetch_bundle(bundle, load_tables=False)

@patch('azul.plugins.repository.tdr_hca.Plugin._find_upstream_bundles')
def test_subgraph_stitching(self, _mock_find_upstream_bundles):
def test_subgraph_stitching(self):
downstream_uuid = '4426adc5-b3c5-5aab-ab86-51d8ce44dfbe'
upstream_uuids = [
'b0c2c714-45ee-4759-a32b-8ccbbcf911d4',
'bd4939c1-a078-43bd-8477-99ae59ceb555',
]
# FIXME: Fix the crash in bigquery-emulator and remove the mock
# https://github.com/DataBiosphere/azul/issues/5847
_mock_find_upstream_bundles.side_effect = [
{SourcedBundleFQID(source=self.source,
uuid=uuid,
version='2020-08-10T21:24:26.174274Z')}
for uuid in upstream_uuids
]
bundle = self._load_canned_bundle(SourcedBundleFQID(source=self.source,
uuid=downstream_uuid,
version='2020-08-10T21:24:26.174274Z'))
assert any(e['is_stitched'] for e in bundle.manifest)
self._test_fetch_bundle(bundle, load_tables=True)
self.assertEqual(_mock_find_upstream_bundles.call_count,
len(upstream_uuids))
with self.assertLogs(plugin_log, level=logging.DEBUG) as cm:
self._test_fetch_bundle(bundle, load_tables=True)
record = one(r for r in cm.records if 'Stitched 2 bundle(s): ' in r.message)
for upstream_uuid in upstream_uuids:
self.assertIn("uuid='" + upstream_uuid, record.message)

def _test_fetch_bundle(self,
test_bundle: TDRHCABundle,
Expand Down

0 comments on commit 0a5be74

Please sign in to comment.