43 changes: 43 additions & 0 deletions backend/btrixcloud/migrations/migration_0044_coll_stats.py
@@ -0,0 +1,43 @@
"""
Migration 0044 - Recalculate collection stats
"""

from btrixcloud.migrations import BaseMigration


MIGRATION_VERSION = "0044"


class Migration(BaseMigration):
"""Migration class."""

# pylint: disable=unused-argument
def __init__(self, mdb, **kwargs):
super().__init__(mdb, migration_version=MIGRATION_VERSION)

self.coll_ops = kwargs.get("coll_ops")

async def migrate_up(self):
"""Perform migration up.

Recalculate collection stats to get top host names
"""
colls_mdb = self.mdb["collections"]

if self.coll_ops is None:
print(
"Unable to set collection stats, missing coll_ops",
flush=True,
)
return

async for coll in colls_mdb.find({}):
coll_id = coll["_id"]
try:
await self.coll_ops.update_collection_counts_and_tags(coll_id)
# pylint: disable=broad-exception-caught
except Exception as err:
print(
f"Unable to update page stats for collection {coll_id}: {err}",
flush=True,
)
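
# For context, a minimal sketch of what a top-hosts recalculation like the
# one triggered by update_collection_counts_and_tags might look like. The
# actual logic lives in coll_ops and is not shown in this diff; the
# pages_mdb collection and the crawl_id/url field names here are
# assumptions for illustration, not the actual Browsertrix schema.
from urllib.parse import urlparse

async def top_page_hosts(pages_mdb, crawl_ids, limit=10):
    """Return the most frequent page hosts across the given crawls."""
    counts = {}
    # Fetch only the url field for pages belonging to the collection's crawls
    async for page in pages_mdb.find(
        {"crawl_id": {"$in": crawl_ids}}, {"url": 1}
    ):
        host = urlparse(page.get("url", "")).netloc
        if host:
            counts[host] = counts.get(host, 0) + 1
    top = sorted(counts.items(), key=lambda item: -item[1])[:limit]
    return [{"host": host, "count": count} for host, count in top]

# This matches the shape the tests below assert, e.g.
# [{'count': 3, 'host': 'webrecorder.net'}].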
7 changes: 7 additions & 0 deletions backend/test/test_collections.py
@@ -94,6 +94,8 @@ def test_create_collection(
assert data["defaultThumbnailName"] == default_thumbnail_name
assert data["allowPublicDownload"]

assert data["topPageHosts"] == [{'count': 3, 'host': 'webrecorder.net'}]


def test_create_public_collection(
crawler_auth_headers, default_org_id, crawler_crawl_id, admin_crawl_id
@@ -223,6 +225,7 @@ def test_update_collection(
assert data["dateEarliest"]
assert data["dateLatest"]
assert data["defaultThumbnailName"]
assert data["topPageHosts"]


def test_rename_collection(
@@ -310,6 +313,7 @@ def test_add_remove_crawl_from_collection(
assert data["tags"] == ["wr-test-2", "wr-test-1"]
assert data["dateEarliest"]
assert data["dateLatest"]
assert data["topPageHosts"] == [{'count': 7, 'host': 'webrecorder.net'}]

# Verify it was added
r = requests.get(
@@ -335,6 +339,7 @@ def test_add_remove_crawl_from_collection(
assert data.get("tags", []) == []
assert data.get("dateEarliest") is None
assert data.get("dateLatest") is None
assert data["topPageHosts"] == {}

# Verify they were removed
r = requests.get(
@@ -366,6 +371,7 @@ def test_add_remove_crawl_from_collection(
assert data["tags"] == ["wr-test-2", "wr-test-1"]
assert data["dateEarliest"]
assert data["dateLatest"]
assert data["topPageHosts"]


def test_get_collection(crawler_auth_headers, default_org_id):
@@ -1137,6 +1143,7 @@ def test_list_public_collections(
assert collection["pageCount"] > 0
assert collection["uniquePageCount"] > 0
assert collection["totalSize"] > 0
assert collection["topPageHosts"]

# Test non-existing slug - it should return a 404 but not reveal
# whether or not an org exists with that slug