browsertrix/backend/btrixcloud/migrations/migration_0044_coll_stats.py
Ilya Kreymer 1570011ec7
compute top page origins for each collection (#2483)
A quick PR to fix #2482:
- compute topPageHosts as part of existing collection stats compute
- store top 10 results in collection for now.
- display in collection About sidebar
- fixes #2482 

Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
2025-05-08 14:22:40 -07:00

45 lines
1.2 KiB
Python

"""
Migration 0044 - Recalculate collection stats
"""
from btrixcloud.migrations import BaseMigration
# Version string for this migration; Migration.__init__ passes it to
# BaseMigration as ``migration_version``.
MIGRATION_VERSION = "0044"
# pylint: disable=duplicate-code
class Migration(BaseMigration):
    """Migration 0044: recalculate the stats of every collection."""

    # pylint: disable=unused-argument
    def __init__(self, mdb, **kwargs):
        """Set up the migration.

        ``mdb`` is forwarded to ``BaseMigration`` together with this
        migration's version. ``coll_ops`` is taken from ``kwargs`` and is
        ``None`` when the caller did not provide it.
        """
        super().__init__(mdb, migration_version=MIGRATION_VERSION)
        self.coll_ops = kwargs.get("coll_ops")

    async def migrate_up(self):
        """Perform migration up.

        Recalculate collection stats to get top host names. If ``coll_ops``
        was not supplied, a notice is printed and nothing is updated.
        """
        collections = self.mdb["collections"]

        # Without coll_ops there is nothing to call; report and stop.
        if self.coll_ops is None:
            print(
                "Unable to set collection stats, missing coll_ops",
                flush=True,
            )
            return

        cursor = collections.find({})
        async for doc in cursor:
            await self._update_coll_stats(doc["_id"])

    async def _update_coll_stats(self, coll_id):
        """Recalculate one collection's counts and tags.

        Any exception is printed rather than raised, so a failure on one
        collection does not stop the remaining ones from being processed.
        """
        try:
            await self.coll_ops.update_collection_counts_and_tags(coll_id)
        # pylint: disable=broad-exception-caught
        except Exception as err:
            print(
                f"Unable to update page stats for collection {coll_id}: {err}",
                flush=True,
            )