Temporarily remove pages migration (#1572)
Removing until we have a better-tested solution, including to avoid testing of QA runs for new crawls in beta.
This commit is contained in:
parent
144000c7a3
commit
ec0db1c323
@ -17,7 +17,7 @@ from pymongo.errors import InvalidName
|
||||
from .migrations import BaseMigration
|
||||
|
||||
|
||||
CURR_DB_VERSION = "0026"
|
||||
CURR_DB_VERSION = "0025"
|
||||
|
||||
|
||||
# ============================================================================
|
||||
|
@ -1,47 +0,0 @@
|
||||
"""
|
||||
Migration 0026 -- Crawl Pages
|
||||
"""
|
||||
|
||||
from btrixcloud.migrations import BaseMigration
|
||||
from btrixcloud.utils import gather_tasks_with_concurrency
|
||||
|
||||
|
||||
MIGRATION_VERSION = "0026"
|
||||
|
||||
|
||||
class Migration(BaseMigration):
    """Migration 0026: add page records for finished crawls that have none."""

    def __init__(self, mdb, **kwargs):
        """Initialise the migration at version ``MIGRATION_VERSION``.

        ``kwargs`` must contain a ``page_ops`` entry; it is stored and later
        used by ``migrate_up`` to import pages for each crawl.
        """
        super().__init__(mdb, migration_version=MIGRATION_VERSION)
        self.page_ops = kwargs["page_ops"]

    async def migrate_up(self):
        """Perform migration up.

        Collect the ids of all documents in ``crawls`` with type "crawl" and
        a non-null ``finished`` value, drop those that already appear as a
        ``crawl_id`` in ``pages``, and call
        ``page_ops.add_crawl_pages_to_db_from_wacz`` for each remaining id.

        Any exception raised while running those calls is printed rather
        than propagated.
        """
        # pylint: disable=duplicate-code
        crawls_coll = self.mdb["crawls"]
        pages_coll = self.mdb["pages"]

        finished_crawl_filter = {"type": "crawl", "finished": {"$ne": None}}
        finished_ids = await crawls_coll.distinct("_id", finished_crawl_filter)
        ids_having_pages = await pages_coll.distinct("crawl_id")

        # Finished crawls for which the pages collection has no entries yet.
        pending_ids = list(set(finished_ids) - set(ids_having_pages))
        if len(pending_ids) == 0:
            return

        # Build every coroutine up front; they are only awaited by the
        # gather helper below.
        import_jobs = [
            self.page_ops.add_crawl_pages_to_db_from_wacz(pending_id)
            for pending_id in pending_ids
        ]

        try:
            await gather_tasks_with_concurrency(*import_jobs)
        # pylint: disable=broad-exception-caught, raise-missing-from
        except Exception as err:
            print(f"Error adding pages to db: {err}", flush=True)
|
Loading…
Reference in New Issue
Block a user