Temporarily remove pages migration (#1572)
Removing until we have a better-tested solution, in part to avoid interfering with testing of QA runs for new crawls in beta.
parent 144000c7a3
commit ec0db1c323
@@ -17,7 +17,7 @@ from pymongo.errors import InvalidName
 
 from .migrations import BaseMigration
 
 
-CURR_DB_VERSION = "0026"
+CURR_DB_VERSION = "0025"
 
 
 # ============================================================================
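The hunk above rolls the current DB version back to "0025", matching the removal of migration 0026 in the hunk below. As a minimal sketch of why zero-padded version strings make this kind of gating work (a hypothetical runner written for illustration; Browsertrix's actual migration dispatch may differ):

# Hypothetical version-gated migration check, for illustration only --
# not Browsertrix's actual migration runner.
CURR_DB_VERSION = "0025"

def should_apply(migration_version: str, db_version: str) -> bool:
    # Zero-padded versions compare correctly as strings: "0026" > "0025",
    # so migration 0026 is skipped once the target version is "0025".
    return db_version < migration_version <= CURR_DB_VERSION

print(should_apply("0026", "0025"))  # False: past the current target version
print(should_apply("0025", "0024"))  # True: earlier migrations still apply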
@@ -1,47 +0,0 @@
-"""
-Migration 0026 -- Crawl Pages
-"""
-
-from btrixcloud.migrations import BaseMigration
-from btrixcloud.utils import gather_tasks_with_concurrency
-
-
-MIGRATION_VERSION = "0026"
-
-
-class Migration(BaseMigration):
-    """Migration class."""
-
-    def __init__(self, mdb, **kwargs):
-        super().__init__(mdb, migration_version=MIGRATION_VERSION)
-        self.page_ops = kwargs["page_ops"]
-
-    async def migrate_up(self):
-        """Perform migration up.
-
-        Add pages to database for each crawl without them, pulling from WACZ files.
-        """
-        # pylint: disable=duplicate-code
-        crawls_mdb = self.mdb["crawls"]
-        pages_mdb = self.mdb["pages"]
-
-        crawl_ids = await crawls_mdb.distinct(
-            "_id", {"type": "crawl", "finished": {"$ne": None}}
-        )
-        crawl_ids_with_pages = await pages_mdb.distinct("crawl_id")
-
-        crawl_ids_no_pages = list(set(crawl_ids) - set(crawl_ids_with_pages))
-        if not crawl_ids_no_pages:
-            return
-
-        all_coroutines = []
-
-        for crawl_id in crawl_ids_no_pages:
-            current_coroutine = self.page_ops.add_crawl_pages_to_db_from_wacz(crawl_id)
-            all_coroutines.append(current_coroutine)
-
-        try:
-            await gather_tasks_with_concurrency(*all_coroutines)
-        # pylint: disable=broad-exception-caught, raise-missing-from
-        except Exception as err:
-            print(f"Error adding pages to db: {err}", flush=True)
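In the removed migrate_up, each coroutine wraps page_ops.add_crawl_pages_to_db_from_wacz(crawl_id), which backfills page records from a crawl's WACZ files, and gather_tasks_with_concurrency (imported from btrixcloud.utils) gathers those coroutines without running them all at once, so a deployment with many finished crawls does not read every WACZ archive in parallel. A minimal sketch of such a helper, assuming the usual semaphore pattern (the real implementation in btrixcloud.utils may differ):

import asyncio

# Hypothetical sketch of a bounded-concurrency gather helper; the actual
# btrixcloud.utils.gather_tasks_with_concurrency may be implemented differently.
async def gather_tasks_with_concurrency(*coroutines, limit: int = 10):
    semaphore = asyncio.Semaphore(limit)

    async def bounded(coro):
        # Only `limit` coroutines may hold the semaphore at once.
        async with semaphore:
            return await coro

    return await asyncio.gather(*(bounded(coro) for coro in coroutines))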