Temporarily remove pages migration (#1572)
Removing this until we have a better-tested solution, and to avoid triggering QA runs for new crawls while in beta.
This commit is contained in:
		
							parent
							
								
									144000c7a3
								
							
						
					
					
						commit
						ec0db1c323
					
				| @ -17,7 +17,7 @@ from pymongo.errors import InvalidName | ||||
| from .migrations import BaseMigration | ||||
| 
 | ||||
| 
 | ||||
| CURR_DB_VERSION = "0026" | ||||
| CURR_DB_VERSION = "0025" | ||||
| 
 | ||||
| 
 | ||||
| # ============================================================================ | ||||
|  | ||||
| @ -1,47 +0,0 @@ | ||||
| """ | ||||
| Migration 0026 -- Crawl Pages | ||||
| """ | ||||
| 
 | ||||
| from btrixcloud.migrations import BaseMigration | ||||
| from btrixcloud.utils import gather_tasks_with_concurrency | ||||
| 
 | ||||
| 
 | ||||
| MIGRATION_VERSION = "0026" | ||||
| 
 | ||||
| 
 | ||||
class Migration(BaseMigration):
    """Migration 0026: backfill crawl pages.

    For every finished crawl that has no entries in the pages collection,
    read its WACZ files and insert the pages into the database.
    """

    def __init__(self, mdb, **kwargs):
        super().__init__(mdb, migration_version=MIGRATION_VERSION)
        # Required collaborator providing add_crawl_pages_to_db_from_wacz();
        # a missing "page_ops" fails fast here with a KeyError.
        self.page_ops = kwargs["page_ops"]

    async def migrate_up(self):
        """Perform migration up.

        Add pages to database for each crawl without them, pulling from WACZ files.
        """
        # pylint: disable=duplicate-code
        crawls_mdb = self.mdb["crawls"]
        pages_mdb = self.mdb["pages"]

        # All finished crawls vs. crawls that already have pages stored.
        finished_ids = await crawls_mdb.distinct(
            "_id", {"type": "crawl", "finished": {"$ne": None}}
        )
        ids_with_pages = await pages_mdb.distinct("crawl_id")

        pending_ids = list(set(finished_ids) - set(ids_with_pages))
        if not pending_ids:
            return

        # Build one coroutine per crawl; concurrency is bounded by the helper.
        tasks = [
            self.page_ops.add_crawl_pages_to_db_from_wacz(crawl_id)
            for crawl_id in pending_ids
        ]

        try:
            await gather_tasks_with_concurrency(*tasks)
        # Best-effort: a failure here is logged, not fatal to startup.
        # pylint: disable=broad-exception-caught, raise-missing-from
        except Exception as err:
            print(f"Error adding pages to db: {err}", flush=True)
		Loading…
	
		Reference in New Issue
	
	Block a user