Temporarily remove pages migration (#1572)
Removing this until we have a better-tested solution, and to avoid triggering QA runs for new crawls while in beta.
This commit is contained in:
		
							parent
							
								
									144000c7a3
								
							
						
					
					
						commit
						ec0db1c323
					
				| @ -17,7 +17,7 @@ from pymongo.errors import InvalidName | ||||
| from .migrations import BaseMigration | ||||
| 
 | ||||
| 
 | ||||
| CURR_DB_VERSION = "0026" | ||||
| CURR_DB_VERSION = "0025" | ||||
| 
 | ||||
| 
 | ||||
| # ============================================================================ | ||||
|  | ||||
| @ -1,47 +0,0 @@ | ||||
| """ | ||||
| Migration 0026 -- Crawl Pages | ||||
| """ | ||||
| 
 | ||||
| from btrixcloud.migrations import BaseMigration | ||||
| from btrixcloud.utils import gather_tasks_with_concurrency | ||||
| 
 | ||||
| 
 | ||||
| MIGRATION_VERSION = "0026" | ||||
| 
 | ||||
| 
 | ||||
class Migration(BaseMigration):
    """Migration 0026: backfill crawl pages.

    For every finished crawl that has no entries in the pages collection,
    read its WACZ files and insert the pages into the database.
    """

    def __init__(self, mdb, **kwargs):
        super().__init__(mdb, migration_version=MIGRATION_VERSION)
        # Required collaborator providing add_crawl_pages_to_db_from_wacz();
        # a missing "page_ops" fails fast here with a KeyError.
        self.page_ops = kwargs["page_ops"]

    async def migrate_up(self):
        """Perform migration up.

        Add pages to database for each crawl without them, pulling from WACZ files.
        """
        # pylint: disable=duplicate-code
        crawls_mdb = self.mdb["crawls"]
        pages_mdb = self.mdb["pages"]

        # All finished crawls vs. crawls that already have pages stored.
        finished_ids = await crawls_mdb.distinct(
            "_id", {"type": "crawl", "finished": {"$ne": None}}
        )
        ids_with_pages = await pages_mdb.distinct("crawl_id")

        pending_ids = list(set(finished_ids) - set(ids_with_pages))
        if not pending_ids:
            return

        # Build one coroutine per crawl; concurrency is bounded by the helper.
        tasks = [
            self.page_ops.add_crawl_pages_to_db_from_wacz(crawl_id)
            for crawl_id in pending_ids
        ]

        try:
            await gather_tasks_with_concurrency(*tasks)
        # Best-effort: a failure here is logged, not fatal to startup.
        # pylint: disable=broad-exception-caught, raise-missing-from
        except Exception as err:
            print(f"Error adding pages to db: {err}", flush=True)
		Loading…
	
		Reference in New Issue
	
	Block a user