# Patch summary: remove the crc32 field from crawl files.
#
# - basecrawls.py: a call inside BaseCrawlOps no longer passes
#   crc32=file_.crc32.
# - db.py: CURR_DB_VERSION is bumped from "0033" to "0034".
# - migrations/migration_0034_drop_invalid_crc.py (new file): migrate_up() runs
#   update_many on the "crawls" collection, matching documents where
#   "files.crc32" exists and unsetting "files.$[].crc32". It prints the
#   modified count, and on any Exception prints the error instead of raising.
# - models.py: the `crc32: int = 0` field is dropped from CrawlFile,
#   CrawlFileOut and CrawlCompleteIn.
# - operator/crawls.py: a call inside CrawlOperator no longer passes
#   crc32=filecomplete.crc32.
#
# NOTE(review): the source of this patch had its newlines and indentation
# collapsed. Leading indentation and blank context lines below were
# reconstructed from the hunk line counts and Python syntax -- confirm against
# the original commit before applying.
# NOTE(review): migrate_up() swallows every exception, so its caller cannot see
# a failed update; whether the DB version is still recorded in that case depends
# on BaseMigration, which is not part of this patch -- TODO confirm.
diff --git a/backend/btrixcloud/basecrawls.py b/backend/btrixcloud/basecrawls.py
index 58ce272c..b1181343 100644
--- a/backend/btrixcloud/basecrawls.py
+++ b/backend/btrixcloud/basecrawls.py
@@ -492,7 +492,6 @@ class BaseCrawlOps:
                     name=file_.filename,
                     path=presigned_url or "",
                     hash=file_.hash,
-                    crc32=file_.crc32,
                     size=file_.size,
                     crawlId=crawl_id,
                     numReplicas=len(file_.replicas) if file_.replicas else 0,
diff --git a/backend/btrixcloud/db.py b/backend/btrixcloud/db.py
index 30425eb9..76462563 100644
--- a/backend/btrixcloud/db.py
+++ b/backend/btrixcloud/db.py
@@ -17,7 +17,7 @@ from pymongo.errors import InvalidName
 from .migrations import BaseMigration
 
 
-CURR_DB_VERSION = "0033"
+CURR_DB_VERSION = "0034"
 
 
 # ============================================================================
diff --git a/backend/btrixcloud/migrations/migration_0034_drop_invalid_crc.py b/backend/btrixcloud/migrations/migration_0034_drop_invalid_crc.py
new file mode 100644
index 00000000..1a53383c
--- /dev/null
+++ b/backend/btrixcloud/migrations/migration_0034_drop_invalid_crc.py
@@ -0,0 +1,37 @@
+"""
+Migration 0034 -- remove crc32 from CrawlFile
+"""
+
+from btrixcloud.migrations import BaseMigration
+
+
+MIGRATION_VERSION = "0034"
+
+
+class Migration(BaseMigration):
+    """Migration class."""
+
+    # pylint: disable=unused-argument
+    def __init__(self, mdb, **kwargs):
+        super().__init__(mdb, migration_version=MIGRATION_VERSION)
+
+    async def migrate_up(self):
+        """Perform migration up.
+
+        Remove crc32 field from all crawl files
+        """
+        crawls_db = self.mdb["crawls"]
+
+        try:
+            res = await crawls_db.update_many(
+                {"files.crc32": {"$exists": 1}},
+                {"$unset": {"files.$[].crc32": 1}},
+            )
+            updated = res.modified_count
+            print(f"{updated} crawls migrated to remove crc32 from files", flush=True)
+        # pylint: disable=broad-exception-caught
+        except Exception as err:
+            print(
+                f"Error migrating crawl files to remove crc32: {err}",
+                flush=True,
+            )
diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py
index 99cfadca..aef2b0ab 100644
--- a/backend/btrixcloud/models.py
+++ b/backend/btrixcloud/models.py
@@ -625,7 +625,6 @@ class CrawlFile(BaseFile):
 
     presignedUrl: Optional[str] = None
     expireAt: Optional[datetime] = None
-    crc32: int = 0
 
 
 # ============================================================================
@@ -635,7 +634,6 @@ class CrawlFileOut(BaseModel):
     name: str
     path: str
     hash: str
-    crc32: int = 0
     size: int
 
     crawlId: Optional[str] = None
@@ -930,7 +928,6 @@ class CrawlCompleteIn(BaseModel):
     filename: str
     size: int
     hash: str
-    crc32: int = 0
 
     completed: Optional[bool] = True
 
diff --git a/backend/btrixcloud/operator/crawls.py b/backend/btrixcloud/operator/crawls.py
index 0327717f..5f11d6e2 100644
--- a/backend/btrixcloud/operator/crawls.py
+++ b/backend/btrixcloud/operator/crawls.py
@@ -1192,7 +1192,6 @@ class CrawlOperator(BaseOperator):
             filename=filename,
             size=filecomplete.size,
             hash=filecomplete.hash,
-            crc32=filecomplete.crc32,
             storage=crawl.storage,
         )
 