- no longer being used with latest stream-zip - was not computed correctly in the crawler - counterpart to webrecorder/browsertrix-crawler#657 --------- Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
38 lines
1022 B
Python
38 lines
1022 B
Python
"""
|
|
Migration 0034 -- remove crc32 from CrawlFile
|
|
"""
|
|
|
|
from btrixcloud.migrations import BaseMigration
|
|
|
|
|
|
# Version identifier for this migration; Migration.__init__ hands it to
# BaseMigration as the migration_version argument.
MIGRATION_VERSION = "0034"
|
|
|
|
|
|
class Migration(BaseMigration):
    """Migration 0034: strip the crc32 field from crawl file entries."""

    # pylint: disable=unused-argument
    def __init__(self, mdb, **kwargs):
        """Set up the migration under MIGRATION_VERSION.

        Extra keyword arguments are accepted but not used here.
        """
        super().__init__(mdb, migration_version=MIGRATION_VERSION)

    async def migrate_up(self):
        """Perform migration up.

        Unset crc32 on every entry of the files array for each document in
        the crawls collection that has crc32 set on at least one file.
        Errors are printed rather than raised.
        """
        crawls = self.mdb["crawls"]

        # Match only documents where some file entry still carries crc32.
        has_crc32 = {"files.crc32": {"$exists": 1}}
        # "$[]" targets every element of the files array in a matched document.
        drop_crc32 = {"$unset": {"files.$[].crc32": 1}}

        try:
            result = await crawls.update_many(has_crc32, drop_crc32)
            print(
                f"{result.modified_count} crawls migrated to remove crc32 from files",
                flush=True,
            )
        # pylint: disable=broad-exception-caught
        except Exception as exc:
            print(f"Error migrating crawl files to remove crc32: {exc}", flush=True)
|