browsertrix/backend/btrixcloud/migrations/migration_0033_crawl_quota_states.py
Tessa Walsh d38abbca7f
Standardize handling of storage and execution time quotas (#1969)
Fixes #1968 

Changes:
- `stopped_quota_reached` and `skipped_quota_reached` migrated to new
values that indicate which quota was reached
- Before crawls are run, the operator checks if storage or exec mins
quotas are reached and if so fails the crawl with the appropriate state
of `skipped_storage_quota_reached` or `skipped_time_quota_reached`
- While crawls are running, the operator checks if the exec mins quota
is reached or if the combined size of all running crawls would push the
org over its storage quota once uploaded; if so, the crawl is stopped
gracefully and given the `stopped_storage_quota_reached` or
`stopped_time_quota_reached` state as appropriate (a rough sketch of
both checks follows this summary)
- Adds new nightly tests for enforcing storage quota
2024-07-25 12:49:11 -07:00
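The operator-side quota checks described in the summary are not part of the migration file below; they live in the crawl operator. The following is only a minimal sketch of that decision logic: the function names and the org fields (storage_quota, bytes_stored, exec_mins_quota, exec_mins_used) are assumptions made for illustration, not the actual btrixcloud operator API; only the state strings come from this change.

# Illustrative sketch only, not the real btrixcloud operator code.
# Field and function names are assumptions; the state strings are from this change.

SKIPPED_STORAGE = "skipped_storage_quota_reached"
SKIPPED_TIME = "skipped_time_quota_reached"
STOPPED_STORAGE = "stopped_storage_quota_reached"
STOPPED_TIME = "stopped_time_quota_reached"


def state_before_start(org):
    """Check quotas before a crawl starts: return a skip state if one is already hit."""
    if org.storage_quota and org.bytes_stored >= org.storage_quota:
        return SKIPPED_STORAGE
    if org.exec_mins_quota and org.exec_mins_used >= org.exec_mins_quota:
        return SKIPPED_TIME
    return None


def state_while_running(org, running_crawls_size):
    """Check quotas while crawls run: return a stop state if one will be hit.

    running_crawls_size is the combined size of all currently running crawls,
    which counts against storage once the archives are uploaded.
    """
    if org.exec_mins_quota and org.exec_mins_used >= org.exec_mins_quota:
        return STOPPED_TIME
    if org.storage_quota and org.bytes_stored + running_crawls_size >= org.storage_quota:
        return STOPPED_STORAGE
    return None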


"""
Migration 0033 - Standardizing quota-based crawl states
"""
from btrixcloud.migrations import BaseMigration
MIGRATION_VERSION = "0033"
class Migration(BaseMigration):
"""Migration class."""
# pylint: disable=unused-argument
def __init__(self, mdb, **kwargs):
super().__init__(mdb, migration_version=MIGRATION_VERSION)
async def migrate_up(self):
"""Perform migration up.
Migrate skipped_quota_reached state to skipped_storage_quota_reached
Migrate stopped_quota_reached to stopped_time_quota_reached
Also update lastCrawlStates in workflows with these states
"""
crawls_db = self.mdb["crawls"]
crawl_configs_db = self.mdb["crawl_configs"]

        ## CRAWLS ##

        try:
            res = await crawls_db.update_many(
                {"type": "crawl", "state": "skipped_quota_reached"},
                {"$set": {"state": "skipped_storage_quota_reached"}},
            )
            updated = res.modified_count
            print(
                f"{updated} crawls with state skipped_quota_reached migrated",
                flush=True,
            )
        # pylint: disable=broad-exception-caught
        except Exception as err:
            print(
                f"Error migrating crawls with state skipped_quota_reached: {err}",
                flush=True,
            )

        try:
            res = await crawls_db.update_many(
                {"type": "crawl", "state": "stopped_quota_reached"},
                {"$set": {"state": "stopped_time_quota_reached"}},
            )
            updated = res.modified_count
            print(
                f"{updated} crawls with state stopped_quota_reached migrated",
                flush=True,
            )
        # pylint: disable=broad-exception-caught
        except Exception as err:
            print(
                f"Error migrating crawls with state stopped_quota_reached: {err}",
                flush=True,
            )

        ## WORKFLOWS ##

        try:
            res = await crawl_configs_db.update_many(
                {"lastCrawlState": "skipped_quota_reached"},
                {"$set": {"lastCrawlState": "skipped_storage_quota_reached"}},
            )
            updated = res.modified_count
            print(
                f"{updated} crawl configs with lastCrawlState skipped_quota_reached migrated",
                flush=True,
            )
        # pylint: disable=broad-exception-caught
        except Exception as err:
            print(
                f"Error migrating crawlconfigs with lastCrawlState skipped_quota_reached: {err}",
                flush=True,
            )

        try:
            res = await crawl_configs_db.update_many(
                {"lastCrawlState": "stopped_quota_reached"},
                {"$set": {"lastCrawlState": "stopped_time_quota_reached"}},
            )
            updated = res.modified_count
            print(
                f"{updated} crawl configs with lastCrawlState stopped_quota_reached migrated",
                flush=True,
            )
        # pylint: disable=broad-exception-caught
        except Exception as err:
            print(
                f"Error migrating crawl configs with lastCrawlState stopped_quota_reached: {err}",
                flush=True,
            )
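
In normal operation the backend's migration machinery applies this migration when the stored version is behind MIGRATION_VERSION. Purely as a hand-run sketch, it could also be exercised directly against a Motor database as below; the connection URL and database name are placeholders, and running it this way bypasses how btrixcloud actually applies migrations.

# Hand-run sketch for illustration only; Mongo URL and db name are placeholders.
import asyncio

from motor.motor_asyncio import AsyncIOMotorClient

from btrixcloud.migrations.migration_0033_crawl_quota_states import Migration


async def main():
    client = AsyncIOMotorClient("mongodb://localhost:27017")  # placeholder URL
    mdb = client["browsertrix"]  # placeholder database name
    await Migration(mdb).migrate_up()


if __name__ == "__main__":
    asyncio.run(main())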