browsertrix/backend/btrixcloud/migrations/migration_0024_crawlerchannel.py
Ilya Kreymer 3cd52342a7
Remove Crawl Workflow Configmaps (#1894)
Fixes #1893 

- Removes crawl workflow-scoped configmaps and replaces them with operator-controlled
per-crawl configmaps that contain only the JSON config passed to Browsertrix
Crawler (mounted as a volume); a rough sketch of such a configmap follows below.
- Other configmap settings are replaced with custom CrawlJob options
(most already were; profile_filename and storage_filename have been added)
- Cron jobs also updated to create CrawlJob without relying on configmaps,
querying the db for additional settings.
- The `userid` associated with cron jobs is set to the user that last modified
 the schedule of the crawl, rather than whoever last modified the workflow
- Various functions that deal with updating configmaps have been removed,
including in migrations.
- New migration 0029 added to remove all crawl workflow configmaps
2024-06-28 15:25:23 -07:00
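
Loose illustration (not code from this PR): a per-crawl configmap of the kind
described above carries only the crawler's JSON config, which is mounted into the
crawler pod as a volume. The sketch below builds such an object with the official
kubernetes Python client; the `crawl-config-<id>` naming and the
`crawl-config.json` key are assumptions, and the actual operator assembles its
child resources through its own templating rather than this client.

import json

from kubernetes import client


def build_crawl_configmap(
    crawl_id: str, crawl_config: dict, namespace: str
) -> client.V1ConfigMap:
    """Build a ConfigMap holding only the JSON config for a single crawl."""
    return client.V1ConfigMap(
        metadata=client.V1ObjectMeta(
            name=f"crawl-config-{crawl_id}",  # assumed naming scheme
            namespace=namespace,
            labels={"crawl": crawl_id},
        ),
        # Single key with the serialized config; the crawler pod mounts this
        # configmap as a volume and reads the JSON file from it.
        data={"crawl-config.json": json.dumps(crawl_config)},
    )

Creating the object would then be a single
client.CoreV1Api().create_namespaced_config_map(namespace, body) call.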

56 lines
1.8 KiB
Python

"""
Migration 0024 -- crawlerChannel
"""
from btrixcloud.migrations import BaseMigration
MIGRATION_VERSION = "0024"
class Migration(BaseMigration):
"""Migration class."""
# pylint: disable=unused-argument
def __init__(self, mdb, **kwargs):
super().__init__(mdb, migration_version=MIGRATION_VERSION)
async def migrate_up(self):
"""Perform migration up.
Add crawlerChannel to existing workflows and profiles, and update configmaps
"""
# pylint: disable=duplicate-code
mdb_crawl_configs = self.mdb["crawl_configs"]
mdb_profiles = self.mdb["profiles"]
async for config in mdb_crawl_configs.find(
{"crawlerChannel": {"$in": ["", None]}}
):
config_id = config["_id"]
try:
await mdb_crawl_configs.find_one_and_update(
{"_id": config_id},
{"$set": {"crawlerChannel": "default"}},
)
# pylint: disable=broad-except
except Exception as err:
print(
f"Error adding crawlerChannel 'default' to workflow {config_id}: {err}",
flush=True,
)
async for profile in mdb_profiles.find({"crawlerChannel": {"$in": ["", None]}}):
profile_id = profile["_id"]
try:
await mdb_profiles.find_one_and_update(
{"_id": profile_id},
{"$set": {"crawlerChannel": "default"}},
)
# pylint: disable=broad-except
except Exception as err:
print(
f"Error adding crawlerChannel 'default' to profile {profile_id}: {err}",
flush=True,
)
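
For local experimentation only, the migration above could be driven by hand with
motor, as sketched below. The connection URL and the throwaway database name
"btrix-test" are assumptions; the real backend applies migrations through its own
startup logic rather than a standalone script like this.

import asyncio

from motor.motor_asyncio import AsyncIOMotorClient

from btrixcloud.migrations.migration_0024_crawlerchannel import Migration


async def main():
    mdb = AsyncIOMotorClient("mongodb://localhost:27017")["btrix-test"]

    # Seed a workflow that is missing crawlerChannel; the $in ["", None] query
    # in migrate_up matches empty, null, and absent values alike.
    await mdb["crawl_configs"].insert_one({"_id": "example-workflow"})

    await Migration(mdb).migrate_up()

    doc = await mdb["crawl_configs"].find_one({"_id": "example-workflow"})
    print(doc["crawlerChannel"])  # expected: "default"


if __name__ == "__main__":
    asyncio.run(main())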