browsertrix/backend/btrixcloud/swarm/swarmmanager.py
Ilya Kreymer 0c8a5a49b4 refactor to use docker swarm for local alternative to k8s instead of docker compose (#247):
- use python-on-whales to drive the docker cli directly, creating a docker stack for each crawl or profile browser
- configure storages via storages.yaml secret
- add crawl_job, profile_job, splitting into base and k8s/swarm implementations
- split manager into base crawlmanager and k8s/swarm implementations
- swarm: load initial scale from db to avoid modifying fixed configs, in k8s, load from configmap
- swarm: support scheduled jobs via swarm-cronjob service
- remove docker dependencies (aiodocker, apscheduler, scheduling)
- swarm: when using local minio, expose via /data/ route in nginx via extra include (in k8s, include dir is empty and routing handled via ingress)
- k8s: cleanup minio chart: move init containers to minio.yaml
- swarm: stateful set implementation to be consistent with k8s scaling:
  - don't use service replicas,
  - create a unique service with '-N' appended and allocate unique volume for each replica
  - allows crawl containers to be restarted w/o losing data
- add volume pruning background service, as volumes can be deleted only after service shuts down fully
- watch: fully simplify routing, route via replica index instead of ip for both k8s and swarm
- rename network btrix-cloud-net -> btrix-net to avoid conflict with compose network
2022-06-05 10:37:17 -07:00

""" Swarn Runner """
import os
import json

import aiohttp

from ..archives import S3Storage

from .utils import (
    get_templates_dir,
    run_swarm_stack,
    delete_swarm_stack,
    get_service,
    get_service_labels,
    set_service_label,
    ping_containers,
    create_config,
    delete_configs,
)

from ..crawlmanager import BaseCrawlManager


# ============================================================================
class SwarmManager(BaseCrawlManager):
    """ Docker Crawl Manager Interface"""

    # pylint: disable=too-many-instance-attributes,too-many-public-methods

    def __init__(self):
        super().__init__(get_templates_dir())

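        # the default S3 storage is configured from the STORE_* environment variables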
        self.storages = {
            "default": S3Storage(
                name="default",
                access_key=os.environ["STORE_ACCESS_KEY"],
                secret_key=os.environ["STORE_SECRET_KEY"],
                endpoint_url=os.environ["STORE_ENDPOINT_URL"],
                access_endpoint_url=os.environ["STORE_ACCESS_ENDPOINT_URL"],
            )
        }

    async def check_storage(self, storage_name, is_default=False):
        """ check if storage_name is valid storage """
        # if not default, don't validate
        if not is_default:
            return True

        # if default, ensure name is in default storages list
        return self.storages[storage_name]

    async def get_default_storage(self, name):
        """ return default storage by name """
        return self.storages[name]

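    # run_swarm_stack is a blocking call (python-on-whales drives the docker cli),
    # so deploy the stack from the generated yaml in the default executor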
    async def _create_from_yaml(self, id_, yaml_data):
        await self.loop.run_in_executor(None, run_swarm_stack, id_, yaml_data)

    async def ping_profile_browser(self, browserid):
        """ return ping profile browser """
        return await self.loop.run_in_executor(
            None,
            ping_containers,
            f"job-{browserid}_job",
            "SIGUSR1",
        )

    async def get_profile_browser_metadata(self, browserid):
        """ get browser profile labels """
        return await self.loop.run_in_executor(
            None, get_service_labels, f"job-{browserid}_job"
        )

    async def delete_profile_browser(self, browserid):
        """ delete browser job, if it is a profile browser job """
        return await self.loop.run_in_executor(
            None, delete_swarm_stack, f"job-{browserid}"
        )

    def _add_extra_crawl_job_params(self, params):
        """ add extra crawl job params """
params["mongo_user"] = os.environ["MONGO_INITDB_ROOT_USERNAME"]
params["mongo_pass"] = os.environ["MONGO_INITDB_ROOT_PASSWORD"]
    async def _create_config_map(self, crawlconfig, **kwargs):
        """ create config map for config """
        data = json.dumps(crawlconfig.get_raw_config())

        labels = {
            "btrix.crawlconfig": str(crawlconfig.id),
            "btrix.archive": str(crawlconfig.aid),
        }

        await self.loop.run_in_executor(
            None, create_config, f"crawl-config-{crawlconfig.id}", data, labels
        )

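        # any extra options passed by the caller go into a second config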
        data = json.dumps(kwargs)

        await self.loop.run_in_executor(
            None, create_config, f"crawl-opts-{crawlconfig.id}", data, labels
        )

    async def _update_scheduled_job(self, crawlconfig):
        """ update schedule on crawl job """
        cid = str(crawlconfig.id)

        crawl_id = f"sched-{cid[:12]}"
        stack_name = f"job-{crawl_id}"

        service_name = f"{stack_name}_job"
        label_name = "swarm.cronjob.schedule"

        cron_job = await self.loop.run_in_executor(None, get_service, service_name)

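        # a scheduled stack already exists for this config: sync its schedule
        # label, or remove the stack if the schedule was cleared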
        if cron_job:
            curr_schedule = cron_job.spec.labels.get(label_name)

            if crawlconfig.schedule and crawlconfig.schedule != curr_schedule:
                await self.loop.run_in_executor(
                    None,
                    set_service_label,
                    service_name,
                    f"{label_name}={crawlconfig.schedule}",
                )

            if not crawlconfig.schedule:
                # if currently running, ping container to exit on current job
                # otherwise, delete!
                if not await self.loop.run_in_executor(
                    None,
                    ping_containers,
                    service_name,
                    "SIGUSR1",
                ):
                    await self.loop.run_in_executor(
                        None, delete_swarm_stack, stack_name
                    )

            return

        if not crawlconfig.schedule:
            return

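        # no existing stack: deploy a new scheduled job stack from the job template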
        data = await self._load_job_template(
            crawlconfig, crawl_id, manual=False, schedule=crawlconfig.schedule
        )

        await self._create_from_yaml(f"job-{crawl_id}", data)

    async def _post_to_job(self, crawl_id, aid, path, data=None):
        """ make a POST request to the container for specified crawl job """
        async with aiohttp.ClientSession() as session:
            async with session.request(
                "POST", f"http://job-{crawl_id}_job:8000{path}", json=data
            ) as resp:
                await resp.json()

    async def _delete_crawl_configs(self, label):
        """ delete crawl configs by specified label """
        await self.loop.run_in_executor(None, delete_configs, label)