- use python-on-whales to use docker cli api directly, creating docker stack for each crawl or profile browser
- configure storages via storages.yaml secret
- add crawl_job, profile_job, splitting into base and k8s/swarm implementations
- split manager into base crawlmanager and k8s/swarm implementations
- swarm: load initial scale from db to avoid modifying fixed configs; in k8s, load from configmap
- swarm: support scheduled jobs via swarm-cronjob service
- remove docker dependencies (aiodocker, apscheduler, scheduling)
- swarm: when using local minio, expose via /data/ route in nginx via extra include (in k8s, include dir is empty and routing handled via ingress)
- k8s: cleanup minio chart: move init containers to minio.yaml
- swarm: stateful set implementation to be consistent with k8s scaling:
  - don't use service replicas
  - create a unique service with '-N' appended and allocate unique volume for each replica
  - allows crawl containers to be restarted w/o losing data
  - add volume pruning background service, as volumes can be deleted only after service shuts down fully
- watch: fully simplify routing, route via replica index instead of ip for both k8s and swarm
- rename network btrix-cloud-net -> btrix-net to avoid conflict with compose network
75 lines
1.6 KiB
YAML
version: '3.9'

services:
  job:
    # Crawl-job controller image; quoted so an empty or special-character
    # template expansion cannot produce invalid YAML (null / flow syntax).
    image: "{{ job_image }}"
    command: ["uvicorn", "btrixcloud.swarm.crawl_job:app", "--host", "0.0.0.0", "--access-log", "--log-level", "info"]

    configs:
      - shared_job_config.yaml
      - custom_job_config.yaml

    secrets:
      - storages.yaml

    volumes:
      # The job drives the Docker CLI API directly (python-on-whales),
      # so it needs access to the host Docker socket.
      - /var/run/docker.sock:/var/run/docker.sock

    networks:
      - btrix

    deploy:
      # Manual runs start immediately (1 replica); scheduled runs start at
      # 0 replicas and are scaled up by the swarm-cronjob service.
      # Left unquoted deliberately: compose requires an integer here.
      replicas: {{ 1 if not schedule else 0 }}
      labels:
        btrix.run.manual: "{{ manual }}"
        # Quoted: ids may be numeric-looking or hex strings that YAML
        # would otherwise retype (e.g. all-digit -> int).
        btrix.user: "{{ userid }}"
        btrix.archive: "{{ aid }}"
        btrix.crawlconfig: "{{ cid }}"

{% if schedule %}
        # swarm-cronjob picks up these labels to (re)start the job on schedule.
        swarm.cronjob.enable: "true"
        swarm.cronjob.skip-running: "true"
        swarm.cronjob.schedule: "{{ schedule }}"
{% endif %}

      mode: replicated
      restart_policy:
        # Run-to-completion semantics: the job must not be auto-restarted.
        condition: none

    environment:
      # Paths of the mounted config/secret files inside the container.
      SHARED_JOB_CONFIG: shared_job_config.yaml
      CUSTOM_JOB_CONFIG: custom_job_config.yaml
      STORAGE_SECRETS: storages.yaml

      JOB_ID: "{{ id }}"
      STACK_PREFIX: "crawl-"
      STORE_PATH: "{{ storage_path }}"
      STORAGE_NAME: "{{ storage_name }}"
      PROFILE_PATH: "{{ profile_path }}"

      MONGO_HOST: mongo
      # Quoted: credentials may be numeric-looking or contain YAML specials.
      MONGO_INITDB_ROOT_USERNAME: "{{ mongo_user }}"
      MONGO_INITDB_ROOT_PASSWORD: "{{ mongo_pass }}"

      RUN_MANUAL: "{{ manual }}"
|
networks:
  # Pre-existing overlay network shared by all btrix stacks.
  # NOTE(review): commit message says this was renamed from
  # btrix-cloud-net to btrix-net to avoid clashing with the
  # compose-created default network.
  btrix:
    external:
      name: btrix-net
|
configs:
  # Shared job settings; created once outside this stack (external).
  shared_job_config.yaml:
    name: btrix_shared_job_config
    external: true

  # Per-crawl-config options; one config object exists per crawlconfig id.
  custom_job_config.yaml:
    name: crawl-opts-{{ cid }}
    external: true
|
secrets:
  # Storage credentials, managed outside this stack (external secret).
  storages.yaml:
    name: btrix_storages
    external: true