- use python-on-whale to use docker cli api directly, creating docker stack for each crawl or profile browser
- configure storages via storages.yaml secret
- add crawl_job, profile_job, splitting into base and k8s/swarm implementations
- split manager into base crawlmanager and k8s/swarm implementations
- swarm: load initial scale from db to avoid modifying fixed configs, in k8s, load from configmap
- swarm: support scheduled jobs via swarm-cronjob service
- remove docker dependencies (aiodocker, apscheduler, scheduling)
- swarm: when using local minio, expose via /data/ route in nginx via extra include (in k8s, include dir is empty and routing handled via ingress)
- k8s: cleanup minio chart: move init containers to minio.yaml
- swarm: stateful set implementation to be consistent with k8s scaling:
  - don't use service replicas,
  - create a unique service with '-N' appended and allocate unique volume for each replica
  - allows crawl containers to be restarted w/o losing data
- add volume pruning background service, as volumes can be deleted only after service shuts down fully
- watch: fully simplify routing, route via replica index instead of ip for both k8s and swarm
- rename network btrix-cloud-net -> btrix-net to avoid conflict with compose network
42 lines
774 B
YAML
42 lines
774 B
YAML
# Stack template for a single interactive profile browser.
# Rendered by the template engine first, then parsed as a compose file,
# so templated scalars are quoted to keep them strings after rendering.
version: '3.9'

services:
  browser:
    # Quoted so an empty or odd-looking expansion stays a string.
    image: "{{ crawler_image }}"
    command:
      - create-login-profile
      - --interactive
      - --filename
      - /tmp/profile.tar.gz
      - --url
      - "{{ url }}"
      # The two --profile arguments are emitted only when profile_filename is set.
      {%- if profile_filename %}
      - --profile
      - "@{{ profile_filename }}"
      {%- endif %}

    hostname: "browser-{{ id }}-0.browser-{{ id }}"
    networks:
      - btrix

    deploy:
      endpoint_mode: dnsrr
      replicas: 1
      labels:
        # Quoted so a numeric- or boolean-looking id is not retyped by YAML.
        browser: "{{ id }}"
        role: browser

    # Left as plain scalars on purpose: double-quoting would turn any
    # backslash in a rendered value into an escape sequence.
    environment:
      - STORE_ENDPOINT_URL={{ endpoint_url }}
      - STORE_ACCESS_KEY={{ access_key }}
      - STORE_SECRET_KEY={{ secret_key }}
      - STORE_PATH={{ storage_path }}

networks:
  btrix:
    # Pre-existing network, referenced by name rather than created by this stack.
    # `external: true` + `name:` replaces the `external.name` form that was
    # deprecated in compose file format 3.5.
    external: true
    name: btrix-net