- use python-on-whales to drive the docker CLI API directly, creating a docker stack for each crawl or profile browser (see the sketch after this list)
- configure storages via a storages.yaml secret (see the sketch after this list)
- add crawl_job and profile_job, splitting each into base and k8s/swarm implementations
- split manager into a base crawlmanager and k8s/swarm implementations
- swarm: load initial scale from the db to avoid modifying fixed configs; in k8s, load it from a configmap
- swarm: support scheduled jobs via the swarm-cronjob service (see the sketch after this list)
- remove docker dependencies (aiodocker, apscheduler, scheduling)
- swarm: when using local minio, expose it via a /data/ route in nginx through an extra include (see the sketch after this list); in k8s, the include dir is empty and routing is handled via the ingress
- k8s: clean up the minio chart: move init containers to minio.yaml
- swarm: stateful set implementation, to be consistent with k8s scaling:
  - don't use service replicas
  - create a unique service with '-N' appended and allocate a unique volume for each replica, which allows crawl containers to be restarted w/o losing data
  - add a volume pruning background service (see the sketch after this list), as volumes can be deleted only after a service shuts down fully
- watch: fully simplify routing, routing via replica index instead of IP for both k8s and swarm
- rename network btrix-cloud-net -> btrix-net to avoid conflict with the compose network
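
A minimal sketch of the per-crawl stack lifecycle with python-on-whales; the stack naming and compose file path here are assumptions, not the actual implementation:

```python
# Sketch only: one docker stack per crawl via python-on-whales (names/paths hypothetical).
from python_on_whales import DockerClient

docker = DockerClient()

def start_crawl_stack(crawl_id: str, compose_file: str) -> str:
    # Each crawl gets its own stack, so it can be scaled and torn down independently.
    name = f"crawl-{crawl_id}"
    docker.stack.deploy(name, compose_files=[compose_file])
    return name

def stop_crawl_stack(crawl_id: str) -> None:
    # Removing the stack removes its services; volumes are pruned separately (see below).
    docker.stack.remove(f"crawl-{crawl_id}")
```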
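
A plausible shape for the storages.yaml secret, with key names assumed and values mirroring the env settings further down; the real schema may differ:

```yaml
# Assumed structure for the storages.yaml secret; keys are illustrative.
storages:
  - name: default
    access_key: ADMIN
    secret_key: PASSW0RD
    endpoint_url: http://minio:9000/btrix-data/
    access_endpoint_url: /data/
```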
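
Scheduled jobs on swarm rely on the swarm-cronjob controller, which watches for services carrying its deploy labels. A sketch of such a service (the image name and cron expression are illustrative):

```yaml
# Sketch: a run-to-completion service triggered by swarm-cronjob via deploy labels.
scheduled_crawl_job:
  image: webrecorder/browsertrix-backend:latest  # illustrative image
  deploy:
    mode: replicated
    replicas: 0               # swarm-cronjob scales it up on schedule
    restart_policy:
      condition: none         # run to completion, don't restart
    labels:
      - "swarm.cronjob.enable=true"
      - "swarm.cronjob.schedule=0 0 * * *"       # illustrative schedule
      - "swarm.cronjob.skip-running=true"
```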
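
The extra nginx include for local minio might look like the following, using the endpoint and bucket from the env settings below; the exact include contents are an assumption:

```nginx
# Sketch of the extra include: proxy /data/ (STORE_ACCESS_ENDPOINT_URL) to the
# local minio bucket so stored files are reachable without exposing minio directly.
location /data/ {
    proxy_pass http://minio:9000/btrix-data/;
}
```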
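
Because swarm only allows a volume to be removed once the service using it has fully shut down, deletion has to be retried in the background rather than done inline. A sketch of that pruning loop, with the label and interval as assumptions:

```python
# Sketch: background volume pruning (label and interval are hypothetical).
import time

from python_on_whales import DockerClient
from python_on_whales.exceptions import DockerException

docker = DockerClient()

def prune_loop(label: str = "btrix.prune", interval: int = 60) -> None:
    while True:
        for volume in docker.volume.list(filters={"label": label}):
            try:
                docker.volume.remove(volume)
            except DockerException:
                # Service not fully shut down yet; retry on the next pass.
                pass
        time.sleep(interval)
```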
```bash
# Env Settings (for local Docker Deployment)

MONGO_HOST=mongo
PASSWORD_SECRET=change_me

MONGO_INITDB_ROOT_USERNAME=root
MONGO_INITDB_ROOT_PASSWORD=example

MINIO_ROOT_USER=ADMIN
MINIO_ROOT_PASSWORD=PASSW0RD

MINIO_BUCKET=btrix-data

SUPERUSER_EMAIL=admin@example.com

# if blank, a password is generated automatically
SUPERUSER_PASSWORD=

STORE_ENDPOINT_URL=http://minio:9000/btrix-data/
STORE_ACCESS_ENDPOINT_URL=/data/
STORE_ACCESS_KEY=ADMIN
STORE_SECRET_KEY=PASSW0RD

MC_HOST_local=http://ADMIN:PASSW0RD@minio:9000

REDIS_URL=redis://redis/0

# enable to send verification emails
#EMAIL_SMTP_HOST=smtp.gmail.com
#EMAIL_SMTP_PORT=587
#EMAIL_SENDER=user@example.com
#EMAIL_PASSWORD=password

# Browsertrix Crawler image to use
CRAWLER_IMAGE=webrecorder/browsertrix-crawler:latest

CRAWL_ARGS="--timeout 90 --logging stats,behaviors,debug --generateWACZ --screencastPort 9037 --collection main"

REGISTRATION_ENABLED=1

# number of workers to run for backend
WEB_CONCURRENCY=1

JWT_TOKEN_LIFETIME_MINUTES=240

# for debugging, keep failed containers around
NO_DELETE_ON_FAIL=0

# auth sign -- uncomment to enable signing
# WACZ_SIGN_URL="http://authsign:8080/sign"

# optional token for signing (useful if using remote signing server)
# WACZ_SIGN_TOKEN=""
```
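
These settings are consumed at stack deploy time. A sketch of wiring them in through python-on-whales (the stack name, file names, and the env_files parameter of stack.deploy are assumptions about the installed version):

```python
# Sketch: deploy the base stack with the env settings above (names are hypothetical).
from python_on_whales import DockerClient

docker = DockerClient()
docker.stack.deploy(
    "btrix",
    compose_files=["docker-compose.yml"],
    env_files=["config.env"],  # assumed: python-on-whales substitutes these vars
)
```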