- use python-on-whale to use docker cli api directly, creating docker stack for each crawl or profile browser
- configure storages via storages.yaml secret
- add crawl_job, profile_job, splitting into base and k8s/swarm implementations
- split manager into base crawlmanager and k8s/swarm implementations
- swarm: load initial scale from db to avoid modifying fixed configs; in k8s, load from configmap
- swarm: support scheduled jobs via swarm-cronjob service
- remove docker dependencies (aiodocker, apscheduler, scheduling)
- swarm: when using local minio, expose via /data/ route in nginx via extra include (in k8s, include dir is empty and routing handled via ingress)
- k8s: cleanup minio chart: move init containers to minio.yaml
- swarm: stateful set implementation to be consistent with k8s scaling:
  - don't use service replicas
  - create a unique service with '-N' appended and allocate unique volume for each replica
  - allows crawl containers to be restarted w/o losing data
- add volume pruning background service, as volumes can be deleted only after service shuts down fully
- watch: fully simplify routing, route via replica index instead of ip for both k8s and swarm
- rename network btrix-cloud-net -> btrix-net to avoid conflict with compose network
60 lines
1.3 KiB
YAML
60 lines
1.3 KiB
YAML
# Kubernetes Job manifest template for a single crawl job.
#
# The double-curly-brace placeholders are substituted by a template engine
# before the result is parsed as YAML (presumably Jinja2 -- confirm against
# the code that renders this file). Every templated scalar is quoted so the
# rendered value is always read as a string, even if it is empty or looks
# like a number or boolean.
apiVersion: batch/v1
kind: Job
metadata:
  name: "job-{{ id }}"
  annotations:
    # Annotation values must be strings, hence the quotes.
    btrix.run.manual: "{{ manual }}"

  # The same three labels are repeated on the pod template below.
  labels:
    btrix.user: "{{ userid }}"
    btrix.archive: "{{ aid }}"
    btrix.crawlconfig: "{{ cid }}"

spec:
  # Number of retries allowed before the Job is marked as failed.
  backoffLimit: 1000
  # A finished Job becomes eligible for automatic cleanup after 20 seconds.
  ttlSecondsAfterFinished: 20

  template:
    metadata:
      labels:
        btrix.user: "{{ userid }}"
        btrix.archive: "{{ aid }}"
        btrix.crawlconfig: "{{ cid }}"

    spec:
      restartPolicy: OnFailure
      containers:
        - name: crawl-job
          image: "{{ job_image }}"
          imagePullPolicy: Always
          # Serve the btrixcloud.k8s.crawl_job ASGI app with uvicorn,
          # listening on all interfaces, with access logging at info level.
          command:
            - "uvicorn"
            - "btrixcloud.k8s.crawl_job:app"
            - "--host"
            - "0.0.0.0"
            - "--access-log"
            - "--log-level"
            - "info"

          volumeMounts:
            - name: config-volume
              mountPath: /config

          # Bulk-import environment variables from the mongo-auth secret and
          # from the config map named after this crawl config id.
          envFrom:
            - secretRef:
                name: mongo-auth

            - configMapRef:
                name: "crawl-config-{{ cid }}"

          env:
            # Read from the pod's own 'job-name' label via the downward API
            # (Kubernetes sets this label on pods created by a Job).
            - name: JOB_ID
              valueFrom:
                fieldRef:
                  fieldPath: metadata.labels['job-name']

            - name: RUN_MANUAL
              value: "{{ manual }}"

      volumes:
        # Only the config.yaml key of shared-job-config is projected; with
        # the mount above it appears as /config/config.yaml.
        - name: config-volume
          configMap:
            name: shared-job-config
            items:
              - key: config.yaml
                path: config.yaml