- use python-on-whale to call the docker cli api directly, creating a docker stack for each crawl or profile browser
- configure storages via storages.yaml secret
- add crawl_job and profile_job, splitting each into base and k8s/swarm implementations
- split manager into base crawlmanager and k8s/swarm implementations
- swarm: load initial scale from db to avoid modifying fixed configs; in k8s, load from configmap
- swarm: support scheduled jobs via swarm-cronjob service
- remove docker dependencies (aiodocker, apscheduler, scheduling)
- swarm: when using local minio, expose via /data/ route in nginx via extra include (in k8s, include dir is empty and routing is handled via ingress)
- k8s: clean up minio chart: move init containers to minio.yaml
- swarm: stateful set implementation to be consistent with k8s scaling:
  - don't use service replicas
  - create a unique service with '-N' appended and allocate a unique volume for each replica
  - allows crawl containers to be restarted w/o losing data
- add volume pruning background service, as volumes can be deleted only after the service shuts down fully
- watch: fully simplify routing, route via replica index instead of ip for both k8s and swarm
- rename network btrix-cloud-net -> btrix-net to avoid conflict with compose network
		
			
				
	
	
		
			66 lines
		
	
	
		
			1.5 KiB
		
	
	
	
		
			YAML
		
	
	
	
	
	
			
		
		
	
	
			66 lines
		
	
	
		
			1.5 KiB
		
	
	
	
		
			YAML
		
	
	
	
	
	
# Kubernetes Job template (Jinja) that launches a profile browser job.
# Every templated scalar is double-quoted so the rendered manifest always
# yields strings, which label values and env var values must be; an
# unquoted value could otherwise render as null, a number, or a boolean.
---
apiVersion: batch/v1
kind: Job
metadata:
  name: "job-{{ id }}"

  labels:
    btrix.profile: "1"
    btrix.archive: "{{ aid }}"
    btrix.user: "{{ userid }}"
    {%- if baseprofile %}
    btrix.baseprofile: "{{ baseprofile }}"
    {%- endif %}

spec:
  template:
    metadata:
      # Pod labels repeat the Job labels above.
      labels:
        btrix.profile: "1"
        btrix.archive: "{{ aid }}"
        btrix.user: "{{ userid }}"
        {%- if baseprofile %}
        btrix.baseprofile: "{{ baseprofile }}"
        {%- endif %}

    spec:
      restartPolicy: OnFailure
      containers:
        - name: crawl-job
          image: "{{ job_image }}"
          imagePullPolicy: Always
          command: ["python", "-m", "btrixcloud.k8s.profile_job"]

          # Exposes the shared job config file at /config/config.yaml.
          volumeMounts:
            - name: config-volume
              mountPath: /config

          env:
            # Read from the pod's own 'job-name' label via the downward API.
            - name: JOB_ID
              valueFrom:
                fieldRef:
                  fieldPath: metadata.labels['job-name']

            - name: STORE_PATH
              value: "{{ storage_path }}"

            - name: STORAGE_NAME
              value: "{{ storage_name }}"

            # Quoted because env var values must be strings, not integers.
            - name: IDLE_TIMEOUT
              value: "60"

            - name: START_URL
              value: "{{ url }}"

            - name: PROFILE_PATH
              value: "{{ profile_path }}"

      volumes:
        - name: config-volume
          configMap:
            name: shared-job-config
            items:
              - key: config.yaml
                path: config.yaml