* crawlconfig: fix default filename template, make configurable - make default crawl file template configurable with 'default_crawl_filename_template' value in values.yaml - set to '@ts-@hostsuffix.wacz' by default - allow updating via 'crawlFilenameTemplate' in crawlconfig patch, which updates configmap - tests: add test for custom 'default_crawl_filename_template'
		
			
				
	
	
		
			180 lines
		
	
	
		
			4.0 KiB
		
	
	
	
		
			YAML
		
	
	
	
	
	
			
		
		
	
	
			180 lines
		
	
	
		
			4.0 KiB
		
	
	
	
		
			YAML
		
	
	
	
	
	
# -------
# CRAWLER
# -------
# Jinja template for the crawler workload of a single crawl.
# Renders a StatefulSet named crawl-<id> with `scale` replicas; each replica
# gets its own "crawl-data" volume claim mounted at /crawls and reads its
# crawl configuration from the crawl-config-<cid> ConfigMap.
#
# Templated string scalars are double-quoted so that an id or URL that looks
# like a number or boolean still renders as a YAML string. Integer and
# quantity fields (replicas, grace period, probe port, resources) are left
# unquoted on purpose.
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: crawl-{{ id }}
  namespace: "{{ namespace }}"
  labels:
    crawl: "{{ id }}"
    role: crawler

spec:
  selector:
    matchLabels:
      crawl: "{{ id }}"
      role: crawler

  # Governing service; same name as this StatefulSet.
  serviceName: crawl-{{ id }}
  replicas: {{ scale }}
  # Start and stop all replicas at once instead of one ordinal at a time.
  podManagementPolicy: Parallel

  # not yet supported
  #persistentVolumeClaimRetentionPolicy:
  #  whenDeleted: Delete
  #  whenScaled: Delete

  # One claim per replica, labelled like the pods it belongs to.
  volumeClaimTemplates:
    - metadata:
        name: crawl-data
        labels:
          crawl: "{{ id }}"
          role: crawler

      spec:
        accessModes:
          - ReadWriteOnce

        resources:
          requests:
            storage: {{ requests_hd }}

        # Only emitted when a storage class is configured; otherwise the
        # cluster default applies.
        {% if volume_storage_class %}
        storageClassName: "{{ volume_storage_class }}"
        {% endif %}

  template:
    metadata:
      labels:
        crawl: "{{ id }}"
        role: crawler

      # NOTE(review): presumably a changed value here is what forces the
      # pods to be recreated -- confirm against the operator code.
      {% if force_restart %}
      annotations:
        btrix.crawlForceRestart: "{{ force_restart }}"
      {% endif %}

    spec:
      terminationGracePeriodSeconds: {{ termination_grace_secs }}
      # Disabled. The template engine still renders the expression inside
      # the YAML comment on the next line.
      #nodeSelector: {{ crawl_node_selector }}
      volumes:
        - name: crawl-config
          configMap:
            name: crawl-config-{{ cid }}

      affinity:
        # Soft preference for nodes whose nodeType label matches.
        nodeAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 1
              preference:
                matchExpressions:
                - key: nodeType
                  operator: In
                  values:
                    - "{{ crawler_node_type }}"

        # Soft preference for the zone of pods carrying both labels below.
        # NOTE(review): failure-domain.beta.kubernetes.io/zone is the
        # deprecated predecessor of topology.kubernetes.io/zone -- confirm
        # which label the target clusters still set before changing it.
        podAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 2
              podAffinityTerm:
                topologyKey: "failure-domain.beta.kubernetes.io/zone"
                labelSelector:
                  matchLabels:
                    job-name: job-{{ id }}
                    crawl: "{{ id }}"

      # NOTE(review): the tolerated value is hard-coded to "crawling" while
      # the node affinity above is templated -- confirm this is intended.
      tolerations:
        - key: "nodeType"
          operator: "Equal"
          value: "crawling"
          effect: "NoSchedule"

      containers:
        - name: crawler
          image: "{{ crawler_image }}"
          imagePullPolicy: "{{ crawler_image_pull_policy }}"
          command:
            - crawl
            - --config
            - /tmp/crawl-config.json
            - --redisStoreUrl
            - "{{ redis_url }}"
          {%- if profile_filename %}
            - --profile
            - "@profiles/{{ profile_filename }}"
          {%- endif %}

          volumeMounts:
            # Single file projected from the ConfigMap via subPath.
            - name: crawl-config
              mountPath: /tmp/crawl-config.json
              subPath: crawl-config.json
              readOnly: true

            - name: crawl-data
              mountPath: /crawls

          envFrom:
            - configMapRef:
                name: shared-crawler-config

            - secretRef:
                name: storage-{{ storage_name }}

          env:
            - name: CRAWL_ID
              value: "{{ id }}"

            - name: WEBHOOK_URL
              value: "{{ redis_url }}/crawls-done"

            - name: STORE_PATH
              value: "{{ store_path }}"

            - name: STORE_FILENAME
              value: "{{ store_filename }}"

            - name: STORE_USER
              value: "{{ userid }}"

          resources:
            limits:
              cpu: {{ crawler_limits_cpu }}
              memory: {{ crawler_limits_memory }}

            requests:
              cpu: {{ crawler_requests_cpu }}
              memory: {{ crawler_requests_memory }}

          # Probe is omitted entirely when the port is unset or the
          # string '0'.
          {% if crawler_liveness_port and crawler_liveness_port != '0' %}
          livenessProbe:
            httpGet:
              path: /healthz
              port: {{ crawler_liveness_port }}

            initialDelaySeconds: 15
            periodSeconds: 120
            failureThreshold: 3
          {% endif %}

---
# Headless Service (clusterIP: None) for the crawler pods of one crawl.
# Its name matches the serviceName of the crawl-<id> StatefulSet, and it
# selects pods by the same crawl/role labels. Exposes TCP 9037 as
# "screencast".
#
# The crawl label values are quoted so a number-like id still renders as a
# string.
# NOTE(review): no metadata.namespace is set here, unlike the StatefulSet;
# presumably the caller supplies it at apply time -- confirm.
apiVersion: v1
kind: Service
metadata:
  name: crawl-{{ id }}
  labels:
    crawl: "{{ id }}"
    role: crawler

spec:
  clusterIP: None
  selector:
    crawl: "{{ id }}"
    role: crawler

  ports:
    - protocol: TCP
      port: 9037
      name: screencast