Previously, the crawler pods used preferred node affinity instead of required node affinity, which resulted in crawler pods running on the main node pool. Instead, we want to ensure crawler pods run on the dedicated node pool (if configured).
- Converts 'preferred node affinity' to 'required node affinity' for the node pool, while keeping preferred pod affinity so that all crawler / redis pods stay together.
- For profiles, updates to the same node affinity, and also adds resource constraints matching a single crawler to the profile browser, which previously had no resource constraints.
		
			
				
	
	
		
			181 lines
		
	
	
		
			3.7 KiB
		
	
	
	
		
			YAML
		
	
	
	
	
	
			
		
		
	
	
			181 lines
		
	
	
		
			3.7 KiB
		
	
	
	
		
			YAML
		
	
	
	
	
	
# -------
# PVC
# -------
# Persistent volume claim for one crawler instance. It shares its name with
# the crawler pod, which references it as the claim behind its crawl-data
# volume.

apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: {{ name }}
  namespace: {{ namespace }}
  labels:
    # Quoted so that an all-digit or boolean-looking id still renders as a
    # string; label values must be strings.
    crawl: "{{ id }}"
    role: crawler

spec:
  accessModes:
    - ReadWriteOnce

  resources:
    requests:
      storage: {{ crawler_storage }}

  # Only pin a storage class when one is configured; otherwise the field is
  # omitted entirely.
  {% if volume_storage_class %}
  storageClassName: {{ volume_storage_class }}
  {% endif %}



# -------
# CRAWLER
# -------
# Crawler pod for one crawl instance. The whole document is skipped while
# do_restart is set, so only the PVC above is rendered in that case.
{% if not do_restart %}
---
apiVersion: v1
kind: Pod
metadata:
  name: {{ name }}
  namespace: {{ namespace }}
  labels:
    # Quoted so that an all-digit or boolean-looking id still renders as a
    # string; label values must be strings.
    crawl: "{{ id }}"
    role: crawler

spec:
  hostname: {{ name }}
  subdomain: crawler

  {% if priorityClassName %}
  priorityClassName: {{ priorityClassName }}
  {% endif %}

  restartPolicy: OnFailure

  terminationGracePeriodSeconds: {{ termination_grace_secs }}
  volumes:
    # Per-crawl-config configuration, mounted as a single JSON file below.
    - name: crawl-config
      configMap:
        name: crawl-config-{{ cid }}

    # Backed by the PVC of the same name as this pod.
    - name: crawl-data
      persistentVolumeClaim:
        claimName: {{ name }}

  affinity:
{% if crawler_node_type %}
    # Hard requirement: when a crawler node type is configured, the pod may
    # only be scheduled onto nodes whose nodeType label matches it.
    nodeAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        nodeSelectorTerms:
          - matchExpressions:
            - key: nodeType
              operator: In
              values:
                - "{{ crawler_node_type }}"
{% endif %}

    # Soft preference: try to land on the same host as other pods carrying
    # the same crawl label.
    podAffinity:
      preferredDuringSchedulingIgnoredDuringExecution:
        - weight: 10
          podAffinityTerm:
            topologyKey: "kubernetes.io/hostname"
            labelSelector:
              matchExpressions:
              - key: crawl
                operator: In
                values:
                # Quoted: selector values must be strings, and this must
                # match the quoted crawl label above.
                - "{{ id }}"

  tolerations:
    # NOTE(review): the tolerated taint value is hard-coded to "crawling"
    # while the node affinity above uses the configured crawler node type;
    # confirm the two are intended to be independent.
    - key: nodeType
      operator: Equal
      value: crawling
      effect: NoSchedule
    # Stay bound to a not-ready / unreachable node for up to 300 seconds
    # before being evicted.
    - key: node.kubernetes.io/not-ready
      operator: Exists
      tolerationSeconds: 300
      effect: NoExecute
    - key: node.kubernetes.io/unreachable
      operator: Exists
      effect: NoExecute
      tolerationSeconds: 300

  containers:
    - name: crawler
      image: {{ crawler_image }}
      imagePullPolicy: {{ crawler_image_pull_policy }}
      command:
        - crawl
        - --config
        - /tmp/crawl-config.json
        - --redisStoreUrl
        - {{ redis_url }}
      {%- if profile_filename %}
        - --profile
        - "@{{ profile_filename }}"
      {%- endif %}

      volumeMounts:
        # Mount only the crawl-config.json key of the config map, read-only.
        - name: crawl-config
          mountPath: /tmp/crawl-config.json
          subPath: crawl-config.json
          readOnly: true

        - name: crawl-data
          mountPath: /crawls

      envFrom:
        - configMapRef:
            name: shared-crawler-config

        - secretRef:
            name: {{ storage_secret }}

      {% if signing_secret %}
        - secretRef:
            name: {{ signing_secret }}
      {% endif %}

      env:
        - name: CRAWL_ID
          value: "{{ id }}"

        - name: WEBHOOK_URL
          value: "{{ redis_url }}/crawls-done"

        - name: STORE_PATH
          value: "{{ storage_path }}"

        - name: STORE_FILENAME
          value: "{{ storage_filename }}"

        - name: STORE_USER
          value: "{{ userid }}"

    {% if crawler_socks_proxy_host %}
        - name: SOCKS_HOST
          value: "{{ crawler_socks_proxy_host }}"
      {% if crawler_socks_proxy_port %}
        - name: SOCKS_PORT
          value: "{{ crawler_socks_proxy_port }}"
      {% endif %}
    {% endif %}

      # Memory request and limit use the same value; CPU has a request but
      # no limit.
      resources:
        limits:
          memory: "{{ memory }}"

        requests:
          cpu: "{{ cpu }}"
          memory: "{{ memory }}"

      # The probe is omitted when no liveness port is configured or when it
      # is the string '0'.
      {% if crawler_liveness_port and crawler_liveness_port != '0' %}
      livenessProbe:
        httpGet:
          path: /healthz
          port: {{ crawler_liveness_port }}

        initialDelaySeconds: 15
        periodSeconds: 120
        failureThreshold: 3
      {% endif %}

{% endif %}