Previously, the crawler pods used preferred node affinity instead of required node affinity. This resulted in crawler pods running on the main node pool. Instead, we want to ensure that crawler pods run on the dedicated node pool (if configured). - Converts 'preferred node affinity' to 'required node affinity' for the node pool, while keeping preferred pod affinity so that all crawler / redis pods stay together. - For profiles, updates to the same node affinity, and also adds resource constraints matching a single crawler to the profile browser, which previously had no resource constraints.
		
			
				
	
	
		
			181 lines
		
	
	
		
			3.7 KiB
		
	
	
	
		
			YAML
		
	
	
	
	
	
			
		
		
	
	
			181 lines
		
	
	
		
			3.7 KiB
		
	
	
	
		
			YAML
		
	
	
	
	
	
# -------
# PVC
# -------
# Per-crawler PersistentVolumeClaim. The crawler Pod defined later in this
# template mounts a claim with this same name as its "crawl-data" volume.
# Templated string values are quoted so that an empty or number-/boolean-
# looking substitution is still parsed as a string.

apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: "{{ name }}"
  namespace: "{{ namespace }}"
  labels:
    crawl: "{{ id }}"
    role: crawler

spec:
  accessModes:
    - ReadWriteOnce

  resources:
    requests:
      storage: "{{ crawler_storage }}"

  # storageClassName is only emitted when a storage class is configured.
  {% if volume_storage_class %}
  storageClassName: "{{ volume_storage_class }}"
  {% endif %}


# -------
# CRAWLER
# -------
# Crawler Pod. The whole document is left out of the rendered output when
# do_restart is set.
{% if not do_restart %}
---
apiVersion: v1
kind: Pod
metadata:
  name: "{{ name }}"
  namespace: "{{ namespace }}"
  labels:
    crawl: "{{ id }}"
    role: crawler

spec:
  hostname: "{{ name }}"
  subdomain: crawler

  {% if priorityClassName %}
  priorityClassName: "{{ priorityClassName }}"
  {% endif %}

  restartPolicy: OnFailure

  # Left unquoted on purpose: this field must render as an integer.
  terminationGracePeriodSeconds: {{ termination_grace_secs }}
  volumes:
    - name: crawl-config
      configMap:
        name: "crawl-config-{{ cid }}"

    - name: crawl-data
      persistentVolumeClaim:
        claimName: "{{ name }}"

  affinity:
{% if crawler_node_type %}
    # Hard requirement: schedule only onto nodes whose nodeType label equals
    # the configured crawler node type.
    nodeAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        nodeSelectorTerms:
          - matchExpressions:
            - key: nodeType
              operator: In
              values:
                - "{{ crawler_node_type }}"
{% endif %}

    # Soft preference (weight 10): co-locate on the same host as other pods
    # carrying this crawl's label.
    podAffinity:
      preferredDuringSchedulingIgnoredDuringExecution:
        - weight: 10
          podAffinityTerm:
            topologyKey: "kubernetes.io/hostname"
            labelSelector:
              matchExpressions:
              - key: crawl
                operator: In
                values:
                - "{{ id }}"

  tolerations:
    # NOTE(review): the taint value is hard-coded to "crawling" while the
    # node affinity above matches the configured crawler node type; confirm
    # the two are meant to agree.
    - key: nodeType
      operator: Equal
      value: crawling
      effect: NoSchedule
    # Stay bound for up to 300 seconds when the node is not-ready or
    # unreachable before being evicted.
    - key: node.kubernetes.io/not-ready
      operator: Exists
      tolerationSeconds: 300
      effect: NoExecute
    - key: node.kubernetes.io/unreachable
      operator: Exists
      effect: NoExecute
      tolerationSeconds: 300

  containers:
    - name: crawler
      image: "{{ crawler_image }}"
      imagePullPolicy: "{{ crawler_image_pull_policy }}"
      # The --profile argument is appended only when a profile filename is set.
      command:
        - crawl
        - --config
        - /tmp/crawl-config.json
        - --redisStoreUrl
        - "{{ redis_url }}"
      {%- if profile_filename %}
        - --profile
        - "@{{ profile_filename }}"
      {%- endif %}

      volumeMounts:
        - name: crawl-config
          mountPath: /tmp/crawl-config.json
          subPath: crawl-config.json
          readOnly: true

        - name: crawl-data
          mountPath: /crawls

      envFrom:
        - configMapRef:
            name: shared-crawler-config

        - secretRef:
            name: "{{ storage_secret }}"

      {% if signing_secret %}
        - secretRef:
            name: "{{ signing_secret }}"
      {% endif %}

      env:
        - name: CRAWL_ID
          value: "{{ id }}"

        - name: WEBHOOK_URL
          value: "{{ redis_url }}/crawls-done"

        - name: STORE_PATH
          value: "{{ storage_path }}"

        - name: STORE_FILENAME
          value: "{{ storage_filename }}"

        - name: STORE_USER
          value: "{{ userid }}"

    {% if crawler_socks_proxy_host %}
        - name: SOCKS_HOST
          value: "{{ crawler_socks_proxy_host }}"
      {% if crawler_socks_proxy_port %}
        - name: SOCKS_PORT
          value: "{{ crawler_socks_proxy_port }}"
      {% endif %}
    {% endif %}

      # Memory request and limit use the same value; CPU has a request only.
      resources:
        limits:
          memory: "{{ memory }}"

        requests:
          cpu: "{{ cpu }}"
          memory: "{{ memory }}"

      # The probe is emitted only when a liveness port is configured and is
      # not the string '0'. The port is left unquoted so it renders as a
      # number rather than a named port.
      {% if crawler_liveness_port and crawler_liveness_port != '0' %}
      livenessProbe:
        httpGet:
          path: /healthz
          port: {{ crawler_liveness_port }}

        initialDelaySeconds: 15
        periodSeconds: 120
        failureThreshold: 3
      {% endif %}

{% endif %}