# -------
# CRAWLER
# -------
# StatefulSet that runs the crawler pods for a single crawl.
# This file is a template: the brace-delimited expressions and block tags are
# expanded by the template engine first, and only the rendered text is parsed
# as YAML. String-typed substitutions are quoted so that an all-digit or
# boolean-looking expansion is not retyped by the YAML parser.
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: crawl-{{ id }}
  namespace: "{{ namespace }}"
  labels:
    crawl: "{{ id }}"
    role: crawler

spec:
  selector:
    matchLabels:
      crawl: "{{ id }}"
      role: crawler

  serviceName: crawler
  # Integer field: must stay unquoted.
  replicas: {{ scale }}
  podManagementPolicy: OrderedReady

  # not yet supported
  #persistentVolumeClaimRetentionPolicy:
  #  whenDeleted: Delete
  #  whenScaled: Delete

  # One "crawl-data" claim is created per replica from this template.
  volumeClaimTemplates:
    - metadata:
        name: crawl-data
        labels:
          crawl: "{{ id }}"
          role: crawler

      spec:
        accessModes:
          - ReadWriteOnce

        resources:
          requests:
            storage: {{ requests_hd }}

        # Only emitted when a storage class is supplied.
        {% if volume_storage_class %}
        storageClassName: "{{ volume_storage_class }}"
        {% endif %}

  template:
    metadata:
      labels:
        crawl: "{{ id }}"
        role: crawler

      # Only emitted when a force-restart value is supplied.
      {% if force_restart %}
      annotations:
        btrix.crawlForceRestart: "{{ force_restart }}"
      {% endif %}

    spec:
      # Integer field: must stay unquoted.
      terminationGracePeriodSeconds: {{ termination_grace_secs }}
      # NOTE(review): the expression on the next line sits in a YAML comment,
      # but the template engine presumably still evaluates it - confirm it is
      # always defined and renders to a single line.
      #nodeSelector: {{ crawl_node_selector }}

      volumes:
        - name: crawl-config
          configMap:
            name: crawl-config-{{ cid }}

      affinity:
        # Soft preference for nodes labelled with the configured node type.
        nodeAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 1
              preference:
                matchExpressions:
                  - key: nodeType
                    operator: In
                    values:
                      - "{{ crawler_node_type }}"

        # Soft preference for the zone of pods carrying the labels below.
        # NOTE(review): this topology key is the deprecated beta zone label;
        # Kubernetes documents topology.kubernetes.io/zone as its replacement.
        # Left unchanged here - confirm cluster node labels before switching.
        podAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 2
              podAffinityTerm:
                topologyKey: "failure-domain.beta.kubernetes.io/zone"
                labelSelector:
                  matchLabels:
                    job-name: "job-{{ id }}"
                    crawl: "{{ id }}"

      # NOTE(review): the tolerated value is hard-coded while the node
      # affinity above is templated - verify the two are meant to differ.
      tolerations:
        - key: "nodeType"
          operator: "Equal"
          value: "crawling"
          effect: "NoSchedule"

      containers:
        - name: crawler
          image: "{{ crawler_image }}"
          imagePullPolicy: "{{ crawler_image_pull_policy }}"
          command:
            - crawl
            - --config
            - /tmp/crawl-config.json
            - --redisStoreUrl
            - "{{ redis_url }}"
          # The whitespace-trimming block tags below keep the optional
          # arguments directly after the previous list item when rendered.
          {%- if profile_filename %}
            - --profile
            - "@profiles/{{ profile_filename }}"
          {%- endif %}

          volumeMounts:
            # Single file from the config map, mounted read-only.
            - name: crawl-config
              mountPath: /tmp/crawl-config.json
              subPath: crawl-config.json
              readOnly: true

            - name: crawl-data
              mountPath: /crawls

          envFrom:
            - configMapRef:
                name: shared-crawler-config

            - secretRef:
                name: storage-{{ storage_name }}

          env:
            - name: CRAWL_ID
              value: "{{ id }}"

            - name: WEBHOOK_URL
              value: "{{ redis_url }}/crawls-done"

            - name: STORE_PATH
              value: "{{ store_path }}"

            - name: STORE_FILENAME
              value: "{{ store_filename }}"

            - name: STORE_USER
              value: "{{ userid }}"

            # Proxy settings: the port is only emitted together with a host.
            {% if crawler_socks_proxy_host %}
            - name: SOCKS_HOST
              value: "{{ crawler_socks_proxy_host }}"
            {% if crawler_socks_proxy_port %}
            - name: SOCKS_PORT
              value: "{{ crawler_socks_proxy_port }}"
            {% endif %}
            {% endif %}

          # Memory request equals the memory limit; CPU has a request only.
          resources:
            limits:
              memory: {{ crawler_memory }}

            requests:
              cpu: {{ crawler_cpu }}
              memory: {{ crawler_memory }}

          # Probe is omitted when the port is unset or the string '0'.
          {% if crawler_liveness_port and crawler_liveness_port != '0' %}
          livenessProbe:
            httpGet:
              path: /healthz
              # Kept unquoted: a quoted value would be read as a port name.
              port: {{ crawler_liveness_port }}

            initialDelaySeconds: 15
            periodSeconds: 120
            failureThreshold: 3
          {% endif %}