# --------
# REDIS
# --------
---
# Redis StatefulSet for a single crawl: one replica with a 1Gi data volume,
# started with the `redis.conf` key of the `shared-job-config` ConfigMap.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: redis-{{ id }}
  namespace: "{{ namespace }}"
  labels:
    crawl: "{{ id }}"
    role: redis

spec:
  selector:
    matchLabels:
      crawl: "{{ id }}"
      role: redis

  serviceName: redis-{{ id }}
  replicas: 1
  podManagementPolicy: Parallel

  # The retention policy is a StatefulSet-level field, not part of the PVC
  # template spec; its accepted values are `Retain` and `Delete`.
  persistentVolumeClaimRetentionPolicy:
    whenDeleted: Delete
    whenScaled: Delete

  volumeClaimTemplates:
    - metadata:
        name: redis-data
        labels:
          crawl: "{{ id }}"
          # NOTE(review): every other Redis object uses `role: redis`; this
          # looks like a copy-paste from the crawler. Kept unchanged in case
          # PVC cleanup selects on this label -- confirm before renaming.
          role: crawler

      spec:
        accessModes:
          - ReadWriteOnce

        resources:
          requests:
            storage: 1Gi

        {% if volume_storage_class %}
        storageClassName: "{{ volume_storage_class }}"
        {% endif %}

  template:
    metadata:
      labels:
        crawl: "{{ id }}"
        role: redis

    spec:
      terminationGracePeriodSeconds: 10
      # Template expressions are still expanded inside YAML comments, so the
      # commented-out snippets below must only reference single-line values.
      #nodeSelector: {{ crawl_node_selector }}

      volumes:
        - name: shared-redis-conf
          configMap:
            name: shared-job-config
            items:
              - key: redis.conf
                path: redis.conf

      # - name: redis-conf
      #   emptyDir: {}

      #initContainers:
      #  - name: init-redis
      #    image: {{ redis_image }}
      #    imagePullPolicy: {{ redis_image_pull_policy }}
      #    command:
      #      - bash
      #      - "-c"
      #      - |
      #        set -ex
      #        # if no ordinal found in hostname, something is wrong, exit
      #        [[ `hostname` =~ (.+)-([0-9]+)$ ]] || exit 1
      #        ordinal=${BASH_REMATCH[2]}
      #        base=${BASH_REMATCH[1]}
      #        # if ordinal is 0, use main config, otherwise use replica
      #        cp /shared-redis-conf/redis.conf /redis-conf/redis.conf
      #        if [[ $ordinal -ne 0 ]]; then
      #          echo "replicaof $base-0.$base 6379" >> /redis-conf/redis.conf
      #        fi
      #    volumeMounts:
      #      - name: shared-redis-conf
      #        mountPath: /shared-redis-conf
      #      - name: redis-conf
      #        mountPath: /redis-conf

      affinity:
        # Soft preference for nodes labelled with the configured node type.
        nodeAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 1
              preference:
                matchExpressions:
                  - key: nodeType
                    operator: In
                    values:
                      - "{{ redis_node_type }}"

        # Soft preference for the zone of pods carrying both the job-name
        # and crawl labels below.
        podAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 2
              podAffinityTerm:
                # `failure-domain.beta.kubernetes.io/zone` is deprecated in
                # favour of this well-known label.
                topologyKey: "topology.kubernetes.io/zone"
                labelSelector:
                  matchLabels:
                    job-name: job-{{ id }}
                    crawl: "{{ id }}"

      containers:
        - name: redis
          image: "{{ redis_image }}"
          imagePullPolicy: "{{ redis_image_pull_policy }}"

          args: ["/redis-conf/redis.conf", "--appendonly", "yes"]
          volumeMounts:
            - name: redis-data
              mountPath: /data

            - name: shared-redis-conf
              mountPath: /redis-conf

          # Quantities stay unquoted so an empty expansion remains null
          # rather than becoming an empty (unparseable) quantity string.
          resources:
            limits:
              cpu: {{ redis_limits_cpu }}
              memory: {{ redis_limits_memory }}

            requests:
              cpu: {{ redis_requests_cpu }}
              memory: {{ redis_requests_memory }}

          readinessProbe:
            exec:
              command:
                - redis-cli
                - ping

---
# Headless Service (clusterIP: None) matching the StatefulSet's serviceName;
# it must live in the same namespace as the StatefulSet.
apiVersion: v1
kind: Service

metadata:
  name: redis-{{ id }}
  namespace: "{{ namespace }}"
  labels:
    crawl: "{{ id }}"
    role: redis

spec:
  clusterIP: None
  selector:
    crawl: "{{ id }}"
    role: redis

  ports:
    - protocol: TCP
      port: 6379
      name: redis

# -------
# CRAWLER
# -------
---
# Crawler StatefulSet: the templated `scale` value sets the replica count,
# and each replica gets its own `crawl-data` volume mounted at /crawls.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: crawl-{{ id }}
  namespace: "{{ namespace }}"
  labels:
    crawl: "{{ id }}"
    role: crawler

spec:
  selector:
    matchLabels:
      crawl: "{{ id }}"
      role: crawler

  serviceName: crawl-{{ id }}
  replicas: {{ scale }}
  podManagementPolicy: Parallel

  # The retention policy is a StatefulSet-level field, not part of the PVC
  # template spec; its accepted values are `Retain` and `Delete`.
  persistentVolumeClaimRetentionPolicy:
    whenDeleted: Delete
    whenScaled: Delete

  volumeClaimTemplates:
    - metadata:
        name: crawl-data
        labels:
          crawl: "{{ id }}"
          role: crawler

      spec:
        accessModes:
          - ReadWriteOnce

        resources:
          requests:
            storage: {{ requests_hd }}

        {% if volume_storage_class %}
        storageClassName: "{{ volume_storage_class }}"
        {% endif %}

  template:
    metadata:
      labels:
        crawl: "{{ id }}"
        role: crawler

    spec:
      terminationGracePeriodSeconds: {{ termination_grace_secs }}
      # Template expressions are still expanded inside YAML comments, so this
      # commented-out line must only reference a single-line value.
      #nodeSelector: {{ crawl_node_selector }}

      volumes:
        - name: crawl-config
          configMap:
            name: crawl-config-{{ cid }}

      affinity:
        # Soft preference for nodes labelled with the configured node type.
        nodeAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 1
              preference:
                matchExpressions:
                  - key: nodeType
                    operator: In
                    values:
                      - "{{ crawler_node_type }}"

        # Soft preference for the zone of pods carrying both the job-name
        # and crawl labels below.
        podAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 2
              podAffinityTerm:
                # `failure-domain.beta.kubernetes.io/zone` is deprecated in
                # favour of this well-known label.
                topologyKey: "topology.kubernetes.io/zone"
                labelSelector:
                  matchLabels:
                    job-name: job-{{ id }}
                    crawl: "{{ id }}"

      containers:
        - name: crawler
          image: "{{ crawler_image }}"
          imagePullPolicy: "{{ crawler_image_pull_policy }}"
          command:
            - crawl
            - --config
            - /tmp/crawl-config.json
            - --redisStoreUrl
            - "{{ redis_url }}"
          {%- if profile_filename %}
            - --profile
            - "@profiles/{{ profile_filename }}"
          {%- endif %}

          volumeMounts:
            # Only the crawl-config.json key is mounted, as a single file.
            - name: crawl-config
              mountPath: /tmp/crawl-config.json
              subPath: crawl-config.json
              readOnly: true

            - name: crawl-data
              mountPath: /crawls

          envFrom:
            - configMapRef:
                name: shared-crawler-config

            - secretRef:
                name: storage-{{ storage_name }}

          env:
            # Env values must be strings; quoting guards against an id that
            # YAML would otherwise read as a number or boolean.
            - name: CRAWL_ID
              value: "{{ id }}"

            - name: WEBHOOK_URL
              value: "{{ redis_url }}/crawls-done"

            - name: STORE_PATH
              valueFrom:
                configMapKeyRef:
                  name: crawl-config-{{ cid }}
                  key: STORE_PATH

            - name: STORE_FILENAME
              valueFrom:
                configMapKeyRef:
                  name: crawl-config-{{ cid }}
                  key: STORE_FILENAME

            - name: STORE_USER
              valueFrom:
                configMapKeyRef:
                  name: crawl-config-{{ cid }}
                  key: USER_ID

          # Quantities stay unquoted so an empty expansion remains null
          # rather than becoming an empty (unparseable) quantity string.
          resources:
            limits:
              cpu: {{ crawler_limits_cpu }}
              memory: {{ crawler_limits_memory }}

            requests:
              cpu: {{ crawler_requests_cpu }}
              memory: {{ crawler_requests_memory }}

          # The probe is emitted only when a liveness port is set and is not
          # the string '0'. The port stays unquoted so it is an integer port
          # number rather than a named port.
          {% if crawler_liveness_port and crawler_liveness_port != '0' %}
          livenessProbe:
            httpGet:
              path: /healthz
              port: {{ crawler_liveness_port }}

            initialDelaySeconds: 15
            periodSeconds: 120
            failureThreshold: 3
          {% endif %}

---
# Headless Service (clusterIP: None) matching the StatefulSet's serviceName;
# it must live in the same namespace as the StatefulSet.
apiVersion: v1
kind: Service

metadata:
  name: crawl-{{ id }}
  namespace: "{{ namespace }}"
  labels:
    crawl: "{{ id }}"
    role: crawler

spec:
  clusterIP: None
  selector:
    crawl: "{{ id }}"
    role: crawler

  ports:
    - protocol: TCP
      port: 9037
      name: screencast