- ingress: fix proxying of /data to minio by using a separate ingress that proxies the correct host, so that presigned URLs work - presigning: decide whether to sign against the endpoint URL (minio) or the access endpoint (cloud bucket) based on whether an access endpoint is provided, and record the choice as a bool on the storage object - chart: fix indentation of incorrect storageClassName configs - ingress: make 'ingress_class' configurable (set to 'public' for microk8s; defaults to 'nginx') - minio: use an older minio image that supports the legacy fs-based setup (for now) - nginx service: add 'nginx_service_use_node_port' config setting: if true, NodePort is used for the frontend / nginx service only; otherwise the default (ClusterIP) is used - chart: remove changing of the service type for other services
		
			
				
	
	
		
			343 lines
		
	
	
		
			7.7 KiB
		
	
	
	
		
			YAML
		
	
	
	
	
	
			
		
		
	
	
			343 lines
		
	
	
		
			7.7 KiB
		
	
	
	
		
			YAML
		
	
	
	
	
	
# --------
# REDIS
# --------
# Single-replica Redis StatefulSet for one crawl. Data lives on a 1Gi
# per-pod volume mounted at /data; redis.conf comes from the
# shared-job-config ConfigMap mounted at /redis-conf.
#
# Templated string values are double-quoted so that a numeric- or
# boolean-looking expansion still renders as a YAML string. Integer and
# resource-quantity fields are deliberately left unquoted.
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: redis-{{ id }}
  namespace: "{{ namespace }}"
  labels:
    crawl: "{{ id }}"
    role: redis

spec:
  selector:
    matchLabels:
      crawl: "{{ id }}"
      role: redis

  # Must match the name of the headless Service defined for redis.
  serviceName: redis-{{ id }}
  replicas: 1
  podManagementPolicy: Parallel

  # Delete the PVC when the StatefulSet is deleted or scaled down.
  persistentVolumeClaimRetentionPolicy:
    whenDeleted: Delete
    whenScaled: Delete

  volumeClaimTemplates:
    - metadata:
        name: redis-data
        labels:
          crawl: "{{ id }}"
          # NOTE(review): this is the redis volume, yet it is labelled
          # role=crawler. Left unchanged in case cleanup code selects on
          # this label -- confirm whether it should be "redis".
          role: crawler

      spec:
        accessModes:
          - ReadWriteOnce

        resources:
          requests:
            storage: 1Gi

        {% if volume_storage_class %}
        storageClassName: "{{ volume_storage_class }}"
        {% endif %}

  template:
    metadata:
      labels:
        crawl: "{{ id }}"
        role: redis

    spec:
      terminationGracePeriodSeconds: 10
      # NOTE: Jinja still expands expressions inside YAML comments, so the
      # variables referenced in the commented-out lines below must exist.
      #nodeSelector: {{ crawl_node_selector }}
      volumes:
        - name: shared-redis-conf
          configMap:
            name: shared-job-config
            items:
              - key: redis.conf
                path: redis.conf

      #  - name: redis-conf
      #    emptyDir: {}

      # Disabled: init container that would copy redis.conf into a per-pod
      # volume and append a `replicaof` line for non-zero pod ordinals.
      #initContainers:
      #  - name: init-redis
      #    image: {{ redis_image }}
      #    imagePullPolicy: {{ redis_image_pull_policy }}
      #    command:
      #      - bash
      #      - "-c"
      #      - |
      #        set -ex
      #        # if no ordinal found in hostname, something is wrong, exit
      #        [[ `hostname` =~ (.+)-([0-9]+)$ ]] || exit 1
      #        ordinal=${BASH_REMATCH[2]}
      #        base=${BASH_REMATCH[1]}

      #        # if ordinal is 0, use main config, otherwise use replica
      #        cp /shared-redis-conf/redis.conf /redis-conf/redis.conf
      #        if [[ $ordinal -ne 0 ]]; then
      #          echo "replicaof $base-0.$base 6379" >> /redis-conf/redis.conf
      #        fi

      #    volumeMounts:
      #      - name: shared-redis-conf
      #        mountPath: /shared-redis-conf

      #      - name: redis-conf
      #        mountPath: /redis-conf

      affinity:
        # Soft preference for nodes labelled with the configured redis
        # node type.
        nodeAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 1
              preference:
                matchExpressions:
                  - key: nodeType
                    operator: In
                    values:
                      - "{{ redis_node_type }}"

        # Soft preference for the same zone as this crawl's job pod.
        podAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 2
              podAffinityTerm:
                # Stable replacement for the deprecated
                # failure-domain.beta.kubernetes.io/zone label.
                topologyKey: "topology.kubernetes.io/zone"
                labelSelector:
                  matchLabels:
                    job-name: job-{{ id }}
                    crawl: "{{ id }}"

      # Tolerates the nodeType=crawling:NoSchedule taint.
      # NOTE(review): the value is hard-coded while the node affinity above
      # is templated -- confirm this is intended.
      tolerations:
        - key: "nodeType"
          operator: "Equal"
          value: "crawling"
          effect: "NoSchedule"

      containers:
        - name: redis
          image: "{{ redis_image }}"
          imagePullPolicy: "{{ redis_image_pull_policy }}"

          args:
            - "/redis-conf/redis.conf"
            - "--appendonly"
            - "yes"

          volumeMounts:
            - name: redis-data
              mountPath: /data

            - name: shared-redis-conf
              mountPath: /redis-conf

          resources:
            limits:
              cpu: {{ redis_limits_cpu }}
              memory: {{ redis_limits_memory }}

            requests:
              cpu: {{ redis_requests_cpu }}
              memory: {{ redis_requests_memory }}

          readinessProbe:
            exec:
              command:
                - redis-cli
                - ping

---
# Headless Service (clusterIP: None) whose name matches the serviceName of
# the redis StatefulSet; selects the pods labelled role=redis for this
# crawl and exposes TCP port 6379.
apiVersion: v1
kind: Service
metadata:
  name: redis-{{ id }}
  # Pinned to the namespace the redis StatefulSet declares, so the Service
  # it references via serviceName does not depend on the caller's default
  # namespace.
  namespace: "{{ namespace }}"
  labels:
    # Quoted so a numeric-looking id still renders as a string.
    crawl: "{{ id }}"
    role: redis

spec:
  clusterIP: None
  selector:
    crawl: "{{ id }}"
    role: redis

  ports:
    - protocol: TCP
      port: 6379
      name: redis

# -------
# CRAWLER
# -------
# Crawler StatefulSet for one crawl, scaled to the templated replica count.
# Each pod gets its own volume mounted at /crawls and reads its crawl
# config from a ConfigMap file mounted at /tmp/crawl-config.json.
#
# Templated string values are double-quoted so that a numeric- or
# boolean-looking expansion (or one starting with a YAML indicator such as
# "@") still renders as a YAML string. Integer and resource-quantity fields
# are deliberately left unquoted.
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: crawl-{{ id }}
  namespace: "{{ namespace }}"
  labels:
    crawl: "{{ id }}"
    role: crawler

spec:
  selector:
    matchLabels:
      crawl: "{{ id }}"
      role: crawler

  # Must match the name of the headless Service defined for the crawler.
  serviceName: crawl-{{ id }}
  replicas: {{ scale }}
  podManagementPolicy: Parallel

  # Delete the PVCs when the StatefulSet is deleted or scaled down.
  persistentVolumeClaimRetentionPolicy:
    whenDeleted: Delete
    whenScaled: Delete

  volumeClaimTemplates:
    - metadata:
        name: crawl-data
        labels:
          crawl: "{{ id }}"
          role: crawler

      spec:
        accessModes:
          - ReadWriteOnce

        resources:
          requests:
            storage: {{ requests_hd }}

        {% if volume_storage_class %}
        storageClassName: "{{ volume_storage_class }}"
        {% endif %}

  template:
    metadata:
      labels:
        crawl: "{{ id }}"
        role: crawler

    spec:
      terminationGracePeriodSeconds: {{ termination_grace_secs }}
      # NOTE: Jinja still expands expressions inside YAML comments, so the
      # variable referenced in the commented-out line below must exist.
      #nodeSelector: {{ crawl_node_selector }}
      volumes:
        - name: crawl-config
          configMap:
            name: crawl-config-{{ cid }}

      affinity:
        # Soft preference for nodes labelled with the configured crawler
        # node type.
        nodeAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 1
              preference:
                matchExpressions:
                  - key: nodeType
                    operator: In
                    values:
                      - "{{ crawler_node_type }}"

        # Soft preference for the same zone as this crawl's job pod.
        podAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 2
              podAffinityTerm:
                # Stable replacement for the deprecated
                # failure-domain.beta.kubernetes.io/zone label.
                topologyKey: "topology.kubernetes.io/zone"
                labelSelector:
                  matchLabels:
                    job-name: job-{{ id }}
                    crawl: "{{ id }}"

      # Tolerates the nodeType=crawling:NoSchedule taint.
      tolerations:
        - key: "nodeType"
          operator: "Equal"
          value: "crawling"
          effect: "NoSchedule"

      containers:
        - name: crawler
          image: "{{ crawler_image }}"
          imagePullPolicy: "{{ crawler_image_pull_policy }}"
          command:
            - crawl
            - --config
            - /tmp/crawl-config.json
            - --redisStoreUrl
            - "{{ redis_url }}"
          {%- if env.PROFILE_FILENAME %}
            - --profile
            - "@profiles/{{ env.PROFILE_FILENAME }}"
          {%- endif %}

          volumeMounts:
            # subPath mounts only the crawl-config.json key as a file.
            - name: crawl-config
              mountPath: /tmp/crawl-config.json
              subPath: crawl-config.json
              readOnly: true

            - name: crawl-data
              mountPath: /crawls

          envFrom:
            - configMapRef:
                name: shared-crawler-config

            - secretRef:
                name: storage-{{ storage_name }}

          # Env values must be strings for Kubernetes, hence the quoting.
          env:
            - name: CRAWL_ID
              value: "{{ id }}"

            - name: WEBHOOK_URL
              value: "{{ redis_url }}/crawls-done"

            - name: STORE_PATH
              value: "{{ env.STORE_PATH }}"

            - name: STORE_FILENAME
              value: "{{ env.STORAGE_FILENAME }}"

            - name: STORE_USER
              value: "{{ env.USER_ID }}"

          resources:
            limits:
              cpu: {{ crawler_limits_cpu }}
              memory: {{ crawler_limits_memory }}

            requests:
              cpu: {{ crawler_requests_cpu }}
              memory: {{ crawler_requests_memory }}

          # The liveness probe is only rendered when a non-zero port is
          # configured. The port stays unquoted: a quoted value would be
          # read as a named port rather than a port number.
          {% if crawler_liveness_port and crawler_liveness_port != '0' %}
          livenessProbe:
            httpGet:
              path: /healthz
              port: {{ crawler_liveness_port }}

            initialDelaySeconds: 15
            periodSeconds: 120
            failureThreshold: 3
          {% endif %}

---
# Headless Service (clusterIP: None) whose name matches the serviceName of
# the crawler StatefulSet; selects the pods labelled role=crawler for this
# crawl and exposes TCP port 9037 under the port name "screencast".
apiVersion: v1
kind: Service
metadata:
  name: crawl-{{ id }}
  # Pinned to the namespace the crawler StatefulSet declares, so the
  # Service it references via serviceName does not depend on the caller's
  # default namespace.
  namespace: "{{ namespace }}"
  labels:
    # Quoted so a numeric-looking id still renders as a string.
    crawl: "{{ id }}"
    role: crawler

spec:
  clusterIP: None
  selector:
    crawl: "{{ id }}"
    role: crawler

  ports:
    - protocol: TCP
      port: 9037
      name: screencast