browsertrix/backend/btrixcloud/k8s/templates/crawler.yaml
Ilya Kreymer dee354f252 affinity: add affinity for k8s crawl deployments:
- prefer deploy crawler, redis and job to same zone
- prefer deploying crawler and job together via crawler node type, redis via redis node type (all optional)
2022-06-07 21:52:04 -07:00

336 lines
7.8 KiB
YAML

# --------
# REDIS
# --------
---
# Single-replica Redis StatefulSet for one crawl. It mounts redis.conf from
# the shared-job-config ConfigMap and keeps its data on a per-crawl PVC.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: redis-{{ id }}
  namespace: "{{ namespace }}"
  labels:
    crawl: "{{ id }}"
    role: redis

spec:
  selector:
    matchLabels:
      crawl: "{{ id }}"
      role: redis

  # Must match the name of the headless Service that fronts this set.
  serviceName: redis-{{ id }}
  replicas: 1
  podManagementPolicy: Parallel

  # PVC clean-up policy. This is a field of the StatefulSet spec (not of the
  # PVC template spec), and the only accepted values are Retain and Delete.
  # It only takes effect on clusters where StatefulSet PVC auto-deletion is
  # available and enabled.
  persistentVolumeClaimRetentionPolicy:
    whenDeleted: Delete
    whenScaled: Delete

  volumeClaimTemplates:
    - metadata:
        name: redis-data
        labels:
          crawl: "{{ id }}"
          # Label the claim with the role of the pod that owns it.
          role: redis

      spec:
        accessModes:
          - ReadWriteOnce

        resources:
          requests:
            storage: 1Gi

        {% if volume_storage_class %}
        storageClassName: "{{ volume_storage_class }}"
        {% endif %}

  template:
    metadata:
      labels:
        crawl: "{{ id }}"
        role: redis

    spec:
      terminationGracePeriodSeconds: 10
      #nodeSelector: {{ crawl_node_selector }}

      volumes:
        - name: shared-redis-conf
          configMap:
            name: shared-job-config
            items:
              - key: redis.conf
                path: redis.conf

      #  - name: redis-conf
      #    emptyDir: {}

      # Disabled: per-replica config generation (replica pods would get a
      # "replicaof" line appended). Unused while replicas is fixed at 1.
      #initContainers:
      #  - name: init-redis
      #    image: {{ redis_image }}
      #    imagePullPolicy: {{ redis_image_pull_policy }}
      #    command:
      #      - bash
      #      - "-c"
      #      - |
      #        set -ex
      #        # if no ordinal found in hostname, something is wrong, exit
      #        [[ `hostname` =~ (.+)-([0-9]+)$ ]] || exit 1
      #        ordinal=${BASH_REMATCH[2]}
      #        base=${BASH_REMATCH[1]}
      #        # if ordinal is 0, use main config, otherwise use replica
      #        cp /shared-redis-conf/redis.conf /redis-conf/redis.conf
      #        if [[ $ordinal -ne 0 ]]; then
      #          echo "replicaof $base-0.$base 6379" >> /redis-conf/redis.conf
      #        fi
      #    volumeMounts:
      #      - name: shared-redis-conf
      #        mountPath: /shared-redis-conf
      #      - name: redis-conf
      #        mountPath: /redis-conf

      # Scheduling hints only ("preferred"): neither rule blocks scheduling
      # when it cannot be satisfied.
      affinity:
        # Prefer nodes labelled with the configured redis node type.
        nodeAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 1
              preference:
                matchExpressions:
                  - key: nodeType
                    operator: In
                    values:
                      - "{{ redis_node_type }}"

        # Prefer the zone where the pod of this crawl's job is running.
        # NOTE(review): this topology key is the deprecated beta zone label;
        # topology.kubernetes.io/zone is the stable name. Switch once all
        # target clusters are confirmed to carry the stable label.
        podAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 2
              podAffinityTerm:
                topologyKey: "failure-domain.beta.kubernetes.io/zone"
                labelSelector:
                  matchLabels:
                    job-name: job-{{ id }}
                    crawl: "{{ id }}"

      containers:
        - name: redis
          image: "{{ redis_image }}"
          imagePullPolicy: "{{ redis_image_pull_policy }}"

          # Load the mounted config file and turn on append-only persistence.
          args: ["/redis-conf/redis.conf", "--appendonly", "yes"]

          volumeMounts:
            - name: redis-data
              mountPath: /data

            - name: shared-redis-conf
              mountPath: /redis-conf

          resources:
            limits:
              cpu: {{ redis_limits_cpu }}
              memory: {{ redis_limits_memory }}

            requests:
              cpu: {{ redis_requests_cpu }}
              memory: {{ redis_requests_memory }}

          # Ready once the server answers a ping.
          readinessProbe:
            exec:
              command:
                - redis-cli
                - ping
---
# Headless Service (clusterIP: None) for the Redis StatefulSet of one crawl.
# Its name matches that StatefulSet's serviceName.
apiVersion: v1
kind: Service

metadata:
  name: redis-{{ id }}
  # Declared explicitly so the Service always lands in the same namespace as
  # the StatefulSet it fronts.
  namespace: "{{ namespace }}"
  labels:
    crawl: "{{ id }}"
    role: redis

spec:
  clusterIP: None
  selector:
    crawl: "{{ id }}"
    role: redis

  ports:
    - protocol: TCP
      port: 6379
      name: redis

# -------
# CRAWLER
# -------
---
# Crawler StatefulSet for one crawl: "scale" replicas, each with its own
# crawl-data PVC mounted at /crawls and the crawl config mounted from a
# per-config ConfigMap.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: crawl-{{ id }}
  namespace: "{{ namespace }}"
  labels:
    crawl: "{{ id }}"
    role: crawler

spec:
  selector:
    matchLabels:
      crawl: "{{ id }}"
      role: crawler

  # Must match the name of the headless Service that fronts this set.
  serviceName: crawl-{{ id }}
  replicas: {{ scale }}
  podManagementPolicy: Parallel

  # PVC clean-up policy. This is a field of the StatefulSet spec (not of the
  # PVC template spec), and the only accepted values are Retain and Delete.
  # It only takes effect on clusters where StatefulSet PVC auto-deletion is
  # available and enabled.
  persistentVolumeClaimRetentionPolicy:
    whenDeleted: Delete
    whenScaled: Delete

  volumeClaimTemplates:
    - metadata:
        name: crawl-data
        labels:
          crawl: "{{ id }}"
          role: crawler

      spec:
        accessModes:
          - ReadWriteOnce

        resources:
          requests:
            storage: {{ requests_hd }}

        {% if volume_storage_class %}
        storageClassName: "{{ volume_storage_class }}"
        {% endif %}

  template:
    metadata:
      labels:
        crawl: "{{ id }}"
        role: crawler

    spec:
      terminationGracePeriodSeconds: {{ termination_grace_secs }}
      #nodeSelector: {{ crawl_node_selector }}

      volumes:
        - name: crawl-config
          configMap:
            name: crawl-config-{{ cid }}

      # Scheduling hints only ("preferred"): neither rule blocks scheduling
      # when it cannot be satisfied.
      affinity:
        # Prefer nodes labelled with the configured crawler node type.
        nodeAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 1
              preference:
                matchExpressions:
                  - key: nodeType
                    operator: In
                    values:
                      - "{{ crawler_node_type }}"

        # Prefer the zone where the pod of this crawl's job is running.
        # NOTE(review): this topology key is the deprecated beta zone label;
        # topology.kubernetes.io/zone is the stable name. Switch once all
        # target clusters are confirmed to carry the stable label.
        podAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 2
              podAffinityTerm:
                topologyKey: "failure-domain.beta.kubernetes.io/zone"
                labelSelector:
                  matchLabels:
                    job-name: job-{{ id }}
                    crawl: "{{ id }}"

      containers:
        - name: crawler
          image: "{{ crawler_image }}"
          imagePullPolicy: "{{ crawler_image_pull_policy }}"

          # The --profile pair is appended only when a profile is configured.
          command:
            - crawl
            - --config
            - /tmp/crawl-config.json
            - --redisStoreUrl
            - "{{ redis_url }}"
          {%- if profile_filename %}
            - --profile
            - "@profiles/{{ profile_filename }}"
          {%- endif %}

          volumeMounts:
            # Only the crawl-config.json key is projected, as a single file.
            - name: crawl-config
              mountPath: /tmp/crawl-config.json
              subPath: crawl-config.json
              readOnly: true

            - name: crawl-data
              mountPath: /crawls

          envFrom:
            - configMapRef:
                name: shared-crawler-config

            - secretRef:
                name: storage-{{ storage_name }}

          env:
            # Quoted: env values must be strings even if the id looks numeric.
            - name: CRAWL_ID
              value: "{{ id }}"

            - name: WEBHOOK_URL
              value: "{{ redis_url }}/crawls-done"

            - name: STORE_PATH
              valueFrom:
                configMapKeyRef:
                  name: crawl-config-{{ cid }}
                  key: STORE_PATH

            - name: STORE_FILENAME
              valueFrom:
                configMapKeyRef:
                  name: crawl-config-{{ cid }}
                  key: STORE_FILENAME

            # Note the key name differs from the variable name here.
            - name: STORE_USER
              valueFrom:
                configMapKeyRef:
                  name: crawl-config-{{ cid }}
                  key: USER_ID

          resources:
            limits:
              cpu: {{ crawler_limits_cpu }}
              memory: {{ crawler_limits_memory }}

            requests:
              cpu: {{ crawler_requests_cpu }}
              memory: {{ crawler_requests_memory }}

          # Liveness probe is emitted only for a non-empty, non-'0' port.
          # The port stays unquoted so it renders as a number rather than
          # being read as a named port.
          {% if crawler_liveness_port and crawler_liveness_port != '0' %}
          livenessProbe:
            httpGet:
              path: /healthz
              port: {{ crawler_liveness_port }}

            initialDelaySeconds: 15
            periodSeconds: 120
            failureThreshold: 3
          {% endif %}

---
# Headless Service (clusterIP: None) for the crawler StatefulSet of one crawl.
# Its name matches that StatefulSet's serviceName; it exposes the screencast
# port of the crawler pods.
apiVersion: v1
kind: Service

metadata:
  name: crawl-{{ id }}
  # Declared explicitly so the Service always lands in the same namespace as
  # the StatefulSet it fronts.
  namespace: "{{ namespace }}"
  labels:
    crawl: "{{ id }}"
    role: crawler

spec:
  clusterIP: None
  selector:
    crawl: "{{ id }}"
    role: crawler

  ports:
    - protocol: TCP
      port: 9037
      name: screencast