Some of the `securityContext` settings must be set on the container rather than on the pod, including the read-only root file system, which was not previously enabled. This change enables the read-only root file system. It also mounts the crawler's /tmp directory from the same volume as /crawls, because the crawler currently writes to /tmp, which would otherwise become read-only.
249 lines
5.6 KiB
YAML
249 lines
5.6 KiB
YAML
# -------
# PVC
# -------
# Storage claim for one crawler instance. It is requested as ReadWriteOnce
# with the size supplied by the template context, and is labelled with the
# crawl id and the `crawler` role.
#
# Template control tags in this document are kept at column 0 so that the
# rendered indentation does not depend on the template engine's
# whitespace-trimming settings.

apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: {{ name }}
  namespace: {{ namespace }}
  labels:
    # Quoted so that an id which happens to look like a number or a boolean
    # still renders as a string; label values must be strings.
    crawl: "{{ id }}"
    role: crawler

spec:
  accessModes:
    - ReadWriteOnce

  resources:
    requests:
      storage: {{ storage }}

# Only set an explicit storage class when one is configured; otherwise the
# field is omitted entirely.
{% if volume_storage_class %}
  storageClassName: {{ volume_storage_class }}
{% endif %}
# -------
# CRAWLER
# -------
# Crawler pod. The whole document is emitted only when init_crawler is set.
#
# Template control tags are kept at column 0 so that the rendered
# indentation does not depend on the template engine's whitespace-trimming
# settings.
{% if init_crawler %}
---
apiVersion: v1
kind: Pod
metadata:
  name: {{ name }}
  namespace: {{ namespace }}
  labels:
    # Quoted so that an id which happens to look like a number or a boolean
    # still renders as a string; label values must be strings.
    crawl: "{{ id }}"
    role: crawler
    network-policy: limit-crawler-egress

spec:
  hostname: {{ name }}
  subdomain: crawler

{% if priorityClassName %}
  priorityClassName: {{ priorityClassName }}
{% endif %}

  restartPolicy: OnFailure

  # Pod-level identity settings. Container-only settings
  # (allowPrivilegeEscalation, readOnlyRootFilesystem) are set on the
  # container's own securityContext further down.
  securityContext:
    runAsNonRoot: true
    runAsUser: {{ crawler_uid }}
    runAsGroup: {{ crawler_gid }}
    fsGroup: {{ crawler_fsgroup }}

  terminationGracePeriodSeconds: {{ termination_grace_secs }}

  volumes:
    - name: crawl-config
      configMap:
        name: crawl-config-{{ id }}
{% if qa_source_crawl_id %}
    - name: qa-config
      configMap:
        name: qa-replay-{{ qa_source_crawl_id }}
{% endif %}
    # Backed by the claim that shares this pod's name.
    - name: crawl-data
      persistentVolumeClaim:
        claimName: {{ name }}
{% if proxy_id %}
    # NOTE(review): 0600 is intentionally an unquoted integer; a YAML 1.1
    # loader reads it as octal (decimal 384). Confirm the loader in use
    # before changing its spelling.
    - name: proxies
      secret:
        secretName: proxies
        defaultMode: 0600
    - name: force-user-and-group-name
      secret:
        secretName: force-user-and-group-name
        defaultMode: 0600
{% endif %}

  affinity:
{% if crawler_node_type %}
    # Hard requirement: only schedule onto nodes of the configured type.
    nodeAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        nodeSelectorTerms:
          - matchExpressions:
              - key: nodeType
                operator: In
                values:
                  - "{{ crawler_node_type }}"
{% endif %}

    # Soft preference: co-locate with other pods of the same crawl on one
    # host.
    podAffinity:
      preferredDuringSchedulingIgnoredDuringExecution:
        - weight: 10
          podAffinityTerm:
            topologyKey: "kubernetes.io/hostname"
            labelSelector:
              matchExpressions:
                - key: crawl
                  operator: In
                  values:
                    # Quoted to match the string-valued `crawl` label.
                    - "{{ id }}"

  tolerations:
    - key: nodeType
      operator: Equal
      value: crawling
      effect: NoSchedule
    - key: node.kubernetes.io/not-ready
      operator: Exists
      tolerationSeconds: 300
      effect: NoExecute
    - key: node.kubernetes.io/unreachable
      operator: Exists
      effect: NoExecute
      tolerationSeconds: 300

  containers:
    - name: crawler
      image: {{ crawler_image }}
      imagePullPolicy: {{ crawler_image_pull_policy }}
      # Runs `qa` when a QA source crawl is configured, `crawl` otherwise.
      command:
        - {{ "crawl" if not qa_source_crawl_id else "qa" }}
        - --config
        - /tmp/config/crawl-config.json
        - --workers
        - "{{ workers }}"
        - --redisStoreUrl
        - {{ redis_url }}
{% if qa_source_crawl_id %}
        - --qaSource
        - /tmp/qa/qa-config.json
{% elif profile_filename %}
        - --profile
        - "@{{ profile_filename }}"
{% endif %}
{% if proxy_id %}
        - --proxyServer
        - "{{ proxy_url }}"
{% if proxy_ssh_private_key %}
        - --sshProxyPrivateKeyFile
        - /tmp/ssh-proxy/private-key
{% endif %}
{% if proxy_ssh_host_public_key %}
        - --sshProxyKnownHostsFile
        - /tmp/ssh-proxy/known-hosts
{% endif %}
{% endif %}

      volumeMounts:
        - name: crawl-config
          mountPath: /tmp/config/
          readOnly: true

{% if qa_source_crawl_id %}
        - name: qa-config
          mountPath: /tmp/qa/
          readOnly: true
{% endif %}
{% if proxy_id %}
{% if proxy_ssh_private_key %}
        - name: proxies
          mountPath: /tmp/ssh-proxy/private-key
          subPath: {{ proxy_id }}-private-key
          readOnly: true
{% endif %}
{% if proxy_ssh_host_public_key %}
        - name: proxies
          mountPath: /tmp/ssh-proxy/known-hosts
          subPath: {{ proxy_id }}-known-hosts
          readOnly: true
{% endif %}
        # Overlay the passwd/group files from the secret onto the
        # container's /etc/passwd and /etc/group when a proxy is in use.
        - name: force-user-and-group-name
          mountPath: /etc/passwd
          subPath: passwd
          readOnly: true
        - name: force-user-and-group-name
          mountPath: /etc/group
          subPath: group
          readOnly: true
{% endif %}
        - name: crawl-data
          mountPath: /crawls

        # The root filesystem is read-only (see securityContext below), so
        # /tmp is backed by a `tmp` sub-directory of the crawl-data volume
        # to stay writable.
        - name: crawl-data
          mountPath: /tmp
          subPath: tmp

      envFrom:
        - configMapRef:
            name: shared-crawler-config

        - secretRef:
            name: {{ storage_secret }}

{% if signing_secret %}
        - secretRef:
            name: {{ signing_secret }}
{% endif %}

      env:
        # HOME is placed on the crawl-data volume (mounted at /crawls).
        - name: HOME
          value: /crawls/home

        - name: CRAWL_ID
          value: "{{ id }}"

        - name: WEBHOOK_URL
          value: "{{ redis_url }}/crawls-done"

        - name: STORE_PATH
          value: "{{ storage_path }}"

        - name: STORE_FILENAME
          value: "{{ storage_filename }}"

        - name: STORE_USER
          value: "{{ userid }}"

        - name: WARC_PREFIX
          value: "{{ warc_prefix }}"

      # Only a memory limit is set; CPU has a request but no limit.
      resources:
        limits:
          memory: "{{ memory_limit }}"

        requests:
          cpu: "{{ cpu }}"
          memory: "{{ memory }}"

      # The probe is omitted when no liveness port is configured or when it
      # is the string '0'.
{% if crawler_liveness_port and crawler_liveness_port != '0' %}
      livenessProbe:
        httpGet:
          path: /healthz
          port: {{ crawler_liveness_port }}

        initialDelaySeconds: 15
        periodSeconds: 120
        failureThreshold: 3
{% endif %}

      # Container-level hardening; these fields are set per container, not
      # on the pod-level securityContext.
      securityContext:
        allowPrivilegeEscalation: false
        readOnlyRootFilesystem: true

{% endif %}