- add an 'expire_at_duration_seconds' setting equal to 75% of the actual presign duration (i.e. less than 25% of the lifetime remains before the presigned URL actually expires), to ensure presigned URLs are refreshed earlier than they actually expire - set the cached expireAt time to the renew-at time for more frequent updates - update the QA configmap in place with refreshed presigned URLs when the expireAt time is reached - mount the qa config volume under /tmp/qa/ without subPath to get automatic updates, which the crawler will handle - tests: fix qa test typo (from main) - fixes #1864
209 lines
4.5 KiB
YAML
209 lines
4.5 KiB
YAML
# -------
# PVC
# -------
# Volume claim for a single crawler instance. It shares its name with the
# crawler pod defined later in this template, which mounts it at /crawls.
# Template tags are kept at column 0 so the rendered indentation does not
# depend on the template engine's whitespace-trimming settings.

apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: "{{ name }}"
  namespace: "{{ namespace }}"
  labels:
    # quoted so that an all-digit crawl id still renders as a string label
    crawl: "{{ id }}"
    role: crawler

spec:
  accessModes:
    - ReadWriteOnce

  resources:
    requests:
      storage: "{{ crawler_storage }}"

  # storageClassName is emitted only when a storage class is configured
{% if volume_storage_class %}
  storageClassName: "{{ volume_storage_class }}"
{% endif %}


# -------
# CRAWLER
# -------
# Crawler pod. The whole document, including its leading document separator,
# is rendered only when do_restart is not set.
# Template tags are kept at column 0 so the rendered indentation does not
# depend on the template engine's whitespace-trimming settings.
{% if not do_restart %}
---
apiVersion: v1
kind: Pod
metadata:
  name: "{{ name }}"
  namespace: "{{ namespace }}"
  labels:
    # quoted so that an all-digit crawl id still renders as a string label
    crawl: "{{ id }}"
    role: crawler

spec:
  hostname: "{{ name }}"
  subdomain: crawler

{% if priorityClassName %}
  priorityClassName: "{{ priorityClassName }}"
{% endif %}

  restartPolicy: OnFailure

  securityContext:
    runAsNonRoot: true
    # numeric ids: left unquoted so they render as integers
    runAsUser: {{ crawler_uid }}
    runAsGroup: {{ crawler_gid }}
    fsGroup: {{ crawler_fsgroup }}
    # NOTE(review): in the Kubernetes API these two fields belong to the
    # container-level securityContext, not the pod-level one, so they may be
    # ignored or rejected here. Confirm intent before moving them, since
    # enforcing a read-only root filesystem could affect the crawler.
    allowPrivilegeEscalation: false
    readOnlyRootFilesystem: true

  # integer number of seconds: left unquoted
  terminationGracePeriodSeconds: {{ termination_grace_secs }}

  volumes:
    - name: crawl-config
      configMap:
        name: crawl-config-{{ cid }}
{% if qa_source_crawl_id %}
    # QA replay config, present only for QA runs
    - name: qa-config
      configMap:
        name: qa-replay-{{ qa_source_crawl_id }}
{% endif %}
    - name: crawl-data
      persistentVolumeClaim:
        claimName: "{{ name }}"

  affinity:
{% if crawler_node_type %}
    # hard requirement: schedule only on nodes labelled with this nodeType
    nodeAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        nodeSelectorTerms:
          - matchExpressions:
              - key: nodeType
                operator: In
                values:
                  - "{{ crawler_node_type }}"
{% endif %}

    # soft preference: co-locate with other pods of the same crawl
    podAffinity:
      preferredDuringSchedulingIgnoredDuringExecution:
        - weight: 10
          podAffinityTerm:
            topologyKey: "kubernetes.io/hostname"
            labelSelector:
              matchExpressions:
                - key: crawl
                  operator: In
                  values:
                    - "{{ id }}"

  tolerations:
    - key: nodeType
      operator: Equal
      value: crawling
      effect: NoSchedule
    - key: node.kubernetes.io/not-ready
      operator: Exists
      tolerationSeconds: 300
      effect: NoExecute
    - key: node.kubernetes.io/unreachable
      operator: Exists
      effect: NoExecute
      tolerationSeconds: 300

  containers:
    - name: crawler
      image: "{{ crawler_image }}"
      imagePullPolicy: "{{ crawler_image_pull_policy }}"
      command:
        # entrypoint is "qa" for QA runs and "crawl" otherwise
        - {{ "crawl" if not qa_source_crawl_id else "qa" }}
        - --config
        - /tmp/crawl-config.json
        - --workers
        - "{{ workers }}"
        - --redisStoreUrl
        - "{{ redis_url }}"
{% if qa_source_crawl_id %}
        - --qaSource
        - /tmp/qa/qa-config.json
{% elif profile_filename %}
        - --profile
        - "@{{ profile_filename }}"
{% endif %}
      volumeMounts:
        - name: crawl-config
          mountPath: /tmp/crawl-config.json
          subPath: crawl-config.json
          readOnly: true

{% if qa_source_crawl_id %}
        # mounted as a directory without subPath so that in-place updates to
        # the QA configmap reach the running crawler
        - name: qa-config
          mountPath: /tmp/qa/
          readOnly: true
{% endif %}

        - name: crawl-data
          mountPath: /crawls
      envFrom:
        - configMapRef:
            name: shared-crawler-config

        - secretRef:
            name: "{{ storage_secret }}"

{% if signing_secret %}
        - secretRef:
            name: "{{ signing_secret }}"
{% endif %}

      env:
        - name: HOME
          value: /crawls/home

        - name: CRAWL_ID
          value: "{{ id }}"

        - name: WEBHOOK_URL
          value: "{{ redis_url }}/crawls-done"

        - name: STORE_PATH
          value: "{{ storage_path }}"

        - name: STORE_FILENAME
          value: "{{ storage_filename }}"

        - name: STORE_USER
          value: "{{ userid }}"

        - name: WARC_PREFIX
          value: "{{ warc_prefix }}"

{% if crawler_socks_proxy_host %}
        - name: SOCKS_HOST
          value: "{{ crawler_socks_proxy_host }}"
{% if crawler_socks_proxy_port %}
        - name: SOCKS_PORT
          value: "{{ crawler_socks_proxy_port }}"
{% endif %}
{% endif %}

      resources:
        limits:
          memory: "{{ memory_limit }}"

        requests:
          cpu: "{{ cpu }}"
          memory: "{{ memory }}"

{% if crawler_liveness_port and crawler_liveness_port != '0' %}
      livenessProbe:
        httpGet:
          path: /healthz
          # left unquoted so it renders as a port number; a quoted value
          # would presumably be read as a named port
          port: {{ crawler_liveness_port }}

        initialDelaySeconds: 15
        periodSeconds: 120
        failureThreshold: 3
{% endif %}

{% endif %}