* k8s: add tolerations for 'nodeType=crawling:NoSchedule' to allow scheduling crawling on designated nodes for crawler and profile jobs and statefulsets * add affinity for 'nodeType=crawling' on crawling and profile browser statefulsets * refactor crawljob: combine crawl_updater logic into base crawl_job * increment new 'crawlAttemptCount' counter on crawlconfig when a crawl is started, not necessarily finished, to avoid deleting configs that had attempted but unfinished crawls * better external mongodb support: use MONGO_DB_URL to set a custom url directly, otherwise build it from username, password and mongo host
84 lines
1.9 KiB
YAML
84 lines
1.9 KiB
YAML
# Kubernetes Job template that runs the btrixcloud.k8s.profile_job module.
#
# This file is a template: placeholders are substituted first and the result
# is then parsed as YAML. Every substituted scalar is therefore wrapped in
# double quotes so that it stays a string whatever the rendered value looks
# like (empty, all digits, boolean-like, or containing ": " or " #").
apiVersion: batch/v1
kind: Job
metadata:
  name: "job-{{ id }}"
  labels:
    btrix.profile: "1"
    btrix.archive: "{{ aid }}"
    btrix.user: "{{ userid }}"
    {%- if baseprofile %}
    btrix.baseprofile: "{{ baseprofile }}"
    {%- endif %}

spec:
  template:
    metadata:
      # Pod labels repeat the Job labels above.
      labels:
        btrix.profile: "1"
        btrix.archive: "{{ aid }}"
        btrix.user: "{{ userid }}"
        {%- if baseprofile %}
        btrix.baseprofile: "{{ baseprofile }}"
        {%- endif %}

    spec:
      restartPolicy: OnFailure

      # Soft preference only: nodes whose nodeType label matches the
      # configured crawler node type are favoured, but not required.
      affinity:
        nodeAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 1
              preference:
                matchExpressions:
                  - key: nodeType
                    operator: In
                    values:
                      - "{{ crawler_node_type }}"

      # Tolerate the nodeType=crawling:NoSchedule taint.
      # NOTE(review): the value here is fixed to "crawling" while the affinity
      # above uses the crawler_node_type variable; confirm this is intended.
      tolerations:
        - key: "nodeType"
          operator: "Equal"
          value: "crawling"
          effect: "NoSchedule"

      containers:
        - name: crawl-job
          image: "{{ job_image }}"
          imagePullPolicy: Always
          command: ["python", "-m", "btrixcloud.k8s.profile_job"]

          volumeMounts:
            - name: config-volume
              mountPath: /config

          env:
            # Read from the pod's own 'job-name' label.
            - name: JOB_ID
              valueFrom:
                fieldRef:
                  fieldPath: metadata.labels['job-name']

            - name: STORE_PATH
              value: "{{ storage_path }}"

            - name: STORAGE_NAME
              value: "{{ storage_name }}"

            - name: IDLE_TIMEOUT
              value: "60"

            - name: START_URL
              value: "{{ url }}"

            - name: PROFILE_PATH
              value: "{{ profile_path }}"

      volumes:
        # Exposes config.yaml from the shared-job-config ConfigMap; mounted
        # into the container at /config.
        - name: config-volume
          configMap:
            name: shared-job-config
            items:
              - key: config.yaml
                path: config.yaml