Fixes #1893
- Removes crawl workflow-scoped configmaps and replaces them with operator-controlled per-crawl configmaps that contain only the JSON config passed to Browsertrix Crawler (as a volume).
- Other configmap settings are replaced by the custom CrawlJob options (most already were; this just adds profile_filename and storage_filename).
- Cron jobs are also updated to create a CrawlJob without relying on configmaps, querying the db for additional settings.
- The `userid` associated with cron jobs is set to the user that last modified the schedule of the crawl, rather than whoever last modified the workflow.
- Various functions that deal with updating configmaps have been removed, including in migrations.
- New migration 0029 added to remove all crawl workflow configmaps.
37 lines
856 B
YAML
37 lines
856 B
YAML
# Template for a CrawlJob custom resource (btrix.cloud/v1).
# Values inside double curly braces are placeholders substituted when the
# template is rendered; the rendered text is then parsed as YAML.
# NOTE(review): the nesting below follows the standard Kubernetes object
# layout (metadata.labels, spec.selector.matchLabels) - confirm the spec
# field placement against the CrawlJob CRD.
apiVersion: btrix.cloud/v1
kind: CrawlJob
metadata:
  name: "crawljob-{{ id }}"
  labels:
    crawl: "{{ id }}"
    # "qa-job" when a QA source crawl is given, otherwise "job".
    role: '{{ "qa-job" if qa_source else "job" }}'
    btrix.org: "{{ oid }}"
    btrix.user: "{{ userid }}"
    btrix.storage: "{{ storage_name }}"

spec:
  # Selects on the same crawl id that is set in metadata.labels.crawl.
  selector:
    matchLabels:
      crawl: "{{ id }}"

  id: "{{ id }}"
  userid: "{{ userid }}"
  cid: "{{ cid }}"
  oid: "{{ oid }}"
  # Left unquoted so the rendered value keeps its non-string type.
  scale: {{ scale }}

  profile_filename: "{{ profile_filename }}"
  storage_filename: "{{ storage_filename }}"

  # For QA runs (qa_source set) the size limit and timeout are forced to 0.
  maxCrawlSize: {{ max_crawl_size if not qa_source else 0 }}
  timeout: {{ timeout if not qa_source else 0 }}
  qaSourceCrawlId: "{{ qa_source }}"

  manual: {{ manual }}
  crawlerChannel: "{{ crawler_channel }}"
  # 30 for regular crawls, 0 for QA runs.
  ttlSecondsAfterFinished: {{ 30 if not qa_source else 0 }}
  warcPrefix: "{{ warc_prefix }}"

  storageName: "{{ storage_name }}"