Fixes #1893 - Removes crawl workflow-scoped configmaps and replaces them with operator-controlled per-crawl configmaps that only contain the JSON config passed to Browsertrix Crawler (as a volume). - Other configmap settings are replaced by custom CrawlJob options (most already were; this just adds profile_filename and storage_filename). - Cron jobs are also updated to create a CrawlJob without relying on configmaps, querying the db for additional settings. - The `userid` associated with cron jobs is set to the user who last modified the schedule of the crawl, rather than whoever last modified the workflow. - Various functions that deal with updating configmaps have been removed, including in migrations. - New migration 0029 added to remove all crawl workflow configmaps.
		
			
				
	
	
		
			37 lines
		
	
	
		
			856 B
		
	
	
	
		
			YAML
		
	
	
	
	
	
			
		
		
	
	
			37 lines
		
	
	
		
			856 B
		
	
	
	
		
			YAML
		
	
	
	
	
	
# CrawlJob custom resource template.
# The double-brace placeholders are Jinja-style expressions that must be
# rendered before this text is parsed as YAML. When qa_source is set, the
# job is labelled as a QA run and several limits below are rendered as 0.
apiVersion: btrix.cloud/v1
kind: CrawlJob
metadata:
  name: crawljob-{{ id }}
  labels:
    crawl: "{{ id }}"
    # "qa-job" when qa_source is truthy, otherwise "job".
    role: {{ "qa-job" if qa_source else "job" }}
    btrix.org: "{{ oid }}"
    btrix.user: "{{ userid }}"
    btrix.storage: "{{ storage_name }}"

spec:
  # Matches on the same crawl label that is set in metadata.labels above.
  selector:
    matchLabels:
      crawl: "{{ id }}"

  id: "{{ id }}"
  userid: "{{ userid }}"
  cid: "{{ cid }}"
  oid: "{{ oid }}"
  scale: {{ scale }}

  # Per the accompanying change description, these two options were added
  # to carry settings that previously came from workflow configmaps.
  profile_filename: "{{ profile_filename }}"
  storage_filename: "{{ storage_filename }}"

  # Size and time limits are rendered as 0 for QA runs, regardless of the
  # values supplied for max_crawl_size and timeout.
  maxCrawlSize: {{ max_crawl_size if not qa_source else 0 }}
  timeout: {{ timeout if not qa_source else 0 }}
  # Renders as an empty string when no QA source crawl is given
  # (assuming qa_source is passed as an empty or undefined value - confirm).
  qaSourceCrawlId: "{{ qa_source }}"

  manual: {{ manual }}
  crawlerChannel: "{{ crawler_channel }}"
  # 30 for regular crawls, 0 for QA runs.
  ttlSecondsAfterFinished: {{ 30 if not qa_source else 0 }}
  warcPrefix: "{{ warc_prefix }}"

  storageName: "{{ storage_name }}"
