Fixes #1337

Crawl timeout is tracked via the `elapsedCrawlTime` field on the crawl status. This is similar to the regular crawl execution time, but it counts only one pod when scale > 1; when scale == 1, the two times are equivalent. The crawl is gracefully stopped when the elapsed execution time exceeds the timeout. For more responsiveness, the current crawl time since the last update interval is also added.

Details:
- handle crawl timeout via elapsed crawl time (the longest running time of a single pod) instead of expire time
- include the current running time since the last update for best precision
- more accurately count the elapsed time the crawl is actually running
- store elapsedCrawlTime in addition to crawlExecTime, storing the longest duration of each pod since the last test interval

---------

Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
		
			
				
	
	
		
			29 lines
		
	
	
		
			539 B
		
	
	
	
		
			YAML
		
	
	
	
	
	
			
		
		
	
	
			29 lines
		
	
	
		
			539 B
		
	
	
	
		
			YAML
		
	
	
	
	
	
# CrawlJob custom-resource manifest template.
#
# The `{{ ... }}` placeholders look like Jinja-style template variables and
# must be rendered BEFORE this text is parsed as YAML: the unquoted ones
# (scale, maxCrawlSize, timeout, manual) would otherwise be read as YAML flow
# mappings.
apiVersion: btrix.cloud/v1
kind: CrawlJob
metadata:
  name: crawljob-{{ id }}
  labels:
    # Label values are quoted so they stay strings whatever the rendered
    # value looks like (all-digit ids, boolean-looking text, ...).
    crawl: "{{ id }}"
    role: "job"
    btrix.org: "{{ oid }}"
    btrix.user: "{{ userid }}"
    btrix.storage: "{{ storage_name }}"

spec:
  # Uses the same `crawl` label value that is set in metadata.labels above.
  selector:
    matchLabels:
      crawl: "{{ id }}"

  id: "{{ id }}"
  userid: "{{ userid }}"
  cid: "{{ cid }}"
  oid: "{{ oid }}"
  # The four placeholders below are intentionally left unquoted so the
  # rendered value keeps its native YAML type instead of becoming a string.
  # NOTE(review): presumably scale is the number of crawler pods - confirm.
  scale: {{ scale }}
  maxCrawlSize: {{ max_crawl_size }}
  # NOTE(review): presumably compared against the crawl's elapsed crawl time
  # to stop the crawl gracefully; units are not visible here - confirm.
  timeout: {{ timeout }}
  # NOTE(review): must render to a YAML boolean (`true`/`false`); a Python
  # style `True`/`False` is only a boolean under YAML 1.1 parsers - confirm
  # what the renderer emits.
  manual: {{ manual }}
  ttlSecondsAfterFinished: 30

  storageName: "{{ storage_name }}"