Fixes #1337

Crawl timeout is tracked via the `elapsedCrawlTime` field on the crawl status. It is similar to regular crawl execution time, but only counts one pod if scale > 1; if scale == 1, the two values are equivalent. The crawl is gracefully stopped when the elapsed execution time exceeds the timeout. For better responsiveness, the current crawl time since the last update interval is also added.

Details:
- handle crawl timeout via elapsed crawl time (the longest running time of a single pod) instead of an expire time
- include the current running time since the last update for best precision
- more accurately count the elapsed time the crawl is actually running
- store `elapsedCrawlTime` in addition to `crawlExecTime`, keeping the longest duration of each pod since the last update interval

---------

Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
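For illustration, here is a minimal sketch of the bookkeeping described above, not the operator's actual code: the stored elapsed time plus the longest running time of any single pod since the last update interval, checked against the job's `timeout`. Function and parameter names are hypothetical.

```python
from datetime import datetime, timedelta


def compute_elapsed_crawl_time(
    elapsed_so_far: timedelta,
    pod_start_times: dict[str, datetime],
    last_update_time: datetime,
    now: datetime,
) -> timedelta:
    """Stored elapsed time plus the longest running time of any single pod
    since the last update interval (only one pod counts, even if scale > 1)."""
    longest_since_update = timedelta(0)
    for started_at in pod_start_times.values():
        # Count only time after the last update so intervals are not double-counted.
        running_since = max(started_at, last_update_time)
        if now > running_since:
            longest_since_update = max(longest_since_update, now - running_since)
    return elapsed_so_far + longest_since_update


def should_stop_gracefully(elapsed: timedelta, timeout_seconds: int) -> bool:
    """The crawl is gracefully stopped once elapsed time exceeds the timeout
    (a timeout of 0 means no limit)."""
    return timeout_seconds > 0 and elapsed.total_seconds() > timeout_seconds
```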
		
			
				
	
	
		
CrawlJob template (YAML):
```yaml
apiVersion: btrix.cloud/v1
kind: CrawlJob
metadata:
  name: crawljob-{{ id }}
  labels:
    crawl: "{{ id }}"
    role: "job"
    btrix.org: "{{ oid }}"
    btrix.user: "{{ userid }}"
    btrix.storage: "{{ storage_name }}"

spec:
  selector:
    matchLabels:
      crawl: "{{ id }}"

  id: "{{ id }}"
  userid: "{{ userid }}"
  cid: "{{ cid }}"
  oid: "{{ oid }}"
  scale: {{ scale }}
  maxCrawlSize: {{ max_crawl_size }}
  timeout: {{ timeout }}
  manual: {{ manual }}
  ttlSecondsAfterFinished: 30

  storageName: "{{ storage_name }}"
```
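The placeholders above are filled in when the backend creates the job from this template. As a minimal sketch of that step, assuming a hypothetical `crawl_job.yaml` path and made-up parameter values, the template can be rendered with Jinja2:

```python
from jinja2 import Template

# Hypothetical parameter values; the real ones come from the crawl config and org.
params = {
    "id": "a1b2c3",
    "cid": "config-1",
    "oid": "org-1",
    "userid": "user-1",
    "storage_name": "default",
    "scale": 2,
    "max_crawl_size": 0,
    "timeout": 3600,  # seconds; crawl is gracefully stopped once elapsedCrawlTime exceeds this
    "manual": "true",
}

with open("crawl_job.yaml") as fh:  # hypothetical path to the template above
    rendered = Template(fh.read()).render(**params)

print(rendered)  # YAML ready to be applied as a CrawlJob custom resource
```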