Configure crawler disk utilization threshold via helm chart (#748)
This commit is contained in:
		
							parent
							
								
									f6f3b7abba
								
							
						
					
					
						commit
						11ca3e678a
					
				@ -14,5 +14,5 @@ def test_settings():
 | 
			
		||||
        "jwtTokenLifetime": 86400,
 | 
			
		||||
        "defaultBehaviorTimeSeconds": 300,
 | 
			
		||||
        "maxPagesPerCrawl": 2,
 | 
			
		||||
        "defaultPageLoadTimeSeconds": 120
 | 
			
		||||
        "defaultPageLoadTimeSeconds": 120,
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@ -21,6 +21,11 @@ default_behavior_time_seconds: 300
 | 
			
		||||
# default time to wait for page to fully load before running behaviors (in seconds)
 | 
			
		||||
default_page_load_time_seconds: 120
 | 
			
		||||
 | 
			
		||||
# disk utilization threshold percentage - when used disk space passes
 | 
			
		||||
# this percentage of total, crawls will gracefully stop to prevent the
 | 
			
		||||
# disk from being filled
 | 
			
		||||
disk_utilization_threshold: 90
 | 
			
		||||
 | 
			
		||||
# max pages per crawl
 | 
			
		||||
# set to non-zero value to enforce global max pages per crawl limit
 | 
			
		||||
# if set, each workflow can have a lower limit, but not higher
 | 
			
		||||
@ -146,7 +151,7 @@ crawler_namespace: "crawlers"
 | 
			
		||||
crawl_retries: 1000
 | 
			
		||||
 | 
			
		||||
# browsertrix-crawler args:
 | 
			
		||||
crawler_args: "--logging stats,behaviors,debug --generateWACZ --text --collection thecrawl --screencastPort 9037 --waitOnDone"
 | 
			
		||||
crawler_args: "--logging stats,behaviors,debug --generateWACZ --text --collection thecrawl --screencastPort 9037 --diskUtilization {{ .Values.disk_utilization_threshold | default 90 }} --waitOnDone"
 | 
			
		||||
 | 
			
		||||
crawler_browser_instances: 2
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user