Configure crawler disk utilization threshold via helm chart (#748)
This commit is contained in:
		
							parent
							
								
									f6f3b7abba
								
							
						
					
					
						commit
						11ca3e678a
					
				@ -14,5 +14,5 @@ def test_settings():
 | 
				
			|||||||
        "jwtTokenLifetime": 86400,
 | 
					        "jwtTokenLifetime": 86400,
 | 
				
			||||||
        "defaultBehaviorTimeSeconds": 300,
 | 
					        "defaultBehaviorTimeSeconds": 300,
 | 
				
			||||||
        "maxPagesPerCrawl": 2,
 | 
					        "maxPagesPerCrawl": 2,
 | 
				
			||||||
        "defaultPageLoadTimeSeconds": 120
 | 
					        "defaultPageLoadTimeSeconds": 120,
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
				
			|||||||
@ -21,6 +21,11 @@ default_behavior_time_seconds: 300
 | 
				
			|||||||
# default time to wait for page to fully load before running behaviors (in seconds)
 | 
					# default time to wait for page to fully load before running behaviors (in seconds)
 | 
				
			||||||
default_page_load_time_seconds: 120
 | 
					default_page_load_time_seconds: 120
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# disk utilization threshold percentage - when used disk space passes
 | 
				
			||||||
 | 
					# this percentage of total, crawls will gracefully stop to prevent the
 | 
				
			||||||
 | 
					# disk from being filled
 | 
				
			||||||
 | 
					disk_utilization_threshold: 90
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# max pages per crawl
 | 
					# max pages per crawl
 | 
				
			||||||
# set to non-zero value to enforce global max pages per crawl limit
 | 
					# set to non-zero value to enforce global max pages per crawl limit
 | 
				
			||||||
# if set, each workflow can have a lower limit, but not higher
 | 
					# if set, each workflow can have a lower limit, but not higher
 | 
				
			||||||
@ -146,7 +151,7 @@ crawler_namespace: "crawlers"
 | 
				
			|||||||
crawl_retries: 1000
 | 
					crawl_retries: 1000
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# browsertrix-crawler args:
 | 
					# browsertrix-crawler args:
 | 
				
			||||||
crawler_args: "--logging stats,behaviors,debug --generateWACZ --text --collection thecrawl --screencastPort 9037 --waitOnDone"
 | 
					crawler_args: "--logging stats,behaviors,debug --generateWACZ --text --collection thecrawl --screencastPort 9037 --diskUtilization {{ .Values.disk_utilization_threshold | default 90 }} --waitOnDone"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
crawler_browser_instances: 2
 | 
					crawler_browser_instances: 2
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
		Reference in New Issue
	
	Block a user