Configure crawler disk utilization threshold via helm chart (#748)
This commit is contained in:
parent
f6f3b7abba
commit
11ca3e678a
@ -14,5 +14,5 @@ def test_settings():
|
||||
"jwtTokenLifetime": 86400,
|
||||
"defaultBehaviorTimeSeconds": 300,
|
||||
"maxPagesPerCrawl": 2,
|
||||
"defaultPageLoadTimeSeconds": 120
|
||||
"defaultPageLoadTimeSeconds": 120,
|
||||
}
|
||||
|
@ -21,6 +21,11 @@ default_behavior_time_seconds: 300
|
||||
# default time to wait for page to fully load before running behaviors (in seconds)
|
||||
default_page_load_time_seconds: 120
|
||||
|
||||
# disk utilization threshold percentage - when used disk space exceeds
|
||||
# this percentage of total, crawls will gracefully stop to prevent the
|
||||
# disk from being filled
|
||||
disk_utilization_threshold: 90
|
||||
|
||||
# max pages per crawl
|
||||
# set to non-zero value to enforce global max pages per crawl limit
|
||||
# if set, each workflow can have a lower limit, but not higher
|
||||
@ -146,7 +151,7 @@ crawler_namespace: "crawlers"
|
||||
crawl_retries: 1000
|
||||
|
||||
# browsertrix-crawler args:
|
||||
crawler_args: "--logging stats,behaviors,debug --generateWACZ --text --collection thecrawl --screencastPort 9037 --waitOnDone"
|
||||
crawler_args: "--logging stats,behaviors,debug --generateWACZ --text --collection thecrawl --screencastPort 9037 --diskUtilization {{ .Values.disk_utilization_threshold | default 90 }} --waitOnDone"
|
||||
|
||||
crawler_browser_instances: 2
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user