Chart: split Crawl args into separate variables (#639)
* chart crawl args cleanup:
- move configurable settings out of 'crawler_args'
- add 'crawler_session_size_limit_bytes' and 'crawler_session_time_limit_seconds' for the crawler's --sizeLimit and --timeLimit options, respectively
- remove hard-coded '--timeout' to allow configuring it via the crawl config
- set the liveness check port (--healthCheckPort) from the existing 'crawler_liveness_port' config value
- add comments that the requested storage ('crawler_requests_storage') must be at least double the session size limit
- defaults: set 'crawler_requests_storage' to 22Gi and the default crawl session size limit to 10GB (10000000000 bytes)
This commit is contained in:
parent
2e5db2b1f4
commit
413fd8d7ea
@ -67,7 +67,7 @@ metadata:
|
||||
namespace: {{ .Values.crawler_namespace }}
|
||||
|
||||
data:
|
||||
CRAWL_ARGS: "{{ .Values.crawler_args }} --workers {{ .Values.crawler_browser_instances | default 1 }}"
|
||||
CRAWL_ARGS: "{{ .Values.crawler_args }} --workers {{ .Values.crawler_browser_instances | default 1 }} --sizeLimit {{ .Values.crawler_session_size_limit_bytes }} --timeLimit {{ .Values.crawler_session_time_limit_seconds }} --healthCheckPort {{ .Values.crawler_liveness_port }}"
|
||||
|
||||
---
|
||||
apiVersion: v1
|
||||
|
@ -138,17 +138,25 @@ crawler_namespace: "crawlers"
|
||||
crawl_retries: 1000
|
||||
|
||||
# browsertrix-crawler args:
|
||||
crawler_args: "--timeout 120 --logging stats,behaviors,debug --generateWACZ --text --collection thecrawl --screencastPort 9037 --sizeLimit 100000000000 --timeLimit 18000 --healthCheckPort 6065 --waitOnDone"
|
||||
crawler_args: "--logging stats,behaviors,debug --generateWACZ --text --collection thecrawl --screencastPort 9037 --waitOnDone"
|
||||
|
||||
crawler_browser_instances: 4
|
||||
crawler_browser_instances: 2
|
||||
|
||||
crawler_requests_cpu: "800m"
|
||||
crawler_limits_cpu: "1200m"
|
||||
|
||||
crawler_requests_memory: "512Mi"
|
||||
crawler_limits_memory: "768Mi"
|
||||
crawler_limits_memory: "1024Mi"
|
||||
|
||||
crawler_requests_storage: "220Gi"
|
||||
# minimum size allocated to each crawler
|
||||
# should be at least double the crawl session size limit (crawler_session_size_limit_bytes) to ensure space for the WACZ
|
||||
crawler_requests_storage: "22Gi"
|
||||
|
||||
# max size (in bytes) at which the crawler will commit the current crawl session
|
||||
crawler_session_size_limit_bytes: "10000000000"
|
||||
|
||||
# max time in seconds after which the crawler will restart, if set
|
||||
crawler_session_time_limit_seconds: 18000
|
||||
|
||||
crawler_liveness_port: 6065
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user