crawler arguments fixes: (#621)
- partial fix to #321, don't hard-code behavior limit into crawler args
- allow setting number of crawler browser instances via 'crawler_browser_instances' to avoid having to override the full crawler args
This commit is contained in:
parent
974aeb5e93
commit
3df6e0f146
@ -67,10 +67,7 @@ metadata:
|
||||
namespace: {{ .Values.crawler_namespace }}
|
||||
|
||||
data:
|
||||
#CRAWL_ARGS: "{{ .Values.crawler_args }} --redisStoreUrl {{ .Values.redis_url }}"
|
||||
CRAWL_ARGS: "{{ .Values.crawler_args }}"
|
||||
#WEBHOOK_URL: "{{ .Values.redis_url }}/crawls-done"
|
||||
|
||||
CRAWL_ARGS: "{{ .Values.crawler_args }} --workers {{ .Values.crawler_browser_instances | default 1 }}"
|
||||
|
||||
---
|
||||
apiVersion: v1
|
||||
|
@ -138,7 +138,9 @@ crawler_namespace: "crawlers"
|
||||
crawl_retries: 1000
|
||||
|
||||
# browsertrix-crawler args:
|
||||
crawler_args: "--timeout 120 --logging stats,behaviors,debug --generateWACZ --text --workers 4 --collection thecrawl --screencastPort 9037 --sizeLimit 100000000000 --timeLimit 18000 --healthCheckPort 6065 --waitOnDone --behaviorTimeout 300"
|
||||
crawler_args: "--timeout 120 --logging stats,behaviors,debug --generateWACZ --text --collection thecrawl --screencastPort 9037 --sizeLimit 100000000000 --timeLimit 18000 --healthCheckPort 6065 --waitOnDone"
|
||||
|
||||
crawler_browser_instances: 4
|
||||
|
||||
crawler_requests_cpu: "800m"
|
||||
crawler_limits_cpu: "1200m"
|
||||
|
Loading…
Reference in New Issue
Block a user