From 3df6e0f146906b2f2f48537acefde83ae0781214 Mon Sep 17 00:00:00 2001
From: Ilya Kreymer
Date: Wed, 22 Feb 2023 13:23:19 -0800
Subject: [PATCH] crawler arguments fixes: (#621)

- partial fix to #321, don't hard-code behavior limit into crawler args
- allow setting number of crawler browser instances via 'crawler_browser_instances' to avoid having to override the full crawler args
---
 chart/templates/configmap.yaml | 5 +----
 chart/values.yaml              | 4 +++-
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/chart/templates/configmap.yaml b/chart/templates/configmap.yaml
index a0ed3ae9..0f80695c 100644
--- a/chart/templates/configmap.yaml
+++ b/chart/templates/configmap.yaml
@@ -67,10 +67,7 @@ metadata:
   namespace: {{ .Values.crawler_namespace }}
 
 data:
-  #CRAWL_ARGS: "{{ .Values.crawler_args }} --redisStoreUrl {{ .Values.redis_url }}"
-  CRAWL_ARGS: "{{ .Values.crawler_args }}"
-  #WEBHOOK_URL: "{{ .Values.redis_url }}/crawls-done"
-
+  CRAWL_ARGS: "{{ .Values.crawler_args }} --workers {{ .Values.crawler_browser_instances | default 1 }}"
 
 ---
 apiVersion: v1
diff --git a/chart/values.yaml b/chart/values.yaml
index 86b50ff8..ab34e5f3 100644
--- a/chart/values.yaml
+++ b/chart/values.yaml
@@ -138,7 +138,9 @@ crawler_namespace: "crawlers"
 crawl_retries: 1000
 
 # browsertrix-crawler args:
-crawler_args: "--timeout 120 --logging stats,behaviors,debug --generateWACZ --text --workers 4 --collection thecrawl --screencastPort 9037 --sizeLimit 100000000000 --timeLimit 18000 --healthCheckPort 6065 --waitOnDone --behaviorTimeout 300"
+crawler_args: "--timeout 120 --logging stats,behaviors,debug --generateWACZ --text --collection thecrawl --screencastPort 9037 --sizeLimit 100000000000 --timeLimit 18000 --healthCheckPort 6065 --waitOnDone"
+
+crawler_browser_instances: 4
 
 crawler_requests_cpu: "800m"
 crawler_limits_cpu: "1200m"