From d7cb47390e20adb7b0342b1ff238beda165ff453 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Fri, 7 Jul 2023 12:08:40 -0700 Subject: [PATCH] readd support for passing in 'crawler_extra_args' for additional/custom (#957) options not covered by standard crawler opts (removed setting all args this way in #889) --- chart/templates/configmap.yaml | 2 +- chart/values.yaml | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/chart/templates/configmap.yaml b/chart/templates/configmap.yaml index 487077d3..c7f09e6c 100644 --- a/chart/templates/configmap.yaml +++ b/chart/templates/configmap.yaml @@ -74,7 +74,7 @@ metadata: data: CRAWL_ARGS: >- - --workers {{ .Values.crawler_browser_instances | default 1 }} --sizeLimit {{ .Values.crawler_session_size_limit_bytes }} --timeLimit {{ .Values.crawler_session_time_limit_seconds }} --maxPageLimit {{ .Values.max_pages_per_crawl | default 0 }} --healthCheckPort {{ .Values.crawler_liveness_port }} --diskUtilization {{ .Values.disk_utilization_threshold }} --userAgentSuffix {{ .Values.user_agent_suffix | quote }} --userAgent {{ .Values.user_agent | quote }} --logging {{ .Values.crawler_logging_opts }} --text {{ .Values.crawler_extract_full_text }} --generateWACZ --waitOnDone --collection thecrawl --screencastPort 9037 --logErrorsToRedis + --workers {{ .Values.crawler_browser_instances | default 1 }} --sizeLimit {{ .Values.crawler_session_size_limit_bytes }} --timeLimit {{ .Values.crawler_session_time_limit_seconds }} --maxPageLimit {{ .Values.max_pages_per_crawl | default 0 }} --healthCheckPort {{ .Values.crawler_liveness_port }} --diskUtilization {{ .Values.disk_utilization_threshold }} --userAgentSuffix {{ .Values.user_agent_suffix | quote }} --userAgent {{ .Values.user_agent | quote }} --logging {{ .Values.crawler_logging_opts }} --text {{ .Values.crawler_extract_full_text }} --generateWACZ --waitOnDone --collection thecrawl --screencastPort 9037 --logErrorsToRedis {{ .Values.crawler_extra_args }} --- apiVersion: v1 diff --git a/chart/values.yaml b/chart/values.yaml index 36bdbb82..e9060023 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -42,6 +42,12 @@ max_pages_per_crawl: 0 default_crawl_filename_template: "@ts-@hostsuffix.wacz" +# advanced: additional args to be passed to the crawler +# this is mostly for testing of new/experimental crawler flags +# standard crawler options are covered with other options above +crawler_extra_args: "" + + # Cluster Settings # ========================================= name: browsertrix-cloud