readd support for passing in 'crawler_extra_args' for additional/custom (#957)

options not covered by standard crawler opts (removed setting all args this way in #889)
This commit is contained in:
Ilya Kreymer 2023-07-07 12:08:40 -07:00 committed by GitHub
parent 2038e3d668
commit d7cb47390e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 7 additions and 1 deletions

View File

@ -74,7 +74,7 @@ metadata:
data:
CRAWL_ARGS: >-
--workers {{ .Values.crawler_browser_instances | default 1 }} --sizeLimit {{ .Values.crawler_session_size_limit_bytes }} --timeLimit {{ .Values.crawler_session_time_limit_seconds }} --maxPageLimit {{ .Values.max_pages_per_crawl | default 0 }} --healthCheckPort {{ .Values.crawler_liveness_port }} --diskUtilization {{ .Values.disk_utilization_threshold }} --userAgentSuffix {{ .Values.user_agent_suffix | quote }} --userAgent {{ .Values.user_agent | quote }} --logging {{ .Values.crawler_logging_opts }} --text {{ .Values.crawler_extract_full_text }} --generateWACZ --waitOnDone --collection thecrawl --screencastPort 9037 --logErrorsToRedis
--workers {{ .Values.crawler_browser_instances | default 1 }} --sizeLimit {{ .Values.crawler_session_size_limit_bytes }} --timeLimit {{ .Values.crawler_session_time_limit_seconds }} --maxPageLimit {{ .Values.max_pages_per_crawl | default 0 }} --healthCheckPort {{ .Values.crawler_liveness_port }} --diskUtilization {{ .Values.disk_utilization_threshold }} --userAgentSuffix {{ .Values.user_agent_suffix | quote }} --userAgent {{ .Values.user_agent | quote }} --logging {{ .Values.crawler_logging_opts }} --text {{ .Values.crawler_extract_full_text }} --generateWACZ --waitOnDone --collection thecrawl --screencastPort 9037 --logErrorsToRedis {{ .Values.crawler_extra_args }}
---
apiVersion: v1

View File

@ -42,6 +42,12 @@ max_pages_per_crawl: 0
default_crawl_filename_template: "@ts-@hostsuffix.wacz"
# advanced: additional args to be passed to the crawler
# this is mostly for testing of new/experimental crawler flags
# standard crawler options are covered with other options above
crawler_extra_args: ""
# Cluster Settings
# =========================================
name: browsertrix-cloud