Re-add support for passing in 'crawler_extra_args' for additional/custom (#957)
options not covered by the standard crawler opts (setting all args this way was removed in #889)
This commit is contained in:
parent
2038e3d668
commit
d7cb47390e
@@ -74,7 +74,7 @@ metadata:
|
||||
|
||||
data:
|
||||
CRAWL_ARGS: >-
|
||||
--workers {{ .Values.crawler_browser_instances | default 1 }} --sizeLimit {{ .Values.crawler_session_size_limit_bytes }} --timeLimit {{ .Values.crawler_session_time_limit_seconds }} --maxPageLimit {{ .Values.max_pages_per_crawl | default 0 }} --healthCheckPort {{ .Values.crawler_liveness_port }} --diskUtilization {{ .Values.disk_utilization_threshold }} --userAgentSuffix {{ .Values.user_agent_suffix | quote }} --userAgent {{ .Values.user_agent | quote }} --logging {{ .Values.crawler_logging_opts }} --text {{ .Values.crawler_extract_full_text }} --generateWACZ --waitOnDone --collection thecrawl --screencastPort 9037 --logErrorsToRedis
|
||||
--workers {{ .Values.crawler_browser_instances | default 1 }} --sizeLimit {{ .Values.crawler_session_size_limit_bytes }} --timeLimit {{ .Values.crawler_session_time_limit_seconds }} --maxPageLimit {{ .Values.max_pages_per_crawl | default 0 }} --healthCheckPort {{ .Values.crawler_liveness_port }} --diskUtilization {{ .Values.disk_utilization_threshold }} --userAgentSuffix {{ .Values.user_agent_suffix | quote }} --userAgent {{ .Values.user_agent | quote }} --logging {{ .Values.crawler_logging_opts }} --text {{ .Values.crawler_extract_full_text }} --generateWACZ --waitOnDone --collection thecrawl --screencastPort 9037 --logErrorsToRedis {{ .Values.crawler_extra_args }}
|
||||
|
||||
---
|
||||
apiVersion: v1
|
||||
|
@@ -42,6 +42,12 @@ max_pages_per_crawl: 0
|
||||
default_crawl_filename_template: "@ts-@hostsuffix.wacz"
|
||||
|
||||
|
||||
# advanced: additional args to be passed to the crawler
|
||||
# this is mostly for testing of new/experimental crawler flags
|
||||
# standard crawler options are covered by the other options above
|
||||
crawler_extra_args: ""
|
||||
|
||||
|
||||
# Cluster Settings
|
||||
# =========================================
|
||||
name: browsertrix-cloud
|
||||
|
Loading…
Reference in New Issue
Block a user