diff --git a/chart/templates/configmap.yaml b/chart/templates/configmap.yaml
index 07da04ea..e3361845 100644
--- a/chart/templates/configmap.yaml
+++ b/chart/templates/configmap.yaml
@@ -71,7 +71,9 @@ metadata:
   namespace: {{ .Values.crawler_namespace }}
 
 data:
-  CRAWL_ARGS: "--workers {{ .Values.crawler_browser_instances | default 1 }} --sizeLimit {{ .Values.crawler_session_size_limit_bytes }} --timeLimit {{ .Values.crawler_session_time_limit_seconds }} --maxPageLimit {{ .Values.max_pages_per_crawl | default 0 }} --healthCheckPort {{ .Values.crawler_liveness_port }} --diskUtilization {{ .Values.disk_utilization_threshold }} --logging {{ .Values.crawler_logging_opts }} --text {{ .Values.crawler_extract_full_text }} --generateWACZ --waitOnDone --collection thecrawl --screencastPort 9037 --logErrorsToRedis"
+  # Folded block scalar: quotes inside it are literal, so the value must not be wrapped in (or end with) a double quote.
+  CRAWL_ARGS: >-
+    --workers {{ .Values.crawler_browser_instances | default 1 }} --sizeLimit {{ .Values.crawler_session_size_limit_bytes }} --timeLimit {{ .Values.crawler_session_time_limit_seconds }} --maxPageLimit {{ .Values.max_pages_per_crawl | default 0 }} --healthCheckPort {{ .Values.crawler_liveness_port }} --diskUtilization {{ .Values.disk_utilization_threshold }} --userAgentSuffix {{ .Values.user_agent_suffix | quote }} --userAgent {{ .Values.user_agent | quote }} --logging {{ .Values.crawler_logging_opts }} --text {{ .Values.crawler_extract_full_text }} --generateWACZ --waitOnDone --collection thecrawl --screencastPort 9037 --logErrorsToRedis
 
 ---
 apiVersion: v1
diff --git a/chart/values.yaml b/chart/values.yaml
index b400dc7a..36bdbb82 100644
--- a/chart/values.yaml
+++ b/chart/values.yaml
@@ -1,19 +1,6 @@
-# Settings
+
+# Crawler Settings
 # =========================================
-name: browsertrix-cloud
-
-# when running in the cloud, set this value to cloud-specific block storage
-# keep empty to use hostPath (eg. on minikube)
-volume_storage_class:
-
-# if set, set the node selector 'nodeType' for deployment pods
-# main_node_type:
-
-# if set, set the node selector 'nodeType' to this crawling pods
-# crawler_node_type:
-
-registration_enabled: "0"
-jwt_token_lifetime_minutes: 1440
 
 # default time to run behaviors on each page (in seconds)
 default_behavior_time_seconds: 300
@@ -38,14 +25,13 @@ crawler_extract_full_text: false
 # if set, each workflow can have a lower limit, but not higher
 max_pages_per_crawl: 0
 
-# if set to "1", allow inviting same user to same org multiple times
-allow_dupe_invites: "0"
+# User Agent Options
+# set to add suffix to default browser User Agent
+# user_agent_suffix:
 
-# number of seconds before pending invites expire - default is 7 days
-invite_expire_seconds: 604800
+# set to override User Agent completely (also overrides user_agent_suffix if both are set)
+# user_agent:
 
-# base url for replayweb.page
-rwp_base_url: "https://replayweb.page/"
 
 # default template for generate wacz files
 # supports following interpolated vars:
@@ -55,6 +41,33 @@ rwp_base_url: "https://replayweb.page/"
 # @id - full crawl id
 default_crawl_filename_template: "@ts-@hostsuffix.wacz"
 
+
+# Cluster Settings
+# =========================================
+name: browsertrix-cloud
+
+# when running in the cloud, set this value to cloud-specific block storage
+# keep empty to use hostPath (eg. on minikube)
+volume_storage_class:
+
+# if set, set the node selector 'nodeType' for deployment pods
+# main_node_type:
+
+# if set, set the node selector 'nodeType' to this crawling pods
+# crawler_node_type:
+
+registration_enabled: "0"
+jwt_token_lifetime_minutes: 1440
+
+# if set to "1", allow inviting same user to same org multiple times
+allow_dupe_invites: "0"
+
+# number of seconds before pending invites expire - default is 7 days
+invite_expire_seconds: 604800
+
+# base url for replayweb.page
+rwp_base_url: "https://replayweb.page/"
+
 superuser:
   # set this to enable a superuser admin
   email: admin@example.com