Fixes #2259. This PR brings backend and frontend support for the new autoclick behavior in Browsertrix, introduced in Browsertrix 1.5.0+. On the backend, we introduce `min_autoclick_crawler_image` to `values.yaml`, with a default value of `"docker.io/webrecorder/browsertrix-crawler:1.5.0"`. If this is set and the crawler version for a new crawl is less than this value, the autoclick behavior is removed from the behaviors list in the configmap created for the crawl. The one caveat is that a crawler image tag like "latest" will always be parsed as greater than `min_autoclick_crawler_image`, so there is the potential for the crawler to run into issues if a non-numeric image tag is used with an older version of the crawler. For production we use hardcoded specific versions of the crawler except for the dev channel, which from here on out will include autoclick support, so I think this should be okay (this is also true of the existing implementation for checking `min_qa_crawler_image`). On the frontend, I've added a checkbox (unchecked by default) in the "Limits" section just below the current checkbox for autoscroll. We might want to move these to a different section eventually - I'm not sure Limits is the right place for them - but I wanted to be consistent with things as they are. --------- Co-authored-by: Ilya Kreymer <ikreymer@users.noreply.github.com>
209 lines
6.8 KiB
YAML
209 lines
6.8 KiB
YAML
---
# ConfigMap "backend-env-config": string settings rendered from chart values
# into the release namespace.
# NOTE(review): the name suggests these are injected as environment variables
# of the backend; confirm against the deployment template.
#
# Every value under `data` is quoted: ConfigMap data values must be strings,
# and an unquoted rendering such as 123, true or null would be parsed as a
# non-string scalar and rejected by the API server.
apiVersion: v1
kind: ConfigMap
metadata:
  name: backend-env-config
  namespace: {{ .Release.Namespace }}

data:
  # Scheme follows ingress.tls; host falls back to localhost plus
  # local_service_port (default 9870) when ingress.host is not set.
  APP_ORIGIN: "{{ .Values.ingress.tls | ternary "https" "http" }}://{{ or .Values.ingress.host ( print "localhost:" ( .Values.local_service_port | default 9870 )) }}"

  CRAWLER_NAMESPACE: "{{ .Values.crawler_namespace }}"

  DEFAULT_NAMESPACE: "{{ .Release.Namespace }}"

  FRONTEND_ORIGIN: "{{ .Values.frontend_alias | default "http://browsertrix-cloud-frontend" }}"

  CRAWLER_FQDN_SUFFIX: ".{{ .Values.crawler_namespace }}.svc.cluster.local"

  DEFAULT_ORG: "{{ .Values.default_org }}"

  INVITE_EXPIRE_SECONDS: "{{ .Values.invite_expire_seconds }}"

  REGISTRATION_ENABLED: "{{ .Values.registration_enabled | default 0 }}"

  REGISTER_TO_ORG_ID: "{{ .Values.registration_org_id }}"

  ALLOW_DUPE_INVITES: "{{ .Values.allow_dupe_invites | default 0 }}"

  JWT_TOKEN_LIFETIME_MINUTES: "{{ .Values.jwt_token_lifetime_minutes | default 60 }}"

  DEFAULT_BEHAVIOR_TIME_SECONDS: "{{ .Values.default_behavior_time_seconds }}"

  DEFAULT_PAGE_LOAD_TIME_SECONDS: "{{ .Values.default_page_load_time_seconds }}"

  DEFAULT_CRAWL_FILENAME_TEMPLATE: "{{ .Values.default_crawl_filename_template }}"

  MAX_PAGES_PER_CRAWL: "{{ .Values.max_pages_per_crawl | default 0 }}"

  IDLE_TIMEOUT: "{{ .Values.profile_browser_idle_seconds | default 60 }}"

  RERUN_FROM_MIGRATION: "{{ .Values.rerun_from_migration }}"

  PRESIGN_DURATION_MINUTES: "{{ .Values.storage_presign_duration_minutes }}"

  FAST_RETRY_SECS: "{{ .Values.operator_fast_resync_secs | default 3 }}"

  MAX_CRAWL_SCALE: "{{ .Values.max_crawl_scale | default 3 }}"

  LOG_FAILED_CRAWL_LINES: "{{ .Values.log_failed_crawl_lines | default 0 }}"

  IS_LOCAL_MINIO: "{{ .Values.minio_local }}"

  # Fixed file paths (not taken from chart values).
  STORAGES_JSON: "/ops-configs/storages.json"

  CRAWLER_CHANNELS_JSON: "/ops-configs/crawler_channels.json"

  CRAWLER_PROXIES_LAST_UPDATE: "/ops-proxy-configs/crawler_proxies_last_update"
  CRAWLER_PROXIES_JSON: "/ops-proxy-configs/crawler_proxies.json"

  DEFAULT_PROXY_ID: "{{ .Values.default_proxy }}"

  # Minimum crawler image settings, passed through from chart values.
  MIN_QA_CRAWLER_IMAGE: "{{ .Values.min_qa_crawler_image }}"

  MIN_AUTOCLICK_CRAWLER_IMAGE: "{{ .Values.min_autoclick_crawler_image }}"

  NUM_BROWSERS: "{{ .Values.crawler_browser_instances }}"

  MAX_CRAWLER_MEMORY: "{{ .Values.max_crawler_memory }}"

  CRAWLER_MIN_AVAIL_STORAGE_RATIO: "{{ .Values.crawler_min_avail_storage_ratio }}"

  ENABLE_AUTO_RESIZE_CRAWLERS: "{{ .Values.enable_auto_resize_crawlers }}"

  BILLING_ENABLED: "{{ .Values.billing_enabled }}"

  SIGN_UP_URL: "{{ .Values.sign_up_url }}"

  SALES_EMAIL: "{{ .Values.sales_email }}"

  USER_SURVEY_URL: "{{ .Values.user_survey_url }}"

  LOG_SENT_EMAILS: "{{ .Values.email.log_sent_emails }}"

  BACKEND_IMAGE: "{{ .Values.backend_image }}"

  BACKEND_IMAGE_PULL_POLICY: "{{ .Values.backend_pull_policy }}"

  LOCALES_ENABLED: "{{ .Values.locales_enabled }}"

  REPLICA_DELETION_DELAY_DAYS: "{{ .Values.replica_deletion_delay_days | default 0 }}"

---
# ConfigMap "shared-crawler-config": command-line argument strings, created in
# the crawler namespace.
apiVersion: v1
kind: ConfigMap
metadata:
  name: shared-crawler-config
  namespace: {{ .Values.crawler_namespace }}

data:
  # The named template below holds the argument string used by both keys.
  # Its body begins with a space on purpose: the include calls trim the
  # space after the colon, so that leading space is what separates the key
  # from its value in the rendered output.
  {{- define "btrix.crawler_args" }} --sizeLimit {{ .Values.crawler_session_size_limit_bytes }} --timeLimit {{ .Values.crawler_session_time_limit_seconds }} --maxPageLimit {{ .Values.max_pages_per_crawl | default 0 }} --healthCheckPort {{ .Values.crawler_liveness_port }} --diskUtilization {{ .Values.disk_utilization_threshold }} --logging {{ .Values.crawler_logging_opts }} --text {{ .Values.crawler_extract_full_text }} --generateWACZ --collection thecrawl --screencastPort 9037 --logErrorsToRedis --writePagesToRedis --restartsOnError --headless --screenshot view,thumbnail {{ .Values.crawler_extra_args }} {{- end }}

  CRAWL_ARGS: {{- include "btrix.crawler_args" . }}

  # QA runs reuse the crawl arguments and append an empty --behaviors value
  QA_ARGS: {{- include "btrix.crawler_args" . }} --behaviors=""

---
# ConfigMap "shared-job-config": carries one embedded YAML file, config.yaml,
# created in the release namespace.
apiVersion: v1
kind: ConfigMap
metadata:
  name: shared-job-config
  #namespace: {{ .Values.crawler_namespace }}
  namespace: {{ .Release.Namespace }}


data:
  # config.yaml is a literal block scalar, so Kubernetes stores it as a single
  # string. Keep every key inside it unique: most YAML loaders silently keep
  # only the last occurrence of a duplicated key.
  config.yaml: |
    namespace: {{ .Values.crawler_namespace }}
    termination_grace_secs: "{{ .Values.grace_period_secs | default 600 }}"

    volume_storage_class: "{{ .Values.volume_storage_class }}"

    # redis
    redis_image: {{ .Values.redis_image }}
    redis_image_pull_policy: {{ .Values.redis_pull_policy }}

    redis_cpu: "{{ .Values.redis_cpu }}"

    redis_memory: "{{ .Values.redis_memory }}"

    redis_storage: "{{ .Values.redis_storage }}"

    # crawler
    crawler_image_pull_policy: {{ .Values.crawler_pull_policy }}

    crawler_cpu_base: "{{ .Values.crawler_cpu_base }}"
    crawler_memory_base: "{{ .Values.crawler_memory_base }}"

    crawler_extra_cpu_per_browser: "{{ .Values.crawler_extra_cpu_per_browser | default 0 }}"
    crawler_extra_memory_per_browser: "{{ .Values.crawler_extra_memory_per_browser | default 0 }}"

    crawler_browser_instances: "{{ .Values.crawler_browser_instances }}"
    qa_browser_instances: "{{ .Values.qa_browser_instances }}"

    crawler_cpu: "{{ .Values.crawler_cpu }}"
    crawler_memory: "{{ .Values.crawler_memory }}"

    crawler_storage: "{{ .Values.crawler_storage }}"

    profile_browser_cpu: "{{ .Values.profile_browser_cpu }}"
    profile_browser_memory: "{{ .Values.profile_browser_memory }}"

    crawler_liveness_port: "{{ .Values.crawler_liveness_port | default 0 }}"

    crawler_socks_proxy_host: "{{ .Values.crawler_socks_proxy_host }}"
    crawler_socks_proxy_port: "{{ .Values.crawler_socks_proxy_port }}"

    crawler_uid: "{{ .Values.crawler_uid | default 201407 }}"
    crawler_gid: "{{ .Values.crawler_gid | default 201407 }}"
    crawler_fsgroup: "{{ .Values.crawler_fsgroup | default 201407 }}"

    profile_browser_workdir_size: "{{ .Values.profile_browser_workdir_size | default "4Gi" }}"

    qa_scale: "{{ .Values.qa_scale | default 1 }}"

    crawler_node_type: "{{ .Values.crawler_node_type }}"
    redis_node_type: "{{ .Values.redis_node_type }}"

    signing_secret: {{ and .Values.signer.enabled (not (empty .Values.signer.auth_token)) | ternary "signing-secret" "" }}

---
# ConfigMap "shared-redis-conf": a two-line redis.conf, created in the
# crawler namespace.
apiVersion: v1
kind: ConfigMap
metadata:
  name: shared-redis-conf
  namespace: {{ .Values.crawler_namespace }}

data:
  # The file sets the `appendonly` directive to yes and the `dir` directive
  # to /data. Comments are kept out of the block scalar so the file content
  # itself is unchanged.
  redis.conf: |
    appendonly yes
    dir /data

---
# ConfigMap "app-templates": one data entry per file matching
# app-templates/*.yaml in the chart, created in the release namespace.
apiVersion: v1
kind: ConfigMap
metadata:
  name: app-templates
  namespace: {{ .Release.Namespace }}

# The left-trimmed action removes the line break after `data:`, and nindent
# puts it back before the two-space-indented entries.
data:
  {{- (.Files.Glob "app-templates/*.yaml").AsConfig | nindent 2 }}

---
# ConfigMap "email-templates": one literal block per template name listed in
# the range below, created in the release namespace. For each name, the entry
# under the email.templates chart value is used when it is non-empty;
# otherwise the chart file email-templates/<name> is used.
apiVersion: v1
kind: ConfigMap
metadata:
  name: email-templates
  namespace: {{ .Release.Namespace }}

data:
  {{- $overrides := .Values.email.templates | default dict }}
  {{- range $name := tuple "failed_bg_job" "invite" "password_reset" "validate" "sub_cancel" }}
  {{ $name }}: |
    {{- get $overrides $name | default ($.Files.Get (print "email-templates/" $name)) | nindent 4 }}
  {{- end }}