browsertrix/chart/templates/configmap.yaml
Ilya Kreymer 61239a40ed
include workflow config in QA runs + different browser instances for QA (#1829)
Currently, the workflow crawl settings were not being included at all in
QA runs.
This mounts the crawl workflow config, as well as QA configmap, into QA
run crawls, allowing for page limits from crawl workflow to be applied
to QA runs.

It also allows a different number of browser instances to be used for QA
runs, as QA runs might work better with fewer browsers (e.g., 2 instead of
4). This can be set with `qa_browser_instances` in the helm chart.

Default qa browser workers to 1 if unset (for now, for best results)

Fixes #1828
2024-05-29 13:32:25 -07:00

171 lines
5.5 KiB
YAML

---
# Environment settings consumed by the Browsertrix backend API pods.
apiVersion: v1
kind: ConfigMap
metadata:
  name: backend-env-config
  namespace: {{ .Release.Namespace }}
data:
  # Scheme follows the ingress TLS setting; host falls back to localhost for local dev.
  APP_ORIGIN: "{{ .Values.ingress.tls | ternary "https" "http" }}://{{ .Values.ingress.host | default "localhost:9870" }}"
  # All templated scalars are quoted so that empty or boolean-looking values
  # still render as YAML strings rather than null/bool.
  CRAWLER_NAMESPACE: "{{ .Values.crawler_namespace }}"
  DEFAULT_NAMESPACE: "{{ .Release.Namespace }}"
  FRONTEND_ORIGIN: "{{ .Values.frontend_alias | default "http://browsertrix-cloud-frontend" }}"
  # In-cluster DNS suffix for addressing crawler pods directly.
  CRAWLER_FQDN_SUFFIX: ".{{ .Values.crawler_namespace }}.svc.cluster.local"
  DEFAULT_ORG: "{{ .Values.default_org }}"
  INVITE_EXPIRE_SECONDS: "{{ .Values.invite_expire_seconds }}"
  REGISTRATION_ENABLED: "{{ .Values.registration_enabled | default 0 }}"
  REGISTER_TO_ORG_ID: "{{ .Values.registration_org_id }}"
  ALLOW_DUPE_INVITES: "{{ .Values.allow_dupe_invites | default 0 }}"
  JWT_TOKEN_LIFETIME_MINUTES: "{{ .Values.jwt_token_lifetime_minutes | default 60 }}"
  DEFAULT_BEHAVIOR_TIME_SECONDS: "{{ .Values.default_behavior_time_seconds }}"
  DEFAULT_PAGE_LOAD_TIME_SECONDS: "{{ .Values.default_page_load_time_seconds }}"
  DEFAULT_CRAWL_FILENAME_TEMPLATE: "{{ .Values.default_crawl_filename_template }}"
  # 0 means unlimited pages per crawl.
  MAX_PAGES_PER_CRAWL: "{{ .Values.max_pages_per_crawl | default 0 }}"
  IDLE_TIMEOUT: "{{ .Values.profile_browser_idle_seconds | default 60 }}"
  RERUN_FROM_MIGRATION: "{{ .Values.rerun_from_migration }}"
  PRESIGN_DURATION_MINUTES: "{{ .Values.storage_presign_duration_minutes }}"
  FAST_RETRY_SECS: "{{ .Values.operator_fast_resync_secs | default 3 }}"
  MAX_CRAWL_SCALE: "{{ .Values.max_crawl_scale | default 3 }}"
  LOG_FAILED_CRAWL_LINES: "{{ .Values.log_failed_crawl_lines | default 0 }}"
  IS_LOCAL_MINIO: "{{ .Values.minio_local }}"
  # Paths where ops configs are mounted into the backend pod.
  STORAGES_JSON: "/ops-configs/storages.json"
  CRAWLER_CHANNELS_JSON: "/ops-configs/crawler_channels.json"
  # Minimum crawler image version that supports QA runs, if restricted.
  MIN_QA_CRAWLER_IMAGE: "{{ .Values.min_qa_crawler_image }}"
  MAX_CRAWLER_MEMORY: "{{ .Values.max_crawler_memory }}"
---
# Default CLI arguments passed to every crawler container.
apiVersion: v1
kind: ConfigMap
metadata:
  name: shared-crawler-config
  namespace: {{ .Values.crawler_namespace }}
data:
  # Folded block scalar (>-): lines are joined with single spaces into one
  # command-line string, with the trailing newline stripped.
  CRAWL_ARGS: >-
    --sizeLimit {{ .Values.crawler_session_size_limit_bytes }}
    --timeLimit {{ .Values.crawler_session_time_limit_seconds }}
    --maxPageLimit {{ .Values.max_pages_per_crawl | default 0 }}
    --healthCheckPort {{ .Values.crawler_liveness_port }}
    --diskUtilization {{ .Values.disk_utilization_threshold }}
    --logging {{ .Values.crawler_logging_opts }}
    --text {{ .Values.crawler_extract_full_text }}
    --generateWACZ
    --collection thecrawl
    --screencastPort 9037
    --logErrorsToRedis
    --writePagesToRedis
    --restartsOnError
    --headless
    --screenshot view,thumbnail
    {{ .Values.crawler_extra_args }}
---
# Operator job configuration, embedded as a config.yaml file.
apiVersion: v1
kind: ConfigMap
metadata:
  name: shared-job-config
  # Lives in the backend (release) namespace, not crawler_namespace:
  # it is read by the operator, which runs alongside the backend.
  namespace: {{ .Release.Namespace }}
data:
  config.yaml: |
    namespace: {{ .Values.crawler_namespace }}
    termination_grace_secs: "{{ .Values.grace_period_secs | default 600 }}"
    volume_storage_class: "{{ .Values.volume_storage_class }}"
    # redis
    redis_image: {{ .Values.redis_image }}
    redis_image_pull_policy: {{ .Values.redis_pull_policy }}
    redis_cpu: "{{ .Values.redis_cpu }}"
    redis_memory: "{{ .Values.redis_memory }}"
    redis_storage: "{{ .Values.redis_storage }}"
    # crawler
    crawler_image_pull_policy: {{ .Values.crawler_pull_policy }}
    crawler_cpu_base: "{{ .Values.crawler_cpu_base }}"
    crawler_memory_base: "{{ .Values.crawler_memory_base }}"
    crawler_extra_cpu_per_browser: "{{ .Values.crawler_extra_cpu_per_browser | default 0 }}"
    crawler_extra_memory_per_browser: "{{ .Values.crawler_extra_memory_per_browser | default 0 }}"
    crawler_browser_instances: "{{ .Values.crawler_browser_instances }}"
    # QA runs may use a different (typically smaller) number of browser workers.
    qa_browser_instances: "{{ .Values.qa_browser_instances }}"
    crawler_cpu: "{{ .Values.crawler_cpu }}"
    crawler_memory: "{{ .Values.crawler_memory }}"
    crawler_storage: "{{ .Values.crawler_storage }}"
    profile_browser_cpu: "{{ .Values.profile_browser_cpu }}"
    profile_browser_memory: "{{ .Values.profile_browser_memory }}"
    crawler_liveness_port: "{{ .Values.crawler_liveness_port | default 0 }}"
    crawler_socks_proxy_host: "{{ .Values.crawler_socks_proxy_host }}"
    crawler_socks_proxy_port: "{{ .Values.crawler_socks_proxy_port }}"
    crawler_uid: "{{ .Values.crawler_uid | default 201400007 }}"
    crawler_gid: "{{ .Values.crawler_gid | default 201400007 }}"
    crawler_fsgroup: "{{ .Values.crawler_fsgroup | default 201400007 }}"
    profile_browser_workdir_size: "{{ .Values.profile_browser_workdir_size | default "4Gi" }}"
    crawler_node_type: "{{ .Values.crawler_node_type }}"
    redis_node_type: "{{ .Values.redis_node_type }}"
    # Name of the signing secret when WACZ signing is enabled and configured; empty otherwise.
    signing_secret: {{ and .Values.signer.enabled (not (empty .Values.signer.auth_token)) | ternary "signing-secret" "" }}
---
# Redis configuration mounted into per-crawl redis pods:
# append-only persistence with data stored under /data.
apiVersion: v1
kind: ConfigMap
metadata:
  name: shared-redis-conf
  namespace: {{ .Values.crawler_namespace }}
data:
  redis.conf: |
    appendonly yes
    dir /data
---
# Bundles every app-templates/*.yaml file from the chart into one ConfigMap,
# keyed by filename (via .Files.Glob + AsConfig).
apiVersion: v1
kind: ConfigMap
metadata:
  name: app-templates
  namespace: {{ .Release.Namespace }}
data:
  # Template line stays at column 0: the `indent 2` filter supplies the indentation.
{{ (.Files.Glob "app-templates/*.yaml").AsConfig | indent 2 }}
---
# Email templates: values.yaml overrides (email.templates) take precedence;
# otherwise the default file from email-templates/ in the chart is used.
apiVersion: v1
kind: ConfigMap
metadata:
  name: email-templates
  namespace: {{ .Release.Namespace }}
data:
  {{- $email_templates := .Values.email.templates | default dict }}
  {{- range tuple "failed_bg_job" "invite" "password_reset" "validate" }}
  {{ . }}: |
{{ ((get $email_templates .) | default ($.Files.Get (printf "%s/%s" "email-templates" .))) | indent 4 }}
  {{- end }}