# browsertrix/chart/templates/configmap.yaml
---
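# backend-env-config: environment variables for the Browsertrix backend pods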
apiVersion: v1
kind: ConfigMap
metadata:
  name: backend-env-config
  namespace: {{ .Release.Namespace }}
data:
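  # APP_ORIGIN renders as, e.g., "https://app.example.com" when ingress.tls is
  # set and ingress.host is "app.example.com" (a hypothetical host); with no
  # host configured it falls back to "http://localhost:9870" or the configured
  # local_service_port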
  APP_ORIGIN: {{ .Values.ingress.tls | ternary "https" "http" }}://{{ or .Values.ingress.host ( print "localhost:" ( .Values.local_service_port | default 9870 )) }}
  CRAWLER_NAMESPACE: {{ .Values.crawler_namespace }}
  DEFAULT_NAMESPACE: {{ .Release.Namespace }}
  FRONTEND_ORIGIN: {{ .Values.frontend_alias | default "http://browsertrix-cloud-frontend" }}
  CRAWLER_FQDN_SUFFIX: ".{{ .Values.crawler_namespace }}.svc.cluster.local"
  DEFAULT_ORG: "{{ .Values.default_org }}"
  INVITE_EXPIRE_SECONDS: "{{ .Values.invite_expire_seconds }}"
  REGISTRATION_ENABLED: "{{ .Values.registration_enabled | default 0 }}"
  REGISTER_TO_ORG_ID: "{{ .Values.registration_org_id }}"
  ALLOW_DUPE_INVITES: "{{ .Values.allow_dupe_invites | default 0 }}"
  JWT_TOKEN_LIFETIME_MINUTES: "{{ .Values.jwt_token_lifetime_minutes | default 60 }}"
  DEFAULT_BEHAVIOR_TIME_SECONDS: "{{ .Values.default_behavior_time_seconds }}"
  DEFAULT_PAGE_LOAD_TIME_SECONDS: "{{ .Values.default_page_load_time_seconds }}"
  DEFAULT_CRAWL_FILENAME_TEMPLATE: "{{ .Values.default_crawl_filename_template }}"
  DEFAULT_CRAWLER_IMAGE_PULL_POLICY: "{{ .Values.crawler_pull_policy }}"
  MAX_PAGES_PER_CRAWL: "{{ .Values.max_pages_per_crawl | default 0 }}"
  IDLE_TIMEOUT: "{{ .Values.profile_browser_idle_seconds | default 60 }}"
  RERUN_FROM_MIGRATION: "{{ .Values.rerun_from_migration }}"
  MIGRATION_JOBS_SCALE: "{{ .Values.migration_jobs_scale | default 1 }}"
  PRESIGN_DURATION_MINUTES: "{{ .Values.storage_presign_duration_minutes }}"
  FAST_RETRY_SECS: "{{ .Values.operator_fast_resync_secs | default 3 }}"
  MAX_CRAWL_SCALE: "{{ .Values.max_crawl_scale | default 3 }}"
  LOG_FAILED_CRAWL_LINES: "{{ .Values.log_failed_crawl_lines | default 0 }}"
  IS_LOCAL_MINIO: "{{ .Values.minio_local }}"
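  # the following paths point at JSON config files mounted into the backend
  # container (from the ops-configs and ops-proxy-configs volumes), not
  # literal values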
  STORAGES_JSON: "/ops-configs/storages.json"
  CRAWLER_CHANNELS_JSON: "/ops-configs/crawler_channels.json"
  CRAWLER_PROXIES_LAST_UPDATE: "/ops-proxy-configs/crawler_proxies_last_update"
  CRAWLER_PROXIES_JSON: "/ops-proxy-configs/crawler_proxies.json"
  DEFAULT_PROXY_ID: "{{ .Values.default_proxy }}"
  MIN_QA_CRAWLER_IMAGE: "{{ .Values.min_qa_crawler_image }}"
  MIN_AUTOCLICK_CRAWLER_IMAGE: "{{ .Values.min_autoclick_crawler_image }}"
  NUM_BROWSERS: "{{ .Values.crawler_browser_instances }}"
  MAX_CRAWLER_MEMORY: "{{ .Values.max_crawler_memory }}"
  CRAWLER_MIN_AVAIL_STORAGE_RATIO: "{{ .Values.crawler_min_avail_storage_ratio }}"
  ENABLE_AUTO_RESIZE_CRAWLERS: "{{ .Values.enable_auto_resize_crawlers }}"
  BILLING_ENABLED: "{{ .Values.billing_enabled }}"
  SIGN_UP_URL: "{{ .Values.sign_up_url }}"
  SALES_EMAIL: "{{ .Values.sales_email }}"
  USER_SURVEY_URL: "{{ .Values.user_survey_url }}"
  LOG_SENT_EMAILS: "{{ .Values.email.log_sent_emails }}"
  BACKEND_IMAGE: "{{ .Values.backend_image }}"
  BACKEND_IMAGE_PULL_POLICY: "{{ .Values.backend_pull_policy }}"
  LOCALES_ENABLED: "{{ .Values.locales_enabled }}"
  REPLICA_DELETION_DELAY_DAYS: "{{ .Values.replica_deletion_delay_days | default 0 }}"
---
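# shared-crawler-config: command-line arguments for crawler pods, rendered
# into the crawler namespace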
apiVersion: v1
kind: ConfigMap
metadata:
  name: shared-crawler-config
  namespace: {{ .Values.crawler_namespace }}
data:
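  # the "btrix.crawler_args" named template builds the argument string once so
  # that CRAWL_ARGS and QA_ARGS below stay in sync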
  {{- define "btrix.crawler_args" }} --sizeLimit {{ .Values.crawler_session_size_limit_bytes }} --timeLimit {{ .Values.crawler_session_time_limit_seconds }} --maxPageLimit {{ .Values.max_pages_per_crawl | default 0 }} --healthCheckPort {{ .Values.crawler_liveness_port }} --diskUtilization {{ .Values.disk_utilization_threshold }} --logging {{ .Values.crawler_logging_opts }} --text {{ .Values.crawler_extract_full_text }} --generateWACZ --collection thecrawl --screencastPort 9037 --logErrorsToRedis --logBehaviorsToRedis --writePagesToRedis --restartsOnError --headless --screenshot view,thumbnail {{ .Values.crawler_extra_args }} {{- end }}
  CRAWL_ARGS: {{- include "btrix.crawler_args" . }}
  # disable behaviors for QA runs, otherwise use same args
  QA_ARGS: {{- include "btrix.crawler_args" . }} --behaviors=""
---
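# shared-job-config: operator settings mounted as config.yaml, including
# resource sizing for crawler and redis pods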
apiVersion: v1
kind: ConfigMap
metadata:
  name: shared-job-config
  # namespace: {{ .Values.crawler_namespace }}
  namespace: {{ .Release.Namespace }}
data:
  config.yaml: |
    namespace: {{ .Values.crawler_namespace }}
    termination_grace_secs: "{{ .Values.grace_period_secs | default 600 }}"
    volume_storage_class: "{{ .Values.volume_storage_class }}"

    # redis
    redis_image: {{ .Values.redis_image }}
    redis_image_pull_policy: {{ .Values.redis_pull_policy }}
    redis_cpu: "{{ .Values.redis_cpu }}"
    redis_memory: "{{ .Values.redis_memory }}"
    redis_storage: "{{ .Values.redis_storage }}"

    # crawler
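    # when crawler_cpu / crawler_memory are left unset, crawler resources are
    # derived as base + extra_per_browser * (crawler_browser_instances - 1)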
    crawler_image_pull_policy: {{ .Values.crawler_pull_policy }}
    crawler_cpu_base: "{{ .Values.crawler_cpu_base }}"
    crawler_memory_base: "{{ .Values.crawler_memory_base }}"
    crawler_extra_cpu_per_browser: "{{ .Values.crawler_extra_cpu_per_browser | default 0 }}"
    crawler_extra_memory_per_browser: "{{ .Values.crawler_extra_memory_per_browser | default 0 }}"
    crawler_browser_instances: "{{ .Values.crawler_browser_instances }}"
    qa_browser_instances: "{{ .Values.qa_browser_instances }}"
    crawler_cpu: "{{ .Values.crawler_cpu }}"
    crawler_memory: "{{ .Values.crawler_memory }}"
    crawler_storage: "{{ .Values.crawler_storage }}"
    profile_browser_cpu: "{{ .Values.profile_browser_cpu }}"
    profile_browser_memory: "{{ .Values.profile_browser_memory }}"
    crawler_liveness_port: "{{ .Values.crawler_liveness_port | default 0 }}"
    crawler_socks_proxy_host: "{{ .Values.crawler_socks_proxy_host }}"
    crawler_socks_proxy_port: "{{ .Values.crawler_socks_proxy_port }}"
    crawler_uid: "{{ .Values.crawler_uid | default 201407 }}"
    crawler_gid: "{{ .Values.crawler_gid | default 201407 }}"
    crawler_fsgroup: "{{ .Values.crawler_fsgroup | default 201407 }}"
    profile_browser_workdir_size: "{{ .Values.profile_browser_workdir_size | default "4Gi" }}"
    qa_scale: "{{ .Values.qa_scale | default 1 }}"
    crawler_node_type: "{{ .Values.crawler_node_type }}"
    redis_node_type: "{{ .Values.redis_node_type }}"
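    # reference the signing secret only when signing is enabled and an auth
    # token is set; otherwise render an empty string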
    signing_secret: {{ and .Values.signer.enabled (not (empty .Values.signer.auth_token)) | ternary "signing-secret" "" }}
---
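# shared-redis-conf: configuration for the redis instances backing each crawl;
# appendonly persistence preserves crawl state across pod restarts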
apiVersion: v1
kind: ConfigMap
metadata:
  name: shared-redis-conf
  namespace: {{ .Values.crawler_namespace }}
data:
  redis.conf: |
    appendonly yes
    dir /data
---
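# app-templates: bundles every YAML file under app-templates/ into a single
# ConfigMap via .Files.Glob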
apiVersion: v1
kind: ConfigMap
metadata:
  name: app-templates
  namespace: {{ .Release.Namespace }}
data:
{{ (.Files.Glob "app-templates/*.yaml").AsConfig | indent 2 }}
---
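# email-templates: for each known template name, prefer an override from
# .Values.email.templates, falling back to the file shipped under
# email-templates/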
apiVersion: v1
kind: ConfigMap
metadata:
  name: email-templates
  namespace: {{ .Release.Namespace }}
data:
  {{- $email_templates := .Values.email.templates | default dict }}
  {{- range tuple "failed_bg_job" "invite" "password_reset" "validate" "sub_cancel" }}
  {{ . }}: |
{{ ((get $email_templates . ) | default ($.Files.Get (printf "%s/%s" "email-templates" . ))) | indent 4 }}
  {{- end }}