# Source: browsertrix/chart/templates/configmap.yaml
# Commit ec74eb4242 (Ilya Kreymer):
#   operator: add 'max_crawler_memory' to limit autosizing of crawler pods (#1746)
#   Adds a `max_crawler_memory` chart setting which, if set, defines the upper
#   crawler memory limit that crawler pods can be resized up to. If not set,
#   auto resizing is disabled and pods are always set to 'crawler_memory' memory.
#   2024-04-24 15:16:32 +02:00

---
# Environment configuration for the backend API service.
# Each data entry is quoted so an unset chart value renders as "" rather than
# a bare empty scalar (which YAML would parse as null — invalid in ConfigMap data).
apiVersion: v1
kind: ConfigMap
metadata:
  name: backend-env-config
  namespace: {{ .Release.Namespace }}
data:
  APP_ORIGIN: "{{ .Values.ingress.tls | ternary "https" "http" }}://{{ .Values.ingress.host | default "localhost:9870" }}"
  CRAWLER_NAMESPACE: "{{ .Values.crawler_namespace }}"
  DEFAULT_NAMESPACE: "{{ .Release.Namespace }}"
  FRONTEND_ORIGIN: "{{ .Values.frontend_alias | default "http://browsertrix-cloud-frontend" }}"
  CRAWLER_FQDN_SUFFIX: ".{{ .Values.crawler_namespace }}.svc.cluster.local"
  DEFAULT_ORG: "{{ .Values.default_org }}"
  INVITE_EXPIRE_SECONDS: "{{ .Values.invite_expire_seconds }}"
  REGISTRATION_ENABLED: "{{ .Values.registration_enabled | default 0 }}"
  REGISTER_TO_ORG_ID: "{{ .Values.registration_org_id }}"
  ALLOW_DUPE_INVITES: "{{ .Values.allow_dupe_invites | default 0 }}"
  JWT_TOKEN_LIFETIME_MINUTES: "{{ .Values.jwt_token_lifetime_minutes | default 60 }}"
  DEFAULT_BEHAVIOR_TIME_SECONDS: "{{ .Values.default_behavior_time_seconds }}"
  DEFAULT_PAGE_LOAD_TIME_SECONDS: "{{ .Values.default_page_load_time_seconds }}"
  DEFAULT_CRAWL_FILENAME_TEMPLATE: "{{ .Values.default_crawl_filename_template }}"
  MAX_PAGES_PER_CRAWL: "{{ .Values.max_pages_per_crawl | default 0 }}"
  IDLE_TIMEOUT: "{{ .Values.profile_browser_idle_seconds | default 60 }}"
  RERUN_FROM_MIGRATION: "{{ .Values.rerun_from_migration }}"
  PRESIGN_DURATION_MINUTES: "{{ .Values.storage_presign_duration_minutes }}"
  FAST_RETRY_SECS: "{{ .Values.operator_fast_resync_secs | default 3 }}"
  MAX_CRAWL_SCALE: "{{ .Values.max_crawl_scale | default 3 }}"
  LOG_FAILED_CRAWL_LINES: "{{ .Values.log_failed_crawl_lines | default 0 }}"
  IS_LOCAL_MINIO: "{{ .Values.minio_local }}"
  # Paths to secret-mounted JSON config files (see ops-configs secret mounts).
  STORAGES_JSON: "/ops-configs/storages.json"
  CRAWLER_CHANNELS_JSON: "/ops-configs/crawler_channels.json"
  MIN_QA_CRAWLER_IMAGE: "{{ .Values.min_qa_crawler_image }}"
  # If set, upper bound for operator crawler-pod memory autosizing; if empty,
  # autosizing is disabled and pods stay at 'crawler_memory' (per commit #1746).
  MAX_CRAWLER_MEMORY: "{{ .Values.max_crawler_memory }}"
---
# Common crawler arguments, deployed into the crawler namespace and consumed
# by crawler pods. CRAWL_ARGS uses a folded block scalar (>-): the lines below
# are joined with single spaces at render time, so splitting the flags across
# lines changes only readability, not the rendered value.
apiVersion: v1
kind: ConfigMap
metadata:
  name: shared-crawler-config
  namespace: {{ .Values.crawler_namespace }}
data:
  CRAWL_ARGS: >-
    --workers {{ .Values.crawler_browser_instances | default 1 }}
    --sizeLimit {{ .Values.crawler_session_size_limit_bytes }}
    --timeLimit {{ .Values.crawler_session_time_limit_seconds }}
    --maxPageLimit {{ .Values.max_pages_per_crawl | default 0 }}
    --healthCheckPort {{ .Values.crawler_liveness_port }}
    --diskUtilization {{ .Values.disk_utilization_threshold }}
    --logging {{ .Values.crawler_logging_opts }}
    --text {{ .Values.crawler_extract_full_text }}
    --generateWACZ
    --collection thecrawl
    --screencastPort 9037
    --logErrorsToRedis
    --writePagesToRedis
    --restartsOnError
    --headless
    --screenshot view,thumbnail
    {{ .Values.crawler_extra_args }}
---
# Operator job configuration, read by the backend/operator as an embedded
# YAML document (config.yaml literal block, newlines preserved).
apiVersion: v1
kind: ConfigMap
metadata:
  name: shared-job-config
  # namespace: {{ .Values.crawler_namespace }}
  namespace: {{ .Release.Namespace }}
data:
  config.yaml: |
    namespace: {{ .Values.crawler_namespace }}
    termination_grace_secs: "{{ .Values.grace_period_secs | default 600 }}"
    volume_storage_class: "{{ .Values.volume_storage_class }}"

    # redis
    redis_image: {{ .Values.redis_image }}
    redis_image_pull_policy: {{ .Values.redis_pull_policy }}
    redis_cpu: "{{ .Values.redis_cpu }}"
    redis_memory: "{{ .Values.redis_memory }}"
    redis_storage: "{{ .Values.redis_storage }}"

    # crawler
    crawler_image_pull_policy: {{ .Values.crawler_pull_policy }}
    crawler_cpu_base: "{{ .Values.crawler_cpu_base }}"
    crawler_memory_base: "{{ .Values.crawler_memory_base }}"
    crawler_extra_cpu_per_browser: "{{ .Values.crawler_extra_cpu_per_browser | default 0 }}"
    crawler_extra_memory_per_browser: "{{ .Values.crawler_extra_memory_per_browser | default 0 }}"
    crawler_browser_instances: "{{ .Values.crawler_browser_instances }}"
    crawler_cpu: "{{ .Values.crawler_cpu }}"
    crawler_memory: "{{ .Values.crawler_memory }}"
    crawler_storage: "{{ .Values.crawler_storage }}"
    # NOTE: a duplicate volume_storage_class entry was removed here; it is
    # already set once above and duplicate YAML keys are invalid (last-wins
    # silently in most parsers).
    crawler_liveness_port: "{{ .Values.crawler_liveness_port | default 0 }}"
    crawler_socks_proxy_host: "{{ .Values.crawler_socks_proxy_host }}"
    crawler_socks_proxy_port: "{{ .Values.crawler_socks_proxy_port }}"
    crawler_uid: "{{ .Values.crawler_uid | default 201400007 }}"
    crawler_gid: "{{ .Values.crawler_gid | default 201400007 }}"
    crawler_fsgroup: "{{ .Values.crawler_fsgroup | default 201400007 }}"

    profile_browser_workdir_size: "{{ .Values.profile_browser_workdir_size | default "4Gi" }}"

    crawler_node_type: "{{ .Values.crawler_node_type }}"
    redis_node_type: "{{ .Values.redis_node_type }}"

    # Name of the signing secret, or "" when signing is disabled / no token set.
    signing_secret: {{ and .Values.signer.enabled (not (empty .Values.signer.auth_token)) | ternary "signing-secret" "" }}
---
# Redis configuration file mounted into per-crawl redis pods.
apiVersion: v1
kind: ConfigMap
metadata:
  name: shared-redis-conf
  namespace: {{ .Values.crawler_namespace }}
data:
  redis.conf: |
    appendonly yes
    dir /data
---
# All files under app-templates/*.yaml, bundled verbatim into one ConfigMap.
# The templating line stays at column 0: AsConfig emits "name: |\n..." entries
# and `indent 2` supplies the indentation under data:.
apiVersion: v1
kind: ConfigMap
metadata:
  name: app-templates
  namespace: {{ .Release.Namespace }}
data:
{{ (.Files.Glob "app-templates/*.yaml").AsConfig | indent 2 }}
---
# Email template bodies. Each named template is taken from
# .Values.email.templates when overridden there, otherwise from the chart's
# bundled email-templates/ directory. The Files.Get line stays at column 0:
# `indent 4` supplies the literal-block indentation under "{{ . }}: |".
apiVersion: v1
kind: ConfigMap
metadata:
  name: email-templates
  namespace: {{ .Release.Namespace }}
data:
  {{- $email_templates := .Values.email.templates | default dict }}
  {{- range tuple "failed_bg_job" "invite" "password_reset" "validate" }}
  {{ . }}: |
{{ ((get $email_templates . ) | default ($.Files.Get (printf "%s/%s" "email-templates" . ))) | indent 4 }}
  {{- end }}