# Settings
# =========================================
name: browsertrix-cloud

# when running in the cloud, set this value to a cloud-specific block storage class
# keep empty to use hostPath (eg. on minikube)
volume_storage_class:

# if set, sets the node selector 'nodeType' for deployment pods
# main_node_type:

# if set, sets the node selector 'nodeType' for crawling pods
# crawler_node_type:

registration_enabled: "0"
jwt_token_lifetime_minutes: 1440

# default time to run behaviors on each page (in seconds)
default_behavior_time_seconds: 300

# default time to wait for a page to fully load before running behaviors (in seconds)
default_page_load_time_seconds: 120

# disk utilization threshold percentage - when used disk space passes
# this percentage of total, crawls will gracefully stop to prevent the
# disk from being filled
disk_utilization_threshold: 90

# max pages per crawl
# set to a non-zero value to enforce a global max pages per crawl limit
# if set, each workflow can have a lower limit, but not a higher one
max_pages_per_crawl: 0

# if set to "1", allow inviting the same user to the same org multiple times
allow_dupe_invites: "0"

# number of seconds before pending invites expire - default is 7 days
invite_expire_seconds: 604800

# base url for replayweb.page
rwp_base_url: "https://replayweb.page/"

# default template for generated WACZ filenames
# supports the following interpolated vars:
# @ts - current timestamp
# @hostname - full hostname
# @hostsuffix - last 14 characters of hostname
# @id - full crawl id
default_crawl_filename_template: "@ts-@hostsuffix.wacz"
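# for example, a hypothetical template of "@id-@ts.wacz" would name each file
# with the full crawl id followed by the timestamp at creation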

superuser:
  # set this to enable a superuser admin
  email: admin@example.com

  # optional: if not set, automatically generated
  # change or remove this
  password: PASSW0RD!

# Set name for default organization created with superuser
default_org: "My Organization"


# API Image
# =========================================
backend_image: "docker.io/webrecorder/browsertrix-backend:latest"
backend_pull_policy: "Always"

backend_password_secret: "c9085f33ecce4347aa1d69339e16c499"

# number of backend pods
backend_num_replicas: 1

# number of workers per pod
backend_workers: 2

backend_requests_cpu: "10m"
backend_limits_cpu: "768m"

backend_requests_memory: "100Mi"
backend_limits_memory: "512Mi"

# port for operator service
opPort: 8756

job_cpu: "3m"
job_memory: "70Mi"

profile_browser_idle_seconds: 60


# Nginx Image
# =========================================
frontend_image: "docker.io/webrecorder/browsertrix-frontend:latest"
frontend_pull_policy: "Always"

frontend_requests_cpu: "3m"
frontend_limits_cpu: "30m"

frontend_requests_memory: "12Mi"
frontend_limits_memory: "40Mi"

# if set, maps nginx to a fixed port on the host machine
# must be between 30000 - 32767
# use for deployments on localhost when not using ingress
# local_service_port: 30870
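# (30000 - 32767 is the default Kubernetes NodePort range)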


# MongoDB Image
# =========================================
mongo_local: true

mongo_host: "local-mongo.default"

mongo_image: "docker.io/library/mongo:6.0.5"
mongo_pull_policy: "IfNotPresent"

mongo_requests_cpu: "12m"
mongo_limits_cpu: "128m"

mongo_requests_memory: "96Mi"
mongo_limits_memory: "512Mi"


mongo_auth:
  # specify either username + password (for local mongo)
  username: root
  password: PASSWORD!

  # or a full URL (for a remote mongo server)
  # db_url: mongodb+srv://...
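  # a hypothetical remote example (host and credentials are placeholders):
  # db_url: "mongodb+srv://btrix-user:PASSWORD@cluster0.example.net/"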


# Redis Image
# =========================================
redis_local: true

redis_image: "redis"
redis_pull_policy: "IfNotPresent"

redis_url: "redis://local-redis.default:6379/1"

redis_requests_cpu: "3m"
redis_limits_cpu: "48m"

redis_requests_memory: "10Mi"
redis_limits_memory: "64Mi"


# Crawler Image
# =========================================

crawler_image: "webrecorder/browsertrix-crawler:latest"
crawler_pull_policy: "IfNotPresent"

crawler_namespace: "crawlers"

# optional: enable to use a persistent volume claim for all crawls
# can be enabled to use a multi-write shared filesystem
# crawler_pv_claim: "nfs-shared-crawls"

# num retries
crawl_retries: 1000

# browsertrix-crawler args:
crawler_args: "--logging stats,behaviors,debug --generateWACZ --text --collection thecrawl --screencastPort 9037 --logErrorsToRedis --diskUtilization {{ .Values.disk_utilization_threshold | default 90 }} --waitOnDone"
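# note: the {{ .Values.disk_utilization_threshold | default 90 }} expression above is
# Helm templating, rendered at deploy time - with the defaults in this file the
# crawler receives "--diskUtilization 90"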

crawler_browser_instances: 2

crawler_requests_cpu: "800m"
crawler_limits_cpu: "1200m"

crawler_requests_memory: "512Mi"
crawler_limits_memory: "1024Mi"

# minimum storage allocated to each crawler
# should be at least double the crawl session size to ensure space for the WACZ
crawler_requests_storage: "22Gi"

# max size at which the crawler will commit the current crawl session
crawler_session_size_limit_bytes: "10000000000"

# max time in seconds after which the crawler will restart, if set
crawler_session_time_limit_seconds: 18000

crawler_liveness_port: 6065

# time to wait for graceful stop
grace_period: 1000


# Local Minio Pod (optional)
# =========================================
# set to true to use a local minio image
minio_local: true

minio_scheme: "http"
minio_host: "local-minio.default:9000"

minio_image: docker.io/minio/minio:RELEASE.2022-10-24T18-35-07Z
minio_mc_image: minio/mc
minio_pull_policy: "IfNotPresent"

minio_local_bucket_name: &local_bucket_name "btrix-data"
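# note: &local_bucket_name defines a YAML anchor; the storages section below
# references the same value via the *local_bucket_name alias so the two stay in sync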


# Storage
# =========================================
# should include the local minio bucket, if enabled, and any other available buckets for default storage

storages:
  - name: "default"
    access_key: "ADMIN"
    secret_key: "PASSW0RD"
    bucket_name: *local_bucket_name

    endpoint_url: "http://local-minio.default:9000/"
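  # additional storages may be listed here as well - a hypothetical second entry
  # (name, keys, bucket, and endpoint are all placeholders):
  # - name: "backup"
  #   access_key: "ACCESS_KEY"
  #   secret_key: "SECRET_KEY"
  #   bucket_name: "btrix-backup"
  #   endpoint_url: "https://s3.example.com/"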

# optional: if the above includes a separate storage for profiles, specify it here to store profiles separately from WACZ files
# may be useful if, for example, the WACZ files are public, while profiles should not be
# shared_storage_profile:


# Email Options
# =========================================
email:
  # email sending is enabled when 'smtp_host' is set to a non-empty value
  # ex: smtp_host: smtp.gmail.com
  smtp_host: ""
  smtp_port: 587
  sender_email: example@example.com
  password: password
  reply_to_email: example@example.com
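  # a hypothetical filled-in example (all values are placeholders):
  # smtp_host: smtp.gmail.com
  # smtp_port: 587
  # sender_email: crawls@example.com
  # password: app-specific-password
  # reply_to_email: support@example.com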


# Deployment options
# =========================================

# Ingress (Optional)
# if 'host' is set, a publicly accessible Ingress controller is created with an SSL cert (using Let's Encrypt)
ingress:
  # host: ""
  cert_email: "test@example.com"
  scheme: "http"
  tls: false

ingress_class: nginx
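# a hypothetical filled-in ingress config for a public deployment
# (hostname and email are placeholders):
# ingress:
#   host: "btrix.example.com"
#   cert_email: "admin@example.com"
#   scheme: "https"
#   tls: true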


# Signing Options
# =========================================
# optionally enable signer
signer:
  enabled: false
  image: webrecorder/authsign:0.5.0
  # host: <set to signer domain>
  # cert_email: "test@example.com"
  # image_pull_policy: "IfNotPresent"
  # auth_token: <set to custom value>

signer_requests_cpu: "3m"
signer_limits_cpu: "32m"

signer_requests_memory: "36Mi"
signer_limits_memory: "96Mi"


# Optional: configure load balancing annotations
# service:
#   annotations:
#     service.beta.kubernetes.io/aws-load-balancer-internal: "true"
#     helm.sh/resource-policy: keep

# Admin services (see Chart.yaml's dependencies)
# note: see `chart/examples/local-logging.yaml`
addons:
  admin:
    logging: false

# metacontroller: