# browsertrix/chart/values.yaml

# Settings
# =========================================
name: browsertrix-cloud
# when running in the cloud, set this value to cloud-specific block storage
# keep empty to use hostPath (e.g. on minikube)
volume_storage_class:
# if set, sets the node selector 'nodeType' for deployment pods
# main_node_type:
# if set, sets the node selector 'nodeType' for crawling pods
# crawler_node_type:
registration_enabled: "0"
jwt_token_lifetime_minutes: 1440
# default time to run behaviors on each page (in seconds)
default_behavior_time_seconds: 300
# default time to wait for page to fully load before running behaviors (in seconds)
default_page_load_time_seconds: 120
# disk utilization threshold percentage - when used disk space passes
# this percentage of total, crawls will gracefully stop to prevent the
# disk from being filled
disk_utilization_threshold: 90
# max pages per crawl
# set to a non-zero value to enforce a global max pages per crawl limit
# if set, each workflow can have a lower limit, but not higher
max_pages_per_crawl: 0
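# example (hypothetical value): cap every crawl at 1000 pages; individual
# workflows could then set a lower per-crawl limit, but not a higher one
# max_pages_per_crawl: 1000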
# if set to "1", allows inviting the same user to the same org multiple times
allow_dupe_invites: "0"
# number of seconds before pending invites expire - default is 7 days
invite_expire_seconds: 604800
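# (604800 = 60 * 60 * 24 * 7 seconds)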
# base url for replayweb.page
rwp_base_url: "https://replayweb.page/"
# default template for generated WACZ files
# supports the following interpolated vars:
# @ts - current timestamp
# @hostname - full hostname
# @hostsuffix - last 14 characters of hostname
# @id - full crawl id
default_crawl_filename_template: "@ts-@hostsuffix.wacz"
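# example (hypothetical): name files by timestamp and full crawl id instead
# default_crawl_filename_template: "@ts-@id.wacz"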
superuser:
  # set this to enable a superuser admin
  email: admin@example.com
  # optional: if not set, automatically generated
  # change or remove this
  password: PASSW0RD!
# Set name for default organization created with superuser
default_org: "My Organization"
# API Image
# =========================================
backend_image: "docker.io/webrecorder/browsertrix-backend:latest"
backend_pull_policy: "Always"
backend_password_secret: "c9085f33ecce4347aa1d69339e16c499"
# number of backend pods
backend_num_replicas: 1
# number of workers per pod
backend_workers: 2
backend_requests_cpu: "10m"
backend_limits_cpu: "768m"
backend_requests_memory: "100Mi"
backend_limits_memory: "512Mi"
# port for operator service
opPort: 8756
job_cpu: "3m"
job_memory: "70Mi"
profile_browser_idle_seconds: 60
# Nginx Image
# =========================================
frontend_image: "docker.io/webrecorder/browsertrix-frontend:latest"
frontend_pull_policy: "Always"
frontend_requests_cpu: "3m"
frontend_limits_cpu: "30m"
frontend_requests_memory: "12Mi"
frontend_limits_memory: "40Mi"
# if set, maps nginx to a fixed port on host machine
# must be between 30000 - 32767
# use for deployments on localhost when not using ingress
# local_service_port: 30870
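# e.g. with local_service_port: 30870, the frontend would be reachable at http://localhost:30870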
# MongoDB Image
# =========================================
mongo_local: true
mongo_host: "local-mongo.default"
mongo_image: "docker.io/library/mongo:6.0.5"
mongo_pull_policy: "IfNotPresent"
mongo_requests_cpu: "12m"
mongo_limits_cpu: "128m"
mongo_requests_memory: "96Mi"
mongo_limits_memory: "512Mi"
mongo_auth:
  # specify either username + password (for local mongo)
  username: root
  password: PASSWORD!
  # or full URL (for remote mongo server)
  # db_url: mongodb+srv://...
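  # example (hypothetical host and credentials) of a full remote connection string:
  # db_url: "mongodb+srv://USER:PASS@cluster0.example.net/"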
# Redis Image
# =========================================
redis_local: true
redis_image: "redis"
redis_pull_policy: "IfNotPresent"
redis_url: "redis://local-redis.default:6379/1"
redis_requests_cpu: "3m"
redis_limits_cpu: "48m"
redis_requests_memory: "10Mi"
redis_limits_memory: "64Mi"
# Crawler Image
# =========================================
crawler_image: "webrecorder/browsertrix-crawler:latest"
crawler_pull_policy: "IfNotPresent"
crawler_namespace: "crawlers"
# optional: enable to use a persistent volume claim for all crawls
# can be enabled to use a multi-write shared filesystem
# crawler_pv_claim: "nfs-shared-crawls"
# num retries
crawl_retries: 1000
# browsertrix-crawler args:
crawler_args: "--logging stats,behaviors,debug --generateWACZ --text --collection thecrawl --screencastPort 9037 --logErrorsToRedis --diskUtilization {{ .Values.disk_utilization_threshold | default 90 }} --waitOnDone"
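# note: the {{ ... }} expression above is a Helm template; assuming the chart
# renders this value with tpl, the default disk_utilization_threshold of 90
# yields: --diskUtilization 90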
crawler_browser_instances: 2
crawler_requests_cpu: "800m"
crawler_limits_cpu: "1200m"
crawler_requests_memory: "512Mi"
crawler_limits_memory: "1024Mi"
# minimum storage allocated to each crawler
# should be at least double the crawl session size to ensure space for the WACZ
crawler_requests_storage: "22Gi"
# max size (in bytes) at which the crawler will commit the current crawl session (10000000000 = 10 GB)
crawler_session_size_limit_bytes: "10000000000"
# max time in seconds after which the crawler will restart, if set (18000 = 5 hours)
crawler_session_time_limit_seconds: 18000
crawler_liveness_port: 6065
# time (in seconds) to wait for graceful stop
grace_period: 1000
# Local Minio Pod (optional)
# =========================================
# set to true to use a local minio image
minio_local: true
minio_scheme: "http"
minio_host: "local-minio.default:9000"
minio_image: docker.io/minio/minio:RELEASE.2022-10-24T18-35-07Z
minio_mc_image: minio/mc
minio_pull_policy: "IfNotPresent"
minio_local_bucket_name: &local_bucket_name "btrix-data"
# Storage
# =========================================
# should include the local minio bucket (if enabled) and any other buckets available for default storage (see the commented example below)
storages:
  - name: "default"
    access_key: "ADMIN"
    secret_key: "PASSW0RD"
    bucket_name: *local_bucket_name
    endpoint_url: "http://local-minio.default:9000/"
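  # example (hypothetical bucket and credentials): a second storage on a remote
  # S3 endpoint, e.g. for storing profiles separately (see 'shared_storage_profile' below)
  # - name: "profiles"
  #   access_key: "ACCESS_KEY"
  #   secret_key: "SECRET_KEY"
  #   bucket_name: "btrix-profiles"
  #   endpoint_url: "https://s3.us-east-1.amazonaws.com/"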
# optional: if the above includes a separate storage for profiles, specify it here to store profiles separately from WACZ files
# may be useful if, for example, the WACZ files are public while the profiles should not be
# shared_storage_profile:
# Email Options
# =========================================
email:
  # email sending is enabled when 'smtp_host' is set to a non-empty value
  # ex: smtp_host: smtp.gmail.com
  smtp_host: ""
  smtp_port: 587
  sender_email: example@example.com
  password: password
  reply_to_email: example@example.com
# Deployment options
# =========================================
# Ingress (Optional)
# Optional: if 'host' is set, a publicly accessible Ingress controller is created with an SSL cert (using Let's Encrypt)
ingress:
  # host: ""
  cert_email: "test@example.com"
  scheme: "http"
  tls: false
  ingress_class: nginx
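# example (hypothetical domain) enabling a public HTTPS deployment:
# ingress:
#   host: "btrix.example.com"
#   cert_email: "admin@example.com"
#   scheme: "https"
#   tls: true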
# Signing Options
# =========================================
# optionally enable signer
signer:
  enabled: false
  image: webrecorder/authsign:0.5.0
  # host: <set to signer domain>
  # cert_email: "test@example.com"
  # image_pull_policy: "IfNotPresent"
  # auth_token: <set to custom value>
signer_requests_cpu: "3m"
signer_limits_cpu: "32m"
signer_requests_memory: "36Mi"
signer_limits_memory: "96Mi"
# Optional: configure load balancing annotations
# service:
#   annotations:
#     service.beta.kubernetes.io/aws-load-balancer-internal: "true"
#     helm.sh/resource-policy: keep
# Admin services (see Chart.yaml's dependencies)
# note: see `chart/examples/local-logging.yaml`
addons:
  admin:
    logging: false

# metacontroller:
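
# a minimal sketch of deploying with these values (hypothetical release name
# and override file; assumes this file is the chart's default values.yaml):
#   helm upgrade --install btrix ./chart -f my-overrides.yaml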