* profile browser vnc support + fixes: - switch profile browser rendering to use VNC - frontend: add @novnc/novnc as dependency, create separate bundle novnc.js to load into vnc browser (to avoid loading from each container) - frontend: update proxy paths to proxy websocket, index page to crawler - frontend: allow browser profiles in all browsers, remove browser compatibility check - frontend: update webpack dev config, apply prettier - frontend: node version fix - backend: get vncpassword, build new URL for proxying to crawler iframe - backend: fix profile / crawl job pull policy from 'Always' -> 'Never', should use existing image for job - backend: fix kill signal to use bash -c to work with latest backend image - backend/chart: add 'profile_browser_timeout_seconds' to chart values to control how long profile browser to remain when idle (default to 60) - backend: remove utils.py, now using secret.token_hex() for random suffix Co-authored-by: sua yoo <sua@suayoo.com>
230 lines
5.3 KiB
YAML
230 lines
5.3 KiB
YAML
# Settings
|
|
# =========================================
|
|
name: browsertrix-cloud
|
|
|
|
# when running in the cloud, set this value to cloud-specific block storage
|
|
# keep empty to use hostPath (eg. on minikube)
|
|
volume_storage_class:
|
|
|
|
# if set, set the node selector 'nodeType' for deployment pods
|
|
# main_node_type:
|
|
|
|
# if set, set the node selector 'nodeType' to this crawling pods
|
|
# crawler_node_type:
|
|
|
|
registration_enabled: "0"
|
|
jwt_token_lifetime_minutes: 1440
|
|
|
|
# number of workers for backend api
|
|
backend_workers: 4
|
|
|
|
# base url for replayweb.page
|
|
rwp_base_url: "https://replayweb.page/"
|
|
|
|
superuser:
|
|
# set this to enable a superuser admin
|
|
email: admin@example.com
|
|
|
|
# optional: if not set, automatically generated
|
|
# change or remove this
|
|
password: PASSW0RD!
|
|
|
|
|
|
# API Image
|
|
# =========================================
|
|
api_image: "docker.io/webrecorder/browsertrix-backend:latest"
|
|
api_pull_policy: "Always"
|
|
|
|
api_password_secret: "c9085f33ecce4347aa1d69339e16c499"
|
|
|
|
api_num_replicas: 1
|
|
|
|
api_requests_cpu: "10m"
|
|
api_limits_cpu: "768m"
|
|
|
|
api_requests_memory: "100Mi"
|
|
api_limits_memory: "512Mi"
|
|
|
|
job_cpu: "3m"
|
|
job_memory: "70Mi"
|
|
|
|
profile_browser_idle_seconds: 60
|
|
|
|
|
|
# Nginx Image
|
|
# =========================================
|
|
nginx_image: "docker.io/webrecorder/browsertrix-frontend:latest"
|
|
nginx_pull_policy: "Always"
|
|
|
|
nginx_requests_cpu: "3m"
|
|
nginx_limits_cpu: "10m"
|
|
|
|
nginx_requests_memory: "12Mi"
|
|
nginx_limits_memory: "20Mi"
|
|
|
|
# if set, maps nginx to a fixed port on host machine
|
|
# must be between 30000 - 32767
|
|
# use for deployments on localhost when not using ingress
|
|
# local_service_port: 30870
|
|
|
|
|
|
# MongoDB Image
|
|
# =========================================
|
|
mongo_local: true
|
|
|
|
mongo_host: "local-mongo.default"
|
|
|
|
mongo_image: "docker.io/library/mongo:5.0.11"
|
|
mongo_pull_policy: "IfNotPresent"
|
|
|
|
mongo_requests_cpu: "12m"
|
|
mongo_limits_cpu: "128m"
|
|
|
|
mongo_requests_memory: "96Mi"
|
|
mongo_limits_memory: "512Mi"
|
|
|
|
|
|
mongo_auth:
|
|
# specify either username + password (for local mongo)
|
|
username: root
|
|
password: PASSWORD!
|
|
|
|
# or full URL (for remote mongo server)
|
|
# db_url: mongodb+srv://...
|
|
|
|
|
|
# Redis Image
|
|
# =========================================
|
|
redis_local: true
|
|
|
|
redis_image: "redis"
|
|
redis_pull_policy: "IfNotPresent"
|
|
|
|
redis_url: "redis://local-redis.default:6379/1"
|
|
|
|
redis_requests_cpu: "3m"
|
|
redis_limits_cpu: "48m"
|
|
|
|
redis_requests_memory: "10Mi"
|
|
redis_limits_memory: "64Mi"
|
|
|
|
|
|
|
|
# Crawler Image
|
|
# =========================================
|
|
|
|
crawler_image: "webrecorder/browsertrix-crawler:latest"
|
|
crawler_pull_policy: "IfNotPresent"
|
|
|
|
crawler_namespace: "crawlers"
|
|
|
|
# optional: enable to use a persist volume claim for all crawls
|
|
# can be enabled to use a multi-write shared filesystem
|
|
# crawler_pv_claim: "nfs-shared-crawls"
|
|
|
|
# num retries
|
|
crawl_retries: 1000
|
|
|
|
# browsertrix-crawler args:
|
|
crawler_args: "--timeout 120 --logging stats,behaviors,debug --generateWACZ --text --workers 4 --collection thecrawl --screencastPort 9037 --sizeLimit 100000000000 --timeLimit 18000 --healthCheckPort 6065 --waitOnDone --behaviorTimeout 300"
|
|
|
|
crawler_requests_cpu: "800m"
|
|
crawler_limits_cpu: "1200m"
|
|
|
|
crawler_requests_memory: "512Mi"
|
|
crawler_limits_memory: "768Mi"
|
|
|
|
crawler_requests_storage: "220Gi"
|
|
|
|
crawler_liveness_port: 6065
|
|
|
|
# time to wait for graceful stop
|
|
grace_period: 1000
|
|
|
|
|
|
|
|
# debug
|
|
no_delete_jobs: 0
|
|
|
|
|
|
# Local Minio Pod (optional)
|
|
# =========================================
|
|
# set to true to use a local minio image
|
|
minio_local: true
|
|
|
|
minio_scheme: "http"
|
|
minio_host: "local-minio.default:9000"
|
|
|
|
minio_image: docker.io/minio/minio:RELEASE.2022-10-24T18-35-07Z
|
|
minio_mc_image: minio/mc
|
|
minio_pull_policy: "IfNotPresent"
|
|
|
|
minio_local_bucket_name: &local_bucket_name "btrix-data"
|
|
|
|
|
|
# Storage
|
|
# =========================================
|
|
# should include the local minio bucket, if enabled, and any other available buckets for default storage
|
|
|
|
storages:
|
|
- name: "default"
|
|
access_key: "ADMIN"
|
|
secret_key: "PASSW0RD"
|
|
bucket_name: *local_bucket_name
|
|
|
|
endpoint_url: "http://local-minio.default:9000/"
|
|
|
|
# optional: if above includes a separate storage for profiles, specify here to store profiles separately from wacz files
|
|
# may be useful if, for example, the wacz files are public, while profiles should not be
|
|
# shared_storage_profile:
|
|
|
|
|
|
# Email Options
|
|
# =========================================
|
|
email:
|
|
# email sending is enabled when 'smtp_host' is set to non-empty value
|
|
#ex: smtp_host: smtp.gmail.com
|
|
smtp_host: ""
|
|
smtp_port: 587
|
|
sender_email: example@example.com
|
|
password: password
|
|
|
|
|
|
# Deployment options
|
|
# =========================================
|
|
|
|
# Ingress (Optional)
|
|
# Optional: if 'host' is set, a publicly accessible Ingress controller is created with an SSL cert (using letsencrypt)
|
|
ingress:
|
|
#host: ""
|
|
cert_email: "test@example.com"
|
|
scheme: "http"
|
|
tls: false
|
|
|
|
ingress_class: nginx
|
|
|
|
|
|
# Signing Options
|
|
# =========================================
|
|
# optionally enable signer
|
|
signer:
|
|
enabled: false
|
|
image: webrecorder/authsign:0.5.0
|
|
# host: <set to signer domain>
|
|
# cert_email: "test@example.com
|
|
# image_pull_policy: "IfNotPresent"
|
|
# auth_token: <set to custom value>
|
|
|
|
signer_requests_cpu: "3m"
|
|
signer_limits_cpu: "32m"
|
|
|
|
signer_requests_memory: "36Mi"
|
|
signer_limits_memory: "96Mi"
|
|
|
|
|
|
# Optional: configure load balancing annotations
|
|
# service:
|
|
# annotations:
|
|
# service.beta.kubernetes.io/aws-load-balancer-internal: "true"
|
|
# helm.sh/resource-policy: keep
|