* k8s: add tolerations for 'nodeType=crawling:NoSchedule' to allow scheduling crawling on designated nodes for crawler and profiles jobs and statefulsets * add affinity for 'nodeType=crawling' on crawling and profile browser statefulsets * refactor crawljob: combine crawl_updater logic into base crawl_job * increment new 'crawlAttemptCount' counter crawlconfig when crawl is started, not necessarily finished, to avoid deleting configs that had attempted but not finished crawls. * better external mongodb support: use MONGO_DB_URL to set custom url directly, otherwise build from username, password and mongo host
		
			
				
	
	
		
			209 lines
		
	
	
		
			4.7 KiB
		
	
	
	
		
			YAML
		
	
	
	
	
	
			
		
		
	
	
			209 lines
		
	
	
		
			4.7 KiB
		
	
	
	
		
			YAML
		
	
	
	
	
	
| # Settings
 | |
| # =========================================
 | |
| name: browsertrix-cloud
 | |
| 
 | |
| # when running in the cloud, set this value to cloud-specific block storage
 | |
| # keep empty to use hostPath (eg. on minikube)
 | |
| volume_storage_class:
 | |
| 
 | |
| # if set, set the node selector 'nodeType' for deployment pods
 | |
| # main_node_type:
 | |
| 
 | |
| # if set, set the node selector 'nodeType' to this value for crawling pods
 | |
| # crawler_node_type:
 | |
| 
 | |
| registration_enabled: 1
 | |
| jwt_token_lifetime_minutes: 60
 | |
| 
 | |
| # number of workers for backend api
 | |
| backend_workers: 4
 | |
| 
 | |
| # base url for replayweb.page
 | |
| rwp_base_url: "https://replayweb.page/"
 | |
| 
 | |
| superuser:
 | |
|   # set this to enable a superuser admin
 | |
|   email: admin@example.com
 | |
| 
 | |
|   # optional: if not set, automatically generated
 | |
|   password:
 | |
| 
 | |
| 
 | |
| # API Image
 | |
| # =========================================
 | |
| api_image: "webrecorder/browsertrix-api"
 | |
| api_pull_policy: "Never"
 | |
| 
 | |
| api_password_secret: "c9085f33ecce4347aa1d69339e16c499"
 | |
| 
 | |
| api_num_replicas: 1
 | |
| 
 | |
| api_requests_cpu: "10m"
 | |
| api_limits_cpu: "128m"
 | |
| 
 | |
| api_requests_memory: "100Mi"
 | |
| api_limits_memory: "192Mi"
 | |
| 
 | |
| 
 | |
| # Nginx Image
 | |
| # =========================================
 | |
| nginx_image: "nginx"
 | |
| nginx_pull_policy: "IfNotPresent"
 | |
| 
 | |
| nginx_num_replicas: 1
 | |
| 
 | |
| nginx_requests_cpu: "3m"
 | |
| nginx_limits_cpu: "10m"
 | |
| 
 | |
| nginx_requests_memory: "4Mi"
 | |
| nginx_limits_memory: "12Mi"
 | |
| 
 | |
| 
 | |
| # MongoDB Image
 | |
| # =========================================
 | |
| mongo_local: true
 | |
| 
 | |
| mongo_host: "local-mongo.default"
 | |
| 
 | |
| mongo_image: "mongo"
 | |
| mongo_pull_policy: "IfNotPresent"
 | |
| 
 | |
| mongo_requests_cpu: "12m"
 | |
| mongo_limits_cpu: "128m"
 | |
| 
 | |
| mongo_requests_memory: "96Mi"
 | |
| mongo_limits_memory: "512Mi"
 | |
| 
 | |
| 
 | |
| mongo_auth:
 | |
|   username: root
 | |
|   password: example 
 | |
| 
 | |
| 
 | |
| # Redis Image
 | |
| # =========================================
 | |
| redis_local: true
 | |
| 
 | |
| redis_image: "redis"
 | |
| redis_pull_policy: "IfNotPresent"
 | |
| 
 | |
| redis_url: "redis://local-redis.default:6379/1"
 | |
| 
 | |
| redis_requests_cpu: "3m"
 | |
| redis_limits_cpu: "48m"
 | |
| 
 | |
| redis_requests_memory: "10Mi"
 | |
| redis_limits_memory: "64Mi"
 | |
| 
 | |
| 
 | |
| 
 | |
| # Crawler Image
 | |
| # =========================================
 | |
| 
 | |
| crawler_image: "webrecorder/browsertrix-crawler:latest"
 | |
| crawler_pull_policy: "IfNotPresent"
 | |
| 
 | |
| crawler_namespace: "crawlers"
 | |
| 
 | |
| # optional: enable to use a persistent volume claim for all crawls
 | |
| # can be enabled to use a multi-write shared filesystem
 | |
| # crawler_pv_claim: "nfs-shared-crawls"
 | |
| 
 | |
| # num retries
 | |
| crawl_retries: 1000
 | |
| 
 | |
| # browsertrix-crawler args:
 | |
| crawler_args: "--timeout 90 --logging stats,behaviors,debug --generateWACZ --text --workers 4 --collection thecrawl --screencastPort 9037 --sizeLimit 100000000000 --timeLimit 18000 --healthCheckPort 6065 --waitOnDone"
 | |
| 
 | |
| crawler_requests_cpu: "800m"
 | |
| crawler_limits_cpu: "1200m"
 | |
| 
 | |
| crawler_requests_memory: "512Mi"
 | |
| crawler_limits_memory: "768Mi"
 | |
| 
 | |
| crawler_requests_hd: "220Gi"
 | |
| 
 | |
| # debug
 | |
| no_delete_jobs: 1
 | |
| 
 | |
| 
 | |
| # Local Minio Pod (optional)
 | |
| # =========================================
 | |
| # set to true to use a local minio image
 | |
| minio_local: true
 | |
| 
 | |
| minio_scheme: "http"
 | |
| minio_host: "local-minio.default:9000"
 | |
| 
 | |
| minio_image: minio/minio
 | |
| minio_mc_image: minio/mc
 | |
| minio_pull_policy: "IfNotPresent"
 | |
| 
 | |
| minio_local_bucket_name: &local_bucket_name "test-bucket"
 | |
| 
 | |
| 
 | |
| # Storage
 | |
| # =========================================
 | |
| # should include the local minio bucket, if enabled, and any other available buckets for default storage
 | |
| 
 | |
| storages:
 | |
|   - name: "default"
 | |
|     access_key: "ADMIN"
 | |
|     secret_key: "PASSW0RD"
 | |
|     bucket_name: *local_bucket_name
 | |
| 
 | |
|     endpoint_url: "http://local-minio.default:9000/"
 | |
| 
 | |
| # optional: if above includes a separate storage for profiles, specify here to store profiles separately from wacz files
 | |
| # may be useful if, for example, the wacz files are public, while profiles should not be
 | |
| # shared_storage_profile:
 | |
| 
 | |
| 
 | |
| # Email Options
 | |
| # =========================================
 | |
| email:
 | |
|   # email sending is enabled when 'smtp_host' is set to non-empty value
 | |
|   # e.g. smtp_host: smtp.gmail.com
 | |
|   smtp_host: ""
 | |
|   smtp_port: 587
 | |
|   sender_email: example@example.com
 | |
|   password: password
 | |
| 
 | |
| 
 | |
| # Deployment options
 | |
| # =========================================
 | |
| 
 | |
| # Ingress (Optional)
 | |
| # Optional: if 'host' is set, a publicly accessible Ingress controller is created with an SSL cert (using letsencrypt)
 | |
| ingress:
 | |
|   host: "btrix.cloud"
 | |
|   cert_email: "test@example.com"
 | |
|   scheme: "http"
 | |
|   tls: false
 | |
| 
 | |
| 
 | |
| # Signing Options
 | |
| # =========================================
 | |
| # optionally enable signer
 | |
| signer:
 | |
|   enabled: false
 | |
|   # host: <set to signer domain>
 | |
|   # cert_email: "test@example.com"
 | |
|   # image: webrecorder/authsign:0.4.0
 | |
|   # image_pull_policy: "IfNotPresent"
 | |
|   # auth_token: <set to custom value>
 | |
| 
 | |
| signer_requests_cpu: "3m"
 | |
| signer_limits_cpu: "32m"
 | |
| 
 | |
| signer_requests_memory: "36Mi"
 | |
| signer_limits_memory: "96Mi"
 | |
| 
 | |
| 
 | |
| # Optional: configure load balancing
 | |
| service:
 | |
|   type: ClusterIP
 | |
| #   annotations:
 | |
| #     service.beta.kubernetes.io/aws-load-balancer-internal: "true"
 | |
| #     helm.sh/resource-policy: keep
 |