Backend work for #2524. This PR adds a second dedicated endpoint similar to `/errors`, as a combined log endpoint would give a false impression of being the complete crawl logs (which is far from what we're serving in Browsertrix at this point). Eventually, when we have support for streaming live crawl logs in `crawls/<id>/logs`, I'd ideally like to deprecate these two dedicated endpoints in favor of using that, but for now this seems like the best solution. --------- Co-authored-by: Ilya Kreymer <ikreymer@gmail.com>
		
			
				
	
	
		
			212 lines
		
	
	
		
			6.9 KiB
		
	
	
	
		
			YAML
		
	
	
	
	
	
			
		
		
	
	
			212 lines
		
	
	
		
			6.9 KiB
		
	
	
	
		
			YAML
		
	
	
	
	
	
---
# ConfigMap "backend-env-config": flat string settings rendered from chart
# values. Every templated value is quoted so that numeric-looking or empty
# chart values still render as strings, which ConfigMap data requires.
apiVersion: v1
kind: ConfigMap
metadata:
  name: backend-env-config
  namespace: "{{ .Release.Namespace }}"

data:
  # Scheme is https when ingress.tls is truthy, otherwise http. Host is
  # ingress.host, falling back to localhost on local_service_port
  # (default 9870) when no ingress host is set.
  APP_ORIGIN: "{{ .Values.ingress.tls | ternary "https" "http" }}://{{ or .Values.ingress.host ( print "localhost:" ( .Values.local_service_port | default 9870 )) }}"

  CRAWLER_NAMESPACE: "{{ .Values.crawler_namespace }}"

  DEFAULT_NAMESPACE: "{{ .Release.Namespace }}"

  FRONTEND_ORIGIN: "{{ .Values.frontend_alias | default "http://browsertrix-cloud-frontend" }}"

  CRAWLER_FQDN_SUFFIX: ".{{ .Values.crawler_namespace }}.svc.cluster.local"

  DEFAULT_ORG: "{{ .Values.default_org }}"

  INVITE_EXPIRE_SECONDS: "{{ .Values.invite_expire_seconds }}"

  REGISTRATION_ENABLED: "{{ .Values.registration_enabled | default 0 }}"

  REGISTER_TO_ORG_ID: "{{ .Values.registration_org_id }}"

  ALLOW_DUPE_INVITES: "{{ .Values.allow_dupe_invites | default 0 }}"

  JWT_TOKEN_LIFETIME_MINUTES: "{{ .Values.jwt_token_lifetime_minutes | default 60 }}"

  DEFAULT_BEHAVIOR_TIME_SECONDS: "{{ .Values.default_behavior_time_seconds }}"

  DEFAULT_PAGE_LOAD_TIME_SECONDS: "{{ .Values.default_page_load_time_seconds }}"

  DEFAULT_CRAWL_FILENAME_TEMPLATE: "{{ .Values.default_crawl_filename_template }}"

  DEFAULT_CRAWLER_IMAGE_PULL_POLICY: "{{ .Values.crawler_pull_policy }}"

  MAX_PAGES_PER_CRAWL: "{{ .Values.max_pages_per_crawl | default 0 }}"

  IDLE_TIMEOUT: "{{ .Values.profile_browser_idle_seconds | default 60 }}"

  RERUN_FROM_MIGRATION: "{{ .Values.rerun_from_migration }}"
  MIGRATION_JOBS_SCALE: "{{ .Values.migration_jobs_scale | default 1 }}"

  PRESIGN_DURATION_MINUTES: "{{ .Values.storage_presign_duration_minutes }}"

  FAST_RETRY_SECS: "{{ .Values.operator_fast_resync_secs | default 3 }}"

  MAX_CRAWL_SCALE: "{{ .Values.max_crawl_scale | default 3 }}"

  LOG_FAILED_CRAWL_LINES: "{{ .Values.log_failed_crawl_lines | default 0 }}"

  IS_LOCAL_MINIO: "{{ .Values.minio_local }}"

  # Fixed file paths (not templated).
  STORAGES_JSON: "/ops-configs/storages.json"

  CRAWLER_CHANNELS_JSON: "/ops-configs/crawler_channels.json"

  CRAWLER_PROXIES_LAST_UPDATE: "/ops-proxy-configs/crawler_proxies_last_update"
  CRAWLER_PROXIES_JSON: "/ops-proxy-configs/crawler_proxies.json"

  DEFAULT_PROXY_ID: "{{ .Values.default_proxy }}"

  MIN_QA_CRAWLER_IMAGE: "{{ .Values.min_qa_crawler_image }}"

  MIN_AUTOCLICK_CRAWLER_IMAGE: "{{ .Values.min_autoclick_crawler_image }}"

  NUM_BROWSERS: "{{ .Values.crawler_browser_instances }}"

  MAX_CRAWLER_MEMORY: "{{ .Values.max_crawler_memory }}"

  CRAWLER_MIN_AVAIL_STORAGE_RATIO: "{{ .Values.crawler_min_avail_storage_ratio }}"

  ENABLE_AUTO_RESIZE_CRAWLERS: "{{ .Values.enable_auto_resize_crawlers }}"

  BILLING_ENABLED: "{{ .Values.billing_enabled }}"

  SIGN_UP_URL: "{{ .Values.sign_up_url }}"

  SALES_EMAIL: "{{ .Values.sales_email }}"

  USER_SURVEY_URL: "{{ .Values.user_survey_url }}"

  LOG_SENT_EMAILS: "{{ .Values.email.log_sent_emails }}"

  BACKEND_IMAGE: "{{ .Values.backend_image }}"

  BACKEND_IMAGE_PULL_POLICY: "{{ .Values.backend_pull_policy }}"

  LOCALES_ENABLED: "{{ .Values.locales_enabled }}"

  REPLICA_DELETION_DELAY_DAYS: "{{ .Values.replica_deletion_delay_days | default 0 }}"

---
# ConfigMap "shared-crawler-config": crawler command-line arguments, built
# once as a named template and reused for both keys below.
apiVersion: v1
kind: ConfigMap
metadata:
  name: shared-crawler-config
  namespace: "{{ .Values.crawler_namespace }}"

data:
  # The named template body starts with a space and has its trailing
  # whitespace trimmed, so each include below renders as "KEY: --sizeLimit ...".
  # Keep the definition on one line: its whitespace is part of the output.
  {{- define "btrix.crawler_args" }} --sizeLimit {{ .Values.crawler_session_size_limit_bytes }} --timeLimit {{ .Values.crawler_session_time_limit_seconds }} --maxPageLimit {{ .Values.max_pages_per_crawl | default 0 }} --healthCheckPort {{ .Values.crawler_liveness_port }} --diskUtilization {{ .Values.disk_utilization_threshold }} --logging {{ .Values.crawler_logging_opts }} --text {{ .Values.crawler_extract_full_text }} --generateWACZ --collection thecrawl --screencastPort 9037 --logErrorsToRedis --logBehaviorsToRedis --writePagesToRedis --restartsOnError --headless --screenshot view,thumbnail {{ .Values.crawler_extra_args }} {{- end }}

  CRAWL_ARGS: {{- include "btrix.crawler_args" . }}

  # disable behaviors for QA runs, otherwise use same args
  QA_ARGS: {{- include "btrix.crawler_args" . }} --behaviors=""

---
# ConfigMap "shared-job-config": carries one embedded document, config.yaml,
# whose settings are rendered from chart values.
apiVersion: v1
kind: ConfigMap
metadata:
  name: shared-job-config
  # Deployed to the release namespace; the crawler namespace is passed
  # inside config.yaml instead.
  namespace: "{{ .Release.Namespace }}"

data:
  # Everything below the literal-block marker is the text of config.yaml.
  # volume_storage_class appears exactly once: duplicate mapping keys are
  # invalid YAML and most parsers silently keep only the last one.
  # signing_secret renders empty unless signer.enabled is set and
  # signer.auth_token is non-empty.
  config.yaml: |
    namespace: {{ .Values.crawler_namespace }}
    termination_grace_secs: "{{ .Values.grace_period_secs | default 600 }}"

    volume_storage_class: "{{ .Values.volume_storage_class }}"

    # redis
    redis_image: {{ .Values.redis_image }}
    redis_image_pull_policy: {{ .Values.redis_pull_policy }}

    redis_cpu: "{{ .Values.redis_cpu }}"

    redis_memory: "{{ .Values.redis_memory }}"

    redis_storage: "{{ .Values.redis_storage }}"

    # crawler
    crawler_image_pull_policy: {{ .Values.crawler_pull_policy }}

    crawler_cpu_base: "{{ .Values.crawler_cpu_base }}"
    crawler_memory_base: "{{ .Values.crawler_memory_base }}"

    crawler_extra_cpu_per_browser: "{{ .Values.crawler_extra_cpu_per_browser | default 0 }}"
    crawler_extra_memory_per_browser: "{{ .Values.crawler_extra_memory_per_browser | default 0 }}"

    crawler_browser_instances: "{{ .Values.crawler_browser_instances }}"
    qa_browser_instances: "{{ .Values.qa_browser_instances }}"

    crawler_cpu: "{{ .Values.crawler_cpu }}"
    crawler_memory: "{{ .Values.crawler_memory }}"

    crawler_storage: "{{ .Values.crawler_storage }}"

    profile_browser_cpu: "{{ .Values.profile_browser_cpu }}"
    profile_browser_memory: "{{ .Values.profile_browser_memory }}"

    crawler_liveness_port: "{{ .Values.crawler_liveness_port | default 0 }}"

    crawler_socks_proxy_host: "{{ .Values.crawler_socks_proxy_host }}"
    crawler_socks_proxy_port: "{{ .Values.crawler_socks_proxy_port }}"

    crawler_uid: "{{ .Values.crawler_uid | default 201407 }}"
    crawler_gid: "{{ .Values.crawler_gid | default 201407 }}"
    crawler_fsgroup: "{{ .Values.crawler_fsgroup | default 201407 }}"

    profile_browser_workdir_size: "{{ .Values.profile_browser_workdir_size | default "4Gi" }}"

    qa_scale: "{{ .Values.qa_scale | default 1 }}"

    crawler_node_type: "{{ .Values.crawler_node_type }}"
    redis_node_type: "{{ .Values.redis_node_type }}"

    signing_secret: {{ and .Values.signer.enabled (not (empty .Values.signer.auth_token)) | ternary "signing-secret" "" }}

---
# ConfigMap "shared-redis-conf": a static redis.conf, created in the crawler
# namespace.
apiVersion: v1
kind: ConfigMap
metadata:
  name: shared-redis-conf
  namespace: "{{ .Values.crawler_namespace }}"

data:
  # Literal block: each line below is one redis.conf directive. No trailing
  # whitespace, since inside a block scalar it would become part of the file.
  redis.conf: |
    appendonly yes
    dir /data

---
# ConfigMap "app-templates": one data entry per chart file matching
# app-templates/*.yaml.
apiVersion: v1
kind: ConfigMap
metadata:
  name: app-templates
  namespace: "{{ .Release.Namespace }}"

data:
# The glob result is rendered as ConfigMap entries and indented two spaces
# so the entries nest under data.
{{ (.Files.Glob "app-templates/*.yaml").AsConfig | indent 2 }}

---
# ConfigMap "email-templates": one literal-block entry per template name
# listed in the loop below.
apiVersion: v1
kind: ConfigMap
metadata:
  name: email-templates
  namespace: "{{ .Release.Namespace }}"

data:
# For each name, the body comes from the email.templates chart value when an
# entry with that name is set; otherwise it falls back to the chart file of
# the same name under email-templates/. The body is indented four spaces so
# it sits inside the block scalar.
{{- $email_templates := .Values.email.templates | default dict }}
{{- range tuple "failed_bg_job" "invite" "password_reset" "validate" "sub_cancel" }}
  {{ . }}: |
{{ ((get $email_templates . ) | default ($.Files.Get (printf "email-templates/%s" . ))) | indent 4 }}
  {{- end }}