Resolves #1354 Supports crawling through pre-configured proxy servers, allowing users to select which proxy servers to use (requires browsertrix crawler 1.3+) Config: - proxies defined in btrix-proxies subchart - can be configured via btrix-proxies key or separate proxies.yaml file via separate subchart - proxies list refreshed automatically if crawler_proxies.json changes if subchart is deployed - support for ssh and socks5 proxies - proxy keys added to secrets in subchart - support for default proxy to be always used if no other proxy configured, prevent starting cluster if default proxy not available - prevent starting manual crawl if previously configured proxy is no longer available, return error - force 'btrix' username and group name on browsertrix-crawler non-root user to support ssh Operator: - support crawling through proxies, pass proxyId in CrawlJob - support running profile browsers with a designated proxy, pass proxyId to ProfileJob - prevent starting scheduled crawl if previously configured proxy is no longer available API / Access: - /api/orgs/all/crawlconfigs/crawler-proxies - get all proxies (superadmin only) - /api/orgs/{oid}/crawlconfigs/crawler-proxies - get proxies available to particular org - /api/orgs/{oid}/proxies - update allowed proxies for particular org (superadmin only) - superadmin can configure which orgs can use which proxies, stored on the org - superadmin can also allow an org to access all 'shared' proxies, to avoid having to allow a shared proxy on each org. UI: - Superadmin has 'Edit Proxies' dialog to configure for each org if it has: dedicated proxies, has access to shared proxies. 
- User can select a proxy in Crawl Workflow browser settings - Users can choose to launch a browser profile with a particular proxy - Display which proxy is used to create profile in profile selector - Users can choose which default proxy to use for new workflows in Crawling Defaults --------- Co-authored-by: Ilya Kreymer <ikreymer@gmail.com> Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
		
			
				
	
	
		
			81 lines
		
	
	
		
			2.1 KiB
		
	
	
	
		
			YAML
		
	
	
	
	
	
			
		
		
	
	
			81 lines
		
	
	
		
			2.1 KiB
		
	
	
	
		
			YAML
		
	
	
	
	
	
---
# Secret `backend-auth`: password secret, superuser credentials and
# e-mail settings, all rendered from chart values.
apiVersion: v1
kind: Secret
type: Opaque
metadata:
  name: backend-auth
  namespace: {{ .Release.Namespace }}

stringData:
  # from .Values.backend_password_secret
  PASSWORD_SECRET: "{{ .Values.backend_password_secret }}"

  # from .Values.superuser
  SUPERUSER_EMAIL: "{{ .Values.superuser.email }}"
  SUPERUSER_PASSWORD: "{{ .Values.superuser.password }}"

  # from .Values.email; every value is rendered as a quoted string,
  # including the port and the TLS flag
  EMAIL_SMTP_HOST: "{{ .Values.email.smtp_host }}"
  EMAIL_SMTP_PORT: "{{ .Values.email.smtp_port }}"
  EMAIL_SMTP_USE_TLS: "{{ .Values.email.use_tls }}"
  EMAIL_PASSWORD: "{{ .Values.email.password }}"
  EMAIL_SENDER: "{{ .Values.email.sender_email }}"
  EMAIL_REPLY_TO: "{{ .Values.email.reply_to }}"
  EMAIL_SUPPORT: "{{ .Values.email.support_email }}"
---
# Secret `ops-configs`: chart values serialized to JSON documents.
apiVersion: v1
kind: Secret
type: Opaque
metadata:
  name: ops-configs
  namespace: {{ .Release.Namespace }}

data:
  # entries under `data` are base64 strings: JSON-encode, base64-encode, quote
  crawler_channels.json: {{ toJson .Values.crawler_channels | b64enc | quote }}
  storages.json: {{ toJson .Values.storages | b64enc | quote }}
{{- range $storage := .Values.storages }}
---
# One Secret per entry in .Values.storages, named storage-<name> and
# created in the namespace given by .Values.crawler_namespace.
apiVersion: v1
kind: Secret
metadata:
  name: storage-{{ $storage.name }}
  namespace: {{ $.Values.crawler_namespace }}

type: Opaque
stringData:
  TYPE: "s3"
  STORE_ACCESS_KEY: "{{ $storage.access_key }}"
  STORE_SECRET_KEY: "{{ $storage.secret_key }}"

  # with a bucket name set, the bucket and a trailing slash are appended
  # to the endpoint URL; otherwise the endpoint URL is used unchanged
  {{- if $storage.bucket_name }}
  STORE_ENDPOINT_URL: "{{ $storage.endpoint_url }}{{ $storage.bucket_name }}/"
  {{- else }}
  STORE_ENDPOINT_URL: "{{ $storage.endpoint_url }}"
  {{- end }}
  STORE_ENDPOINT_NO_BUCKET_URL: "{{ $storage.endpoint_url }}"

  STORE_REGION: "{{ $storage.region }}"

  # falls back to "Other" when s3_provider is unset; quoted like the other
  # values so the rendered scalar is always a YAML string
  STORE_S3_PROVIDER: {{ $storage.s3_provider | default "Other" | quote }}

{{- end }}
---
apiVersion: v1
kind: Secret
metadata:
  name: force-user-and-group-name
  namespace: {{ .Values.crawler_namespace }}
type: Opaque
stringData:

  # slightly hacky: override /etc/passwd and /etc/group in crawler
  # this is needed to be able to use ssh to use proxies
  #
  # uid/gid come from .Values.crawler_uid / .Values.crawler_gid and
  # default to 201407 when unset.
  #
  # NOTE: keep the lines inside the block scalars free of trailing
  # whitespace; in a literal block scalar it becomes part of the value
  # (e.g. it would be appended to the shell field of the passwd entry).
  passwd: |
    root:x:0:0:root:/root:/bin/bash
    btrix:btrix:{{ .Values.crawler_uid | default 201407 }}:{{ .Values.crawler_gid | default 201407 }}::/tmp/btrix:/bin/sh

  group: |
    root:x:0:
    btrix:x:{{ .Values.crawler_gid | default 201407 }}: