browsertrix/chart/templates/backend.yaml
Vinzenz Sinapius bb6e703f6a
Configure browsertrix proxies (#1847)
Resolves #1354

Supports crawling through pre-configured proxy servers, allowing users to select which proxy servers to use (requires browsertrix crawler 1.3+)

Config:
- proxies defined in btrix-proxies subchart
- can be configured via btrix-proxies key or separate proxies.yaml file via separate subchart
- proxies list is refreshed automatically when crawler_proxies.json changes, if the subchart is deployed
- support for ssh and socks5 proxies
- proxy keys added to secrets in subchart
- support for default proxy to be always used if no other proxy configured, prevent starting cluster if default proxy not available
- prevent starting manual crawl if previously configured proxy is no longer available, return error
- force 'btrix' username and group name on browsertrix-crawler non-root user to support ssh

Operator:
- support crawling through proxies, pass proxyId in CrawlJob
- support running profile browsers with a designated proxy, pass proxyId to ProfileJob
- prevent starting scheduled crawl if previously configured proxy is no longer available

API / Access:
- /api/orgs/all/crawlconfigs/crawler-proxies - get all proxies (superadmin only)
- /api/orgs/{oid}/crawlconfigs/crawler-proxies - get proxies available to particular org
- /api/orgs/{oid}/proxies - update allowed proxies for particular org (superadmin only)
- superadmin can configure which orgs can use which proxies, stored on the org
- superadmin can also allow an org to access all 'shared' proxies, to avoid having to allow a shared proxy on each org.

UI:
- Superadmin has 'Edit Proxies' dialog to configure for each org if it has: dedicated proxies, has access to shared proxies.
- User can select a proxy in Crawl Workflow browser settings
- Users can choose to launch a browser profile with a particular proxy
- Display which proxy is used to create profile in profile selector
- Users can choose which default proxy to use for new workflows in Crawling Defaults

---------
Co-authored-by: Ilya Kreymer <ikreymer@gmail.com>
Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
2024-10-02 18:35:45 -07:00

290 lines
7.1 KiB
YAML

---
# Backend Deployment: a single pod template running two containers built
# from the same backend image:
#   - "api": gunicorn serving btrixcloud.main:app_root on port 8000
#   - "op":  gunicorn serving btrixcloud.main_op:app_root on the opPort value
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ .Values.name }}-backend
  namespace: {{ .Release.Namespace }}
spec:
  selector:
    matchLabels:
      # Label values must be strings; quote so that a numeric- or
      # boolean-looking chart name cannot be re-typed by the YAML parser.
      app: {{ .Values.name | quote }}
      role: backend
  # Pin the replica count only when backend_max_replicas is exactly 1.
  # For larger values the field is left unset and the
  # HorizontalPodAutoscaler defined later in this file targets this
  # Deployment.
  {{- if eq (int .Values.backend_max_replicas) 1 }}
  replicas: 1
  {{- end }}
  template:
    metadata:
      labels:
        app: {{ .Values.name | quote }}
        role: backend
      annotations:
        # force helm to update the deployment each time
        # (a fresh random value changes the pod template on every render;
        # skipped when frontend_only is set)
        {{- if not .Values.frontend_only }}
        "helm.update": {{ randAlphaNum 5 | quote }}
        {{- end }}
    spec:
      # Optional node pinning; nodeSelector values must be strings, so quote.
      {{- if .Values.main_node_type }}
      nodeSelector:
        nodeType: {{ .Values.main_node_type | quote }}
      {{- end }}
      volumes:
        # Only the config.yaml key of the shared-job-config ConfigMap is
        # projected into this volume.
        - name: config-volume
          configMap:
            name: shared-job-config
            items:
              - key: config.yaml
                path: config.yaml
        - name: ops-configs
          secret:
            secretName: ops-configs
        # Marked optional: the ops-proxy-configs secret may be absent.
        - name: ops-proxy-configs
          secret:
            secretName: ops-proxy-configs
            optional: true
        - name: app-templates
          configMap:
            name: app-templates
        - name: email-templates
          configMap:
            name: email-templates
      containers:
        # ---- API container -------------------------------------------------
        - name: api
          image: {{ .Values.backend_image }}
          imagePullPolicy: {{ .Values.backend_pull_policy }}
          command:
            - gunicorn
            - btrixcloud.main:app_root
            - --bind
            - "0.0.0.0:8000"
            - --access-logfile
            - "-"
            - --workers
            - "{{ .Values.backend_workers | default 1 }}"
            - --worker-class
            - uvicorn.workers.UvicornWorker
          envFrom:
            - configMapRef:
                name: backend-env-config
            - secretRef:
                name: backend-auth
            - secretRef:
                name: mongo-auth
          env:
            - name: MOTOR_MAX_WORKERS
              value: "{{ .Values.backend_mongodb_workers | default 1 }}"
            # Both BTRIX_SUBS_APP_* variables come from an optional secret,
            # so they are simply unset when btrix-subs-app-secret is missing.
            - name: BTRIX_SUBS_APP_API_KEY
              valueFrom:
                secretKeyRef:
                  name: btrix-subs-app-secret
                  key: BTRIX_SUBS_APP_API_KEY
                  optional: true
            - name: BTRIX_SUBS_APP_URL
              valueFrom:
                secretKeyRef:
                  name: btrix-subs-app-secret
                  key: BTRIX_SUBS_APP_URL
                  optional: true
          # Unlike the "op" container, "api" does not mount config-volume.
          volumeMounts:
            - name: ops-configs
              mountPath: /ops-configs/
            - name: ops-proxy-configs
              mountPath: /ops-proxy-configs/
            - name: app-templates
              mountPath: /app/btrixcloud/templates/
            - name: email-templates
              mountPath: /app/btrixcloud/email-templates/
          # Memory request equals the memory limit; no CPU limit is set.
          resources:
            limits:
              memory: {{ .Values.backend_memory }}
            requests:
              cpu: {{ .Values.backend_cpu }}
              memory: {{ .Values.backend_memory }}
          # Startup budget: up to 60 failures x 5s period = 300s.
          startupProbe:
            httpGet:
              path: /healthzStartup
              port: 8000
            periodSeconds: 5
            failureThreshold: 60
            successThreshold: 1
          readinessProbe:
            httpGet:
              path: /healthz
              port: 8000
            initialDelaySeconds: 5
            periodSeconds: 30
            failureThreshold: 5
            successThreshold: 1
          livenessProbe:
            httpGet:
              path: /healthz
              port: 8000
            initialDelaySeconds: 5
            periodSeconds: 30
            failureThreshold: 15
            successThreshold: 1
        # ---- Operator container --------------------------------------------
        - name: op
          image: {{ .Values.backend_image }}
          imagePullPolicy: {{ .Values.backend_pull_policy }}
          command:
            - gunicorn
            - btrixcloud.main_op:app_root
            - --bind
            - "0.0.0.0:{{ .Values.opPort }}"
            - --access-logfile
            - "-"
            - --workers
            - "{{ .Values.backend_workers | default 1 }}"
            - --worker-class
            - uvicorn.workers.UvicornWorker
          envFrom:
            - configMapRef:
                name: backend-env-config
            - secretRef:
                name: backend-auth
            - secretRef:
                name: mongo-auth
          env:
            - name: MOTOR_MAX_WORKERS
              value: "{{ .Values.backend_mongodb_workers | default 1 }}"
          volumeMounts:
            - name: config-volume
              mountPath: /config
            - name: ops-configs
              mountPath: /ops-configs/
            - name: ops-proxy-configs
              mountPath: /ops-proxy-configs/
            - name: app-templates
              mountPath: /app/btrixcloud/templates/
            - name: email-templates
              mountPath: /app/btrixcloud/email-templates/
          # Memory request equals the memory limit; no CPU limit is set.
          resources:
            limits:
              memory: {{ .Values.backend_memory }}
            requests:
              cpu: {{ .Values.backend_cpu }}
              memory: {{ .Values.backend_memory }}
          # Startup budget: 5s initial delay + 5 failures x 5s period.
          # NOTE(review): this is far shorter than the 300s allowed for the
          # "api" container and uses /healthz rather than /healthzStartup;
          # confirm that is intended.
          startupProbe:
            httpGet:
              path: /healthz
              port: {{ .Values.opPort }}
            initialDelaySeconds: 5
            periodSeconds: 5
            failureThreshold: 5
            successThreshold: 1
          readinessProbe:
            httpGet:
              path: /healthz
              port: {{ .Values.opPort }}
            initialDelaySeconds: 5
            periodSeconds: 30
            failureThreshold: 5
            successThreshold: 1
          livenessProbe:
            httpGet:
              path: /healthz
              port: {{ .Values.opPort }}
            initialDelaySeconds: 5
            periodSeconds: 30
            failureThreshold: 15
            successThreshold: 1
---
# Service in front of the backend pods. It exposes two TCP ports: "api"
# (8000) and "operator" (the opPort value). No targetPort is given, so each
# port forwards to the same port number on the pod.
apiVersion: v1
kind: Service
metadata:
  namespace: {{ .Release.Namespace }}
  name: {{ .Values.name }}-backend
  labels:
    # Label values must be strings; quote so that a numeric- or
    # boolean-looking chart name cannot be re-typed by the YAML parser.
    app: {{ .Values.name | quote }}
    role: backend
  # Annotations are emitted only when service.annotations is set. The checks
  # are nested because the service value itself may be undefined.
  {{- if .Values.service }}
  {{- if .Values.service.annotations }}
  annotations:
  {{- range $key, $val := .Values.service.annotations }}
    {{ $key }}: {{ $val | quote }}
  {{- end }}
  {{- end }}
  {{- end }}
spec:
  # Must match the pod labels set in the backend Deployment template.
  selector:
    app: {{ .Values.name | quote }}
    role: backend
  ports:
    - protocol: TCP
      port: 8000
      name: api
    - protocol: TCP
      port: {{ .Values.opPort }}
      name: operator
{{- if gt (int .Values.backend_max_replicas) 1 }}
---
# Autoscaler for the backend Deployment. It is rendered only when
# backend_max_replicas is greater than 1, and scales between 1 and that
# maximum on average CPU and memory utilization.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: backend-autoscaler
  # Set explicitly, like the Deployment and Service in this file, so the
  # autoscaler always lands in the same namespace as its scale target.
  namespace: {{ .Release.Namespace }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ .Values.name }}-backend
  minReplicas: 1
  maxReplicas: {{ .Values.backend_max_replicas }}
  metrics:
    # Target average CPU utilization, in percent.
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: {{ .Values.backend_avg_cpu_threshold }}
    # Target average memory utilization, in percent.
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: {{ .Values.backend_avg_memory_threshold }}
{{- end }}