Resolves #1354 Supports crawling through pre-configured proxy servers, allowing users to select which proxy servers to use (requires browsertrix crawler 1.3+) Config: - proxies defined in btrix-proxies subchart - can be configured via btrix-proxies key or separate proxies.yaml file via separate subchart - proxies list refreshed automatically if crawler_proxies.json changes if subchart is deployed - support for ssh and socks5 proxies - proxy keys added to secrets in subchart - support for default proxy to be always used if no other proxy configured, prevent starting cluster if default proxy not available - prevent starting manual crawl if previously configured proxy is no longer available, return error - force 'btrix' username and group name on browsertrix-crawler non-root user to support ssh Operator: - support crawling through proxies, pass proxyId in CrawlJob - support running profile browsers with designated proxy, pass proxyId to ProfileJob - prevent starting scheduled crawl if previously configured proxy is no longer available API / Access: - /api/orgs/all/crawlconfigs/crawler-proxies - get all proxies (superadmin only) - /api/orgs/{oid}/crawlconfigs/crawler-proxies - get proxies available to particular org - /api/orgs/{oid}/proxies - update allowed proxies for particular org (superadmin only) - superadmin can configure which orgs can use which proxies, stored on the org - superadmin can also allow an org to access all 'shared' proxies, to avoid having to allow a shared proxy on each org. UI: - Superadmin has 'Edit Proxies' dialog to configure for each org if it has: dedicated proxies, has access to shared proxies. 
- User can select a proxy in Crawl Workflow browser settings - Users can choose to launch a browser profile with a particular proxy - Display which proxy is used to create profile in profile selector - Users can choose which default proxy to use for new workflows in Crawling Defaults --------- Co-authored-by: Ilya Kreymer <ikreymer@gmail.com> Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
290 lines
7.1 KiB
YAML
290 lines
7.1 KiB
YAML
---
# Backend Deployment: a single pod template running two containers built from
# the same backend image:
#   - "api": gunicorn serving btrixcloud.main:app_root on port 8000
#   - "op":  gunicorn serving btrixcloud.main_op:app_root on the opPort value
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ .Values.name }}-backend
  namespace: {{ .Release.Namespace }}

spec:
  selector:
    matchLabels:
      app: {{ .Values.name }}
      role: backend
  # Pin the replica count only when exactly one replica is configured;
  # otherwise the field is omitted (an autoscaler is rendered elsewhere in this
  # file when backend_max_replicas is greater than 1).
  {{- if eq (int .Values.backend_max_replicas) 1 }}
  replicas: 1
  {{- end }}
  template:
    metadata:
      labels:
        app: {{ .Values.name }}
        role: backend
      # The whole annotations key is guarded so that no empty (null)
      # "annotations:" mapping is rendered when frontend_only is set.
      {{- if not .Values.frontend_only }}
      annotations:
        # force helm to update the deployment each time: a fresh random value
        # changes the pod template on every render
        "helm.update": {{ randAlphaNum 5 | quote }}
      {{- end }}

    spec:
      # Optional node placement; quoted because node selector values must be
      # strings even when the configured value looks numeric or boolean.
      {{- if .Values.main_node_type }}
      nodeSelector:
        nodeType: {{ .Values.main_node_type | quote }}
      {{- end }}

      volumes:
        # Only the config.yaml key of the shared-job-config ConfigMap is
        # projected; mounted by the "op" container at /config.
        - name: config-volume
          configMap:
            name: shared-job-config
            items:
              - key: config.yaml
                path: config.yaml

        - name: ops-configs
          secret:
            secretName: ops-configs

        # optional: the pod still starts when this secret does not exist
        - name: ops-proxy-configs
          secret:
            secretName: ops-proxy-configs
            optional: true

        - name: app-templates
          configMap:
            name: app-templates

        - name: email-templates
          configMap:
            name: email-templates

      containers:
        - name: api
          image: {{ .Values.backend_image }}
          imagePullPolicy: {{ .Values.backend_pull_policy }}
          command:
            - gunicorn
            - btrixcloud.main:app_root
            - --bind
            - "0.0.0.0:8000"
            - --access-logfile
            - "-"
            - --workers
            - "{{ .Values.backend_workers | default 1 }}"
            - --worker-class
            - uvicorn.workers.UvicornWorker

          envFrom:
            - configMapRef:
                name: backend-env-config
            - secretRef:
                name: backend-auth
            - secretRef:
                name: mongo-auth

          env:
            - name: MOTOR_MAX_WORKERS
              value: "{{ .Values.backend_mongodb_workers | default 1 }}"

            # Both BTRIX_SUBS_APP_* variables come from an optional secret and
            # are simply left unset when the secret or key is missing.
            - name: BTRIX_SUBS_APP_API_KEY
              valueFrom:
                secretKeyRef:
                  name: btrix-subs-app-secret
                  key: BTRIX_SUBS_APP_API_KEY
                  optional: true

            - name: BTRIX_SUBS_APP_URL
              valueFrom:
                secretKeyRef:
                  name: btrix-subs-app-secret
                  key: BTRIX_SUBS_APP_URL
                  optional: true

          volumeMounts:
            - name: ops-configs
              mountPath: /ops-configs/

            - name: ops-proxy-configs
              mountPath: /ops-proxy-configs/

            - name: app-templates
              mountPath: /app/btrixcloud/templates/

            - name: email-templates
              mountPath: /app/btrixcloud/email-templates/

          # Memory limit equals the memory request; no CPU limit is set.
          resources:
            limits:
              memory: {{ .Values.backend_memory }}

            requests:
              cpu: {{ .Values.backend_cpu }}
              memory: {{ .Values.backend_memory }}

          # Startup uses its own endpoint and tolerates up to 60 failed checks
          # at 5s intervals before the container is restarted.
          startupProbe:
            httpGet:
              path: /healthzStartup
              port: 8000
            periodSeconds: 5
            failureThreshold: 60
            successThreshold: 1

          readinessProbe:
            httpGet:
              path: /healthz
              port: 8000
            initialDelaySeconds: 5
            periodSeconds: 30
            failureThreshold: 5
            successThreshold: 1

          livenessProbe:
            httpGet:
              path: /healthz
              port: 8000
            initialDelaySeconds: 5
            periodSeconds: 30
            failureThreshold: 15
            successThreshold: 1

        - name: op
          image: {{ .Values.backend_image }}
          imagePullPolicy: {{ .Values.backend_pull_policy }}
          command:
            - gunicorn
            - btrixcloud.main_op:app_root
            - --bind
            - "0.0.0.0:{{ .Values.opPort }}"
            - --access-logfile
            - "-"
            - --workers
            - "{{ .Values.backend_workers | default 1 }}"
            - --worker-class
            - uvicorn.workers.UvicornWorker

          envFrom:
            - configMapRef:
                name: backend-env-config
            - secretRef:
                name: backend-auth
            - secretRef:
                name: mongo-auth

          env:
            - name: MOTOR_MAX_WORKERS
              value: "{{ .Values.backend_mongodb_workers | default 1 }}"

          # Same mounts as the "api" container, plus config-volume at /config.
          volumeMounts:
            - name: config-volume
              mountPath: /config

            - name: ops-configs
              mountPath: /ops-configs/

            - name: ops-proxy-configs
              mountPath: /ops-proxy-configs/

            - name: app-templates
              mountPath: /app/btrixcloud/templates/

            - name: email-templates
              mountPath: /app/btrixcloud/email-templates/

          # Memory limit equals the memory request; no CPU limit is set.
          resources:
            limits:
              memory: {{ .Values.backend_memory }}

            requests:
              cpu: {{ .Values.backend_cpu }}
              memory: {{ .Values.backend_memory }}

          # All three probes hit /healthz on the operator port; startup allows
          # 5 failed checks at 5s intervals after a 5s initial delay.
          startupProbe:
            httpGet:
              path: /healthz
              port: {{ .Values.opPort }}
            initialDelaySeconds: 5
            periodSeconds: 5
            failureThreshold: 5
            successThreshold: 1

          readinessProbe:
            httpGet:
              path: /healthz
              port: {{ .Values.opPort }}
            initialDelaySeconds: 5
            periodSeconds: 30
            failureThreshold: 5
            successThreshold: 1

          livenessProbe:
            httpGet:
              path: /healthz
              port: {{ .Values.opPort }}
            initialDelaySeconds: 5
            periodSeconds: 30
            failureThreshold: 15
            successThreshold: 1

---
# Service in front of the backend pods (matched on the app / role labels).
# Exposes port 8000 as "api" and the opPort value as "operator".
apiVersion: v1
kind: Service

metadata:
  name: {{ .Values.name }}-backend
  namespace: {{ .Release.Namespace }}
  labels:
    app: {{ .Values.name }}
    role: backend
  # Optional annotations taken from the service.annotations value; every
  # annotation value is rendered as a quoted string.
  {{- with .Values.service }}
  {{- with .annotations }}
  annotations:
    {{- range $key, $val := . }}
    {{ $key }}: {{ $val | quote }}
    {{- end }}
  {{- end }}
  {{- end }}

spec:
  selector:
    app: {{ .Values.name }}
    role: backend

  ports:
    - name: api
      protocol: TCP
      port: 8000

    - name: operator
      protocol: TCP
      port: {{ .Values.opPort }}

{{- if gt (int .Values.backend_max_replicas) 1 }}
---
# Autoscaler for the backend Deployment; rendered only when
# backend_max_replicas is greater than 1. Scales between 1 and that maximum
# on average CPU and memory utilization.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: backend-autoscaler
  # Set explicitly, like the Deployment it targets, so the autoscaler lands in
  # the release namespace even when the rendered manifest is applied directly.
  namespace: {{ .Release.Namespace }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ .Values.name }}-backend
  minReplicas: 1
  maxReplicas: {{ .Values.backend_max_replicas }}
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: {{ .Values.backend_avg_cpu_threshold }}
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: {{ .Values.backend_avg_memory_threshold }}
{{- end }}