Run crawler and profilebrowser as a non-root user (#1625)

With these changes, crawler and profilebrowser jobs run as a
non-root user.
This commit is contained in:
Vinzenz Sinapius 2024-04-17 21:03:33 +02:00 committed by GitHub
parent 30ab139ff2
commit a8336925b6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 59 additions and 6 deletions

View File

@ -21,6 +21,7 @@ jobs:
- name: Create k3d Cluster
uses: AbsaOSS/k3d-action@v2
with:
k3d-version: v5.6.0
cluster-name: btrix-1
args: >-
-p "30870:30870@agent:0:direct"
@ -96,3 +97,7 @@ jobs:
# Runs only when an earlier step failed: print logs from the `op` container
# of the browsertrix-cloud-backend service.
- name: Print Backend Logs (Operator)
if: ${{ failure() }}
run: kubectl logs svc/browsertrix-cloud-backend -c op
# Runs only when an earlier step failed: list Kubernetes events from every
# namespace.
- name: Print K8S Events
if: ${{ failure() }}
run: kubectl events --all-namespaces

View File

@ -49,6 +49,14 @@ spec:
restartPolicy: OnFailure
# Run the crawler as a non-root user; uid, gid and fsGroup are filled in
# from the crawler_uid / crawler_gid / crawler_fsgroup template variables.
securityContext:
runAsNonRoot: true
runAsUser: {{ crawler_uid }}
runAsGroup: {{ crawler_gid }}
fsGroup: {{ crawler_fsgroup }}
# NOTE(review): allowPrivilegeEscalation and readOnlyRootFilesystem are
# fields of the container-level SecurityContext in the Kubernetes API, not
# of PodSecurityContext. Nesting is not visible here; if this mapping is
# the pod-level securityContext, move these two keys under each
# container's own securityContext. TODO confirm.
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
terminationGracePeriodSeconds: {{ termination_grace_secs }}
volumes:
- name: crawl-config
@ -62,6 +70,7 @@ spec:
persistentVolumeClaim:
claimName: {{ name }}
affinity:
{% if crawler_node_type %}
nodeAffinity:
@ -132,7 +141,6 @@ spec:
- name: crawl-data
mountPath: /crawls
envFrom:
- configMapRef:
name: shared-crawler-config
@ -146,6 +154,9 @@ spec:
{% endif %}
env:
- name: HOME
value: /crawls/home
- name: CRAWL_ID
value: "{{ id }}"

View File

@ -12,6 +12,19 @@ spec:
hostname: browser-{{ id }}
subdomain: browser
# Run the profile browser as a non-root user; uid, gid and fsGroup are
# filled in from the crawler_uid / crawler_gid / crawler_fsgroup template
# variables.
securityContext:
runAsNonRoot: true
runAsUser: {{ crawler_uid }}
runAsGroup: {{ crawler_gid }}
fsGroup: {{ crawler_fsgroup }}
# NOTE(review): allowPrivilegeEscalation and readOnlyRootFilesystem are
# fields of the container-level SecurityContext in the Kubernetes API, not
# of PodSecurityContext. Nesting is not visible here; if this mapping is
# the pod-level securityContext, move these two keys under each
# container's own securityContext. TODO confirm.
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
volumes:
- name: crawler-workdir
emptyDir:
sizeLimit: {{ profile_browser_workdir_size }}
{% if priorityClassName %}
priorityClassName: {{ priorityClassName }}
{% endif %}
@ -60,11 +73,18 @@ spec:
- "@{{ profile_filename }}"
{%- endif %}
volumeMounts:
- name: crawler-workdir
mountPath: /tmp
envFrom:
- secretRef:
name: {{ storage_secret }}
env:
# HOME is set to a directory under /tmp, where the crawler-workdir volume
# is mounted.
- name: HOME
value: /tmp/home
# Quoted so the rendered value is always a YAML string, even if the
# template variable expands to an empty or number-like value.
- name: STORE_PATH
value: "{{ storage_path }}"

View File

@ -43,7 +43,7 @@ data:
FAST_RETRY_SECS: "{{ .Values.operator_fast_resync_secs | default 3 }}"
MAX_CRAWL_SCALE: "{{ .Values.max_crawl_scale | default 3 }}"
LOG_FAILED_CRAWL_LINES: "{{ .Values.log_failed_crawl_lines | default 0 }}"
IS_LOCAL_MINIO: "{{ .Values.minio_local }}"
@ -78,7 +78,7 @@ data:
config.yaml: |
namespace: {{ .Values.crawler_namespace }}
termination_grace_secs: "{{ .Values.grace_period_secs | default 600 }}"
volume_storage_class: "{{ .Values.volume_storage_class }}"
# redis
@ -106,7 +106,7 @@ data:
crawler_memory: "{{ .Values.crawler_memory }}"
crawler_storage: "{{ .Values.crawler_storage }}"
volume_storage_class: "{{ .Values.volume_storage_class }}"
crawler_liveness_port: "{{ .Values.crawler_liveness_port | default 0 }}"
@ -114,6 +114,12 @@ data:
crawler_socks_proxy_host: "{{ .Values.crawler_socks_proxy_host }}"
crawler_socks_proxy_port: "{{ .Values.crawler_socks_proxy_port }}"
crawler_uid: "{{ .Values.crawler_uid | default 201400007 }}"
crawler_gid: "{{ .Values.crawler_gid | default 201400007 }}"
crawler_fsgroup: "{{ .Values.crawler_fsgroup | default 201400007 }}"
profile_browser_workdir_size: "{{ .Values.profile_browser_workdir_size | default "4Gi" }}"
crawler_node_type: "{{ .Values.crawler_node_type }}"
redis_node_type: "{{ .Values.redis_node_type }}"

View File

@ -250,12 +250,16 @@ crawler_extra_memory_per_browser: 768Mi
#
# profile_browser_cpu:
# optional: set the workdir size for the profilebrowser pods
# the workdir is used to store the browser profile data and other temporary files
# profile_browser_workdir_size: 4Gi
# Other Crawler Settings
# ----------------------
# minimum size allocated to each crawler
# should be at least double crawl session size to ensure space for WACZ
crawler_storage: "22Gi"
# should be at least double crawl session size to ensure space for WACZ and browser profile data
crawler_storage: "26Gi"
# max size at which crawler will commit current crawl session
crawler_session_size_limit_bytes: "10000000000"
@ -269,6 +273,13 @@ crawler_liveness_port: 6065
# crawler_socks_proxy_host: 192.0.2.1
# crawler_socks_proxy_port: 9050
# optional: set the uid, gid and fsgroup for the crawler and profilebrowser pods
# crawler_uid: 201400007
# crawler_gid: 201400007
# crawler_fsgroup: 201400007
# time to wait for graceful stop
# NOTE(review): the configmap template changed in this commit computes
# termination_grace_secs from `.Values.grace_period_secs` (default 600),
# not from `grace_period` -- confirm which key name is intended.
grace_period: 1000