Run crawler and profilebrowser as a non-root user (#1625)

With these changes, crawler and profilebrowser jobs run as a
non-root user.
This commit is contained in:
Vinzenz Sinapius 2024-04-17 21:03:33 +02:00 committed by GitHub
parent 30ab139ff2
commit a8336925b6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 59 additions and 6 deletions

View File

@ -21,6 +21,7 @@ jobs:
- name: Create k3d Cluster - name: Create k3d Cluster
uses: AbsaOSS/k3d-action@v2 uses: AbsaOSS/k3d-action@v2
with: with:
k3d-version: v5.6.0
cluster-name: btrix-1 cluster-name: btrix-1
args: >- args: >-
-p "30870:30870@agent:0:direct" -p "30870:30870@agent:0:direct"
@ -96,3 +97,7 @@ jobs:
- name: Print Backend Logs (Operator) - name: Print Backend Logs (Operator)
if: ${{ failure() }} if: ${{ failure() }}
run: kubectl logs svc/browsertrix-cloud-backend -c op run: kubectl logs svc/browsertrix-cloud-backend -c op
- name: Print K8S Events
if: ${{ failure() }}
run: kubectl events --all-namespaces

View File

@ -49,6 +49,14 @@ spec:
restartPolicy: OnFailure restartPolicy: OnFailure
securityContext:
runAsNonRoot: true
runAsUser: {{ crawler_uid}}
runAsGroup: {{ crawler_gid}}
fsGroup: {{ crawler_fsgroup }}
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
terminationGracePeriodSeconds: {{ termination_grace_secs }} terminationGracePeriodSeconds: {{ termination_grace_secs }}
volumes: volumes:
- name: crawl-config - name: crawl-config
@ -62,6 +70,7 @@ spec:
persistentVolumeClaim: persistentVolumeClaim:
claimName: {{ name }} claimName: {{ name }}
affinity: affinity:
{% if crawler_node_type %} {% if crawler_node_type %}
nodeAffinity: nodeAffinity:
@ -132,7 +141,6 @@ spec:
- name: crawl-data - name: crawl-data
mountPath: /crawls mountPath: /crawls
envFrom: envFrom:
- configMapRef: - configMapRef:
name: shared-crawler-config name: shared-crawler-config
@ -146,6 +154,9 @@ spec:
{% endif %} {% endif %}
env: env:
- name: HOME
value: /crawls/home
- name: CRAWL_ID - name: CRAWL_ID
value: "{{ id }}" value: "{{ id }}"

View File

@ -12,6 +12,19 @@ spec:
hostname: browser-{{ id }} hostname: browser-{{ id }}
subdomain: browser subdomain: browser
securityContext:
runAsNonRoot: true
runAsUser: {{ crawler_uid}}
runAsGroup: {{ crawler_gid}}
fsGroup: {{ crawler_fsgroup }}
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
volumes:
- name: crawler-workdir
emptyDir:
sizeLimit: {{ profile_browser_workdir_size }}
{% if priorityClassName %} {% if priorityClassName %}
priorityClassName: {{ priorityClassName }} priorityClassName: {{ priorityClassName }}
{% endif %} {% endif %}
@ -60,11 +73,18 @@ spec:
- "@{{ profile_filename }}" - "@{{ profile_filename }}"
{%- endif %} {%- endif %}
volumeMounts:
- name: crawler-workdir
mountPath: /tmp
envFrom: envFrom:
- secretRef: - secretRef:
name: {{ storage_secret }} name: {{ storage_secret }}
env: env:
- name: HOME
value: /tmp/home
- name: STORE_PATH - name: STORE_PATH
value: {{ storage_path }} value: {{ storage_path }}

View File

@ -43,7 +43,7 @@ data:
FAST_RETRY_SECS: "{{ .Values.operator_fast_resync_secs | default 3 }}" FAST_RETRY_SECS: "{{ .Values.operator_fast_resync_secs | default 3 }}"
MAX_CRAWL_SCALE: "{{ .Values.max_crawl_scale | default 3 }}" MAX_CRAWL_SCALE: "{{ .Values.max_crawl_scale | default 3 }}"
LOG_FAILED_CRAWL_LINES: "{{ .Values.log_failed_crawl_lines | default 0 }}" LOG_FAILED_CRAWL_LINES: "{{ .Values.log_failed_crawl_lines | default 0 }}"
IS_LOCAL_MINIO: "{{ .Values.minio_local }}" IS_LOCAL_MINIO: "{{ .Values.minio_local }}"
@ -78,7 +78,7 @@ data:
config.yaml: | config.yaml: |
namespace: {{ .Values.crawler_namespace }} namespace: {{ .Values.crawler_namespace }}
termination_grace_secs: "{{ .Values.grace_period_secs | default 600 }}" termination_grace_secs: "{{ .Values.grace_period_secs | default 600 }}"
volume_storage_class: "{{ .Values.volume_storage_class }}" volume_storage_class: "{{ .Values.volume_storage_class }}"
# redis # redis
@ -106,7 +106,7 @@ data:
crawler_memory: "{{ .Values.crawler_memory }}" crawler_memory: "{{ .Values.crawler_memory }}"
crawler_storage: "{{ .Values.crawler_storage }}" crawler_storage: "{{ .Values.crawler_storage }}"
volume_storage_class: "{{ .Values.volume_storage_class }}" volume_storage_class: "{{ .Values.volume_storage_class }}"
crawler_liveness_port: "{{ .Values.crawler_liveness_port | default 0 }}" crawler_liveness_port: "{{ .Values.crawler_liveness_port | default 0 }}"
@ -114,6 +114,12 @@ data:
crawler_socks_proxy_host: "{{ .Values.crawler_socks_proxy_host }}" crawler_socks_proxy_host: "{{ .Values.crawler_socks_proxy_host }}"
crawler_socks_proxy_port: "{{ .Values.crawler_socks_proxy_port }}" crawler_socks_proxy_port: "{{ .Values.crawler_socks_proxy_port }}"
crawler_uid: "{{ .Values.crawler_uid | default 201400007 }}"
crawler_gid: "{{ .Values.crawler_gid | default 201400007 }}"
crawler_fsgroup: "{{ .Values.crawler_fsgroup | default 201400007 }}"
profile_browser_workdir_size: "{{ .Values.profile_browser_workdir_size | default "4Gi" }}"
crawler_node_type: "{{ .Values.crawler_node_type }}" crawler_node_type: "{{ .Values.crawler_node_type }}"
redis_node_type: "{{ .Values.redis_node_type }}" redis_node_type: "{{ .Values.redis_node_type }}"

View File

@ -250,12 +250,16 @@ crawler_extra_memory_per_browser: 768Mi
# #
# profile_browser_cpu: # profile_browser_cpu:
# optional: set the size limit of the workdir for the profilebrowser pods
# the workdir is an emptyDir volume mounted at /tmp; it stores the browser
# profile data and other temporary files (defaults to 4Gi when unset)
# profile_browser_workdir_size: 4Gi
# Other Crawler Settings # Other Crawler Settings
# ---------------------- # ----------------------
# minimum size allocated to each crawler # minimum size allocated to each crawler
# should be at least double crawl session size to ensure space for WACZ # should be at least double crawl session size to ensure space for WACZ and browser profile data
crawler_storage: "22Gi" crawler_storage: "26Gi"
# max size at which crawler will commit current crawl session # max size at which crawler will commit current crawl session
crawler_session_size_limit_bytes: "10000000000" crawler_session_size_limit_bytes: "10000000000"
@ -269,6 +273,13 @@ crawler_liveness_port: 6065
# crawler_socks_proxy_host: 192.0.2.1 # crawler_socks_proxy_host: 192.0.2.1
# crawler_socks_proxy_port: 9050 # crawler_socks_proxy_port: 9050
# optional: set the uid, gid and fsgroup that the crawler and profilebrowser pods run as
# (each defaults to 201400007 when unset)
# crawler_uid: 201400007
# crawler_gid: 201400007
# crawler_fsgroup: 201400007
# time to wait for graceful stop # time to wait for graceful stop
grace_period: 1000 grace_period: 1000