disable behaviors for QA runs via configmap (#1963)

- make crawl args a reusable template
- adds QA_ARGS to configmap, set to the same value as CRAWL_ARGS but
with --behaviors="" appended to disable behaviors for QA, to improve
performance of QA runs.

fixes #1962
This commit is contained in:
Ilya Kreymer 2024-07-23 19:54:21 -07:00 committed by GitHub
parent 01ddf95a56
commit b35669af8d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -74,8 +74,12 @@ metadata:
namespace: {{ .Values.crawler_namespace }}
data:
CRAWL_ARGS: >-
--sizeLimit {{ .Values.crawler_session_size_limit_bytes }} --timeLimit {{ .Values.crawler_session_time_limit_seconds }} --maxPageLimit {{ .Values.max_pages_per_crawl | default 0 }} --healthCheckPort {{ .Values.crawler_liveness_port }} --diskUtilization {{ .Values.disk_utilization_threshold }} --logging {{ .Values.crawler_logging_opts }} --text {{ .Values.crawler_extract_full_text }} --generateWACZ --collection thecrawl --screencastPort 9037 --logErrorsToRedis --writePagesToRedis --restartsOnError --headless --screenshot view,thumbnail {{ .Values.crawler_extra_args }}
# Shared crawler command-line flags, declared once as the named template
# "btrix.crawler_args" so that more than one ConfigMap key can reuse them.
# Limits, ports and logging options are read from .Values (max_pages_per_crawl
# falls back to 0 when unset); crawler_extra_args is emitted last.
# The template body begins with a single space: each include below trims the
# whitespace in front of it, so that space is what separates the key's colon
# from the first flag in the rendered value.
{{- define "btrix.crawler_args" }} --sizeLimit {{ .Values.crawler_session_size_limit_bytes }} --timeLimit {{ .Values.crawler_session_time_limit_seconds }} --maxPageLimit {{ .Values.max_pages_per_crawl | default 0 }} --healthCheckPort {{ .Values.crawler_liveness_port }} --diskUtilization {{ .Values.disk_utilization_threshold }} --logging {{ .Values.crawler_logging_opts }} --text {{ .Values.crawler_extract_full_text }} --generateWACZ --collection thecrawl --screencastPort 9037 --logErrorsToRedis --writePagesToRedis --restartsOnError --headless --screenshot view,thumbnail {{ .Values.crawler_extra_args }} {{- end }}
# Arguments for regular crawls: the shared template, unchanged.
CRAWL_ARGS: {{- include "btrix.crawler_args" . }}
# disable behaviors for QA runs, otherwise use same args
# (the empty --behaviors value is appended after the shared flags)
QA_ARGS: {{- include "btrix.crawler_args" . }} --behaviors=""
---
apiVersion: v1