* backend: max pages per crawl limit, part of fix for #716: - set 'max_pages_crawl_limit' in values.yaml, default to 100,000 - if set/non-0, automatically set limit if none provided - if set/non-0, return 400 if adding a config with a limit exceeding the max limit - return limit as 'maxPagesPerCrawl' in /api/settings - api: /all/crawls - add runningOnly=0 to show all crawls, default to 1/true (for more reliable testing) tests: add test for 'max_pages_per_crawl' setting - ensure 'limit' cannot be set higher than max_pages_per_crawl - ensure pages crawled is at the limit - set test limit to max 2 pages - add settings test - check for pages.jsonl and extraPages.jsonl when crawling 2 pages
133 lines
3.9 KiB
YAML
133 lines
3.9 KiB
YAML
---
# Backend environment ConfigMap: rendered into env vars for the API/backend pods.
# All values are quoted so empty/numeric/boolean-looking Helm values always
# render as YAML strings (ConfigMap data values must be strings).
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ .Values.name }}-env-config
  namespace: {{ .Release.Namespace }}

data:
  APP_ORIGIN: {{ .Values.ingress.scheme }}://{{ .Values.ingress.host | default "localhost:9870" }}

  # Crawler image / scheduling
  CRAWLER_NAMESPACE: "{{ .Values.crawler_namespace }}"
  CRAWLER_IMAGE: "{{ .Values.crawler_image }}"
  CRAWLER_PULL_POLICY: "{{ .Values.crawler_pull_policy }}"

  CRAWLER_FQDN_SUFFIX: ".{{ .Values.crawler_namespace }}.svc.cluster.local"

  CRAWLER_TIMEOUT: "{{ .Values.crawl_timeout }}"
  CRAWLER_RETRIES: "{{ .Values.crawl_retries }}"

  # Crawler resource requests/limits
  CRAWLER_REQUESTS_CPU: "{{ .Values.crawler_requests_cpu }}"
  CRAWLER_LIMITS_CPU: "{{ .Values.crawler_limits_cpu }}"

  CRAWLER_REQUESTS_MEM: "{{ .Values.crawler_requests_memory }}"
  CRAWLER_LIMITS_MEM: "{{ .Values.crawler_limits_memory }}"

  # 0 disables the liveness probe
  CRAWLER_LIVENESS_PORT: "{{ .Values.crawler_liveness_port | default 0 }}"

  DEFAULT_ORG: "{{ .Values.default_org }}"

  INVITE_EXPIRE_SECONDS: "{{ .Values.invite_expire_seconds }}"

  JOB_IMAGE: "{{ .Values.backend_image }}"
  JOB_PULL_POLICY: "{{ .Values.backend_pull_policy }}"

  {{- if .Values.crawler_pv_claim }}
  CRAWLER_PV_CLAIM: "{{ .Values.crawler_pv_claim }}"
  {{- end }}

  CRAWLER_NODE_TYPE: "{{ .Values.crawler_node_type }}"

  REDIS_URL: "{{ .Values.redis_url }}"

  REDIS_CRAWLS_DONE_KEY: "crawls-done"

  NO_DELETE_JOBS: "{{ .Values.no_delete_jobs | default 0 }}"

  GRACE_PERIOD_SECS: "{{ .Values.grace_period_secs | default 600 }}"

  REGISTRATION_ENABLED: "{{ .Values.registration_enabled | default 0 }}"

  ALLOW_DUPE_INVITES: "{{ .Values.allow_dupe_invites | default 0 }}"

  JWT_TOKEN_LIFETIME_MINUTES: "{{ .Values.jwt_token_lifetime_minutes | default 60 }}"

  DEFAULT_BEHAVIOR_TIME_SECONDS: "{{ .Values.default_behavior_time_seconds }}"

  # Hard cap on pages per crawl (#716); 0 means no limit.
  # Exposed to clients as 'maxPagesPerCrawl' via /api/settings.
  MAX_PAGES_PER_CRAWL: "{{ .Values.max_pages_per_crawl | default 0 }}"

  WEB_CONCURRENCY: "{{ .Values.backend_workers | default 4 }}"

  IDLE_TIMEOUT: "{{ .Values.profile_browser_idle_seconds | default 60 }}"

---
# Shared crawler ConfigMap: base CLI arguments injected into every crawler pod.
# Lives in the crawler namespace so crawl jobs can mount it directly.
apiVersion: v1
kind: ConfigMap
metadata:
  name: shared-crawler-config
  namespace: {{ .Values.crawler_namespace }}

data:
  # Base args plus per-session worker count, size/time limits, and health-check port.
  CRAWL_ARGS: "{{ .Values.crawler_args }} --workers {{ .Values.crawler_browser_instances | default 1 }} --sizeLimit {{ .Values.crawler_session_size_limit_bytes }} --timeLimit {{ .Values.crawler_session_time_limit_seconds }} --healthCheckPort {{ .Values.crawler_liveness_port }}"

---
# Shared job ConfigMap: settings file consumed by the crawl-job operator,
# plus the redis.conf mounted into per-crawl Redis instances.
apiVersion: v1
kind: ConfigMap
metadata:
  name: shared-job-config
  namespace: {{ .Values.crawler_namespace }}

data:
  config.yaml: |
    namespace: {{ .Values.crawler_namespace }}
    termination_grace_secs: "{{ .Values.grace_period_secs | default 600 }}"

    volume_storage_class: "{{ .Values.volume_storage_class }}"

    requests_hd: "{{ .Values.crawler_requests_storage }}"

    # redis
    redis_image: "{{ .Values.redis_image }}"
    redis_image_pull_policy: "{{ .Values.redis_pull_policy }}"

    redis_requests_cpu: "{{ .Values.redis_requests_cpu }}"
    redis_limits_cpu: "{{ .Values.redis_limits_cpu }}"

    redis_requests_memory: "{{ .Values.redis_requests_memory }}"
    redis_limits_memory: "{{ .Values.redis_limits_memory }}"

    # crawler
    crawler_image: "{{ .Values.crawler_image }}"
    crawler_image_pull_policy: "{{ .Values.crawler_pull_policy }}"

    crawler_requests_cpu: "{{ .Values.crawler_requests_cpu }}"
    crawler_limits_cpu: "{{ .Values.crawler_limits_cpu }}"

    crawler_requests_memory: "{{ .Values.crawler_requests_memory }}"
    crawler_limits_memory: "{{ .Values.crawler_limits_memory }}"

    # 0 disables the liveness probe
    crawler_liveness_port: "{{ .Values.crawler_liveness_port | default 0 }}"

    crawler_node_type: "{{ .Values.crawler_node_type }}"
    redis_node_type: "{{ .Values.redis_node_type }}"

  # Persist Redis state to the pod's /data volume so crawl queues survive restarts.
  redis.conf: |
    appendonly yes
    dir /data

---
# Nginx ConfigMap: bundles every *.conf file from the chart root into the map.
apiVersion: v1
kind: ConfigMap
metadata:
  name: nginx-config
  namespace: {{ .Release.Namespace }}

data:
{{ (.Files.Glob "*.conf").AsConfig | indent 2 }}

{{- /*
  Disabled: frontend assets are not shipped via this ConfigMap.
  NOTE: a YAML '#' does NOT disable a template action — Helm would still
  execute the glob and inject multi-line file contents with only the first
  line commented, so a Go-template comment is used instead:
  {{ (.Files.Glob "frontend/*.*").AsConfig | indent 2 }}
*/}}