diff --git a/backend/k8sman.py b/backend/k8sman.py
index 426deb4d..e057dc11 100644
--- a/backend/k8sman.py
+++ b/backend/k8sman.py
@@ -56,6 +56,14 @@ class K8SManager:
         self.requests_mem = os.environ["CRAWLER_REQUESTS_MEM"]
         self.limits_mem = os.environ["CRAWLER_LIMITS_MEM"]
 
+        self.crawl_volume = {"name": "crawl-data"}
+        # if set, use a persistent volume claim for crawls
+        crawl_pv_claim = os.environ.get("CRAWLER_PV_CLAIM")
+        if crawl_pv_claim:
+            self.crawl_volume["persistentVolumeClaim"] = {"claimName": crawl_pv_claim}
+        else:
+            self.crawl_volume["emptyDir"] = {}
+
         self.loop = asyncio.get_running_loop()
         self.loop.create_task(self.run_event_loop())
         self.loop.create_task(self.init_redis(self.redis_url))
@@ -742,7 +750,7 @@ class K8SManager:
                             ],
                         },
                     },
-                    {"name": "crawl-data", "emptyDir": {}},
+                    self.crawl_volume,
                 ],
                 "restartPolicy": "Never",
                 "terminationGracePeriodSeconds": self.grace_period,
diff --git a/chart/templates/configmap.yaml b/chart/templates/configmap.yaml
index fda9e224..fce00d0a 100644
--- a/chart/templates/configmap.yaml
+++ b/chart/templates/configmap.yaml
@@ -21,6 +21,10 @@ data:
   CRAWLER_REQUESTS_MEM: "{{ .Values.crawler_requests_memory }}"
   CRAWLER_LIMITS_MEM: "{{ .Values.crawler_limits_memory }}"
 
+  {{- if .Values.crawler_pv_claim }}
+  CRAWLER_PV_CLAIM: "{{ .Values.crawler_pv_claim }}"
+  {{- end }}
+
   REDIS_URL: "{{ .Values.redis_url }}"
 
   REDIS_CRAWLS_DONE_KEY: "crawls-done"
diff --git a/chart/templates/frontend.yaml b/chart/templates/frontend.yaml
index 36de9892..065572f5 100644
--- a/chart/templates/frontend.yaml
+++ b/chart/templates/frontend.yaml
@@ -10,7 +10,7 @@ spec:
     matchLabels:
       app: {{ .Values.name }}
       role: frontend
-  replicas: 1
+  replicas: {{ .Values.nginx_num_replicas | default 1 }}
   template:
     metadata:
       labels:
diff --git a/chart/values.yaml b/chart/values.yaml
index cc43db5a..68325e8c 100644
--- a/chart/values.yaml
+++ b/chart/values.yaml
@@ -45,6 +45,8 @@ api_limits_memory: "192Mi"
 nginx_image: "nginx"
 nginx_pull_policy: "IfNotPresent"
 
+nginx_num_replicas: 1
+
 nginx_requests_cpu: "3m"
 nginx_limits_cpu: "10m"
 
@@ -98,6 +100,10 @@ crawler_pull_policy: "IfNotPresent"
 
 crawler_namespace: "crawlers"
 
+# optional: enable to use a persistent volume claim for all crawls
+# can be enabled to use a multi-write (ReadWriteMany) shared filesystem
+# crawler_pv_claim: "nfs-shared-crawls"
+
 # num retries
 crawl_retries: 3
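
For reference, a ReadWriteMany claim along the following lines could back `crawler_pv_claim`. This is a minimal sketch, not part of the patch: the claim name matches the commented example value above and the namespace matches `crawler_namespace`, but the storage class and requested size are assumptions to be adjusted for the target cluster.

```yaml
# Hypothetical claim to back crawler_pv_claim (not included in this patch).
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: nfs-shared-crawls     # matches the example value in values.yaml
  namespace: crawlers         # matches crawler_namespace
spec:
  accessModes:
    - ReadWriteMany           # multiple crawler pods must be able to write
  storageClassName: nfs-client  # assumption: an NFS-backed provisioner
  resources:
    requests:
      storage: 100Gi          # assumption: size as needed
```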
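The two new chart options can then be enabled together via a values override file passed to `helm upgrade -f`; the values below are illustrative only:

```yaml
# example-overrides.yaml -- illustrative values, not defaults
nginx_num_replicas: 2
crawler_pv_claim: "nfs-shared-crawls"
```

With `crawler_pv_claim` unset, behavior is unchanged: each crawl job falls back to a per-pod emptyDir volume.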