From 627e9a6f140f908e44ba8666c244d5c74449cd3a Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Thu, 19 Aug 2021 14:15:21 -0700 Subject: [PATCH] cleanup crawl config, add separate 'runNow' field crawler: add cpu/memory limits minio: auto-create bucket for local minio --- backend/crawls.py | 6 ++++-- backend/dockerman.py | 2 ++ backend/k8sman.py | 38 +++++++++++++++++++++++++------------- backend/requirements.txt | 1 - chart/templates/main.yaml | 23 ++++++++++++++++++++++- chart/values.yaml | 8 +++++++- 6 files changed, 60 insertions(+), 18 deletions(-) diff --git a/backend/crawls.py b/backend/crawls.py index a95e11ee..4d2d4148 100644 --- a/backend/crawls.py +++ b/backend/crawls.py @@ -73,8 +73,9 @@ class CrawlConfigIn(BaseModel): """CrawlConfig input model, submitted via API""" schedule: Optional[str] = "" + runNow: Optional[bool] = False - storageName: Optional[str] = "default" + #storageName: Optional[str] = "default" config: RawCrawlConfig @@ -84,8 +85,9 @@ class CrawlConfig(BaseMongoModel): """Schedulable config""" schedule: Optional[str] = "" + runNow: Optional[bool] = False - storageName: Optional[str] = "default" + #storageName: Optional[str] = "default" archive: Optional[str] diff --git a/backend/dockerman.py b/backend/dockerman.py index 2bc22626..a5a3b8ae 100644 --- a/backend/dockerman.py +++ b/backend/dockerman.py @@ -1,3 +1,5 @@ +# pylint: skip-file + from archives import Archive from crawls import CrawlConfig diff --git a/backend/k8sman.py b/backend/k8sman.py index 71c0415d..beec7a41 100644 --- a/backend/k8sman.py +++ b/backend/k8sman.py @@ -7,9 +7,6 @@ import json from kubernetes_asyncio import client, config -# from fastapi.templating import Jinja2Templates -from jinja2 import Environment, FileSystemLoader - # ============================================================================ DEFAULT_NAMESPACE = os.environ.get("CRAWLER_NAMESPACE") or "crawlers" @@ -31,11 +28,6 @@ class K8SManager: self.namespace = namespace - loader = FileSystemLoader("templates") - self.jinja_env = Environment( - loader=loader, autoescape=False, lstrip_blocks=False, trim_blocks=False - ) - self.crawler_image = os.environ.get("CRAWLER_IMAGE") self.crawler_image_pull_policy = "IfNotPresent" @@ -97,15 +89,17 @@ class K8SManager: ) # Create Cron Job - run_now = False - schedule = crawlconfig.schedule suspend = False - if not schedule or schedule == "now": - if schedule == "now": - run_now = True + schedule = crawlconfig.schedule + + if not schedule: schedule = DEFAULT_NO_SCHEDULE suspend = True + run_now = False + if crawlconfig.runNow: + run_now = True + job_template = self._get_job_template(cid, labels, extra_crawl_params) spec = client.V1beta1CronJobSpec( @@ -205,6 +199,23 @@ class K8SManager: if extra_crawl_params: command += extra_crawl_params + requests_memory = "256M" + limit_memory = "1G" + + requests_cpu = "120m" + limit_cpu = "1000m" + + resources = { + "limits": { + "cpu": limit_cpu, + "memory": limit_memory, + }, + "requests": { + "cpu": requests_cpu, + "memory": requests_memory, + }, + } + return { "spec": { "template": { @@ -227,6 +238,7 @@ class K8SManager: "envFrom": [ {"secretRef": {"name": f"crawl-secret-{uid}"}} ], + "resources": resources } ], "volumes": [ diff --git a/backend/requirements.txt b/backend/requirements.txt index db7f4315..0a58fc3e 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -1,6 +1,5 @@ uvicorn fastapi-users[mongodb]==6.0.0 loguru -jinja2 aiofiles kubernetes-asyncio diff --git a/chart/templates/main.yaml b/chart/templates/main.yaml index 4cf1654f..1d755ffa 100644 --- a/chart/templates/main.yaml +++ b/chart/templates/main.yaml @@ -26,6 +26,8 @@ stringData: {{- if .Values.minio_local }} MINIO_ROOT_USER: "{{ .Values.storage.access_key }}" MINIO_ROOT_PASSWORD: "{{ .Values.storage.secret_key }}" + + MC_HOST: "{{ .Values.minio_scheme }}://{{ .Values.storage.access_key }}:{{ .Values.storage.secret_key }}@{{ .Values.minio_host }}" {{- end }} STORE_ACCESS_KEY: "{{ .Values.storage.access_key }}" @@ -52,6 +54,23 @@ spec: app: {{ .Values.name }} spec: + +{{- if .Values.minio_local }} + initContainers: + - name: init-bucket + image: {{ .Values.minio_mc_image }} + imagePullPolicy: {{ .Values.minio_pull_policy }} + env: + - name: MC_HOST_local + valueFrom: + secretKeyRef: + name: storage-auth + key: MC_HOST + + command: ['/bin/sh'] + args: ['-c', 'mc mb local/test-bucket; mc policy set public local/test-bucket' ] +{{- end }} + containers: - name: api image: {{ .Values.api_image }} @@ -67,10 +86,12 @@ spec: resources: limits: - cpu: {{ .Values.api_limit_cpu }} + cpu: {{ .Values.api_limits_cpu }} + memory: {{ .Values.api_limits_memory }} requests: cpu: {{ .Values.api_requests_cpu }} + memory: {{ .Values.api_requests_memory }} --- diff --git a/chart/values.yaml b/chart/values.yaml index 2d8dd54f..36cfa980 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -9,9 +9,11 @@ api_password_secret: "c9085f33ecce4347aa1d69339e16c499" api_num_replicas: 1 -api_limit_cpu: "100m" api_requests_cpu: "25m" +api_limits_cpu: "100m" +api_requests_memory: "100M" +api_limits_memory: "256M" # MongoDB Image @@ -66,7 +68,11 @@ storage: # set to true to use a local minio image minio_local: True +minio_scheme: "http" +minio_host: "local-minio.default:9000" + minio_image: minio/minio +minio_mc_image: minio/mc minio_pull_policy: "IfNotPresent"