From dee354f252bc0c8c910e9d8778408d22c3749319 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Tue, 7 Jun 2022 21:52:04 -0700 Subject: [PATCH] affinity: add affinity for k8s crawl deployments: - prefer deploying crawler, redis and job to the same zone - prefer deploying crawler and job together via crawler node type, redis via redis node type (all optional) --- backend/btrixcloud/crawlmanager.py | 3 ++ .../btrixcloud/k8s/templates/crawl_job.yaml | 11 +++++ backend/btrixcloud/k8s/templates/crawler.yaml | 42 +++++++++++++++++++ backend/btrixcloud/swarm/utils.py | 7 +++- backend/test/test_login.py | 12 +++++- chart/templates/configmap.yaml | 3 +- configs/config.yaml | 2 +- 7 files changed, 75 insertions(+), 5 deletions(-) diff --git a/backend/btrixcloud/crawlmanager.py b/backend/btrixcloud/crawlmanager.py index 707b4d72..4769c565 100644 --- a/backend/btrixcloud/crawlmanager.py +++ b/backend/btrixcloud/crawlmanager.py @@ -22,6 +22,8 @@ class BaseCrawlManager(ABC): self.no_delete_jobs = os.environ.get("NO_DELETE_JOBS", "0") != "0" + self.crawler_node_type = os.environ.get("CRAWLER_NODE_TYPE", "") + self.templates = Jinja2Templates(directory=templates) self.loop = asyncio.get_running_loop() @@ -172,6 +174,7 @@ class BaseCrawlManager(ABC): "aid": str(crawlconfig.aid), "job_image": self.job_image, "manual": "1" if manual else "0", + "crawler_node_type": self.crawler_node_type, "schedule": schedule, } diff --git a/backend/btrixcloud/k8s/templates/crawl_job.yaml b/backend/btrixcloud/k8s/templates/crawl_job.yaml index b23e9d59..483049b4 100644 --- a/backend/btrixcloud/k8s/templates/crawl_job.yaml +++ b/backend/btrixcloud/k8s/templates/crawl_job.yaml @@ -21,6 +21,17 @@ spec: btrix.crawlconfig: {{ cid }} spec: restartPolicy: OnFailure + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 1 + preference: + matchExpressions: + - key: nodeType + operator: In + values: + - "{{ crawler_node_type }}" + containers: - name: crawl-job image: {{ job_image 
}} diff --git a/backend/btrixcloud/k8s/templates/crawler.yaml b/backend/btrixcloud/k8s/templates/crawler.yaml index a4b9f13f..7a2fd7a1 100644 --- a/backend/btrixcloud/k8s/templates/crawler.yaml +++ b/backend/btrixcloud/k8s/templates/crawler.yaml @@ -89,6 +89,27 @@ spec: # - name: redis-conf # mountPath: /redis-conf + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 1 + preference: + matchExpressions: + - key: nodeType + operator: In + values: + - "{{ redis_node_type }}" + + podAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 2 + podAffinityTerm: + topologyKey: "failure-domain.beta.kubernetes.io/zone" + labelSelector: + matchLabels: + job-name: job-{{ id }} + crawl: {{ id }} + containers: - name: redis image: {{ redis_image }} @@ -195,6 +216,27 @@ spec: configMap: name: crawl-config-{{ cid }} + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 1 + preference: + matchExpressions: + - key: nodeType + operator: In + values: + - "{{ crawler_node_type }}" + + podAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 2 + podAffinityTerm: + topologyKey: "failure-domain.beta.kubernetes.io/zone" + labelSelector: + matchLabels: + job-name: job-{{ id }} + crawl: {{ id }} + containers: - name: crawler image: {{ crawler_image }} diff --git a/backend/btrixcloud/swarm/utils.py b/backend/btrixcloud/swarm/utils.py index 5cf88824..9a75b66e 100644 --- a/backend/btrixcloud/swarm/utils.py +++ b/backend/btrixcloud/swarm/utils.py @@ -21,7 +21,12 @@ def run_swarm_stack(name, data): fh_io.flush() try: - docker.stack.deploy(name, compose_files=[fh_io.name], orchestrator="swarm", resolve_image="never") + docker.stack.deploy( + name, + compose_files=[fh_io.name], + orchestrator="swarm", + resolve_image="never", + ) except DockerException as exc: print(exc, flush=True) diff --git a/backend/test/test_login.py b/backend/test/test_login.py index f828dc75..18188d2c 100644 --- 
a/backend/test/test_login.py +++ b/backend/test/test_login.py @@ -2,19 +2,27 @@ import requests api_prefix = "http://127.0.0.1:9871/api" + def test_login_invalid(): username = "admin@example.com" password = "invalid" - r = requests.post(f"{api_prefix}/auth/jwt/login", data={"username": username, "password": password, "grant_type": "password"}) + r = requests.post( + f"{api_prefix}/auth/jwt/login", + data={"username": username, "password": password, "grant_type": "password"}, + ) data = r.json() assert r.status_code == 400 assert data["detail"] == "LOGIN_BAD_CREDENTIALS" + def test_login(): username = "admin@example.com" password = "PASSW0RD0" - r = requests.post(f"{api_prefix}/auth/jwt/login", data={"username": username, "password": password, "grant_type": "password"}) + r = requests.post( + f"{api_prefix}/auth/jwt/login", + data={"username": username, "password": password, "grant_type": "password"}, + ) data = r.json() assert r.status_code == 200 diff --git a/chart/templates/configmap.yaml b/chart/templates/configmap.yaml index 646b9207..c056c361 100644 --- a/chart/templates/configmap.yaml +++ b/chart/templates/configmap.yaml @@ -103,7 +103,8 @@ data: crawler_liveness_port: "{{ .Values.crawler_liveness_port | default 0 }}" - crawler_node_type: crawling + crawler_node_type: "{{ .Values.crawler_node_type }}" + redis_node_type: "{{ .Values.redis_node_type }}" redis.conf: | appendonly yes diff --git a/configs/config.yaml b/configs/config.yaml index 77e46066..229609f0 100644 --- a/configs/config.yaml +++ b/configs/config.yaml @@ -1,5 +1,5 @@ redis_image: redis -crawler_image: webrecorder/browsertrix-crawler:cloud +crawler_image: localhost:5000/webrecorder/browsertrix-crawler:latest crawler_requests_cpu: "0.8" crawler_limits_cpu: "1.0"