affinity: add affinity for k8s crawl deployments:

- prefer scheduling the crawler, redis, and job pods in the same zone
- prefer placing the crawler and job pods on nodes matching the crawler node type, and the redis pod on nodes matching the redis node type (all optional; see the node-label sketch below)
Ilya Kreymer 2022-06-07 21:52:04 -07:00
parent 21b1a87534
commit dee354f252
7 changed files with 75 additions and 5 deletions
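
For the node-type preferences to take effect, cluster nodes must carry a nodeType label whose value matches the configured node type. A minimal sketch of such a node, assuming a hypothetical node name crawl-node-1 and the value "crawling" (the default used in the configmap before this commit):

apiVersion: v1
kind: Node
metadata:
  name: crawl-node-1        # hypothetical node name
  labels:
    nodeType: crawling      # matched by the nodeAffinity preferences added below

The same label can be applied to an existing node with kubectl label nodes crawl-node-1 nodeType=crawling. Since all of the affinity rules below are preferred rather than required, pods still schedule normally on clusters without these labels.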


@@ -22,6 +22,8 @@ class BaseCrawlManager(ABC):
         self.no_delete_jobs = os.environ.get("NO_DELETE_JOBS", "0") != "0"
 
+        self.crawler_node_type = os.environ.get("CRAWLER_NODE_TYPE", "")
+
         self.templates = Jinja2Templates(directory=templates)
 
         self.loop = asyncio.get_running_loop()
@@ -172,6 +174,7 @@ class BaseCrawlManager(ABC):
             "aid": str(crawlconfig.aid),
             "job_image": self.job_image,
             "manual": "1" if manual else "0",
+            "crawler_node_type": self.crawler_node_type,
             "schedule": schedule,
         }


@@ -21,6 +21,17 @@ spec:
         btrix.crawlconfig: {{ cid }}
     spec:
       restartPolicy: OnFailure
+      affinity:
+        nodeAffinity:
+          preferredDuringSchedulingIgnoredDuringExecution:
+            - weight: 1
+              preference:
+                matchExpressions:
+                  - key: nodeType
+                    operator: In
+                    values:
+                      - "{{ crawler_node_type }}"
+
       containers:
         - name: crawl-job
           image: {{ job_image }}


@@ -89,6 +89,27 @@ spec:
       # - name: redis-conf
       #   mountPath: /redis-conf
 
+      affinity:
+        nodeAffinity:
+          preferredDuringSchedulingIgnoredDuringExecution:
+            - weight: 1
+              preference:
+                matchExpressions:
+                  - key: nodeType
+                    operator: In
+                    values:
+                      - "{{ redis_node_type }}"
+
+        podAffinity:
+          preferredDuringSchedulingIgnoredDuringExecution:
+            - weight: 2
+              podAffinityTerm:
+                topologyKey: "failure-domain.beta.kubernetes.io/zone"
+                labelSelector:
+                  matchLabels:
+                    job-name: job-{{ id }}
+                    crawl: {{ id }}
+
       containers:
         - name: redis
           image: {{ redis_image }}
@@ -195,6 +216,27 @@ spec:
         configMap:
           name: crawl-config-{{ cid }}
 
+      affinity:
+        nodeAffinity:
+          preferredDuringSchedulingIgnoredDuringExecution:
+            - weight: 1
+              preference:
+                matchExpressions:
+                  - key: nodeType
+                    operator: In
+                    values:
+                      - "{{ crawler_node_type }}"
+
+        podAffinity:
+          preferredDuringSchedulingIgnoredDuringExecution:
+            - weight: 2
+              podAffinityTerm:
+                topologyKey: "failure-domain.beta.kubernetes.io/zone"
+                labelSelector:
+                  matchLabels:
+                    job-name: job-{{ id }}
+                    crawl: {{ id }}
+
       containers:
         - name: crawler
           image: {{ crawler_image }}


@@ -21,7 +21,12 @@ def run_swarm_stack(name, data):
         fh_io.flush()
 
         try:
-            docker.stack.deploy(name, compose_files=[fh_io.name], orchestrator="swarm", resolve_image="never")
+            docker.stack.deploy(
+                name,
+                compose_files=[fh_io.name],
+                orchestrator="swarm",
+                resolve_image="never",
+            )
         except DockerException as exc:
             print(exc, flush=True)


@@ -2,19 +2,27 @@ import requests
 api_prefix = "http://127.0.0.1:9871/api"
 
 
 def test_login_invalid():
     username = "admin@example.com"
     password = "invalid"
-    r = requests.post(f"{api_prefix}/auth/jwt/login", data={"username": username, "password": password, "grant_type": "password"})
+    r = requests.post(
+        f"{api_prefix}/auth/jwt/login",
+        data={"username": username, "password": password, "grant_type": "password"},
+    )
     data = r.json()
     assert r.status_code == 400
     assert data["detail"] == "LOGIN_BAD_CREDENTIALS"
 
 
 def test_login():
     username = "admin@example.com"
     password = "PASSW0RD0"
-    r = requests.post(f"{api_prefix}/auth/jwt/login", data={"username": username, "password": password, "grant_type": "password"})
+    r = requests.post(
+        f"{api_prefix}/auth/jwt/login",
+        data={"username": username, "password": password, "grant_type": "password"},
+    )
     data = r.json()
     assert r.status_code == 200


@@ -103,7 +103,8 @@ data:
   crawler_liveness_port: "{{ .Values.crawler_liveness_port | default 0 }}"
 
-  crawler_node_type: crawling
+  crawler_node_type: "{{ .Values.crawler_node_type }}"
+  redis_node_type: "{{ .Values.redis_node_type }}"
 
   redis.conf: |
     appendonly yes
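
Since the configmap now reads both node types from chart values, a deployment opts in by setting the corresponding keys in its values file. A sketch of such an override; the key names come from this commit, while the redis label value is illustrative:

# values.yaml overrides (leave unset to keep scheduling unconstrained)
crawler_node_type: crawling   # crawler and job pods prefer nodes labeled nodeType=crawling
redis_node_type: redis        # redis pods prefer nodes labeled nodeType=redis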


@@ -1,5 +1,5 @@
 redis_image: redis
 
-crawler_image: webrecorder/browsertrix-crawler:cloud
+crawler_image: localhost:5000/webrecorder/browsertrix-crawler:latest
 crawler_requests_cpu: "0.8"
 crawler_limits_cpu: "1.0"