better resource scaling by number of browsers per crawler container (#1103)

- set crawler cpu / memory from a fixed base plus an incremental bump per additional browser
- allow parsing k8s quantities with parse_quantity, computing the totals in the operator
- set 'crawler_cpu = crawler_cpu_base + crawler_extra_cpu_per_browser * (num_browsers - 1)',
  and the same for memory
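
For example, with the chart defaults (crawler_cpu_base: 900m,
crawler_extra_cpu_per_browser: 300m, crawler_memory_base: 1024Mi,
crawler_extra_memory_per_browser: 256Mi, crawler_browser_instances: 2):

    crawler_cpu    = 900m + 300m * (2 - 1) = 1200m (1.2 cores)
    crawler_memory = 1024Mi + 256Mi * (2 - 1) = 1280Mi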
Ilya Kreymer 2023-09-06 01:42:44 -04:00 committed by GitHub
parent 8850e35f7a
commit dce1ae6129
6 changed files with 72 additions and 16 deletions


@@ -14,6 +14,8 @@ import humanize
 from pydantic import BaseModel
+from kubernetes.utils import parse_quantity
 from .utils import (
     from_k8s_date,
     to_k8s_date,
@@ -137,6 +139,29 @@ class BtrixOperator(K8sAPI):
         with open(self.config_file, encoding="utf-8") as fh_config:
             self.shared_params = yaml.safe_load(fh_config)

+        self.compute_crawler_resources()
+
+    def compute_crawler_resources(self):
+        """compute memory / cpu resources for crawlers"""
+        # pylint: disable=invalid-name
+        p = self.shared_params
+        num = max(int(p["crawler_browser_instances"]) - 1, 0)
+
+        if not p.get("crawler_cpu"):
+            base = parse_quantity(p["crawler_cpu_base"])
+            extra = parse_quantity(p["crawler_extra_cpu_per_browser"])
+            p["crawler_cpu"] = float(base + num * extra)
+            print(f"cpu = {base} + {num} * {extra} = {p['crawler_cpu']}")
+
+        if not p.get("crawler_memory"):
+            base = parse_quantity(p["crawler_memory_base"])
+            extra = parse_quantity(p["crawler_extra_memory_per_browser"])
+            p["crawler_memory"] = float(base + num * extra)
+            print(f"memory = {base} + {num} * {extra} = {p['crawler_memory']}")
+
     async def sync_profile_browsers(self, data: MCSyncData):
         """sync profile browsers"""
         spec = data.parent.get("spec", {})
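
For reference, kubernetes.utils.parse_quantity converts k8s quantity strings
into decimal.Decimal values (cores for cpu, bytes for memory), which is what
makes the mixed-suffix arithmetic above safe. A quick illustration:

    from decimal import Decimal
    from kubernetes.utils import parse_quantity

    assert parse_quantity("900m") == Decimal("0.9")        # millicores -> cores
    assert parse_quantity("300m") == Decimal("0.3")
    assert parse_quantity("1024Mi") == 1024 * 1024 * 1024  # binary suffix -> bytes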


@@ -39,7 +39,7 @@ spec:
   resources:
     requests:
-      storage: {{ requests_hd }}
+      storage: {{ crawler_storage }}
 {% if volume_storage_class %}
   storageClassName: {{ volume_storage_class }}


@@ -39,7 +39,7 @@ spec:
   resources:
     requests:
-      storage: 1Gi
+      storage: {{ redis_storage }}
 {% if volume_storage_class %}
   storageClassName: {{ volume_storage_class }}


@@ -3,7 +3,8 @@ fastapi==0.71.0
 fastapi-users[mongodb]==9.2.2
 loguru
 aiofiles
-kubernetes-asyncio==22.6.5
+kubernetes-asyncio==25.11.0
+kubernetes
 aiobotocore
 redis>=5.0.0
 pyyaml


@@ -69,10 +69,6 @@ data:
   namespace: {{ .Values.crawler_namespace }}
   termination_grace_secs: "{{ .Values.grace_period_secs | default 600 }}"
-  volume_storage_class: "{{ .Values.volume_storage_class }}"
-  requests_hd: "{{ .Values.crawler_storage }}"

   # redis
   redis_image: {{ .Values.redis_image }}
   redis_image_pull_policy: {{ .Values.redis_pull_policy }}

@@ -81,14 +77,26 @@ data:
   redis_memory: "{{ .Values.redis_memory }}"
   redis_storage: "{{ .Values.redis_storage }}"

   # crawler
   crawler_image: {{ .Values.crawler_image }}
   crawler_image_pull_policy: {{ .Values.crawler_pull_policy }}
-  crawler_cpu: "{{ mul .Values.crawler_cpu_per_browser .Values.crawler_browser_instances }}m"
+  crawler_cpu_base: "{{ .Values.crawler_cpu_base }}"
+  crawler_memory_base: "{{ .Values.crawler_memory_base }}"
-  crawler_memory: "{{ mul .Values.crawler_memory_per_browser .Values.crawler_browser_instances }}Mi"
+  crawler_extra_cpu_per_browser: "{{ .Values.crawler_extra_cpu_per_browser | default 0 }}"
+  crawler_extra_memory_per_browser: "{{ .Values.crawler_extra_memory_per_browser | default 0 }}"
+  crawler_browser_instances: "{{ .Values.crawler_browser_instances }}"
+  crawler_cpu: "{{ .Values.crawler_cpu }}"
+  crawler_memory: "{{ .Values.crawler_memory }}"
+  crawler_storage: "{{ .Values.crawler_storage }}"
+  volume_storage_class: "{{ .Values.volume_storage_class }}"
   crawler_liveness_port: "{{ .Values.crawler_liveness_port | default 0 }}"
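
With the totals no longer precomputed via Helm's mul (which needs bare
integers), the chart now passes ordinary k8s quantity strings through to the
operator. An illustrative sketch of what shared_params roughly contains after
the operator loads the rendered configmap (keys match the entries above;
values are the chart defaults):

    shared_params = {
        "crawler_cpu_base": "900m",
        "crawler_extra_cpu_per_browser": "300m",
        "crawler_memory_base": "1024Mi",
        "crawler_extra_memory_per_browser": "256Mi",
        "crawler_browser_instances": "2",
        "crawler_cpu": "",     # empty -> filled in by compute_crawler_resources()
        "crawler_memory": "",  # empty -> filled in by compute_crawler_resources()
    }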


@@ -117,9 +117,9 @@ profile_browser_idle_seconds: 60
 frontend_image: "docker.io/webrecorder/browsertrix-frontend:1.7.0-beta.0"
 frontend_pull_policy: "Always"

-frontend_cpu: "5m"
+frontend_cpu: "10m"

-frontend_memory: "36Mi"
+frontend_memory: "64Mi"

 # if set, maps nginx to a fixed port on host machine
 # must be between 30000 - 32767
@@ -165,6 +165,9 @@ redis_cpu: "5m"
 redis_memory: "48Mi"
 redis_storage: "1Gi"

 # Crawler Image
 # =========================================
@@ -180,15 +183,34 @@ crawler_namespace: "crawlers"
 # num retries
 crawl_retries: 1000

+# Crawler Resources
+# -----------------
+# base cpu for 1 browser
+crawler_cpu_base: 900m
+
+# base memory for 1 browser
+crawler_memory_base: 1024Mi
+
 # number of browsers per crawler instance
 crawler_browser_instances: 2

-# note: the following values are multiplied by 'crawler_browser_instances' to get the final value
+# this value is added to crawler_cpu_base for each additional browser:
+# crawler_cpu = crawler_cpu_base + crawler_extra_cpu_per_browser * (crawler_browser_instances - 1)
+crawler_extra_cpu_per_browser: 300m
-# this value is an integer in 'm' (millicpu) units, multiplied by 'crawler_browser_instances'
-crawler_cpu_per_browser: 650
+crawler_extra_memory_per_browser: 256Mi
-# this value is an integer in 'Mi' (mebibyte) units, multiplied by 'crawler_browser_instances'
-crawler_memory_per_browser: 675
+
+# if not set, defaults to the following, but can be overridden directly:
+# crawler_cpu = crawler_cpu_base + crawler_extra_cpu_per_browser * (crawler_browser_instances - 1)
+# crawler_cpu:
+
+# if not set, defaults to the following, but can be overridden directly:
+# crawler_memory = crawler_memory_base + crawler_extra_memory_per_browser * (crawler_browser_instances - 1)
+# crawler_memory:

 # Other Crawler Settings
 # ----------------------

 # minimum size allocated to each crawler
 # should be at least double crawl session size to ensure space for WACZ
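
Since the operator only computes a value when the corresponding key is unset,
setting crawler_cpu or crawler_memory here bypasses the formula entirely. A
minimal sketch of that guard, using a hypothetical effective_cpu() helper:

    from kubernetes.utils import parse_quantity

    def effective_cpu(p: dict) -> float:
        """Return crawler cpu in cores, computed only if not set explicitly."""
        if p.get("crawler_cpu"):
            return float(parse_quantity(p["crawler_cpu"]))
        num_extra = max(int(p["crawler_browser_instances"]) - 1, 0)
        return float(
            parse_quantity(p["crawler_cpu_base"])
            + num_extra * parse_quantity(p["crawler_extra_cpu_per_browser"])
        )

    assert effective_cpu({"crawler_cpu": "2000m"}) == 2.0  # explicit override wins
    assert effective_cpu({
        "crawler_cpu": "",
        "crawler_cpu_base": "900m",
        "crawler_extra_cpu_per_browser": "300m",
        "crawler_browser_instances": "2",
    }) == 1.2                                              # computed default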