better resources scaling by number of browsers per crawler container (#1103)
- set crawler cpu / memory with fixed base + incremental bumps based on number of browsers - allow parsing k8s quantities with parse_quantity, compute in operator - set 'crawler_cpu = crawler_cpu_base + crawler_extra_cpu_per_browser * (num_browsers - 1)' and same for memory
This commit is contained in:
parent
8850e35f7a
commit
dce1ae6129
@ -14,6 +14,8 @@ import humanize
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from kubernetes.utils import parse_quantity
|
||||
|
||||
from .utils import (
|
||||
from_k8s_date,
|
||||
to_k8s_date,
|
||||
@ -137,6 +139,29 @@ class BtrixOperator(K8sAPI):
|
||||
with open(self.config_file, encoding="utf-8") as fh_config:
|
||||
self.shared_params = yaml.safe_load(fh_config)
|
||||
|
||||
self.compute_crawler_resources()
|
||||
|
||||
def compute_crawler_resources(self):
    """Derive final crawler cpu / memory from a fixed base plus per-browser bumps.

    Reads ``self.shared_params`` and, when ``crawler_cpu`` / ``crawler_memory``
    are not already set explicitly, computes:

        resource = base + extra_per_browser * (browser_instances - 1)

    storing the result back into shared_params as a float. Quantities are
    parsed with kubernetes' parse_quantity so k8s suffixes ("300m", "256Mi")
    are accepted.
    """
    # pylint: disable=invalid-name
    params = self.shared_params
    # browsers beyond the first one; clamp at 0 so a single browser adds nothing
    extra_browsers = max(int(params["crawler_browser_instances"]) - 1, 0)

    if not params.get("crawler_cpu"):
        cpu_base = parse_quantity(params["crawler_cpu_base"])
        cpu_extra = parse_quantity(params["crawler_extra_cpu_per_browser"])

        params["crawler_cpu"] = float(cpu_base + extra_browsers * cpu_extra)

        print(f"cpu = {cpu_base} + {extra_browsers} * {cpu_extra} = {params['crawler_cpu']}")

    if not params.get("crawler_memory"):
        mem_base = parse_quantity(params["crawler_memory_base"])
        mem_extra = parse_quantity(params["crawler_extra_memory_per_browser"])

        params["crawler_memory"] = float(mem_base + extra_browsers * mem_extra)

        print(f"memory = {mem_base} + {extra_browsers} * {mem_extra} = {params['crawler_memory']}")
|
||||
|
||||
async def sync_profile_browsers(self, data: MCSyncData):
|
||||
"""sync profile browsers"""
|
||||
spec = data.parent.get("spec", {})
|
||||
|
@ -39,7 +39,7 @@ spec:
|
||||
|
||||
resources:
|
||||
requests:
|
||||
storage: {{ requests_hd }}
|
||||
storage: {{ crawler_storage }}
|
||||
|
||||
{% if volume_storage_class %}
|
||||
storageClassName: {{ volume_storage_class }}
|
||||
|
@ -39,7 +39,7 @@ spec:
|
||||
|
||||
resources:
|
||||
requests:
|
||||
storage: 1Gi
|
||||
storage: {{ redis_storage }}
|
||||
|
||||
{% if volume_storage_class %}
|
||||
storageClassName: {{ volume_storage_class }}
|
||||
|
@ -3,7 +3,8 @@ fastapi==0.71.0
|
||||
fastapi-users[mongodb]==9.2.2
|
||||
loguru
|
||||
aiofiles
|
||||
kubernetes-asyncio==22.6.5
|
||||
kubernetes-asyncio==25.11.0
|
||||
kubernetes
|
||||
aiobotocore
|
||||
redis>=5.0.0
|
||||
pyyaml
|
||||
|
@ -69,10 +69,6 @@ data:
|
||||
namespace: {{ .Values.crawler_namespace }}
|
||||
termination_grace_secs: "{{ .Values.grace_period_secs | default 600 }}"
|
||||
|
||||
volume_storage_class: "{{ .Values.volume_storage_class }}"
|
||||
|
||||
requests_hd: "{{ .Values.crawler_storage }}"
|
||||
|
||||
# redis
|
||||
redis_image: {{ .Values.redis_image }}
|
||||
redis_image_pull_policy: {{ .Values.redis_pull_policy }}
|
||||
@ -81,14 +77,26 @@ data:
|
||||
|
||||
redis_memory: "{{ .Values.redis_memory }}"
|
||||
|
||||
redis_storage: "{{ .Values.redis_storage }}"
|
||||
|
||||
# crawler
|
||||
crawler_image: {{ .Values.crawler_image }}
|
||||
crawler_image_pull_policy: {{ .Values.crawler_pull_policy }}
|
||||
|
||||
crawler_cpu: "{{ mul .Values.crawler_cpu_per_browser .Values.crawler_browser_instances }}m"
|
||||
crawler_cpu_base: "{{ .Values.crawler_cpu_base }}"
|
||||
crawler_memory_base: "{{ .Values.crawler_memory_base }}"
|
||||
|
||||
crawler_memory: "{{ mul .Values.crawler_memory_per_browser .Values.crawler_browser_instances }}Mi"
|
||||
crawler_extra_cpu_per_browser: "{{ .Values.crawler_extra_cpu_per_browser | default 0 }}"
|
||||
crawler_extra_memory_per_browser: "{{ .Values.crawler_extra_memory_per_browser | default 0 }}"
|
||||
|
||||
crawler_browser_instances: "{{ .Values.crawler_browser_instances }}"
|
||||
|
||||
crawler_cpu: "{{ .Values.crawler_cpu }}"
|
||||
crawler_memory: "{{ .Values.crawler_memory }}"
|
||||
|
||||
crawler_storage: "{{ .Values.crawler_storage }}"
|
||||
|
||||
volume_storage_class: "{{ .Values.volume_storage_class }}"
|
||||
|
||||
crawler_liveness_port: "{{ .Values.crawler_liveness_port | default 0 }}"
|
||||
|
||||
|
@ -117,9 +117,9 @@ profile_browser_idle_seconds: 60
|
||||
frontend_image: "docker.io/webrecorder/browsertrix-frontend:1.7.0-beta.0"
|
||||
frontend_pull_policy: "Always"
|
||||
|
||||
frontend_cpu: "5m"
|
||||
frontend_cpu: "10m"
|
||||
|
||||
frontend_memory: "36Mi"
|
||||
frontend_memory: "64Mi"
|
||||
|
||||
# if set, maps nginx to a fixed port on host machine
|
||||
# must be between 30000 - 32767
|
||||
@ -165,6 +165,9 @@ redis_cpu: "5m"
|
||||
redis_memory: "48Mi"
|
||||
|
||||
|
||||
redis_storage: "1Gi"
|
||||
|
||||
|
||||
# Crawler Image
|
||||
# =========================================
|
||||
|
||||
@ -180,15 +183,34 @@ crawler_namespace: "crawlers"
|
||||
# num retries
|
||||
crawl_retries: 1000
|
||||
|
||||
# Crawler Resources
|
||||
# -----------------
|
||||
|
||||
# base cpu for 1 browser
|
||||
crawler_cpu_base: 900m
|
||||
|
||||
# base memory for 1 browser
|
||||
crawler_memory_base: 1024Mi
|
||||
|
||||
# number of browsers per crawler instance
|
||||
crawler_browser_instances: 2
|
||||
|
||||
# note: the following values are multiplied by 'crawler_browser_instances' to get the final value
|
||||
# this value is added to crawler_cpu_base, for each additional browser
|
||||
# crawler_cpu = crawler_cpu_base + crawler_extra_cpu_per_browser * (crawler_browser_instances - 1)
|
||||
crawler_extra_cpu_per_browser: 300m
|
||||
|
||||
# this value is an integer in 'm' (millicpu) units, multiplied by 'crawler_browser_instances'
|
||||
crawler_cpu_per_browser: 650
|
||||
crawler_extra_memory_per_browser: 256Mi
|
||||
|
||||
# this value is an integer in 'Mi' (Megabyte) units, multiplied by 'crawler_browser_instances'
|
||||
crawler_memory_per_browser: 675
|
||||
# if not set, defaults to the following, but can be overridden directly:
|
||||
# crawler_cpu = crawler_cpu_base + crawler_extra_cpu_per_browser * (crawler_browser_instances - 1)
|
||||
# crawler_cpu:
|
||||
|
||||
# if not set, defaults to the following, but can be overridden directly:
|
||||
# crawler_memory = crawler_memory_base + crawler_extra_memory_per_browser * (crawler_browser_instances - 1)
|
||||
# crawler_memory:
|
||||
|
||||
# Other Crawler Settings
|
||||
# ----------------------
|
||||
|
||||
# minimum size allocated to each crawler
|
||||
# should be at least double crawl session size to ensure space for WACZ
|
||||
|
Loading…
Reference in New Issue
Block a user