better resources scaling by number of browsers per crawler container (#1103)
- set crawler cpu / memory with fixed base + incremental bumps based on number of browsers - allow parsing k8s quantities with parse_quantity, compute in operator - set 'crawler_cpu = crawler_cpu_base + crawler_extra_cpu_per_browser * (num_browsers - 1)' and same for memory
This commit is contained in:
parent
8850e35f7a
commit
dce1ae6129
@ -14,6 +14,8 @@ import humanize
|
|||||||
|
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
from kubernetes.utils import parse_quantity
|
||||||
|
|
||||||
from .utils import (
|
from .utils import (
|
||||||
from_k8s_date,
|
from_k8s_date,
|
||||||
to_k8s_date,
|
to_k8s_date,
|
||||||
@ -137,6 +139,29 @@ class BtrixOperator(K8sAPI):
|
|||||||
with open(self.config_file, encoding="utf-8") as fh_config:
|
with open(self.config_file, encoding="utf-8") as fh_config:
|
||||||
self.shared_params = yaml.safe_load(fh_config)
|
self.shared_params = yaml.safe_load(fh_config)
|
||||||
|
|
||||||
|
self.compute_crawler_resources()
|
||||||
|
|
||||||
|
def compute_crawler_resources(self):
    """Compute cpu / memory resources for crawler pods.

    Fills in shared_params["crawler_cpu"] and ["crawler_memory"] as
    base + extra * (num_browsers - 1), unless an explicit override is
    already set in the params.
    """
    # pylint: disable=invalid-name
    p = self.shared_params
    # the base allocation already covers the first browser, so only the
    # additional browsers contribute the per-browser increments
    num = max(int(p["crawler_browser_instances"]) - 1, 0)
    # cpu and memory follow the same formula; compute both via one helper
    self._compute_resource(p, "cpu", num)
    self._compute_resource(p, "memory", num)

def _compute_resource(self, p, name, num):
    """Set p[f"crawler_{name}"] = base + num * extra, unless preset.

    :param p: shared params dict holding k8s quantity strings
    :param name: resource name, "cpu" or "memory"
    :param num: number of extra browsers beyond the first
    """
    key = f"crawler_{name}"
    # an explicit non-empty value acts as an override; leave it alone
    if p.get(key):
        return
    base = parse_quantity(p[f"crawler_{name}_base"])
    extra = parse_quantity(p[f"crawler_extra_{name}_per_browser"])
    p[key] = float(base + num * extra)
    print(f"{name} = {base} + {num} * {extra} = {p[key]}")
|
||||||
|
|
||||||
async def sync_profile_browsers(self, data: MCSyncData):
|
async def sync_profile_browsers(self, data: MCSyncData):
|
||||||
"""sync profile browsers"""
|
"""sync profile browsers"""
|
||||||
spec = data.parent.get("spec", {})
|
spec = data.parent.get("spec", {})
|
||||||
|
|||||||
@ -39,7 +39,7 @@ spec:
|
|||||||
|
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
storage: {{ requests_hd }}
|
storage: {{ crawler_storage }}
|
||||||
|
|
||||||
{% if volume_storage_class %}
|
{% if volume_storage_class %}
|
||||||
storageClassName: {{ volume_storage_class }}
|
storageClassName: {{ volume_storage_class }}
|
||||||
|
|||||||
@ -39,7 +39,7 @@ spec:
|
|||||||
|
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
storage: 1Gi
|
storage: {{ redis_storage }}
|
||||||
|
|
||||||
{% if volume_storage_class %}
|
{% if volume_storage_class %}
|
||||||
storageClassName: {{ volume_storage_class }}
|
storageClassName: {{ volume_storage_class }}
|
||||||
|
|||||||
@ -3,7 +3,8 @@ fastapi==0.71.0
|
|||||||
fastapi-users[mongodb]==9.2.2
|
fastapi-users[mongodb]==9.2.2
|
||||||
loguru
|
loguru
|
||||||
aiofiles
|
aiofiles
|
||||||
kubernetes-asyncio==22.6.5
|
kubernetes-asyncio==25.11.0
|
||||||
|
kubernetes
|
||||||
aiobotocore
|
aiobotocore
|
||||||
redis>=5.0.0
|
redis>=5.0.0
|
||||||
pyyaml
|
pyyaml
|
||||||
|
|||||||
@ -69,10 +69,6 @@ data:
|
|||||||
namespace: {{ .Values.crawler_namespace }}
|
namespace: {{ .Values.crawler_namespace }}
|
||||||
termination_grace_secs: "{{ .Values.grace_period_secs | default 600 }}"
|
termination_grace_secs: "{{ .Values.grace_period_secs | default 600 }}"
|
||||||
|
|
||||||
volume_storage_class: "{{ .Values.volume_storage_class }}"
|
|
||||||
|
|
||||||
requests_hd: "{{ .Values.crawler_storage }}"
|
|
||||||
|
|
||||||
# redis
|
# redis
|
||||||
redis_image: {{ .Values.redis_image }}
|
redis_image: {{ .Values.redis_image }}
|
||||||
redis_image_pull_policy: {{ .Values.redis_pull_policy }}
|
redis_image_pull_policy: {{ .Values.redis_pull_policy }}
|
||||||
@ -81,14 +77,26 @@ data:
|
|||||||
|
|
||||||
redis_memory: "{{ .Values.redis_memory }}"
|
redis_memory: "{{ .Values.redis_memory }}"
|
||||||
|
|
||||||
|
redis_storage: "{{ .Values.redis_storage }}"
|
||||||
|
|
||||||
# crawler
|
# crawler
|
||||||
crawler_image: {{ .Values.crawler_image }}
|
crawler_image: {{ .Values.crawler_image }}
|
||||||
crawler_image_pull_policy: {{ .Values.crawler_pull_policy }}
|
crawler_image_pull_policy: {{ .Values.crawler_pull_policy }}
|
||||||
|
|
||||||
crawler_cpu: "{{ mul .Values.crawler_cpu_per_browser .Values.crawler_browser_instances }}m"
|
crawler_cpu_base: "{{ .Values.crawler_cpu_base }}"
|
||||||
|
crawler_memory_base: "{{ .Values.crawler_memory_base }}"
|
||||||
|
|
||||||
crawler_memory: "{{ mul .Values.crawler_memory_per_browser .Values.crawler_browser_instances }}Mi"
|
crawler_extra_cpu_per_browser: "{{ .Values.crawler_extra_cpu_per_browser | default 0 }}"
|
||||||
|
crawler_extra_memory_per_browser: "{{ .Values.crawler_extra_memory_per_browser | default 0 }}"
|
||||||
|
|
||||||
|
crawler_browser_instances: "{{ .Values.crawler_browser_instances }}"
|
||||||
|
|
||||||
|
crawler_cpu: "{{ .Values.crawler_cpu }}"
|
||||||
|
crawler_memory: "{{ .Values.crawler_memory }}"
|
||||||
|
|
||||||
|
crawler_storage: "{{ .Values.crawler_storage }}"
|
||||||
|
|
||||||
|
volume_storage_class: "{{ .Values.volume_storage_class }}"
|
||||||
|
|
||||||
crawler_liveness_port: "{{ .Values.crawler_liveness_port | default 0 }}"
|
crawler_liveness_port: "{{ .Values.crawler_liveness_port | default 0 }}"
|
||||||
|
|
||||||
|
|||||||
@ -117,9 +117,9 @@ profile_browser_idle_seconds: 60
|
|||||||
frontend_image: "docker.io/webrecorder/browsertrix-frontend:1.7.0-beta.0"
|
frontend_image: "docker.io/webrecorder/browsertrix-frontend:1.7.0-beta.0"
|
||||||
frontend_pull_policy: "Always"
|
frontend_pull_policy: "Always"
|
||||||
|
|
||||||
frontend_cpu: "5m"
|
frontend_cpu: "10m"
|
||||||
|
|
||||||
frontend_memory: "36Mi"
|
frontend_memory: "64Mi"
|
||||||
|
|
||||||
# if set, maps nginx to a fixed port on host machine
|
# if set, maps nginx to a fixed port on host machine
|
||||||
# must be between 30000 - 32767
|
# must be between 30000 - 32767
|
||||||
@ -165,6 +165,9 @@ redis_cpu: "5m"
|
|||||||
redis_memory: "48Mi"
|
redis_memory: "48Mi"
|
||||||
|
|
||||||
|
|
||||||
|
redis_storage: "1Gi"
|
||||||
|
|
||||||
|
|
||||||
# Crawler Image
|
# Crawler Image
|
||||||
# =========================================
|
# =========================================
|
||||||
|
|
||||||
@ -180,15 +183,34 @@ crawler_namespace: "crawlers"
|
|||||||
# num retries
|
# num retries
|
||||||
crawl_retries: 1000
|
crawl_retries: 1000
|
||||||
|
|
||||||
|
# Crawler Resources
|
||||||
|
# -----------------
|
||||||
|
|
||||||
|
# base cpu for 1 browser
|
||||||
|
crawler_cpu_base: 900m
|
||||||
|
|
||||||
|
# base memory for 1 browser
|
||||||
|
crawler_memory_base: 1024Mi
|
||||||
|
|
||||||
|
# number of browsers per crawler instance
|
||||||
crawler_browser_instances: 2
|
crawler_browser_instances: 2
|
||||||
|
|
||||||
# note: the following values are multipled by 'crawler_browser_instances' to get final value
|
# this value is added to crawler_cpu_base, for each additional browser
|
||||||
|
# crawler_cpu = crawler_cpu_base + crawler_extra_cpu_per_browser * (crawler_browser_instances - 1)
|
||||||
|
crawler_extra_cpu_per_browser: 300m
|
||||||
|
|
||||||
# this value is an integer in 'm' (millicpu) units, multiplied by 'crawler_browser_instances'
|
crawler_extra_memory_per_browser: 256Mi
|
||||||
crawler_cpu_per_browser: 650
|
|
||||||
|
|
||||||
# this value is an integer in 'Mi' (Megabyte) units, multiplied by 'crawler_browser_instances'
|
# if not set, defaults to the following, but can be overridden directly:
|
||||||
crawler_memory_per_browser: 675
|
# crawler_cpu = crawler_cpu_base + crawler_extra_cpu_per_browser * (crawler_browser_instances - 1)
|
||||||
|
# crawler_cpu:
|
||||||
|
|
||||||
|
# if not set, defaults to the following, but can be overridden directly:
|
||||||
|
# crawler_memory = crawler_memory_base + crawler_extra_memory_per_browser * (crawler_browser_instances - 1)
|
||||||
|
# crawler_memory:
|
||||||
|
|
||||||
|
# Other Crawler Settings
|
||||||
|
# ----------------------
|
||||||
|
|
||||||
# minimum size allocated to each crawler
|
# minimum size allocated to each crawler
|
||||||
# should be at least double crawl session size to ensure space for WACZ
|
# should be at least double crawl session size to ensure space for WACZ
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user