allow disabling of auto-resize of crawler pods (#1964)

- only enabled if 'enable_auto_resize' is true; defaults to false
- if true, set the memory limit to 1.2× the memory request; resize when
hitting the 'soft OOM' threshold of the initial request, scaling by a
factor of 1.2 each time (current behavior) up to max_crawler_memory
- if false, set the memory limit to max_crawler_memory and never adjust
the memory requests or memory limits
- part of #1959
This commit is contained in:
Ilya Kreymer 2024-07-23 18:00:40 -07:00 committed by GitHub
parent a8c5f07b7c
commit 01ddf95a56
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 31 additions and 10 deletions

View File

@ -7,6 +7,7 @@ from kubernetes.utils import parse_quantity
import yaml
from btrixcloud.k8sapi import K8sAPI
from btrixcloud.utils import is_bool
if TYPE_CHECKING:
@ -30,6 +31,7 @@ class K8sOpAPI(K8sAPI):
"""Additional k8s api for operators"""
has_pod_metrics: bool
enable_auto_resize: bool
max_crawler_memory_size: int
def __init__(self):
@ -39,6 +41,7 @@ class K8sOpAPI(K8sAPI):
self.shared_params = yaml.safe_load(fh_config)
self.has_pod_metrics = False
self.enable_auto_resize = False
self.max_crawler_memory_size = 0
self.compute_crawler_resources()
@ -127,6 +130,11 @@ class K8sOpAPI(K8sAPI):
self.has_pod_metrics = await self.is_pod_metrics_available()
print("Pod Metrics Available:", self.has_pod_metrics)
self.enable_auto_resize = self.has_pod_metrics and is_bool(
os.environ.get("ENABLE_AUTO_RESIZE_CRAWLERS")
)
print("Auto-Resize Enabled", self.enable_auto_resize)
# pylint: disable=too-many-instance-attributes, too-many-arguments
# ============================================================================

View File

@ -29,11 +29,7 @@ from btrixcloud.models import (
StorageRef,
)
from btrixcloud.utils import (
from_k8s_date,
to_k8s_date,
dt_now,
)
from btrixcloud.utils import from_k8s_date, to_k8s_date, dt_now
from .baseoperator import BaseOperator, Redis
from .models import (
@ -84,6 +80,13 @@ MEM_LIMIT_PADDING = 1.2
class CrawlOperator(BaseOperator):
"""CrawlOperator Handler"""
done_key: str
pages_key: str
errors_key: str
fast_retry_secs: int
log_failed_crawl_lines: int
def __init__(self, *args):
super().__init__(*args)
@ -221,8 +224,7 @@ class CrawlOperator(BaseOperator):
data.related.get(METRICS, {}),
)
# auto-scaling not possible without pod metrics
if self.k8s.has_pod_metrics:
if self.k8s.enable_auto_resize:
# auto sizing handled here
await self.handle_auto_size(status.podStatus)
@ -377,7 +379,10 @@ class CrawlOperator(BaseOperator):
params["priorityClassName"] = pri_class
params["cpu"] = pod_info.newCpu or params.get(cpu_field)
params["memory"] = pod_info.newMemory or params.get(mem_field)
params["memory_limit"] = float(params["memory"]) * MEM_LIMIT_PADDING
if self.k8s.enable_auto_resize:
params["memory_limit"] = float(params["memory"]) * MEM_LIMIT_PADDING
else:
params["memory_limit"] = self.k8s.max_crawler_memory_size
params["workers"] = params.get(worker_field) or 1
params["do_restart"] = (
pod_info.should_restart_pod() or params.get("force_restart")
@ -555,7 +560,7 @@ class CrawlOperator(BaseOperator):
},
]
if self.k8s.has_pod_metrics:
if self.k8s.enable_auto_resize:
related_resources.append(
{
"apiVersion": METRICS_API,
@ -1072,7 +1077,7 @@ class CrawlOperator(BaseOperator):
pod_info.used.storage = storage
# if no pod metrics, get memory estimate from redis itself
if not self.k8s.has_pod_metrics:
if not self.k8s.enable_auto_resize:
stats = await redis.info("memory")
pod_info.used.memory = int(stats.get("used_memory_rss", 0))

View File

@ -58,6 +58,8 @@ data:
MAX_CRAWLER_MEMORY: "{{ .Values.max_crawler_memory }}"
ENABLE_AUTO_RESIZE_CRAWLERS: "{{ .Values.enable_auto_resize_crawlers }}"
BILLING_ENABLED: "{{ .Values.billing_enabled }}"
SALES_EMAIL: "{{ .Values.sales_email }}"

View File

@ -262,6 +262,12 @@ crawler_extra_memory_per_browser: 768Mi
# crawler_memory = crawler_memory_base + crawler_memory_per_extra_browser * (crawler_browser_instances - 1)
# crawler_memory:
# Crawler Autoscaling
# ---------------------
# if set to true, automatically adjust crawler memory usage up to max_crawler_memory
enable_auto_resize_crawlers: false
# max crawler memory: if set, the upper bound for auto-resizing of crawler pods
# (auto-resizing also requires 'enable_auto_resize_crawlers: true' above)
# if not set, no auto-resizing is done, and crawls always use 'crawler_memory' memory