allow disabling of auto-resize of crawler pods (#1964)
- only enable auto-resize if 'enable_auto_resize' is true (defaults to false)
- if true, set the memory limit to 1.2x the memory request, resize when hitting the 'soft oom' threshold of the initial request, and adjust by a factor of 1.2 (current behavior) up to max_crawler_memory
- if false, set the memory limit to max_crawler_memory and never adjust memory requests or memory limits
- part of #1959
This commit is contained in:
parent
a8c5f07b7c
commit
01ddf95a56
@ -7,6 +7,7 @@ from kubernetes.utils import parse_quantity
|
|||||||
|
|
||||||
import yaml
|
import yaml
|
||||||
from btrixcloud.k8sapi import K8sAPI
|
from btrixcloud.k8sapi import K8sAPI
|
||||||
|
from btrixcloud.utils import is_bool
|
||||||
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
@ -30,6 +31,7 @@ class K8sOpAPI(K8sAPI):
|
|||||||
"""Additional k8s api for operators"""
|
"""Additional k8s api for operators"""
|
||||||
|
|
||||||
has_pod_metrics: bool
|
has_pod_metrics: bool
|
||||||
|
enable_auto_resize: bool
|
||||||
max_crawler_memory_size: int
|
max_crawler_memory_size: int
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
@ -39,6 +41,7 @@ class K8sOpAPI(K8sAPI):
|
|||||||
self.shared_params = yaml.safe_load(fh_config)
|
self.shared_params = yaml.safe_load(fh_config)
|
||||||
|
|
||||||
self.has_pod_metrics = False
|
self.has_pod_metrics = False
|
||||||
|
self.enable_auto_resize = False
|
||||||
self.max_crawler_memory_size = 0
|
self.max_crawler_memory_size = 0
|
||||||
|
|
||||||
self.compute_crawler_resources()
|
self.compute_crawler_resources()
|
||||||
@ -127,6 +130,11 @@ class K8sOpAPI(K8sAPI):
|
|||||||
self.has_pod_metrics = await self.is_pod_metrics_available()
|
self.has_pod_metrics = await self.is_pod_metrics_available()
|
||||||
print("Pod Metrics Available:", self.has_pod_metrics)
|
print("Pod Metrics Available:", self.has_pod_metrics)
|
||||||
|
|
||||||
|
self.enable_auto_resize = self.has_pod_metrics and is_bool(
|
||||||
|
os.environ.get("ENABLE_AUTO_RESIZE_CRAWLERS")
|
||||||
|
)
|
||||||
|
print("Auto-Resize Enabled", self.enable_auto_resize)
|
||||||
|
|
||||||
|
|
||||||
# pylint: disable=too-many-instance-attributes, too-many-arguments
|
# pylint: disable=too-many-instance-attributes, too-many-arguments
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
@ -29,11 +29,7 @@ from btrixcloud.models import (
|
|||||||
StorageRef,
|
StorageRef,
|
||||||
)
|
)
|
||||||
|
|
||||||
from btrixcloud.utils import (
|
from btrixcloud.utils import from_k8s_date, to_k8s_date, dt_now
|
||||||
from_k8s_date,
|
|
||||||
to_k8s_date,
|
|
||||||
dt_now,
|
|
||||||
)
|
|
||||||
|
|
||||||
from .baseoperator import BaseOperator, Redis
|
from .baseoperator import BaseOperator, Redis
|
||||||
from .models import (
|
from .models import (
|
||||||
@ -84,6 +80,13 @@ MEM_LIMIT_PADDING = 1.2
|
|||||||
class CrawlOperator(BaseOperator):
|
class CrawlOperator(BaseOperator):
|
||||||
"""CrawlOperator Handler"""
|
"""CrawlOperator Handler"""
|
||||||
|
|
||||||
|
done_key: str
|
||||||
|
pages_key: str
|
||||||
|
errors_key: str
|
||||||
|
|
||||||
|
fast_retry_secs: int
|
||||||
|
log_failed_crawl_lines: int
|
||||||
|
|
||||||
def __init__(self, *args):
|
def __init__(self, *args):
|
||||||
super().__init__(*args)
|
super().__init__(*args)
|
||||||
|
|
||||||
@ -221,8 +224,7 @@ class CrawlOperator(BaseOperator):
|
|||||||
data.related.get(METRICS, {}),
|
data.related.get(METRICS, {}),
|
||||||
)
|
)
|
||||||
|
|
||||||
# auto-scaling not possible without pod metrics
|
if self.k8s.enable_auto_resize:
|
||||||
if self.k8s.has_pod_metrics:
|
|
||||||
# auto sizing handled here
|
# auto sizing handled here
|
||||||
await self.handle_auto_size(status.podStatus)
|
await self.handle_auto_size(status.podStatus)
|
||||||
|
|
||||||
@ -377,7 +379,10 @@ class CrawlOperator(BaseOperator):
|
|||||||
params["priorityClassName"] = pri_class
|
params["priorityClassName"] = pri_class
|
||||||
params["cpu"] = pod_info.newCpu or params.get(cpu_field)
|
params["cpu"] = pod_info.newCpu or params.get(cpu_field)
|
||||||
params["memory"] = pod_info.newMemory or params.get(mem_field)
|
params["memory"] = pod_info.newMemory or params.get(mem_field)
|
||||||
params["memory_limit"] = float(params["memory"]) * MEM_LIMIT_PADDING
|
if self.k8s.enable_auto_resize:
|
||||||
|
params["memory_limit"] = float(params["memory"]) * MEM_LIMIT_PADDING
|
||||||
|
else:
|
||||||
|
params["memory_limit"] = self.k8s.max_crawler_memory_size
|
||||||
params["workers"] = params.get(worker_field) or 1
|
params["workers"] = params.get(worker_field) or 1
|
||||||
params["do_restart"] = (
|
params["do_restart"] = (
|
||||||
pod_info.should_restart_pod() or params.get("force_restart")
|
pod_info.should_restart_pod() or params.get("force_restart")
|
||||||
@ -555,7 +560,7 @@ class CrawlOperator(BaseOperator):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
if self.k8s.has_pod_metrics:
|
if self.k8s.enable_auto_resize:
|
||||||
related_resources.append(
|
related_resources.append(
|
||||||
{
|
{
|
||||||
"apiVersion": METRICS_API,
|
"apiVersion": METRICS_API,
|
||||||
@ -1072,7 +1077,7 @@ class CrawlOperator(BaseOperator):
|
|||||||
pod_info.used.storage = storage
|
pod_info.used.storage = storage
|
||||||
|
|
||||||
# if no pod metrics, get memory estimate from redis itself
|
# if no pod metrics, get memory estimate from redis itself
|
||||||
if not self.k8s.has_pod_metrics:
|
if not self.k8s.enable_auto_resize:
|
||||||
stats = await redis.info("memory")
|
stats = await redis.info("memory")
|
||||||
pod_info.used.memory = int(stats.get("used_memory_rss", 0))
|
pod_info.used.memory = int(stats.get("used_memory_rss", 0))
|
||||||
|
|
||||||
|
@ -58,6 +58,8 @@ data:
|
|||||||
|
|
||||||
MAX_CRAWLER_MEMORY: "{{ .Values.max_crawler_memory }}"
|
MAX_CRAWLER_MEMORY: "{{ .Values.max_crawler_memory }}"
|
||||||
|
|
||||||
|
ENABLE_AUTO_RESIZE_CRAWLERS: "{{ .Values.enable_auto_resize_crawlers }}"
|
||||||
|
|
||||||
BILLING_ENABLED: "{{ .Values.billing_enabled }}"
|
BILLING_ENABLED: "{{ .Values.billing_enabled }}"
|
||||||
|
|
||||||
SALES_EMAIL: "{{ .Values.sales_email }}"
|
SALES_EMAIL: "{{ .Values.sales_email }}"
|
||||||
|
@ -262,6 +262,12 @@ crawler_extra_memory_per_browser: 768Mi
|
|||||||
# crawler_memory = crawler_memory_base + crawler_memory_per_extra_browser * (crawler_browser_instances - 1)
|
# crawler_memory = crawler_memory_base + crawler_memory_per_extra_browser * (crawler_browser_instances - 1)
|
||||||
# crawler_memory:
|
# crawler_memory:
|
||||||
|
|
||||||
|
# Crawler Autoscaling
|
||||||
|
# ---------------------
|
||||||
|
|
||||||
|
# if set to true, automatically adjust crawler memory usage up to max_crawler_memory
|
||||||
|
enable_auto_resize_crawlers: false
|
||||||
|
|
||||||
|
|
||||||
# max crawler memory, if set, will enable auto-resizing of crawler pods up to this size
|
# max crawler memory, if set, will enable auto-resizing of crawler pods up to this size
|
||||||
# if not set, no auto-resizing is done, and crawls always use 'crawler_memory' memory
|
# if not set, no auto-resizing is done, and crawls always use 'crawler_memory' memory
|
||||||
|
Loading…
Reference in New Issue
Block a user