From ec74eb42428d24e41dd17bd83174de8ed76d7124 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Wed, 24 Apr 2024 15:16:32 +0200 Subject: [PATCH] operator: add 'max_crawler_memory' to limit autosizing of crawler pods (#1746) Adds a `max_crawler_memory` chart setting, which, if set, will define the upper crawler memory limit that crawler pods can be resized up to. If not set, auto resizing is disabled and pods are always set to 'crawler_memory' memory --- backend/btrixcloud/operator/baseoperator.py | 15 +++++++++++++++ backend/btrixcloud/operator/crawls.py | 7 +++++++ chart/templates/configmap.yaml | 2 ++ chart/values.yaml | 5 +++++ 4 files changed, 29 insertions(+) diff --git a/backend/btrixcloud/operator/baseoperator.py b/backend/btrixcloud/operator/baseoperator.py index 9a3c67db..4ff6300d 100644 --- a/backend/btrixcloud/operator/baseoperator.py +++ b/backend/btrixcloud/operator/baseoperator.py @@ -1,6 +1,7 @@ """ Base Operator class for all operators """ import asyncio +import os from typing import TYPE_CHECKING from kubernetes.utils import parse_quantity @@ -28,6 +29,9 @@ else: class K8sOpAPI(K8sAPI): """Additional k8s api for operators""" + has_pod_metrics: bool + max_crawler_memory_size: int + def __init__(self): super().__init__() self.config_file = "/config/config.yaml" @@ -38,6 +42,8 @@ class K8sOpAPI(K8sAPI): self.compute_crawler_resources() self.compute_profile_resources() + self.max_crawler_memory_size = 0 + def compute_crawler_resources(self): """compute memory / cpu resources for crawlers""" p = self.shared_params @@ -69,6 +75,15 @@ class K8sOpAPI(K8sAPI): crawler_memory = int(parse_quantity(p["crawler_memory"])) print(f"memory = {crawler_memory}") + max_crawler_memory_size = 0 + max_crawler_memory = os.environ.get("MAX_CRAWLER_MEMORY") + if max_crawler_memory: + max_crawler_memory_size = int(parse_quantity(max_crawler_memory)) + + self.max_crawler_memory_size = max_crawler_memory_size or crawler_memory + + print("max crawler memory size", self.max_crawler_memory_size) + p["crawler_cpu"] = crawler_cpu + p["crawler_memory"] = crawler_memory diff --git a/backend/btrixcloud/operator/crawls.py b/backend/btrixcloud/operator/crawls.py index 3bb2c11c..95f657b5 100644 --- a/backend/btrixcloud/operator/crawls.py +++ b/backend/btrixcloud/operator/crawls.py @@ -1033,6 +1033,13 @@ class CrawlOperator(BaseOperator): # if pod is using >MEM_SCALE_UP_THRESHOLD of its memory, increase mem if mem_usage > MEM_SCALE_UP_THRESHOLD: + if new_memory > self.k8s.max_crawler_memory_size: + print( + f"Mem {mem_usage}: Not resizing pod {name}: " + + f"mem {new_memory} > max allowed {self.k8s.max_crawler_memory_size}" + ) + return + pod.newMemory = new_memory print( f"Mem {mem_usage}: Resizing pod {name} -> mem {pod.newMemory} - Scale Up" diff --git a/chart/templates/configmap.yaml b/chart/templates/configmap.yaml index ea0b067e..ffde193e 100644 --- a/chart/templates/configmap.yaml +++ b/chart/templates/configmap.yaml @@ -56,6 +56,8 @@ data: MIN_QA_CRAWLER_IMAGE: "{{ .Values.min_qa_crawler_image }}" + MAX_CRAWLER_MEMORY: "{{ .Values.max_crawler_memory }}" + --- apiVersion: v1 kind: ConfigMap diff --git a/chart/values.yaml b/chart/values.yaml index e509912c..fe97ed32 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -249,6 +249,11 @@ crawler_extra_memory_per_browser: 768Mi # crawler_memory = crawler_memory_base + crawler_memory_per_extra_browser * (crawler_browser_instances - 1) # crawler_memory: + +# max crawler memory, if set, will enable auto-resizing of crawler pods up to this size
# if not set, no auto-resizing is done, and crawls always use 'crawler_memory' memory
# max_crawler_memory:
+
 # optional: defaults to crawler_memory_base and crawler_cpu_base if not set # profile_browser_memory: #