Make auto-resize crawler volume ratio adjustable (#2076)
Make the avail/used storage ratio for crawler volumes adjustable; setting it to 0 disables auto-resize. Follow-up to #2023.
parent 49ce894353
commit 1f919de294
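A minimal sketch of the new setting's contract, runnable outside the operator (only the environment variable name comes from this commit; the rest is illustrative):

import os

# CRAWLER_MIN_AVAIL_STORAGE_RATIO is the new env var added by this commit;
# unset or "0" parses to 0.0, which disables crawler volume auto-resize.
ratio = float(os.environ.get("CRAWLER_MIN_AVAIL_STORAGE_RATIO") or 0)

if not ratio:
    print("crawler volume auto-resize disabled")
else:
    print(f"resize when used storage * {ratio} exceeds allocated storage")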
@@ -76,9 +76,6 @@ MEM_SOFT_OOM_THRESHOLD = 1.0
 # set memory limit to this much of request for extra padding
 MEM_LIMIT_PADDING = 1.2
 
-# ensure available storage is at least this much times used storage
-AVAIL_STORAGE_RATIO = 2.5
-
 
 # pylint: disable=too-many-public-methods, too-many-locals, too-many-branches, too-many-statements
 # pylint: disable=invalid-name, too-many-lines, too-many-return-statements
@@ -93,6 +90,8 @@ class CrawlOperator(BaseOperator):
     fast_retry_secs: int
     log_failed_crawl_lines: int
 
+    min_avail_storage_ratio: float
+
     def __init__(self, *args):
         super().__init__(*args)
 
@@ -104,6 +103,11 @@ class CrawlOperator(BaseOperator):
 
         self.log_failed_crawl_lines = int(os.environ.get("LOG_FAILED_CRAWL_LINES") or 0)
 
+        # ensure available storage is at least this much times used storage
+        self.min_avail_storage_ratio = float(
+            os.environ.get("CRAWLER_MIN_AVAIL_STORAGE_RATIO") or 0
+        )
+
     def init_routes(self, app):
         """init routes for this operator"""
 
@@ -1336,12 +1340,15 @@ class CrawlOperator(BaseOperator):
 
         if (
             status.state == "running"
+            and self.min_avail_storage_ratio
             and pod_info.allocated.storage
-            and pod_info.used.storage * AVAIL_STORAGE_RATIO
+            and pod_info.used.storage * self.min_avail_storage_ratio
             > pod_info.allocated.storage
         ):
             new_storage = math.ceil(
-                pod_info.used.storage * AVAIL_STORAGE_RATIO / 1_000_000_000
+                pod_info.used.storage
+                * self.min_avail_storage_ratio
+                / 1_000_000_000
             )
             pod_info.newStorage = f"{new_storage}Gi"
             print(
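A worked example of the resize rule above, with assumed figures (10 GB used, 20 GB allocated, ratio 2.5): available storage is below 2.5x used, so the volume is resized to 25Gi.

import math

used_storage = 10_000_000_000       # bytes used by the crawl (assumed)
allocated_storage = 20_000_000_000  # bytes currently allocated (assumed)
min_avail_storage_ratio = 2.5       # the new configurable ratio

if used_storage * min_avail_storage_ratio > allocated_storage:
    new_storage = math.ceil(
        used_storage * min_avail_storage_ratio / 1_000_000_000
    )
    print(f"{new_storage}Gi")  # prints "25Gi"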
@@ -60,6 +60,8 @@ data:
 
   MAX_CRAWLER_MEMORY: "{{ .Values.max_crawler_memory }}"
 
+  CRAWLER_MIN_AVAIL_STORAGE_RATIO: "{{ .Values.crawler_min_avail_storage_ratio }}"
+
   ENABLE_AUTO_RESIZE_CRAWLERS: "{{ .Values.enable_auto_resize_crawlers }}"
 
   BILLING_ENABLED: "{{ .Values.billing_enabled }}"
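This hunk wires the new chart value into the backend config map, so the operator's __init__ (see the hunk above) can read it from the CRAWLER_MIN_AVAIL_STORAGE_RATIO environment variable.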
@@ -75,7 +75,7 @@ allow_dupe_invites: "0"
 invite_expire_seconds: 604800
 
 # base url for replayweb.page
-rwp_base_url: "https://cdn.jsdelivr.net/npm/replaywebpage@1.8.15/"
+rwp_base_url: "https://cdn.jsdelivr.net/npm/replaywebpage@2.1.4/"
 
 superuser:
   # set this to enable a superuser admin
@@ -288,12 +288,19 @@ enable_auto_resize_crawlers: false
 # the workdir is used to store the browser profile data and other temporary files
 # profile_browser_workdir_size: 4Gi
 
 
 # Other Crawler Settings
 # ----------------------
 
 # minimum size allocated to each crawler
 # should be at least double crawl session size to ensure space for WACZ and browser profile data
-crawler_storage: "26Gi"
+crawler_storage: "25Gi"
 
+# if set, will ensure 'crawler_storage' is at least this times used storage
+# eg. if crawler session reaches 10Gb, and this value is 2.5, will attempt
+# to resize to at least 25Gb.
+crawler_min_avail_storage_ratio: 2.5
+
 # max size at which crawler will commit current crawl session
 crawler_session_size_limit_bytes: "10000000000"
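Note that the defaults now appear self-consistent: the 10 GB crawler_session_size_limit_bytes times the 2.5 ratio gives the 25 GB that crawler_storage defaults to, which is presumably why it was nudged from 26Gi to 25Gi.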