concurrent crawls: filter concurrent crawls check (#2701)

ensure the concurrent crawls check counts only running or waiting crawls, not all existing crawljobs
Ilya Kreymer 2025-07-03 09:57:07 -07:00 committed by GitHub
parent 5b4fee73e6
commit 8152223750


@@ -19,7 +19,6 @@ from btrixcloud.models import (
     TYPE_NON_RUNNING_STATES,
     TYPE_RUNNING_STATES,
     TYPE_ALL_CRAWL_STATES,
-    NON_RUNNING_STATES,
     RUNNING_STATES,
     WAITING_STATES,
     RUNNING_AND_STARTING_ONLY,
@@ -757,22 +756,22 @@ class CrawlOperator(BaseOperator):
         if not max_crawls:
             return True
-        if len(data.related[CJS]) <= max_crawls:
-            return True
         name = data.parent.get("metadata", {}).get("name")
-        i = 0
+        active_crawls = 0
         for crawl_sorted in data.related[CJS].values():
-            if crawl_sorted.get("status", {}).get("state") in NON_RUNNING_STATES:
+            crawl_state = crawl_sorted.get("status", {}).get("state", "")
+            # don't count ourselves
+            if crawl_sorted.get("metadata", {}).get("name") == name:
                 continue
-            if crawl_sorted.get("metadata").get("name") == name:
-                if i < max_crawls:
-                    return True
-                break
-            i += 1
+            if crawl_state in RUNNING_AND_WAITING_STATES:
+                active_crawls += 1
+
+        if active_crawls <= max_crawls:
+            return True
         await self.set_state(
             "waiting_org_limit", status, crawl, allowed_from=["starting"]