concurrent crawls: filter concurrent crawls check (#2701)

ensure the concurrent crawls check only counts running or waiting crawls, not all existing crawljobs
Ilya Kreymer 2025-07-03 09:57:07 -07:00 committed by GitHub
parent 5b4fee73e6
commit 8152223750

@@ -19,7 +19,6 @@ from btrixcloud.models import (
     TYPE_NON_RUNNING_STATES,
     TYPE_RUNNING_STATES,
     TYPE_ALL_CRAWL_STATES,
-    NON_RUNNING_STATES,
     RUNNING_STATES,
     WAITING_STATES,
     RUNNING_AND_STARTING_ONLY,
@@ -757,22 +756,22 @@ class CrawlOperator(BaseOperator):
         if not max_crawls:
             return True
 
-        if len(data.related[CJS]) <= max_crawls:
-            return True
-
         name = data.parent.get("metadata", {}).get("name")
 
-        i = 0
+        active_crawls = 0
+
         for crawl_sorted in data.related[CJS].values():
-            if crawl_sorted.get("status", {}).get("state") in NON_RUNNING_STATES:
+            crawl_state = crawl_sorted.get("status", {}).get("state", "")
+
+            # don't count ourselves
+            if crawl_sorted.get("metadata", {}).get("name") == name:
                 continue
 
-            if crawl_sorted.get("metadata").get("name") == name:
-                if i < max_crawls:
-                    return True
+            if crawl_state in RUNNING_AND_WAITING_STATES:
+                active_crawls += 1
 
-                break
-            i += 1
+        if active_crawls <= max_crawls:
+            return True
 
         await self.set_state(
             "waiting_org_limit", status, crawl, allowed_from=["starting"]