diff --git a/backend/btrixcloud/operator/crawls.py b/backend/btrixcloud/operator/crawls.py
index 5434439e..fa3132a0 100644
--- a/backend/btrixcloud/operator/crawls.py
+++ b/backend/btrixcloud/operator/crawls.py
@@ -19,6 +19,7 @@ from btrixcloud.models import (
     TYPE_NON_RUNNING_STATES,
     TYPE_RUNNING_STATES,
     TYPE_ALL_CRAWL_STATES,
+    NON_RUNNING_STATES,
     RUNNING_STATES,
     WAITING_STATES,
     RUNNING_AND_STARTING_ONLY,
@@ -664,7 +665,7 @@ class CrawlOperator(BaseOperator):
         the following state transitions are supported:

         from starting to org concurrent crawl limit and back:
-        - starting -> waiting_org_capacity -> starting
+        - starting -> waiting_org_limit -> starting

         from starting to running:
         - starting -> running
@@ -756,22 +757,27 @@ class CrawlOperator(BaseOperator):
         if not max_crawls:
             return True

+        # if total crawls < concurrent, always allow, no need to check further
+        if len(data.related[CJS]) <= max_crawls:
+            return True
+
         name = data.parent.get("metadata", {}).get("name")

-        active_crawls = 0
-
+        # assume crawls already sorted from oldest to newest
+        # (seems to be the case always)
+        i = 0
         for crawl_sorted in data.related[CJS].values():
-            crawl_state = crawl_sorted.get("status", {}).get("state", "")
-
-            # don't count ourselves
-            if crawl_sorted.get("metadata", {}).get("name") == name:
+            # if crawl not running, don't count
+            if crawl_sorted.get("status", {}).get("state") in NON_RUNNING_STATES:
                 continue

-            if crawl_state in RUNNING_AND_WAITING_STATES:
-                active_crawls += 1
+            # if reached current crawl, if did not reach crawl quota, allow current crawl to run
+            if crawl_sorted.get("metadata").get("name") == name:
+                if i < max_crawls:
+                    return True

-            if active_crawls <= max_crawls:
-                return True
+                break
+            i += 1

         await self.set_state(
             "waiting_org_limit", status, crawl, allowed_from=["starting"]