backend: Fix for total crawl time limit. (#665)

* backend: fix for total crawl time limit:
- the time limit is computed against the total job run time
- when the limit is exceeded, the job begins stopping its crawls gracefully, equivalent to the 'stop crawl' operation
- fix for #664

* rename crawl-timeout -> crawl_expire_time

* fix lint
This commit is contained in:
Ilya Kreymer 2023-03-10 11:43:16 -08:00 committed by GitHub
parent 8ca4276c57
commit 86ca9c4bac
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 26 additions and 1 deletions

View File

@ -57,6 +57,10 @@ class CrawlJob(ABC):
self.storage_path = os.environ.get("STORE_PATH")
self.storage_name = os.environ.get("STORAGE_NAME")
self.crawl_expire_time = os.environ.get("CRAWL_EXPIRE_TIME")
if self.crawl_expire_time:
self.crawl_expire_time = datetime.fromisoformat(self.crawl_expire_time)
self.last_done = None
self.last_found = None
self.redis = None
@ -141,7 +145,7 @@ class CrawlJob(ABC):
# check crawl status
await self.check_crawl_status()
# pylint: disable=broad-except
# pylint: disable=broad-except
except Exception as exc:
print(f"Retrying crawls done loop: {exc}")
await asyncio.sleep(10)
@ -174,6 +178,15 @@ class CrawlJob(ABC):
await self.delete_crawl()
# check crawl expiry
if self.crawl_expire_time and datetime.utcnow() > self.crawl_expire_time:
res = await self.graceful_shutdown()
if res.get("success"):
print(
"Job duration expired at {self.crawl_expire_time}, "
+ "gracefully stopping crawl"
)
async def delete_crawl(self):
"""delete crawl stateful sets, services and pvcs"""
self._delete_pending = True

View File

@ -178,12 +178,21 @@ class BaseCrawlManager(ABC):
return crawl_id
async def _load_job_template(self, crawlconfig, job_id, manual, schedule=None):
if crawlconfig.crawlTimeout:
crawl_expire_time = datetime.datetime.utcnow() + datetime.timedelta(
seconds=crawlconfig.crawlTimeout
)
crawl_expire_time = crawl_expire_time.isoformat()
else:
crawl_expire_time = ""
params = {
"id": job_id,
"cid": str(crawlconfig.id),
"rev": str(crawlconfig.rev),
"userid": str(crawlconfig.modifiedBy),
"oid": str(crawlconfig.oid),
"crawl_expire_time": crawl_expire_time,
"job_image": self.job_image,
"job_pull_policy": self.job_pull_policy,
"manual": "1" if manual else "0",

View File

@ -76,6 +76,9 @@ spec:
- name: TAGS
value: "{{ tags }}"
- name: CRAWL_EXPIRE_TIME
value: "{{ crawl_expire_time }}"
- name: STORE_PATH
valueFrom:
configMapKeyRef: