make crawlTimeout a per-crawlconfig property

- allow crawl complete / partial complete messages to update existing crawl state, e.g. after a timeout
- enable handling of BackoffLimitExceeded / DeadlineExceeded job failures, with a possible later success able to override the failure state
- filter the running-crawls listing to only active jobs
commit 20b19f932f
parent ed27f3e3ee
@@ -75,7 +75,7 @@ class CrawlConfigIn(BaseModel):
     schedule: Optional[str] = ""
     runNow: Optional[bool] = False
 
-    # storageName: Optional[str] = "default"
+    crawlTimeout: Optional[int] = 0
 
     config: RawCrawlConfig
 
@@ -93,6 +93,8 @@ class CrawlConfig(BaseMongoModel):
 
     config: RawCrawlConfig
 
+    crawlTimeout: Optional[int] = 0
+
 
 # ============================================================================
 class CrawlOps:
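Both the input model and the stored model gain the same field. A minimal sketch of the new property in isolation, using a stand-in pydantic model (not the full CrawlConfigIn from this diff):

# Stand-in model; the real CrawlConfigIn carries more fields.
from typing import Optional

from pydantic import BaseModel


class ConfigIn(BaseModel):
    schedule: Optional[str] = ""
    runNow: Optional[bool] = False
    crawlTimeout: Optional[int] = 0  # seconds; 0 leaves the job without a deadline


cfg = ConfigIn(crawlTimeout=3600)
print(cfg.crawlTimeout)  # 3600, later applied as activeDeadlineSeconds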
@@ -1,13 +1,13 @@
 """ Crawl API """
 
 import asyncio
-import traceback
 
 from typing import Optional, List
 from datetime import datetime
 
 from fastapi import Depends, HTTPException
 from pydantic import BaseModel
+import pymongo
 
 from db import BaseMongoModel
 from archives import Archive
@@ -74,11 +74,20 @@ class CrawlOps:
             print("Not a valid crawl complete msg!", flush=True)
             return
 
-        await self.handle_finished(crawl)
+        await self.store_crawl(crawl, update_existing=True)
 
-    async def handle_finished(self, crawl: Crawl):
+    async def store_crawl(self, crawl: Crawl, update_existing=False):
         """ Add finished crawl to db, increment archive usage """
-        await self.crawls.insert_one(crawl.to_dict())
+        if update_existing:
+            await self.crawls.find_one_and_replace(
+                {"_id": crawl.id}, crawl.to_dict(), upsert=True
+            )
+        else:
+            try:
+                await self.crawls.insert_one(crawl.to_dict())
+            except pymongo.errors.DuplicateKeyError:
+                print(f"Crawl Already Added: {crawl.id}")
+                return False
 
         dura = int((crawl.finished - crawl.started).total_seconds())
 
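The update_existing branch is what lets a complete / partial-complete message override a crawl state stored earlier (e.g. a timed_out record written when the job hit its deadline): find_one_and_replace with upsert=True replaces the whole document keyed by _id regardless of what was there. A minimal sketch of that semantics against a motor collection, with placeholder connection string and document values:

# Placeholder DB/collection names; requires a running MongoDB.
import asyncio

import motor.motor_asyncio


async def main():
    client = motor.motor_asyncio.AsyncIOMotorClient("mongodb://localhost:27017")
    crawls = client["btrix_example"]["crawls"]

    # First event: job hit its deadline, crawl stored as timed_out.
    await crawls.find_one_and_replace(
        {"_id": "crawl-1"}, {"_id": "crawl-1", "state": "timed_out"}, upsert=True
    )
    # Later event: partial-complete overrides the stored failure state.
    await crawls.find_one_and_replace(
        {"_id": "crawl-1"}, {"_id": "crawl-1", "state": "partial_complete"}, upsert=True
    )
    print(await crawls.find_one({"_id": "crawl-1"}))  # state: partial_complete


asyncio.run(main())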
@@ -150,7 +159,7 @@ def init_crawls_api(app, mdb, crawl_manager, archives):
                 status_code=404, detail=f"Crawl not found: {crawl_id}"
             )
 
-        await ops.handle_finished(crawl)
+        await ops.store_crawl(crawl)
 
     except HTTPException as httpe:
         raise httpe
@@ -182,7 +191,6 @@ def init_crawls_api(app, mdb, crawl_manager, archives):
 
     except Exception as exc:
         # pylint: disable=raise-missing-from
-        traceback.print_exc()
         raise HTTPException(status_code=400, detail=f"Error Stopping Crawl: {exc}")
 
     return {"stopped_gracefully": True}
@@ -37,17 +37,26 @@ class K8SManager:
         self.crawler_image = os.environ.get("CRAWLER_IMAGE")
         self.crawler_image_pull_policy = "IfNotPresent"
 
-        self.crawl_timeout = int(os.environ.get("CRAWL_TIMEOUT", "1000000"))
         self.crawl_retries = int(os.environ.get("CRAWL_RETRIES", "3"))
 
         self.loop = asyncio.get_running_loop()
-        self.loop.create_task(self.watch_job_loop())
+        self.loop.create_task(self.run_event_loop())
 
     def set_crawl_ops(self, ops):
         """ Set crawl ops handler """
         self.crawl_ops = ops
 
-    async def watch_job_loop(self):
+    async def run_event_loop(self):
+        """ Run the job watch loop, retry in case of failure"""
+        while True:
+            try:
+                await self.watch_events()
+            # pylint: disable=broad-except
+            except Exception as exc:
+                print(f"Retrying job loop: {exc}")
+                await asyncio.sleep(10)
+
+    async def watch_events(self):
         """ Get events for completed jobs"""
         async with watch.Watch().stream(
             self.core_api.list_namespaced_event,
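Splitting watch_job_loop into run_event_loop plus watch_events turns the watcher into a supervised task: if the Kubernetes event stream dies (API-server disconnect, timeout), the loop logs and restarts it after 10 seconds instead of silently ending. The pattern in isolation, with a stub standing in for the real watch:

# Supervisor pattern sketch; watch_events is a stub here, not the
# actual Kubernetes event watch from this diff.
import asyncio


async def watch_events():
    raise ConnectionError("watch stream closed")  # simulate a dropped watch


async def run_event_loop():
    while True:
        try:
            await watch_events()
        # pylint: disable=broad-except
        except Exception as exc:
            print(f"Retrying job loop: {exc}")
            await asyncio.sleep(10)

# asyncio.run(run_event_loop()) would keep retrying every 10 seconds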
@@ -62,12 +71,12 @@ class K8SManager:
                         self.handle_crawl_failed(obj.involved_object.name, "failed")
                     )
 
-                # elif obj.reason == "DeadlineExceeded":
-                #     self.loop.create_task(
-                #         self.handle_crawl_failed(
-                #             obj.involved_object.name, "timed_out"
-                #         )
-                #     )
+                elif obj.reason == "DeadlineExceeded":
+                    self.loop.create_task(
+                        self.handle_crawl_failed(
+                            obj.involved_object.name, "timed_out"
+                        )
+                    )
 
         # pylint: disable=broad-except
         except Exception as exc:
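Kubernetes emits a DeadlineExceeded event against a Job once its activeDeadlineSeconds elapses, so actually setting the per-config timeout is what makes this previously commented-out branch reachable; such crawls are recorded as "timed_out" rather than "failed". Per the commit message, the "failed" branch above corresponds to BackoffLimitExceeded. An illustrative reason-to-state mapping (not code from this diff):

# Illustrative only: event reasons on the Job drive the stored crawl state.
REASON_TO_STATE = {
    "BackoffLimitExceeded": "failed",   # retries exhausted
    "DeadlineExceeded": "timed_out",    # activeDeadlineSeconds elapsed
}

assert REASON_TO_STATE.get("DeadlineExceeded") == "timed_out"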
@@ -131,7 +140,7 @@ class K8SManager:
         extra_crawl_params = extra_crawl_params or []
 
         job_template = self._get_job_template(
-            cid, labels, annotations, extra_crawl_params
+            cid, labels, annotations, crawlconfig.crawlTimeout, extra_crawl_params
         )
 
         spec = client.V1beta1CronJobSpec(
@@ -205,6 +214,15 @@ class K8SManager:
             cron_job.spec.suspend = suspend
             changed = True
 
+        if (
+            crawlconfig.crawlTimeout
+            != cron_job.spec.job_template.spec.active_deadline_seconds
+        ):
+            cron_job.spec.job_template.spec.active_deadline_seconds = (
+                crawlconfig.crawlTimeout
+            )
+            changed = True
+
         if changed:
             cron_job.spec.job_template.metadata.annotations[
                 "btrix.run.schedule"
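Comparing crawlTimeout against the template's active_deadline_seconds keeps the update idempotent: changed only flips (and the resource is only patched) when the timeout actually differs, so edits to an existing config propagate to its scheduled cron job without redundant API writes. A reduced sketch of the guard with stand-in objects, not live Kubernetes resources:

# Stand-in objects; mirrors the changed-flag guard above.
from types import SimpleNamespace


def apply_timeout(cron_job, crawl_timeout):
    changed = False
    if crawl_timeout != cron_job.spec.job_template.spec.active_deadline_seconds:
        cron_job.spec.job_template.spec.active_deadline_seconds = crawl_timeout
        changed = True
    return changed  # caller only patches the resource when True


job_spec = SimpleNamespace(active_deadline_seconds=None)
cron_job = SimpleNamespace(
    spec=SimpleNamespace(job_template=SimpleNamespace(spec=job_spec))
)
assert apply_timeout(cron_job, 3600) is True
assert apply_timeout(cron_job, 3600) is False  # second call is a no-op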
@@ -248,7 +266,11 @@ class K8SManager:
             field_selector="status.successful=0",
         )
 
-        return [self._make_crawl_for_job(job, "running") for job in jobs.items]
+        return [
+            self._make_crawl_for_job(job, "running")
+            for job in jobs.items
+            if job.status.active
+        ]
 
     async def validate_crawl_complete(self, crawlcomplete):
         """Ensure the crawlcomplete data is valid (job exists and user matches)
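V1JobStatus.active counts the job's currently running pods. A job that failed (e.g. via DeadlineExceeded) but has not yet been deleted still matches the status.successful=0 field selector while having no active pods, so the new `if job.status.active` clause keeps it out of the running-crawls listing. A sketch with stand-in job objects:

# Stand-in jobs; V1JobStatus.active is None/0 when no pods are running.
from types import SimpleNamespace

jobs = [
    SimpleNamespace(name="crawl-a", status=SimpleNamespace(active=1)),
    SimpleNamespace(name="crawl-b", status=SimpleNamespace(active=None)),
]

running = [job.name for job in jobs if job.status.active]
print(running)  # ['crawl-a'] -- the failed-but-undeleted job is skipped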
@@ -332,7 +354,7 @@ class K8SManager:
 
         crawl = self._make_crawl_for_job(job, reason, True)
 
-        await self.crawl_ops.handle_finished(crawl)
+        await self.crawl_ops.store_crawl(crawl)
 
         await self._delete_job(job_name)
 
@@ -360,7 +382,7 @@ class K8SManager:
         await self.batch_api.delete_namespaced_job(
             name=name,
             namespace=self.namespace,
-            grace_period_seconds=120,
+            grace_period_seconds=60,
             propagation_policy="Foreground",
         )
 
@@ -474,7 +496,9 @@ class K8SManager:
             body=job, namespace=self.namespace
         )
 
-    def _get_job_template(self, uid, labels, annotations, extra_crawl_params):
+    def _get_job_template(
+        self, uid, labels, annotations, crawl_timeout, extra_crawl_params
+    ):
         """Return crawl job template for crawl job, including labels, adding optional crawl params"""
 
         command = ["crawl", "--config", "/tmp/crawl-config.json"]
@@ -556,7 +580,7 @@ class K8SManager:
             },
         }
 
-        if self.crawl_timeout > 0:
-            job_template["spec"]["activeDeadlineSeconds"] = self.crawl_timeout
+        if crawl_timeout > 0:
+            job_template["spec"]["activeDeadlineSeconds"] = crawl_timeout
 
         return job_template
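With crawl_timeout threaded in as a parameter, a value of 0 (the model default) leaves activeDeadlineSeconds off the job spec entirely, i.e. no deadline. A reduced sketch of that conditional:

# Reduced stand-in for the job template dict built in _get_job_template.
def make_job_template(crawl_timeout: int) -> dict:
    job_template = {"spec": {"template": {}}}
    if crawl_timeout > 0:
        job_template["spec"]["activeDeadlineSeconds"] = crawl_timeout
    return job_template


assert "activeDeadlineSeconds" not in make_job_template(0)["spec"]
assert make_job_template(3600)["spec"]["activeDeadlineSeconds"] == 3600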
@@ -41,9 +41,6 @@ crawler_pull_policy: "Never"
 
 crawler_namespace: "crawlers"
 
-# set 0 to disable timeout
-crawl_timeout: 0
-
 # num retries
 crawl_retries: 1
 
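The chart-level knob is gone (along with the CRAWL_TIMEOUT env default in K8SManager); the timeout now travels with each crawl config. A hedged example of a config payload using the new field, where the field names follow CrawlConfigIn above but the RawCrawlConfig shape shown is assumed, not taken from this diff:

# Hypothetical payload for creating a crawl config with the new field.
new_config = {
    "schedule": "",         # no cron schedule
    "runNow": True,         # start a crawl immediately
    "crawlTimeout": 7200,   # 2h deadline; 0 would disable the timeout
    "config": {"seeds": ["https://example.com/"]},  # shape assumed
}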