misc tweaks:
- better error handling for not found resources, ensure 404
- typo in k8smanager
- add pylintrc
- ensure manual jobs are deleted when complete
- fix typos, reformat
parent f1a816be48
commit 223658cfa2
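The first item in the commit message is the substance of this diff: when a lookup fails, the endpoint should answer 404 rather than letting the HTTPException be caught by a broad `except` and re-raised as 403. Below is a minimal, self-contained sketch of the pattern the endpoints now follow; the route path and the in-memory store are placeholders for illustration, not this repository's code.

from fastapi import FastAPI, HTTPException

app = FastAPI()

# placeholder store; the real code updates a MongoDB collection
CONFIGS = {"abc123": {"schedule": ""}}


async def update_schedule_in_store(cid: str, schedule: str) -> bool:
    """Return False when the config id does not exist, mirroring CrawlOps."""
    if cid not in CONFIGS:
        return False
    CONFIGS[cid]["schedule"] = schedule
    return True


@app.patch("/crawlconfigs/{cid}/schedule")
async def update_crawl_schedule(cid: str, schedule: str = ""):
    success = False
    try:
        success = await update_schedule_in_store(cid, schedule)
    except Exception as exc:
        # pylint: disable=raise-missing-from
        # unexpected failures still map to 403, as in the diff below
        raise HTTPException(
            status_code=403, detail=f"Error updating crawl config: {exc}"
        )

    # the not-found check sits outside the try block, so the 404 is no
    # longer swallowed by the broad except and converted into a 403
    if not success:
        raise HTTPException(status_code=404, detail=f"Crawl Config '{cid}' not found")

    return {"updated": cid}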
@@ -141,10 +141,11 @@ class CrawlOps:
    async def update_crawl_schedule(self, cid: str, update: UpdateSchedule):
        """ Update schedule for existing crawl config"""

        if not await self.crawl_configs.find_one_and_update(
            {"_id": cid}, {"$set": {"schedule": update.schedule}}
        ):
-            return None
+            return False

        await self.crawl_manager.update_crawl_schedule(cid, update.schedule)

        return True
@@ -222,11 +223,9 @@ def init_crawl_config_api(mdb, user_dep, archive_ops, crawl_manager):
        cid: str,
    ):

+        success = False
        try:
-            if not await ops.update_crawl_schedule(cid, update):
-                raise HTTPException(
-                    status_code=404, detail=f"Crawl Config '{cid}' not found"
-                )
+            success = await ops.update_crawl_schedule(cid, update)

        except Exception as e:
            # pylint: disable=raise-missing-from
@@ -234,6 +233,11 @@ def init_crawl_config_api(mdb, user_dep, archive_ops, crawl_manager):
                status_code=403, detail=f"Error updating crawl config: {e}"
            )

+        if not success:
+            raise HTTPException(
+                status_code=404, detail=f"Crawl Config '{cid}' not found"
+            )
+
        return {"updated": cid}

    @router.post("/{cid}/run")
@@ -265,7 +269,9 @@ def init_crawl_config_api(mdb, user_dep, archive_ops, crawl_manager):
    ):
        result = await ops.delete_crawl_config(cid, archive)
        if not result or not result.deleted_count:
-            raise HTTPException(status_code=404, detail="Crawl Config Not Found")
+            raise HTTPException(
+                status_code=404, detail=f"Crawl Config '{cid}' Not Found"
+            )

        return {"deleted": 1}
@@ -125,7 +125,7 @@ def init_crawls_api(app, mdb, crawl_manager, crawl_config_ops, archives):

    archive_crawl_dep = archives.archive_crawl_dep

-    @app.post("/crawls/done", tags=["crawls"])
+    @app.post("/_crawls/done", tags=["_internal"])
    async def crawl_done(msg: CrawlCompleteIn):
        loop = asyncio.get_running_loop()
        loop.create_task(ops.on_handle_crawl_complete(msg))
@@ -152,25 +152,22 @@ def init_crawls_api(app, mdb, crawl_manager, crawl_config_ops, archives):
        "/archives/{aid}/crawls/{crawl_id}/cancel",
        tags=["crawls"],
    )
-    async def crawl_cancel_stop(
+    async def crawl_cancel_immediately(
        crawl_id, archive: Archive = Depends(archive_crawl_dep)
    ):
+        crawl = None
        try:
            crawl = await crawl_manager.stop_crawl(crawl_id, archive.id, graceful=False)
-            if not crawl:
-                raise HTTPException(
-                    status_code=404, detail=f"Crawl not found: {crawl_id}"
-                )
-
-            await ops.store_crawl(crawl)
-
-        except HTTPException as httpe:
-            raise httpe

        except Exception as exc:
            # pylint: disable=raise-missing-from
            raise HTTPException(status_code=400, detail=f"Error Canceling Crawl: {exc}")

+        if not crawl:
+            raise HTTPException(status_code=404, detail=f"Crawl not found: {crawl_id}")
+
+        await ops.store_crawl(crawl)
+
        return {"canceled": True}

    @app.post(
@@ -180,27 +177,33 @@ def init_crawls_api(app, mdb, crawl_manager, crawl_config_ops, archives):
    async def crawl_graceful_stop(
        crawl_id, archive: Archive = Depends(archive_crawl_dep)
    ):
+        canceled = False
        try:
            canceled = await crawl_manager.stop_crawl(
                crawl_id, archive.id, graceful=True
            )
-            if not canceled:
-                raise HTTPException(
-                    status_code=404, detail=f"Crawl not found: {crawl_id}"
-                )
-
-        except HTTPException as httpe:
-            raise httpe

        except Exception as exc:
            # pylint: disable=raise-missing-from
            raise HTTPException(status_code=400, detail=f"Error Stopping Crawl: {exc}")

+        if not canceled:
+            raise HTTPException(status_code=404, detail=f"Crawl not found: {crawl_id}")
+
        return {"stopped_gracefully": True}

    @app.post("/archives/{aid}/crawls/delete", tags=["crawls"])
    async def delete_crawls(
        delete_list: DeleteCrawlList, archive: Archive = Depends(archive_crawl_dep)
    ):
        try:
            for crawl_id in delete_list:
                await crawl_manager.stop_crawl(crawl_id, archive.id, graceful=False)

        except Exception as exc:
            # pylint: disable=raise-missing-from
            raise HTTPException(status_code=400, detail=f"Error Stopping Crawl: {exc}")

        res = await ops.delete_crawls(archive.id, delete_list)

        return {"deleted": res}
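For reference, the cancel and stop endpoints above can be exercised with any HTTP client. A hypothetical httpx call follows; the base URL, bearer token, and ids are illustrative, and the `/stop` path for the graceful variant is an assumption since its decorator falls outside this hunk.

import asyncio

import httpx


async def main():
    # placeholders only: adjust host, credentials, and ids for a real deployment
    headers = {"Authorization": "Bearer <token>"}
    aid, crawl_id = "<archive-id>", "<crawl-id>"

    async with httpx.AsyncClient(base_url="http://localhost:8000", headers=headers) as client:
        # immediate (non-graceful) cancel; expect 404 if the crawl id is unknown
        resp = await client.post(f"/archives/{aid}/crawls/{crawl_id}/cancel")
        print(resp.status_code, resp.json())

        # graceful stop (assumed path)
        resp = await client.post(f"/archives/{aid}/crawls/{crawl_id}/stop")
        print(resp.status_code, resp.json())


asyncio.run(main())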
@@ -109,6 +109,7 @@ class DockerManager:
            timeout = int(container["Labels"]["btrix.timeout"])
            actual = int(time.time()) - int(container["Created"])
            if actual >= timeout:
+                # pylint: disable=line-too-long
                print(
                    f"Crawl {container['Id']} running for {actual} seconds, exceeded timeout {timeout}, stopping..."
                )
@@ -181,18 +182,27 @@ class DockerManager:
    async def stop_crawl(self, crawl_id, aid, graceful=True):
        """ Stop crawl, if not graceful, issue SIGUSR1 to indicate cancelation """
-        container = await self.client.containers.get(crawl_id)
-
-        if container["Config"]["Labels"]["btrix.archive"] != aid:
-            return None
-
-        if not graceful:
-            await container.kill(signal="SIGUSR1")
-            result = self._make_crawl_for_container(container, "canceled", True)
-        else:
-            result = True
-
-        await container.kill(signal="SIGTERM")
+        result = None
+
+        try:
+            container = await self.client.containers.get(crawl_id)
+
+            if container["Config"]["Labels"]["btrix.archive"] != aid:
+                return None
+
+            if not graceful:
+                await container.kill(signal="SIGUSR1")
+                result = self._make_crawl_for_container(container, "canceled", True)
+            else:
+                result = True
+
+            await container.kill(signal="SIGTERM")
+
+        except aiodocker.exceptions.DockerError as exc:
+            if exc.status == 404:
+                return None
+
+            raise exc

        return result
@@ -351,7 +361,7 @@ class DockerManager:
            f"STORE_ENDPOINT_URL={endpoint_with_coll_url}",
            f"STORE_ACCESS_KEY={storage.access_key}",
            f"STORE_SECRET_KEY={storage.secret_key}",
-            "WEBHOOK_URL=http://backend:8000/crawls/done",
+            "WEBHOOK_URL=http://backend:8000/_crawls/done",
        ]

        labels["btrix.run.schedule"] = schedule
@@ -123,7 +123,7 @@ class K8SManager:
                "STORE_ENDPOINT_URL": endpoint_with_coll_url,
                "STORE_ACCESS_KEY": storage.access_key,
                "STORE_SECRET_KEY": storage.secret_key,
-                "WEBHOOK_URL": "http://browsertrix-cloud.default:8000/crawls/done",
+                "WEBHOOK_URL": "http://browsertrix-cloud.default:8000/_crawls/done",
            },
        )
@@ -169,7 +169,7 @@ class K8SManager:

        return cron_job

-    async def update_crawl_config(self, cid, schedule):
+    async def update_crawl_schedule(self, cid, schedule):
        """ Update the schedule for existing crawl config """

        cron_jobs = await self.batch_beta_api.list_namespaced_cron_job(
@@ -195,15 +195,12 @@ class K8SManager:
            name=cron_job.metadata.name, namespace=self.namespace, body=cron_job
        )

-    async def run_crawl_config(self, cid, manual=True, schedule=""):
+    async def run_crawl_config(self, cid):
        """ Run crawl job for cron job based on specified crawlconfig id (cid) """
        cron_jobs = await self.batch_beta_api.list_namespaced_cron_job(
            namespace=self.namespace, label_selector=f"btrix.crawlconfig={cid}"
        )

-        if not manual or schedule:
-            raise Exception("Manual trigger not supported")
-
        if len(cron_jobs.items) != 1:
            raise Exception("Crawl Config Not Found")
@@ -245,8 +242,8 @@ class K8SManager:
            return None

        manual = job.metadata.annotations.get("btrix.run.manual") == "1"
-        if not manual:
-            await self._delete_job(job.metadata.name)
+        if manual:
+            self.loop.create_task(self._delete_job(job.metadata.name))

        return self._make_crawl_for_job(
            job,
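`_delete_job` itself is not shown in this hunk; scheduling it with `loop.create_task` lets the completion handler return without blocking on the Kubernetes API call. A plausible sketch of such a helper, assuming the kubernetes_asyncio client the surrounding code already uses; the function name and propagation policy are illustrative, not copied from this repository.

from kubernetes_asyncio import client


async def delete_job(batch_api: client.BatchV1Api, name: str, namespace: str) -> None:
    """Delete a finished manual crawl Job and, via foreground propagation, its Pods."""
    await batch_api.delete_namespaced_job(
        name=name,
        namespace=namespace,
        propagation_policy="Foreground",
    )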
@@ -426,6 +423,7 @@ class K8SManager:
        """Create new job from cron job to run instantly"""
        annotations = cron_job.spec.job_template.metadata.annotations
        annotations["btrix.run.manual"] = "1"
+        annotations["btrix.run.schedule"] = ""

        # owner_ref = client.V1OwnerReference(
        #     kind="CronJob",