misc tweaks:
- better error handling for not-found resources, ensure 404
- fix typo in k8smanager
- add pylintrc
- ensure manual jobs are deleted when complete
- fix typos, reformat
parent f1a816be48
commit 223658cfa2
@@ -141,10 +141,11 @@ class CrawlOps:
     async def update_crawl_schedule(self, cid: str, update: UpdateSchedule):
         """ Update schedule for existing crawl config"""

         if not await self.crawl_configs.find_one_and_update(
             {"_id": cid}, {"$set": {"schedule": update.schedule}}
         ):
-            return None
+            return False

         await self.crawl_manager.update_crawl_schedule(cid, update.schedule)
         return True
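For reference, a minimal sketch of the request model this method assumes; the real UpdateSchedule model is defined elsewhere in the backend and may carry additional fields:

from pydantic import BaseModel

class UpdateSchedule(BaseModel):
    """ Assumed shape: a single cron-style schedule string; empty string disables the schedule """
    schedule: str = ""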
@@ -222,11 +223,9 @@ def init_crawl_config_api(mdb, user_dep, archive_ops, crawl_manager):
         cid: str,
     ):

+        success = False
         try:
-            if not await ops.update_crawl_schedule(cid, update):
-                raise HTTPException(
-                    status_code=404, detail=f"Crawl Config '{cid}' not found"
-                )
+            success = await ops.update_crawl_schedule(cid, update)

         except Exception as e:
             # pylint: disable=raise-missing-from
@@ -234,6 +233,11 @@ def init_crawl_config_api(mdb, user_dep, archive_ops, crawl_manager):
                 status_code=403, detail=f"Error updating crawl config: {e}"
             )

+        if not success:
+            raise HTTPException(
+                status_code=404, detail=f"Crawl Config '{cid}' not found"
+            )
+
         return {"updated": cid}

     @router.post("/{cid}/run")
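The handler now raises the 404 after the try block: an HTTPException raised inside the bare `except Exception` handler would itself be caught and re-reported as a 403. A minimal self-contained sketch of the pattern, with a hypothetical do_update helper and route path standing in for the real ops layer:

from fastapi import FastAPI, HTTPException

app = FastAPI()

async def do_update(cid: str) -> bool:
    """ Hypothetical stand-in for ops.update_crawl_schedule(); False means cid was not found """
    return cid == "known-config"

@app.post("/configs/{cid}/schedule")  # hypothetical route, for illustration only
async def update_config(cid: str):
    success = False
    try:
        success = await do_update(cid)
    except Exception as exc:
        # pylint: disable=raise-missing-from
        raise HTTPException(status_code=403, detail=f"Error updating crawl config: {exc}")

    # raised outside the try block so it is not swallowed by the handler above
    if not success:
        raise HTTPException(status_code=404, detail=f"Crawl Config '{cid}' not found")

    return {"updated": cid}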
@@ -265,7 +269,9 @@ def init_crawl_config_api(mdb, user_dep, archive_ops, crawl_manager):
     ):
         result = await ops.delete_crawl_config(cid, archive)
         if not result or not result.deleted_count:
-            raise HTTPException(status_code=404, detail="Crawl Config Not Found")
+            raise HTTPException(
+                status_code=404, detail=f"Crawl Config '{cid}' Not Found"
+            )

         return {"deleted": 1}

@@ -125,7 +125,7 @@ def init_crawls_api(app, mdb, crawl_manager, crawl_config_ops, archives):

     archive_crawl_dep = archives.archive_crawl_dep

-    @app.post("/crawls/done", tags=["crawls"])
+    @app.post("/_crawls/done", tags=["_internal"])
     async def crawl_done(msg: CrawlCompleteIn):
         loop = asyncio.get_running_loop()
         loop.create_task(ops.on_handle_crawl_complete(msg))
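The crawl-completion webhook now lives under an underscore-prefixed path and is tagged as internal rather than appearing with the public crawl routes. A minimal sketch of how a crawler-side process might call it, assuming an httpx client and leaving the payload shape to the real CrawlCompleteIn model:

import httpx

async def notify_crawl_done(webhook_url: str, payload: dict) -> None:
    # webhook_url is the WEBHOOK_URL value passed to the crawl container,
    # e.g. "http://backend:8000/_crawls/done" (see the manager hunks below)
    async with httpx.AsyncClient() as client:
        resp = await client.post(webhook_url, json=payload)
        resp.raise_for_status()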
@@ -152,25 +152,22 @@ def init_crawls_api(app, mdb, crawl_manager, crawl_config_ops, archives):
         "/archives/{aid}/crawls/{crawl_id}/cancel",
         tags=["crawls"],
     )
-    async def crawl_cancel_stop(
+    async def crawl_cancel_immediately(
         crawl_id, archive: Archive = Depends(archive_crawl_dep)
     ):
+        crawl = None
         try:
             crawl = await crawl_manager.stop_crawl(crawl_id, archive.id, graceful=False)
-            if not crawl:
-                raise HTTPException(
-                    status_code=404, detail=f"Crawl not found: {crawl_id}"
-                )
-
-            await ops.store_crawl(crawl)
-
-        except HTTPException as httpe:
-            raise httpe
-
         except Exception as exc:
             # pylint: disable=raise-missing-from
             raise HTTPException(status_code=400, detail=f"Error Canceling Crawl: {exc}")

+        if not crawl:
+            raise HTTPException(status_code=404, detail=f"Crawl not found: {crawl_id}")
+
+        await ops.store_crawl(crawl)
+
         return {"canceled": True}

     @app.post(
@@ -180,27 +177,33 @@ def init_crawls_api(app, mdb, crawl_manager, crawl_config_ops, archives):
     async def crawl_graceful_stop(
         crawl_id, archive: Archive = Depends(archive_crawl_dep)
     ):
+        canceled = False
         try:
             canceled = await crawl_manager.stop_crawl(
                 crawl_id, archive.id, graceful=True
             )
-            if not canceled:
-                raise HTTPException(
-                    status_code=404, detail=f"Crawl not found: {crawl_id}"
-                )
-
-        except HTTPException as httpe:
-            raise httpe
-
         except Exception as exc:
             # pylint: disable=raise-missing-from
             raise HTTPException(status_code=400, detail=f"Error Stopping Crawl: {exc}")

+        if not canceled:
+            raise HTTPException(status_code=404, detail=f"Crawl not found: {crawl_id}")
+
         return {"stopped_gracefully": True}

     @app.post("/archives/{aid}/crawls/delete", tags=["crawls"])
     async def delete_crawls(
         delete_list: DeleteCrawlList, archive: Archive = Depends(archive_crawl_dep)
     ):
+        try:
+            for crawl_id in delete_list:
+                await crawl_manager.stop_crawl(crawl_id, archive.id, graceful=False)
+
+        except Exception as exc:
+            # pylint: disable=raise-missing-from
+            raise HTTPException(status_code=400, detail=f"Error Stopping Crawl: {exc}")
+
         res = await ops.delete_crawls(archive.id, delete_list)

         return {"deleted": res}
@@ -109,6 +109,7 @@ class DockerManager:
            timeout = int(container["Labels"]["btrix.timeout"])
            actual = int(time.time()) - int(container["Created"])
            if actual >= timeout:
+               # pylint: disable=line-too-long
                print(
                    f"Crawl {container['Id']} running for {actual} seconds, exceeded timeout {timeout}, stopping..."
                )
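The timeout check compares the container's age against its configured limit; the Docker container-list API reports Created as a Unix timestamp, so the elapsed time is just the difference from time.time(). A minimal standalone sketch of that check:

import time

def exceeded_timeout(created_ts: int, timeout_secs: int) -> bool:
    """ True if a container created at created_ts (Unix seconds) has run past timeout_secs """
    actual = int(time.time()) - int(created_ts)
    return actual >= timeout_secs

# e.g. a crawl started 7200 seconds ago with a 3600-second timeout should be stopped
print(exceeded_timeout(int(time.time()) - 7200, 3600))  # True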
@@ -181,6 +182,10 @@ class DockerManager:

     async def stop_crawl(self, crawl_id, aid, graceful=True):
         """ Stop crawl, if not graceful, issue SIGUSR1 to indicate cancelation """

+        result = None
+
+        try:
             container = await self.client.containers.get(crawl_id)

             if container["Config"]["Labels"]["btrix.archive"] != aid:
@@ -193,6 +198,11 @@ class DockerManager:
                 result = True

                 await container.kill(signal="SIGTERM")
+        except aiodocker.exceptions.DockerError as exc:
+            if exc.status == 404:
+                return None
+
+            raise exc

         return result

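A minimal sketch of the not-found handling added above, assuming the aiodocker client used by DockerManager: a 404 from the Docker API is mapped to None so the HTTP layer can answer with its own 404 instead of a 400:

import aiodocker

async def get_crawl_container(client: aiodocker.Docker, crawl_id: str):
    try:
        return await client.containers.get(crawl_id)
    except aiodocker.exceptions.DockerError as exc:
        if exc.status == 404:
            # the container no longer exists; let the caller decide how to report it
            return None
        raise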
@@ -351,7 +361,7 @@ class DockerManager:
            f"STORE_ENDPOINT_URL={endpoint_with_coll_url}",
            f"STORE_ACCESS_KEY={storage.access_key}",
            f"STORE_SECRET_KEY={storage.secret_key}",
-           "WEBHOOK_URL=http://backend:8000/crawls/done",
+           "WEBHOOK_URL=http://backend:8000/_crawls/done",
        ]

        labels["btrix.run.schedule"] = schedule
@@ -123,7 +123,7 @@ class K8SManager:
                "STORE_ENDPOINT_URL": endpoint_with_coll_url,
                "STORE_ACCESS_KEY": storage.access_key,
                "STORE_SECRET_KEY": storage.secret_key,
-               "WEBHOOK_URL": "http://browsertrix-cloud.default:8000/crawls/done",
+               "WEBHOOK_URL": "http://browsertrix-cloud.default:8000/_crawls/done",
            },
        )

@@ -169,7 +169,7 @@ class K8SManager:

         return cron_job

-    async def update_crawl_config(self, cid, schedule):
+    async def update_crawl_schedule(self, cid, schedule):
         """ Update the schedule for existing crawl config """

         cron_jobs = await self.batch_beta_api.list_namespaced_cron_job(
@@ -195,15 +195,12 @@ class K8SManager:
             name=cron_job.metadata.name, namespace=self.namespace, body=cron_job
         )

-    async def run_crawl_config(self, cid, manual=True, schedule=""):
+    async def run_crawl_config(self, cid):
         """ Run crawl job for cron job based on specified crawlconfig id (cid) """
         cron_jobs = await self.batch_beta_api.list_namespaced_cron_job(
             namespace=self.namespace, label_selector=f"btrix.crawlconfig={cid}"
         )

-        if not manual or schedule:
-            raise Exception("Manual trigger not supported")
-
         if len(cron_jobs.items) != 1:
             raise Exception("Crawl Config Not Found")

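With the manual/schedule arguments dropped, run_crawl_config is driven entirely by the btrix.crawlconfig label on the cron job. A minimal sketch of that label-selector lookup, assuming the kubernetes_asyncio client and the same BatchV1beta1Api used by K8SManager:

from kubernetes_asyncio import client, config

async def find_crawl_cron_job(namespace: str, cid: str):
    await config.load_kube_config()  # use config.load_incluster_config() when running in-cluster
    batch_beta_api = client.BatchV1beta1Api()
    cron_jobs = await batch_beta_api.list_namespaced_cron_job(
        namespace=namespace, label_selector=f"btrix.crawlconfig={cid}"
    )
    if len(cron_jobs.items) != 1:
        raise Exception("Crawl Config Not Found")
    return cron_jobs.items[0]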
@@ -245,8 +242,8 @@ class K8SManager:
             return None

         manual = job.metadata.annotations.get("btrix.run.manual") == "1"
-        if not manual:
-            await self._delete_job(job.metadata.name)
+        if manual:
+            self.loop.create_task(self._delete_job(job.metadata.name))

         return self._make_crawl_for_job(
             job,
@@ -426,6 +423,7 @@ class K8SManager:
         """Create new job from cron job to run instantly"""
         annotations = cron_job.spec.job_template.metadata.annotations
         annotations["btrix.run.manual"] = "1"
+        annotations["btrix.run.schedule"] = ""

         # owner_ref = client.V1OwnerReference(
         #     kind="CronJob",
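Together with the deletion change above, these annotations are what let manually triggered jobs be cleaned up once they complete: the job is marked as a manual run when it is created from the cron job template, and that mark is checked later to schedule deletion in the background. A minimal sketch of the round trip, with a hypothetical delete_job coroutine standing in for K8SManager._delete_job:

import asyncio

async def delete_job(name: str) -> None:
    """ Hypothetical stand-in for K8SManager._delete_job() """
    print(f"deleting job {name}")

def mark_manual_run(annotations: dict) -> None:
    # set when a job is created directly from the cron job template ("run now")
    annotations["btrix.run.manual"] = "1"
    annotations["btrix.run.schedule"] = ""

async def cleanup_if_manual(annotations: dict, job_name: str) -> None:
    # manually triggered jobs are not owned by a schedule, so delete them once
    # the crawl is complete; deletion runs as a background task
    if annotations.get("btrix.run.manual") == "1":
        asyncio.get_running_loop().create_task(delete_job(job_name))

async def main() -> None:
    annotations: dict = {}
    mark_manual_run(annotations)
    await cleanup_if_manual(annotations, "crawl-job-example")
    await asyncio.sleep(0)  # give the background deletion task a chance to run

asyncio.run(main())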