misc tweaks:

- better error handling for not found resources, ensure 404
- typo in k8smanager
- add pylintrc
- ensure manual jobs are deleted when complete
- fix typos, reformat
Ilya Kreymer 2021-08-25 18:20:17 -07:00
parent f1a816be48
commit 223658cfa2
5 changed files with 61 additions and 42 deletions

View File

@@ -141,10 +141,11 @@ class CrawlOps:
async def update_crawl_schedule(self, cid: str, update: UpdateSchedule):
""" Update schedule for existing crawl config"""
if not await self.crawl_configs.find_one_and_update(
{"_id": cid}, {"$set": {"schedule": update.schedule}}
):
return None
return False
await self.crawl_manager.update_crawl_schedule(cid, update.schedule)
return True
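
Reassembled from the hunk above, the ops-layer method now signals a missing config with a boolean instead of None, leaving the HTTP status decision to the API layer. A sketch; UpdateSchedule, the Motor collection, and the crawl manager come from the surrounding file:

async def update_crawl_schedule(self, cid: str, update: UpdateSchedule) -> bool:
    """ Update schedule for existing crawl config """
    # find_one_and_update returns the matched document, or None when no config has this id
    if not await self.crawl_configs.find_one_and_update(
        {"_id": cid}, {"$set": {"schedule": update.schedule}}
    ):
        return False

    # propagate the new schedule to the crawl manager (Docker or K8s backend)
    await self.crawl_manager.update_crawl_schedule(cid, update.schedule)
    return True
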
@@ -222,11 +223,9 @@ def init_crawl_config_api(mdb, user_dep, archive_ops, crawl_manager):
cid: str,
):
success = False
try:
if not await ops.update_crawl_schedule(cid, update):
raise HTTPException(
status_code=404, detail=f"Crawl Config '{cid}' not found"
)
success = await ops.update_crawl_schedule(cid, update)
except Exception as e:
# pylint: disable=raise-missing-from
@@ -234,6 +233,11 @@ def init_crawl_config_api(mdb, user_dep, archive_ops, crawl_manager):
status_code=403, detail=f"Error updating crawl config: {e}"
)
if not success:
raise HTTPException(
status_code=404, detail=f"Crawl Config '{cid}' not found"
)
return {"updated": cid}
@router.post("/{cid}/run")
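
On the API side, unexpected errors from the update are still mapped to a 403 inside the try block, while the not-found case is checked afterwards, so the 404 HTTPException is no longer re-wrapped by the broad except. A sketch of the resulting handler; the route decorator is hypothetical since the real one sits outside this hunk:

@router.patch("/{cid}/schedule")  # hypothetical path; the actual decorator is not shown in the hunk
async def update_crawl_schedule(update: UpdateSchedule, cid: str):
    success = False
    try:
        success = await ops.update_crawl_schedule(cid, update)
    except Exception as e:
        # pylint: disable=raise-missing-from
        raise HTTPException(
            status_code=403, detail=f"Error updating crawl config: {e}"
        )

    # raised outside the try block, so it reaches the client as a 404
    if not success:
        raise HTTPException(
            status_code=404, detail=f"Crawl Config '{cid}' not found"
        )

    return {"updated": cid}
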
@@ -265,7 +269,9 @@ def init_crawl_config_api(mdb, user_dep, archive_ops, crawl_manager):
):
result = await ops.delete_crawl_config(cid, archive)
if not result or not result.deleted_count:
raise HTTPException(status_code=404, detail="Crawl Config Not Found")
raise HTTPException(
status_code=404, detail=f"Crawl Config '{cid}' Not Found"
)
return {"deleted": 1}

View File

@@ -125,7 +125,7 @@ def init_crawls_api(app, mdb, crawl_manager, crawl_config_ops, archives):
archive_crawl_dep = archives.archive_crawl_dep
@app.post("/crawls/done", tags=["crawls"])
@app.post("/_crawls/done", tags=["_internal"])
async def crawl_done(msg: CrawlCompleteIn):
loop = asyncio.get_running_loop()
loop.create_task(ops.on_handle_crawl_complete(msg))
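
The completion webhook is now also registered under an internal /_crawls/done path (matching the WEBHOOK_URL changes in the Docker and K8s managers further down), and processing is handed off to a background task so the crawler's callback returns immediately. A self-contained sketch of the pattern; the model field and response body are placeholders:

import asyncio

from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()

class CrawlCompleteIn(BaseModel):
    # placeholder field; the real model is defined elsewhere in the backend
    id: str = ""

async def on_handle_crawl_complete(msg: CrawlCompleteIn):
    """ Stand-in for ops.on_handle_crawl_complete """

@app.post("/crawls/done", tags=["crawls"])
@app.post("/_crawls/done", tags=["_internal"])
async def crawl_done(msg: CrawlCompleteIn):
    # stacking the decorators registers the same handler on both paths;
    # scheduling a task keeps the webhook response fast
    loop = asyncio.get_running_loop()
    loop.create_task(on_handle_crawl_complete(msg))
    return {"success": True}
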
@@ -152,25 +152,22 @@ def init_crawls_api(app, mdb, crawl_manager, crawl_config_ops, archives):
"/archives/{aid}/crawls/{crawl_id}/cancel",
tags=["crawls"],
)
async def crawl_cancel_stop(
async def crawl_cancel_immediately(
crawl_id, archive: Archive = Depends(archive_crawl_dep)
):
crawl = None
try:
crawl = await crawl_manager.stop_crawl(crawl_id, archive.id, graceful=False)
if not crawl:
raise HTTPException(
status_code=404, detail=f"Crawl not found: {crawl_id}"
)
await ops.store_crawl(crawl)
except HTTPException as httpe:
raise httpe
except Exception as exc:
# pylint: disable=raise-missing-from
raise HTTPException(status_code=400, detail=f"Error Canceling Crawl: {exc}")
if not crawl:
raise HTTPException(status_code=404, detail=f"Crawl not found: {crawl_id}")
await ops.store_crawl(crawl)
return {"canceled": True}
@app.post(
@@ -180,27 +177,33 @@ def init_crawls_api(app, mdb, crawl_manager, crawl_config_ops, archives):
async def crawl_graceful_stop(
crawl_id, archive: Archive = Depends(archive_crawl_dep)
):
canceled = False
try:
canceled = await crawl_manager.stop_crawl(
crawl_id, archive.id, graceful=True
)
if not canceled:
raise HTTPException(
status_code=404, detail=f"Crawl not found: {crawl_id}"
)
except HTTPException as httpe:
raise httpe
except Exception as exc:
# pylint: disable=raise-missing-from
raise HTTPException(status_code=400, detail=f"Error Stopping Crawl: {exc}")
if not canceled:
raise HTTPException(status_code=404, detail=f"Crawl not found: {crawl_id}")
return {"stopped_gracefully": True}
@app.post("/archives/{aid}/crawls/delete", tags=["crawls"])
async def delete_crawls(
delete_list: DeleteCrawlList, archive: Archive = Depends(archive_crawl_dep)
):
try:
for crawl_id in delete_list:
await crawl_manager.stop_crawl(crawl_id, archive.id, graceful=False)
except Exception as exc:
# pylint: disable=raise-missing-from
raise HTTPException(status_code=400, detail=f"Error Stopping Crawl: {exc}")
res = await ops.delete_crawls(archive.id, delete_list)
return {"deleted": res}

View File

@@ -109,6 +109,7 @@ class DockerManager:
timeout = int(container["Labels"]["btrix.timeout"])
actual = int(time.time()) - int(container["Created"])
if actual >= timeout:
# pylint: disable=line-too-long
print(
f"Crawl {container['Id']} running for {actual} seconds, exceeded timeout {timeout}, stopping..."
)
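
The surrounding loop compares each running container's age against its btrix.timeout label before stopping it. The same arithmetic as a standalone helper (the function name is hypothetical):

import time

def crawl_timed_out(container: dict) -> bool:
    """ `Created` is the unix timestamp from the Docker container listing,
    `btrix.timeout` is the configured crawl timeout in seconds """
    timeout = int(container["Labels"]["btrix.timeout"])
    actual = int(time.time()) - int(container["Created"])
    return actual >= timeout
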
@@ -181,18 +182,27 @@ class DockerManager:
async def stop_crawl(self, crawl_id, aid, graceful=True):
""" Stop crawl, if not graceful, issue SIGUSR1 to indicate cancelation """
container = await self.client.containers.get(crawl_id)
if container["Config"]["Labels"]["btrix.archive"] != aid:
return None
result = None
if not graceful:
await container.kill(signal="SIGUSR1")
result = self._make_crawl_for_container(container, "canceled", True)
else:
result = True
try:
container = await self.client.containers.get(crawl_id)
await container.kill(signal="SIGTERM")
if container["Config"]["Labels"]["btrix.archive"] != aid:
return None
if not graceful:
await container.kill(signal="SIGUSR1")
result = self._make_crawl_for_container(container, "canceled", True)
else:
result = True
await container.kill(signal="SIGTERM")
except aiodocker.exceptions.DockerError as exc:
if exc.status == 404:
return None
raise exc
return result
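
Reassembled, the new DockerManager.stop_crawl wraps every interaction with the container in a try/except, so a container that has already disappeared (aiodocker raises DockerError with status 404) is reported as not found instead of surfacing as a server error. A sketch of the method as it reads after this change:

import aiodocker

async def stop_crawl(self, crawl_id, aid, graceful=True):
    """ Stop crawl, if not graceful, issue SIGUSR1 to indicate cancelation """
    result = None

    try:
        container = await self.client.containers.get(crawl_id)

        # refuse to touch containers belonging to a different archive
        if container["Config"]["Labels"]["btrix.archive"] != aid:
            return None

        if not graceful:
            # SIGUSR1 tells the crawler to record this crawl as canceled
            await container.kill(signal="SIGUSR1")
            result = self._make_crawl_for_container(container, "canceled", True)
        else:
            result = True

        await container.kill(signal="SIGTERM")

    except aiodocker.exceptions.DockerError as exc:
        # a 404 from the Docker API means the container is already gone
        if exc.status == 404:
            return None

        raise exc

    return result
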
@@ -351,7 +361,7 @@ class DockerManager:
f"STORE_ENDPOINT_URL={endpoint_with_coll_url}",
f"STORE_ACCESS_KEY={storage.access_key}",
f"STORE_SECRET_KEY={storage.secret_key}",
"WEBHOOK_URL=http://backend:8000/crawls/done",
"WEBHOOK_URL=http://backend:8000/_crawls/done",
]
labels["btrix.run.schedule"] = schedule

View File

@@ -123,7 +123,7 @@ class K8SManager:
"STORE_ENDPOINT_URL": endpoint_with_coll_url,
"STORE_ACCESS_KEY": storage.access_key,
"STORE_SECRET_KEY": storage.secret_key,
"WEBHOOK_URL": "http://browsertrix-cloud.default:8000/crawls/done",
"WEBHOOK_URL": "http://browsertrix-cloud.default:8000/_crawls/done",
},
)
@@ -169,7 +169,7 @@ class K8SManager:
return cron_job
async def update_crawl_config(self, cid, schedule):
async def update_crawl_schedule(self, cid, schedule):
""" Update the schedule for existing crawl config """
cron_jobs = await self.batch_beta_api.list_namespaced_cron_job(
@@ -195,15 +195,12 @@ class K8SManager:
name=cron_job.metadata.name, namespace=self.namespace, body=cron_job
)
async def run_crawl_config(self, cid, manual=True, schedule=""):
async def run_crawl_config(self, cid):
""" Run crawl job for cron job based on specified crawlconfig id (cid) """
cron_jobs = await self.batch_beta_api.list_namespaced_cron_job(
namespace=self.namespace, label_selector=f"btrix.crawlconfig={cid}"
)
if not manual or schedule:
raise Exception("Manual trigger not supported")
if len(cron_jobs.items) != 1:
raise Exception("Crawl Config Not Found")
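
Two related cleanups in the K8SManager: update_crawl_config is renamed to update_crawl_schedule, matching the name the ops layer calls, and run_crawl_config drops the manual/schedule parameters since this path only creates an immediate, manual job from the cron job template. Both methods locate that cron job by label; the same lookup as a hypothetical standalone helper:

async def find_crawlconfig_cron_job(batch_beta_api, cid, namespace):
    """ Find the single CronJob backing a crawl config via its btrix.crawlconfig label """
    cron_jobs = await batch_beta_api.list_namespaced_cron_job(
        namespace=namespace, label_selector=f"btrix.crawlconfig={cid}"
    )

    if len(cron_jobs.items) != 1:
        raise Exception("Crawl Config Not Found")

    return cron_jobs.items[0]
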
@@ -245,8 +242,8 @@ class K8SManager:
return None
manual = job.metadata.annotations.get("btrix.run.manual") == "1"
if not manual:
await self._delete_job(job.metadata.name)
if manual:
self.loop.create_task(self._delete_job(job.metadata.name))
return self._make_crawl_for_job(
job,
@@ -426,6 +423,7 @@ class K8SManager:
"""Create new job from cron job to run instantly"""
annotations = cron_job.spec.job_template.metadata.annotations
annotations["btrix.run.manual"] = "1"
annotations["btrix.run.schedule"] = ""
# owner_ref = client.V1OwnerReference(
# kind="CronJob",
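
Manual runs are created from the cron job's job template and annotated with btrix.run.manual = "1" (plus an empty btrix.run.schedule), and per the commit message they are now deleted once complete, scheduled as a fire-and-forget task so completion handling is not blocked. A hypothetical sketch of both halves, assuming kubernetes_asyncio and a _delete_job-style coroutine like the one in this class:

from kubernetes_asyncio import client

async def create_manual_job(batch_api, cron_job, namespace):
    """ Build a one-off Job from the cron job's template, annotated as a manual run """
    annotations = cron_job.spec.job_template.metadata.annotations or {}
    annotations["btrix.run.manual"] = "1"
    annotations["btrix.run.schedule"] = ""

    job = client.V1Job(
        api_version="batch/v1",
        kind="Job",
        metadata=client.V1ObjectMeta(
            generate_name=cron_job.metadata.name + "-manual-",
            annotations=annotations,
            labels=cron_job.metadata.labels,
        ),
        spec=cron_job.spec.job_template.spec,
    )

    return await batch_api.create_namespaced_job(namespace=namespace, body=job)

def schedule_manual_job_cleanup(loop, job, delete_job):
    """ On completion, delete manual jobs in the background instead of leaving them behind """
    if job.metadata.annotations.get("btrix.run.manual") == "1":
        loop.create_task(delete_job(job.metadata.name))
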

pylintrc Normal file
View File

@@ -0,0 +1,2 @@
[MESSAGES CONTROL]
disable=duplicate-code