diff --git a/backend/btrixcloud/operator.py b/backend/btrixcloud/operator.py
index dea26161..2af445b8 100644
--- a/backend/btrixcloud/operator.py
+++ b/backend/btrixcloud/operator.py
@@ -362,7 +362,9 @@ class BtrixOperator(K8sAPI):
         # pylint: disable=bare-except, broad-except
         except:
             # fail crawl if config somehow missing, shouldn't generally happen
-            await self.fail_crawl(crawl_id, uuid.UUID(cid), status, pods)
+            await self.fail_crawl(
+                crawl_id, uuid.UUID(cid), uuid.UUID(oid), status, pods
+            )
             return self._empty_response(status)
@@ -661,7 +663,14 @@ class BtrixOperator(K8sAPI):
             )
             return False
 
-    async def cancel_crawl(self, crawl_id, cid, oid, status, pods):
+    async def cancel_crawl(
+        self,
+        crawl_id: str,
+        cid: uuid.UUID,
+        oid: uuid.UUID,
+        status: CrawlStatus,
+        pods: dict,
+    ) -> bool:
         """Mark crawl as canceled"""
         if not await self.mark_finished(crawl_id, cid, oid, status, "canceled"):
             return False
@@ -698,12 +707,20 @@ class BtrixOperator(K8sAPI):
 
         return status.canceled
 
-    async def fail_crawl(self, crawl_id, cid, status, pods, stats=None):
+    async def fail_crawl(
+        self,
+        crawl_id: str,
+        cid: uuid.UUID,
+        oid: uuid.UUID,
+        status: CrawlStatus,
+        pods: dict,
+        stats=None,
+    ) -> bool:
         """Mark crawl as failed, log crawl state and print crawl logs, if possible"""
         prev_state = status.state
 
         if not await self.mark_finished(
-            crawl_id, cid, None, status, "failed", stats=stats
+            crawl_id, cid, oid, status, "failed", stats=stats
         ):
             return False
 
@@ -1138,7 +1155,9 @@ class BtrixOperator(K8sAPI):
             # check if one-page crawls actually succeeded
             # if only one page found, and no files, assume failed
             if status.pagesFound == 1 and not status.filesAdded:
-                await self.fail_crawl(crawl.id, crawl.cid, status, pods, stats)
+                await self.fail_crawl(
+                    crawl.id, crawl.cid, crawl.oid, status, pods, stats
+                )
                 return status
 
         completed = status.pagesDone and status.pagesDone >= status.pagesFound
@@ -1157,7 +1176,9 @@ class BtrixOperator(K8sAPI):
                     crawl.id, crawl.cid, crawl.oid, status, "canceled", crawl, stats
                 )
             else:
-                await self.fail_crawl(crawl.id, crawl.cid, status, pods, stats)
+                await self.fail_crawl(
+                    crawl.id, crawl.cid, crawl.oid, status, pods, stats
+                )
 
         # check for other statuses
         else:
@@ -1181,8 +1202,15 @@ class BtrixOperator(K8sAPI):
 
     # pylint: disable=too-many-arguments
     async def mark_finished(
-        self, crawl_id, cid, oid, status, state, crawl=None, stats=None
-    ):
+        self,
+        crawl_id: str,
+        cid: uuid.UUID,
+        oid: uuid.UUID,
+        status: CrawlStatus,
+        state: str,
+        crawl=None,
+        stats=None,
+    ) -> bool:
         """mark crawl as finished, set finished timestamp and final state"""
         finished = dt_now()
 
@@ -1192,9 +1220,9 @@ class BtrixOperator(K8sAPI):
             kwargs["stats"] = stats
 
         if state in SUCCESSFUL_STATES:
-            allowed_from = RUNNING_STATES
+            allowed_from = list(RUNNING_STATES)
         else:
-            allowed_from = RUNNING_AND_STARTING_STATES
+            allowed_from = list(RUNNING_AND_STARTING_STATES)
 
         # if set_state returns false, already set to same status, return
         if not await self.set_state(
@@ -1221,8 +1249,13 @@ class BtrixOperator(K8sAPI):
 
     # pylint: disable=too-many-arguments
     async def do_crawl_finished_tasks(
-        self, crawl_id, cid, oid, files_added_size, state
-    ):
+        self,
+        crawl_id: str,
+        cid: uuid.UUID,
+        oid: uuid.UUID,
+        files_added_size: int,
+        state: str,
+    ) -> None:
         """Run tasks after crawl completes in asyncio.task coroutine."""
         await self.crawl_config_ops.stats_recompute_last(cid, files_added_size, 1)
 
@@ -1230,7 +1263,9 @@ class BtrixOperator(K8sAPI):
             await self.org_ops.inc_org_bytes_stored(oid, files_added_size, "crawl")
 
         await self.coll_ops.add_successful_crawl_to_collections(crawl_id, cid)
 
-        await self.event_webhook_ops.create_crawl_finished_notification(crawl_id, state)
+        await self.event_webhook_ops.create_crawl_finished_notification(
+            crawl_id, oid, state
+        )
         # add crawl errors to db
         await self.add_crawl_errors_to_db(crawl_id)
diff --git a/backend/btrixcloud/uploads.py b/backend/btrixcloud/uploads.py
index 3a54ad71..16e1631c 100644
--- a/backend/btrixcloud/uploads.py
+++ b/backend/btrixcloud/uploads.py
@@ -186,7 +186,7 @@ class UploadOps(BaseCrawlOps):
         )
 
         asyncio.create_task(
-            self.event_webhook_ops.create_upload_finished_notification(crawl_id)
+            self.event_webhook_ops.create_upload_finished_notification(crawl_id, org.id)
         )
 
         quota_reached = await self.orgs.inc_org_bytes_stored(
diff --git a/backend/btrixcloud/webhooks.py b/backend/btrixcloud/webhooks.py
index 713bb9f1..14f56551 100644
--- a/backend/btrixcloud/webhooks.py
+++ b/backend/btrixcloud/webhooks.py
@@ -214,10 +214,11 @@ class EventWebhookOps:
             crawl_ids=[crawl_id], coll_id=coll_id, org=org
         )
 
-    async def create_crawl_finished_notification(self, crawl_id: str, state: str):
+    async def create_crawl_finished_notification(
+        self, crawl_id: str, oid: uuid.UUID, state: str
+    ) -> None:
         """Create webhook notification for finished crawl."""
-        crawl_res = await self.crawls.find_one({"_id": crawl_id})
-        org = await self.org_ops.get_org_by_id(crawl_res["oid"])
+        org = await self.org_ops.get_org_by_id(oid)
 
         if not org.webhookUrls or not org.webhookUrls.crawlFinished:
             return
@@ -233,10 +234,11 @@ class EventWebhookOps:
             ),
         )
 
-    async def create_upload_finished_notification(self, crawl_id: str):
+    async def create_upload_finished_notification(
+        self, crawl_id: str, oid: uuid.UUID
+    ) -> None:
         """Create webhook notification for finished upload."""
-        crawl_res = await self.crawls.find_one({"_id": crawl_id})
-        org = await self.org_ops.get_org_by_id(crawl_res["oid"])
+        org = await self.org_ops.get_org_by_id(oid)
 
         if not org.webhookUrls or not org.webhookUrls.uploadFinished:
             return
@@ -252,7 +254,7 @@ class EventWebhookOps:
 
     async def create_crawl_started_notification(
         self, crawl_id: str, oid: uuid.UUID, scheduled: bool = False
-    ):
+    ) -> None:
         """Create webhook notification for started crawl."""
         org = await self.org_ops.get_org_by_id(oid)
 
@@ -318,7 +320,7 @@ class EventWebhookOps:
         crawl_ids: List[str],
         coll_id: uuid.UUID,
         org: Organization,
-    ):
+    ) -> None:
         """Create webhook notification for item added to collection"""
         if not org.webhookUrls or not org.webhookUrls.addedToCollection:
             return
@@ -339,7 +341,7 @@ class EventWebhookOps:
         crawl_ids: List[str],
         coll_id: uuid.UUID,
         org: Organization,
-    ):
+    ) -> None:
         """Create webhook notification for item removed from collection"""
         if not org.webhookUrls or not org.webhookUrls.removedFromCollection:
             return