diff --git a/backend/btrixcloud/basecrawls.py b/backend/btrixcloud/basecrawls.py
index f20dd169..a93f6f7e 100644
--- a/backend/btrixcloud/basecrawls.py
+++ b/backend/btrixcloud/basecrawls.py
@@ -186,8 +186,75 @@ class BaseCrawlOps:
 
         return {"updated": True}
 
+    async def update_crawl_state(self, crawl_id: str, state: str):
+        """called only when job container is being stopped/canceled"""
+
+        data = {"state": state}
+        # if cancelation, set the finish time here
+        if state == "canceled":
+            data["finished"] = dt_now()
+
+        await self.crawls.find_one_and_update(
+            {
+                "_id": crawl_id,
+                "type": "crawl",
+                "state": {"$in": RUNNING_AND_STARTING_STATES},
+            },
+            {"$set": data},
+        )
+
+    async def shutdown_crawl(self, crawl_id: str, org: Organization, graceful: bool):
+        """stop or cancel specified crawl"""
+        crawl = await self.get_crawl_raw(crawl_id, org)
+        if crawl.get("type") != "crawl":
+            return
+
+        result = None
+        try:
+            result = await self.crawl_manager.shutdown_crawl(
+                crawl_id, org.id_str, graceful=graceful
+            )
+
+            if result.get("success"):
+                if graceful:
+                    await self.crawls.find_one_and_update(
+                        {"_id": crawl_id, "type": "crawl", "oid": org.id},
+                        {"$set": {"stopping": True}},
+                    )
+                return result
+
+        except Exception as exc:
+            # pylint: disable=raise-missing-from
+            # if reached here, probably crawl doesn't exist anymore
+            raise HTTPException(
+                status_code=404, detail=f"crawl_not_found, (details: {exc})"
+            )
+
+        # if job no longer running, canceling is considered success,
+        # but graceful stoppage is not possible, so would be a failure
+        if result.get("error") == "Not Found":
+            if not graceful:
+                await self.update_crawl_state(crawl_id, "canceled")
+                crawl = await self.get_crawl_raw(crawl_id, org)
+                if not await self.crawl_configs.stats_recompute_last(
+                    crawl["cid"], 0, -1
+                ):
+                    raise HTTPException(
+                        status_code=404,
+                        detail=f"crawl_config_not_found: {crawl['cid']}",
+                    )
+
+                return {"success": True}
+
+        # return whatever detail may be included in the response
+        raise HTTPException(status_code=400, detail=result)
+
     async def delete_crawls(
-        self, org: Organization, delete_list: DeleteCrawlList, type_: str
+        self,
+        org: Organization,
+        delete_list: DeleteCrawlList,
+        type_: str,
+        user: Optional[User] = None,
     ):
         """Delete a list of crawls by id for given org"""
         cids_to_update: dict[str, dict[str, int]] = {}
@@ -199,6 +266,21 @@ class BaseCrawlOps:
             if crawl.get("type") != type_:
                 continue
 
+            # Ensure user has appropriate permissions for all crawls in list:
+            # - Crawler users can delete their own crawls
+            # - Org owners can delete any crawls in org
+            if user and (crawl.get("userid") != user.id) and not org.is_owner(user):
+                raise HTTPException(status_code=403, detail="not_allowed")
+
+            if type_ == "crawl" and not crawl.get("finished"):
+                try:
+                    await self.shutdown_crawl(crawl_id, org, graceful=False)
+                except Exception as exc:
+                    # pylint: disable=raise-missing-from
+                    raise HTTPException(
+                        status_code=400, detail=f"Error Stopping Crawl: {exc}"
+                    )
+
             crawl_size = await self._delete_crawl_files(crawl, org)
             size += crawl_size
 
@@ -486,24 +568,37 @@ class BaseCrawlOps:
         return crawls, total
 
     async def delete_crawls_all_types(
-        self, delete_list: DeleteCrawlList, org: Organization
+        self,
+        delete_list: DeleteCrawlList,
+        org: Organization,
+        user: Optional[User] = None,
     ):
         """Delete uploaded crawls"""
-        if len(delete_list.crawl_ids) == 0:
+        crawls: list[str] = []
+        uploads: list[str] = []
+
+        for crawl_id in delete_list.crawl_ids:
+            crawl = await self.get_crawl_raw(crawl_id, org)
+            type_ = crawl.get("type")
+            if type_ == "crawl":
+                crawls.append(crawl_id)
+            if type_ == "upload":
+                uploads.append(crawl_id)
+
+        crawls_length = len(crawls)
+        uploads_length = len(uploads)
+
+        if crawls_length + uploads_length == 0:
             raise HTTPException(status_code=400, detail="nothing_to_delete")
 
         deleted_count = 0
         # Value is set in delete calls, but initialize to keep linter happy.
         quota_reached = False
 
-        crawls_to_delete, uploads_to_delete = await self._split_delete_list_by_type(
-            delete_list, org
-        )
-
-        if len(crawls_to_delete) > 0:
-            crawl_delete_list = DeleteCrawlList(crawl_ids=crawls_to_delete)
+        if crawls_length:
+            crawl_delete_list = DeleteCrawlList(crawl_ids=crawls)
             deleted, cids_to_update, quota_reached = await self.delete_crawls(
-                org, crawl_delete_list, "crawl"
+                org, crawl_delete_list, "crawl", user
             )
             deleted_count += deleted
 
@@ -512,10 +607,10 @@ class BaseCrawlOps:
                 cid_inc = cid_dict["inc"]
                 await self.crawl_configs.stats_recompute_last(cid, -cid_size, -cid_inc)
 
-        if len(uploads_to_delete) > 0:
-            upload_delete_list = DeleteCrawlList(crawl_ids=uploads_to_delete)
+        if uploads_length:
+            upload_delete_list = DeleteCrawlList(crawl_ids=uploads)
             deleted, _, quota_reached = await self.delete_crawls(
-                org, upload_delete_list, "upload"
+                org, upload_delete_list, "upload", user
             )
             deleted_count += deleted
 
@@ -524,26 +619,6 @@
 
         return {"deleted": True, "storageQuotaReached": quota_reached}
 
-    async def _split_delete_list_by_type(
-        self, delete_list: DeleteCrawlList, org: Organization
-    ):
-        """Return separate crawl and upload arrays from mixed input"""
-        crawls: list[str] = []
-        uploads: list[str] = []
-
-        for crawl_id in delete_list.crawl_ids:
-            try:
-                crawl_raw = await self.get_crawl_raw(crawl_id, org)
-                crawl_type = crawl_raw.get("type")
-                if crawl_type == "crawl":
-                    crawls.append(crawl_id)
-                elif crawl_type == "upload":
-                    uploads.append(crawl_id)
-            # pylint: disable=broad-exception-caught
-            except Exception as err:
-                print(err, flush=True)
-        return crawls, uploads
-
     async def get_all_crawl_search_values(
         self, org: Organization, type_: Optional[str] = None
     ):
@@ -690,6 +765,7 @@ def init_base_crawls_api(
     @app.post("/orgs/{oid}/all-crawls/delete", tags=["all-crawls"])
     async def delete_crawls_all_types(
         delete_list: DeleteCrawlList,
+        user: User = Depends(user_dep),
         org: Organization = Depends(org_crawl_dep),
     ):
-        return await ops.delete_crawls_all_types(delete_list, org)
+        return await ops.delete_crawls_all_types(delete_list, org, user)
diff --git a/backend/btrixcloud/crawls.py b/backend/btrixcloud/crawls.py
index cc3ccc8d..ac0b1da6 100644
--- a/backend/btrixcloud/crawls.py
+++ b/backend/btrixcloud/crawls.py
@@ -203,12 +203,16 @@ class CrawlOps(BaseCrawlOps):
 
         return crawls, total
 
     async def delete_crawls(
-        self, org: Organization, delete_list: DeleteCrawlList, type_="crawl"
+        self,
+        org: Organization,
+        delete_list: DeleteCrawlList,
+        type_="crawl",
+        user: Optional[User] = None,
     ):
         """Delete a list of crawls by id for given org"""
         count, cids_to_update, quota_reached = await super().delete_crawls(
-            org, delete_list, type_
+            org, delete_list, type_, user
         )
 
         if count < 1:
@@ -295,65 +299,6 @@ class CrawlOps(BaseCrawlOps):
 
         return True
 
-    async def update_crawl_state(self, crawl_id: str, state: str):
-        """called only when job container is being stopped/canceled"""
-
-        data = {"state": state}
-        # if cancelation, set the finish time here
-        if state == "canceled":
-            data["finished"] = dt_now()
-
-        await self.crawls.find_one_and_update(
-            {
-                "_id": crawl_id,
-                "type": "crawl",
-                "state": {"$in": RUNNING_AND_STARTING_STATES},
-            },
-            {"$set": data},
-        )
-
-    async def shutdown_crawl(self, crawl_id: str, org: Organization, graceful: bool):
-        """stop or cancel specified crawl"""
-        result = None
-        try:
-            result = await self.crawl_manager.shutdown_crawl(
-                crawl_id, org.id_str, graceful=graceful
-            )
-
-            if result.get("success"):
-                if graceful:
-                    await self.crawls.find_one_and_update(
-                        {"_id": crawl_id, "type": "crawl", "oid": org.id},
-                        {"$set": {"stopping": True}},
-                    )
-                return result
-
-        except Exception as exc:
-            # pylint: disable=raise-missing-from
-            # if reached here, probably crawl doesn't exist anymore
-            raise HTTPException(
-                status_code=404, detail=f"crawl_not_found, (details: {exc})"
-            )
-
-        # if job no longer running, canceling is considered success,
-        # but graceful stoppage is not possible, so would be a failure
-        if result.get("error") == "Not Found":
-            if not graceful:
-                await self.update_crawl_state(crawl_id, "canceled")
-                crawl = await self.get_crawl_raw(crawl_id, org)
-                if not await self.crawl_configs.stats_recompute_last(
-                    crawl["cid"], 0, -1
-                ):
-                    raise HTTPException(
-                        status_code=404,
-                        detail=f"crawl_config_not_found: {crawl['cid']}",
-                    )
-
-                return {"success": True}
-
-        # return whatever detail may be included in the response
-        raise HTTPException(status_code=400, detail=result)
-
     async def _crawl_queue_len(self, redis, key):
         try:
             return await redis.zcard(key)
@@ -774,25 +719,7 @@ def init_crawls_api(
         user: User = Depends(user_dep),
         org: Organization = Depends(org_crawl_dep),
     ):
-        # Ensure user has appropriate permissions for all crawls in list:
-        # - Crawler users can delete their own crawls
-        # - Org owners can delete any crawls in org
-        for crawl_id in delete_list.crawl_ids:
-            crawl_raw = await ops.get_crawl_raw(crawl_id, org)
-            crawl = Crawl.from_dict(crawl_raw)
-            if (crawl.userid != user.id) and not org.is_owner(user):
-                raise HTTPException(status_code=403, detail="Not Allowed")
-
-            if not crawl.finished:
-                try:
-                    await ops.shutdown_crawl(crawl_id, org, graceful=False)
-                except Exception as exc:
-                    # pylint: disable=raise-missing-from
-                    raise HTTPException(
-                        status_code=400, detail=f"Error Stopping Crawl: {exc}"
-                    )
-
-        return await ops.delete_crawls(org, delete_list)
+        return await ops.delete_crawls(org, delete_list, "crawl", user)
 
     @app.get(
         "/orgs/all/crawls/{crawl_id}/replay.json",
diff --git a/backend/btrixcloud/uploads.py b/backend/btrixcloud/uploads.py
index 4860f647..02f16f39 100644
--- a/backend/btrixcloud/uploads.py
+++ b/backend/btrixcloud/uploads.py
@@ -187,10 +187,15 @@ class UploadOps(BaseCrawlOps):
 
         return {"id": crawl_id, "added": True, "storageQuotaReached": quota_reached}
 
-    async def delete_uploads(self, delete_list: DeleteCrawlList, org: Organization):
+    async def delete_uploads(
+        self,
+        delete_list: DeleteCrawlList,
+        org: Organization,
+        user: Optional[User] = None,
+    ):
         """Delete uploaded crawls"""
         deleted_count, _, quota_reached = await self.delete_crawls(
-            org, delete_list, "upload"
+            org, delete_list, "upload", user
         )
 
         if deleted_count < 1:
@@ -399,6 +404,7 @@ def init_uploads_api(
     @app.post("/orgs/{oid}/uploads/delete", tags=["uploads"])
     async def delete_uploads(
         delete_list: DeleteCrawlList,
+        user: User = Depends(user_dep),
         org: Organization = Depends(org_crawl_dep),
     ):
-        return await ops.delete_uploads(delete_list, org)
+        return await ops.delete_uploads(delete_list, org, user)
diff --git a/backend/test/test_run_crawl.py b/backend/test/test_run_crawl.py
index 194fda9c..9d87c68a 100644
--- a/backend/test/test_run_crawl.py
+++ b/backend/test/test_run_crawl.py
@@ -308,7 +308,7 @@ def test_delete_crawls_crawler(
     )
     assert r.status_code == 403
     data = r.json()
-    assert data["detail"] == "Not Allowed"
+    assert data["detail"] == "not_allowed"
 
     # Test that crawler user can delete own crawl
     r = requests.post(
diff --git a/backend/test/test_uploads.py b/backend/test/test_uploads.py
index e3541110..ef1540c8 100644
--- a/backend/test/test_uploads.py
+++ b/backend/test/test_uploads.py
@@ -302,7 +302,17 @@ def test_update_upload_metadata(admin_auth_headers, default_org_id):
     assert data["collectionIds"] == UPDATED_COLLECTION_IDS
 
 
-def test_delete_stream_upload(admin_auth_headers, default_org_id):
+def test_delete_stream_upload(admin_auth_headers, crawler_auth_headers, default_org_id):
+    # Verify non-admin user who didn't upload crawl can't delete it
+    r = requests.post(
+        f"{API_PREFIX}/orgs/{default_org_id}/uploads/delete",
+        headers=crawler_auth_headers,
+        json={"crawl_ids": [upload_id]},
+    )
+    assert r.status_code == 403
+    assert r.json()["detail"] == "not_allowed"
+
+    # Verify user who created upload can delete it
     r = requests.post(
         f"{API_PREFIX}/orgs/{default_org_id}/uploads/delete",
         headers=admin_auth_headers,
@@ -842,6 +852,7 @@ def test_update_upload_metadata_all_crawls(admin_auth_headers, default_org_id):
 
 def test_delete_form_upload_and_crawls_from_all_crawls(
     admin_auth_headers,
+    crawler_auth_headers,
     default_org_id,
     all_crawls_delete_crawl_ids,
     all_crawls_delete_config_id,
@@ -890,6 +901,15 @@ def test_delete_form_upload_and_crawls_from_all_crawls(
     combined_crawl_size = crawl_1_size + crawl_2_size
     total_size = combined_crawl_size + upload_size
 
+    # Verify that non-admin user can't delete another's items
+    r = requests.post(
+        f"{API_PREFIX}/orgs/{default_org_id}/all-crawls/delete",
+        headers=crawler_auth_headers,
+        json={"crawl_ids": crawls_to_delete},
+    )
+    assert r.status_code == 403
+    assert r.json()["detail"] == "not_allowed"
+
     # Delete mixed type archived items
     r = requests.post(
         f"{API_PREFIX}/orgs/{default_org_id}/all-crawls/delete",
diff --git a/frontend/src/pages/org/crawl-detail.ts b/frontend/src/pages/org/crawl-detail.ts
index e7889989..56aaa10a 100644
--- a/frontend/src/pages/org/crawl-detail.ts
+++ b/frontend/src/pages/org/crawl-detail.ts
@@ -1024,10 +1024,20 @@ ${this.crawl?.description}
         icon: "check2-circle",
       });
     } catch (e: any) {
+      let message = msg(
+        str`Sorry, couldn't delete archived item at this time.`
+      );
+      if (e.isApiError) {
+        if (e.details == "not_allowed") {
+          message = msg(
+            str`Only org owners can delete other users' archived items.`
+          );
+        } else if (e.message) {
+          message = e.message;
+        }
+      }
       this.notify({
-        message:
-          (e.isApiError && e.message) ||
-          msg("Sorry, couldn't run crawl at this time."),
+        message: message,
         variant: "danger",
         icon: "exclamation-octagon",
       });
diff --git a/frontend/src/pages/org/crawls-list.ts b/frontend/src/pages/org/crawls-list.ts
index 4952533c..db27502b 100644
--- a/frontend/src/pages/org/crawls-list.ts
+++ b/frontend/src/pages/org/crawls-list.ts
@@ -719,10 +719,20 @@ export class CrawlsList extends LiteElement {
       });
       this.fetchArchivedItems();
     } catch (e: any) {
+      let message = msg(
+        str`Sorry, couldn't delete archived item at this time.`
+      );
+      if (e.isApiError) {
+        if (e.details == "not_allowed") {
+          message = msg(
+            str`Only org owners can delete other users' archived items.`
+          );
+        } else if (e.message) {
+          message = e.message;
+        }
+      }
       this.notify({
-        message:
-          (e.isApiError && e.message) ||
-          msg(str`Sorry, couldn't delete archived item at this time.`),
+        message: message,
         variant: "danger",
         icon: "exclamation-octagon",
       });
diff --git a/frontend/src/pages/org/workflow-detail.ts b/frontend/src/pages/org/workflow-detail.ts
index 1a7dc9bf..74d2c324 100644
--- a/frontend/src/pages/org/workflow-detail.ts
+++ b/frontend/src/pages/org/workflow-detail.ts
@@ -1626,10 +1626,20 @@ export class WorkflowDetail extends LiteElement {
       });
       this.fetchCrawls();
     } catch (e: any) {
+      let message = msg(
+        str`Sorry, couldn't delete archived item at this time.`
+      );
+      if (e.isApiError) {
+        if (e.details == "not_allowed") {
+          message = msg(
+            str`Only org owners can delete other users' archived items.`
+          );
+        } else if (e.message) {
+          message = e.message;
+        }
+      }
       this.notify({
-        message:
-          (e.isApiError && e.message) ||
-          msg("Sorry, couldn't run crawl at this time."),
+        message: message,
         variant: "danger",
         icon: "exclamation-octagon",
       });