API delete endpoint improvements (#1232)
- Applies a user permissions check before deleting anything in all /delete endpoints
- Shuts down running crawls before deleting anything in /all-crawls/delete as well as /crawls/delete
- Splits delete_list.crawl_ids into crawl and upload lists at the same time as the permission checks in /all-crawls/delete
- Updates the frontend notification message to "Only org owners can delete other users' archived items." when a crawler user attempts to delete another user's archived items
This commit is contained in:
parent df190e12b9
commit e9bac4c088
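
For context, a sketch of the behavior these changes enforce, modeled on the tests further down in this commit (API_PREFIX, the auth headers, and the ids are placeholders following the test conventions; illustrative only, not part of the diff):

import requests

# A crawler user deleting another user's archived item should now be
# rejected up front with 403 "not_allowed" on any of the /delete endpoints.
r = requests.post(
    f"{API_PREFIX}/orgs/{org_id}/all-crawls/delete",
    headers=crawler_auth_headers,  # non-owner crawler user (placeholder)
    json={"crawl_ids": [other_users_crawl_id]},  # hypothetical id
)
assert r.status_code == 403
assert r.json()["detail"] == "not_allowed"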
@@ -186,8 +186,75 @@ class BaseCrawlOps:
         return {"updated": True}
 
+    async def update_crawl_state(self, crawl_id: str, state: str):
+        """called only when job container is being stopped/canceled"""
+
+        data = {"state": state}
+        # if cancelation, set the finish time here
+        if state == "canceled":
+            data["finished"] = dt_now()
+
+        await self.crawls.find_one_and_update(
+            {
+                "_id": crawl_id,
+                "type": "crawl",
+                "state": {"$in": RUNNING_AND_STARTING_STATES},
+            },
+            {"$set": data},
+        )
+
+    async def shutdown_crawl(self, crawl_id: str, org: Organization, graceful: bool):
+        """stop or cancel specified crawl"""
+        crawl = await self.get_crawl_raw(crawl_id, org)
+        if crawl.get("type") != "crawl":
+            return
+
+        result = None
+        try:
+            result = await self.crawl_manager.shutdown_crawl(
+                crawl_id, org.id_str, graceful=graceful
+            )
+
+            if result.get("success"):
+                if graceful:
+                    await self.crawls.find_one_and_update(
+                        {"_id": crawl_id, "type": "crawl", "oid": org.id},
+                        {"$set": {"stopping": True}},
+                    )
+                return result
+
+        except Exception as exc:
+            # pylint: disable=raise-missing-from
+            # if reached here, probably crawl doesn't exist anymore
+            raise HTTPException(
+                status_code=404, detail=f"crawl_not_found, (details: {exc})"
+            )
+
+        # if job no longer running, canceling is considered success,
+        # but graceful stoppage is not possible, so would be a failure
+        if result.get("error") == "Not Found":
+            if not graceful:
+                await self.update_crawl_state(crawl_id, "canceled")
+                crawl = await self.get_crawl_raw(crawl_id, org)
+                if not await self.crawl_configs.stats_recompute_last(
+                    crawl["cid"], 0, -1
+                ):
+                    raise HTTPException(
+                        status_code=404,
+                        detail=f"crawl_config_not_found: {crawl['cid']}",
+                    )
+
+                return {"success": True}
+
+        # return whatever detail may be included in the response
+        raise HTTPException(status_code=400, detail=result)
+
     async def delete_crawls(
-        self, org: Organization, delete_list: DeleteCrawlList, type_: str
+        self,
+        org: Organization,
+        delete_list: DeleteCrawlList,
+        type_: str,
+        user: Optional[User] = None,
     ):
         """Delete a list of crawls by id for given org"""
         cids_to_update: dict[str, dict[str, int]] = {}
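
The shutdown handling above distinguishes three outcomes; a condensed sketch of that decision logic (simplified and illustrative only — the real method also updates MongoDB and raises HTTPException rather than returning labels):

def classify_shutdown(result: dict, graceful: bool) -> str:
    """Hypothetical summary of shutdown_crawl's result handling."""
    if result.get("success"):
        # a graceful stop additionally marks the crawl "stopping" in the db
        return "ok"
    if result.get("error") == "Not Found" and not graceful:
        # job already gone: cancelation still counts as success after the
        # crawl is marked canceled and workflow stats are recomputed
        return "ok"
    # graceful stop of a missing job, or any other error, is a failure (400)
    return "error"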
@@ -199,6 +266,21 @@ class BaseCrawlOps:
             if crawl.get("type") != type_:
                 continue
 
+            # Ensure user has appropriate permissions for all crawls in list:
+            # - Crawler users can delete their own crawls
+            # - Org owners can delete any crawls in org
+            if user and (crawl.get("userid") != user.id) and not org.is_owner(user):
+                raise HTTPException(status_code=403, detail="not_allowed")
+
+            if type_ == "crawl" and not crawl.get("finished"):
+                try:
+                    await self.shutdown_crawl(crawl_id, org, graceful=False)
+                except Exception as exc:
+                    # pylint: disable=raise-missing-from
+                    raise HTTPException(
+                        status_code=400, detail=f"Error Stopping Crawl: {exc}"
+                    )
+
             crawl_size = await self._delete_crawl_files(crawl, org)
             size += crawl_size
 
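
The new check reduces to a single predicate; a standalone sketch with a hypothetical helper name (not part of the diff):

def may_delete(is_own_crawl: bool, is_org_owner: bool) -> bool:
    """Mirror of the rule in delete_crawls: own crawl, or org owner."""
    return is_own_crawl or is_org_owner

assert may_delete(True, False)        # crawler deletes own crawl
assert may_delete(False, True)        # org owner deletes any crawl in org
assert not may_delete(False, False)   # otherwise -> HTTP 403 "not_allowed"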
@@ -486,24 +568,37 @@ class BaseCrawlOps:
         return crawls, total
 
     async def delete_crawls_all_types(
-        self, delete_list: DeleteCrawlList, org: Organization
+        self,
+        delete_list: DeleteCrawlList,
+        org: Organization,
+        user: Optional[User] = None,
     ):
         """Delete uploaded crawls"""
-        if len(delete_list.crawl_ids) == 0:
+        crawls: list[str] = []
+        uploads: list[str] = []
+
+        for crawl_id in delete_list.crawl_ids:
+            crawl = await self.get_crawl_raw(crawl_id, org)
+            type_ = crawl.get("type")
+            if type_ == "crawl":
+                crawls.append(crawl_id)
+            if type_ == "upload":
+                uploads.append(crawl_id)
+
+        crawls_length = len(crawls)
+        uploads_length = len(uploads)
+
+        if crawls_length + uploads_length == 0:
             raise HTTPException(status_code=400, detail="nothing_to_delete")
 
         deleted_count = 0
         # Value is set in delete calls, but initialize to keep linter happy.
         quota_reached = False
 
-        crawls_to_delete, uploads_to_delete = await self._split_delete_list_by_type(
-            delete_list, org
-        )
-
-        if len(crawls_to_delete) > 0:
-            crawl_delete_list = DeleteCrawlList(crawl_ids=crawls_to_delete)
+        if crawls_length:
+            crawl_delete_list = DeleteCrawlList(crawl_ids=crawls)
             deleted, cids_to_update, quota_reached = await self.delete_crawls(
-                org, crawl_delete_list, "crawl"
+                org, crawl_delete_list, "crawl", user
             )
             deleted_count += deleted
 
@@ -512,10 +607,10 @@ class BaseCrawlOps:
             cid_inc = cid_dict["inc"]
             await self.crawl_configs.stats_recompute_last(cid, -cid_size, -cid_inc)
 
-        if len(uploads_to_delete) > 0:
-            upload_delete_list = DeleteCrawlList(crawl_ids=uploads_to_delete)
+        if uploads_length:
+            upload_delete_list = DeleteCrawlList(crawl_ids=uploads)
             deleted, _, quota_reached = await self.delete_crawls(
-                org, upload_delete_list, "upload"
+                org, upload_delete_list, "upload", user
             )
             deleted_count += deleted
 
@@ -524,26 +619,6 @@ class BaseCrawlOps:
 
         return {"deleted": True, "storageQuotaReached": quota_reached}
 
-    async def _split_delete_list_by_type(
-        self, delete_list: DeleteCrawlList, org: Organization
-    ):
-        """Return separate crawl and upload arrays from mixed input"""
-        crawls: list[str] = []
-        uploads: list[str] = []
-
-        for crawl_id in delete_list.crawl_ids:
-            try:
-                crawl_raw = await self.get_crawl_raw(crawl_id, org)
-                crawl_type = crawl_raw.get("type")
-                if crawl_type == "crawl":
-                    crawls.append(crawl_id)
-                elif crawl_type == "upload":
-                    uploads.append(crawl_id)
-            # pylint: disable=broad-exception-caught
-            except Exception as err:
-                print(err, flush=True)
-        return crawls, uploads
-
     async def get_all_crawl_search_values(
         self, org: Organization, type_: Optional[str] = None
     ):
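
The removed helper above is now inlined in delete_crawls_all_types; the split behavior itself is unchanged. A minimal standalone sketch (the dict stands in for the get_crawl_raw lookups and is purely illustrative):

def split_by_type(types_by_id: dict[str, str]) -> tuple[list[str], list[str]]:
    """Route each archived item id into a crawl or upload list by its type."""
    crawls = [cid for cid, t in types_by_id.items() if t == "crawl"]
    uploads = [cid for cid, t in types_by_id.items() if t == "upload"]
    return crawls, uploads

crawls, uploads = split_by_type({"a": "crawl", "b": "upload", "c": "crawl"})
assert crawls == ["a", "c"] and uploads == ["b"]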
@@ -690,6 +765,7 @@ def init_base_crawls_api(
     @app.post("/orgs/{oid}/all-crawls/delete", tags=["all-crawls"])
     async def delete_crawls_all_types(
         delete_list: DeleteCrawlList,
+        user: User = Depends(user_dep),
         org: Organization = Depends(org_crawl_dep),
     ):
-        return await ops.delete_crawls_all_types(delete_list, org)
+        return await ops.delete_crawls_all_types(delete_list, org, user)
 
@@ -203,12 +203,16 @@ class CrawlOps(BaseCrawlOps):
         return crawls, total
 
     async def delete_crawls(
-        self, org: Organization, delete_list: DeleteCrawlList, type_="crawl"
+        self,
+        org: Organization,
+        delete_list: DeleteCrawlList,
+        type_="crawl",
+        user: Optional[User] = None,
     ):
         """Delete a list of crawls by id for given org"""
 
         count, cids_to_update, quota_reached = await super().delete_crawls(
-            org, delete_list, type_
+            org, delete_list, type_, user
         )
 
         if count < 1:
@@ -295,65 +299,6 @@ class CrawlOps(BaseCrawlOps):
 
         return True
 
-    async def update_crawl_state(self, crawl_id: str, state: str):
-        """called only when job container is being stopped/canceled"""
-
-        data = {"state": state}
-        # if cancelation, set the finish time here
-        if state == "canceled":
-            data["finished"] = dt_now()
-
-        await self.crawls.find_one_and_update(
-            {
-                "_id": crawl_id,
-                "type": "crawl",
-                "state": {"$in": RUNNING_AND_STARTING_STATES},
-            },
-            {"$set": data},
-        )
-
-    async def shutdown_crawl(self, crawl_id: str, org: Organization, graceful: bool):
-        """stop or cancel specified crawl"""
-        result = None
-        try:
-            result = await self.crawl_manager.shutdown_crawl(
-                crawl_id, org.id_str, graceful=graceful
-            )
-
-            if result.get("success"):
-                if graceful:
-                    await self.crawls.find_one_and_update(
-                        {"_id": crawl_id, "type": "crawl", "oid": org.id},
-                        {"$set": {"stopping": True}},
-                    )
-                return result
-
-        except Exception as exc:
-            # pylint: disable=raise-missing-from
-            # if reached here, probably crawl doesn't exist anymore
-            raise HTTPException(
-                status_code=404, detail=f"crawl_not_found, (details: {exc})"
-            )
-
-        # if job no longer running, canceling is considered success,
-        # but graceful stoppage is not possible, so would be a failure
-        if result.get("error") == "Not Found":
-            if not graceful:
-                await self.update_crawl_state(crawl_id, "canceled")
-                crawl = await self.get_crawl_raw(crawl_id, org)
-                if not await self.crawl_configs.stats_recompute_last(
-                    crawl["cid"], 0, -1
-                ):
-                    raise HTTPException(
-                        status_code=404,
-                        detail=f"crawl_config_not_found: {crawl['cid']}",
-                    )
-
-                return {"success": True}
-
-        # return whatever detail may be included in the response
-        raise HTTPException(status_code=400, detail=result)
-
     async def _crawl_queue_len(self, redis, key):
         try:
             return await redis.zcard(key)
@@ -774,25 +719,7 @@ def init_crawls_api(
         user: User = Depends(user_dep),
         org: Organization = Depends(org_crawl_dep),
     ):
-        # Ensure user has appropriate permissions for all crawls in list:
-        # - Crawler users can delete their own crawls
-        # - Org owners can delete any crawls in org
-        for crawl_id in delete_list.crawl_ids:
-            crawl_raw = await ops.get_crawl_raw(crawl_id, org)
-            crawl = Crawl.from_dict(crawl_raw)
-            if (crawl.userid != user.id) and not org.is_owner(user):
-                raise HTTPException(status_code=403, detail="Not Allowed")
-
-            if not crawl.finished:
-                try:
-                    await ops.shutdown_crawl(crawl_id, org, graceful=False)
-                except Exception as exc:
-                    # pylint: disable=raise-missing-from
-                    raise HTTPException(
-                        status_code=400, detail=f"Error Stopping Crawl: {exc}"
-                    )
-
-        return await ops.delete_crawls(org, delete_list)
+        return await ops.delete_crawls(org, delete_list, "crawl", user)
 
     @app.get(
         "/orgs/all/crawls/{crawl_id}/replay.json",
@@ -187,10 +187,15 @@ class UploadOps(BaseCrawlOps):
 
         return {"id": crawl_id, "added": True, "storageQuotaReached": quota_reached}
 
-    async def delete_uploads(self, delete_list: DeleteCrawlList, org: Organization):
+    async def delete_uploads(
+        self,
+        delete_list: DeleteCrawlList,
+        org: Organization,
+        user: Optional[User] = None,
+    ):
         """Delete uploaded crawls"""
         deleted_count, _, quota_reached = await self.delete_crawls(
-            org, delete_list, "upload"
+            org, delete_list, "upload", user
         )
 
         if deleted_count < 1:
@@ -399,6 +404,7 @@ def init_uploads_api(
     @app.post("/orgs/{oid}/uploads/delete", tags=["uploads"])
     async def delete_uploads(
         delete_list: DeleteCrawlList,
+        user: User = Depends(user_dep),
         org: Organization = Depends(org_crawl_dep),
     ):
-        return await ops.delete_uploads(delete_list, org)
+        return await ops.delete_uploads(delete_list, org, user)
 
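
As with the other endpoints, /uploads/delete now threads the requesting user through to the permission check; a usage sketch mirroring the updated tests below (placeholder headers and id; the success status is assumed from the tests):

import requests

# The uploader (or an org owner) can delete the upload; a crawler user who
# didn't create it gets 403 "not_allowed" on the same request.
r = requests.post(
    f"{API_PREFIX}/orgs/{org_id}/uploads/delete",
    headers=admin_auth_headers,  # placeholder credentials
    json={"crawl_ids": [upload_id]},
)
assert r.status_code == 200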
@@ -308,7 +308,7 @@ def test_delete_crawls_crawler(
     )
     assert r.status_code == 403
     data = r.json()
-    assert data["detail"] == "Not Allowed"
+    assert data["detail"] == "not_allowed"
 
     # Test that crawler user can delete own crawl
     r = requests.post(
@@ -302,7 +302,17 @@ def test_update_upload_metadata(admin_auth_headers, default_org_id):
     assert data["collectionIds"] == UPDATED_COLLECTION_IDS
 
 
-def test_delete_stream_upload(admin_auth_headers, default_org_id):
+def test_delete_stream_upload(admin_auth_headers, crawler_auth_headers, default_org_id):
+    # Verify non-admin user who didn't upload crawl can't delete it
+    r = requests.post(
+        f"{API_PREFIX}/orgs/{default_org_id}/uploads/delete",
+        headers=crawler_auth_headers,
+        json={"crawl_ids": [upload_id]},
+    )
+    assert r.status_code == 403
+    assert r.json()["detail"] == "not_allowed"
+
+    # Verify user who created upload can delete it
     r = requests.post(
         f"{API_PREFIX}/orgs/{default_org_id}/uploads/delete",
         headers=admin_auth_headers,
@@ -842,6 +852,7 @@ def test_update_upload_metadata_all_crawls(admin_auth_headers, default_org_id):
 
 def test_delete_form_upload_and_crawls_from_all_crawls(
     admin_auth_headers,
+    crawler_auth_headers,
     default_org_id,
     all_crawls_delete_crawl_ids,
     all_crawls_delete_config_id,
@@ -890,6 +901,15 @@ def test_delete_form_upload_and_crawls_from_all_crawls(
     combined_crawl_size = crawl_1_size + crawl_2_size
     total_size = combined_crawl_size + upload_size
 
+    # Verify that non-admin user can't delete another's items
+    r = requests.post(
+        f"{API_PREFIX}/orgs/{default_org_id}/all-crawls/delete",
+        headers=crawler_auth_headers,
+        json={"crawl_ids": crawls_to_delete},
+    )
+    assert r.status_code == 403
+    assert r.json()["detail"] == "not_allowed"
+
     # Delete mixed type archived items
     r = requests.post(
         f"{API_PREFIX}/orgs/{default_org_id}/all-crawls/delete",
@@ -1024,10 +1024,20 @@ ${this.crawl?.description}
         icon: "check2-circle",
       });
     } catch (e: any) {
+      let message = msg(
+        str`Sorry, couldn't delete archived item at this time.`
+      );
+      if (e.isApiError) {
+        if (e.details == "not_allowed") {
+          message = msg(
+            str`Only org owners can delete other users' archived items.`
+          );
+        } else if (e.message) {
+          message = e.message;
+        }
+      }
       this.notify({
-        message:
-          (e.isApiError && e.message) ||
-          msg("Sorry, couldn't run crawl at this time."),
+        message: message,
         variant: "danger",
         icon: "exclamation-octagon",
       });
@@ -719,10 +719,20 @@ export class CrawlsList extends LiteElement {
       });
       this.fetchArchivedItems();
     } catch (e: any) {
+      let message = msg(
+        str`Sorry, couldn't delete archived item at this time.`
+      );
+      if (e.isApiError) {
+        if (e.details == "not_allowed") {
+          message = msg(
+            str`Only org owners can delete other users' archived items.`
+          );
+        } else if (e.message) {
+          message = e.message;
+        }
+      }
       this.notify({
-        message:
-          (e.isApiError && e.message) ||
-          msg(str`Sorry, couldn't delete archived item at this time.`),
+        message: message,
         variant: "danger",
         icon: "exclamation-octagon",
       });
@@ -1626,10 +1626,20 @@ export class WorkflowDetail extends LiteElement {
       });
       this.fetchCrawls();
     } catch (e: any) {
+      let message = msg(
+        str`Sorry, couldn't delete archived item at this time.`
+      );
+      if (e.isApiError) {
+        if (e.details == "not_allowed") {
+          message = msg(
+            str`Only org owners can delete other users' archived items.`
+          );
+        } else if (e.message) {
+          message = e.message;
+        }
+      }
       this.notify({
-        message:
-          (e.isApiError && e.message) ||
-          msg("Sorry, couldn't run crawl at this time."),
+        message: message,
         variant: "danger",
         icon: "exclamation-octagon",
       });