Add API endpoint to delete org (#1448)
Fixes #903

Adds a superuser-only API endpoint to delete an org and all of its data.

---------

Co-authored-by: Ilya Kreymer <ikreymer@gmail.com>
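For reference, the tests added at the end of this diff call the new endpoint as DELETE {API_PREFIX}/orgs/{org_id} with superuser credentials. A minimal usage sketch follows; the host, org id, and token below are placeholders (assumptions), not values from this PR:

# Minimal usage sketch: delete an org as the superuser.
# Host URL, org id, and token are placeholders, not part of this PR.
import requests

API_PREFIX = "https://app.example.com/api"  # assumed deployment URL

resp = requests.delete(
    f"{API_PREFIX}/orgs/<org-id>",  # org id placeholder
    headers={"Authorization": "Bearer <superuser-jwt>"},  # must belong to a superuser
)
assert resp.status_code == 200
print(resp.json())  # expected: {"deleted": True}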
Parent: 9088101ef6
Commit: 192737ea99
@@ -197,7 +197,8 @@ class BackgroundJobOps:
             job_id = await self.create_delete_replica_job(
                 org, file, object_id, object_type, replica_ref
             )
-            ids.append(job_id)
+            if job_id:
+                ids.append(job_id)
 
         return {"added": True, "ids": ids}
@@ -209,17 +210,17 @@ class BackgroundJobOps:
         object_type: str,
         replica_ref: StorageRef,
         existing_job_id: Optional[str] = None,
-    ) -> str:
+    ) -> Optional[str]:
         """Create a job to delete one replica of a given file"""
-        replica_storage = self.storage_ops.get_org_storage_by_ref(org, replica_ref)
-        replica_endpoint, bucket_suffix = self.strip_bucket(
-            replica_storage.endpoint_url
-        )
-        replica_file_path = bucket_suffix + file.filename
-
-        job_type = BgJobType.DELETE_REPLICA.value
-
         try:
+            replica_storage = self.storage_ops.get_org_storage_by_ref(org, replica_ref)
+            replica_endpoint, bucket_suffix = self.strip_bucket(
+                replica_storage.endpoint_url
+            )
+            replica_file_path = bucket_suffix + file.filename
+
+            job_type = BgJobType.DELETE_REPLICA.value
+
             job_id = await self.crawl_manager.run_replica_job(
                 oid=str(org.id),
                 job_type=job_type,
@@ -262,11 +263,13 @@ class BackgroundJobOps:
 
             return job_id
 
+        # pylint: disable=broad-exception-caught
         except Exception as exc:
-            # pylint: disable=raise-missing-from
-            raise HTTPException(
-                status_code=400, detail=f"Error starting background job: {exc}"
+            print(
+                "warning: replica deletion job could not be started "
+                + f"for {object_type} {file}: {exc}"
             )
+            return None
 
     async def job_finished(
         self,
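Note: together, the two hunks above change create_delete_replica_job so that a failure to start the replica deletion job logs a warning and returns None instead of raising an HTTPException, which is why the return type becomes Optional[str] and the caller in the first hunk now guards with "if job_id:". A self-contained sketch of that convention, using hypothetical names in place of the real crawl manager call:

# Sketch of the new calling convention (hypothetical, simplified names; not the actual module).
import asyncio
from typing import Optional


async def launch_background_job() -> str:
    """Stand-in for crawl_manager.run_replica_job(); may raise if the job cannot start."""
    return "job-123"


async def start_job() -> Optional[str]:
    try:
        return await launch_background_job()
    # pylint: disable=broad-exception-caught
    except Exception as exc:
        print(f"warning: job could not be started: {exc}")
        return None


async def start_all(num_files: int) -> dict:
    ids = []
    for _ in range(num_files):
        job_id = await start_job()
        if job_id:  # skip jobs that failed to start instead of aborting the whole batch
            ids.append(job_id)
    return {"added": True, "ids": ids}


print(asyncio.run(start_all(3)))  # {'added': True, 'ids': ['job-123', 'job-123', 'job-123']}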
@@ -162,7 +162,7 @@ def main():
 
     init_uploads_api(*base_crawl_init)
 
-    org_ops.set_base_crawl_ops(base_crawl_ops)
+    org_ops.set_ops(base_crawl_ops, profiles, coll_ops)
 
     user_manager.set_ops(org_ops, crawl_config_ops, base_crawl_ops)
 
@@ -54,6 +54,7 @@ from .models import (
     Collection,
     OrgOutExport,
     PageWithAllQA,
+    DeleteCrawlList,
 )
 from .pagination import DEFAULT_PAGE_SIZE, paginated_format
 from .utils import slug_from_name, validate_slug, JSONSerializer
@@ -61,15 +62,20 @@ from .utils import slug_from_name, validate_slug, JSONSerializer
 if TYPE_CHECKING:
     from .invites import InviteOps
     from .basecrawls import BaseCrawlOps
+    from .colls import CollectionOps
+    from .profiles import ProfileOps
     from .users import UserManager
 else:
-    InviteOps = BaseCrawlOps = UserManager = object
+    InviteOps = BaseCrawlOps = ProfileOps = CollectionOps = UserManager = object
 
 
 DEFAULT_ORG = os.environ.get("DEFAULT_ORG", "My Organization")
 
 MAX_CRAWL_SCALE = int(os.environ.get("MAX_CRAWL_SCALE", 3))
 
+# number of items to delete at a time
+DEL_ITEMS = 1000
+
 
 # ============================================================================
 # pylint: disable=too-many-public-methods, too-many-instance-attributes, too-many-locals
@@ -91,6 +97,7 @@ class OrgOps:
         self.users_db = mdb["users"]
         self.pages_db = mdb["pages"]
         self.version_db = mdb["version"]
+        self.invites_db = mdb["invites"]
 
         self.router = None
         self.org_viewer_dep = None
@@ -104,9 +111,17 @@ class OrgOps:
         self.user_manager = user_manager
         self.register_to_org_id = os.environ.get("REGISTER_TO_ORG_ID")
 
-    def set_base_crawl_ops(self, base_crawl_ops: BaseCrawlOps) -> None:
+    def set_ops(
+        self,
+        base_crawl_ops: BaseCrawlOps,
+        profile_ops: ProfileOps,
+        coll_ops: CollectionOps,
+    ) -> None:
         """Set base crawl ops"""
+        # pylint: disable=attribute-defined-outside-init
         self.base_crawl_ops = base_crawl_ops
+        self.profile_ops = profile_ops
+        self.coll_ops = coll_ops
 
     def set_default_primary_storage(self, storage: StorageRef):
         """set default primary storage"""
@@ -1023,6 +1038,59 @@ class OrgOps:
             collection = json_stream.to_standard_types(collection)
             await self.colls_db.insert_one(Collection.from_dict(collection).to_dict())
 
+    async def delete_org_and_data(self, org: Organization, user_manager: UserManager):
+        """Delete org and all of its associated data."""
+        # Delete archived items
+        cursor = self.crawls_db.find({"oid": org.id}, projection=["_id"])
+        items = await cursor.to_list(length=DEL_ITEMS)
+        while items:
+            item_ids = [item["_id"] for item in items]
+
+            await self.base_crawl_ops.delete_crawls_all_types(
+                delete_list=DeleteCrawlList(crawl_ids=item_ids), org=org
+            )
+
+            items = await cursor.to_list(length=DEL_ITEMS)
+
+        # Delete workflows and revisions
+        cursor = self.crawl_configs_db.find({"oid": org.id}, projection=["_id"])
+        workflows = await cursor.to_list(length=DEL_ITEMS)
+        while workflows:
+            workflow_ids = [workflow["_id"] for workflow in workflows]
+            await self.configs_revs_db.delete_many({"cid": {"$in": workflow_ids}})
+
+            workflows = await cursor.to_list(length=DEL_ITEMS)
+
+        await self.crawl_configs_db.delete_many({"oid": org.id})
+
+        # Delete profiles
+        async for profile in self.profiles_db.find({"oid": org.id}, projection=["_id"]):
+            await self.profile_ops.delete_profile(profile["_id"], org)
+
+        # Delete collections
+        async for coll in self.colls_db.find({"oid": org.id}, projection=["_id"]):
+            await self.coll_ops.delete_collection(coll["_id"], org)
+
+        # Delete users that only belong to this org
+        for org_user_id in org.users.keys():
+            user = await user_manager.get_by_id(UUID(org_user_id))
+            if not user:
+                continue
+            orgs, total_orgs = await self.get_orgs_for_user(user)
+            if total_orgs == 1:
+                first_org = orgs[0]
+                if first_org.id != org.id:
+                    continue
+                await self.users_db.delete_one({"id": user.id})
+
+        # Delete invites
+        await self.invites_db.delete_many({"oid": org.id})
+
+        # Delete org
+        await self.orgs.delete_one({"_id": org.id})
+
+        return {"deleted": True}
+
+
 
 # ============================================================================
 # pylint: disable=too-many-statements, too-many-arguments
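Note: delete_org_and_data drains large collections in batches of DEL_ITEMS by calling cursor.to_list(length=DEL_ITEMS) repeatedly on the same cursor; each call returns the next batch until the cursor is exhausted. A standalone sketch of that pattern, assuming Motor (the async MongoDB driver) and placeholder database/collection names:

# Batched-deletion sketch; the MongoDB URI and db/collection names are assumptions.
import asyncio

from motor.motor_asyncio import AsyncIOMotorClient

DEL_ITEMS = 1000  # number of items to process per batch


async def delete_in_batches(oid: str) -> None:
    client = AsyncIOMotorClient("mongodb://localhost:27017")  # assumed URI
    crawls = client["btrix"]["crawls"]  # assumed db/collection names

    cursor = crawls.find({"oid": oid}, projection=["_id"])
    items = await cursor.to_list(length=DEL_ITEMS)
    while items:
        item_ids = [item["_id"] for item in items]
        # delete this batch, then fetch the next one from the same cursor
        await crawls.delete_many({"_id": {"$in": item_ids}})
        items = await cursor.to_list(length=DEL_ITEMS)


# asyncio.run(delete_in_batches("some-org-id"))  # requires a reachable MongoDB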
@@ -1165,6 +1233,15 @@ def init_orgs_api(
         org_out.execMinutesQuotaReached = await ops.exec_mins_quota_reached(org.id)
         return org_out
 
+    @router.delete("", tags=["organizations"])
+    async def delete_org(
+        org: Organization = Depends(org_dep), user: User = Depends(user_dep)
+    ):
+        if not user.is_superuser:
+            raise HTTPException(status_code=403, detail="Not Allowed")
+
+        return await ops.delete_org_and_data(org, user_manager)
+
     @router.post("/rename", tags=["organizations"])
     async def rename_org(
         rename: RenameOrg,
@@ -1028,5 +1028,5 @@ def test_delete_form_upload_and_crawls_from_all_crawls(
         if count + 1 == MAX_ATTEMPTS:
             assert False
 
-        time.sleep(5)
+        time.sleep(10)
         count += 1
backend/test/test_z_delete_org.py (new file, 42 lines)
@@ -0,0 +1,42 @@
+import requests
+
+from .conftest import API_PREFIX
+
+
+def test_delete_org_non_superadmin(crawler_auth_headers, default_org_id):
+    # Assert that non-superadmin can't delete org
+    r = requests.delete(
+        f"{API_PREFIX}/orgs/{default_org_id}", headers=crawler_auth_headers
+    )
+    assert r.status_code == 403
+    assert r.json()["detail"] == "Not Allowed"
+
+
+def test_delete_org_superadmin(admin_auth_headers, default_org_id):
+    # Track items in org to ensure they're deleted later (we may want to expand
+    # this, but currently only have the ability to check items across all orgs)
+    item_ids = []
+
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/all-crawls", headers=admin_auth_headers
+    )
+    assert r.status_code == 200
+    data = r.json()
+    assert data["total"] > 0
+    for item in data["items"]:
+        item_ids.append(item["id"])
+
+    # Delete org and its data
+    r = requests.delete(
+        f"{API_PREFIX}/orgs/{default_org_id}", headers=admin_auth_headers
+    )
+    assert r.status_code == 200
+    assert r.json()["deleted"]
+
+    # Ensure items got deleted
+    for item_id in item_ids:
+        r = requests.get(
+            f"{API_PREFIX}/orgs/all/all-crawls/{item_id}/replay.json",
+            headers=admin_auth_headers,
+        )
+        assert r.status_code == 404
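The new tests presumably run with the rest of the backend integration suite; the fixtures they use (admin_auth_headers, crawler_auth_headers, default_org_id) come from backend/test/conftest.py and expect a deployed test backend. An assumed invocation:

# Assumed invocation; the conftest fixtures require a running backend to authenticate against.
import pytest

pytest.main(["-v", "backend/test/test_z_delete_org.py"])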