Add API endpoint to recalculate org storage (#1943)
Fixes #1942. This process might be a bit slow for large orgs; we may consider moving it to a background job in #1898.
This commit is contained in:
parent
6ccaad26d8
commit
2237120cd5
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
from typing import Optional, List, Union, Dict, Any, Type, TYPE_CHECKING, cast
|
from typing import Optional, List, Union, Dict, Any, Type, TYPE_CHECKING, cast, Tuple
|
||||||
from uuid import UUID
|
from uuid import UUID
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
|
||||||
@ -797,6 +797,34 @@ class BaseCrawlOps:
|
|||||||
"firstSeeds": list(first_seeds),
|
"firstSeeds": list(first_seeds),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async def calculate_org_crawl_file_storage(
    self, oid: UUID, type_: Optional[str] = None
) -> Tuple[int, int, int]:
    """Calculate and return total size of crawl files in org.

    Iterates over the documents in ``self.crawls`` that belong to the org
    and sums the ``size`` of every entry in each document's ``files`` list.

    :param oid: id of the org whose items are measured
    :param type_: optional item type to restrict the query to (the values
        handled here are "crawl" and "upload"). With the default of None,
        items of every type are counted.

    Returns tuple of (total, crawls only, uploads only)
    """
    total_size = 0
    crawls_size = 0
    uploads_size = 0

    query: Dict[str, Any] = {"oid": oid}
    if type_ is not None:
        # Honor the caller's filter; it used to be overwritten by the
        # per-item type read inside the loop and therefore had no effect.
        query["type"] = type_

    async for crawl_dict in self.crawls.find(query):
        # Tolerate a missing "files" key as well as an explicit None value
        files = crawl_dict.get("files") or []
        item_size = sum(file_.get("size", 0) for file_ in files)

        total_size += item_size

        # Kept in its own local so the ``type_`` argument is not shadowed
        item_type = crawl_dict.get("type")
        if item_type == "crawl":
            crawls_size += item_size
        elif item_type == "upload":
            uploads_size += item_size

    return total_size, crawls_size, uploads_size
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
def init_base_crawls_api(app, user_dep, *args):
|
def init_base_crawls_api(app, user_dep, *args):
|
||||||
|
@ -69,6 +69,7 @@ from .models import (
|
|||||||
UpdatedResponse,
|
UpdatedResponse,
|
||||||
AddedResponse,
|
AddedResponse,
|
||||||
AddedResponseId,
|
AddedResponseId,
|
||||||
|
SuccessResponse,
|
||||||
OrgInviteResponse,
|
OrgInviteResponse,
|
||||||
OrgAcceptInviteResponse,
|
OrgAcceptInviteResponse,
|
||||||
OrgDeleteInviteResponse,
|
OrgDeleteInviteResponse,
|
||||||
@ -1319,6 +1320,39 @@ class OrgOps:
|
|||||||
# Delete org
|
# Delete org
|
||||||
await self.orgs.delete_one({"_id": org.id})
|
await self.orgs.delete_one({"_id": org.id})
|
||||||
|
|
||||||
|
async def recalculate_storage(self, org: Organization) -> dict[str, bool]:
    """Recalculate org storage use

    Asks ``base_crawl_ops`` for the crawl/upload file totals and
    ``profile_ops`` for the profile file total, then writes the results to
    the org document as ``bytesStored`` (crawl files plus profiles),
    ``bytesStoredCrawls``, ``bytesStoredUploads`` and
    ``bytesStoredProfiles``.

    :param org: organization whose stored-bytes counters are rewritten
    :returns: ``{"success": True}`` once the org document has been updated
    :raises HTTPException: status 400 if calculating or saving the sizes
        raises any exception; the original error is chained as the cause
    """
    try:
        total_crawl_size, crawl_size, upload_size = (
            await self.base_crawl_ops.calculate_org_crawl_file_storage(
                org.id,
            )
        )
        profile_size = await self.profile_ops.calculate_org_profile_file_storage(
            org.id
        )

        org_size = total_crawl_size + profile_size

        await self.orgs.find_one_and_update(
            {"_id": org.id},
            {
                "$set": {
                    "bytesStored": org_size,
                    "bytesStoredCrawls": crawl_size,
                    "bytesStoredUploads": upload_size,
                    "bytesStoredProfiles": profile_size,
                }
            },
        )
    # pylint: disable=broad-exception-caught
    except Exception as err:
        # Chain explicitly so the underlying error stays attached as
        # __cause__ instead of being reported as an error raised while
        # handling another one.
        raise HTTPException(
            status_code=400, detail=f"Error calculating size: {err}"
        ) from err

    return {"success": True}
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# pylint: disable=too-many-statements, too-many-arguments
|
# pylint: disable=too-many-statements, too-many-arguments
|
||||||
@ -1534,6 +1568,12 @@ def init_orgs_api(
|
|||||||
|
|
||||||
return {"updated": True}
|
return {"updated": True}
|
||||||
|
|
||||||
|
# POST <org router prefix>/recalculate-storage
# Recomputes the org's stored-bytes counters via ops.recalculate_storage().
# NOTE(review): access appears to be limited to org owners through
# org_owner_dep -- confirm against that dependency's definition.
@router.post(
    "/recalculate-storage",
    tags=["organizations"],
    response_model=SuccessResponse,
)
async def recalculate_org_storage(org: Organization = Depends(org_owner_dep)):
    # The ops layer does the work and returns the response payload
    outcome = await ops.recalculate_storage(org)
    return outcome
|
||||||
|
|
||||||
@router.post("/invite", tags=["invites"], response_model=OrgInviteResponse)
|
@router.post("/invite", tags=["invites"], response_model=OrgInviteResponse)
|
||||||
async def invite_user_to_org(
|
async def invite_user_to_org(
|
||||||
invite: InviteToOrgRequest,
|
invite: InviteToOrgRequest,
|
||||||
|
@ -475,6 +475,18 @@ class ProfileOps:
|
|||||||
{"$push": {"resource.replicas": {"name": ref.name, "custom": ref.custom}}},
|
{"$push": {"resource.replicas": {"name": ref.name, "custom": ref.custom}}},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
async def calculate_org_profile_file_storage(self, oid: UUID) -> int:
    """Calculate and return total size of profile files in org

    Looks up every document in ``self.profiles`` for the given org id and
    adds up the ``size`` of each document's ``resource`` entry. Documents
    without a resource, and resources without a size, contribute 0.
    """
    resources = [
        profile_doc.get("resource")
        async for profile_doc in self.profiles.find({"oid": oid})
    ]

    # Skip profiles that have no (or an empty) resource entry
    return sum(resource.get("size", 0) for resource in resources if resource)
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# pylint: disable=redefined-builtin,invalid-name,too-many-locals,too-many-arguments
|
# pylint: disable=redefined-builtin,invalid-name,too-many-locals,too-many-arguments
|
||||||
|
@ -3,6 +3,29 @@ import requests
|
|||||||
from .conftest import API_PREFIX
|
from .conftest import API_PREFIX
|
||||||
|
|
||||||
|
|
||||||
|
def test_recalculate_org_storage(admin_auth_headers, default_org_id):
    """Recalculating org storage succeeds and yields non-zero byte counters."""
    # This runs before the org is deleted: by now resources of every type
    # have been created, so each storage counter should end up positive.
    recalc_resp = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/recalculate-storage",
        headers=admin_auth_headers,
    )
    assert recalc_resp.status_code == 200
    assert recalc_resp.json()["success"]

    # Fetch the org again to inspect the freshly written counters
    org_resp = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}",
        headers=admin_auth_headers,
    )
    assert org_resp.status_code == 200
    org_data = org_resp.json()

    for counter in (
        "bytesStored",
        "bytesStoredCrawls",
        "bytesStoredUploads",
        "bytesStoredProfiles",
    ):
        assert org_data[counter] > 0
|
||||||
|
|
||||||
|
|
||||||
def test_delete_org_non_superadmin(crawler_auth_headers, default_org_id):
|
def test_delete_org_non_superadmin(crawler_auth_headers, default_org_id):
|
||||||
# Assert that non-superadmin can't delete org
|
# Assert that non-superadmin can't delete org
|
||||||
r = requests.delete(
|
r = requests.delete(
|
||||||
|
Loading…
Reference in New Issue
Block a user