Add API endpoint to recalculate org storage (#1943)
Fixes #1942. This process might be a bit slow for large orgs; we may consider moving it to a background job in #1898.
This commit is contained in:
parent
6ccaad26d8
commit
2237120cd5
@ -2,7 +2,7 @@
|
||||
|
||||
import os
|
||||
from datetime import timedelta
|
||||
from typing import Optional, List, Union, Dict, Any, Type, TYPE_CHECKING, cast
|
||||
from typing import Optional, List, Union, Dict, Any, Type, TYPE_CHECKING, cast, Tuple
|
||||
from uuid import UUID
|
||||
import urllib.parse
|
||||
|
||||
@ -797,6 +797,34 @@ class BaseCrawlOps:
|
||||
"firstSeeds": list(first_seeds),
|
||||
}
|
||||
|
||||
async def calculate_org_crawl_file_storage(
    self, oid: UUID, type_: Optional[str] = None
) -> Tuple[int, int, int]:
    """Calculate and return total size of crawl files in org.

    If type_ is given, only items whose "type" field equals it are
    counted; otherwise every item belonging to the org is counted.

    Returns tuple of (total, crawls only, uploads only)
    """
    total_size = 0
    crawls_size = 0
    uploads_size = 0

    query: Dict[str, Any] = {"oid": oid}
    if type_ is not None:
        query["type"] = type_

    async for crawl_dict in self.crawls.find(query):
        # Use a separate local name so the type_ argument is not overwritten
        item_type = crawl_dict.get("type")

        # Tolerate a missing or null "files" field
        files = crawl_dict.get("files") or []
        item_size = sum(file_.get("size", 0) for file_ in files)

        total_size += item_size
        if item_type == "crawl":
            crawls_size += item_size
        elif item_type == "upload":
            uploads_size += item_size

    return total_size, crawls_size, uploads_size
|
||||
|
||||
|
||||
# ============================================================================
|
||||
def init_base_crawls_api(app, user_dep, *args):
|
||||
|
@ -69,6 +69,7 @@ from .models import (
|
||||
UpdatedResponse,
|
||||
AddedResponse,
|
||||
AddedResponseId,
|
||||
SuccessResponse,
|
||||
OrgInviteResponse,
|
||||
OrgAcceptInviteResponse,
|
||||
OrgDeleteInviteResponse,
|
||||
@ -1319,6 +1320,39 @@ class OrgOps:
|
||||
# Delete org
|
||||
await self.orgs.delete_one({"_id": org.id})
|
||||
|
||||
async def recalculate_storage(self, org: Organization) -> dict[str, bool]:
    """Recalculate org storage use.

    Asks base_crawl_ops and profile_ops for the org's current file sizes
    and overwrites the org's bytesStored* fields with the results.

    Returns {"success": True}. Any failure while calculating or saving is
    re-raised as an HTTPException with status 400, chained to the
    original error.
    """
    try:
        total_crawl_size, crawl_size, upload_size = (
            await self.base_crawl_ops.calculate_org_crawl_file_storage(org.id)
        )
        profile_size = await self.profile_ops.calculate_org_profile_file_storage(
            org.id
        )

        # Overall usage is crawl files (crawls + uploads) plus profile files
        org_size = total_crawl_size + profile_size

        await self.orgs.find_one_and_update(
            {"_id": org.id},
            {
                "$set": {
                    "bytesStored": org_size,
                    "bytesStoredCrawls": crawl_size,
                    "bytesStoredUploads": upload_size,
                    "bytesStoredProfiles": profile_size,
                }
            },
        )
    # pylint: disable=broad-exception-caught
    except Exception as err:
        # Chain the original error so its traceback is preserved
        raise HTTPException(
            status_code=400, detail=f"Error calculating size: {err}"
        ) from err

    return {"success": True}
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# pylint: disable=too-many-statements, too-many-arguments
|
||||
@ -1534,6 +1568,12 @@ def init_orgs_api(
|
||||
|
||||
return {"updated": True}
|
||||
|
||||
# POST /recalculate-storage: the org is resolved through org_owner_dep and
# the recalculation itself is delegated to ops.recalculate_storage.
@router.post(
    "/recalculate-storage",
    tags=["organizations"],
    response_model=SuccessResponse,
)
async def recalculate_org_storage(org: Organization = Depends(org_owner_dep)):
    result = await ops.recalculate_storage(org)
    return result
|
||||
|
||||
@router.post("/invite", tags=["invites"], response_model=OrgInviteResponse)
|
||||
async def invite_user_to_org(
|
||||
invite: InviteToOrgRequest,
|
||||
|
@ -475,6 +475,18 @@ class ProfileOps:
|
||||
{"$push": {"resource.replicas": {"name": ref.name, "custom": ref.custom}}},
|
||||
)
|
||||
|
||||
async def calculate_org_profile_file_storage(self, oid: UUID) -> int:
    """Return the combined size of all profile files belonging to the org.

    Each profile's size is read from its "resource" entry; profiles with
    no resource are skipped and a resource without "size" counts as 0.
    """
    resource_sizes = [
        resource.get("size", 0)
        async for profile in self.profiles.find({"oid": oid})
        if (resource := profile.get("resource"))
    ]
    return sum(resource_sizes)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# pylint: disable=redefined-builtin,invalid-name,too-many-locals,too-many-arguments
|
||||
|
@ -3,6 +3,29 @@ import requests
|
||||
from .conftest import API_PREFIX
|
||||
|
||||
|
||||
def test_recalculate_org_storage(admin_auth_headers, default_org_id):
    """Recalculating storage succeeds and leaves every counter non-zero."""
    # Runs before the org is deleted, once resources of all types have
    # been created, so each storage counter should be positive.
    org_url = f"{API_PREFIX}/orgs/{default_org_id}"

    recalc_resp = requests.post(
        f"{org_url}/recalculate-storage",
        headers=admin_auth_headers,
    )
    assert recalc_resp.status_code == 200
    assert recalc_resp.json()["success"]

    org_resp = requests.get(org_url, headers=admin_auth_headers)
    assert org_resp.status_code == 200
    org_data = org_resp.json()

    storage_fields = (
        "bytesStored",
        "bytesStoredCrawls",
        "bytesStoredUploads",
        "bytesStoredProfiles",
    )
    for field in storage_fields:
        assert org_data[field] > 0
|
||||
|
||||
|
||||
def test_delete_org_non_superadmin(crawler_auth_headers, default_org_id):
|
||||
# Assert that non-superadmin can't delete org
|
||||
r = requests.delete(
|
||||
|
Loading…
Reference in New Issue
Block a user