Add API endpoint to recalculate org storage (#1943)

Fixes #1942 

This process might be a bit slow for large orgs; we may consider moving it to a background job in #1898.
This commit is contained in:
Tessa Walsh 2024-07-19 21:29:20 -04:00 committed by GitHub
parent 6ccaad26d8
commit 2237120cd5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 104 additions and 1 deletions

View File

@ -2,7 +2,7 @@
import os
from datetime import timedelta
from typing import Optional, List, Union, Dict, Any, Type, TYPE_CHECKING, cast
from typing import Optional, List, Union, Dict, Any, Type, TYPE_CHECKING, cast, Tuple
from uuid import UUID
import urllib.parse
@ -797,6 +797,34 @@ class BaseCrawlOps:
"firstSeeds": list(first_seeds),
}
async def calculate_org_crawl_file_storage(
    self, oid: UUID, type_: Optional[str] = None
) -> Tuple[int, int, int]:
    """Calculate and return total size of crawl files in org.

    :param oid: org id whose items are summed
    :param type_: optional item type filter ("crawl" or "upload"); when
        given, only items of that type contribute to the totals.
        Default None counts everything (previous behavior).

    Returns tuple of (total, crawls only, uploads only)
    """
    total_size = 0
    crawls_size = 0
    uploads_size = 0

    cursor = self.crawls.find({"oid": oid})
    async for crawl_dict in cursor:
        # Bug fix: the loop previously rebound the ``type_`` parameter
        # here, shadowing it and making the filter argument dead code.
        # Use a distinct local and honor the parameter as a filter.
        item_type = crawl_dict.get("type")
        if type_ and item_type != type_:
            continue

        # Sum the sizes of all files attached to this item
        item_size = sum(
            file_.get("size", 0) for file_ in crawl_dict.get("files", [])
        )

        total_size += item_size
        if item_type == "crawl":
            crawls_size += item_size
        if item_type == "upload":
            uploads_size += item_size

    return total_size, crawls_size, uploads_size
# ============================================================================
def init_base_crawls_api(app, user_dep, *args):

View File

@ -69,6 +69,7 @@ from .models import (
UpdatedResponse,
AddedResponse,
AddedResponseId,
SuccessResponse,
OrgInviteResponse,
OrgAcceptInviteResponse,
OrgDeleteInviteResponse,
@ -1319,6 +1320,39 @@ class OrgOps:
# Delete org
await self.orgs.delete_one({"_id": org.id})
async def recalculate_storage(self, org: Organization) -> dict[str, bool]:
    """Recalculate org storage use from current crawl, upload, and
    profile files, and persist the new totals on the org document.

    :param org: organization whose storage totals are recomputed
    :returns: ``{"success": True}`` on completion
    :raises HTTPException: 400 if any step of the recalculation fails
    """
    try:
        total_crawl_size, crawl_size, upload_size = (
            await self.base_crawl_ops.calculate_org_crawl_file_storage(
                org.id,
            )
        )
        profile_size = await self.profile_ops.calculate_org_profile_file_storage(
            org.id
        )

        # Overall total = all crawl/upload files plus profile files
        org_size = total_crawl_size + profile_size

        await self.orgs.find_one_and_update(
            {"_id": org.id},
            {
                "$set": {
                    "bytesStored": org_size,
                    "bytesStoredCrawls": crawl_size,
                    "bytesStoredUploads": upload_size,
                    "bytesStoredProfiles": profile_size,
                }
            },
        )
    # Deliberate broad catch: any failure is surfaced to the API caller
    # as a 400. Chain the cause with ``from err`` so the traceback keeps
    # the original error instead of suppressing it.
    # pylint: disable=broad-exception-caught
    except Exception as err:
        raise HTTPException(
            status_code=400, detail=f"Error calculating size: {err}"
        ) from err

    return {"success": True}
# ============================================================================
# pylint: disable=too-many-statements, too-many-arguments
@ -1534,6 +1568,12 @@ def init_orgs_api(
return {"updated": True}
@router.post(
    "/recalculate-storage", tags=["organizations"], response_model=SuccessResponse
)
async def recalculate_org_storage(org: Organization = Depends(org_owner_dep)):
    """Recalculate the org's storage totals; requires org-owner access."""
    result = await ops.recalculate_storage(org)
    return result
@router.post("/invite", tags=["invites"], response_model=OrgInviteResponse)
async def invite_user_to_org(
invite: InviteToOrgRequest,

View File

@ -475,6 +475,18 @@ class ProfileOps:
{"$push": {"resource.replicas": {"name": ref.name, "custom": ref.custom}}},
)
async def calculate_org_profile_file_storage(self, oid: UUID) -> int:
    """Calculate and return total size of profile files in org"""
    size_total = 0
    # Each profile stores at most one file under "resource"; skip
    # profiles with no resource attached.
    async for profile in self.profiles.find({"oid": oid}):
        resource = profile.get("resource")
        if not resource:
            continue
        size_total += resource.get("size", 0)
    return size_total
# ============================================================================
# pylint: disable=redefined-builtin,invalid-name,too-many-locals,too-many-arguments

View File

@ -3,6 +3,29 @@ import requests
from .conftest import API_PREFIX
def test_recalculate_org_storage(admin_auth_headers, default_org_id):
    """Recalculating org storage succeeds and yields nonzero byte counts."""
    # Trigger a recalculation now that resources of all types exist
    # (must run before any org-deletion tests).
    resp = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/recalculate-storage",
        headers=admin_auth_headers,
    )
    assert resp.status_code == 200
    assert resp.json()["success"]

    # Verify every storage counter on the org is now populated
    resp = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}",
        headers=admin_auth_headers,
    )
    assert resp.status_code == 200
    org = resp.json()
    for field in (
        "bytesStored",
        "bytesStoredCrawls",
        "bytesStoredUploads",
        "bytesStoredProfiles",
    ):
        assert org[field] > 0
def test_delete_org_non_superadmin(crawler_auth_headers, default_org_id):
# Assert that non-superadmin can't delete org
r = requests.delete(