Add API endpoint to recalculate org storage (#1943)
Fixes #1942. This process might be a bit slow for large orgs; we may consider moving it to a background job in #1898.
This commit is contained in:
		
							parent
							
								
									6ccaad26d8
								
							
						
					
					
						commit
						2237120cd5
					
				@ -2,7 +2,7 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
import os
 | 
					import os
 | 
				
			||||||
from datetime import timedelta
 | 
					from datetime import timedelta
 | 
				
			||||||
from typing import Optional, List, Union, Dict, Any, Type, TYPE_CHECKING, cast
 | 
					from typing import Optional, List, Union, Dict, Any, Type, TYPE_CHECKING, cast, Tuple
 | 
				
			||||||
from uuid import UUID
 | 
					from uuid import UUID
 | 
				
			||||||
import urllib.parse
 | 
					import urllib.parse
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -797,6 +797,34 @@ class BaseCrawlOps:
 | 
				
			|||||||
            "firstSeeds": list(first_seeds),
 | 
					            "firstSeeds": list(first_seeds),
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    async def calculate_org_crawl_file_storage(
					        self, oid: UUID, type_: Optional[str] = None
					    ) -> Tuple[int, int, int]:
					        """Calculate and return total size of crawl files in org.

					        Iterates over the org's documents in ``self.crawls`` and sums the
					        ``size`` of every entry in each document's ``files`` list.

					        Args:
					            oid: id of the org whose items are summed.
					            type_: optional item type to restrict the query to. When given,
					                only documents whose ``type`` equals this value are read, so
					                the total covers that type alone. When ``None`` (the
					                default), every item in the org is counted.

					        Returns tuple of (total, crawls only, uploads only)
					        """
					        total_size = 0
					        crawls_size = 0
					        uploads_size = 0

					        query: Dict[str, Any] = {"oid": oid}
					        if type_ is not None:
					            query["type"] = type_

					        async for crawl_dict in self.crawls.find(query):
					            # Kept in its own local: reusing the name ``type_`` here would
					            # clobber the caller's filter argument.
					            item_type = crawl_dict.get("type")

					            # ``or`` fallbacks cover keys that are present but stored as null.
					            item_size = sum(
					                file_.get("size") or 0 for file_ in crawl_dict.get("files") or []
					            )

					            total_size += item_size
					            if item_type == "crawl":
					                crawls_size += item_size
					            elif item_type == "upload":
					                uploads_size += item_size

					        return total_size, crawls_size, uploads_size
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# ============================================================================
 | 
					# ============================================================================
 | 
				
			||||||
def init_base_crawls_api(app, user_dep, *args):
 | 
					def init_base_crawls_api(app, user_dep, *args):
 | 
				
			||||||
 | 
				
			|||||||
@ -69,6 +69,7 @@ from .models import (
 | 
				
			|||||||
    UpdatedResponse,
 | 
					    UpdatedResponse,
 | 
				
			||||||
    AddedResponse,
 | 
					    AddedResponse,
 | 
				
			||||||
    AddedResponseId,
 | 
					    AddedResponseId,
 | 
				
			||||||
 | 
					    SuccessResponse,
 | 
				
			||||||
    OrgInviteResponse,
 | 
					    OrgInviteResponse,
 | 
				
			||||||
    OrgAcceptInviteResponse,
 | 
					    OrgAcceptInviteResponse,
 | 
				
			||||||
    OrgDeleteInviteResponse,
 | 
					    OrgDeleteInviteResponse,
 | 
				
			||||||
@ -1319,6 +1320,39 @@ class OrgOps:
 | 
				
			|||||||
        # Delete org
 | 
					        # Delete org
 | 
				
			||||||
        await self.orgs.delete_one({"_id": org.id})
 | 
					        await self.orgs.delete_one({"_id": org.id})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    async def recalculate_storage(self, org: Organization) -> dict[str, bool]:
					        """Recompute and persist the org's stored-bytes counters.

					        Sums crawl/upload file sizes via ``base_crawl_ops`` and profile file
					        sizes via ``profile_ops``, then writes ``bytesStored``,
					        ``bytesStoredCrawls``, ``bytesStoredUploads`` and
					        ``bytesStoredProfiles`` onto the org document.

					        Returns ``{"success": True}``. Any failure while calculating or
					        writing is reported as an ``HTTPException`` with status 400.
					        """
					        try:
					            oid = org.id

					            item_totals = (
					                await self.base_crawl_ops.calculate_org_crawl_file_storage(oid)
					            )
					            all_items_bytes, crawl_bytes, upload_bytes = item_totals

					            profile_bytes = (
					                await self.profile_ops.calculate_org_profile_file_storage(oid)
					            )

					            # Overall usage is every crawl/upload file plus profile files.
					            storage_fields = {
					                "bytesStored": all_items_bytes + profile_bytes,
					                "bytesStoredCrawls": crawl_bytes,
					                "bytesStoredUploads": upload_bytes,
					                "bytesStoredProfiles": profile_bytes,
					            }

					            await self.orgs.find_one_and_update(
					                {"_id": oid}, {"$set": storage_fields}
					            )
					        # pylint: disable=broad-exception-caught, raise-missing-from
					        except Exception as err:
					            raise HTTPException(
					                status_code=400, detail=f"Error calculating size: {err}"
					            )

					        return {"success": True}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# ============================================================================
 | 
					# ============================================================================
 | 
				
			||||||
# pylint: disable=too-many-statements, too-many-arguments
 | 
					# pylint: disable=too-many-statements, too-many-arguments
 | 
				
			||||||
@ -1534,6 +1568,12 @@ def init_orgs_api(
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        return {"updated": True}
 | 
					        return {"updated": True}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # POST <org router>/recalculate-storage
					    # Recomputes the org's stored-bytes counters and responds with a
					    # SuccessResponse payload. The org is resolved through org_owner_dep,
					    # presumably restricting the call to org owners (verify against the
					    # dependency's definition).
					    # NOTE: the recalculation runs inline in the request; the commit message
					    # warns it may be slow for large orgs.
					    @router.post(
					        "/recalculate-storage", tags=["organizations"], response_model=SuccessResponse
					    )
					    async def recalculate_org_storage(org: Organization = Depends(org_owner_dep)):
					        # Delegate to the ops object, which returns {"success": True} or
					        # raises an HTTPException (400) on failure.
					        return await ops.recalculate_storage(org)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @router.post("/invite", tags=["invites"], response_model=OrgInviteResponse)
 | 
					    @router.post("/invite", tags=["invites"], response_model=OrgInviteResponse)
 | 
				
			||||||
    async def invite_user_to_org(
 | 
					    async def invite_user_to_org(
 | 
				
			||||||
        invite: InviteToOrgRequest,
 | 
					        invite: InviteToOrgRequest,
 | 
				
			||||||
 | 
				
			|||||||
@ -475,6 +475,18 @@ class ProfileOps:
 | 
				
			|||||||
            {"$push": {"resource.replicas": {"name": ref.name, "custom": ref.custom}}},
 | 
					            {"$push": {"resource.replicas": {"name": ref.name, "custom": ref.custom}}},
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    async def calculate_org_profile_file_storage(self, oid: UUID) -> int:
					        """Return the combined size of all profile files stored for an org.

					        Reads every document in ``self.profiles`` matching ``oid`` and adds up
					        the ``size`` of its ``resource`` entry. Profiles with no resource
					        contribute nothing; a resource without a ``size`` counts as 0.
					        """
					        size_sum = 0

					        async for profile_doc in self.profiles.find({"oid": oid}):
					            resource = profile_doc.get("resource")
					            if not resource:
					                continue
					            size_sum += resource.get("size", 0)

					        return size_sum
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# ============================================================================
 | 
					# ============================================================================
 | 
				
			||||||
# pylint: disable=redefined-builtin,invalid-name,too-many-locals,too-many-arguments
 | 
					# pylint: disable=redefined-builtin,invalid-name,too-many-locals,too-many-arguments
 | 
				
			||||||
 | 
				
			|||||||
@ -3,6 +3,29 @@ import requests
 | 
				
			|||||||
from .conftest import API_PREFIX
 | 
					from .conftest import API_PREFIX
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def test_recalculate_org_storage(admin_auth_headers, default_org_id):
					    """Recalculating storage succeeds and leaves every counter non-zero.

					    Runs before the org is deleted, at a point where resources of all
					    types have been created.
					    """
					    recalculate_url = f"{API_PREFIX}/orgs/{default_org_id}/recalculate-storage"
					    org_url = f"{API_PREFIX}/orgs/{default_org_id}"

					    # Trigger the recalculation
					    recalc_resp = requests.post(recalculate_url, headers=admin_auth_headers)
					    assert recalc_resp.status_code == 200
					    assert recalc_resp.json()["success"]

					    # Fetch the org and check the persisted counters
					    org_resp = requests.get(org_url, headers=admin_auth_headers)
					    assert org_resp.status_code == 200
					    org_data = org_resp.json()

					    for counter in (
					        "bytesStored",
					        "bytesStoredCrawls",
					        "bytesStoredUploads",
					        "bytesStoredProfiles",
					    ):
					        assert org_data[counter] > 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def test_delete_org_non_superadmin(crawler_auth_headers, default_org_id):
 | 
					def test_delete_org_non_superadmin(crawler_auth_headers, default_org_id):
 | 
				
			||||||
    # Assert that non-superadmin can't delete org
 | 
					    # Assert that non-superadmin can't delete org
 | 
				
			||||||
    r = requests.delete(
 | 
					    r = requests.delete(
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
		Reference in New Issue
	
	Block a user