Add org metrics API endpoint (#1196)
* Initial implementation of org metrics (this can eventually be sped up significantly by precomputing the values and storing them in the db)
* Rename storageQuota to storageQuotaBytes for consistency
* Update tests to include metrics
This commit is contained in:
parent
bd99840fca
commit
83f80d4103
@ -750,6 +750,26 @@ class OrgOut(BaseMongoModel):
|
|||||||
quotas: Optional[OrgQuotas] = OrgQuotas()
|
quotas: Optional[OrgQuotas] = OrgQuotas()
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
class OrgMetrics(BaseModel):
    """Organization API metrics model.

    Computed on demand by OrgOps.get_org_metrics() and returned by the
    GET /orgs/{oid}/metrics endpoint.
    """

    # Storage usage, in raw bytes and in gigabytes rounded to 2 places
    storageUsedBytes: int
    storageUsedGB: float

    # Configured storage quota, in raw bytes and in rounded gigabytes
    storageQuotaBytes: int
    storageQuotaGB: float

    # Successfully archived items; archivedItemCount == crawlCount + uploadCount
    archivedItemCount: int
    crawlCount: int
    uploadCount: int
    # Total pages across successful archived items (from their "stats.done")
    pageCount: int

    profileCount: int

    # Workflow state counts (running vs. starting/queued) and the
    # org's concurrent-crawl limit
    workflowsRunningCount: int
    maxConcurrentCrawls: int
    workflowsQueuedCount: int

    # All collections vs. those flagged isPublic
    collectionsCount: int
    publicCollectionsCount: int
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
||||||
### PAGINATION ###
|
### PAGINATION ###
|
||||||
|
@ -13,11 +13,13 @@ from pymongo import ReturnDocument
|
|||||||
from pymongo.errors import AutoReconnect, DuplicateKeyError
|
from pymongo.errors import AutoReconnect, DuplicateKeyError
|
||||||
from fastapi import APIRouter, Depends, HTTPException, Request
|
from fastapi import APIRouter, Depends, HTTPException, Request
|
||||||
|
|
||||||
|
from .basecrawls import SUCCESSFUL_STATES, RUNNING_STATES, STARTING_STATES
|
||||||
from .models import (
|
from .models import (
|
||||||
Organization,
|
Organization,
|
||||||
DefaultStorage,
|
DefaultStorage,
|
||||||
S3Storage,
|
S3Storage,
|
||||||
OrgQuotas,
|
OrgQuotas,
|
||||||
|
OrgMetrics,
|
||||||
OrgWebhookUrls,
|
OrgWebhookUrls,
|
||||||
RenameOrg,
|
RenameOrg,
|
||||||
UpdateRole,
|
UpdateRole,
|
||||||
@ -35,14 +37,19 @@ from .pagination import DEFAULT_PAGE_SIZE, paginated_format
|
|||||||
|
|
||||||
DEFAULT_ORG = os.environ.get("DEFAULT_ORG", "My Organization")
|
DEFAULT_ORG = os.environ.get("DEFAULT_ORG", "My Organization")
|
||||||
|
|
||||||
|
BYTES_IN_GB = 1_000_000_000
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# pylint: disable=too-many-public-methods
|
# pylint: disable=too-many-public-methods, too-many-instance-attributes
|
||||||
class OrgOps:
|
class OrgOps:
|
||||||
"""Organization API operations"""
|
"""Organization API operations"""
|
||||||
|
|
||||||
def __init__(self, mdb, invites):
|
def __init__(self, mdb, invites):
|
||||||
self.orgs = mdb["organizations"]
|
self.orgs = mdb["organizations"]
|
||||||
|
self.crawls_db = mdb["crawls"]
|
||||||
|
self.profiles_db = mdb["profiles"]
|
||||||
|
self.colls_db = mdb["collections"]
|
||||||
|
|
||||||
self.router = None
|
self.router = None
|
||||||
self.org_viewer_dep = None
|
self.org_viewer_dep = None
|
||||||
@ -326,6 +333,66 @@ class OrgOps:
|
|||||||
{"_id": oid}, {"$inc": {"bytesStored": size}}
|
{"_id": oid}, {"$inc": {"bytesStored": size}}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
async def get_org_metrics(self, org: Organization):
|
||||||
|
"""Calculate and return org metrics"""
|
||||||
|
# pylint: disable=too-many-locals
|
||||||
|
storage_quota_gb = 0
|
||||||
|
storage_quota = await self.get_org_storage_quota(org.id)
|
||||||
|
if storage_quota:
|
||||||
|
storage_quota_gb = round(storage_quota / BYTES_IN_GB)
|
||||||
|
|
||||||
|
max_concurrent_crawls = await self.get_max_concurrent_crawls(org.id)
|
||||||
|
|
||||||
|
# Calculate these counts in loop to avoid having db iterate through
|
||||||
|
# archived items several times.
|
||||||
|
archived_item_count = 0
|
||||||
|
crawl_count = 0
|
||||||
|
upload_count = 0
|
||||||
|
page_count = 0
|
||||||
|
|
||||||
|
cursor = self.crawls_db.find({"oid": org.id})
|
||||||
|
items = await cursor.to_list(length=10_000)
|
||||||
|
for item in items:
|
||||||
|
if item["state"] not in SUCCESSFUL_STATES:
|
||||||
|
continue
|
||||||
|
archived_item_count += 1
|
||||||
|
type_ = item.get("type")
|
||||||
|
if type_ == "crawl":
|
||||||
|
crawl_count += 1
|
||||||
|
if type_ == "upload":
|
||||||
|
upload_count += 1
|
||||||
|
if item.get("stats"):
|
||||||
|
page_count += item.get("stats", {}).get("done", 0)
|
||||||
|
|
||||||
|
profile_count = await self.profiles_db.count_documents({"oid": org.id})
|
||||||
|
workflows_running_count = await self.crawls_db.count_documents(
|
||||||
|
{"oid": org.id, "state": {"$in": list(RUNNING_STATES)}}
|
||||||
|
)
|
||||||
|
workflows_queued_count = await self.crawls_db.count_documents(
|
||||||
|
{"oid": org.id, "state": {"$in": list(STARTING_STATES)}}
|
||||||
|
)
|
||||||
|
collections_count = await self.colls_db.count_documents({"oid": org.id})
|
||||||
|
public_collections_count = await self.colls_db.count_documents(
|
||||||
|
{"oid": org.id, "isPublic": True}
|
||||||
|
)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"storageUsedBytes": org.bytesStored,
|
||||||
|
"storageUsedGB": round((org.bytesStored / BYTES_IN_GB), 2),
|
||||||
|
"storageQuotaBytes": storage_quota,
|
||||||
|
"storageQuotaGB": storage_quota_gb,
|
||||||
|
"archivedItemCount": archived_item_count,
|
||||||
|
"crawlCount": crawl_count,
|
||||||
|
"uploadCount": upload_count,
|
||||||
|
"pageCount": page_count,
|
||||||
|
"profileCount": profile_count,
|
||||||
|
"workflowsRunningCount": workflows_running_count,
|
||||||
|
"maxConcurrentCrawls": max_concurrent_crawls,
|
||||||
|
"workflowsQueuedCount": workflows_queued_count,
|
||||||
|
"collectionsCount": collections_count,
|
||||||
|
"publicCollectionsCount": public_collections_count,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# pylint: disable=too-many-statements
|
# pylint: disable=too-many-statements
|
||||||
@ -579,4 +646,8 @@ def init_orgs_api(app, mdb, user_manager, invites, user_dep):
|
|||||||
await set_role(update_role, org, user)
|
await set_role(update_role, org, user)
|
||||||
return {"added": True}
|
return {"added": True}
|
||||||
|
|
||||||
|
    @router.get("/metrics", tags=["organizations"], response_model=OrgMetrics)
    async def get_org_metrics(org: Organization = Depends(org_dep)):
        """Return computed metrics for the org (see OrgMetrics model)."""
        return await ops.get_org_metrics(org)
|
||||||
|
|
||||||
return ops
|
return ops
|
||||||
|
@ -358,3 +358,27 @@ def test_update_event_webhook_urls_org_crawler(crawler_auth_headers, default_org
|
|||||||
)
|
)
|
||||||
assert r.status_code == 403
|
assert r.status_code == 403
|
||||||
assert r.json()["detail"] == "User does not have permission to perform this action"
|
assert r.json()["detail"] == "User does not have permission to perform this action"
|
||||||
|
|
||||||
|
|
||||||
|
def test_org_metrics(crawler_auth_headers, default_org_id):
    """Fetch the org metrics endpoint and sanity-check the returned counts."""
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/metrics",
        headers=crawler_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()

    # Earlier tests in the suite have stored data, so usage must be
    # positive; quota values may legitimately be zero.
    assert data["storageUsedBytes"] > 0
    assert data["storageUsedGB"] > 0
    assert data["storageQuotaBytes"] >= 0
    assert data["storageQuotaGB"] >= 0
    assert data["archivedItemCount"] > 0
    assert data["crawlCount"] > 0
    assert data["uploadCount"] >= 0
    # Archived items are exactly the crawls plus the uploads
    assert data["archivedItemCount"] == data["crawlCount"] + data["uploadCount"]
    assert data["pageCount"] > 0
    assert data["profileCount"] >= 0
    assert data["workflowsRunningCount"] >= 0
    assert data["workflowsQueuedCount"] >= 0
    assert data["collectionsCount"] > 0
    assert data["publicCollectionsCount"] >= 0
|
||||||
|
Loading…
Reference in New Issue
Block a user