diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py
index 3815c9f6..58719de8 100644
--- a/backend/btrixcloud/colls.py
+++ b/backend/btrixcloud/colls.py
@@ -182,7 +182,7 @@ class CollectionOps:
         aggregate = [{"$match": match_query}]

         if sort_by:
-            if sort_by not in ("modified", "name", "description"):
+            if sort_by not in ("modified", "name", "description", "totalSize"):
                 raise HTTPException(status_code=400, detail="invalid_sort_by")
             if sort_direction not in (1, -1):
                 raise HTTPException(status_code=400, detail="invalid_sort_direction")
@@ -267,6 +267,7 @@ async def update_collection_counts_and_tags(
     """Set current crawl info in config when crawl begins"""
     crawl_count = 0
     page_count = 0
+    total_size = 0
     tags = []

     cursor = crawls.find({"collections": collection_id})
@@ -275,6 +276,9 @@ async def update_collection_counts_and_tags(
         if crawl["state"] not in SUCCESSFUL_STATES:
             continue
         crawl_count += 1
+        files = crawl.get("files", [])
+        for file in files:
+            total_size += file.get("size", 0)
         if crawl.get("stats"):
             page_count += crawl.get("stats", {}).get("done", 0)
         if crawl.get("tags"):
@@ -288,6 +292,7 @@ async def update_collection_counts_and_tags(
             "$set": {
                 "crawlCount": crawl_count,
                 "pageCount": page_count,
+                "totalSize": total_size,
                 "tags": sorted_tags,
             }
         },
diff --git a/backend/btrixcloud/db.py b/backend/btrixcloud/db.py
index 7f94f525..d17e5c39 100644
--- a/backend/btrixcloud/db.py
+++ b/backend/btrixcloud/db.py
@@ -15,7 +15,7 @@
 from pymongo.errors import InvalidName

 from .migrations import BaseMigration

-CURR_DB_VERSION = "0009"
+CURR_DB_VERSION = "0010"

 # ============================================================================
diff --git a/backend/btrixcloud/migrations/migration_0010_collection_total_size.py b/backend/btrixcloud/migrations/migration_0010_collection_total_size.py
new file mode 100644
index 00000000..4fa2a291
--- /dev/null
+++ b/backend/btrixcloud/migrations/migration_0010_collection_total_size.py
@@ -0,0 +1,36 @@
+"""
+Migration 0010 - Precomputing collection total size
+"""
+from btrixcloud.colls import update_collection_counts_and_tags
+from btrixcloud.migrations import BaseMigration
+
+
+MIGRATION_VERSION = "0010"
+
+
+class Migration(BaseMigration):
+    """Migration class."""
+
+    def __init__(self, mdb, migration_version=MIGRATION_VERSION):
+        super().__init__(mdb, migration_version)
+
+    async def migrate_up(self):
+        """Perform migration up.
+
+        Recompute collection data to include totalSize.
+        """
+        # pylint: disable=duplicate-code
+        colls = self.mdb["collections"]
+        crawls = self.mdb["crawls"]
+
+        colls_to_update = [res async for res in colls.find({})]
+        if not colls_to_update:
+            return
+
+        for coll in colls_to_update:
+            coll_id = coll["_id"]
+            try:
+                await update_collection_counts_and_tags(colls, crawls, coll_id)
+            # pylint: disable=broad-exception-caught
+            except Exception as err:
+                print(f"Unable to update collection {coll_id}: {err}", flush=True)
diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py
index 35263499..a5929ef6 100644
--- a/backend/btrixcloud/models.py
+++ b/backend/btrixcloud/models.py
@@ -463,6 +463,7 @@ class Collection(BaseMongoModel):

     crawlCount: Optional[int] = 0
     pageCount: Optional[int] = 0
+    totalSize: Optional[int] = 0

     # Sorted by count, descending
     tags: Optional[List[str]] = []
diff --git a/backend/test/test_collections.py b/backend/test/test_collections.py
index 0e3707ae..3e289754 100644
--- a/backend/test/test_collections.py
+++ b/backend/test/test_collections.py
@@ -98,6 +98,7 @@ def test_update_collection(
     assert data["description"] == DESCRIPTION
     assert data["crawlCount"] == 1
     assert data["pageCount"] > 0
+    assert data["totalSize"] > 0
     global modified
     modified = data["modified"]
     assert modified
@@ -172,6 +173,7 @@ def test_add_remove_crawl_from_collection(
     assert data["id"] == _coll_id
     assert data["crawlCount"] == 2
     assert data["pageCount"] > 0
+    assert data["totalSize"] > 0
     assert data["modified"] >= modified
     assert data["tags"] == ["wr-test-2", "wr-test-1"]

@@ -193,6 +195,7 @@ def test_add_remove_crawl_from_collection(
     assert data["id"] == _coll_id
     assert data["crawlCount"] == 0
     assert data["pageCount"] == 0
+    assert data["totalSize"] == 0
     assert data["modified"] >= modified
     assert data.get("tags", []) == []

@@ -220,6 +223,7 @@
     assert data["id"] == _coll_id
     assert data["crawlCount"] == 2
     assert data["pageCount"] > 0
+    assert data["totalSize"] > 0
     assert data["modified"] >= modified
     assert data["tags"] == ["wr-test-2", "wr-test-1"]

@@ -237,6 +241,7 @@ def test_get_collection(crawler_auth_headers, default_org_id):
     assert data["description"] == DESCRIPTION
     assert data["crawlCount"] == 2
     assert data["pageCount"] > 0
+    assert data["totalSize"] > 0
     assert data["modified"] >= modified
     assert data["tags"] == ["wr-test-2", "wr-test-1"]

@@ -256,6 +261,7 @@ def test_get_collection_replay(
     assert data["description"] == DESCRIPTION
     assert data["crawlCount"] == 2
     assert data["pageCount"] > 0
+    assert data["totalSize"] > 0
     assert data["modified"] >= modified
     assert data["tags"] == ["wr-test-2", "wr-test-1"]

@@ -292,6 +298,7 @@ def test_add_upload_to_collection(crawler_auth_headers, default_org_id):
     assert data["id"] == _coll_id
     assert data["crawlCount"] == 3
     assert data["pageCount"] > 0
+    assert data["totalSize"] > 0
     assert data["modified"]
     assert data["tags"] == ["wr-test-2", "wr-test-1"]

@@ -323,6 +330,7 @@ def test_list_collections(
     assert first_coll["description"] == DESCRIPTION
     assert first_coll["crawlCount"] == 3
     assert first_coll["pageCount"] > 0
+    assert first_coll["totalSize"] > 0
     assert first_coll["modified"]
     assert first_coll["tags"] == ["wr-test-2", "wr-test-1"]

@@ -333,6 +341,7 @@
     assert second_coll.get("description") is None
     assert second_coll["crawlCount"] == 1
     assert second_coll["pageCount"] > 0
+    assert second_coll["totalSize"] > 0
     assert second_coll["modified"]
     assert second_coll["tags"] == ["wr-test-2"]

@@ -349,6 +358,7 @@ def test_remove_upload_from_collection(crawler_auth_headers, default_org_id):
     assert data["id"] == _coll_id
     assert data["crawlCount"] == 2
     assert data["pageCount"] > 0
+    assert data["totalSize"] > 0
     assert data["modified"] >= modified
     assert data.get("tags") == ["wr-test-2", "wr-test-1"]

@@ -499,6 +509,46 @@ def test_filter_sort_collections(
     items = data["items"]
     assert items[0]["modified"] >= items[1]["modified"]

+    # Test sorting by size, ascending
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/collections?sortBy=totalSize",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    data = r.json()
+    assert data["total"] == 2
+
+    items = data["items"]
+    assert items[0]["totalSize"] <= items[1]["totalSize"]
+
+    # Test sorting by size, descending
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/collections?sortBy=totalSize&sortDirection=-1",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    data = r.json()
+    assert data["total"] == 2
+
+    items = data["items"]
+    assert items[0]["totalSize"] >= items[1]["totalSize"]
+
+    # Invalid sort value
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/collections?sortBy=invalid",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 400
+    assert r.json()["detail"] == "invalid_sort_by"
+
+    # Invalid sort_direction value
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/collections?sortBy=modified&sortDirection=0",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 400
+    assert r.json()["detail"] == "invalid_sort_direction"
+

 def test_delete_collection(crawler_auth_headers, default_org_id, crawler_crawl_id):
     # Delete second collection
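
Note on the first colls.py hunk: it shows only the sortBy/sortDirection validation; the $sort stage that consumes these values lies outside the diff context. Below is a minimal sketch of how the validated parameters would typically be appended to the aggregation pipeline; everything past the validation lines is an assumption, not part of this diff, and the actual source may differ.

    # Sketch under assumptions: `aggregate`, `sort_by`, and `sort_direction`
    # mirror the names visible in the hunk above; the $sort stage itself is
    # not shown in this diff.
    aggregate = [{"$match": match_query}]

    if sort_by:
        if sort_by not in ("modified", "name", "description", "totalSize"):
            raise HTTPException(status_code=400, detail="invalid_sort_by")
        if sort_direction not in (1, -1):
            raise HTTPException(status_code=400, detail="invalid_sort_direction")
        # In this sketch the validated field name is passed straight through
        # as the Mongo sort key, which is why extending the allowed set with
        # "totalSize" is enough to enable size-based sorting.
        aggregate.append({"$sort": {sort_by: sort_direction}})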