diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py
index 35538841..cfef43d1 100644
--- a/backend/btrixcloud/colls.py
+++ b/backend/btrixcloud/colls.py
@@ -684,11 +684,19 @@ class CollectionOps:
         )
 
     async def update_crawl_collections(self, crawl_id: str):
-        """Update counts and tags for all collections in crawl"""
+        """Update counts, dates, and modified for all collections in crawl"""
         crawl = await self.crawls.find_one({"_id": crawl_id})
         crawl_coll_ids = crawl.get("collectionIds")
-        for collection_id in crawl_coll_ids:
-            await self.update_collection_counts_and_tags(collection_id)
+        modified = dt_now()
+
+        for coll_id in crawl_coll_ids:
+            await self.update_collection_counts_and_tags(coll_id)
+            await self.update_collection_dates(coll_id)
+            await self.collections.find_one_and_update(
+                {"_id": coll_id},
+                {"$set": {"modified": modified}},
+                return_document=pymongo.ReturnDocument.AFTER,
+            )
 
     async def add_successful_crawl_to_collections(self, crawl_id: str, cid: UUID):
         """Add successful crawl to its auto-add collections."""
diff --git a/backend/btrixcloud/uploads.py b/backend/btrixcloud/uploads.py
index 9f59991e..96dc3334 100644
--- a/backend/btrixcloud/uploads.py
+++ b/backend/btrixcloud/uploads.py
@@ -190,7 +190,9 @@ class UploadOps(BaseCrawlOps):
             self.event_webhook_ops.create_upload_finished_notification(crawl_id, org.id)
         )
 
-        asyncio.create_task(self.page_ops.add_crawl_pages_to_db_from_wacz(crawl_id))
+        asyncio.create_task(
+            self._add_pages_and_update_collections(crawl_id, collections)
+        )
 
         await self.orgs.inc_org_bytes_stored(org.id, file_size, "upload")
 
@@ -204,6 +206,14 @@ class UploadOps(BaseCrawlOps):
 
         return {"id": crawl_id, "added": True, "storageQuotaReached": quota_reached}
 
+    async def _add_pages_and_update_collections(
+        self, crawl_id: str, collections: Optional[List[str]] = None
+    ):
+        """Add pages from the WACZ, then update the upload's collections, if any"""
+        await self.page_ops.add_crawl_pages_to_db_from_wacz(crawl_id)
+        if collections:
+            await self.colls.update_crawl_collections(crawl_id)
+
     async def delete_uploads(
         self,
         delete_list: DeleteCrawlList,
diff --git a/backend/test/test_uploads.py b/backend/test/test_uploads.py
index ab3e8165..719a247e 100644
--- a/backend/test/test_uploads.py
+++ b/backend/test/test_uploads.py
@@ -285,6 +285,26 @@ def test_get_upload_pages(admin_auth_headers, default_org_id, upload_id):
     assert data["uniquePageCount"] > 0
 
 
+def test_uploads_collection_updated(
+    admin_auth_headers, default_org_id, uploads_collection_id, upload_id
+):
+    # Verify that collection is updated when WACZ is added on upload
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/collections/{uploads_collection_id}",
+        headers=admin_auth_headers,
+    )
+    assert r.status_code == 200
+    data = r.json()
+
+    assert data["crawlCount"] > 0
+    assert data["pageCount"] > 0
+    assert data["uniquePageCount"] > 0
+    assert data["totalSize"] > 0
+    assert data["dateEarliest"]
+    assert data["dateLatest"]
+    assert data["modified"] > data["created"]
+
+
 def test_replace_upload(
     admin_auth_headers, default_org_id, uploads_collection_id, upload_id
 ):