From 2e3b3cb228ec179118bf1fe901f2b08607fbd3da Mon Sep 17 00:00:00 2001
From: Tessa Walsh
Date: Wed, 1 Feb 2023 22:24:36 -0500
Subject: [PATCH] Add API endpoint to update crawl tags (#545)

* Add API endpoint to update crawls (tags only for now)
* Allow setting tags to empty list in crawlconfig updates
---
 backend/btrixcloud/crawlconfigs.py |  4 +--
 backend/btrixcloud/crawls.py       | 32 +++++++++++++++++++++
 backend/test/test_crawlconfigs.py  | 18 ++++++++++++
 backend/test/test_run_crawl.py     | 46 ++++++++++++++++++++++++++++++
 4 files changed, 97 insertions(+), 3 deletions(-)

diff --git a/backend/btrixcloud/crawlconfigs.py b/backend/btrixcloud/crawlconfigs.py
index f5804e73..3ecbb86c 100644
--- a/backend/btrixcloud/crawlconfigs.py
+++ b/backend/btrixcloud/crawlconfigs.py
@@ -317,9 +317,7 @@ class CrawlConfigOps:
         """Update name, scale, schedule, and/or tags for an existing crawl config"""
 
         # set update query
-        query = update.dict(
-            exclude_unset=True, exclude_defaults=True, exclude_none=True
-        )
+        query = update.dict(exclude_unset=True, exclude_none=True)
 
         if len(query) == 0:
             raise HTTPException(status_code=400, detail="no_update_data")
diff --git a/backend/btrixcloud/crawls.py b/backend/btrixcloud/crawls.py
index 160e0ed6..7533d025 100644
--- a/backend/btrixcloud/crawls.py
+++ b/backend/btrixcloud/crawls.py
@@ -147,6 +147,13 @@ class CrawlCompleteIn(BaseModel):
     completed: Optional[bool] = True
 
 
+# ============================================================================
+class UpdateCrawl(BaseModel):
+    """Update crawl tags"""
+
+    tags: Optional[List[str]] = []
+
+
 # ============================================================================
 class CrawlOps:
     """Crawl Ops"""
@@ -368,6 +375,25 @@ class CrawlOps:
         # print(f"Crawl Already Added: {crawl.id} - {crawl.state}")
         return False
 
+    async def update_crawl(self, crawl_id: str, org: Organization, update: UpdateCrawl):
+        """Update existing crawl (tags only for now)"""
+        query = update.dict(exclude_unset=True, exclude_none=True)
+
+        if len(query) == 0:
+            raise HTTPException(status_code=400, detail="no_update_data")
+
+        # update in db
+        result = await self.crawls.find_one_and_update(
+            {"_id": crawl_id, "oid": org.id},
+            {"$set": query},
+            return_document=pymongo.ReturnDocument.AFTER,
+        )
+
+        if not result:
+            raise HTTPException(status_code=404, detail=f"Crawl '{crawl_id}' not found")
+
+        return {"success": True}
+
     async def update_crawl_state(self, crawl_id: str, state: str):
         """called only when job container is being stopped/canceled"""
 
@@ -680,6 +706,12 @@ def init_crawls_api(app, mdb, users, crawl_manager, crawl_config_ops, orgs, user
 
         return crawls[0]
 
+    @app.patch("/orgs/{oid}/crawls/{crawl_id}", tags=["crawls"])
+    async def update_crawl(
+        update: UpdateCrawl, crawl_id: str, org: Organization = Depends(org_crawl_dep)
+    ):
+        return await ops.update_crawl(crawl_id, org, update)
+
     @app.post(
         "/orgs/{oid}/crawls/{crawl_id}/scale",
         tags=["crawls"],
diff --git a/backend/test/test_crawlconfigs.py b/backend/test/test_crawlconfigs.py
index 1fc23173..230add29 100644
--- a/backend/test/test_crawlconfigs.py
+++ b/backend/test/test_crawlconfigs.py
@@ -40,3 +40,21 @@ def test_add_update_crawl_config(
     data = r.json()
     assert data["name"] == UPDATED_NAME
     assert sorted(data["tags"]) == sorted(UPDATED_TAGS)
+
+    # Verify that deleting tags works as well
+    r = requests.patch(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/{cid}/",
+        headers=crawler_auth_headers,
+        json={"tags": []},
+    )
+    assert r.status_code == 200
+
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/{cid}/",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+
+    data = r.json()
+    assert data["name"] == UPDATED_NAME
+    assert data["tags"] == []
diff --git a/backend/test/test_run_crawl.py b/backend/test/test_run_crawl.py
index 8f8f751a..392895e2 100644
--- a/backend/test/test_run_crawl.py
+++ b/backend/test/test_run_crawl.py
@@ -96,3 +96,49 @@ def test_verify_wacz():
     pages = z.open("pages/pages.jsonl").read().decode("utf-8")
 
     assert '"https://webrecorder.net/"' in pages
+
+
+def test_update_tags(admin_auth_headers, default_org_id, admin_crawl_id):
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{admin_crawl_id}",
+        headers=admin_auth_headers,
+    )
+    assert r.status_code == 200
+    data = r.json()
+    assert sorted(data["tags"]) == ["wr-test-1", "wr-test-2"]
+
+    # Submit patch request to update tags
+    UPDATED_TAGS = ["wr-test-1-updated", "wr-test-2-updated"]
+    r = requests.patch(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{admin_crawl_id}",
+        headers=admin_auth_headers,
+        json={"tags": UPDATED_TAGS},
+    )
+    assert r.status_code == 200
+    data = r.json()
+    assert data["success"]
+
+    # Verify update was successful
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{admin_crawl_id}",
+        headers=admin_auth_headers,
+    )
+    assert r.status_code == 200
+    data = r.json()
+    assert sorted(data["tags"]) == sorted(UPDATED_TAGS)
+
+    # Verify deleting all tags works as well
+    r = requests.patch(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{admin_crawl_id}",
+        headers=admin_auth_headers,
+        json={"tags": []},
+    )
+    assert r.status_code == 200
+
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{admin_crawl_id}",
+        headers=admin_auth_headers,
+    )
+    assert r.status_code == 200
+    data = r.json()
+    assert data["tags"] == []
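
Why the crawlconfigs.py change matters: with exclude_defaults=True, an
explicitly supplied empty tag list equals the field default and is stripped
from the update query, so clearing tags would fail with "no_update_data".
A minimal sketch of the difference, using a hypothetical stand-in for the
crawlconfig update model (Pydantic v1 .dict(), as used in the patch):

    from typing import List, Optional

    from pydantic import BaseModel


    # Illustrative stand-in; the real update model lives in crawlconfigs.py
    class UpdateCrawlConfigSketch(BaseModel):
        name: Optional[str] = None
        tags: Optional[List[str]] = []


    update = UpdateCrawlConfigSketch(tags=[])

    # exclude_defaults drops tags because [] equals the field default,
    # even though the caller set it explicitly -> empty update query
    print(update.dict(exclude_unset=True, exclude_defaults=True, exclude_none=True))
    # {}

    # without exclude_defaults, the explicitly set empty list survives,
    # so {"$set": {"tags": []}} reaches the database as intended
    print(update.dict(exclude_unset=True, exclude_none=True))
    # {'tags': []}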
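
The update_crawl helper leans on find_one_and_update returning the matched
document (or None): with return_document=ReturnDocument.AFTER it yields the
post-update document, so a falsy result means no crawl matched the id/org
filter and a 404 is raised. A minimal sketch with plain pymongo (motor's
coroutine API mirrors it; connection and ids below are placeholders):

    import pymongo

    client = pymongo.MongoClient()           # placeholder connection
    crawls = client["btrix-test"]["crawls"]  # placeholder db/collection

    result = crawls.find_one_and_update(
        {"_id": "crawl-1", "oid": "org-1"},  # match crawl within the org
        {"$set": {"tags": ["example"]}},     # apply the update query
        return_document=pymongo.ReturnDocument.AFTER,  # return updated doc
    )
    # result is the updated document, or None if no crawl matched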
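
For completeness, the new route added in crawls.py is exercised the same way
as in test_run_crawl.py above. A minimal client sketch, assuming a running
backend and placeholder values for the base URL, org id, crawl id, and auth
headers (all hypothetical, modeled on the test fixtures):

    import requests

    API_PREFIX = "http://localhost:8000/api"            # placeholder base URL
    org_id = "<org-uuid>"                               # placeholder org id
    crawl_id = "<crawl-id>"                             # placeholder crawl id
    auth_headers = {"Authorization": "Bearer <token>"}  # placeholder token

    # PATCH /orgs/{oid}/crawls/{crawl_id} replaces the crawl's tags;
    # an empty list clears them entirely
    r = requests.patch(
        f"{API_PREFIX}/orgs/{org_id}/crawls/{crawl_id}",
        headers=auth_headers,
        json={"tags": ["qa", "2023-02"]},
    )
    assert r.status_code == 200 and r.json()["success"]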