Add support for collectionIds to archived item PATCH endpoints (#1121)

* Add support for collectionIds to patch endpoints

* Make update available via all-crawls/ and add test

* Fix tests

* Always remove collectionIds from update

* Remove unnecessary fallback

* One more pass on expected values before update
This commit is contained in:
Tessa Walsh 2023-08-30 10:41:30 -04:00 committed by GitHub
parent ceaaf630f2
commit f6369ee01e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 126 additions and 5 deletions

View File

@ -137,14 +137,40 @@ class BaseCrawlOps:
)
return res
async def _update_crawl_collections(
    self, crawl_id: str, org: Organization, collection_ids: List[UUID4]
):
    """Sync this crawl's collection memberships to exactly the given list.

    Fetches the crawl's current collectionIds, then adds the crawl to any
    newly listed collection and removes it from any collection no longer
    listed. Collections present in both sets are untouched.
    """
    crawl = await self.get_crawl(crawl_id, org, cls_type=CrawlOut)
    current = set(crawl.collectionIds)
    target = set(collection_ids)

    # Attach crawl to collections it was newly added to
    for coll_id in target - current:
        await self.colls.add_crawls_to_collection(coll_id, [crawl_id], org)

    # Detach crawl from collections it no longer belongs to
    for coll_id in current - target:
        await self.colls.remove_crawls_from_collection(coll_id, [crawl_id], org)
async def update_crawl(
    self, crawl_id: str, org: Organization, update: UpdateCrawl, type_=None
):
    """Update existing crawl (tags and notes only for now)"""
    """Update existing crawl"""
    # NOTE(review): the two adjacent docstrings above are the old/new lines
    # of a diff view of this file; only the second is the current docstring.
    # Only fields the caller explicitly set on the PATCH body are applied.
    update_values = update.dict(exclude_unset=True)
    if len(update_values) == 0:
        raise HTTPException(status_code=400, detail="no_update_data")
    # Update collections then unset from update_values
    # We handle these separately due to updates required for collection changes
    collection_ids = update_values.get("collectionIds")
    if collection_ids:
        await self._update_crawl_collections(crawl_id, org, collection_ids)
    # Always strip collectionIds so the raw list is not written directly to
    # the crawl document by the generic field update that follows (the rest
    # of this method is outside this hunk).
    update_values.pop("collectionIds", None)
    # Scope the lookup to this org; optionally restrict by item type.
    query = {"_id": crawl_id, "oid": org.id}
    if type_:
        query["type"] = type_
@ -603,6 +629,12 @@ def init_base_crawls_api(
async def get_crawl(crawl_id, org: Organization = Depends(org_viewer_dep)):
    """Return a single archived item by id within the given org."""
    item = await ops.get_crawl(crawl_id, org)
    return item
@app.patch("/orgs/{oid}/all-crawls/{crawl_id}", tags=["all-crawls"])
async def update_crawl(
    update: UpdateCrawl, crawl_id: str, org: Organization = Depends(org_crawl_dep)
):
    """PATCH metadata for an archived item of any type via /all-crawls."""
    result = await ops.update_crawl(crawl_id, org, update)
    return result
# Bulk-delete endpoint for archived items of any type.
# (Handler signature and body continue beyond this hunk.)
@app.post("/orgs/{oid}/all-crawls/delete", tags=["all-crawls"])
async def delete_crawls_all_types(
    delete_list: DeleteCrawlList,

View File

@ -388,6 +388,7 @@ class UpdateCrawl(BaseModel):
# Optional metadata fields; unset fields are ignored on PATCH.
# Fix: `description` was previously declared twice — the duplicate
# declaration was redundant (the later one silently overrode the first)
# and has been removed.
description: Optional[str]
tags: Optional[List[str]]
# Full list of collection ids the item should belong to after the update;
# handled separately from the other fields (see update_crawl).
collectionIds: Optional[List[UUID4]]
# ============================================================================

View File

@ -179,7 +179,11 @@ def test_verify_wacz():
assert len(pages.strip().split("\n")) == 4
# NOTE(review): this span is a unified-diff fragment — both the old and the
# new signature of test_update_crawl appear below, and the "@ -..." hunk
# headers are extraction residue, not Python code.
def test_update_crawl(admin_auth_headers, default_org_id, admin_crawl_id):
def test_update_crawl(
    admin_auth_headers,
    default_org_id,
    admin_crawl_id,
):
    # Fetch the crawl to verify its pre-update state
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{admin_crawl_id}",
        headers=admin_auth_headers,
@ -187,15 +191,30 @@ def test_update_crawl(admin_auth_headers, default_org_id, admin_crawl_id):
    assert r.status_code == 200
    data = r.json()
    assert sorted(data["tags"]) == ["wr-test-1", "wr-test-2"]
    # Crawl is expected to already belong to exactly one collection
    # (presumably set up by an earlier test — confirm against full file)
    assert len(data["collectionIds"]) == 1
    # Submit patch request to update tags and description
    # Make new collection
    r = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/collections",
        headers=admin_auth_headers,
        json={"name": "Crawl Update Test Collection"},
    )
    new_coll_id = r.json()["id"]
    # Submit patch request
    UPDATED_TAGS = ["wr-test-1-updated", "wr-test-2-updated"]
    UPDATED_DESC = "Lorem ipsum test note."
    UPDATED_NAME = "Updated crawl name"
    UPDATED_COLLECTION_IDS = [new_coll_id]
    r = requests.patch(
        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{admin_crawl_id}",
        headers=admin_auth_headers,
        json={"tags": UPDATED_TAGS, "description": UPDATED_DESC, "name": UPDATED_NAME},
        json={
            "tags": UPDATED_TAGS,
            "description": UPDATED_DESC,
            "name": UPDATED_NAME,
            "collectionIds": UPDATED_COLLECTION_IDS,
        },
    )
    assert r.status_code == 200
    data = r.json()
@ -211,6 +230,7 @@ def test_update_crawl(admin_auth_headers, default_org_id, admin_crawl_id):
    # Verify every patched field round-trips on a fresh GET
    assert sorted(data["tags"]) == sorted(UPDATED_TAGS)
    assert data["description"] == UPDATED_DESC
    assert data["name"] == UPDATED_NAME
    assert data["collectionIds"] == UPDATED_COLLECTION_IDS
    # Verify deleting works as well
    r = requests.patch(
View File

@ -258,15 +258,30 @@ def test_update_upload_metadata(admin_auth_headers, default_org_id):
# NOTE(review): unified-diff fragment — interior of test_update_upload_metadata;
# the duplicated json= lines are the old/new versions from the diff, and
# "@ -..." lines are hunk headers, not Python code.
    assert data["name"] == "My Upload Updated"
    assert not data["tags"]
    assert not data["description"]
    # Upload is expected to already belong to exactly one collection
    assert len(data["collectionIds"]) == 1
    # Make new collection
    r = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/collections",
        headers=admin_auth_headers,
        json={"name": "Patch Update Test Collection"},
    )
    new_coll_id = r.json()["id"]
    # Submit patch request to update name, tags, description, and collections
    UPDATED_NAME = "New Upload Name"
    UPDATED_TAGS = ["wr-test-1-updated", "wr-test-2-updated"]
    UPDATED_DESC = "Lorem ipsum test note."
    UPDATED_COLLECTION_IDS = [new_coll_id]
    r = requests.patch(
        f"{API_PREFIX}/orgs/{default_org_id}/uploads/{upload_id}",
        headers=admin_auth_headers,
        json={"tags": UPDATED_TAGS, "description": UPDATED_DESC, "name": UPDATED_NAME},
        json={
            "tags": UPDATED_TAGS,
            "description": UPDATED_DESC,
            "name": UPDATED_NAME,
            "collectionIds": UPDATED_COLLECTION_IDS,
        },
    )
    assert r.status_code == 200
    data = r.json()
@ -282,6 +297,7 @@ def test_update_upload_metadata(admin_auth_headers, default_org_id):
    # Verify every patched field round-trips on a fresh GET
    assert sorted(data["tags"]) == sorted(UPDATED_TAGS)
    assert data["description"] == UPDATED_DESC
    assert data["name"] == UPDATED_NAME
    assert data["collectionIds"] == UPDATED_COLLECTION_IDS
# (Next test begins here; its body continues beyond this excerpt.)
def test_delete_stream_upload(admin_auth_headers, default_org_id):
@ -743,6 +759,58 @@ def test_get_upload_replay_json_admin_from_all_crawls(
assert "files" not in data
def test_update_upload_metadata_all_crawls(admin_auth_headers, default_org_id):
    """Verify PATCH via /all-crawls updates an upload's metadata and collections.

    Depends on prior tests having left the upload named "My Upload Updated"
    and belonging to exactly one collection.
    """
    # Confirm expected pre-update state
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/all-crawls/{upload_id}",
        headers=admin_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data["name"] == "My Upload Updated"
    assert not data["tags"]
    assert not data["description"]
    assert len(data["collectionIds"]) == 1

    # Make new collection to move the upload into
    r = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/collections",
        headers=admin_auth_headers,
        json={"name": "Patch Update Test Collection 2"},
    )
    # Fail fast here rather than with a confusing KeyError below if
    # collection creation did not succeed
    assert r.status_code == 200
    new_coll_id = r.json()["id"]
    assert new_coll_id

    # Submit patch request to update name, tags, description, and collections
    UPDATED_NAME = "New Upload Name 2"
    UPDATED_TAGS = ["wr-test-1-updated-again", "wr-test-2-updated-again"]
    UPDATED_DESC = "Lorem ipsum test note 2."
    UPDATED_COLLECTION_IDS = [new_coll_id]
    r = requests.patch(
        f"{API_PREFIX}/orgs/{default_org_id}/all-crawls/{upload_id}",
        headers=admin_auth_headers,
        json={
            "tags": UPDATED_TAGS,
            "description": UPDATED_DESC,
            "name": UPDATED_NAME,
            "collectionIds": UPDATED_COLLECTION_IDS,
        },
    )
    assert r.status_code == 200
    data = r.json()
    assert data["updated"]

    # Verify update was successful
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/all-crawls/{upload_id}",
        headers=admin_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert sorted(data["tags"]) == sorted(UPDATED_TAGS)
    assert data["description"] == UPDATED_DESC
    assert data["name"] == UPDATED_NAME
    assert data["collectionIds"] == UPDATED_COLLECTION_IDS
# Delete the form upload through the /all-crawls bulk-delete endpoint.
# (Function body continues beyond this excerpt.)
def test_delete_form_upload_from_all_crawls(admin_auth_headers, default_org_id):
    r = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/all-crawls/delete",