Fix replay.json 400 response for empty collection (#2445)

- Fixes #2443
- Don't raise an error in list_pages() if no crawls are provided; just return
an empty list
- Ensure an empty collection returns 200 on replay.json; add tests
This commit is contained in:
Ilya Kreymer 2025-03-03 09:38:19 -08:00 committed by GitHub
parent 2e86ee3fcc
commit 2263745df3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 35 additions and 5 deletions

View File

@ -538,11 +538,9 @@ class PageOps:
crawl_ids = await self.coll_ops.get_collection_crawl_ids(
coll_id, public_or_unlisted_only
)
elif not crawl_ids:
# neither coll_id nor crawl_id, error
raise HTTPException(
status_code=400, detail="either crawl_ids or coll_id must be provided"
)
if not crawl_ids:
return [], 0
query: dict[str, object] = {
"crawl_id": {"$in": crawl_ids},

View File

@ -155,6 +155,38 @@ def test_create_collection_empty_name(
assert r.status_code == 422
def test_create_empty_collection(
    crawler_auth_headers, default_org_id, crawler_crawl_id, admin_crawl_id
):
    """Check that replay.json succeeds for a collection created without crawls.

    Creates a collection with only a name, fetches its replay.json and expects
    a 200 response reporting zero crawls, zero pages and no resources, then
    deletes the collection again.

    NOTE(review): crawler_crawl_id and admin_crawl_id are not referenced in
    the body; presumably they are requested so those crawls exist before this
    test runs -- confirm before removing them.
    """
    collections_url = f"{API_PREFIX}/orgs/{default_org_id}/collections"

    # Create the collection with a name only, i.e. without any crawls.
    create_resp = requests.post(
        collections_url,
        headers=crawler_auth_headers,
        json={"name": "Empty Collection"},
    )
    assert create_resp.status_code == 200

    coll_id = create_resp.json()["id"]
    coll_url = f"{collections_url}/{coll_id}"

    # replay.json must answer 200 and report an empty collection.
    replay_resp = requests.get(
        f"{coll_url}/replay.json",
        headers=crawler_auth_headers,
    )
    assert replay_resp.status_code == 200

    replay = replay_resp.json()
    assert replay["crawlCount"] == 0
    assert replay["pageCount"] == 0
    assert len(replay["resources"]) == 0

    # Delete the empty collection
    delete_resp = requests.delete(coll_url, headers=crawler_auth_headers)
    assert delete_resp.status_code == 200
    assert delete_resp.json()["success"]
def test_update_collection(
crawler_auth_headers, default_org_id, crawler_crawl_id, admin_crawl_id
):