crawls list: unset errors in crawls list response to avoid very large responses (#904)
* crawls list: unset errors in crawls list response to avoid very large responses #872
* Remove errors from crawl replay.json
* Add tests to ensure errors are excluded from crawl GET endpoints
* Update tests to accept None for errors

---------

Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
This commit is contained in:
parent
0284903b34
commit
3f42515914
@ -280,7 +280,7 @@ class CrawlOps:
|
||||
{"$set": {"fileCount": {"$size": "$files"}}},
|
||||
{"$set": {"firstSeedObject": {"$arrayElemAt": ["$config.seeds", 0]}}},
|
||||
{"$set": {"firstSeed": "$firstSeedObject.url"}},
|
||||
{"$unset": ["firstSeedObject"]},
|
||||
{"$unset": ["firstSeedObject", "errors"]},
|
||||
{
|
||||
"$lookup": {
|
||||
"from": "crawl_configs",
|
||||
@ -394,6 +394,8 @@ class CrawlOps:
|
||||
|
||||
res["resources"] = await self._resolve_signed_urls(files, org, crawlid)
|
||||
|
||||
del res["errors"]
|
||||
|
||||
crawl = CrawlOut.from_dict(res)
|
||||
|
||||
return await self._resolve_crawl_refs(crawl, org)
|
||||
|
@ -115,6 +115,36 @@ def test_crawls_include_seed_info(admin_auth_headers, default_org_id, admin_craw
|
||||
assert crawl["seedCount"] > 0
|
||||
|
||||
|
||||
def test_crawls_exclude_errors(admin_auth_headers, default_org_id, admin_crawl_id):
    """Check that crawl GET endpoints do not return a populated ``errors`` field.

    Exercises the single-crawl endpoint, its ``replay.json`` variant, and the
    crawl list endpoint. In every response the ``errors`` key must either be
    absent or be ``None``.
    """
    # Single-crawl endpoints, requested in order: plain GET, then replay.json
    single_crawl_urls = (
        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{admin_crawl_id}",
        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{admin_crawl_id}/replay.json",
    )
    for url in single_crawl_urls:
        resp = requests.get(url, headers=admin_auth_headers)
        assert resp.status_code == 200
        payload = resp.json()
        assert "errors" not in payload or payload.get("errors") is None

    # List endpoint: every returned item must satisfy the same condition
    list_resp = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/crawls",
        headers=admin_auth_headers,
    )
    assert list_resp.status_code == 200
    for item in list_resp.json()["items"]:
        assert "errors" not in item or item.get("errors") is None
|
||||
|
||||
|
||||
def test_download_wacz():
|
||||
r = requests.get(HOST_PREFIX + wacz_path)
|
||||
assert r.status_code == 200
|
||||
|
Loading…
Reference in New Issue
Block a user