crawls list: unset errors in crawls list response to avoid very large responses (#904)
* crawls list: unset errors in crawls list response to avoid very large responses (#872)
* Remove errors from crawl replay.json
* Add tests to ensure errors are excluded from crawl GET endpoints
* Update tests to accept None for errors

Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
This commit is contained in:
parent
0284903b34
commit
3f42515914
@ -280,7 +280,7 @@ class CrawlOps:
|
|||||||
{"$set": {"fileCount": {"$size": "$files"}}},
|
{"$set": {"fileCount": {"$size": "$files"}}},
|
||||||
{"$set": {"firstSeedObject": {"$arrayElemAt": ["$config.seeds", 0]}}},
|
{"$set": {"firstSeedObject": {"$arrayElemAt": ["$config.seeds", 0]}}},
|
||||||
{"$set": {"firstSeed": "$firstSeedObject.url"}},
|
{"$set": {"firstSeed": "$firstSeedObject.url"}},
|
||||||
{"$unset": ["firstSeedObject"]},
|
{"$unset": ["firstSeedObject", "errors"]},
|
||||||
{
|
{
|
||||||
"$lookup": {
|
"$lookup": {
|
||||||
"from": "crawl_configs",
|
"from": "crawl_configs",
|
||||||
@ -394,6 +394,8 @@ class CrawlOps:
|
|||||||
|
|
||||||
res["resources"] = await self._resolve_signed_urls(files, org, crawlid)
|
res["resources"] = await self._resolve_signed_urls(files, org, crawlid)
|
||||||
|
|
||||||
|
del res["errors"]
|
||||||
|
|
||||||
crawl = CrawlOut.from_dict(res)
|
crawl = CrawlOut.from_dict(res)
|
||||||
|
|
||||||
return await self._resolve_crawl_refs(crawl, org)
|
return await self._resolve_crawl_refs(crawl, org)
|
||||||
|
@ -115,6 +115,36 @@ def test_crawls_include_seed_info(admin_auth_headers, default_org_id, admin_craw
|
|||||||
assert crawl["seedCount"] > 0
|
assert crawl["seedCount"] > 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_crawls_exclude_errors(admin_auth_headers, default_org_id, admin_crawl_id):
    """Check that crawl GET endpoints leave ``errors`` out of their responses.

    Exercises the single-crawl endpoint, its ``replay.json`` variant, and the
    crawl list endpoint. In every response the ``errors`` key must be either
    absent or ``None``.
    """
    single_crawl_urls = (
        # Get endpoint
        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{admin_crawl_id}",
        # replay.json endpoint
        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{admin_crawl_id}/replay.json",
    )
    for url in single_crawl_urls:
        resp = requests.get(url, headers=admin_auth_headers)
        assert resp.status_code == 200
        # dict.get returns None for a missing key, so this accepts both
        # "key absent" and "key present but None".
        assert resp.json().get("errors") is None

    # List endpoint: every returned crawl must also omit errors
    resp = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/crawls",
        headers=admin_auth_headers,
    )
    assert resp.status_code == 200
    for item in resp.json()["items"]:
        assert item.get("errors") is None
|
||||||
|
|
||||||
|
|
||||||
def test_download_wacz():
|
def test_download_wacz():
|
||||||
r = requests.get(HOST_PREFIX + wacz_path)
|
r = requests.get(HOST_PREFIX + wacz_path)
|
||||||
assert r.status_code == 200
|
assert r.status_code == 200
|
||||||
|
Loading…
Reference in New Issue
Block a user