Fix post-crawl collection stats update and add test (#918)

This fixes #917, where crawls added to a collection via the workflow's
autoAddCollections were not reflected in the collection's crawl and
page count stats after the crawl completed.
This commit is contained in:
Tessa Walsh 2023-06-10 22:06:25 -04:00 committed by GitHub
parent 8477919989
commit 325355d991
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 18 additions and 2 deletions

View File

@ -334,8 +334,8 @@ async def update_collection_counts_and_tags(
async def update_crawl_collections(collections, crawls, crawl_id: str):
"""Update counts and tags for all collections in crawl"""
crawl = await crawls.find_one({"_id": crawl_id})
collections = crawl.get("collections")
for collection_id in collections:
crawl_collections = crawl.get("collections")
for collection_id in crawl_collections:
await update_collection_counts_and_tags(collections, crawls, collection_id)

View File

@ -26,7 +26,15 @@ def test_workflow_crawl_auto_added_subsequent_runs(
auto_add_crawl_id,
auto_add_config_id,
):
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/collections/{auto_add_collection_id}",
headers=crawler_auth_headers,
)
assert r.status_code == 200
crawl_count = r.json()["crawlCount"]
# Run workflow again and make sure new crawl is also in collection
# and crawl count has been incremented.
r = requests.post(
f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/{auto_add_config_id}/run",
headers=crawler_auth_headers,
@ -52,3 +60,11 @@ def test_workflow_crawl_auto_added_subsequent_runs(
)
assert r.status_code == 200
assert auto_add_collection_id in r.json()["collections"]
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/collections/{auto_add_collection_id}",
headers=crawler_auth_headers,
)
assert r.status_code == 200
new_crawl_count = r.json()["crawlCount"]
assert new_crawl_count == crawl_count + 1