Fix post-crawl collection stats update and add test (#918)
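This fixes #917, where crawls added to a collection via a workflow's autoAddCollections setting were not reflected in that collection's crawl and page count stats after the crawl completed. The cause was that, in update_crawl_collections, the `collections` argument was overwritten by the crawl's own list of collection IDs before being passed on to update_collection_counts_and_tags; the list is now stored in a separate `crawl_collections` variable.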
This commit is contained in:
		
							parent
							
								
									8477919989
								
							
						
					
					
						commit
						325355d991
					
				| @ -334,8 +334,8 @@ async def update_collection_counts_and_tags( | |||||||
| async def update_crawl_collections(collections, crawls, crawl_id: str): | async def update_crawl_collections(collections, crawls, crawl_id: str): | ||||||
|     """Update counts and tags for all collections in crawl""" |     """Update counts and tags for all collections in crawl""" | ||||||
|     crawl = await crawls.find_one({"_id": crawl_id}) |     crawl = await crawls.find_one({"_id": crawl_id}) | ||||||
|     collections = crawl.get("collections") |     crawl_collections = crawl.get("collections") | ||||||
|     for collection_id in collections: |     for collection_id in crawl_collections: | ||||||
|         await update_collection_counts_and_tags(collections, crawls, collection_id) |         await update_collection_counts_and_tags(collections, crawls, collection_id) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -26,7 +26,15 @@ def test_workflow_crawl_auto_added_subsequent_runs( | |||||||
|     auto_add_crawl_id, |     auto_add_crawl_id, | ||||||
|     auto_add_config_id, |     auto_add_config_id, | ||||||
| ): | ): | ||||||
|  |     r = requests.get( | ||||||
|  |         f"{API_PREFIX}/orgs/{default_org_id}/collections/{auto_add_collection_id}", | ||||||
|  |         headers=crawler_auth_headers, | ||||||
|  |     ) | ||||||
|  |     assert r.status_code == 200 | ||||||
|  |     crawl_count = r.json()["crawlCount"] | ||||||
|  | 
 | ||||||
|     # Run workflow again and make sure new crawl is also in collection |     # Run workflow again and make sure new crawl is also in collection | ||||||
|  |     # and crawl count has been incremented. | ||||||
|     r = requests.post( |     r = requests.post( | ||||||
|         f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/{auto_add_config_id}/run", |         f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/{auto_add_config_id}/run", | ||||||
|         headers=crawler_auth_headers, |         headers=crawler_auth_headers, | ||||||
| @ -52,3 +60,11 @@ def test_workflow_crawl_auto_added_subsequent_runs( | |||||||
|     ) |     ) | ||||||
|     assert r.status_code == 200 |     assert r.status_code == 200 | ||||||
|     assert auto_add_collection_id in r.json()["collections"] |     assert auto_add_collection_id in r.json()["collections"] | ||||||
|  | 
 | ||||||
|  |     r = requests.get( | ||||||
|  |         f"{API_PREFIX}/orgs/{default_org_id}/collections/{auto_add_collection_id}", | ||||||
|  |         headers=crawler_auth_headers, | ||||||
|  |     ) | ||||||
|  |     assert r.status_code == 200 | ||||||
|  |     new_crawl_count = r.json()["crawlCount"] | ||||||
|  |     assert new_crawl_count == crawl_count + 1 | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user