Add reviewStatus, qaState, and qaRunCount sort options to crawls/all-crawls list endpoints (#1686)
Backend work for #1672.

Adds new sort options to the /crawls and /all-crawls GET list endpoints:

- `reviewStatus`
- `qaRunCount`: number of completed QA runs for crawl (also added to CrawlOut)
- `qaState` (sorts by `activeQAState` first, then `lastQAState`, both of which are added to CrawlOut)
This commit is contained in:
		
							parent
							
								
									87e0873f1a
								
							
						
					
					
						commit
						c800da1732
					
				| @ -551,6 +551,50 @@ class BaseCrawlOps: | |||||||
|             {"$set": {"firstSeedObject": {"$arrayElemAt": ["$config.seeds", 0]}}}, |             {"$set": {"firstSeedObject": {"$arrayElemAt": ["$config.seeds", 0]}}}, | ||||||
|             {"$set": {"firstSeed": "$firstSeedObject.url"}}, |             {"$set": {"firstSeed": "$firstSeedObject.url"}}, | ||||||
|             {"$unset": ["firstSeedObject", "errors", "config"]}, |             {"$unset": ["firstSeedObject", "errors", "config"]}, | ||||||
|  |             {"$set": {"qaState": "$qa.state"}}, | ||||||
|  |             {"$set": {"activeQAState": "$qaState"}}, | ||||||
|  |             { | ||||||
|  |                 "$set": { | ||||||
|  |                     "qaFinishedArray": { | ||||||
|  |                         "$map": { | ||||||
|  |                             "input": {"$objectToArray": "$qaFinished"}, | ||||||
|  |                             "in": "$$this.v", | ||||||
|  |                         } | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |             }, | ||||||
|  |             { | ||||||
|  |                 "$set": { | ||||||
|  |                     "sortedQARuns": { | ||||||
|  |                         "$sortArray": { | ||||||
|  |                             "input": "$qaFinishedArray", | ||||||
|  |                             "sortBy": {"started": -1}, | ||||||
|  |                         } | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |             }, | ||||||
|  |             {"$set": {"lastQARun": {"$arrayElemAt": ["$sortedQARuns", 0]}}}, | ||||||
|  |             {"$set": {"lastQAState": "$lastQARun.state"}}, | ||||||
|  |             { | ||||||
|  |                 "$set": { | ||||||
|  |                     "qaRunCount": { | ||||||
|  |                         "$size": { | ||||||
|  |                             "$cond": [ | ||||||
|  |                                 {"$isArray": "$qaFinishedArray"}, | ||||||
|  |                                 "$qaFinishedArray", | ||||||
|  |                                 [], | ||||||
|  |                             ] | ||||||
|  |                         } | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |             }, | ||||||
|  |             { | ||||||
|  |                 "$unset": [ | ||||||
|  |                     "lastQARun", | ||||||
|  |                     "qaFinishedArray", | ||||||
|  |                     "sortedQARuns", | ||||||
|  |                 ] | ||||||
|  |             }, | ||||||
|         ] |         ] | ||||||
| 
 | 
 | ||||||
|         if not resources: |         if not resources: | ||||||
| @ -569,12 +613,25 @@ class BaseCrawlOps: | |||||||
|             aggregate.extend([{"$match": {"collectionIds": {"$in": [collection_id]}}}]) |             aggregate.extend([{"$match": {"collectionIds": {"$in": [collection_id]}}}]) | ||||||
| 
 | 
 | ||||||
|         if sort_by: |         if sort_by: | ||||||
|             if sort_by not in ("started", "finished", "fileSize"): |             if sort_by not in ( | ||||||
|  |                 "started", | ||||||
|  |                 "finished", | ||||||
|  |                 "fileSize", | ||||||
|  |                 "reviewStatus", | ||||||
|  |                 "qaRunCount", | ||||||
|  |                 "qaState", | ||||||
|  |             ): | ||||||
|                 raise HTTPException(status_code=400, detail="invalid_sort_by") |                 raise HTTPException(status_code=400, detail="invalid_sort_by") | ||||||
|             if sort_direction not in (1, -1): |             if sort_direction not in (1, -1): | ||||||
|                 raise HTTPException(status_code=400, detail="invalid_sort_direction") |                 raise HTTPException(status_code=400, detail="invalid_sort_direction") | ||||||
| 
 | 
 | ||||||
|             aggregate.extend([{"$sort": {sort_by: sort_direction}}]) |             sort_query = {sort_by: sort_direction} | ||||||
|  | 
 | ||||||
|  |             # Add secondary sort for qaState - sorted by current, then last | ||||||
|  |             if sort_by == "qaState": | ||||||
|  |                 sort_query["lastQAState"] = sort_direction | ||||||
|  | 
 | ||||||
|  |             aggregate.extend([{"$sort": sort_query}]) | ||||||
| 
 | 
 | ||||||
|         aggregate.extend( |         aggregate.extend( | ||||||
|             [ |             [ | ||||||
|  | |||||||
| @ -165,6 +165,50 @@ class CrawlOps(BaseCrawlOps): | |||||||
|             {"$set": {"firstSeedObject": {"$arrayElemAt": ["$config.seeds", 0]}}}, |             {"$set": {"firstSeedObject": {"$arrayElemAt": ["$config.seeds", 0]}}}, | ||||||
|             {"$set": {"firstSeed": "$firstSeedObject.url"}}, |             {"$set": {"firstSeed": "$firstSeedObject.url"}}, | ||||||
|             {"$unset": ["firstSeedObject", "errors", "config"]}, |             {"$unset": ["firstSeedObject", "errors", "config"]}, | ||||||
|  |             {"$set": {"qaState": "$qa.state"}}, | ||||||
|  |             {"$set": {"activeQAState": "$qaState"}}, | ||||||
|  |             { | ||||||
|  |                 "$set": { | ||||||
|  |                     "qaFinishedArray": { | ||||||
|  |                         "$map": { | ||||||
|  |                             "input": {"$objectToArray": "$qaFinished"}, | ||||||
|  |                             "in": "$$this.v", | ||||||
|  |                         } | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |             }, | ||||||
|  |             { | ||||||
|  |                 "$set": { | ||||||
|  |                     "sortedQARuns": { | ||||||
|  |                         "$sortArray": { | ||||||
|  |                             "input": "$qaFinishedArray", | ||||||
|  |                             "sortBy": {"started": -1}, | ||||||
|  |                         } | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |             }, | ||||||
|  |             {"$set": {"lastQARun": {"$arrayElemAt": ["$sortedQARuns", 0]}}}, | ||||||
|  |             {"$set": {"lastQAState": "$lastQARun.state"}}, | ||||||
|  |             { | ||||||
|  |                 "$set": { | ||||||
|  |                     "qaRunCount": { | ||||||
|  |                         "$size": { | ||||||
|  |                             "$cond": [ | ||||||
|  |                                 {"$isArray": "$qaFinishedArray"}, | ||||||
|  |                                 "$qaFinishedArray", | ||||||
|  |                                 [], | ||||||
|  |                             ] | ||||||
|  |                         } | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |             }, | ||||||
|  |             { | ||||||
|  |                 "$unset": [ | ||||||
|  |                     "lastQARun", | ||||||
|  |                     "qaFinishedArray", | ||||||
|  |                     "sortedQARuns", | ||||||
|  |                 ] | ||||||
|  |             }, | ||||||
|         ] |         ] | ||||||
| 
 | 
 | ||||||
|         if not resources: |         if not resources: | ||||||
| @ -188,12 +232,21 @@ class CrawlOps(BaseCrawlOps): | |||||||
|                 "finished", |                 "finished", | ||||||
|                 "fileSize", |                 "fileSize", | ||||||
|                 "firstSeed", |                 "firstSeed", | ||||||
|  |                 "reviewStatus", | ||||||
|  |                 "qaRunCount", | ||||||
|  |                 "qaState", | ||||||
|             ): |             ): | ||||||
|                 raise HTTPException(status_code=400, detail="invalid_sort_by") |                 raise HTTPException(status_code=400, detail="invalid_sort_by") | ||||||
|             if sort_direction not in (1, -1): |             if sort_direction not in (1, -1): | ||||||
|                 raise HTTPException(status_code=400, detail="invalid_sort_direction") |                 raise HTTPException(status_code=400, detail="invalid_sort_direction") | ||||||
| 
 | 
 | ||||||
|             aggregate.extend([{"$sort": {sort_by: sort_direction}}]) |             sort_query = {sort_by: sort_direction} | ||||||
|  | 
 | ||||||
|  |             # Add secondary sort for qaState - sorted by current, then last | ||||||
|  |             if sort_by == "qaState": | ||||||
|  |                 sort_query["lastQAState"] = sort_direction | ||||||
|  | 
 | ||||||
|  |             aggregate.extend([{"$sort": sort_query}]) | ||||||
| 
 | 
 | ||||||
|         aggregate.extend( |         aggregate.extend( | ||||||
|             [ |             [ | ||||||
|  | |||||||
| @ -658,6 +658,10 @@ class CrawlOut(BaseMongoModel): | |||||||
| 
 | 
 | ||||||
|     reviewStatus: Optional[conint(ge=1, le=5)] = None  # type: ignore |     reviewStatus: Optional[conint(ge=1, le=5)] = None  # type: ignore | ||||||
| 
 | 
 | ||||||
|  |     qaRunCount: int = 0 | ||||||
|  |     activeQAState: Optional[str] | ||||||
|  |     lastQAState: Optional[str] | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| # ============================================================================ | # ============================================================================ | ||||||
| class CrawlOutWithResources(CrawlOut): | class CrawlOutWithResources(CrawlOut): | ||||||
|  | |||||||
| @ -116,6 +116,28 @@ def failed_qa_run_id(crawler_crawl_id, crawler_auth_headers, default_org_id): | |||||||
|     assert qa["started"] |     assert qa["started"] | ||||||
|     assert not qa["finished"] |     assert not qa["finished"] | ||||||
| 
 | 
 | ||||||
|  |     # Ensure sorting by qaState works as expected - current floated to top | ||||||
|  |     r = requests.get( | ||||||
|  |         f"{API_PREFIX}/orgs/{default_org_id}/crawls?sortBy=qaState", | ||||||
|  |         headers=crawler_auth_headers, | ||||||
|  |     ) | ||||||
|  |     assert r.status_code == 200 | ||||||
|  |     crawls = r.json()["items"] | ||||||
|  |     assert crawls[0]["id"] == crawler_crawl_id | ||||||
|  |     assert crawls[0]["activeQAState"] | ||||||
|  |     assert crawls[0]["lastQAState"] | ||||||
|  | 
 | ||||||
|  |     # Ensure sorting by qaState works as expected with all-crawls | ||||||
|  |     r = requests.get( | ||||||
|  |         f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=qaState", | ||||||
|  |         headers=crawler_auth_headers, | ||||||
|  |     ) | ||||||
|  |     assert r.status_code == 200 | ||||||
|  |     crawls = r.json()["items"] | ||||||
|  |     assert crawls[0]["id"] == crawler_crawl_id | ||||||
|  |     assert crawls[0]["activeQAState"] | ||||||
|  |     assert crawls[0]["lastQAState"] | ||||||
|  | 
 | ||||||
|     # Cancel crawl |     # Cancel crawl | ||||||
|     r = requests.post( |     r = requests.post( | ||||||
|         f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa/cancel", |         f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa/cancel", | ||||||
| @ -340,6 +362,96 @@ def test_failed_qa_run( | |||||||
|     assert qa["crawlExecSeconds"] > 0 |     assert qa["crawlExecSeconds"] > 0 | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | def test_sort_crawls_by_qa_runs( | ||||||
|  |     crawler_crawl_id, | ||||||
|  |     crawler_auth_headers, | ||||||
|  |     default_org_id, | ||||||
|  |     failed_qa_run_id, | ||||||
|  |     qa_run_pages_ready, | ||||||
|  | ): | ||||||
|  |     # Test that sorting by qaRunCount works as expected | ||||||
|  |     r = requests.get( | ||||||
|  |         f"{API_PREFIX}/orgs/{default_org_id}/crawls?sortBy=qaRunCount", | ||||||
|  |         headers=crawler_auth_headers, | ||||||
|  |     ) | ||||||
|  |     assert r.status_code == 200 | ||||||
|  |     crawls = r.json()["items"] | ||||||
|  | 
 | ||||||
|  |     assert crawls[0]["id"] == crawler_crawl_id | ||||||
|  |     qa_run_count = crawls[0]["qaRunCount"] | ||||||
|  |     assert qa_run_count > 0 | ||||||
|  | 
 | ||||||
|  |     last_count = qa_run_count | ||||||
|  |     for crawl in crawls: | ||||||
|  |         if crawl["id"] == crawler_crawl_id: | ||||||
|  |             continue | ||||||
|  |         crawl_qa_count = crawl["qaRunCount"] | ||||||
|  |         assert isinstance(crawl_qa_count, int) | ||||||
|  |         assert crawl_qa_count <= last_count | ||||||
|  |         last_count = crawl_qa_count | ||||||
|  | 
 | ||||||
|  |     # Test ascending sort | ||||||
|  |     r = requests.get( | ||||||
|  |         f"{API_PREFIX}/orgs/{default_org_id}/crawls?sortBy=qaRunCount&sortDirection=1", | ||||||
|  |         headers=crawler_auth_headers, | ||||||
|  |     ) | ||||||
|  |     assert r.status_code == 200 | ||||||
|  |     crawls = r.json()["items"] | ||||||
|  | 
 | ||||||
|  |     assert crawls[-1]["id"] == crawler_crawl_id | ||||||
|  |     assert crawls[-1]["qaRunCount"] > 0 | ||||||
|  | 
 | ||||||
|  |     last_count = 0 | ||||||
|  |     for crawl in crawls: | ||||||
|  |         if crawl["id"] == crawler_crawl_id: | ||||||
|  |             continue | ||||||
|  |         crawl_qa_count = crawl["qaRunCount"] | ||||||
|  |         assert isinstance(crawl_qa_count, int) | ||||||
|  |         assert crawl_qa_count >= last_count | ||||||
|  |         last_count = crawl_qa_count | ||||||
|  | 
 | ||||||
|  |     # Test same with all-crawls | ||||||
|  |     r = requests.get( | ||||||
|  |         f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=qaRunCount", | ||||||
|  |         headers=crawler_auth_headers, | ||||||
|  |     ) | ||||||
|  |     assert r.status_code == 200 | ||||||
|  |     crawls = r.json()["items"] | ||||||
|  | 
 | ||||||
|  |     assert crawls[0]["id"] == crawler_crawl_id | ||||||
|  |     qa_run_count = crawls[0]["qaRunCount"] | ||||||
|  |     assert qa_run_count > 0 | ||||||
|  | 
 | ||||||
|  |     last_count = qa_run_count | ||||||
|  |     for crawl in crawls: | ||||||
|  |         if crawl["id"] == crawler_crawl_id: | ||||||
|  |             continue | ||||||
|  |         crawl_qa_count = crawl["qaRunCount"] | ||||||
|  |         assert isinstance(crawl_qa_count, int) | ||||||
|  |         assert crawl_qa_count <= last_count | ||||||
|  |         last_count = crawl_qa_count | ||||||
|  | 
 | ||||||
|  |     # Test ascending sort | ||||||
|  |     r = requests.get( | ||||||
|  |         f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=qaRunCount&sortDirection=1", | ||||||
|  |         headers=crawler_auth_headers, | ||||||
|  |     ) | ||||||
|  |     assert r.status_code == 200 | ||||||
|  |     crawls = r.json()["items"] | ||||||
|  | 
 | ||||||
|  |     assert crawls[-1]["id"] == crawler_crawl_id | ||||||
|  |     assert crawls[-1]["qaRunCount"] > 0 | ||||||
|  | 
 | ||||||
|  |     last_count = 0 | ||||||
|  |     for crawl in crawls: | ||||||
|  |         if crawl["id"] == crawler_crawl_id: | ||||||
|  |             continue | ||||||
|  |         crawl_qa_count = crawl["qaRunCount"] | ||||||
|  |         assert isinstance(crawl_qa_count, int) | ||||||
|  |         assert crawl_qa_count >= last_count | ||||||
|  |         last_count = crawl_qa_count | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| def test_delete_qa_runs( | def test_delete_qa_runs( | ||||||
|     crawler_crawl_id, |     crawler_crawl_id, | ||||||
|     crawler_auth_headers, |     crawler_auth_headers, | ||||||
|  | |||||||
| @ -303,6 +303,44 @@ def test_update_crawl( | |||||||
|     assert r.status_code == 200 |     assert r.status_code == 200 | ||||||
|     assert r.json()["reviewStatus"] == 5 |     assert r.json()["reviewStatus"] == 5 | ||||||
| 
 | 
 | ||||||
|  |     # Test sorting on reviewStatus | ||||||
|  |     r = requests.get( | ||||||
|  |         f"{API_PREFIX}/orgs/{default_org_id}/crawls?sortBy=reviewStatus", | ||||||
|  |         headers=admin_auth_headers, | ||||||
|  |     ) | ||||||
|  |     assert r.status_code == 200 | ||||||
|  |     crawls = r.json()["items"] | ||||||
|  |     assert crawls[0]["id"] == admin_crawl_id | ||||||
|  |     assert crawls[0]["reviewStatus"] == 5 | ||||||
|  | 
 | ||||||
|  |     r = requests.get( | ||||||
|  |         f"{API_PREFIX}/orgs/{default_org_id}/crawls?sortBy=reviewStatus&sortDirection=1", | ||||||
|  |         headers=admin_auth_headers, | ||||||
|  |     ) | ||||||
|  |     assert r.status_code == 200 | ||||||
|  |     crawls = r.json()["items"] | ||||||
|  |     assert crawls[-1]["id"] == admin_crawl_id | ||||||
|  |     assert crawls[-1]["reviewStatus"] == 5 | ||||||
|  | 
 | ||||||
|  |     # Test sorting on reviewStatus for all-crawls | ||||||
|  |     r = requests.get( | ||||||
|  |         f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=reviewStatus", | ||||||
|  |         headers=admin_auth_headers, | ||||||
|  |     ) | ||||||
|  |     assert r.status_code == 200 | ||||||
|  |     crawls = r.json()["items"] | ||||||
|  |     assert crawls[0]["id"] == admin_crawl_id | ||||||
|  |     assert crawls[0]["reviewStatus"] == 5 | ||||||
|  | 
 | ||||||
|  |     r = requests.get( | ||||||
|  |         f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=reviewStatus&sortDirection=1", | ||||||
|  |         headers=admin_auth_headers, | ||||||
|  |     ) | ||||||
|  |     assert r.status_code == 200 | ||||||
|  |     crawls = r.json()["items"] | ||||||
|  |     assert crawls[-1]["id"] == admin_crawl_id | ||||||
|  |     assert crawls[-1]["reviewStatus"] == 5 | ||||||
|  | 
 | ||||||
|     # Try to update to invalid reviewStatus |     # Try to update to invalid reviewStatus | ||||||
|     r = requests.patch( |     r = requests.patch( | ||||||
|         f"{API_PREFIX}/orgs/{default_org_id}/crawls/{admin_crawl_id}", |         f"{API_PREFIX}/orgs/{default_org_id}/crawls/{admin_crawl_id}", | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user