Add reviewStatus, qaState, and qaRunCount sort options to crawls/all-crawls list endpoints (#1686)
Backend work for #1672.

Adds new sort options to the /crawls and /all-crawls GET list endpoints:

- `reviewStatus`
- `qaRunCount`: number of completed QA runs for the crawl (also added to CrawlOut)
- `qaState`: sorts by `activeQAState` first, then `lastQAState` (both of which are added to CrawlOut)
This commit is contained in:
		
							parent
							
								
									87e0873f1a
								
							
						
					
					
						commit
						c800da1732
					
				| @ -551,6 +551,50 @@ class BaseCrawlOps: | ||||
|             {"$set": {"firstSeedObject": {"$arrayElemAt": ["$config.seeds", 0]}}}, | ||||
|             {"$set": {"firstSeed": "$firstSeedObject.url"}}, | ||||
|             {"$unset": ["firstSeedObject", "errors", "config"]}, | ||||
|             {"$set": {"qaState": "$qa.state"}}, | ||||
|             {"$set": {"activeQAState": "$qaState"}}, | ||||
|             { | ||||
|                 "$set": { | ||||
|                     "qaFinishedArray": { | ||||
|                         "$map": { | ||||
|                             "input": {"$objectToArray": "$qaFinished"}, | ||||
|                             "in": "$$this.v", | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
|             }, | ||||
|             { | ||||
|                 "$set": { | ||||
|                     "sortedQARuns": { | ||||
|                         "$sortArray": { | ||||
|                             "input": "$qaFinishedArray", | ||||
|                             "sortBy": {"started": -1}, | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
|             }, | ||||
|             {"$set": {"lastQARun": {"$arrayElemAt": ["$sortedQARuns", 0]}}}, | ||||
|             {"$set": {"lastQAState": "$lastQARun.state"}}, | ||||
|             { | ||||
|                 "$set": { | ||||
|                     "qaRunCount": { | ||||
|                         "$size": { | ||||
|                             "$cond": [ | ||||
|                                 {"$isArray": "$qaFinishedArray"}, | ||||
|                                 "$qaFinishedArray", | ||||
|                                 [], | ||||
|                             ] | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
|             }, | ||||
|             { | ||||
|                 "$unset": [ | ||||
|                     "lastQARun", | ||||
|                     "qaFinishedArray", | ||||
|                     "sortedQARuns", | ||||
|                 ] | ||||
|             }, | ||||
|         ] | ||||
| 
 | ||||
|         if not resources: | ||||
| @ -569,12 +613,25 @@ class BaseCrawlOps: | ||||
|             aggregate.extend([{"$match": {"collectionIds": {"$in": [collection_id]}}}]) | ||||
| 
 | ||||
|         if sort_by: | ||||
|             if sort_by not in ("started", "finished", "fileSize"): | ||||
|             if sort_by not in ( | ||||
|                 "started", | ||||
|                 "finished", | ||||
|                 "fileSize", | ||||
|                 "reviewStatus", | ||||
|                 "qaRunCount", | ||||
|                 "qaState", | ||||
|             ): | ||||
|                 raise HTTPException(status_code=400, detail="invalid_sort_by") | ||||
|             if sort_direction not in (1, -1): | ||||
|                 raise HTTPException(status_code=400, detail="invalid_sort_direction") | ||||
| 
 | ||||
|             aggregate.extend([{"$sort": {sort_by: sort_direction}}]) | ||||
|             sort_query = {sort_by: sort_direction} | ||||
| 
 | ||||
|             # Add secondary sort for qaState - sorted by current, then last | ||||
|             if sort_by == "qaState": | ||||
|                 sort_query["lastQAState"] = sort_direction | ||||
| 
 | ||||
|             aggregate.extend([{"$sort": sort_query}]) | ||||
| 
 | ||||
|         aggregate.extend( | ||||
|             [ | ||||
|  | ||||
| @ -165,6 +165,50 @@ class CrawlOps(BaseCrawlOps): | ||||
|             {"$set": {"firstSeedObject": {"$arrayElemAt": ["$config.seeds", 0]}}}, | ||||
|             {"$set": {"firstSeed": "$firstSeedObject.url"}}, | ||||
|             {"$unset": ["firstSeedObject", "errors", "config"]}, | ||||
|             {"$set": {"qaState": "$qa.state"}}, | ||||
|             {"$set": {"activeQAState": "$qaState"}}, | ||||
|             { | ||||
|                 "$set": { | ||||
|                     "qaFinishedArray": { | ||||
|                         "$map": { | ||||
|                             "input": {"$objectToArray": "$qaFinished"}, | ||||
|                             "in": "$$this.v", | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
|             }, | ||||
|             { | ||||
|                 "$set": { | ||||
|                     "sortedQARuns": { | ||||
|                         "$sortArray": { | ||||
|                             "input": "$qaFinishedArray", | ||||
|                             "sortBy": {"started": -1}, | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
|             }, | ||||
|             {"$set": {"lastQARun": {"$arrayElemAt": ["$sortedQARuns", 0]}}}, | ||||
|             {"$set": {"lastQAState": "$lastQARun.state"}}, | ||||
|             { | ||||
|                 "$set": { | ||||
|                     "qaRunCount": { | ||||
|                         "$size": { | ||||
|                             "$cond": [ | ||||
|                                 {"$isArray": "$qaFinishedArray"}, | ||||
|                                 "$qaFinishedArray", | ||||
|                                 [], | ||||
|                             ] | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
|             }, | ||||
|             { | ||||
|                 "$unset": [ | ||||
|                     "lastQARun", | ||||
|                     "qaFinishedArray", | ||||
|                     "sortedQARuns", | ||||
|                 ] | ||||
|             }, | ||||
|         ] | ||||
| 
 | ||||
|         if not resources: | ||||
| @ -188,12 +232,21 @@ class CrawlOps(BaseCrawlOps): | ||||
|                 "finished", | ||||
|                 "fileSize", | ||||
|                 "firstSeed", | ||||
|                 "reviewStatus", | ||||
|                 "qaRunCount", | ||||
|                 "qaState", | ||||
|             ): | ||||
|                 raise HTTPException(status_code=400, detail="invalid_sort_by") | ||||
|             if sort_direction not in (1, -1): | ||||
|                 raise HTTPException(status_code=400, detail="invalid_sort_direction") | ||||
| 
 | ||||
|             aggregate.extend([{"$sort": {sort_by: sort_direction}}]) | ||||
|             sort_query = {sort_by: sort_direction} | ||||
| 
 | ||||
|             # Add secondary sort for qaState - sorted by current, then last | ||||
|             if sort_by == "qaState": | ||||
|                 sort_query["lastQAState"] = sort_direction | ||||
| 
 | ||||
|             aggregate.extend([{"$sort": sort_query}]) | ||||
| 
 | ||||
|         aggregate.extend( | ||||
|             [ | ||||
|  | ||||
| @ -658,6 +658,10 @@ class CrawlOut(BaseMongoModel): | ||||
| 
 | ||||
|     reviewStatus: Optional[conint(ge=1, le=5)] = None  # type: ignore | ||||
| 
 | ||||
|     qaRunCount: int = 0 | ||||
|     activeQAState: Optional[str] | ||||
|     lastQAState: Optional[str] | ||||
| 
 | ||||
| 
 | ||||
| # ============================================================================ | ||||
| class CrawlOutWithResources(CrawlOut): | ||||
|  | ||||
| @ -116,6 +116,28 @@ def failed_qa_run_id(crawler_crawl_id, crawler_auth_headers, default_org_id): | ||||
|     assert qa["started"] | ||||
|     assert not qa["finished"] | ||||
| 
 | ||||
|     # Ensure sorting by qaState works as expected - current floated to top | ||||
|     r = requests.get( | ||||
|         f"{API_PREFIX}/orgs/{default_org_id}/crawls?sortBy=qaState", | ||||
|         headers=crawler_auth_headers, | ||||
|     ) | ||||
|     assert r.status_code == 200 | ||||
|     crawls = r.json()["items"] | ||||
|     assert crawls[0]["id"] == crawler_crawl_id | ||||
|     assert crawls[0]["activeQAState"] | ||||
|     assert crawls[0]["lastQAState"] | ||||
| 
 | ||||
|     # Ensure sorting by qaState works as expected with all-crawls | ||||
|     r = requests.get( | ||||
|         f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=qaState", | ||||
|         headers=crawler_auth_headers, | ||||
|     ) | ||||
|     assert r.status_code == 200 | ||||
|     crawls = r.json()["items"] | ||||
|     assert crawls[0]["id"] == crawler_crawl_id | ||||
|     assert crawls[0]["activeQAState"] | ||||
|     assert crawls[0]["lastQAState"] | ||||
| 
 | ||||
|     # Cancel crawl | ||||
|     r = requests.post( | ||||
|         f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa/cancel", | ||||
| @ -340,6 +362,96 @@ def test_failed_qa_run( | ||||
|     assert qa["crawlExecSeconds"] > 0 | ||||
| 
 | ||||
| 
 | ||||
| def test_sort_crawls_by_qa_runs( | ||||
|     crawler_crawl_id, | ||||
|     crawler_auth_headers, | ||||
|     default_org_id, | ||||
|     failed_qa_run_id, | ||||
|     qa_run_pages_ready, | ||||
| ): | ||||
|     # Test that sorting by qaRunCount works as expected | ||||
|     r = requests.get( | ||||
|         f"{API_PREFIX}/orgs/{default_org_id}/crawls?sortBy=qaRunCount", | ||||
|         headers=crawler_auth_headers, | ||||
|     ) | ||||
|     assert r.status_code == 200 | ||||
|     crawls = r.json()["items"] | ||||
| 
 | ||||
|     assert crawls[0]["id"] == crawler_crawl_id | ||||
|     qa_run_count = crawls[0]["qaRunCount"] | ||||
|     assert qa_run_count > 0 | ||||
| 
 | ||||
|     last_count = qa_run_count | ||||
|     for crawl in crawls: | ||||
|         if crawl["id"] == crawler_crawl_id: | ||||
|             continue | ||||
|         crawl_qa_count = crawl["qaRunCount"] | ||||
|         assert isinstance(crawl_qa_count, int) | ||||
|         assert crawl_qa_count <= last_count | ||||
|         last_count = crawl_qa_count | ||||
| 
 | ||||
|     # Test ascending sort | ||||
|     r = requests.get( | ||||
|         f"{API_PREFIX}/orgs/{default_org_id}/crawls?sortBy=qaRunCount&sortDirection=1", | ||||
|         headers=crawler_auth_headers, | ||||
|     ) | ||||
|     assert r.status_code == 200 | ||||
|     crawls = r.json()["items"] | ||||
| 
 | ||||
|     assert crawls[-1]["id"] == crawler_crawl_id | ||||
|     assert crawls[-1]["qaRunCount"] > 0 | ||||
| 
 | ||||
|     last_count = 0 | ||||
|     for crawl in crawls: | ||||
|         if crawl["id"] == crawler_crawl_id: | ||||
|             continue | ||||
|         crawl_qa_count = crawl["qaRunCount"] | ||||
|         assert isinstance(crawl_qa_count, int) | ||||
|         assert crawl_qa_count >= last_count | ||||
|         last_count = crawl_qa_count | ||||
| 
 | ||||
|     # Test same with all-crawls | ||||
|     r = requests.get( | ||||
|         f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=qaRunCount", | ||||
|         headers=crawler_auth_headers, | ||||
|     ) | ||||
|     assert r.status_code == 200 | ||||
|     crawls = r.json()["items"] | ||||
| 
 | ||||
|     assert crawls[0]["id"] == crawler_crawl_id | ||||
|     qa_run_count = crawls[0]["qaRunCount"] | ||||
|     assert qa_run_count > 0 | ||||
| 
 | ||||
|     last_count = qa_run_count | ||||
|     for crawl in crawls: | ||||
|         if crawl["id"] == crawler_crawl_id: | ||||
|             continue | ||||
|         crawl_qa_count = crawl["qaRunCount"] | ||||
|         assert isinstance(crawl_qa_count, int) | ||||
|         assert crawl_qa_count <= last_count | ||||
|         last_count = crawl_qa_count | ||||
| 
 | ||||
|     # Test ascending sort | ||||
|     r = requests.get( | ||||
|         f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=qaRunCount&sortDirection=1", | ||||
|         headers=crawler_auth_headers, | ||||
|     ) | ||||
|     assert r.status_code == 200 | ||||
|     crawls = r.json()["items"] | ||||
| 
 | ||||
|     assert crawls[-1]["id"] == crawler_crawl_id | ||||
|     assert crawls[-1]["qaRunCount"] > 0 | ||||
| 
 | ||||
|     last_count = 0 | ||||
|     for crawl in crawls: | ||||
|         if crawl["id"] == crawler_crawl_id: | ||||
|             continue | ||||
|         crawl_qa_count = crawl["qaRunCount"] | ||||
|         assert isinstance(crawl_qa_count, int) | ||||
|         assert crawl_qa_count >= last_count | ||||
|         last_count = crawl_qa_count | ||||
| 
 | ||||
| 
 | ||||
| def test_delete_qa_runs( | ||||
|     crawler_crawl_id, | ||||
|     crawler_auth_headers, | ||||
|  | ||||
| @ -303,6 +303,44 @@ def test_update_crawl( | ||||
|     assert r.status_code == 200 | ||||
|     assert r.json()["reviewStatus"] == 5 | ||||
| 
 | ||||
|     # Test sorting on reviewStatus | ||||
|     r = requests.get( | ||||
|         f"{API_PREFIX}/orgs/{default_org_id}/crawls?sortBy=reviewStatus", | ||||
|         headers=admin_auth_headers, | ||||
|     ) | ||||
|     assert r.status_code == 200 | ||||
|     crawls = r.json()["items"] | ||||
|     assert crawls[0]["id"] == admin_crawl_id | ||||
|     assert crawls[0]["reviewStatus"] == 5 | ||||
| 
 | ||||
|     r = requests.get( | ||||
|         f"{API_PREFIX}/orgs/{default_org_id}/crawls?sortBy=reviewStatus&sortDirection=1", | ||||
|         headers=admin_auth_headers, | ||||
|     ) | ||||
|     assert r.status_code == 200 | ||||
|     crawls = r.json()["items"] | ||||
|     assert crawls[-1]["id"] == admin_crawl_id | ||||
|     assert crawls[-1]["reviewStatus"] == 5 | ||||
| 
 | ||||
|     # Test sorting on reviewStatus for all-crawls | ||||
|     r = requests.get( | ||||
|         f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=reviewStatus", | ||||
|         headers=admin_auth_headers, | ||||
|     ) | ||||
|     assert r.status_code == 200 | ||||
|     crawls = r.json()["items"] | ||||
|     assert crawls[0]["id"] == admin_crawl_id | ||||
|     assert crawls[0]["reviewStatus"] == 5 | ||||
| 
 | ||||
|     r = requests.get( | ||||
|         f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=reviewStatus&sortDirection=1", | ||||
|         headers=admin_auth_headers, | ||||
|     ) | ||||
|     assert r.status_code == 200 | ||||
|     crawls = r.json()["items"] | ||||
|     assert crawls[-1]["id"] == admin_crawl_id | ||||
|     assert crawls[-1]["reviewStatus"] == 5 | ||||
| 
 | ||||
|     # Try to update to invalid reviewStatus | ||||
|     r = requests.patch( | ||||
|         f"{API_PREFIX}/orgs/{default_org_id}/crawls/{admin_crawl_id}", | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user