Support sorting by last QA started time (#1712)
To support #1683, it is useful to be able to sort by "last QA start time" in addition to, or instead of, the last QA state.

- Make sorting consistent with workflow sorting
- sortBy fields renamed to lastQAState and lastQAStarted
- Current QA runs are now included in the lastQAState/lastQAStarted fields, rather than being separated out into different values

---------

Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
This commit is contained in:
		
							parent
							
								
									b574f00d2b
								
							
						
					
					
						commit
						1844e761dc
					
				| @ -551,8 +551,6 @@ class BaseCrawlOps: | ||||
|             {"$set": {"firstSeedObject": {"$arrayElemAt": ["$config.seeds", 0]}}}, | ||||
|             {"$set": {"firstSeed": "$firstSeedObject.url"}}, | ||||
|             {"$unset": ["firstSeedObject", "errors", "config"]}, | ||||
|             {"$set": {"qaState": "$qa.state"}}, | ||||
|             {"$set": {"activeQAState": "$qaState"}}, | ||||
|             {"$set": {"activeQAStats": "$qa.stats"}}, | ||||
|             { | ||||
|                 "$set": { | ||||
| @ -564,11 +562,23 @@ class BaseCrawlOps: | ||||
|                     } | ||||
|                 } | ||||
|             }, | ||||
|             # Add active QA run to array if exists prior to sorting, taking care not to | ||||
|             # pass null to $concatArrays so that our result isn't null | ||||
|             { | ||||
|                 "$set": { | ||||
|                     "qaActiveArray": {"$cond": [{"$ne": ["$qa", None]}, ["$qa"], []]} | ||||
|                 } | ||||
|             }, | ||||
|             { | ||||
|                 "$set": { | ||||
|                     "qaArray": {"$concatArrays": ["$qaFinishedArray", "$qaActiveArray"]} | ||||
|                 } | ||||
|             }, | ||||
|             { | ||||
|                 "$set": { | ||||
|                     "sortedQARuns": { | ||||
|                         "$sortArray": { | ||||
|                             "input": "$qaFinishedArray", | ||||
|                             "input": "$qaArray", | ||||
|                             "sortBy": {"started": -1}, | ||||
|                         } | ||||
|                     } | ||||
| @ -576,13 +586,14 @@ class BaseCrawlOps: | ||||
|             }, | ||||
|             {"$set": {"lastQARun": {"$arrayElemAt": ["$sortedQARuns", 0]}}}, | ||||
|             {"$set": {"lastQAState": "$lastQARun.state"}}, | ||||
|             {"$set": {"lastQAStarted": "$lastQARun.started"}}, | ||||
|             { | ||||
|                 "$set": { | ||||
|                     "qaRunCount": { | ||||
|                         "$size": { | ||||
|                             "$cond": [ | ||||
|                                 {"$isArray": "$qaFinishedArray"}, | ||||
|                                 "$qaFinishedArray", | ||||
|                                 {"$isArray": "$qaArray"}, | ||||
|                                 "$qaArray", | ||||
|                                 [], | ||||
|                             ] | ||||
|                         } | ||||
| @ -592,7 +603,9 @@ class BaseCrawlOps: | ||||
|             { | ||||
|                 "$unset": [ | ||||
|                     "lastQARun", | ||||
|                     "qaActiveArray", | ||||
|                     "qaFinishedArray", | ||||
|                     "qaArray", | ||||
|                     "sortedQARuns", | ||||
|                 ] | ||||
|             }, | ||||
| @ -619,8 +632,9 @@ class BaseCrawlOps: | ||||
|                 "finished", | ||||
|                 "fileSize", | ||||
|                 "reviewStatus", | ||||
|                 "lastQAStarted", | ||||
|                 "lastQAState", | ||||
|                 "qaRunCount", | ||||
|                 "qaState", | ||||
|             ): | ||||
|                 raise HTTPException(status_code=400, detail="invalid_sort_by") | ||||
|             if sort_direction not in (1, -1): | ||||
| @ -628,10 +642,8 @@ class BaseCrawlOps: | ||||
| 
 | ||||
|             sort_query = {sort_by: sort_direction} | ||||
| 
 | ||||
|             # Secondary sort for qaState - sorted by current, then last | ||||
|             # Tertiary sort for qaState - type, always ascending so crawls are first | ||||
|             if sort_by == "qaState": | ||||
|                 sort_query["lastQAState"] = sort_direction | ||||
|             # Ensure crawls are always sorted first for QA-related sorts | ||||
|             if sort_by in ("lastQAStarted", "lastQAState"): | ||||
|                 sort_query["type"] = 1 | ||||
| 
 | ||||
|             aggregate.extend([{"$sort": sort_query}]) | ||||
|  | ||||
| @ -169,8 +169,6 @@ class CrawlOps(BaseCrawlOps): | ||||
|             {"$set": {"firstSeedObject": {"$arrayElemAt": ["$config.seeds", 0]}}}, | ||||
|             {"$set": {"firstSeed": "$firstSeedObject.url"}}, | ||||
|             {"$unset": ["firstSeedObject", "errors", "config"]}, | ||||
|             {"$set": {"qaState": "$qa.state"}}, | ||||
|             {"$set": {"activeQAState": "$qaState"}}, | ||||
|             {"$set": {"activeQAStats": "$qa.stats"}}, | ||||
|             { | ||||
|                 "$set": { | ||||
| @ -182,11 +180,23 @@ class CrawlOps(BaseCrawlOps): | ||||
|                     } | ||||
|                 } | ||||
|             }, | ||||
|             # Add active QA run to array if exists prior to sorting, taking care not to | ||||
|             # pass null to $concatArrays so that our result isn't null | ||||
|             { | ||||
|                 "$set": { | ||||
|                     "qaActiveArray": {"$cond": [{"$ne": ["$qa", None]}, ["$qa"], []]} | ||||
|                 } | ||||
|             }, | ||||
|             { | ||||
|                 "$set": { | ||||
|                     "qaArray": {"$concatArrays": ["$qaFinishedArray", "$qaActiveArray"]} | ||||
|                 } | ||||
|             }, | ||||
|             { | ||||
|                 "$set": { | ||||
|                     "sortedQARuns": { | ||||
|                         "$sortArray": { | ||||
|                             "input": "$qaFinishedArray", | ||||
|                             "input": "$qaArray", | ||||
|                             "sortBy": {"started": -1}, | ||||
|                         } | ||||
|                     } | ||||
| @ -194,13 +204,14 @@ class CrawlOps(BaseCrawlOps): | ||||
|             }, | ||||
|             {"$set": {"lastQARun": {"$arrayElemAt": ["$sortedQARuns", 0]}}}, | ||||
|             {"$set": {"lastQAState": "$lastQARun.state"}}, | ||||
|             {"$set": {"lastQAStarted": "$lastQARun.started"}}, | ||||
|             { | ||||
|                 "$set": { | ||||
|                     "qaRunCount": { | ||||
|                         "$size": { | ||||
|                             "$cond": [ | ||||
|                                 {"$isArray": "$qaFinishedArray"}, | ||||
|                                 "$qaFinishedArray", | ||||
|                                 {"$isArray": "$qaArray"}, | ||||
|                                 "$qaArray", | ||||
|                                 [], | ||||
|                             ] | ||||
|                         } | ||||
| @ -210,7 +221,9 @@ class CrawlOps(BaseCrawlOps): | ||||
|             { | ||||
|                 "$unset": [ | ||||
|                     "lastQARun", | ||||
|                     "qaActiveArray", | ||||
|                     "qaFinishedArray", | ||||
|                     "qaArray", | ||||
|                     "sortedQARuns", | ||||
|                 ] | ||||
|             }, | ||||
| @ -239,19 +252,14 @@ class CrawlOps(BaseCrawlOps): | ||||
|                 "firstSeed", | ||||
|                 "reviewStatus", | ||||
|                 "qaRunCount", | ||||
|                 "qaState", | ||||
|                 "lastQAState", | ||||
|                 "lastQAStarted", | ||||
|             ): | ||||
|                 raise HTTPException(status_code=400, detail="invalid_sort_by") | ||||
|             if sort_direction not in (1, -1): | ||||
|                 raise HTTPException(status_code=400, detail="invalid_sort_direction") | ||||
| 
 | ||||
|             sort_query = {sort_by: sort_direction} | ||||
| 
 | ||||
|             # Add secondary sort for qaState - sorted by current, then last | ||||
|             if sort_by == "qaState": | ||||
|                 sort_query["lastQAState"] = sort_direction | ||||
| 
 | ||||
|             aggregate.extend([{"$sort": sort_query}]) | ||||
|             aggregate.extend([{"$sort": {sort_by: sort_direction}}]) | ||||
| 
 | ||||
|         aggregate.extend( | ||||
|             [ | ||||
|  | ||||
| @ -660,9 +660,9 @@ class CrawlOut(BaseMongoModel): | ||||
|     reviewStatus: Optional[conint(ge=1, le=5)] = None  # type: ignore | ||||
| 
 | ||||
|     qaRunCount: int = 0 | ||||
|     activeQAState: Optional[str] | ||||
|     activeQAStats: Optional[CrawlStats] | ||||
|     lastQAState: Optional[str] | ||||
|     lastQAStarted: Optional[datetime] | ||||
| 
 | ||||
| 
 | ||||
| # ============================================================================ | ||||
|  | ||||
| @ -116,29 +116,53 @@ def failed_qa_run_id(crawler_crawl_id, crawler_auth_headers, default_org_id): | ||||
|     assert qa["started"] | ||||
|     assert not qa["finished"] | ||||
| 
 | ||||
|     # Ensure sorting by qaState works as expected - current floated to top | ||||
|     # Ensure sorting by lastQAState works as expected - current floated to top | ||||
|     r = requests.get( | ||||
|         f"{API_PREFIX}/orgs/{default_org_id}/crawls?sortBy=qaState", | ||||
|         f"{API_PREFIX}/orgs/{default_org_id}/crawls?sortBy=lastQAState", | ||||
|         headers=crawler_auth_headers, | ||||
|     ) | ||||
|     assert r.status_code == 200 | ||||
|     crawls = r.json()["items"] | ||||
|     assert crawls[0]["id"] == crawler_crawl_id | ||||
|     assert crawls[0]["activeQAState"] | ||||
|     assert crawls[0]["activeQAStats"] | ||||
|     assert crawls[0]["lastQAState"] | ||||
|     assert crawls[0]["lastQAStarted"] | ||||
| 
 | ||||
|     # Ensure sorting by qaState works as expected with all-crawls | ||||
|     # Ensure sorting by lastQAState works as expected with all-crawls | ||||
|     r = requests.get( | ||||
|         f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=qaState", | ||||
|         f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=lastQAState", | ||||
|         headers=crawler_auth_headers, | ||||
|     ) | ||||
|     assert r.status_code == 200 | ||||
|     crawls = r.json()["items"] | ||||
|     assert crawls[0]["id"] == crawler_crawl_id | ||||
|     assert crawls[0]["activeQAState"] | ||||
|     assert crawls[0]["activeQAStats"] | ||||
|     assert crawls[0]["lastQAState"] | ||||
|     assert crawls[0]["lastQAStarted"] | ||||
| 
 | ||||
|     # Ensure sorting by lastQAStarted works as expected - current floated to top | ||||
|     r = requests.get( | ||||
|         f"{API_PREFIX}/orgs/{default_org_id}/crawls?sortBy=lastQAStarted", | ||||
|         headers=crawler_auth_headers, | ||||
|     ) | ||||
|     assert r.status_code == 200 | ||||
|     crawls = r.json()["items"] | ||||
|     assert crawls[0]["id"] == crawler_crawl_id | ||||
|     assert crawls[0]["activeQAStats"] | ||||
|     assert crawls[0]["lastQAState"] | ||||
|     assert crawls[0]["lastQAStarted"] | ||||
| 
 | ||||
|     # Ensure sorting by lastQAStarted works as expected with all-crawls | ||||
|     r = requests.get( | ||||
|         f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=lastQAStarted", | ||||
|         headers=crawler_auth_headers, | ||||
|     ) | ||||
|     assert r.status_code == 200 | ||||
|     crawls = r.json()["items"] | ||||
|     assert crawls[0]["id"] == crawler_crawl_id | ||||
|     assert crawls[0]["activeQAStats"] | ||||
|     assert crawls[0]["lastQAState"] | ||||
|     assert crawls[0]["lastQAStarted"] | ||||
| 
 | ||||
|     # Cancel crawl | ||||
|     r = requests.post( | ||||
|  | ||||
| @ -419,9 +419,22 @@ def test_list_all_crawls( | ||||
|         assert item["finished"] | ||||
|         assert item["state"] | ||||
| 
 | ||||
|     # Test that all-crawls qaState sort always puts crawls before uploads | ||||
|     # Test that all-crawls lastQAState and lastQAStarted sorts always put crawls before uploads | ||||
|     r = requests.get( | ||||
|         f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=qaState", | ||||
|         f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=lastQAState", | ||||
|         headers=admin_auth_headers, | ||||
|     ) | ||||
|     assert r.status_code == 200 | ||||
|     data = r.json() | ||||
| 
 | ||||
|     last_type = None | ||||
|     for item in data["items"]: | ||||
|         if last_type == "upload": | ||||
|             assert item["type"] != "crawl" | ||||
|         last_type = item["type"] | ||||
| 
 | ||||
|     r = requests.get( | ||||
|         f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=lastQAStarted", | ||||
|         headers=admin_auth_headers, | ||||
|     ) | ||||
|     assert r.status_code == 200 | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user