Support sorting by last QA started time (#1712)
To support #1683, it would be useful to be able to sort by 'last QA start time' in addition to, or instead of, the last QA state.

- Make sorting consistent with workflow sorting
- sortBy fields renamed to lastQAState and lastQAStarted
- Current QA runs are now included in the lastQAState/lastQAStarted fields, rather than being separated out into different values

---------

Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
This commit is contained in:
		
							parent
							
								
									b574f00d2b
								
							
						
					
					
						commit
						1844e761dc
					
				| @ -551,8 +551,6 @@ class BaseCrawlOps: | |||||||
|             {"$set": {"firstSeedObject": {"$arrayElemAt": ["$config.seeds", 0]}}}, |             {"$set": {"firstSeedObject": {"$arrayElemAt": ["$config.seeds", 0]}}}, | ||||||
|             {"$set": {"firstSeed": "$firstSeedObject.url"}}, |             {"$set": {"firstSeed": "$firstSeedObject.url"}}, | ||||||
|             {"$unset": ["firstSeedObject", "errors", "config"]}, |             {"$unset": ["firstSeedObject", "errors", "config"]}, | ||||||
|             {"$set": {"qaState": "$qa.state"}}, |  | ||||||
|             {"$set": {"activeQAState": "$qaState"}}, |  | ||||||
|             {"$set": {"activeQAStats": "$qa.stats"}}, |             {"$set": {"activeQAStats": "$qa.stats"}}, | ||||||
|             { |             { | ||||||
|                 "$set": { |                 "$set": { | ||||||
| @ -564,11 +562,23 @@ class BaseCrawlOps: | |||||||
|                     } |                     } | ||||||
|                 } |                 } | ||||||
|             }, |             }, | ||||||
|  |             # Add active QA run to array if exists prior to sorting, taking care not to | ||||||
|  |             # pass null to $concatArrays so that our result isn't null | ||||||
|  |             { | ||||||
|  |                 "$set": { | ||||||
|  |                     "qaActiveArray": {"$cond": [{"$ne": ["$qa", None]}, ["$qa"], []]} | ||||||
|  |                 } | ||||||
|  |             }, | ||||||
|  |             { | ||||||
|  |                 "$set": { | ||||||
|  |                     "qaArray": {"$concatArrays": ["$qaFinishedArray", "$qaActiveArray"]} | ||||||
|  |                 } | ||||||
|  |             }, | ||||||
|             { |             { | ||||||
|                 "$set": { |                 "$set": { | ||||||
|                     "sortedQARuns": { |                     "sortedQARuns": { | ||||||
|                         "$sortArray": { |                         "$sortArray": { | ||||||
|                             "input": "$qaFinishedArray", |                             "input": "$qaArray", | ||||||
|                             "sortBy": {"started": -1}, |                             "sortBy": {"started": -1}, | ||||||
|                         } |                         } | ||||||
|                     } |                     } | ||||||
| @ -576,13 +586,14 @@ class BaseCrawlOps: | |||||||
|             }, |             }, | ||||||
|             {"$set": {"lastQARun": {"$arrayElemAt": ["$sortedQARuns", 0]}}}, |             {"$set": {"lastQARun": {"$arrayElemAt": ["$sortedQARuns", 0]}}}, | ||||||
|             {"$set": {"lastQAState": "$lastQARun.state"}}, |             {"$set": {"lastQAState": "$lastQARun.state"}}, | ||||||
|  |             {"$set": {"lastQAStarted": "$lastQARun.started"}}, | ||||||
|             { |             { | ||||||
|                 "$set": { |                 "$set": { | ||||||
|                     "qaRunCount": { |                     "qaRunCount": { | ||||||
|                         "$size": { |                         "$size": { | ||||||
|                             "$cond": [ |                             "$cond": [ | ||||||
|                                 {"$isArray": "$qaFinishedArray"}, |                                 {"$isArray": "$qaArray"}, | ||||||
|                                 "$qaFinishedArray", |                                 "$qaArray", | ||||||
|                                 [], |                                 [], | ||||||
|                             ] |                             ] | ||||||
|                         } |                         } | ||||||
| @ -592,7 +603,9 @@ class BaseCrawlOps: | |||||||
|             { |             { | ||||||
|                 "$unset": [ |                 "$unset": [ | ||||||
|                     "lastQARun", |                     "lastQARun", | ||||||
|  |                     "qaActiveArray", | ||||||
|                     "qaFinishedArray", |                     "qaFinishedArray", | ||||||
|  |                     "qaArray", | ||||||
|                     "sortedQARuns", |                     "sortedQARuns", | ||||||
|                 ] |                 ] | ||||||
|             }, |             }, | ||||||
| @ -619,8 +632,9 @@ class BaseCrawlOps: | |||||||
|                 "finished", |                 "finished", | ||||||
|                 "fileSize", |                 "fileSize", | ||||||
|                 "reviewStatus", |                 "reviewStatus", | ||||||
|  |                 "lastQAStarted", | ||||||
|  |                 "lastQAState", | ||||||
|                 "qaRunCount", |                 "qaRunCount", | ||||||
|                 "qaState", |  | ||||||
|             ): |             ): | ||||||
|                 raise HTTPException(status_code=400, detail="invalid_sort_by") |                 raise HTTPException(status_code=400, detail="invalid_sort_by") | ||||||
|             if sort_direction not in (1, -1): |             if sort_direction not in (1, -1): | ||||||
| @ -628,10 +642,8 @@ class BaseCrawlOps: | |||||||
| 
 | 
 | ||||||
|             sort_query = {sort_by: sort_direction} |             sort_query = {sort_by: sort_direction} | ||||||
| 
 | 
 | ||||||
|             # Secondary sort for qaState - sorted by current, then last |             # Ensure crawls are always sorted first for QA-related sorts | ||||||
|             # Tertiary sort for qaState - type, always ascending so crawls are first |             if sort_by in ("lastQAStarted", "lastQAState"): | ||||||
|             if sort_by == "qaState": |  | ||||||
|                 sort_query["lastQAState"] = sort_direction |  | ||||||
|                 sort_query["type"] = 1 |                 sort_query["type"] = 1 | ||||||
| 
 | 
 | ||||||
|             aggregate.extend([{"$sort": sort_query}]) |             aggregate.extend([{"$sort": sort_query}]) | ||||||
|  | |||||||
| @ -169,8 +169,6 @@ class CrawlOps(BaseCrawlOps): | |||||||
|             {"$set": {"firstSeedObject": {"$arrayElemAt": ["$config.seeds", 0]}}}, |             {"$set": {"firstSeedObject": {"$arrayElemAt": ["$config.seeds", 0]}}}, | ||||||
|             {"$set": {"firstSeed": "$firstSeedObject.url"}}, |             {"$set": {"firstSeed": "$firstSeedObject.url"}}, | ||||||
|             {"$unset": ["firstSeedObject", "errors", "config"]}, |             {"$unset": ["firstSeedObject", "errors", "config"]}, | ||||||
|             {"$set": {"qaState": "$qa.state"}}, |  | ||||||
|             {"$set": {"activeQAState": "$qaState"}}, |  | ||||||
|             {"$set": {"activeQAStats": "$qa.stats"}}, |             {"$set": {"activeQAStats": "$qa.stats"}}, | ||||||
|             { |             { | ||||||
|                 "$set": { |                 "$set": { | ||||||
| @ -182,11 +180,23 @@ class CrawlOps(BaseCrawlOps): | |||||||
|                     } |                     } | ||||||
|                 } |                 } | ||||||
|             }, |             }, | ||||||
|  |             # Add active QA run to array if exists prior to sorting, taking care not to | ||||||
|  |             # pass null to $concatArrays so that our result isn't null | ||||||
|  |             { | ||||||
|  |                 "$set": { | ||||||
|  |                     "qaActiveArray": {"$cond": [{"$ne": ["$qa", None]}, ["$qa"], []]} | ||||||
|  |                 } | ||||||
|  |             }, | ||||||
|  |             { | ||||||
|  |                 "$set": { | ||||||
|  |                     "qaArray": {"$concatArrays": ["$qaFinishedArray", "$qaActiveArray"]} | ||||||
|  |                 } | ||||||
|  |             }, | ||||||
|             { |             { | ||||||
|                 "$set": { |                 "$set": { | ||||||
|                     "sortedQARuns": { |                     "sortedQARuns": { | ||||||
|                         "$sortArray": { |                         "$sortArray": { | ||||||
|                             "input": "$qaFinishedArray", |                             "input": "$qaArray", | ||||||
|                             "sortBy": {"started": -1}, |                             "sortBy": {"started": -1}, | ||||||
|                         } |                         } | ||||||
|                     } |                     } | ||||||
| @ -194,13 +204,14 @@ class CrawlOps(BaseCrawlOps): | |||||||
|             }, |             }, | ||||||
|             {"$set": {"lastQARun": {"$arrayElemAt": ["$sortedQARuns", 0]}}}, |             {"$set": {"lastQARun": {"$arrayElemAt": ["$sortedQARuns", 0]}}}, | ||||||
|             {"$set": {"lastQAState": "$lastQARun.state"}}, |             {"$set": {"lastQAState": "$lastQARun.state"}}, | ||||||
|  |             {"$set": {"lastQAStarted": "$lastQARun.started"}}, | ||||||
|             { |             { | ||||||
|                 "$set": { |                 "$set": { | ||||||
|                     "qaRunCount": { |                     "qaRunCount": { | ||||||
|                         "$size": { |                         "$size": { | ||||||
|                             "$cond": [ |                             "$cond": [ | ||||||
|                                 {"$isArray": "$qaFinishedArray"}, |                                 {"$isArray": "$qaArray"}, | ||||||
|                                 "$qaFinishedArray", |                                 "$qaArray", | ||||||
|                                 [], |                                 [], | ||||||
|                             ] |                             ] | ||||||
|                         } |                         } | ||||||
| @ -210,7 +221,9 @@ class CrawlOps(BaseCrawlOps): | |||||||
|             { |             { | ||||||
|                 "$unset": [ |                 "$unset": [ | ||||||
|                     "lastQARun", |                     "lastQARun", | ||||||
|  |                     "qaActiveArray", | ||||||
|                     "qaFinishedArray", |                     "qaFinishedArray", | ||||||
|  |                     "qaArray", | ||||||
|                     "sortedQARuns", |                     "sortedQARuns", | ||||||
|                 ] |                 ] | ||||||
|             }, |             }, | ||||||
| @ -239,19 +252,14 @@ class CrawlOps(BaseCrawlOps): | |||||||
|                 "firstSeed", |                 "firstSeed", | ||||||
|                 "reviewStatus", |                 "reviewStatus", | ||||||
|                 "qaRunCount", |                 "qaRunCount", | ||||||
|                 "qaState", |                 "lastQAState", | ||||||
|  |                 "lastQAStarted", | ||||||
|             ): |             ): | ||||||
|                 raise HTTPException(status_code=400, detail="invalid_sort_by") |                 raise HTTPException(status_code=400, detail="invalid_sort_by") | ||||||
|             if sort_direction not in (1, -1): |             if sort_direction not in (1, -1): | ||||||
|                 raise HTTPException(status_code=400, detail="invalid_sort_direction") |                 raise HTTPException(status_code=400, detail="invalid_sort_direction") | ||||||
| 
 | 
 | ||||||
|             sort_query = {sort_by: sort_direction} |             aggregate.extend([{"$sort": {sort_by: sort_direction}}]) | ||||||
| 
 |  | ||||||
|             # Add secondary sort for qaState - sorted by current, then last |  | ||||||
|             if sort_by == "qaState": |  | ||||||
|                 sort_query["lastQAState"] = sort_direction |  | ||||||
| 
 |  | ||||||
|             aggregate.extend([{"$sort": sort_query}]) |  | ||||||
| 
 | 
 | ||||||
|         aggregate.extend( |         aggregate.extend( | ||||||
|             [ |             [ | ||||||
|  | |||||||
| @ -660,9 +660,9 @@ class CrawlOut(BaseMongoModel): | |||||||
|     reviewStatus: Optional[conint(ge=1, le=5)] = None  # type: ignore |     reviewStatus: Optional[conint(ge=1, le=5)] = None  # type: ignore | ||||||
| 
 | 
 | ||||||
|     qaRunCount: int = 0 |     qaRunCount: int = 0 | ||||||
|     activeQAState: Optional[str] |  | ||||||
|     activeQAStats: Optional[CrawlStats] |     activeQAStats: Optional[CrawlStats] | ||||||
|     lastQAState: Optional[str] |     lastQAState: Optional[str] | ||||||
|  |     lastQAStarted: Optional[datetime] | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| # ============================================================================ | # ============================================================================ | ||||||
|  | |||||||
| @ -116,29 +116,53 @@ def failed_qa_run_id(crawler_crawl_id, crawler_auth_headers, default_org_id): | |||||||
|     assert qa["started"] |     assert qa["started"] | ||||||
|     assert not qa["finished"] |     assert not qa["finished"] | ||||||
| 
 | 
 | ||||||
|     # Ensure sorting by qaState works as expected - current floated to top |     # Ensure sorting by lastQAState works as expected - current floated to top | ||||||
|     r = requests.get( |     r = requests.get( | ||||||
|         f"{API_PREFIX}/orgs/{default_org_id}/crawls?sortBy=qaState", |         f"{API_PREFIX}/orgs/{default_org_id}/crawls?sortBy=lastQAState", | ||||||
|         headers=crawler_auth_headers, |         headers=crawler_auth_headers, | ||||||
|     ) |     ) | ||||||
|     assert r.status_code == 200 |     assert r.status_code == 200 | ||||||
|     crawls = r.json()["items"] |     crawls = r.json()["items"] | ||||||
|     assert crawls[0]["id"] == crawler_crawl_id |     assert crawls[0]["id"] == crawler_crawl_id | ||||||
|     assert crawls[0]["activeQAState"] |  | ||||||
|     assert crawls[0]["activeQAStats"] |     assert crawls[0]["activeQAStats"] | ||||||
|     assert crawls[0]["lastQAState"] |     assert crawls[0]["lastQAState"] | ||||||
|  |     assert crawls[0]["lastQAStarted"] | ||||||
| 
 | 
 | ||||||
|     # Ensure sorting by qaState works as expected with all-crawls |     # Ensure sorting by lastQAState works as expected with all-crawls | ||||||
|     r = requests.get( |     r = requests.get( | ||||||
|         f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=qaState", |         f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=lastQAState", | ||||||
|         headers=crawler_auth_headers, |         headers=crawler_auth_headers, | ||||||
|     ) |     ) | ||||||
|     assert r.status_code == 200 |     assert r.status_code == 200 | ||||||
|     crawls = r.json()["items"] |     crawls = r.json()["items"] | ||||||
|     assert crawls[0]["id"] == crawler_crawl_id |     assert crawls[0]["id"] == crawler_crawl_id | ||||||
|     assert crawls[0]["activeQAState"] |  | ||||||
|     assert crawls[0]["activeQAStats"] |     assert crawls[0]["activeQAStats"] | ||||||
|     assert crawls[0]["lastQAState"] |     assert crawls[0]["lastQAState"] | ||||||
|  |     assert crawls[0]["lastQAStarted"] | ||||||
|  | 
 | ||||||
|  |     # Ensure sorting by lastQAStarted works as expected - current floated to top | ||||||
|  |     r = requests.get( | ||||||
|  |         f"{API_PREFIX}/orgs/{default_org_id}/crawls?sortBy=lastQAStarted", | ||||||
|  |         headers=crawler_auth_headers, | ||||||
|  |     ) | ||||||
|  |     assert r.status_code == 200 | ||||||
|  |     crawls = r.json()["items"] | ||||||
|  |     assert crawls[0]["id"] == crawler_crawl_id | ||||||
|  |     assert crawls[0]["activeQAStats"] | ||||||
|  |     assert crawls[0]["lastQAState"] | ||||||
|  |     assert crawls[0]["lastQAStarted"] | ||||||
|  | 
 | ||||||
|  |     # Ensure sorting by lastQAStarted works as expected with all-crawls | ||||||
|  |     r = requests.get( | ||||||
|  |         f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=lastQAStarted", | ||||||
|  |         headers=crawler_auth_headers, | ||||||
|  |     ) | ||||||
|  |     assert r.status_code == 200 | ||||||
|  |     crawls = r.json()["items"] | ||||||
|  |     assert crawls[0]["id"] == crawler_crawl_id | ||||||
|  |     assert crawls[0]["activeQAStats"] | ||||||
|  |     assert crawls[0]["lastQAState"] | ||||||
|  |     assert crawls[0]["lastQAStarted"] | ||||||
| 
 | 
 | ||||||
|     # Cancel crawl |     # Cancel crawl | ||||||
|     r = requests.post( |     r = requests.post( | ||||||
|  | |||||||
| @ -419,9 +419,22 @@ def test_list_all_crawls( | |||||||
|         assert item["finished"] |         assert item["finished"] | ||||||
|         assert item["state"] |         assert item["state"] | ||||||
| 
 | 
 | ||||||
|     # Test that all-crawls qaState sort always puts crawls before uploads |     # Test that all-crawls lastQAState and lastQAStarted sorts always put crawls before uploads | ||||||
|     r = requests.get( |     r = requests.get( | ||||||
|         f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=qaState", |         f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=lastQAState", | ||||||
|  |         headers=admin_auth_headers, | ||||||
|  |     ) | ||||||
|  |     assert r.status_code == 200 | ||||||
|  |     data = r.json() | ||||||
|  | 
 | ||||||
|  |     last_type = None | ||||||
|  |     for item in data["items"]: | ||||||
|  |         if last_type == "upload": | ||||||
|  |             assert item["type"] != "crawl" | ||||||
|  |         last_type = item["type"] | ||||||
|  | 
 | ||||||
|  |     r = requests.get( | ||||||
|  |         f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=lastQAStarted", | ||||||
|         headers=admin_auth_headers, |         headers=admin_auth_headers, | ||||||
|     ) |     ) | ||||||
|     assert r.status_code == 200 |     assert r.status_code == 200 | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user