diff --git a/backend/btrixcloud/basecrawls.py b/backend/btrixcloud/basecrawls.py index f982b414..c6396ba3 100644 --- a/backend/btrixcloud/basecrawls.py +++ b/backend/btrixcloud/basecrawls.py @@ -551,8 +551,6 @@ class BaseCrawlOps: {"$set": {"firstSeedObject": {"$arrayElemAt": ["$config.seeds", 0]}}}, {"$set": {"firstSeed": "$firstSeedObject.url"}}, {"$unset": ["firstSeedObject", "errors", "config"]}, - {"$set": {"qaState": "$qa.state"}}, - {"$set": {"activeQAState": "$qaState"}}, {"$set": {"activeQAStats": "$qa.stats"}}, { "$set": { @@ -564,11 +562,23 @@ class BaseCrawlOps: } } }, + # Add active QA run to array if exists prior to sorting, taking care not to + # pass null to $concatArrays so that our result isn't null + { + "$set": { + "qaActiveArray": {"$cond": [{"$ne": ["$qa", None]}, ["$qa"], []]} + } + }, + { + "$set": { + "qaArray": {"$concatArrays": ["$qaFinishedArray", "$qaActiveArray"]} + } + }, { "$set": { "sortedQARuns": { "$sortArray": { - "input": "$qaFinishedArray", + "input": "$qaArray", "sortBy": {"started": -1}, } } @@ -576,13 +586,14 @@ class BaseCrawlOps: }, {"$set": {"lastQARun": {"$arrayElemAt": ["$sortedQARuns", 0]}}}, {"$set": {"lastQAState": "$lastQARun.state"}}, + {"$set": {"lastQAStarted": "$lastQARun.started"}}, { "$set": { "qaRunCount": { "$size": { "$cond": [ - {"$isArray": "$qaFinishedArray"}, - "$qaFinishedArray", + {"$isArray": "$qaArray"}, + "$qaArray", [], ] } @@ -592,7 +603,9 @@ class BaseCrawlOps: { "$unset": [ "lastQARun", + "qaActiveArray", "qaFinishedArray", + "qaArray", "sortedQARuns", ] }, @@ -619,8 +632,9 @@ class BaseCrawlOps: "finished", "fileSize", "reviewStatus", + "lastQAStarted", + "lastQAState", "qaRunCount", - "qaState", ): raise HTTPException(status_code=400, detail="invalid_sort_by") if sort_direction not in (1, -1): @@ -628,10 +642,8 @@ class BaseCrawlOps: sort_query = {sort_by: sort_direction} - # Secondary sort for qaState - sorted by current, then last - # Tertiary sort for qaState - type, always ascending so 
crawls are first - if sort_by == "qaState": - sort_query["lastQAState"] = sort_direction + # Ensure crawls are always sorted first for QA-related sorts + if sort_by in ("lastQAStarted", "lastQAState"): sort_query["type"] = 1 aggregate.extend([{"$sort": sort_query}]) diff --git a/backend/btrixcloud/crawls.py b/backend/btrixcloud/crawls.py index 2c388ed3..43dffff7 100644 --- a/backend/btrixcloud/crawls.py +++ b/backend/btrixcloud/crawls.py @@ -169,8 +169,6 @@ class CrawlOps(BaseCrawlOps): {"$set": {"firstSeedObject": {"$arrayElemAt": ["$config.seeds", 0]}}}, {"$set": {"firstSeed": "$firstSeedObject.url"}}, {"$unset": ["firstSeedObject", "errors", "config"]}, - {"$set": {"qaState": "$qa.state"}}, - {"$set": {"activeQAState": "$qaState"}}, {"$set": {"activeQAStats": "$qa.stats"}}, { "$set": { @@ -182,11 +180,23 @@ class CrawlOps(BaseCrawlOps): } } }, + # Add active QA run to array if exists prior to sorting, taking care not to + # pass null to $concatArrays so that our result isn't null + { + "$set": { + "qaActiveArray": {"$cond": [{"$ne": ["$qa", None]}, ["$qa"], []]} + } + }, + { + "$set": { + "qaArray": {"$concatArrays": ["$qaFinishedArray", "$qaActiveArray"]} + } + }, { "$set": { "sortedQARuns": { "$sortArray": { - "input": "$qaFinishedArray", + "input": "$qaArray", "sortBy": {"started": -1}, } } @@ -194,13 +204,14 @@ class CrawlOps(BaseCrawlOps): }, {"$set": {"lastQARun": {"$arrayElemAt": ["$sortedQARuns", 0]}}}, {"$set": {"lastQAState": "$lastQARun.state"}}, + {"$set": {"lastQAStarted": "$lastQARun.started"}}, { "$set": { "qaRunCount": { "$size": { "$cond": [ - {"$isArray": "$qaFinishedArray"}, - "$qaFinishedArray", + {"$isArray": "$qaArray"}, + "$qaArray", [], ] } @@ -210,7 +221,9 @@ class CrawlOps(BaseCrawlOps): { "$unset": [ "lastQARun", + "qaActiveArray", "qaFinishedArray", + "qaArray", "sortedQARuns", ] }, @@ -239,19 +252,14 @@ class CrawlOps(BaseCrawlOps): "firstSeed", "reviewStatus", "qaRunCount", - "qaState", + "lastQAState", + "lastQAStarted", ): raise 
HTTPException(status_code=400, detail="invalid_sort_by") if sort_direction not in (1, -1): raise HTTPException(status_code=400, detail="invalid_sort_direction") - sort_query = {sort_by: sort_direction} - - # Add secondary sort for qaState - sorted by current, then last - if sort_by == "qaState": - sort_query["lastQAState"] = sort_direction - - aggregate.extend([{"$sort": sort_query}]) + aggregate.extend([{"$sort": {sort_by: sort_direction}}]) aggregate.extend( [ diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py index 7287e5f1..bb7b7e90 100644 --- a/backend/btrixcloud/models.py +++ b/backend/btrixcloud/models.py @@ -660,9 +660,9 @@ class CrawlOut(BaseMongoModel): reviewStatus: Optional[conint(ge=1, le=5)] = None # type: ignore qaRunCount: int = 0 - activeQAState: Optional[str] activeQAStats: Optional[CrawlStats] lastQAState: Optional[str] + lastQAStarted: Optional[datetime] # ============================================================================ diff --git a/backend/test/test_qa.py b/backend/test/test_qa.py index 26391c6d..b586c65a 100644 --- a/backend/test/test_qa.py +++ b/backend/test/test_qa.py @@ -116,29 +116,53 @@ def failed_qa_run_id(crawler_crawl_id, crawler_auth_headers, default_org_id): assert qa["started"] assert not qa["finished"] - # Ensure sorting by qaState works as expected - current floated to top + # Ensure sorting by lastQAState works as expected - current floated to top r = requests.get( - f"{API_PREFIX}/orgs/{default_org_id}/crawls?sortBy=qaState", + f"{API_PREFIX}/orgs/{default_org_id}/crawls?sortBy=lastQAState", headers=crawler_auth_headers, ) assert r.status_code == 200 crawls = r.json()["items"] assert crawls[0]["id"] == crawler_crawl_id - assert crawls[0]["activeQAState"] assert crawls[0]["activeQAStats"] assert crawls[0]["lastQAState"] + assert crawls[0]["lastQAStarted"] - # Ensure sorting by qaState works as expected with all-crawls + # Ensure sorting by lastQAState works as expected with all-crawls r = 
requests.get( - f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=qaState", + f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=lastQAState", headers=crawler_auth_headers, ) assert r.status_code == 200 crawls = r.json()["items"] assert crawls[0]["id"] == crawler_crawl_id - assert crawls[0]["activeQAState"] assert crawls[0]["activeQAStats"] assert crawls[0]["lastQAState"] + assert crawls[0]["lastQAStarted"] + + # Ensure sorting by lastQAStarted works as expected - current floated to top + r = requests.get( + f"{API_PREFIX}/orgs/{default_org_id}/crawls?sortBy=lastQAStarted", + headers=crawler_auth_headers, + ) + assert r.status_code == 200 + crawls = r.json()["items"] + assert crawls[0]["id"] == crawler_crawl_id + assert crawls[0]["activeQAStats"] + assert crawls[0]["lastQAState"] + assert crawls[0]["lastQAStarted"] + + # Ensure sorting by lastQAStarted works as expected with all-crawls + r = requests.get( + f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=lastQAStarted", + headers=crawler_auth_headers, + ) + assert r.status_code == 200 + crawls = r.json()["items"] + assert crawls[0]["id"] == crawler_crawl_id + assert crawls[0]["activeQAStats"] + assert crawls[0]["lastQAState"] + assert crawls[0]["lastQAStarted"] # Cancel crawl r = requests.post( diff --git a/backend/test/test_uploads.py b/backend/test/test_uploads.py index 399187f4..4395ddb3 100644 --- a/backend/test/test_uploads.py +++ b/backend/test/test_uploads.py @@ -419,9 +419,22 @@ def test_list_all_crawls( assert item["finished"] assert item["state"] - # Test that all-crawls qaState sort always puts crawls before uploads + # Test that all-crawls lastQAState and lastQAStarted sorts always put crawls before uploads r = requests.get( - f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=qaState", + f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=lastQAState", + headers=admin_auth_headers, + ) + assert r.status_code == 200 + data = r.json() + + last_type = None + for item in
data["items"]: + if last_type == "upload": + assert item["type"] != "crawl" + last_type = item["type"] + + r = requests.get( + f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=lastQAStarted", headers=admin_auth_headers, ) assert r.status_code == 200