diff --git a/backend/btrixcloud/basecrawls.py b/backend/btrixcloud/basecrawls.py
index a02ab3fa..640381a9 100644
--- a/backend/btrixcloud/basecrawls.py
+++ b/backend/btrixcloud/basecrawls.py
@@ -551,6 +551,50 @@ class BaseCrawlOps:
             {"$set": {"firstSeedObject": {"$arrayElemAt": ["$config.seeds", 0]}}},
             {"$set": {"firstSeed": "$firstSeedObject.url"}},
             {"$unset": ["firstSeedObject", "errors", "config"]},
+            {"$set": {"qaState": "$qa.state"}},
+            {"$set": {"activeQAState": "$qaState"}},
+            {
+                "$set": {
+                    "qaFinishedArray": {
+                        "$map": {
+                            "input": {"$objectToArray": "$qaFinished"},
+                            "in": "$$this.v",
+                        }
+                    }
+                }
+            },
+            {
+                "$set": {
+                    "sortedQARuns": {
+                        "$sortArray": {
+                            "input": "$qaFinishedArray",
+                            "sortBy": {"started": -1},
+                        }
+                    }
+                }
+            },
+            {"$set": {"lastQARun": {"$arrayElemAt": ["$sortedQARuns", 0]}}},
+            {"$set": {"lastQAState": "$lastQARun.state"}},
+            {
+                "$set": {
+                    "qaRunCount": {
+                        "$size": {
+                            "$cond": [
+                                {"$isArray": "$qaFinishedArray"},
+                                "$qaFinishedArray",
+                                [],
+                            ]
+                        }
+                    }
+                }
+            },
+            {
+                "$unset": [
+                    "lastQARun",
+                    "qaFinishedArray",
+                    "sortedQARuns",
+                ]
+            },
         ]

         if not resources:
@@ -569,12 +613,25 @@ class BaseCrawlOps:
             aggregate.extend([{"$match": {"collectionIds": {"$in": [collection_id]}}}])

         if sort_by:
-            if sort_by not in ("started", "finished", "fileSize"):
+            if sort_by not in (
+                "started",
+                "finished",
+                "fileSize",
+                "reviewStatus",
+                "qaRunCount",
+                "qaState",
+            ):
                 raise HTTPException(status_code=400, detail="invalid_sort_by")
             if sort_direction not in (1, -1):
                 raise HTTPException(status_code=400, detail="invalid_sort_direction")

-            aggregate.extend([{"$sort": {sort_by: sort_direction}}])
+            sort_query = {sort_by: sort_direction}
+
+            # Add secondary sort for qaState - sorted by current, then last
+            if sort_by == "qaState":
+                sort_query["lastQAState"] = sort_direction
+
+            aggregate.extend([{"$sort": sort_query}])

         aggregate.extend(
             [
diff --git a/backend/btrixcloud/crawls.py b/backend/btrixcloud/crawls.py
index 92426d84..9b7058ba 100644
--- a/backend/btrixcloud/crawls.py
+++ b/backend/btrixcloud/crawls.py
@@ -165,6 +165,50 @@ class CrawlOps(BaseCrawlOps):
             {"$set": {"firstSeedObject": {"$arrayElemAt": ["$config.seeds", 0]}}},
             {"$set": {"firstSeed": "$firstSeedObject.url"}},
             {"$unset": ["firstSeedObject", "errors", "config"]},
+            {"$set": {"qaState": "$qa.state"}},
+            {"$set": {"activeQAState": "$qaState"}},
+            {
+                "$set": {
+                    "qaFinishedArray": {
+                        "$map": {
+                            "input": {"$objectToArray": "$qaFinished"},
+                            "in": "$$this.v",
+                        }
+                    }
+                }
+            },
+            {
+                "$set": {
+                    "sortedQARuns": {
+                        "$sortArray": {
+                            "input": "$qaFinishedArray",
+                            "sortBy": {"started": -1},
+                        }
+                    }
+                }
+            },
+            {"$set": {"lastQARun": {"$arrayElemAt": ["$sortedQARuns", 0]}}},
+            {"$set": {"lastQAState": "$lastQARun.state"}},
+            {
+                "$set": {
+                    "qaRunCount": {
+                        "$size": {
+                            "$cond": [
+                                {"$isArray": "$qaFinishedArray"},
+                                "$qaFinishedArray",
+                                [],
+                            ]
+                        }
+                    }
+                }
+            },
+            {
+                "$unset": [
+                    "lastQARun",
+                    "qaFinishedArray",
+                    "sortedQARuns",
+                ]
+            },
         ]

         if not resources:
@@ -188,12 +232,21 @@ class CrawlOps(BaseCrawlOps):
                 "finished",
                 "fileSize",
                 "firstSeed",
+                "reviewStatus",
+                "qaRunCount",
+                "qaState",
             ):
                 raise HTTPException(status_code=400, detail="invalid_sort_by")
             if sort_direction not in (1, -1):
                 raise HTTPException(status_code=400, detail="invalid_sort_direction")

-            aggregate.extend([{"$sort": {sort_by: sort_direction}}])
+            sort_query = {sort_by: sort_direction}
+
+            # Add secondary sort for qaState - sorted by current, then last
+            if sort_by == "qaState":
+                sort_query["lastQAState"] = sort_direction
+
+            aggregate.extend([{"$sort": sort_query}])

         aggregate.extend(
             [
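Both pipelines above add the same derived fields, so one reading covers both: `$objectToArray` plus `$map` flatten the `qaFinished` map into a plain list of finished QA runs, `$sortArray` (available in MongoDB 5.2+) orders that list newest-first so `lastQAState` is the state of the most recent finished run, `qaRunCount` is the guarded size of the list, and `activeQAState` mirrors `qa.state` for a currently running QA run. Because a missing value sorts after any string in a descending `$sort`, crawls with an active QA run float to the top, with `lastQAState` as the secondary key. A minimal plain-Python sketch of what the stages compute; the sample document shape is assumed from the fields the pipeline references, and the values are invented:

```python
# Plain-Python sketch of the derived QA fields (illustration only, not PR code).
crawl = {
    "qa": {"state": "running", "started": "2024-05-02T10:00:00Z"},
    "qaFinished": {
        "qa-run-1": {"state": "complete", "started": "2024-05-01T09:00:00Z"},
        "qa-run-2": {"state": "failed", "started": "2024-05-01T12:00:00Z"},
    },
}

# {"$objectToArray": "$qaFinished"} + "$map" over "$$this.v" -> list of runs
qa_finished = list((crawl.get("qaFinished") or {}).values())

# {"$sortArray": {"sortBy": {"started": -1}}} -> newest finished run first
sorted_runs = sorted(qa_finished, key=lambda run: run["started"], reverse=True)

active_qa_state = (crawl.get("qa") or {}).get("state")            # "running"
last_qa_state = sorted_runs[0]["state"] if sorted_runs else None  # "failed"
qa_run_count = len(qa_finished)                                   # 2
```

The `$cond`/`$isArray` guard plays the same role as the `or {}` above: it keeps `$size` from failing on crawls that have no `qaFinished` map at all.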
diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py
index 0cfcb33f..72791f6b 100644
--- a/backend/btrixcloud/models.py
+++ b/backend/btrixcloud/models.py
@@ -658,6 +658,10 @@ class CrawlOut(BaseMongoModel):

     reviewStatus: Optional[conint(ge=1, le=5)] = None  # type: ignore

+    qaRunCount: int = 0
+    activeQAState: Optional[str] = None
+    lastQAState: Optional[str] = None
+

 # ============================================================================
 class CrawlOutWithResources(CrawlOut):
diff --git a/backend/test/test_qa.py b/backend/test/test_qa.py
index a18359e0..80dd5aa8 100644
--- a/backend/test/test_qa.py
+++ b/backend/test/test_qa.py
@@ -116,6 +116,28 @@ def failed_qa_run_id(crawler_crawl_id, crawler_auth_headers, default_org_id):
     assert qa["started"]
     assert not qa["finished"]

+    # Ensure sorting by qaState works as expected - current floated to top
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls?sortBy=qaState",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    crawls = r.json()["items"]
+    assert crawls[0]["id"] == crawler_crawl_id
+    assert crawls[0]["activeQAState"]
+    assert crawls[0]["lastQAState"]
+
+    # Ensure sorting by qaState works as expected with all-crawls
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=qaState",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    crawls = r.json()["items"]
+    assert crawls[0]["id"] == crawler_crawl_id
+    assert crawls[0]["activeQAState"]
+    assert crawls[0]["lastQAState"]
+
     # Cancel crawl
     r = requests.post(
         f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa/cancel",
@@ -340,6 +362,96 @@ def test_failed_qa_run(
     assert qa["crawlExecSeconds"] > 0


+def test_sort_crawls_by_qa_runs(
+    crawler_crawl_id,
+    crawler_auth_headers,
+    default_org_id,
+    failed_qa_run_id,
+    qa_run_pages_ready,
+):
+    # Test that sorting by qaRunCount works as expected
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls?sortBy=qaRunCount",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    crawls = r.json()["items"]
+
+    assert crawls[0]["id"] == crawler_crawl_id
+    qa_run_count = crawls[0]["qaRunCount"]
+    assert qa_run_count > 0
+
+    last_count = qa_run_count
+    for crawl in crawls:
+        if crawl["id"] == crawler_crawl_id:
+            continue
+        crawl_qa_count = crawl["qaRunCount"]
+        assert isinstance(crawl_qa_count, int)
+        assert crawl_qa_count <= last_count
+        last_count = crawl_qa_count
+
+    # Test ascending sort
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls?sortBy=qaRunCount&sortDirection=1",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    crawls = r.json()["items"]
+
+    assert crawls[-1]["id"] == crawler_crawl_id
+    assert crawls[-1]["qaRunCount"] > 0
+
+    last_count = 0
+    for crawl in crawls:
+        if crawl["id"] == crawler_crawl_id:
+            continue
+        crawl_qa_count = crawl["qaRunCount"]
+        assert isinstance(crawl_qa_count, int)
+        assert crawl_qa_count >= last_count
+        last_count = crawl_qa_count
+
+    # Test same with all-crawls
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=qaRunCount",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    crawls = r.json()["items"]
+
+    assert crawls[0]["id"] == crawler_crawl_id
+    qa_run_count = crawls[0]["qaRunCount"]
+    assert qa_run_count > 0
+
+    last_count = qa_run_count
+    for crawl in crawls:
+        if crawl["id"] == crawler_crawl_id:
+            continue
+        crawl_qa_count = crawl["qaRunCount"]
+        assert isinstance(crawl_qa_count, int)
+        assert crawl_qa_count <= last_count
+        last_count = crawl_qa_count
+
+    # Test ascending sort
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=qaRunCount&sortDirection=1",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    crawls = r.json()["items"]
+
+    assert crawls[-1]["id"] == crawler_crawl_id
+    assert crawls[-1]["qaRunCount"] > 0
+
+    last_count = 0
+    for crawl in crawls:
+        if crawl["id"] == crawler_crawl_id:
+            continue
+        crawl_qa_count = crawl["qaRunCount"]
+        assert isinstance(crawl_qa_count, int)
+        assert crawl_qa_count >= last_count
+        last_count = crawl_qa_count
+
+
 def test_delete_qa_runs(
     crawler_crawl_id,
     crawler_auth_headers,
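For reference alongside the tests above, this is roughly how a client would exercise the new sort options; the deployment URL, org id, and token below are placeholders, not values from this change:

```python
# Hypothetical client call using the new sortBy values (placeholders throughout).
import requests

api = "https://btrix.example.com/api"          # assumed deployment URL
headers = {"Authorization": "Bearer <token>"}  # assumed auth token

r = requests.get(
    f"{api}/orgs/<org-id>/all-crawls",
    params={"sortBy": "qaRunCount", "sortDirection": -1},
    headers=headers,
)
r.raise_for_status()
for crawl in r.json()["items"]:
    # qaRunCount, activeQAState, and lastQAState are the new CrawlOut fields
    print(crawl["qaRunCount"], crawl["activeQAState"], crawl["lastQAState"])
```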
diff --git a/backend/test/test_run_crawl.py b/backend/test/test_run_crawl.py
index 3554ff44..10daf21b 100644
--- a/backend/test/test_run_crawl.py
+++ b/backend/test/test_run_crawl.py
@@ -303,6 +303,44 @@ def test_update_crawl(
     assert r.status_code == 200
     assert r.json()["reviewStatus"] == 5

+    # Test sorting on reviewStatus
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls?sortBy=reviewStatus",
+        headers=admin_auth_headers,
+    )
+    assert r.status_code == 200
+    crawls = r.json()["items"]
+    assert crawls[0]["id"] == admin_crawl_id
+    assert crawls[0]["reviewStatus"] == 5
+
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls?sortBy=reviewStatus&sortDirection=1",
+        headers=admin_auth_headers,
+    )
+    assert r.status_code == 200
+    crawls = r.json()["items"]
+    assert crawls[-1]["id"] == admin_crawl_id
+    assert crawls[-1]["reviewStatus"] == 5
+
+    # Test sorting on reviewStatus for all-crawls
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=reviewStatus",
+        headers=admin_auth_headers,
+    )
+    assert r.status_code == 200
+    crawls = r.json()["items"]
+    assert crawls[0]["id"] == admin_crawl_id
+    assert crawls[0]["reviewStatus"] == 5
+
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=reviewStatus&sortDirection=1",
+        headers=admin_auth_headers,
+    )
+    assert r.status_code == 200
+    crawls = r.json()["items"]
+    assert crawls[-1]["id"] == admin_crawl_id
+    assert crawls[-1]["reviewStatus"] == 5
+
     # Try to update to invalid reviewStatus
     r = requests.patch(
         f"{API_PREFIX}/orgs/{default_org_id}/crawls/{admin_crawl_id}",
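The first/last assertions above lean on MongoDB's ordering of missing values: null/missing sorts before any number ascending and after it descending, so the one reviewed crawl (`reviewStatus` 5) lands first descending and last ascending even though every other crawl has no review at all. A sketch of the same tie-break in plain Python, with invented sample data:

```python
# Sketch of MongoDB's null-last ordering for a descending reviewStatus sort
# (sample data invented for illustration).
crawls = [
    {"id": "unreviewed-crawl", "reviewStatus": None},
    {"id": "admin-crawl", "reviewStatus": 5},
]

descending = sorted(
    crawls,
    key=lambda c: (c["reviewStatus"] is not None, c["reviewStatus"] or 0),
    reverse=True,
)
assert descending[0]["id"] == "admin-crawl"  # reviewed crawl floats to top
```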