Add reviewStatus, qaState, and qaRunCount sort options to crawls/all-crawls list endpoints (#1686)
Backend work for #1672 Adds new sort options to /crawls and /all-crawls GET list endpoints: - `reviewStatus` - `qaRunCount`: number of completed QA runs for crawl (also added to CrawlOut) - `qaState` (sorts by `activeQAState` first, then `lastQAState`, both of which are added to CrawlOut)
This commit is contained in:
parent
87e0873f1a
commit
c800da1732
@ -551,6 +551,50 @@ class BaseCrawlOps:
|
||||
{"$set": {"firstSeedObject": {"$arrayElemAt": ["$config.seeds", 0]}}},
|
||||
{"$set": {"firstSeed": "$firstSeedObject.url"}},
|
||||
{"$unset": ["firstSeedObject", "errors", "config"]},
|
||||
{"$set": {"qaState": "$qa.state"}},
|
||||
{"$set": {"activeQAState": "$qaState"}},
|
||||
{
|
||||
"$set": {
|
||||
"qaFinishedArray": {
|
||||
"$map": {
|
||||
"input": {"$objectToArray": "$qaFinished"},
|
||||
"in": "$$this.v",
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"$set": {
|
||||
"sortedQARuns": {
|
||||
"$sortArray": {
|
||||
"input": "$qaFinishedArray",
|
||||
"sortBy": {"started": -1},
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{"$set": {"lastQARun": {"$arrayElemAt": ["$sortedQARuns", 0]}}},
|
||||
{"$set": {"lastQAState": "$lastQARun.state"}},
|
||||
{
|
||||
"$set": {
|
||||
"qaRunCount": {
|
||||
"$size": {
|
||||
"$cond": [
|
||||
{"$isArray": "$qaFinishedArray"},
|
||||
"$qaFinishedArray",
|
||||
[],
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"$unset": [
|
||||
"lastQARun",
|
||||
"qaFinishedArray",
|
||||
"sortedQARuns",
|
||||
]
|
||||
},
|
||||
]
|
||||
|
||||
if not resources:
|
||||
@ -569,12 +613,25 @@ class BaseCrawlOps:
|
||||
aggregate.extend([{"$match": {"collectionIds": {"$in": [collection_id]}}}])
|
||||
|
||||
if sort_by:
|
||||
if sort_by not in ("started", "finished", "fileSize"):
|
||||
if sort_by not in (
|
||||
"started",
|
||||
"finished",
|
||||
"fileSize",
|
||||
"reviewStatus",
|
||||
"qaRunCount",
|
||||
"qaState",
|
||||
):
|
||||
raise HTTPException(status_code=400, detail="invalid_sort_by")
|
||||
if sort_direction not in (1, -1):
|
||||
raise HTTPException(status_code=400, detail="invalid_sort_direction")
|
||||
|
||||
aggregate.extend([{"$sort": {sort_by: sort_direction}}])
|
||||
sort_query = {sort_by: sort_direction}
|
||||
|
||||
# Add secondary sort for qaState - sorted by current, then last
|
||||
if sort_by == "qaState":
|
||||
sort_query["lastQAState"] = sort_direction
|
||||
|
||||
aggregate.extend([{"$sort": sort_query}])
|
||||
|
||||
aggregate.extend(
|
||||
[
|
||||
|
@ -165,6 +165,50 @@ class CrawlOps(BaseCrawlOps):
|
||||
{"$set": {"firstSeedObject": {"$arrayElemAt": ["$config.seeds", 0]}}},
|
||||
{"$set": {"firstSeed": "$firstSeedObject.url"}},
|
||||
{"$unset": ["firstSeedObject", "errors", "config"]},
|
||||
{"$set": {"qaState": "$qa.state"}},
|
||||
{"$set": {"activeQAState": "$qaState"}},
|
||||
{
|
||||
"$set": {
|
||||
"qaFinishedArray": {
|
||||
"$map": {
|
||||
"input": {"$objectToArray": "$qaFinished"},
|
||||
"in": "$$this.v",
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"$set": {
|
||||
"sortedQARuns": {
|
||||
"$sortArray": {
|
||||
"input": "$qaFinishedArray",
|
||||
"sortBy": {"started": -1},
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{"$set": {"lastQARun": {"$arrayElemAt": ["$sortedQARuns", 0]}}},
|
||||
{"$set": {"lastQAState": "$lastQARun.state"}},
|
||||
{
|
||||
"$set": {
|
||||
"qaRunCount": {
|
||||
"$size": {
|
||||
"$cond": [
|
||||
{"$isArray": "$qaFinishedArray"},
|
||||
"$qaFinishedArray",
|
||||
[],
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"$unset": [
|
||||
"lastQARun",
|
||||
"qaFinishedArray",
|
||||
"sortedQARuns",
|
||||
]
|
||||
},
|
||||
]
|
||||
|
||||
if not resources:
|
||||
@ -188,12 +232,21 @@ class CrawlOps(BaseCrawlOps):
|
||||
"finished",
|
||||
"fileSize",
|
||||
"firstSeed",
|
||||
"reviewStatus",
|
||||
"qaRunCount",
|
||||
"qaState",
|
||||
):
|
||||
raise HTTPException(status_code=400, detail="invalid_sort_by")
|
||||
if sort_direction not in (1, -1):
|
||||
raise HTTPException(status_code=400, detail="invalid_sort_direction")
|
||||
|
||||
aggregate.extend([{"$sort": {sort_by: sort_direction}}])
|
||||
sort_query = {sort_by: sort_direction}
|
||||
|
||||
# Add secondary sort for qaState - sorted by current, then last
|
||||
if sort_by == "qaState":
|
||||
sort_query["lastQAState"] = sort_direction
|
||||
|
||||
aggregate.extend([{"$sort": sort_query}])
|
||||
|
||||
aggregate.extend(
|
||||
[
|
||||
|
@ -658,6 +658,10 @@ class CrawlOut(BaseMongoModel):
|
||||
|
||||
reviewStatus: Optional[conint(ge=1, le=5)] = None # type: ignore
|
||||
|
||||
qaRunCount: int = 0
|
||||
activeQAState: Optional[str]
|
||||
lastQAState: Optional[str]
|
||||
|
||||
|
||||
# ============================================================================
|
||||
class CrawlOutWithResources(CrawlOut):
|
||||
|
@ -116,6 +116,28 @@ def failed_qa_run_id(crawler_crawl_id, crawler_auth_headers, default_org_id):
|
||||
assert qa["started"]
|
||||
assert not qa["finished"]
|
||||
|
||||
# Ensure sorting by qaState works as expected - current floated to top
|
||||
r = requests.get(
|
||||
f"{API_PREFIX}/orgs/{default_org_id}/crawls?sortBy=qaState",
|
||||
headers=crawler_auth_headers,
|
||||
)
|
||||
assert r.status_code == 200
|
||||
crawls = r.json()["items"]
|
||||
assert crawls[0]["id"] == crawler_crawl_id
|
||||
assert crawls[0]["activeQAState"]
|
||||
assert crawls[0]["lastQAState"]
|
||||
|
||||
# Ensure sorting by qaState works as expected with all-crawls
|
||||
r = requests.get(
|
||||
f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=qaState",
|
||||
headers=crawler_auth_headers,
|
||||
)
|
||||
assert r.status_code == 200
|
||||
crawls = r.json()["items"]
|
||||
assert crawls[0]["id"] == crawler_crawl_id
|
||||
assert crawls[0]["activeQAState"]
|
||||
assert crawls[0]["lastQAState"]
|
||||
|
||||
# Cancel crawl
|
||||
r = requests.post(
|
||||
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa/cancel",
|
||||
@ -340,6 +362,96 @@ def test_failed_qa_run(
|
||||
assert qa["crawlExecSeconds"] > 0
|
||||
|
||||
|
||||
def test_sort_crawls_by_qa_runs(
    crawler_crawl_id,
    crawler_auth_headers,
    default_org_id,
    failed_qa_run_id,
    qa_run_pages_ready,
):
    """Verify sorting crawl list endpoints by qaRunCount.

    Exercises both /crawls and /all-crawls in descending (default) and
    ascending order. The crawl that has completed QA runs
    (crawler_crawl_id) must appear first when descending and last when
    ascending, and all other crawls must be monotonically ordered by
    their qaRunCount.
    """

    def check_sorted_by_qa_run_count(endpoint: str, ascending: bool) -> None:
        # One request per endpoint/direction combination; default
        # sortDirection is descending (-1), so only pass it when ascending.
        url = f"{API_PREFIX}/orgs/{default_org_id}/{endpoint}?sortBy=qaRunCount"
        if ascending:
            url += "&sortDirection=1"
        r = requests.get(url, headers=crawler_auth_headers)
        assert r.status_code == 200
        crawls = r.json()["items"]

        # The crawl with completed QA runs floats to the expected end.
        qa_crawl = crawls[-1] if ascending else crawls[0]
        assert qa_crawl["id"] == crawler_crawl_id
        assert qa_crawl["qaRunCount"] > 0

        # Remaining crawls are monotonically ordered by qaRunCount.
        # Seed the running bound with 0 (ascending) or the known max
        # (descending); crawler_crawl_id is skipped since it was checked
        # above and sits at the boundary of the sequence.
        last_count = 0 if ascending else qa_crawl["qaRunCount"]
        for crawl in crawls:
            if crawl["id"] == crawler_crawl_id:
                continue
            crawl_qa_count = crawl["qaRunCount"]
            assert isinstance(crawl_qa_count, int)
            if ascending:
                assert crawl_qa_count >= last_count
            else:
                assert crawl_qa_count <= last_count
            last_count = crawl_qa_count

    # Test that sorting by qaRunCount works as expected (descending default)
    check_sorted_by_qa_run_count("crawls", ascending=False)
    # Test ascending sort
    check_sorted_by_qa_run_count("crawls", ascending=True)
    # Test same with all-crawls
    check_sorted_by_qa_run_count("all-crawls", ascending=False)
    check_sorted_by_qa_run_count("all-crawls", ascending=True)
||||
|
||||
|
||||
def test_delete_qa_runs(
|
||||
crawler_crawl_id,
|
||||
crawler_auth_headers,
|
||||
|
@ -303,6 +303,44 @@ def test_update_crawl(
|
||||
assert r.status_code == 200
|
||||
assert r.json()["reviewStatus"] == 5
|
||||
|
||||
# Test sorting on reviewStatus
|
||||
r = requests.get(
|
||||
f"{API_PREFIX}/orgs/{default_org_id}/crawls?sortBy=reviewStatus",
|
||||
headers=admin_auth_headers,
|
||||
)
|
||||
assert r.status_code == 200
|
||||
crawls = r.json()["items"]
|
||||
assert crawls[0]["id"] == admin_crawl_id
|
||||
assert crawls[0]["reviewStatus"] == 5
|
||||
|
||||
r = requests.get(
|
||||
f"{API_PREFIX}/orgs/{default_org_id}/crawls?sortBy=reviewStatus&sortDirection=1",
|
||||
headers=admin_auth_headers,
|
||||
)
|
||||
assert r.status_code == 200
|
||||
crawls = r.json()["items"]
|
||||
assert crawls[-1]["id"] == admin_crawl_id
|
||||
assert crawls[-1]["reviewStatus"] == 5
|
||||
|
||||
# Test sorting on reviewStatus for all-crawls
|
||||
r = requests.get(
|
||||
f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=reviewStatus",
|
||||
headers=admin_auth_headers,
|
||||
)
|
||||
assert r.status_code == 200
|
||||
crawls = r.json()["items"]
|
||||
assert crawls[0]["id"] == admin_crawl_id
|
||||
assert crawls[0]["reviewStatus"] == 5
|
||||
|
||||
r = requests.get(
|
||||
f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=reviewStatus&sortDirection=1",
|
||||
headers=admin_auth_headers,
|
||||
)
|
||||
assert r.status_code == 200
|
||||
crawls = r.json()["items"]
|
||||
assert crawls[-1]["id"] == admin_crawl_id
|
||||
assert crawls[-1]["reviewStatus"] == 5
|
||||
|
||||
# Try to update to invalid reviewStatus
|
||||
r = requests.patch(
|
||||
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{admin_crawl_id}",
|
||||
|
Loading…
Reference in New Issue
Block a user