Add reviewStatus, qaState, and qaRunCount sort options to crawls/all-crawls list endpoints (#1686)
Backend work for #1672.

Adds new sort options to the `/crawls` and `/all-crawls` GET list endpoints:

- `reviewStatus`
- `qaRunCount`: number of completed QA runs for the crawl (also added to `CrawlOut`)
- `qaState`: sorts by `activeQAState` first, then `lastQAState`, both of which are added to `CrawlOut`
parent 87e0873f1a
commit c800da1732
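For reference, a minimal sketch of how a client might exercise the new sort options against the list endpoints; the base URL, org id, and auth token below are placeholders, not part of this change:

```python
import requests

API_PREFIX = "https://btrix.example.com/api"  # placeholder base URL
ORG_ID = "<org-id>"  # placeholder org id
HEADERS = {"Authorization": "Bearer <token>"}  # placeholder auth header

# Sort crawls by number of completed QA runs, most runs first (-1 = descending)
r = requests.get(
    f"{API_PREFIX}/orgs/{ORG_ID}/crawls",
    params={"sortBy": "qaRunCount", "sortDirection": -1},
    headers=HEADERS,
)
for crawl in r.json()["items"]:
    print(crawl["id"], crawl["qaRunCount"], crawl["activeQAState"], crawl["lastQAState"])

# The same sortBy values are accepted by the all-crawls endpoint
r = requests.get(
    f"{API_PREFIX}/orgs/{ORG_ID}/all-crawls",
    params={"sortBy": "reviewStatus"},
    headers=HEADERS,
)
```

Sorting by `qaState` orders by `activeQAState` first and then `lastQAState`, so crawls with a QA run currently in progress float to the top.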
@@ -551,6 +551,50 @@ class BaseCrawlOps:
             {"$set": {"firstSeedObject": {"$arrayElemAt": ["$config.seeds", 0]}}},
             {"$set": {"firstSeed": "$firstSeedObject.url"}},
             {"$unset": ["firstSeedObject", "errors", "config"]},
+            {"$set": {"qaState": "$qa.state"}},
+            {"$set": {"activeQAState": "$qaState"}},
+            {
+                "$set": {
+                    "qaFinishedArray": {
+                        "$map": {
+                            "input": {"$objectToArray": "$qaFinished"},
+                            "in": "$$this.v",
+                        }
+                    }
+                }
+            },
+            {
+                "$set": {
+                    "sortedQARuns": {
+                        "$sortArray": {
+                            "input": "$qaFinishedArray",
+                            "sortBy": {"started": -1},
+                        }
+                    }
+                }
+            },
+            {"$set": {"lastQARun": {"$arrayElemAt": ["$sortedQARuns", 0]}}},
+            {"$set": {"lastQAState": "$lastQARun.state"}},
+            {
+                "$set": {
+                    "qaRunCount": {
+                        "$size": {
+                            "$cond": [
+                                {"$isArray": "$qaFinishedArray"},
+                                "$qaFinishedArray",
+                                [],
+                            ]
+                        }
+                    }
+                }
+            },
+            {
+                "$unset": [
+                    "lastQARun",
+                    "qaFinishedArray",
+                    "sortedQARuns",
+                ]
+            },
         ]
 
         if not resources:

@@ -569,12 +613,25 @@ class BaseCrawlOps:
             aggregate.extend([{"$match": {"collectionIds": {"$in": [collection_id]}}}])
 
         if sort_by:
-            if sort_by not in ("started", "finished", "fileSize"):
+            if sort_by not in (
+                "started",
+                "finished",
+                "fileSize",
+                "reviewStatus",
+                "qaRunCount",
+                "qaState",
+            ):
                 raise HTTPException(status_code=400, detail="invalid_sort_by")
             if sort_direction not in (1, -1):
                 raise HTTPException(status_code=400, detail="invalid_sort_direction")
 
-            aggregate.extend([{"$sort": {sort_by: sort_direction}}])
+            sort_query = {sort_by: sort_direction}
+
+            # Add secondary sort for qaState - sorted by current, then last
+            if sort_by == "qaState":
+                sort_query["lastQAState"] = sort_direction
+
+            aggregate.extend([{"$sort": sort_query}])
 
         aggregate.extend(
             [

@@ -165,6 +165,50 @@ class CrawlOps(BaseCrawlOps):
             {"$set": {"firstSeedObject": {"$arrayElemAt": ["$config.seeds", 0]}}},
             {"$set": {"firstSeed": "$firstSeedObject.url"}},
             {"$unset": ["firstSeedObject", "errors", "config"]},
+            {"$set": {"qaState": "$qa.state"}},
+            {"$set": {"activeQAState": "$qaState"}},
+            {
+                "$set": {
+                    "qaFinishedArray": {
+                        "$map": {
+                            "input": {"$objectToArray": "$qaFinished"},
+                            "in": "$$this.v",
+                        }
+                    }
+                }
+            },
+            {
+                "$set": {
+                    "sortedQARuns": {
+                        "$sortArray": {
+                            "input": "$qaFinishedArray",
+                            "sortBy": {"started": -1},
+                        }
+                    }
+                }
+            },
+            {"$set": {"lastQARun": {"$arrayElemAt": ["$sortedQARuns", 0]}}},
+            {"$set": {"lastQAState": "$lastQARun.state"}},
+            {
+                "$set": {
+                    "qaRunCount": {
+                        "$size": {
+                            "$cond": [
+                                {"$isArray": "$qaFinishedArray"},
+                                "$qaFinishedArray",
+                                [],
+                            ]
+                        }
+                    }
+                }
+            },
+            {
+                "$unset": [
+                    "lastQARun",
+                    "qaFinishedArray",
+                    "sortedQARuns",
+                ]
+            },
         ]
 
         if not resources:

@@ -188,12 +232,21 @@ class CrawlOps(BaseCrawlOps):
                 "finished",
                 "fileSize",
                 "firstSeed",
+                "reviewStatus",
+                "qaRunCount",
+                "qaState",
             ):
                 raise HTTPException(status_code=400, detail="invalid_sort_by")
             if sort_direction not in (1, -1):
                 raise HTTPException(status_code=400, detail="invalid_sort_direction")
 
-            aggregate.extend([{"$sort": {sort_by: sort_direction}}])
+            sort_query = {sort_by: sort_direction}
+
+            # Add secondary sort for qaState - sorted by current, then last
+            if sort_by == "qaState":
+                sort_query["lastQAState"] = sort_direction
+
+            aggregate.extend([{"$sort": sort_query}])
 
         aggregate.extend(
             [

@@ -658,6 +658,10 @@ class CrawlOut(BaseMongoModel):
 
     reviewStatus: Optional[conint(ge=1, le=5)] = None  # type: ignore
 
+    qaRunCount: int = 0
+    activeQAState: Optional[str]
+    lastQAState: Optional[str]
+
 
 # ============================================================================
 class CrawlOutWithResources(CrawlOut):

@@ -116,6 +116,28 @@ def failed_qa_run_id(crawler_crawl_id, crawler_auth_headers, default_org_id):
     assert qa["started"]
     assert not qa["finished"]
 
+    # Ensure sorting by qaState works as expected - current floated to top
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls?sortBy=qaState",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    crawls = r.json()["items"]
+    assert crawls[0]["id"] == crawler_crawl_id
+    assert crawls[0]["activeQAState"]
+    assert crawls[0]["lastQAState"]
+
+    # Ensure sorting by qaState works as expected with all-crawls
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=qaState",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    crawls = r.json()["items"]
+    assert crawls[0]["id"] == crawler_crawl_id
+    assert crawls[0]["activeQAState"]
+    assert crawls[0]["lastQAState"]
+
     # Cancel crawl
     r = requests.post(
         f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa/cancel",

@@ -340,6 +362,96 @@ def test_failed_qa_run(
     assert qa["crawlExecSeconds"] > 0
 
 
+def test_sort_crawls_by_qa_runs(
+    crawler_crawl_id,
+    crawler_auth_headers,
+    default_org_id,
+    failed_qa_run_id,
+    qa_run_pages_ready,
+):
+    # Test that sorting by qaRunCount works as expected
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls?sortBy=qaRunCount",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    crawls = r.json()["items"]
+
+    assert crawls[0]["id"] == crawler_crawl_id
+    qa_run_count = crawls[0]["qaRunCount"]
+    assert qa_run_count > 0
+
+    last_count = qa_run_count
+    for crawl in crawls:
+        if crawl["id"] == crawler_crawl_id:
+            continue
+        crawl_qa_count = crawl["qaRunCount"]
+        assert isinstance(crawl_qa_count, int)
+        assert crawl_qa_count <= last_count
+        last_count = crawl_qa_count
+
+    # Test ascending sort
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls?sortBy=qaRunCount&sortDirection=1",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    crawls = r.json()["items"]
+
+    assert crawls[-1]["id"] == crawler_crawl_id
+    assert crawls[-1]["qaRunCount"] > 0
+
+    last_count = 0
+    for crawl in crawls:
+        if crawl["id"] == crawler_crawl_id:
+            continue
+        crawl_qa_count = crawl["qaRunCount"]
+        assert isinstance(crawl_qa_count, int)
+        assert crawl_qa_count >= last_count
+        last_count = crawl_qa_count
+
+    # Test same with all-crawls
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=qaRunCount",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    crawls = r.json()["items"]
+
+    assert crawls[0]["id"] == crawler_crawl_id
+    qa_run_count = crawls[0]["qaRunCount"]
+    assert qa_run_count > 0
+
+    last_count = qa_run_count
+    for crawl in crawls:
+        if crawl["id"] == crawler_crawl_id:
+            continue
+        crawl_qa_count = crawl["qaRunCount"]
+        assert isinstance(crawl_qa_count, int)
+        assert crawl_qa_count <= last_count
+        last_count = crawl_qa_count
+
+    # Test ascending sort
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=qaRunCount&sortDirection=1",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    crawls = r.json()["items"]
+
+    assert crawls[-1]["id"] == crawler_crawl_id
+    assert crawls[-1]["qaRunCount"] > 0
+
+    last_count = 0
+    for crawl in crawls:
+        if crawl["id"] == crawler_crawl_id:
+            continue
+        crawl_qa_count = crawl["qaRunCount"]
+        assert isinstance(crawl_qa_count, int)
+        assert crawl_qa_count >= last_count
+        last_count = crawl_qa_count
+
+
 def test_delete_qa_runs(
     crawler_crawl_id,
     crawler_auth_headers,

@@ -303,6 +303,44 @@ def test_update_crawl(
     assert r.status_code == 200
     assert r.json()["reviewStatus"] == 5
 
+    # Test sorting on reviewStatus
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls?sortBy=reviewStatus",
+        headers=admin_auth_headers,
+    )
+    assert r.status_code == 200
+    crawls = r.json()["items"]
+    assert crawls[0]["id"] == admin_crawl_id
+    assert crawls[0]["reviewStatus"] == 5
+
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls?sortBy=reviewStatus&sortDirection=1",
+        headers=admin_auth_headers,
+    )
+    assert r.status_code == 200
+    crawls = r.json()["items"]
+    assert crawls[-1]["id"] == admin_crawl_id
+    assert crawls[-1]["reviewStatus"] == 5
+
+    # Test sorting on reviewStatus for all-crawls
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=reviewStatus",
+        headers=admin_auth_headers,
+    )
+    assert r.status_code == 200
+    crawls = r.json()["items"]
+    assert crawls[0]["id"] == admin_crawl_id
+    assert crawls[0]["reviewStatus"] == 5
+
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=reviewStatus&sortDirection=1",
+        headers=admin_auth_headers,
+    )
+    assert r.status_code == 200
+    crawls = r.json()["items"]
+    assert crawls[-1]["id"] == admin_crawl_id
+    assert crawls[-1]["reviewStatus"] == 5
+
     # Try to update to invalid reviewStatus
     r = requests.patch(
         f"{API_PREFIX}/orgs/{default_org_id}/crawls/{admin_crawl_id}",