Support sorting by last QA started time (#1712)
To support #1683, it would be useful to be able to sort by 'last QA start time' in addition to, or instead of, last QA state.

- Make sorting consistent with workflow sorting
- sortBy fields renamed to lastQAState and lastQAStarted
- Current QA runs are now included in the lastQAState/lastQAStarted fields, rather than being separated out into different values

---------

Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
This commit is contained in:
parent
b574f00d2b
commit
1844e761dc
@ -551,8 +551,6 @@ class BaseCrawlOps:
|
||||
{"$set": {"firstSeedObject": {"$arrayElemAt": ["$config.seeds", 0]}}},
|
||||
{"$set": {"firstSeed": "$firstSeedObject.url"}},
|
||||
{"$unset": ["firstSeedObject", "errors", "config"]},
|
||||
{"$set": {"qaState": "$qa.state"}},
|
||||
{"$set": {"activeQAState": "$qaState"}},
|
||||
{"$set": {"activeQAStats": "$qa.stats"}},
|
||||
{
|
||||
"$set": {
|
||||
@ -564,11 +562,23 @@ class BaseCrawlOps:
|
||||
}
|
||||
}
|
||||
},
|
||||
# Add active QA run to array if exists prior to sorting, taking care not to
|
||||
# pass null to $concatArrays so that our result isn't null
|
||||
{
|
||||
"$set": {
|
||||
"qaActiveArray": {"$cond": [{"$ne": ["$qa", None]}, ["$qa"], []]}
|
||||
}
|
||||
},
|
||||
{
|
||||
"$set": {
|
||||
"qaArray": {"$concatArrays": ["$qaFinishedArray", "$qaActiveArray"]}
|
||||
}
|
||||
},
|
||||
{
|
||||
"$set": {
|
||||
"sortedQARuns": {
|
||||
"$sortArray": {
|
||||
"input": "$qaFinishedArray",
|
||||
"input": "$qaArray",
|
||||
"sortBy": {"started": -1},
|
||||
}
|
||||
}
|
||||
@ -576,13 +586,14 @@ class BaseCrawlOps:
|
||||
},
|
||||
{"$set": {"lastQARun": {"$arrayElemAt": ["$sortedQARuns", 0]}}},
|
||||
{"$set": {"lastQAState": "$lastQARun.state"}},
|
||||
{"$set": {"lastQAStarted": "$lastQARun.started"}},
|
||||
{
|
||||
"$set": {
|
||||
"qaRunCount": {
|
||||
"$size": {
|
||||
"$cond": [
|
||||
{"$isArray": "$qaFinishedArray"},
|
||||
"$qaFinishedArray",
|
||||
{"$isArray": "$qaArray"},
|
||||
"$qaArray",
|
||||
[],
|
||||
]
|
||||
}
|
||||
@ -592,7 +603,9 @@ class BaseCrawlOps:
|
||||
{
|
||||
"$unset": [
|
||||
"lastQARun",
|
||||
"qaActiveArray",
|
||||
"qaFinishedArray",
|
||||
"qaArray",
|
||||
"sortedQARuns",
|
||||
]
|
||||
},
|
||||
@ -619,8 +632,9 @@ class BaseCrawlOps:
|
||||
"finished",
|
||||
"fileSize",
|
||||
"reviewStatus",
|
||||
"lastQAStarted",
|
||||
"lastQAState",
|
||||
"qaRunCount",
|
||||
"qaState",
|
||||
):
|
||||
raise HTTPException(status_code=400, detail="invalid_sort_by")
|
||||
if sort_direction not in (1, -1):
|
||||
@ -628,10 +642,8 @@ class BaseCrawlOps:
|
||||
|
||||
sort_query = {sort_by: sort_direction}
|
||||
|
||||
# Secondary sort for qaState - sorted by current, then last
|
||||
# Tertiary sort for qaState - type, always ascending so crawls are first
|
||||
if sort_by == "qaState":
|
||||
sort_query["lastQAState"] = sort_direction
|
||||
# Ensure crawls are always sorted first for QA-related sorts
|
||||
if sort_by in ("lastQAStarted", "lastQAState"):
|
||||
sort_query["type"] = 1
|
||||
|
||||
aggregate.extend([{"$sort": sort_query}])
|
||||
|
@ -169,8 +169,6 @@ class CrawlOps(BaseCrawlOps):
|
||||
{"$set": {"firstSeedObject": {"$arrayElemAt": ["$config.seeds", 0]}}},
|
||||
{"$set": {"firstSeed": "$firstSeedObject.url"}},
|
||||
{"$unset": ["firstSeedObject", "errors", "config"]},
|
||||
{"$set": {"qaState": "$qa.state"}},
|
||||
{"$set": {"activeQAState": "$qaState"}},
|
||||
{"$set": {"activeQAStats": "$qa.stats"}},
|
||||
{
|
||||
"$set": {
|
||||
@ -182,11 +180,23 @@ class CrawlOps(BaseCrawlOps):
|
||||
}
|
||||
}
|
||||
},
|
||||
# Add active QA run to array if exists prior to sorting, taking care not to
|
||||
# pass null to $concatArrays so that our result isn't null
|
||||
{
|
||||
"$set": {
|
||||
"qaActiveArray": {"$cond": [{"$ne": ["$qa", None]}, ["$qa"], []]}
|
||||
}
|
||||
},
|
||||
{
|
||||
"$set": {
|
||||
"qaArray": {"$concatArrays": ["$qaFinishedArray", "$qaActiveArray"]}
|
||||
}
|
||||
},
|
||||
{
|
||||
"$set": {
|
||||
"sortedQARuns": {
|
||||
"$sortArray": {
|
||||
"input": "$qaFinishedArray",
|
||||
"input": "$qaArray",
|
||||
"sortBy": {"started": -1},
|
||||
}
|
||||
}
|
||||
@ -194,13 +204,14 @@ class CrawlOps(BaseCrawlOps):
|
||||
},
|
||||
{"$set": {"lastQARun": {"$arrayElemAt": ["$sortedQARuns", 0]}}},
|
||||
{"$set": {"lastQAState": "$lastQARun.state"}},
|
||||
{"$set": {"lastQAStarted": "$lastQARun.started"}},
|
||||
{
|
||||
"$set": {
|
||||
"qaRunCount": {
|
||||
"$size": {
|
||||
"$cond": [
|
||||
{"$isArray": "$qaFinishedArray"},
|
||||
"$qaFinishedArray",
|
||||
{"$isArray": "$qaArray"},
|
||||
"$qaArray",
|
||||
[],
|
||||
]
|
||||
}
|
||||
@ -210,7 +221,9 @@ class CrawlOps(BaseCrawlOps):
|
||||
{
|
||||
"$unset": [
|
||||
"lastQARun",
|
||||
"qaActiveArray",
|
||||
"qaFinishedArray",
|
||||
"qaArray",
|
||||
"sortedQARuns",
|
||||
]
|
||||
},
|
||||
@ -239,19 +252,14 @@ class CrawlOps(BaseCrawlOps):
|
||||
"firstSeed",
|
||||
"reviewStatus",
|
||||
"qaRunCount",
|
||||
"qaState",
|
||||
"lastQAState",
|
||||
"lastQAStarted",
|
||||
):
|
||||
raise HTTPException(status_code=400, detail="invalid_sort_by")
|
||||
if sort_direction not in (1, -1):
|
||||
raise HTTPException(status_code=400, detail="invalid_sort_direction")
|
||||
|
||||
sort_query = {sort_by: sort_direction}
|
||||
|
||||
# Add secondary sort for qaState - sorted by current, then last
|
||||
if sort_by == "qaState":
|
||||
sort_query["lastQAState"] = sort_direction
|
||||
|
||||
aggregate.extend([{"$sort": sort_query}])
|
||||
aggregate.extend([{"$sort": {sort_by: sort_direction}}])
|
||||
|
||||
aggregate.extend(
|
||||
[
|
||||
|
@ -660,9 +660,9 @@ class CrawlOut(BaseMongoModel):
|
||||
reviewStatus: Optional[conint(ge=1, le=5)] = None # type: ignore
|
||||
|
||||
qaRunCount: int = 0
|
||||
activeQAState: Optional[str]
|
||||
activeQAStats: Optional[CrawlStats]
|
||||
lastQAState: Optional[str]
|
||||
lastQAStarted: Optional[datetime]
|
||||
|
||||
|
||||
# ============================================================================
|
||||
|
@ -116,29 +116,53 @@ def failed_qa_run_id(crawler_crawl_id, crawler_auth_headers, default_org_id):
|
||||
assert qa["started"]
|
||||
assert not qa["finished"]
|
||||
|
||||
# Ensure sorting by qaState works as expected - current floated to top
|
||||
# Ensure sorting by lastQAState works as expected - current floated to top
|
||||
r = requests.get(
|
||||
f"{API_PREFIX}/orgs/{default_org_id}/crawls?sortBy=qaState",
|
||||
f"{API_PREFIX}/orgs/{default_org_id}/crawls?sortBy=lastQAState",
|
||||
headers=crawler_auth_headers,
|
||||
)
|
||||
assert r.status_code == 200
|
||||
crawls = r.json()["items"]
|
||||
assert crawls[0]["id"] == crawler_crawl_id
|
||||
assert crawls[0]["activeQAState"]
|
||||
assert crawls[0]["activeQAStats"]
|
||||
assert crawls[0]["lastQAState"]
|
||||
assert crawls[0]["lastQAStarted"]
|
||||
|
||||
# Ensure sorting by qaState works as expected with all-crawls
|
||||
# Ensure sorting by lastQAState works as expected with all-crawls
|
||||
r = requests.get(
|
||||
f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=qaState",
|
||||
f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=lastQAState",
|
||||
headers=crawler_auth_headers,
|
||||
)
|
||||
assert r.status_code == 200
|
||||
crawls = r.json()["items"]
|
||||
assert crawls[0]["id"] == crawler_crawl_id
|
||||
assert crawls[0]["activeQAState"]
|
||||
assert crawls[0]["activeQAStats"]
|
||||
assert crawls[0]["lastQAState"]
|
||||
assert crawls[0]["lastQAStarted"]
|
||||
|
||||
# Ensure sorting by lastQAStarted works as expected - current floated to top
|
||||
r = requests.get(
|
||||
f"{API_PREFIX}/orgs/{default_org_id}/crawls?sortBy=lastQAStarted",
|
||||
headers=crawler_auth_headers,
|
||||
)
|
||||
assert r.status_code == 200
|
||||
crawls = r.json()["items"]
|
||||
assert crawls[0]["id"] == crawler_crawl_id
|
||||
assert crawls[0]["activeQAStats"]
|
||||
assert crawls[0]["lastQAState"]
|
||||
assert crawls[0]["lastQAStarted"]
|
||||
|
||||
# Ensure sorting by lastQAStarted works as expected with all-crawls
|
||||
r = requests.get(
|
||||
f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=lastQAStarted",
|
||||
headers=crawler_auth_headers,
|
||||
)
|
||||
assert r.status_code == 200
|
||||
crawls = r.json()["items"]
|
||||
assert crawls[0]["id"] == crawler_crawl_id
|
||||
assert crawls[0]["activeQAStats"]
|
||||
assert crawls[0]["lastQAState"]
|
||||
assert crawls[0]["lastQAStarted"]
|
||||
|
||||
# Cancel crawl
|
||||
r = requests.post(
|
||||
|
@ -419,9 +419,22 @@ def test_list_all_crawls(
|
||||
assert item["finished"]
|
||||
assert item["state"]
|
||||
|
||||
# Test that all-crawls qaState sort always puts crawls before uploads
|
||||
# Test that all-crawls lastQAState and lastQAStarted sorts always put crawls before uploads
|
||||
r = requests.get(
|
||||
f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=qaState",
|
||||
f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=lastQAState",
|
||||
headers=admin_auth_headers,
|
||||
)
|
||||
assert r.status_code == 200
|
||||
data = r.json()
|
||||
|
||||
last_type = None
|
||||
for item in data["items"]:
|
||||
if last_type == "upload":
|
||||
assert item["type"] != "crawl"
|
||||
last_type = item["type"]
|
||||
|
||||
r = requests.get(
|
||||
f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=lastQAStarted",
|
||||
headers=admin_auth_headers,
|
||||
)
|
||||
assert r.status_code == 200
|
||||
|
Loading…
Reference in New Issue
Block a user