Track failed QA runs and include in list endpoint (#1650)

Fixes #1648 

- Tracks failed QA runs in the database, not only successful ones
- Includes failed QA runs in the list endpoint by default
- Adds `skipFailed` param to the list endpoint to return only successful runs (see the example request below)
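
For reference, a minimal sketch of calling the list endpoint with and without the new param; the base URL, IDs, and bearer token below are placeholders for illustration, not part of this change:

```python
import requests

# All values below are placeholders; substitute your own deployment URL,
# org ID, crawl ID, and auth token.
API_PREFIX = "https://example.com/api"
ORG_ID = "<org-id>"
CRAWL_ID = "<crawl-id>"
HEADERS = {"Authorization": "Bearer <token>"}

# Default behavior: failed (e.g. canceled) QA runs are now included
r = requests.get(
    f"{API_PREFIX}/orgs/{ORG_ID}/crawls/{CRAWL_ID}/qa",
    headers=HEADERS,
)
print([run["state"] for run in r.json()])  # e.g. ["canceled", "complete"]

# With skipFailed=true, only successful QA runs are returned
r = requests.get(
    f"{API_PREFIX}/orgs/{ORG_ID}/crawls/{CRAWL_ID}/qa",
    params={"skipFailed": "true"},
    headers=HEADERS,
)
print([run["state"] for run in r.json()])  # e.g. ["complete"]
```

Passing `skipFailed=true` filters out any run whose state is not in `SUCCESSFUL_STATES` (such as canceled runs).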

---------
Co-authored-by: Ilya Kreymer <ikreymer@gmail.com>
Author: Tessa Walsh
Date: 2024-04-04 21:51:06 -04:00 (committed by GitHub)
Parent: 5c08c9679c
Commit: 4229b94736
2 changed files with 129 additions and 12 deletions

File: backend/btrixcloud/crawls.py

@@ -40,6 +40,7 @@ from .models import (
     PaginatedResponse,
     RUNNING_AND_STARTING_STATES,
     SUCCESSFUL_STATES,
+    NON_RUNNING_STATES,
     ALL_CRAWL_STATES,
     TYPE_ALL_CRAWL_STATES,
 )
@@ -726,7 +727,9 @@ class CrawlOps(BaseCrawlOps):
             # pylint: disable=raise-missing-from
             raise HTTPException(status_code=500, detail=f"Error starting crawl: {exc}")

-    async def stop_crawl_qa_run(self, crawl_id: str, org: Organization):
+    async def stop_crawl_qa_run(
+        self, crawl_id: str, org: Organization, graceful: bool = True
+    ):
         """Stop crawl QA run, QA run removed when actually finished"""
         crawl = await self.get_crawl(crawl_id, org)
@@ -734,7 +737,9 @@ class CrawlOps(BaseCrawlOps):
             raise HTTPException(status_code=400, detail="qa_not_running")

         try:
-            result = await self.crawl_manager.shutdown_crawl(crawl.qa.id, graceful=True)
+            result = await self.crawl_manager.shutdown_crawl(
+                crawl.qa.id, graceful=graceful
+            )

             if result.get("error") == "Not Found":
                 # treat as success, qa crawl no longer exists, so mark as no qa
@@ -775,7 +780,7 @@ class CrawlOps(BaseCrawlOps):

         query: Dict[str, Any] = {"qa": None}

-        if crawl.qa.finished and crawl.qa.state in SUCCESSFUL_STATES:
+        if crawl.qa.finished and crawl.qa.state in NON_RUNNING_STATES:
             query[f"qaFinished.{crawl.qa.id}"] = crawl.qa.dict()

         if await self.crawls.find_one_and_update(
@@ -786,17 +791,28 @@ class CrawlOps(BaseCrawlOps):
         return False

     async def get_qa_runs(
-        self, crawl_id: str, org: Optional[Organization] = None
+        self,
+        crawl_id: str,
+        skip_failed: bool = False,
+        org: Optional[Organization] = None,
     ) -> List[QARunOut]:
         """Return list of QA runs"""
         crawl_data = await self.get_crawl_raw(
             crawl_id, org, "crawl", project={"qaFinished": True, "qa": True}
         )
         qa_finished = crawl_data.get("qaFinished") or {}
-        all_qa = [QARunOut(**qa_run_data) for qa_run_data in qa_finished.values()]
+        if skip_failed:
+            all_qa = [
+                QARunOut(**qa_run_data)
+                for qa_run_data in qa_finished.values()
+                if qa_run_data.get("state") in SUCCESSFUL_STATES
+            ]
+        else:
+            all_qa = [QARunOut(**qa_run_data) for qa_run_data in qa_finished.values()]
         all_qa.sort(key=lambda x: x.finished or dt_now(), reverse=True)

         qa = crawl_data.get("qa")
-        if qa:
+        # ensure current QA run didn't just fail, just in case
+        if qa and (not skip_failed or qa.get("state") in SUCCESSFUL_STATES):
             all_qa.insert(0, QARunOut(**qa))
         return all_qa
@@ -1061,6 +1077,13 @@ def init_crawls_api(crawl_manager: CrawlManager, app, user_dep, *args):
         # pylint: disable=unused-argument
         return await ops.stop_crawl_qa_run(crawl_id, org)

+    @app.post("/orgs/{oid}/crawls/{crawl_id}/qa/cancel", tags=["qa"])
+    async def cancel_crawl_qa_run(
+        crawl_id: str, org: Organization = Depends(org_crawl_dep)
+    ):
+        # pylint: disable=unused-argument
+        return await ops.stop_crawl_qa_run(crawl_id, org, graceful=False)
+
     @app.post("/orgs/{oid}/crawls/{crawl_id}/qa/delete", tags=["qa"])
     async def delete_crawl_qa_runs(
         crawl_id: str,
@@ -1075,8 +1098,10 @@ def init_crawls_api(crawl_manager: CrawlManager, app, user_dep, *args):
         tags=["qa"],
         response_model=List[QARunOut],
     )
-    async def get_qa_runs(crawl_id, org: Organization = Depends(org_viewer_dep)):
-        return await ops.get_qa_runs(crawl_id, org)
+    async def get_qa_runs(
+        crawl_id, org: Organization = Depends(org_viewer_dep), skipFailed: bool = False
+    ):
+        return await ops.get_qa_runs(crawl_id, skip_failed=skipFailed, org=org)

     @app.get(
         "/orgs/{oid}/crawls/{crawl_id}/qa/activeQA",

File: backend/test/test_qa.py

@@ -3,7 +3,9 @@ import requests
 import time
 from datetime import datetime

 qa_run_id = None
+failed_qa_run_id = None


 def test_run_qa(crawler_crawl_id, crawler_auth_headers, default_org_id):
@@ -182,15 +184,105 @@ def test_run_qa_not_running(crawler_crawl_id, crawler_auth_headers, default_org_id):
     assert r.json()["detail"] == "qa_not_running"

-def test_delete_qa_run(crawler_crawl_id, crawler_auth_headers, default_org_id):
+def test_fail_running_qa(crawler_crawl_id, crawler_auth_headers, default_org_id):
     r = requests.post(
-        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa/delete",
-        json={"qa_run_ids": [qa_run_id]},
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa/start",
         headers=crawler_auth_headers,
     )
     assert r.status_code == 200
-    assert r.json()["deleted"] == True
+    data = r.json()
+    assert data["started"]
+    global failed_qa_run_id
+    failed_qa_run_id = data["started"]
+
+    # Wait until it's properly running
+    count = 0
+    while count < 24:
+        r = requests.get(
+            f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa/activeQA",
+            headers=crawler_auth_headers,
+        )
+        data = r.json()
+        if data.get("qa") and data["qa"].get("state") == "running":
+            break
+        time.sleep(5)
+        count += 1
+
+    # Cancel crawl
+    r = requests.post(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa/cancel",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+
+    # Wait until it stops with canceled state
+    count = 0
+    while count < 24:
+        r = requests.get(
+            f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa/activeQA",
+            headers=crawler_auth_headers,
+        )
+        data = r.json()
+        if data.get("qa") and data["qa"].get("state") == "canceled":
+            break
+        time.sleep(5)
+        count += 1
+
+    # Ensure failed QA run is included in list endpoint
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa",
+        headers=crawler_auth_headers,
+    )
+    data = r.json()
+    assert len(data) == 2
+
+    failed_run = [qa_run for qa_run in data if qa_run.get("id") == failed_qa_run_id][0]
+    assert failed_run
+    assert failed_run["state"] == "canceled"
+    assert failed_run["started"]
+    assert failed_run["finished"]
+    assert failed_run["stats"]
+    assert failed_run["crawlExecSeconds"] >= 0
+
+    # Ensure failed QA run not included in list endpoint with skipFailed param
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa?skipFailed=true",
+        headers=crawler_auth_headers,
+    )
+    data = r.json()
+    assert len(data) == 1
+
+    qa = data[0]
+    assert qa
+    assert qa["state"] == "complete"
+    assert qa["started"]
+    assert qa["finished"]
+    assert qa["stats"]["found"] == 1
+    assert qa["stats"]["done"] == 1
+    assert qa["crawlExecSeconds"] > 0
+
+
+def test_delete_qa_run(crawler_crawl_id, crawler_auth_headers, default_org_id):
+    r = requests.post(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa/delete",
+        json={"qa_run_ids": [qa_run_id, failed_qa_run_id]},
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+    assert r.json()["deleted"] == 2

     time.sleep(5)

     # deleted from finished qa list
     r = requests.get(