Track failed QA runs and include in list endpoint (#1650)
Fixes #1648

- Tracks failed QA runs in the database, not only successful ones
- Includes failed QA runs in the list endpoint by default
- Adds `skipFailed` param to the list endpoint to return only successful runs

Co-authored-by: Ilya Kreymer <ikreymer@gmail.com>
commit 4229b94736 (parent 5c08c9679c)
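For context, a minimal sketch of how a client might call the list endpoint with the new `skipFailed` param. The `API_PREFIX`, org/crawl IDs, and auth header values are placeholders; in practice they come from fixtures like the ones in the tests below:

```python
import requests

API_PREFIX = "http://localhost:8000/api"       # placeholder base URL
org_id = "<org-uuid>"                          # placeholder org id
crawl_id = "<crawl-id>"                        # placeholder crawl id
headers = {"Authorization": "Bearer <token>"}  # placeholder auth header

# Default behavior: failed (e.g. canceled) QA runs are now included
all_runs = requests.get(
    f"{API_PREFIX}/orgs/{org_id}/crawls/{crawl_id}/qa",
    headers=headers,
).json()

# With skipFailed=true, only successfully finished runs are returned
successful_runs = requests.get(
    f"{API_PREFIX}/orgs/{org_id}/crawls/{crawl_id}/qa",
    params={"skipFailed": "true"},
    headers=headers,
).json()

assert len(successful_runs) <= len(all_runs)
```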
@@ -40,6 +40,7 @@ from .models import (
     PaginatedResponse,
     RUNNING_AND_STARTING_STATES,
     SUCCESSFUL_STATES,
+    NON_RUNNING_STATES,
     ALL_CRAWL_STATES,
     TYPE_ALL_CRAWL_STATES,
 )
@@ -726,7 +727,9 @@ class CrawlOps(BaseCrawlOps):
             # pylint: disable=raise-missing-from
             raise HTTPException(status_code=500, detail=f"Error starting crawl: {exc}")
 
-    async def stop_crawl_qa_run(self, crawl_id: str, org: Organization):
+    async def stop_crawl_qa_run(
+        self, crawl_id: str, org: Organization, graceful: bool = True
+    ):
         """Stop crawl QA run, QA run removed when actually finished"""
         crawl = await self.get_crawl(crawl_id, org)
 
@@ -734,7 +737,9 @@ class CrawlOps(BaseCrawlOps):
             raise HTTPException(status_code=400, detail="qa_not_running")
 
         try:
-            result = await self.crawl_manager.shutdown_crawl(crawl.qa.id, graceful=True)
+            result = await self.crawl_manager.shutdown_crawl(
+                crawl.qa.id, graceful=graceful
+            )
 
             if result.get("error") == "Not Found":
                 # treat as success, qa crawl no longer exists, so mark as no qa
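With `graceful` threaded through, the same method now backs both a graceful stop and a hard cancel. A short sketch of the two call styles, assuming an `ops` instance and a resolved `crawl_id`/`org` as in the route handlers further down:

```python
async def stop_or_cancel(ops, crawl_id, org, cancel: bool = False):
    """Sketch: route a stop vs. cancel request to the same ops method."""
    # graceful=True (default) -> graceful stop; graceful=False -> hard cancel,
    # which is passed through to crawl_manager.shutdown_crawl()
    return await ops.stop_crawl_qa_run(crawl_id, org, graceful=not cancel)
```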
@@ -775,7 +780,7 @@ class CrawlOps(BaseCrawlOps):
 
         query: Dict[str, Any] = {"qa": None}
 
-        if crawl.qa.finished and crawl.qa.state in SUCCESSFUL_STATES:
+        if crawl.qa.finished and crawl.qa.state in NON_RUNNING_STATES:
             query[f"qaFinished.{crawl.qa.id}"] = crawl.qa.dict()
 
         if await self.crawls.find_one_and_update(
@@ -786,17 +791,28 @@ class CrawlOps(BaseCrawlOps):
         return False
 
     async def get_qa_runs(
-        self, crawl_id: str, org: Optional[Organization] = None
+        self,
+        crawl_id: str,
+        skip_failed: bool = False,
+        org: Optional[Organization] = None,
     ) -> List[QARunOut]:
         """Return list of QA runs"""
         crawl_data = await self.get_crawl_raw(
             crawl_id, org, "crawl", project={"qaFinished": True, "qa": True}
         )
         qa_finished = crawl_data.get("qaFinished") or {}
-        all_qa = [QARunOut(**qa_run_data) for qa_run_data in qa_finished.values()]
+        if skip_failed:
+            all_qa = [
+                QARunOut(**qa_run_data)
+                for qa_run_data in qa_finished.values()
+                if qa_run_data.get("state") in SUCCESSFUL_STATES
+            ]
+        else:
+            all_qa = [QARunOut(**qa_run_data) for qa_run_data in qa_finished.values()]
         all_qa.sort(key=lambda x: x.finished or dt_now(), reverse=True)
         qa = crawl_data.get("qa")
-        if qa:
+        # ensure current QA run didn't just fail, just in case
+        if qa and (not skip_failed or qa.get("state") in SUCCESSFUL_STATES):
             all_qa.insert(0, QARunOut(**qa))
         return all_qa
 
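To illustrate the `skip_failed` filtering in isolation, a standalone sketch over plain dicts. The state values and the `SUCCESSFUL_STATES` subset here are illustrative; the real constant is imported from `.models`:

```python
# Illustrative subset; the real constant lives in .models
SUCCESSFUL_STATES = ("complete", "stopped_by_user", "stopped_quota_reached")

qa_finished = {
    "qa-1": {"id": "qa-1", "state": "complete"},
    "qa-2": {"id": "qa-2", "state": "canceled"},
}

def list_qa_runs(qa_finished: dict, skip_failed: bool = False) -> list:
    """Mirror the get_qa_runs filtering: drop non-successful runs when asked."""
    if skip_failed:
        return [
            run
            for run in qa_finished.values()
            if run.get("state") in SUCCESSFUL_STATES
        ]
    return list(qa_finished.values())

assert len(list_qa_runs(qa_finished)) == 2                    # failed run included
assert len(list_qa_runs(qa_finished, skip_failed=True)) == 1  # canceled run dropped
```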
@@ -1061,6 +1077,13 @@ def init_crawls_api(crawl_manager: CrawlManager, app, user_dep, *args):
         # pylint: disable=unused-argument
         return await ops.stop_crawl_qa_run(crawl_id, org)
 
+    @app.post("/orgs/{oid}/crawls/{crawl_id}/qa/cancel", tags=["qa"])
+    async def cancel_crawl_qa_run(
+        crawl_id: str, org: Organization = Depends(org_crawl_dep)
+    ):
+        # pylint: disable=unused-argument
+        return await ops.stop_crawl_qa_run(crawl_id, org, graceful=False)
+
     @app.post("/orgs/{oid}/crawls/{crawl_id}/qa/delete", tags=["qa"])
     async def delete_crawl_qa_runs(
         crawl_id: str,
@@ -1075,8 +1098,10 @@ def init_crawls_api(crawl_manager: CrawlManager, app, user_dep, *args):
         tags=["qa"],
         response_model=List[QARunOut],
     )
-    async def get_qa_runs(crawl_id, org: Organization = Depends(org_viewer_dep)):
-        return await ops.get_qa_runs(crawl_id, org)
+    async def get_qa_runs(
+        crawl_id, org: Organization = Depends(org_viewer_dep), skipFailed: bool = False
+    ):
+        return await ops.get_qa_runs(crawl_id, skip_failed=skipFailed, org=org)
 
     @app.get(
         "/orgs/{oid}/crawls/{crawl_id}/qa/activeQA",
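And a hedged usage sketch for the new cancel route, reusing the placeholder values from the earlier example; unlike the graceful stop route, this requests a non-graceful shutdown (`graceful=False`):

```python
import requests

# Cancel a running QA run outright (non-graceful shutdown);
# API_PREFIX, org_id, crawl_id, headers are placeholders as above
r = requests.post(
    f"{API_PREFIX}/orgs/{org_id}/crawls/{crawl_id}/qa/cancel",
    headers=headers,
)
assert r.status_code == 200
```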
@@ -3,7 +3,9 @@ import requests
 import time
 from datetime import datetime
 
+
 qa_run_id = None
+failed_qa_run_id = None
 
 
 def test_run_qa(crawler_crawl_id, crawler_auth_headers, default_org_id):
@@ -182,15 +184,105 @@ def test_run_qa_not_running(crawler_crawl_id, crawler_auth_headers, default_org_id):
     assert r.json()["detail"] == "qa_not_running"
 
 
-def test_delete_qa_run(crawler_crawl_id, crawler_auth_headers, default_org_id):
+def test_fail_running_qa(crawler_crawl_id, crawler_auth_headers, default_org_id):
     r = requests.post(
-        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa/delete",
-        json={"qa_run_ids": [qa_run_id]},
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa/start",
         headers=crawler_auth_headers,
     )
 
     assert r.status_code == 200
-    assert r.json()["deleted"] == True
 
+    data = r.json()
+    assert data["started"]
+    global failed_qa_run_id
+    failed_qa_run_id = data["started"]
+
+    # Wait until it's properly running
+    count = 0
+    while count < 24:
+        r = requests.get(
+            f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa/activeQA",
+            headers=crawler_auth_headers,
+        )
+
+        data = r.json()
+        if data.get("qa") and data["qa"].get("state") == "running":
+            break
+
+        time.sleep(5)
+        count += 1
+
+    # Cancel crawl
+    r = requests.post(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa/cancel",
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 200
+
+    # Wait until it stops with canceled state
+    count = 0
+    while count < 24:
+        r = requests.get(
+            f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa/activeQA",
+            headers=crawler_auth_headers,
+        )
+
+        data = r.json()
+        if data.get("qa") and data["qa"].get("state") == "canceled":
+            break
+
+        time.sleep(5)
+        count += 1
+
+    # Ensure failed QA run is included in list endpoint
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa",
+        headers=crawler_auth_headers,
+    )
+
+    data = r.json()
+
+    assert len(data) == 2
+
+    failed_run = [qa_run for qa_run in data if qa_run.get("id") == failed_qa_run_id][0]
+    assert failed_run
+    assert failed_run["state"] == "canceled"
+    assert failed_run["started"]
+    assert failed_run["finished"]
+    assert failed_run["stats"]
+    assert failed_run["crawlExecSeconds"] >= 0
+
+    # Ensure failed QA run not included in list endpoint with skipFailed param
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa?skipFailed=true",
+        headers=crawler_auth_headers,
+    )
+
+    data = r.json()
+
+    assert len(data) == 1
+
+    qa = data[0]
+    assert qa
+    assert qa["state"] == "complete"
+    assert qa["started"]
+    assert qa["finished"]
+    assert qa["stats"]["found"] == 1
+    assert qa["stats"]["done"] == 1
+    assert qa["crawlExecSeconds"] > 0
+
+
+def test_delete_qa_run(crawler_crawl_id, crawler_auth_headers, default_org_id):
+    r = requests.post(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa/delete",
+        json={"qa_run_ids": [qa_run_id, failed_qa_run_id]},
+        headers=crawler_auth_headers,
+    )
+
+    assert r.status_code == 200
+    assert r.json()["deleted"] == 2
+
     time.sleep(5)
 
     # deleted from finished qa list
     r = requests.get(