Track failed QA runs and include in list endpoint (#1650)
Fixes #1648 - Tracks failed QA runs in database, not only successful ones - Includes failed QA runs in list endpoint by default - Adds `skipFailed` param to list endpoint to return only successful runs --------- Co-authored-by: Ilya Kreymer <ikreymer@gmail.com>
This commit is contained in:
		
							parent
							
								
									5c08c9679c
								
							
						
					
					
						commit
						4229b94736
					
				| @ -40,6 +40,7 @@ from .models import ( | |||||||
|     PaginatedResponse, |     PaginatedResponse, | ||||||
|     RUNNING_AND_STARTING_STATES, |     RUNNING_AND_STARTING_STATES, | ||||||
|     SUCCESSFUL_STATES, |     SUCCESSFUL_STATES, | ||||||
|  |     NON_RUNNING_STATES, | ||||||
|     ALL_CRAWL_STATES, |     ALL_CRAWL_STATES, | ||||||
|     TYPE_ALL_CRAWL_STATES, |     TYPE_ALL_CRAWL_STATES, | ||||||
| ) | ) | ||||||
| @ -726,7 +727,9 @@ class CrawlOps(BaseCrawlOps): | |||||||
|             # pylint: disable=raise-missing-from |             # pylint: disable=raise-missing-from | ||||||
|             raise HTTPException(status_code=500, detail=f"Error starting crawl: {exc}") |             raise HTTPException(status_code=500, detail=f"Error starting crawl: {exc}") | ||||||
| 
 | 
 | ||||||
|     async def stop_crawl_qa_run(self, crawl_id: str, org: Organization): |     async def stop_crawl_qa_run( | ||||||
|  |         self, crawl_id: str, org: Organization, graceful: bool = True | ||||||
|  |     ): | ||||||
|         """Stop crawl QA run, QA run removed when actually finished""" |         """Stop crawl QA run, QA run removed when actually finished""" | ||||||
|         crawl = await self.get_crawl(crawl_id, org) |         crawl = await self.get_crawl(crawl_id, org) | ||||||
| 
 | 
 | ||||||
| @ -734,7 +737,9 @@ class CrawlOps(BaseCrawlOps): | |||||||
|             raise HTTPException(status_code=400, detail="qa_not_running") |             raise HTTPException(status_code=400, detail="qa_not_running") | ||||||
| 
 | 
 | ||||||
|         try: |         try: | ||||||
|             result = await self.crawl_manager.shutdown_crawl(crawl.qa.id, graceful=True) |             result = await self.crawl_manager.shutdown_crawl( | ||||||
|  |                 crawl.qa.id, graceful=graceful | ||||||
|  |             ) | ||||||
| 
 | 
 | ||||||
|             if result.get("error") == "Not Found": |             if result.get("error") == "Not Found": | ||||||
|                 # treat as success, qa crawl no longer exists, so mark as no qa |                 # treat as success, qa crawl no longer exists, so mark as no qa | ||||||
| @ -775,7 +780,7 @@ class CrawlOps(BaseCrawlOps): | |||||||
| 
 | 
 | ||||||
|         query: Dict[str, Any] = {"qa": None} |         query: Dict[str, Any] = {"qa": None} | ||||||
| 
 | 
 | ||||||
|         if crawl.qa.finished and crawl.qa.state in SUCCESSFUL_STATES: |         if crawl.qa.finished and crawl.qa.state in NON_RUNNING_STATES: | ||||||
|             query[f"qaFinished.{crawl.qa.id}"] = crawl.qa.dict() |             query[f"qaFinished.{crawl.qa.id}"] = crawl.qa.dict() | ||||||
| 
 | 
 | ||||||
|         if await self.crawls.find_one_and_update( |         if await self.crawls.find_one_and_update( | ||||||
| @ -786,17 +791,28 @@ class CrawlOps(BaseCrawlOps): | |||||||
|         return False |         return False | ||||||
| 
 | 
 | ||||||
|     async def get_qa_runs( |     async def get_qa_runs( | ||||||
|         self, crawl_id: str, org: Optional[Organization] = None |         self, | ||||||
|  |         crawl_id: str, | ||||||
|  |         skip_failed: bool = False, | ||||||
|  |         org: Optional[Organization] = None, | ||||||
|     ) -> List[QARunOut]: |     ) -> List[QARunOut]: | ||||||
|         """Return list of QA runs""" |         """Return list of QA runs""" | ||||||
|         crawl_data = await self.get_crawl_raw( |         crawl_data = await self.get_crawl_raw( | ||||||
|             crawl_id, org, "crawl", project={"qaFinished": True, "qa": True} |             crawl_id, org, "crawl", project={"qaFinished": True, "qa": True} | ||||||
|         ) |         ) | ||||||
|         qa_finished = crawl_data.get("qaFinished") or {} |         qa_finished = crawl_data.get("qaFinished") or {} | ||||||
|         all_qa = [QARunOut(**qa_run_data) for qa_run_data in qa_finished.values()] |         if skip_failed: | ||||||
|  |             all_qa = [ | ||||||
|  |                 QARunOut(**qa_run_data) | ||||||
|  |                 for qa_run_data in qa_finished.values() | ||||||
|  |                 if qa_run_data.get("state") in SUCCESSFUL_STATES | ||||||
|  |             ] | ||||||
|  |         else: | ||||||
|  |             all_qa = [QARunOut(**qa_run_data) for qa_run_data in qa_finished.values()] | ||||||
|         all_qa.sort(key=lambda x: x.finished or dt_now(), reverse=True) |         all_qa.sort(key=lambda x: x.finished or dt_now(), reverse=True) | ||||||
|         qa = crawl_data.get("qa") |         qa = crawl_data.get("qa") | ||||||
|         if qa: |         # ensure current QA run didn't just fail, just in case | ||||||
|  |         if qa and (not skip_failed or qa.get("state") in SUCCESSFUL_STATES): | ||||||
|             all_qa.insert(0, QARunOut(**qa)) |             all_qa.insert(0, QARunOut(**qa)) | ||||||
|         return all_qa |         return all_qa | ||||||
| 
 | 
 | ||||||
| @ -1061,6 +1077,13 @@ def init_crawls_api(crawl_manager: CrawlManager, app, user_dep, *args): | |||||||
|         # pylint: disable=unused-argument |         # pylint: disable=unused-argument | ||||||
|         return await ops.stop_crawl_qa_run(crawl_id, org) |         return await ops.stop_crawl_qa_run(crawl_id, org) | ||||||
| 
 | 
 | ||||||
|  |     @app.post("/orgs/{oid}/crawls/{crawl_id}/qa/cancel", tags=["qa"]) | ||||||
|  |     async def cancel_crawl_qa_run( | ||||||
|  |         crawl_id: str, org: Organization = Depends(org_crawl_dep) | ||||||
|  |     ): | ||||||
|  |         # pylint: disable=unused-argument | ||||||
|  |         return await ops.stop_crawl_qa_run(crawl_id, org, graceful=False) | ||||||
|  | 
 | ||||||
|     @app.post("/orgs/{oid}/crawls/{crawl_id}/qa/delete", tags=["qa"]) |     @app.post("/orgs/{oid}/crawls/{crawl_id}/qa/delete", tags=["qa"]) | ||||||
|     async def delete_crawl_qa_runs( |     async def delete_crawl_qa_runs( | ||||||
|         crawl_id: str, |         crawl_id: str, | ||||||
| @ -1075,8 +1098,10 @@ def init_crawls_api(crawl_manager: CrawlManager, app, user_dep, *args): | |||||||
|         tags=["qa"], |         tags=["qa"], | ||||||
|         response_model=List[QARunOut], |         response_model=List[QARunOut], | ||||||
|     ) |     ) | ||||||
|     async def get_qa_runs(crawl_id, org: Organization = Depends(org_viewer_dep)): |     async def get_qa_runs( | ||||||
|         return await ops.get_qa_runs(crawl_id, org) |         crawl_id, org: Organization = Depends(org_viewer_dep), skipFailed: bool = False | ||||||
|  |     ): | ||||||
|  |         return await ops.get_qa_runs(crawl_id, skip_failed=skipFailed, org=org) | ||||||
| 
 | 
 | ||||||
|     @app.get( |     @app.get( | ||||||
|         "/orgs/{oid}/crawls/{crawl_id}/qa/activeQA", |         "/orgs/{oid}/crawls/{crawl_id}/qa/activeQA", | ||||||
|  | |||||||
| @ -3,7 +3,9 @@ import requests | |||||||
| import time | import time | ||||||
| from datetime import datetime | from datetime import datetime | ||||||
| 
 | 
 | ||||||
|  | 
 | ||||||
| qa_run_id = None | qa_run_id = None | ||||||
|  | failed_qa_run_id = None | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def test_run_qa(crawler_crawl_id, crawler_auth_headers, default_org_id): | def test_run_qa(crawler_crawl_id, crawler_auth_headers, default_org_id): | ||||||
| @ -182,15 +184,105 @@ def test_run_qa_not_running(crawler_crawl_id, crawler_auth_headers, default_org_ | |||||||
|     assert r.json()["detail"] == "qa_not_running" |     assert r.json()["detail"] == "qa_not_running" | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def test_delete_qa_run(crawler_crawl_id, crawler_auth_headers, default_org_id): | def test_fail_running_qa(crawler_crawl_id, crawler_auth_headers, default_org_id): | ||||||
|     r = requests.post( |     r = requests.post( | ||||||
|         f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa/delete", |         f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa/start", | ||||||
|         json={"qa_run_ids": [qa_run_id]}, |  | ||||||
|         headers=crawler_auth_headers, |         headers=crawler_auth_headers, | ||||||
|     ) |     ) | ||||||
| 
 | 
 | ||||||
|     assert r.status_code == 200 |     assert r.status_code == 200 | ||||||
|     assert r.json()["deleted"] == True | 
 | ||||||
|  |     data = r.json() | ||||||
|  |     assert data["started"] | ||||||
|  |     global failed_qa_run_id | ||||||
|  |     failed_qa_run_id = data["started"] | ||||||
|  | 
 | ||||||
|  |     # Wait until it's properly running | ||||||
|  |     count = 0 | ||||||
|  |     while count < 24: | ||||||
|  |         r = requests.get( | ||||||
|  |             f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa/activeQA", | ||||||
|  |             headers=crawler_auth_headers, | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         data = r.json() | ||||||
|  |         if data.get("qa") and data["qa"].get("state") == "running": | ||||||
|  |             break | ||||||
|  | 
 | ||||||
|  |         time.sleep(5) | ||||||
|  |         count += 1 | ||||||
|  | 
 | ||||||
|  |     # Cancel crawl | ||||||
|  |     r = requests.post( | ||||||
|  |         f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa/cancel", | ||||||
|  |         headers=crawler_auth_headers, | ||||||
|  |     ) | ||||||
|  |     assert r.status_code == 200 | ||||||
|  | 
 | ||||||
|  |     # Wait until it stops with canceled state | ||||||
|  |     count = 0 | ||||||
|  |     while count < 24: | ||||||
|  |         r = requests.get( | ||||||
|  |             f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa/activeQA", | ||||||
|  |             headers=crawler_auth_headers, | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |         data = r.json() | ||||||
|  |         if data.get("qa") and data["qa"].get("state") == "canceled": | ||||||
|  |             break | ||||||
|  | 
 | ||||||
|  |         time.sleep(5) | ||||||
|  |         count += 1 | ||||||
|  | 
 | ||||||
|  |     # Ensure failed QA run is included in list endpoint | ||||||
|  |     r = requests.get( | ||||||
|  |         f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa", | ||||||
|  |         headers=crawler_auth_headers, | ||||||
|  |     ) | ||||||
|  | 
 | ||||||
|  |     data = r.json() | ||||||
|  | 
 | ||||||
|  |     assert len(data) == 2 | ||||||
|  | 
 | ||||||
|  |     failed_run = [qa_run for qa_run in data if qa_run.get("id") == failed_qa_run_id][0] | ||||||
|  |     assert failed_run | ||||||
|  |     assert failed_run["state"] == "canceled" | ||||||
|  |     assert failed_run["started"] | ||||||
|  |     assert failed_run["finished"] | ||||||
|  |     assert failed_run["stats"] | ||||||
|  |     assert failed_run["crawlExecSeconds"] >= 0 | ||||||
|  | 
 | ||||||
|  |     # Ensure failed QA run not included in list endpoint with skipFailed param | ||||||
|  |     r = requests.get( | ||||||
|  |         f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa?skipFailed=true", | ||||||
|  |         headers=crawler_auth_headers, | ||||||
|  |     ) | ||||||
|  | 
 | ||||||
|  |     data = r.json() | ||||||
|  | 
 | ||||||
|  |     assert len(data) == 1 | ||||||
|  | 
 | ||||||
|  |     qa = data[0] | ||||||
|  |     assert qa | ||||||
|  |     assert qa["state"] == "complete" | ||||||
|  |     assert qa["started"] | ||||||
|  |     assert qa["finished"] | ||||||
|  |     assert qa["stats"]["found"] == 1 | ||||||
|  |     assert qa["stats"]["done"] == 1 | ||||||
|  |     assert qa["crawlExecSeconds"] > 0 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def test_delete_qa_run(crawler_crawl_id, crawler_auth_headers, default_org_id): | ||||||
|  |     r = requests.post( | ||||||
|  |         f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/qa/delete", | ||||||
|  |         json={"qa_run_ids": [qa_run_id, failed_qa_run_id]}, | ||||||
|  |         headers=crawler_auth_headers, | ||||||
|  |     ) | ||||||
|  | 
 | ||||||
|  |     assert r.status_code == 200 | ||||||
|  |     assert r.json()["deleted"] == 2 | ||||||
|  | 
 | ||||||
|  |     time.sleep(5) | ||||||
| 
 | 
 | ||||||
|     # deleted from finished qa list |     # deleted from finished qa list | ||||||
|     r = requests.get( |     r = requests.get( | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user