Add crawlSuccessfulCount to workflows (#871)

This commit is contained in:
Tessa Walsh 2023-05-22 19:06:37 -04:00 committed by GitHub
parent bd8b306fbd
commit 28f1c815d0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 10 additions and 2 deletions

View File

@@ -175,13 +175,14 @@ class CrawlConfig(CrawlConfigCore):
colls: Optional[List[str]] = [] colls: Optional[List[str]] = []
crawlAttemptCount: Optional[int] = 0
inactive: Optional[bool] = False inactive: Optional[bool] = False
rev: int = 0 rev: int = 0
crawlAttemptCount: Optional[int] = 0
crawlCount: Optional[int] = 0 crawlCount: Optional[int] = 0
crawlSuccessfulCount: Optional[int] = 0
totalSize: Optional[int] = 0 totalSize: Optional[int] = 0
lastCrawlId: Optional[str] lastCrawlId: Optional[str]
@@ -944,6 +945,7 @@ async def update_config_crawl_stats(crawl_configs, crawls, cid: uuid.UUID):
""" """
update_query = { update_query = {
"crawlCount": 0, "crawlCount": 0,
"crawlSuccessfulCount": 0,
"totalSize": 0, "totalSize": 0,
"lastCrawlId": None, "lastCrawlId": None,
"lastCrawlStartTime": None, "lastCrawlStartTime": None,
@@ -961,6 +963,10 @@ async def update_config_crawl_stats(crawl_configs, crawls, cid: uuid.UUID):
if results: if results:
update_query["crawlCount"] = len(results) update_query["crawlCount"] = len(results)
update_query["crawlSuccessfulCount"] = len(
[res for res in results if res["state"] not in ("canceled", "failed")]
)
last_crawl = results[0] last_crawl = results[0]
last_crawl_finished = last_crawl.get("finished") last_crawl_finished = last_crawl.get("finished")

View File

@@ -226,6 +226,7 @@ def test_workflow_total_size_and_last_crawl_stats(
if last_crawl_id and last_crawl_id in (admin_crawl_id, crawler_crawl_id): if last_crawl_id and last_crawl_id in (admin_crawl_id, crawler_crawl_id):
assert workflow["totalSize"] > 0 assert workflow["totalSize"] > 0
assert workflow["crawlCount"] > 0 assert workflow["crawlCount"] > 0
assert workflow["crawlSuccessfulCount"] > 0
assert workflow["lastCrawlId"] assert workflow["lastCrawlId"]
assert workflow["lastCrawlStartTime"] assert workflow["lastCrawlStartTime"]
@@ -249,6 +250,7 @@ def test_workflow_total_size_and_last_crawl_stats(
data = r.json() data = r.json()
assert data["totalSize"] > 0 assert data["totalSize"] > 0
assert data["crawlCount"] > 0 assert data["crawlCount"] > 0
assert data["crawlSuccessfulCount"] > 0
assert data["lastCrawlId"] assert data["lastCrawlId"]
assert data["lastCrawlStartTime"] assert data["lastCrawlStartTime"]