Add totalSize to workflow API endpoints (#783)
This commit is contained in:
parent
3f41498c5c
commit
a2435a013b
@ -199,6 +199,8 @@ class CrawlConfigOut(CrawlConfig):
|
||||
|
||||
firstSeed: Optional[str]
|
||||
|
||||
totalSize: Optional[int] = 0
|
||||
|
||||
crawlCount: Optional[int] = 0
|
||||
lastCrawlId: Optional[str]
|
||||
lastCrawlStartTime: Optional[datetime]
|
||||
@ -563,8 +565,19 @@ class CrawlConfigOps:
|
||||
}
|
||||
}
|
||||
},
|
||||
# total size
|
||||
# {"$set": {"totalSize": {"$sum": "$$finishedCrawls.$$files.size"}}},
|
||||
{
|
||||
"$set": {
|
||||
"totalSize": {
|
||||
"$sum": {
|
||||
"$map": {
|
||||
"input": "$sortedCrawls.files",
|
||||
"as": "crawlFile",
|
||||
"in": {"$arrayElemAt": ["$$crawlFile.size", 0]},
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
# unset
|
||||
{"$unset": ["lastCrawl"]},
|
||||
{"$unset": ["sortedCrawls"]},
|
||||
@ -680,6 +693,7 @@ class CrawlConfigOps:
|
||||
cid=crawlconfig.id
|
||||
)
|
||||
crawlconfig.crawlCount = crawl_stats["crawl_count"]
|
||||
crawlconfig.totalSize = crawl_stats["total_size"]
|
||||
crawlconfig.lastCrawlId = crawl_stats["last_crawl_id"]
|
||||
crawlconfig.lastCrawlStartTime = crawl_stats["last_crawl_started"]
|
||||
crawlconfig.lastCrawlTime = crawl_stats["last_crawl_finished"]
|
||||
|
@ -384,6 +384,7 @@ class CrawlOps:
|
||||
"""Get crawl statistics for a crawl_config with id cid."""
|
||||
stats = {
|
||||
"crawl_count": 0,
|
||||
"total_size": 0,
|
||||
"last_crawl_id": None,
|
||||
"last_crawl_started": None,
|
||||
"last_crawl_finished": None,
|
||||
@ -407,6 +408,13 @@ class CrawlOps:
|
||||
if user:
|
||||
stats["last_started_by"] = user.name
|
||||
|
||||
total_size = 0
|
||||
for res in results:
|
||||
files = res["files"]
|
||||
for file in files:
|
||||
total_size += file["size"]
|
||||
stats["total_size"] = total_size
|
||||
|
||||
return stats
|
||||
|
||||
async def _resolve_crawl_refs(
|
||||
|
@ -205,3 +205,32 @@ def test_verify_revs_history(crawler_auth_headers, default_org_id):
|
||||
assert len(items) == 2
|
||||
sorted_data = sorted(items, key=lambda revision: revision["rev"])
|
||||
assert sorted_data[0]["config"]["scopeType"] == "prefix"
|
||||
|
||||
|
||||
def test_workflow_total_size(
    crawler_auth_headers, default_org_id, admin_crawl_id, crawler_crawl_id
):
    """Verify workflow endpoints report totalSize correctly.

    Workflows whose last crawl is one of the known fixture crawls
    (admin or crawler) must report a positive totalSize; all other
    workflows must report 0. Also checks that the single-workflow
    detail endpoint returns the same positive totalSize for the
    admin crawl's workflow.
    """
    admin_crawl_cid = ""

    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs",
        headers=crawler_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data["total"] > 0
    items = data["items"]
    for workflow in items:
        last_crawl_id = workflow.get("lastCrawlId")
        if last_crawl_id and last_crawl_id in (admin_crawl_id, crawler_crawl_id):
            assert workflow["totalSize"] > 0
            if last_crawl_id == admin_crawl_id:
                # Remember this workflow's id for the detail-endpoint check.
                admin_crawl_cid = workflow["id"]
        else:
            # Workflows without one of the fixture crawls have no files.
            assert workflow["totalSize"] == 0

    # Guard: if no workflow matched the admin crawl, admin_crawl_cid is
    # still "" and the request below would hit the *list* endpoint
    # (trailing slash, empty id) instead of a workflow detail endpoint,
    # producing a confusing failure. Fail fast with a clear assertion.
    assert admin_crawl_cid

    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/{admin_crawl_cid}",
        headers=crawler_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data["totalSize"] > 0
||||
|
Loading…
Reference in New Issue
Block a user