Add totalSize to workflow API endpoints (#783)
This commit is contained in:
parent
3f41498c5c
commit
a2435a013b
@ -199,6 +199,8 @@ class CrawlConfigOut(CrawlConfig):
|
|||||||
|
|
||||||
firstSeed: Optional[str]
|
firstSeed: Optional[str]
|
||||||
|
|
||||||
|
totalSize: Optional[int] = 0
|
||||||
|
|
||||||
crawlCount: Optional[int] = 0
|
crawlCount: Optional[int] = 0
|
||||||
lastCrawlId: Optional[str]
|
lastCrawlId: Optional[str]
|
||||||
lastCrawlStartTime: Optional[datetime]
|
lastCrawlStartTime: Optional[datetime]
|
||||||
@ -563,8 +565,19 @@ class CrawlConfigOps:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
# total size
|
{
|
||||||
# {"$set": {"totalSize": {"$sum": "$$finishedCrawls.$$files.size"}}},
|
"$set": {
|
||||||
|
"totalSize": {
|
||||||
|
"$sum": {
|
||||||
|
"$map": {
|
||||||
|
"input": "$sortedCrawls.files",
|
||||||
|
"as": "crawlFile",
|
||||||
|
"in": {"$arrayElemAt": ["$$crawlFile.size", 0]},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
# unset
|
# unset
|
||||||
{"$unset": ["lastCrawl"]},
|
{"$unset": ["lastCrawl"]},
|
||||||
{"$unset": ["sortedCrawls"]},
|
{"$unset": ["sortedCrawls"]},
|
||||||
@ -680,6 +693,7 @@ class CrawlConfigOps:
|
|||||||
cid=crawlconfig.id
|
cid=crawlconfig.id
|
||||||
)
|
)
|
||||||
crawlconfig.crawlCount = crawl_stats["crawl_count"]
|
crawlconfig.crawlCount = crawl_stats["crawl_count"]
|
||||||
|
crawlconfig.totalSize = crawl_stats["total_size"]
|
||||||
crawlconfig.lastCrawlId = crawl_stats["last_crawl_id"]
|
crawlconfig.lastCrawlId = crawl_stats["last_crawl_id"]
|
||||||
crawlconfig.lastCrawlStartTime = crawl_stats["last_crawl_started"]
|
crawlconfig.lastCrawlStartTime = crawl_stats["last_crawl_started"]
|
||||||
crawlconfig.lastCrawlTime = crawl_stats["last_crawl_finished"]
|
crawlconfig.lastCrawlTime = crawl_stats["last_crawl_finished"]
|
||||||
|
@ -384,6 +384,7 @@ class CrawlOps:
|
|||||||
"""Get crawl statistics for a crawl_config with id cid."""
|
"""Get crawl statistics for a crawl_config with id cid."""
|
||||||
stats = {
|
stats = {
|
||||||
"crawl_count": 0,
|
"crawl_count": 0,
|
||||||
|
"total_size": 0,
|
||||||
"last_crawl_id": None,
|
"last_crawl_id": None,
|
||||||
"last_crawl_started": None,
|
"last_crawl_started": None,
|
||||||
"last_crawl_finished": None,
|
"last_crawl_finished": None,
|
||||||
@ -407,6 +408,13 @@ class CrawlOps:
|
|||||||
if user:
|
if user:
|
||||||
stats["last_started_by"] = user.name
|
stats["last_started_by"] = user.name
|
||||||
|
|
||||||
|
total_size = 0
|
||||||
|
for res in results:
|
||||||
|
files = res["files"]
|
||||||
|
for file in files:
|
||||||
|
total_size += file["size"]
|
||||||
|
stats["total_size"] = total_size
|
||||||
|
|
||||||
return stats
|
return stats
|
||||||
|
|
||||||
async def _resolve_crawl_refs(
|
async def _resolve_crawl_refs(
|
||||||
|
@ -205,3 +205,32 @@ def test_verify_revs_history(crawler_auth_headers, default_org_id):
|
|||||||
assert len(items) == 2
|
assert len(items) == 2
|
||||||
sorted_data = sorted(items, key=lambda revision: revision["rev"])
|
sorted_data = sorted(items, key=lambda revision: revision["rev"])
|
||||||
assert sorted_data[0]["config"]["scopeType"] == "prefix"
|
assert sorted_data[0]["config"]["scopeType"] == "prefix"
|
||||||
|
|
||||||
|
|
||||||
|
def test_workflow_total_size(
    crawler_auth_headers, default_org_id, admin_crawl_id, crawler_crawl_id
):
    """Check that workflow endpoints report ``totalSize``.

    The workflow list endpoint must report a positive ``totalSize`` for the
    workflows whose last crawl is one of the fixture crawls, and zero for
    every other workflow. The single-workflow endpoint must then report a
    positive ``totalSize`` for the workflow behind the admin crawl.
    """
    admin_crawl_cid = ""

    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs",
        headers=crawler_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data["total"] > 0

    items = data["items"]
    for workflow in items:
        last_crawl_id = workflow.get("lastCrawlId")
        if last_crawl_id and last_crawl_id in (admin_crawl_id, crawler_crawl_id):
            assert workflow["totalSize"] > 0
            if last_crawl_id == admin_crawl_id:
                # Remember the workflow id for the single-workflow check below.
                admin_crawl_cid = workflow["id"]
        else:
            assert workflow["totalSize"] == 0

    # Fail here with a clear message rather than requesting ".../crawlconfigs/"
    # with an empty id and tripping over an unrelated status code or KeyError.
    assert admin_crawl_cid, "no workflow found whose last crawl is the admin crawl"

    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/{admin_crawl_cid}",
        headers=crawler_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data["totalSize"] > 0
|
||||||
|
Loading…
Reference in New Issue
Block a user