Add single crawl info api at /crawls/{crawl_id} (#418)
* backend: crawl info APIs:
  - add a /crawls/{crawl_id} API endpoint that returns only the crawl info, without resolving the individual files
  - move /crawls/{crawl_id}.json -> /crawls/{crawl_id}/replay.json to make it clear that this endpoint is used for replay
* frontend: update the API calls to use the new replay.json endpoint
This commit is contained in:
parent
ad07b6ab43
commit
dfca09fc9c
@ -173,6 +173,7 @@ class CrawlOps:
|
||||
archive: Optional[Archive] = None,
|
||||
cid: uuid.UUID = None,
|
||||
collid: uuid.UUID = None,
|
||||
crawl_id: str = None,
|
||||
exclude_files=True,
|
||||
running_only=False,
|
||||
):
|
||||
@ -193,6 +194,9 @@ class CrawlOps:
|
||||
if running_only:
|
||||
query["state"] = {"$in": ["running", "starting", "stopping"]}
|
||||
|
||||
if crawl_id:
|
||||
query["_id"] = crawl_id
|
||||
|
||||
# pylint: disable=duplicate-code
|
||||
aggregate = [
|
||||
{"$match": query},
|
||||
@ -613,7 +617,7 @@ def init_crawls_api(
|
||||
return {"deleted": res}
|
||||
|
||||
@app.get(
|
||||
"/archives/all/crawls/{crawl_id}.json",
|
||||
"/archives/all/crawls/{crawl_id}/replay.json",
|
||||
tags=["crawls"],
|
||||
response_model=CrawlOut,
|
||||
)
|
||||
@ -624,13 +628,43 @@ def init_crawls_api(
|
||||
return await ops.get_crawl(crawl_id, None)
|
||||
|
||||
@app.get(
|
||||
"/archives/{aid}/crawls/{crawl_id}.json",
|
||||
"/archives/{aid}/crawls/{crawl_id}/replay.json",
|
||||
tags=["crawls"],
|
||||
response_model=CrawlOut,
|
||||
)
|
||||
async def get_crawl(crawl_id, archive: Archive = Depends(archive_crawl_dep)):
|
||||
return await ops.get_crawl(crawl_id, archive)
|
||||
|
||||
@app.get(
    "/archives/all/crawls/{crawl_id}",
    tags=["crawls"],
    response_model=ListCrawlOut,
)
async def list_single_crawl_admin(crawl_id, user: User = Depends(user_dep)):
    """Return the listing entry for one crawl, without an archive filter.

    The crawl is looked up through ``ops.list_crawls`` filtered by
    ``crawl_id``; the first match is returned.

    Raises:
        HTTPException: 403 when the requesting user is not a superuser,
            404 when no crawl matches ``crawl_id``.
    """
    # Only superusers may query crawls without an archive scope.
    if not user.is_superuser:
        raise HTTPException(status_code=403, detail="Not Allowed")

    crawls = await ops.list_crawls(crawl_id=crawl_id)
    # The debug print of the result was removed: it wrote crawl records to
    # stdout on every request.
    if len(crawls) < 1:
        raise HTTPException(status_code=404, detail="crawl_not_found")

    return crawls[0]
|
||||
|
||||
@app.get(
    "/archives/{aid}/crawls/{crawl_id}",
    tags=["crawls"],
    response_model=ListCrawlOut,
)
async def list_single_crawl(
    crawl_id, archive: Archive = Depends(archive_crawl_dep)
):
    """Return the listing entry for one crawl within the given archive.

    Queries ``ops.list_crawls`` with the archive and ``crawl_id`` and
    returns the first match.

    Raises:
        HTTPException: 404 when no crawl matches ``crawl_id``.
    """
    matches = await ops.list_crawls(archive, crawl_id=crawl_id)
    if len(matches) >= 1:
        return matches[0]

    raise HTTPException(status_code=404, detail="crawl_not_found")
|
||||
|
||||
@app.post(
|
||||
"/archives/{aid}/crawls/{crawl_id}/scale",
|
||||
tags=["crawls"],
|
||||
|
@ -81,7 +81,7 @@ def test_wait_for_complete():
|
||||
|
||||
while True:
|
||||
r = requests.get(
|
||||
f"{api_prefix}/archives/{archive_id}/crawls/{crawl_id}.json",
|
||||
f"{api_prefix}/archives/{archive_id}/crawls/{crawl_id}/replay.json",
|
||||
headers=headers,
|
||||
)
|
||||
data = r.json()
|
||||
@ -105,6 +105,13 @@ def test_wait_for_complete():
|
||||
wacz_size = data["resources"][0]["size"]
|
||||
wacz_hash = data["resources"][0]["hash"]
|
||||
|
||||
def test_crawl_info():
    """Check that the crawl info endpoint reports the recorded WACZ size."""
    info_url = f"{api_prefix}/archives/{archive_id}/crawls/{crawl_id}"
    response = requests.get(info_url, headers=headers)
    crawl_info = response.json()
    # wacz_size is a module-level value; in the visible hunk it is assigned
    # from data["resources"][0]["size"] in an earlier test.
    assert crawl_info["fileSize"] == wacz_size
|
||||
|
||||
def test_download_wacz():
|
||||
r = requests.get(host_prefix + wacz_path)
|
||||
|
@ -560,7 +560,7 @@ export class CrawlDetail extends LiteElement {
|
||||
const bearer = this.authState?.headers?.Authorization?.split(" ", 2)[1];
|
||||
|
||||
// for now, just use the first file until multi-wacz support is fully implemented
|
||||
const replaySource = `/api/archives/${this.crawl?.aid}/crawls/${this.crawlId}.json?auth_bearer=${bearer}`;
|
||||
const replaySource = `/api/archives/${this.crawl?.aid}/crawls/${this.crawlId}/replay.json?auth_bearer=${bearer}`;
|
||||
//const replaySource = this.crawl?.resources?.[0]?.path;
|
||||
|
||||
const canReplay = replaySource && this.hasFiles;
|
||||
@ -881,7 +881,7 @@ export class CrawlDetail extends LiteElement {
|
||||
|
||||
private async getCrawl(): Promise<Crawl> {
|
||||
const data: Crawl = await this.apiFetch(
|
||||
`${this.crawlsAPIBaseUrl || this.crawlsBaseUrl}/${this.crawlId}.json`,
|
||||
`${this.crawlsAPIBaseUrl || this.crawlsBaseUrl}/${this.crawlId}/replay.json`,
|
||||
this.authState!
|
||||
);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user