Add single crawl info api at /crawls/{crawl_id} (#418)
* backend: crawl info apis:
  - add /crawls/{crawl_id} api endpoint which just lists the crawl info, without resolving the individual files
  - move /crawls/{crawl_id}.json -> /crawls/{crawl_id}/replay.json for clarity that it's used for replay
* frontend: update api for new replay.json endpoint
parent ad07b6ab43 · commit dfca09fc9c
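As a quick orientation before the diff, the two GET routes differ only in how much they resolve; a minimal client sketch (host, ids, and token are hypothetical placeholders; only the endpoint paths come from this commit):

import requests

# hypothetical values; only the endpoint paths below are from this commit
API = "https://app.example.com/api"
HEADERS = {"Authorization": "Bearer <token>"}
AID = "<archive-id>"
CRAWL_ID = "<crawl-id>"

# new endpoint: crawl info only, without resolving the individual files
info = requests.get(f"{API}/archives/{AID}/crawls/{CRAWL_ID}", headers=HEADERS).json()

# renamed endpoint (was /crawls/{crawl_id}.json): resolves file resources for replay
replay = requests.get(
    f"{API}/archives/{AID}/crawls/{CRAWL_ID}/replay.json", headers=HEADERS
).json()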
backend:

@@ -173,6 +173,7 @@ class CrawlOps:
         archive: Optional[Archive] = None,
         cid: uuid.UUID = None,
         collid: uuid.UUID = None,
+        crawl_id: str = None,
         exclude_files=True,
         running_only=False,
     ):
@@ -193,6 +194,9 @@ class CrawlOps:
         if running_only:
             query["state"] = {"$in": ["running", "starting", "stopping"]}

+        if crawl_id:
+            query["_id"] = crawl_id
+
         # pylint: disable=duplicate-code
         aggregate = [
             {"$match": query},
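Reusing list_crawls for single-crawl lookup keeps the response in the unresolved ListCrawlOut shape; the new branch simply folds the id into the $match stage. A minimal sketch of the stage produced when both filters are set (values illustrative):

# illustrative: the $match stage built by the code above when both filters apply
query = {
    "state": {"$in": ["running", "starting", "stopping"]},
    "_id": "<crawl-id>",
}
match_stage = {"$match": query}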
@@ -613,7 +617,7 @@ def init_crawls_api(
         return {"deleted": res}

     @app.get(
-        "/archives/all/crawls/{crawl_id}.json",
+        "/archives/all/crawls/{crawl_id}/replay.json",
         tags=["crawls"],
         response_model=CrawlOut,
     )
@@ -624,13 +628,43 @@ def init_crawls_api(
         return await ops.get_crawl(crawl_id, None)

     @app.get(
-        "/archives/{aid}/crawls/{crawl_id}.json",
+        "/archives/{aid}/crawls/{crawl_id}/replay.json",
         tags=["crawls"],
         response_model=CrawlOut,
     )
     async def get_crawl(crawl_id, archive: Archive = Depends(archive_crawl_dep)):
         return await ops.get_crawl(crawl_id, archive)

+    @app.get(
+        "/archives/all/crawls/{crawl_id}",
+        tags=["crawls"],
+        response_model=ListCrawlOut,
+    )
+    async def list_single_crawl_admin(crawl_id, user: User = Depends(user_dep)):
+        if not user.is_superuser:
+            raise HTTPException(status_code=403, detail="Not Allowed")
+
+        crawls = await ops.list_crawls(crawl_id=crawl_id)
+        print("crawls", crawls)
+        if len(crawls) < 1:
+            raise HTTPException(status_code=404, detail="crawl_not_found")
+
+        return crawls[0]
+
+    @app.get(
+        "/archives/{aid}/crawls/{crawl_id}",
+        tags=["crawls"],
+        response_model=ListCrawlOut,
+    )
+    async def list_single_crawl(
+        crawl_id, archive: Archive = Depends(archive_crawl_dep)
+    ):
+        crawls = await ops.list_crawls(archive, crawl_id=crawl_id)
+        if len(crawls) < 1:
+            raise HTTPException(status_code=404, detail="crawl_not_found")
+
+        return crawls[0]
+
     @app.post(
         "/archives/{aid}/crawls/{crawl_id}/scale",
         tags=["crawls"],
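Client-side, the new info routes behave as the handlers above imply; a hedged sketch using only the status codes and details visible in the diff (host and token hypothetical):

import requests

API = "https://app.example.com/api"   # hypothetical
HEADERS = {"Authorization": "Bearer <token>"}

r = requests.get(f"{API}/archives/all/crawls/<crawl-id>", headers=HEADERS)
if r.status_code == 403:
    print("Not Allowed")        # the /archives/all route requires a superuser
elif r.status_code == 404:
    print("crawl_not_found")    # list_crawls matched nothing
else:
    crawl = r.json()            # a single ListCrawlOut object (crawls[0]), not a list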
backend tests:

@@ -81,7 +81,7 @@ def test_wait_for_complete():

     while True:
         r = requests.get(
-            f"{api_prefix}/archives/{archive_id}/crawls/{crawl_id}.json",
+            f"{api_prefix}/archives/{archive_id}/crawls/{crawl_id}/replay.json",
             headers=headers,
         )
         data = r.json()
@@ -105,6 +105,13 @@ def test_wait_for_complete():
     wacz_size = data["resources"][0]["size"]
     wacz_hash = data["resources"][0]["hash"]


+def test_crawl_info():
+    r = requests.get(
+        f"{api_prefix}/archives/{archive_id}/crawls/{crawl_id}",
+        headers=headers,
+    )
+    data = r.json()
+    assert data["fileSize"] == wacz_size
+
+
 def test_download_wacz():
     r = requests.get(host_prefix + wacz_path)
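Note that test_crawl_info reads wacz_size, which is set earlier in test_wait_for_complete (presumably via a module-level global), so it relies on pytest's default in-file ordering. If a crawl ever produced multiple WACZ files, the natural generalization of the assertion would be a sum over resources; this is an assumption, not something the commit tests (replay_data and info_data stand in for the two responses):

# assumption: fileSize aggregates all resource sizes; the commit only tests one file
expected = sum(res["size"] for res in replay_data["resources"])
assert info_data["fileSize"] == expected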
frontend:

@@ -560,7 +560,7 @@ export class CrawlDetail extends LiteElement {
     const bearer = this.authState?.headers?.Authorization?.split(" ", 2)[1];

     // for now, just use the first file until multi-wacz support is fully implemented
-    const replaySource = `/api/archives/${this.crawl?.aid}/crawls/${this.crawlId}.json?auth_bearer=${bearer}`;
+    const replaySource = `/api/archives/${this.crawl?.aid}/crawls/${this.crawlId}/replay.json?auth_bearer=${bearer}`;
     //const replaySource = this.crawl?.resources?.[0]?.path;

     const canReplay = replaySource && this.hasFiles;
@@ -881,7 +881,7 @@ export class CrawlDetail extends LiteElement {

   private async getCrawl(): Promise<Crawl> {
     const data: Crawl = await this.apiFetch(
-      `${this.crawlsAPIBaseUrl || this.crawlsBaseUrl}/${this.crawlId}.json`,
+      `${this.crawlsAPIBaseUrl || this.crawlsBaseUrl}/${this.crawlId}/replay.json`,
       this.authState!
     );
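The frontend passes the token as an auth_bearer query parameter so the embedded replay viewer can fetch replay.json directly, without custom headers; the equivalent request in Python (host and token hypothetical; the parameter name comes from the diff above):

import requests

# hypothetical values; the auth_bearer query parameter appears in the frontend diff
url = "https://app.example.com/api/archives/<aid>/crawls/<crawl-id>/replay.json"
data = requests.get(url, params={"auth_bearer": "<token>"}).json()
resources = data.get("resources", [])   # resolved WACZ files used for replay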