Add single crawl info API at /crawls/{crawl_id} (#418)

* backend: crawl info APIs:
- add a /crawls/{crawl_id} endpoint that returns just the crawl info, without resolving the individual files
- move /crawls/{crawl_id}.json -> /crawls/{crawl_id}/replay.json for clarity that it is used for replay

* frontend: update API calls to use the new replay.json endpoint
Ilya Kreymer, 2022-12-19 14:54:48 -08:00 (committed by GitHub)
parent ad07b6ab43
commit dfca09fc9c
3 changed files with 46 additions and 5 deletions
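
For reference, a minimal sketch of how a client might call the two endpoints after this change (not part of the commit; the base URL, ids, and token are hypothetical placeholders, while the field names come from the tests and frontend code below):

import requests

# hypothetical placeholder values, not from this commit
api_prefix = "https://btrix.example.com/api"
archive_id = "<archive-uuid>"
crawl_id = "<crawl-id>"
headers = {"Authorization": "Bearer <access-token>"}

# new endpoint: crawl info only, individual files are not resolved
r = requests.get(
    f"{api_prefix}/archives/{archive_id}/crawls/{crawl_id}",
    headers=headers,
)
print(r.json()["fileSize"])

# renamed endpoint: full crawl record with resolved resources, used for replay
r = requests.get(
    f"{api_prefix}/archives/{archive_id}/crawls/{crawl_id}/replay.json",
    headers=headers,
)
print(r.json()["resources"][0]["size"])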


@@ -173,6 +173,7 @@ class CrawlOps:
         archive: Optional[Archive] = None,
         cid: uuid.UUID = None,
         collid: uuid.UUID = None,
+        crawl_id: str = None,
         exclude_files=True,
         running_only=False,
     ):
@@ -193,6 +194,9 @@ class CrawlOps:
         if running_only:
             query["state"] = {"$in": ["running", "starting", "stopping"]}

+        if crawl_id:
+            query["_id"] = crawl_id
+
         # pylint: disable=duplicate-code
         aggregate = [
             {"$match": query},
@@ -613,7 +617,7 @@ def init_crawls_api(
         return {"deleted": res}

     @app.get(
-        "/archives/all/crawls/{crawl_id}.json",
+        "/archives/all/crawls/{crawl_id}/replay.json",
         tags=["crawls"],
         response_model=CrawlOut,
     )
@@ -624,13 +628,43 @@ def init_crawls_api(
         return await ops.get_crawl(crawl_id, None)

     @app.get(
-        "/archives/{aid}/crawls/{crawl_id}.json",
+        "/archives/{aid}/crawls/{crawl_id}/replay.json",
         tags=["crawls"],
         response_model=CrawlOut,
     )
     async def get_crawl(crawl_id, archive: Archive = Depends(archive_crawl_dep)):
         return await ops.get_crawl(crawl_id, archive)

+    @app.get(
+        "/archives/all/crawls/{crawl_id}",
+        tags=["crawls"],
+        response_model=ListCrawlOut,
+    )
+    async def list_single_crawl_admin(crawl_id, user: User = Depends(user_dep)):
+        if not user.is_superuser:
+            raise HTTPException(status_code=403, detail="Not Allowed")
+
+        crawls = await ops.list_crawls(crawl_id=crawl_id)
+        print("crawls", crawls)
+        if len(crawls) < 1:
+            raise HTTPException(status_code=404, detail="crawl_not_found")
+
+        return crawls[0]
+
+    @app.get(
+        "/archives/{aid}/crawls/{crawl_id}",
+        tags=["crawls"],
+        response_model=ListCrawlOut,
+    )
+    async def list_single_crawl(
+        crawl_id, archive: Archive = Depends(archive_crawl_dep)
+    ):
+        crawls = await ops.list_crawls(archive, crawl_id=crawl_id)
+        if len(crawls) < 1:
+            raise HTTPException(status_code=404, detail="crawl_not_found")
+
+        return crawls[0]
+
     @app.post(
         "/archives/{aid}/crawls/{crawl_id}/scale",
         tags=["crawls"],

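Note that the new single-crawl endpoints fail loudly rather than returning empty results: the /archives/all/ variant requires a superuser token (403 otherwise), and an unknown crawl_id yields a 404 with detail "crawl_not_found". A short sketch of handling both cases, reusing the hypothetical placeholders from the sketch above:

import requests

# hypothetical placeholders, as in the earlier sketch
api_prefix = "https://btrix.example.com/api"
headers = {"Authorization": "Bearer <access-token>"}

# superuser-only variant that looks up a crawl across all archives
r = requests.get(f"{api_prefix}/archives/all/crawls/<crawl-id>", headers=headers)
if r.status_code == 403:
    print("requires a superuser token")  # detail: "Not Allowed"
elif r.status_code == 404:
    print("no crawl with that id")  # detail: "crawl_not_found"
else:
    print(r.json()["fileSize"])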

@@ -81,7 +81,7 @@ def test_wait_for_complete():
     while True:
         r = requests.get(
-            f"{api_prefix}/archives/{archive_id}/crawls/{crawl_id}.json",
+            f"{api_prefix}/archives/{archive_id}/crawls/{crawl_id}/replay.json",
             headers=headers,
         )
         data = r.json()
@@ -105,6 +105,13 @@ def test_wait_for_complete():
     wacz_size = data["resources"][0]["size"]
     wacz_hash = data["resources"][0]["hash"]


+def test_crawl_info():
+    r = requests.get(
+        f"{api_prefix}/archives/{archive_id}/crawls/{crawl_id}",
+        headers=headers,
+    )
+    data = r.json()
+    assert data["fileSize"] == wacz_size


 def test_download_wacz():
     r = requests.get(host_prefix + wacz_path)


@@ -560,7 +560,7 @@ export class CrawlDetail extends LiteElement {
    const bearer = this.authState?.headers?.Authorization?.split(" ", 2)[1];

    // for now, just use the first file until multi-wacz support is fully implemented
-    const replaySource = `/api/archives/${this.crawl?.aid}/crawls/${this.crawlId}.json?auth_bearer=${bearer}`;
+    const replaySource = `/api/archives/${this.crawl?.aid}/crawls/${this.crawlId}/replay.json?auth_bearer=${bearer}`;
    //const replaySource = this.crawl?.resources?.[0]?.path;

    const canReplay = replaySource && this.hasFiles;
@@ -881,7 +881,7 @@ export class CrawlDetail extends LiteElement {
  private async getCrawl(): Promise<Crawl> {
    const data: Crawl = await this.apiFetch(
-      `${this.crawlsAPIBaseUrl || this.crawlsBaseUrl}/${this.crawlId}.json`,
+      `${this.crawlsAPIBaseUrl || this.crawlsBaseUrl}/${this.crawlId}/replay.json`,
      this.authState!
    );