replay api: add downloadUrl to replay endpoints to be used by RWP (#2456)

RWP (2.3.3+) can determine whether the 'Download Archive' menu item should be
shown based on the value of downloadUrl.
If downloadUrl is set to 'null', RWP will hide the menu item:
- set downloadUrl to public collection download for public collections
replay
- set downloadUrl to null for private collection and crawl replay to
hide the download menu item in RWP (otherwise we would have to add the
auth_header query with a bearer token, and should assess the security
implications before doing that)

---------

Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
This commit is contained in:
Ilya Kreymer 2025-03-03 14:11:28 -08:00 committed by GitHub
parent 65a40c4816
commit afa892000b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 22 additions and 4 deletions

View File

@ -169,11 +169,14 @@ class BaseCrawlOps:
oid = res.get("oid")
if oid:
origin = get_origin(headers)
res["pagesQueryUrl"] = (
get_origin(headers)
+ f"/api/orgs/{oid}/crawls/{crawlid}/pagesSearch"
origin + f"/api/orgs/{oid}/crawls/{crawlid}/pagesSearch"
)
# this will now disable the downloadUrl in RWP
res["downloadUrl"] = None
crawl = CrawlOutWithResources.from_dict(res)
if not skip_resources:

View File

@ -352,11 +352,21 @@ class CollectionOps:
public = "public/" if public_or_unlisted_only else ""
origin = get_origin(headers)
if public_or_unlisted_only:
slug = result.get("slug")
result["downloadUrl"] = (
origin + f"/api/{public}orgs/{org.slug}/collections/{slug}/download"
)
else:
# disable download link, as not public without auth
result["downloadUrl"] = None
if pages_optimized:
result["initialPages"] = initial_pages
result["pagesQueryUrl"] = (
get_origin(headers)
+ f"/api/orgs/{org.id}/collections/{coll_id}/{public}pages"
origin + f"/api/orgs/{org.id}/collections/{coll_id}/{public}pages"
)
thumbnail = result.get("thumbnail")

View File

@ -1389,6 +1389,7 @@ class CrawlOutWithResources(CrawlOut):
initialPages: List[PageOut] = []
pagesQueryUrl: str = ""
downloadUrl: Optional[str] = None
# ============================================================================
@ -1518,6 +1519,7 @@ class CollOut(BaseMongoModel):
initialPages: List[PageOut] = []
preloadResources: List[PreloadResource] = []
pagesQueryUrl: str = ""
downloadUrl: Optional[str] = None
# ============================================================================

View File

@ -418,6 +418,7 @@ def test_get_collection_replay(crawler_auth_headers, default_org_id):
assert data["pagesQueryUrl"].endswith(
f"/orgs/{default_org_id}/collections/{_coll_id}/pages"
)
assert data["downloadUrl"] is None
assert "preloadResources" in data
resources = data["resources"]
@ -455,6 +456,7 @@ def test_collection_public(crawler_auth_headers, default_org_id):
assert data["pagesQueryUrl"].endswith(
f"/orgs/{default_org_id}/collections/{_coll_id}/public/pages"
)
assert data["downloadUrl"] is not None
assert "preloadResources" in data
assert r.status_code == 200

View File

@ -188,6 +188,7 @@ def test_wait_for_complete(admin_auth_headers, default_org_id):
assert data["pagesQueryUrl"].endswith(
f"/orgs/{default_org_id}/crawls/{admin_crawl_id}/pagesSearch"
)
assert data["downloadUrl"] is None
# ensure filename matches specified pattern
# set in default_crawl_filename_template