replay api: add downloadUrl to replay endpoints to be used by RWP (#2456)

RWP (2.3.3+) can determine whether the 'Download Archive' menu item should be
shown based on the value of downloadUrl.
If downloadUrl is set to 'null', RWP will hide the menu item:
- set downloadUrl to public collection download for public collections
replay
- set downloadUrl to null for private collection and crawl replay to
hide the download menu item in RWP (otherwise we would have to add the
auth_header query with a bearer token, and should assess the security
implications before doing that)

---------

Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
This commit is contained in:
Ilya Kreymer 2025-03-03 14:11:28 -08:00 committed by GitHub
parent 65a40c4816
commit afa892000b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 22 additions and 4 deletions

View File

@ -169,11 +169,14 @@ class BaseCrawlOps:
oid = res.get("oid")
if oid:
origin = get_origin(headers)
res["pagesQueryUrl"] = (
get_origin(headers)
+ f"/api/orgs/{oid}/crawls/{crawlid}/pagesSearch"
origin + f"/api/orgs/{oid}/crawls/{crawlid}/pagesSearch"
)
# this will now disable the downloadUrl in RWP
res["downloadUrl"] = None
crawl = CrawlOutWithResources.from_dict(res)
if not skip_resources:

View File

@ -352,11 +352,21 @@ class CollectionOps:
public = "public/" if public_or_unlisted_only else ""
origin = get_origin(headers)
if public_or_unlisted_only:
slug = result.get("slug")
result["downloadUrl"] = (
origin + f"/api/{public}orgs/{org.slug}/collections/{slug}/download"
)
else:
# disable download link, as not public without auth
result["downloadUrl"] = None
if pages_optimized:
result["initialPages"] = initial_pages
result["pagesQueryUrl"] = (
get_origin(headers)
+ f"/api/orgs/{org.id}/collections/{coll_id}/{public}pages"
origin + f"/api/orgs/{org.id}/collections/{coll_id}/{public}pages"
)
thumbnail = result.get("thumbnail")

View File

@ -1389,6 +1389,7 @@ class CrawlOutWithResources(CrawlOut):
initialPages: List[PageOut] = []
pagesQueryUrl: str = ""
downloadUrl: Optional[str] = None
# ============================================================================
@ -1518,6 +1519,7 @@ class CollOut(BaseMongoModel):
initialPages: List[PageOut] = []
preloadResources: List[PreloadResource] = []
pagesQueryUrl: str = ""
downloadUrl: Optional[str] = None
# ============================================================================

View File

@ -418,6 +418,7 @@ def test_get_collection_replay(crawler_auth_headers, default_org_id):
assert data["pagesQueryUrl"].endswith(
f"/orgs/{default_org_id}/collections/{_coll_id}/pages"
)
assert data["downloadUrl"] is None
assert "preloadResources" in data
resources = data["resources"]
@ -455,6 +456,7 @@ def test_collection_public(crawler_auth_headers, default_org_id):
assert data["pagesQueryUrl"].endswith(
f"/orgs/{default_org_id}/collections/{_coll_id}/public/pages"
)
assert data["downloadUrl"] is not None
assert "preloadResources" in data
assert r.status_code == 200

View File

@ -188,6 +188,7 @@ def test_wait_for_complete(admin_auth_headers, default_org_id):
assert data["pagesQueryUrl"].endswith(
f"/orgs/{default_org_id}/crawls/{admin_crawl_id}/pagesSearch"
)
assert data["downloadUrl"] is None
# ensure filename matches specified pattern
# set in default_crawl_filename_template