replay api: add downloadUrl to replay endpoints to be used by RWP (#2456)

RWP (2.3.3+) can determine whether the 'Download Archive' menu item should be
shown based on the value of downloadUrl.
If downloadUrl is set to null, RWP will hide the menu item:
- set downloadUrl to public collection download for public collections
replay
- set downloadUrl to null for private collection and crawl replay to
hide the download menu item in RWP (otherwise we would have to add the
auth_header query parameter with a bearer token, and should assess the
security implications before doing that)

---------

Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
This commit is contained in:
Ilya Kreymer 2025-03-03 14:11:28 -08:00 committed by GitHub
parent 65a40c4816
commit afa892000b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 22 additions and 4 deletions

View File

@ -169,11 +169,14 @@ class BaseCrawlOps:
        oid = res.get("oid")
        if oid:
            origin = get_origin(headers)
            res["pagesQueryUrl"] = (
                origin + f"/api/orgs/{oid}/crawls/{crawlid}/pagesSearch"
            )
            # this will now disable the downloadUrl in RWP
            res["downloadUrl"] = None

        crawl = CrawlOutWithResources.from_dict(res)

        if not skip_resources:

View File

@ -352,11 +352,21 @@ class CollectionOps:
        public = "public/" if public_or_unlisted_only else ""
        origin = get_origin(headers)

        if public_or_unlisted_only:
            slug = result.get("slug")
            result["downloadUrl"] = (
                origin + f"/api/{public}orgs/{org.slug}/collections/{slug}/download"
            )
        else:
            # disable download link, as not public without auth
            result["downloadUrl"] = None

        if pages_optimized:
            result["initialPages"] = initial_pages
            result["pagesQueryUrl"] = (
                origin + f"/api/orgs/{org.id}/collections/{coll_id}/{public}pages"
            )

        thumbnail = result.get("thumbnail")

View File

@ -1389,6 +1389,7 @@ class CrawlOutWithResources(CrawlOut):
    initialPages: List[PageOut] = []
    pagesQueryUrl: str = ""
    downloadUrl: Optional[str] = None


# ============================================================================
@ -1518,6 +1519,7 @@ class CollOut(BaseMongoModel):
    initialPages: List[PageOut] = []
    preloadResources: List[PreloadResource] = []
    pagesQueryUrl: str = ""
    downloadUrl: Optional[str] = None


# ============================================================================

View File

@ -418,6 +418,7 @@ def test_get_collection_replay(crawler_auth_headers, default_org_id):
    assert data["pagesQueryUrl"].endswith(
        f"/orgs/{default_org_id}/collections/{_coll_id}/pages"
    )
    assert data["downloadUrl"] is None
    assert "preloadResources" in data

    resources = data["resources"]
@ -455,6 +456,7 @@ def test_collection_public(crawler_auth_headers, default_org_id):
    assert data["pagesQueryUrl"].endswith(
        f"/orgs/{default_org_id}/collections/{_coll_id}/public/pages"
    )
    assert data["downloadUrl"] is not None
    assert "preloadResources" in data

    assert r.status_code == 200

View File

@ -188,6 +188,7 @@ def test_wait_for_complete(admin_auth_headers, default_org_id):
    assert data["pagesQueryUrl"].endswith(
        f"/orgs/{default_org_id}/crawls/{admin_crawl_id}/pagesSearch"
    )
    assert data["downloadUrl"] is None

    # ensure filename matches specified pattern
    # set in default_crawl_filename_template