diff --git a/backend/btrixcloud/basecrawls.py b/backend/btrixcloud/basecrawls.py
index c6fb2327..7077462c 100644
--- a/backend/btrixcloud/basecrawls.py
+++ b/backend/btrixcloud/basecrawls.py
@@ -169,11 +169,14 @@ class BaseCrawlOps:
 
         oid = res.get("oid")
         if oid:
+            origin = get_origin(headers)
             res["pagesQueryUrl"] = (
-                get_origin(headers)
-                + f"/api/orgs/{oid}/crawls/{crawlid}/pagesSearch"
+                origin + f"/api/orgs/{oid}/crawls/{crawlid}/pagesSearch"
             )
 
+        # this will now disable the downloadUrl in RWP
+        res["downloadUrl"] = None
+
         crawl = CrawlOutWithResources.from_dict(res)
 
         if not skip_resources:
diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py
index 9ac75922..e74e5b6a 100644
--- a/backend/btrixcloud/colls.py
+++ b/backend/btrixcloud/colls.py
@@ -352,11 +352,21 @@ class CollectionOps:
 
             public = "public/" if public_or_unlisted_only else ""
 
+            origin = get_origin(headers)
+
+            if public_or_unlisted_only:
+                slug = result.get("slug")
+                result["downloadUrl"] = (
+                    origin + f"/api/{public}orgs/{org.slug}/collections/{slug}/download"
+                )
+            else:
+                # disable download link, as not public without auth
+                result["downloadUrl"] = None
+
             if pages_optimized:
                 result["initialPages"] = initial_pages
                 result["pagesQueryUrl"] = (
-                    get_origin(headers)
-                    + f"/api/orgs/{org.id}/collections/{coll_id}/{public}pages"
+                    origin + f"/api/orgs/{org.id}/collections/{coll_id}/{public}pages"
                 )
 
         thumbnail = result.get("thumbnail")
diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py
index 241dfd2c..d2d9df81 100644
--- a/backend/btrixcloud/models.py
+++ b/backend/btrixcloud/models.py
@@ -1389,6 +1389,7 @@ class CrawlOutWithResources(CrawlOut):
 
     initialPages: List[PageOut] = []
     pagesQueryUrl: str = ""
+    downloadUrl: Optional[str] = None
 
 
 # ============================================================================
@@ -1518,6 +1519,7 @@ class CollOut(BaseMongoModel):
     initialPages: List[PageOut] = []
     preloadResources: List[PreloadResource] = []
     pagesQueryUrl: str = ""
+    downloadUrl: Optional[str] = None
 
 
 # ============================================================================
diff --git a/backend/test/test_collections.py b/backend/test/test_collections.py
index 47a72ec2..6eb7c459 100644
--- a/backend/test/test_collections.py
+++ b/backend/test/test_collections.py
@@ -418,6 +418,7 @@ def test_get_collection_replay(crawler_auth_headers, default_org_id):
     assert data["pagesQueryUrl"].endswith(
         f"/orgs/{default_org_id}/collections/{_coll_id}/pages"
     )
+    assert data["downloadUrl"] is None
     assert "preloadResources" in data
 
     resources = data["resources"]
@@ -455,6 +456,7 @@ def test_collection_public(crawler_auth_headers, default_org_id):
     assert data["pagesQueryUrl"].endswith(
         f"/orgs/{default_org_id}/collections/{_coll_id}/public/pages"
     )
+    assert data["downloadUrl"] is not None
     assert "preloadResources" in data
 
     assert r.status_code == 200
diff --git a/backend/test/test_run_crawl.py b/backend/test/test_run_crawl.py
index ef383571..ba95415f 100644
--- a/backend/test/test_run_crawl.py
+++ b/backend/test/test_run_crawl.py
@@ -188,6 +188,7 @@ def test_wait_for_complete(admin_auth_headers, default_org_id):
     assert data["pagesQueryUrl"].endswith(
         f"/orgs/{default_org_id}/crawls/{admin_crawl_id}/pagesSearch"
     )
+    assert data["downloadUrl"] is None
 
     # ensure filename matches specified pattern
     # set in default_crawl_filename_template
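For context, a minimal sketch of how a client might consume the new `downloadUrl` field introduced by this diff. The host, org slug, collection slug, and endpoint path below are hypothetical illustrations, not confirmed by the patch; only the `downloadUrl` and `pagesQueryUrl` fields come from the models above.

```python
import requests

# Hypothetical host, slugs, and endpoint path -- substitute real values.
BASE = "https://app.example.com"
URL = f"{BASE}/api/public/orgs/example-org/collections/example-coll/replay.json"

resp = requests.get(URL, timeout=30)
resp.raise_for_status()
data = resp.json()

# Per this change: public/unlisted collection views get an absolute
# download URL, while authenticated non-public views (and crawls) get
# None, which hides the download button in replayweb.page (RWP).
download_url = data.get("downloadUrl")
if download_url:
    with requests.get(download_url, stream=True, timeout=60) as dl:
        dl.raise_for_status()
        with open("collection.wacz", "wb") as fh:
            for chunk in dl.iter_content(chunk_size=1 << 20):
                fh.write(chunk)
else:
    print("downloadUrl is None; download disabled for this view")
```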