diff --git a/backend/btrixcloud/pages.py b/backend/btrixcloud/pages.py index d9e50174..577077b9 100644 --- a/backend/btrixcloud/pages.py +++ b/backend/btrixcloud/pages.py @@ -92,7 +92,7 @@ class PageOps: if not page_dict.get("url"): continue - if not page_dict.get("isSeed"): + if not page_dict.get("isSeed") and not page_dict.get("seed"): page_dict["isSeed"] = False if len(pages_buffer) > batch_size: diff --git a/backend/btrixcloud/storages.py b/backend/btrixcloud/storages.py index d0349748..620c4b29 100644 --- a/backend/btrixcloud/storages.py +++ b/backend/btrixcloud/storages.py @@ -607,7 +607,9 @@ class StorageOps: # pylint: disable=too-many-function-args def stream_page_lines( - pagefile_zipinfo: ZipInfo, wacz_url: str, wacz_filename: str + pagefile_zipinfo: ZipInfo, + wacz_url: str, + wacz_filename: str, ) -> Iterator[Dict[Any, Any]]: """Pass lines as json objects""" filename = pagefile_zipinfo.filename @@ -621,6 +623,8 @@ class StorageOps: for line in line_iter: page_json = _parse_json(line.decode("utf-8", errors="ignore")) page_json["filename"] = os.path.basename(wacz_filename) + if filename == "pages/pages.jsonl": + page_json["seed"] = True yield page_json page_generators: List[Iterator[Dict[Any, Any]]] = [] @@ -637,7 +641,11 @@ class StorageOps: ] for pagefile_zipinfo in page_files: page_generators.append( - stream_page_lines(pagefile_zipinfo, wacz_url, wacz_file.name) + stream_page_lines( + pagefile_zipinfo, + wacz_url, + wacz_file.name, + ) ) return chain.from_iterable(page_generators) diff --git a/backend/test/test_uploads.py b/backend/test/test_uploads.py index 56e5c1d9..6c4b18df 100644 --- a/backend/test/test_uploads.py +++ b/backend/test/test_uploads.py @@ -254,6 +254,7 @@ def test_get_upload_pages(admin_auth_headers, default_org_id, upload_id): assert page["ts"] assert page["filename"] assert page.get("title") or page.get("title") is None + assert page["isSeed"] page_id = pages[0]["id"] r = requests.get( @@ -270,6 +271,7 @@ def test_get_upload_pages(admin_auth_headers, default_org_id, upload_id): assert page["ts"] assert page["filename"] assert page.get("title") or page.get("title") is None + assert page["isSeed"] assert page["notes"] == [] assert page.get("userid") is None