diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py index 6a66bbb9..35263499 100644 --- a/backend/btrixcloud/models.py +++ b/backend/btrixcloud/models.py @@ -437,6 +437,7 @@ class UploadedCrawl(BaseCrawl): type: str = Field("upload", const=True) name: str + tags: Optional[List[str]] = [] # ============================================================================ @@ -777,7 +778,7 @@ class ProfileCreateUpdate(BaseModel): # ============================================================================ -### PROFILES ### +### USERS ### # ============================================================================ diff --git a/backend/btrixcloud/uploads.py b/backend/btrixcloud/uploads.py index b5058e2a..090c2f47 100644 --- a/backend/btrixcloud/uploads.py +++ b/backend/btrixcloud/uploads.py @@ -4,6 +4,7 @@ import uuid import hashlib import os import base64 +from urllib.parse import unquote from io import BufferedReader from typing import Optional, List @@ -46,6 +47,8 @@ class UploadOps(BaseCrawlOps): filename: str, name: Optional[str], notes: Optional[str], + collections: Optional[List[UUID4]], + tags: Optional[List[str]], org: Organization, user: User, replaceId: Optional[str], @@ -92,7 +95,9 @@ class UploadOps(BaseCrawlOps): except Exception as exc: print("replace file deletion failed", exc) - return await self._create_upload(files, name, notes, id_, org, user) + return await self._create_upload( + files, name, notes, collections, tags, id_, org, user + ) # pylint: disable=too-many-arguments, too-many-locals async def upload_formdata( @@ -100,6 +105,8 @@ class UploadOps(BaseCrawlOps): uploads: List[UploadFile], name: Optional[str], notes: Optional[str], + collections: Optional[List[UUID4]], + tags: Optional[List[str]], org: Organization, user: User, ): @@ -117,9 +124,13 @@ class UploadOps(BaseCrawlOps): ) files.append(file_reader.file_prep.get_crawl_file()) - return await self._create_upload(files, name, notes, id_, org, user) + return await 
self._create_upload( + files, name, notes, collections, tags, id_, org, user + ) - async def _create_upload(self, files, name, notes, id_, org, user): + async def _create_upload( + self, files, name, notes, collections, tags, id_, org, user + ): now = dt_now() # ts_now = now.strftime("%Y%m%d%H%M%S") # crawl_id = f"upload-{ts_now}-{str(id_)[:12]}" @@ -127,10 +138,16 @@ class UploadOps(BaseCrawlOps): file_size = sum(file_.size for file_ in files) + collection_uuids = [] + for coll in collections: + collection_uuids.append(uuid.UUID(coll)) + uploaded = UploadedCrawl( id=crawl_id, name=name or "New Upload @ " + str(now), notes=notes, + collections=collection_uuids, + tags=tags, userid=user.id, oid=org.id, files=files, @@ -224,10 +241,24 @@ def init_uploads_api(app, mdb, users, crawl_manager, crawl_configs, orgs, user_d uploads: List[UploadFile] = File(...), name: Optional[str] = "", notes: Optional[str] = "", + collections: Optional[str] = "", + tags: Optional[str] = "", org: Organization = Depends(org_crawl_dep), user: User = Depends(user_dep), ): - return await ops.upload_formdata(uploads, name, notes, org, user) + name = unquote(name) + notes = unquote(notes) + colls_list = [] + if collections: + colls_list = unquote(collections).split(",") + + tags_list = [] + if tags: + tags_list = unquote(tags).split(",") + + return await ops.upload_formdata( + uploads, name, notes, colls_list, tags_list, org, user + ) @app.put("/orgs/{oid}/uploads/stream", tags=["uploads"]) async def upload_stream( @@ -235,12 +266,32 @@ def init_uploads_api(app, mdb, users, crawl_manager, crawl_configs, orgs, user_d filename: str, name: Optional[str] = "", notes: Optional[str] = "", + collections: Optional[str] = "", + tags: Optional[str] = "", replaceId: Optional[str] = "", org: Organization = Depends(org_crawl_dep), user: User = Depends(user_dep), ): + name = unquote(name) + notes = unquote(notes) + colls_list = [] + if collections: + colls_list = unquote(collections).split(",") + + tags_list 
= [] + if tags: + tags_list = unquote(tags).split(",") + return await ops.upload_stream( - request.stream(), filename, name, notes, org, user, replaceId + request.stream(), + filename, + name, + notes, + colls_list, + tags_list, + org, + user, + replaceId, ) @app.get("/orgs/{oid}/uploads", tags=["uploads"], response_model=PaginatedResponse) diff --git a/backend/test/conftest.py b/backend/test/conftest.py index ce9acf8f..25b75d9e 100644 --- a/backend/test/conftest.py +++ b/backend/test/conftest.py @@ -329,3 +329,14 @@ def auto_add_crawl_id(crawler_auth_headers, default_org_id, auto_add_collection_ @pytest.fixture(scope="session") def auto_add_config_id(auto_add_crawl_id): return _auto_add_config_id + + +@pytest.fixture(scope="session") +def uploads_collection_id(crawler_auth_headers, default_org_id): + r = requests.post( + f"{API_PREFIX}/orgs/{default_org_id}/collections", + headers=crawler_auth_headers, + json={"name": "Upload test collection"}, + ) + assert r.status_code == 200 + return r.json()["id"] diff --git a/backend/test/test_uploads.py b/backend/test/test_uploads.py index b486e077..95a671c5 100644 --- a/backend/test/test_uploads.py +++ b/backend/test/test_uploads.py @@ -13,10 +13,10 @@ upload_dl_path = None curr_dir = os.path.dirname(os.path.realpath(__file__)) -def test_upload_stream(admin_auth_headers, default_org_id): +def test_upload_stream(admin_auth_headers, default_org_id, uploads_collection_id): with open(os.path.join(curr_dir, "data", "example.wacz"), "rb") as fh: r = requests.put( - f"{API_PREFIX}/orgs/{default_org_id}/uploads/stream?filename=test.wacz&name=My%20Upload&notes=Testing%0AData", + f"{API_PREFIX}/orgs/{default_org_id}/uploads/stream?filename=test.wacz&name=My%20Upload&notes=Testing%0AData&collections={uploads_collection_id}&tags=one%2Ctwo", headers=admin_auth_headers, data=read_in_chunks(fh), ) @@ -28,7 +28,7 @@ def test_upload_stream(admin_auth_headers, default_org_id): upload_id = r.json()["id"] -def 
test_list_stream_upload(admin_auth_headers, default_org_id): +def test_list_stream_upload(admin_auth_headers, default_org_id, uploads_collection_id): r = requests.get( f"{API_PREFIX}/orgs/{default_org_id}/uploads", headers=admin_auth_headers, @@ -46,17 +46,20 @@ def test_list_stream_upload(admin_auth_headers, default_org_id): assert found assert found["name"] == "My Upload" assert found["notes"] == "Testing\nData" + assert found["collections"] == [uploads_collection_id] + assert sorted(found["tags"]) == ["one", "two"] assert "files" not in found assert "resources" not in found -def test_get_stream_upload(admin_auth_headers, default_org_id): +def test_get_stream_upload(admin_auth_headers, default_org_id, uploads_collection_id): r = requests.get( f"{API_PREFIX}/orgs/{default_org_id}/uploads/{upload_id}/replay.json", headers=admin_auth_headers, ) assert r.status_code == 200 result = r.json() + assert uploads_collection_id in result["collections"] assert "files" not in result upload_dl_path = result["resources"][0]["path"] assert "test-" in result["resources"][0]["name"] @@ -79,7 +82,7 @@ def test_get_stream_upload(admin_auth_headers, default_org_id): assert r.status_code == 200 -def test_upload_form(admin_auth_headers, default_org_id): +def test_upload_form(admin_auth_headers, default_org_id, uploads_collection_id): with open(os.path.join(curr_dir, "data", "example.wacz"), "rb") as fh: data = fh.read() @@ -90,7 +93,7 @@ def test_upload_form(admin_auth_headers, default_org_id): ] r = requests.put( - f"{API_PREFIX}/orgs/{default_org_id}/uploads/formdata?name=test2.wacz", + f"{API_PREFIX}/orgs/{default_org_id}/uploads/formdata?name=test2.wacz&collections={uploads_collection_id}&tags=three%2Cfour", headers=admin_auth_headers, files=files, ) @@ -102,7 +105,7 @@ def test_upload_form(admin_auth_headers, default_org_id): upload_id_2 = r.json()["id"] -def test_list_uploads(admin_auth_headers, default_org_id): +def test_list_uploads(admin_auth_headers, default_org_id, 
uploads_collection_id): r = requests.get( f"{API_PREFIX}/orgs/{default_org_id}/uploads", headers=admin_auth_headers, @@ -119,58 +122,42 @@ def test_list_uploads(admin_auth_headers, default_org_id): assert found assert found["name"] == "test2.wacz" + assert found["collections"] == [uploads_collection_id] + assert sorted(found["tags"]) == ["four", "three"] assert "files" not in res assert "resources" not in res -def test_collection_uploads(admin_auth_headers, default_org_id): - # Create collection with one upload - r = requests.post( - f"{API_PREFIX}/orgs/{default_org_id}/collections", - headers=admin_auth_headers, - json={ - "crawlIds": [upload_id], - "name": "My Test Coll", - }, - ) - assert r.status_code == 200 - data = r.json() - coll_id = data["id"] - assert data["added"] - +def test_collection_uploads(admin_auth_headers, default_org_id, uploads_collection_id): # Test uploads filtered by collection r = requests.get( - f"{API_PREFIX}/orgs/{default_org_id}/uploads?collectionId={coll_id}", + f"{API_PREFIX}/orgs/{default_org_id}/uploads?collectionId={uploads_collection_id}", headers=admin_auth_headers, ) results = r.json() - assert len(results["items"]) == 1 - assert results["items"][0]["id"] == upload_id + assert len(results["items"]) == 2 + assert results["items"][0]["id"] in (upload_id, upload_id_2) + assert results["items"][1]["id"] in (upload_id, upload_id_2) # Test all crawls filtered by collection r = requests.get( - f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?collectionId={coll_id}", + f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?collectionId={uploads_collection_id}", headers=admin_auth_headers, ) results = r.json() - assert len(results["items"]) == 1 - assert results["items"][0]["id"] == upload_id - - # Delete Collection - r = requests.delete( - f"{API_PREFIX}/orgs/{default_org_id}/collections/{coll_id}", - headers=admin_auth_headers, - ) - assert r.status_code == 200 - assert r.json()["success"] + assert len(results["items"]) == 2 + assert 
results["items"][0]["id"] in (upload_id, upload_id_2) + assert results["items"][1]["id"] in (upload_id, upload_id_2) -def test_get_upload_replay_json(admin_auth_headers, default_org_id): +def test_get_upload_replay_json( + admin_auth_headers, default_org_id, uploads_collection_id +): r = requests.get( f"{API_PREFIX}/orgs/{default_org_id}/uploads/{upload_id}/replay.json", headers=admin_auth_headers, @@ -181,6 +168,8 @@ def test_get_upload_replay_json(admin_auth_headers, default_org_id): assert data assert data["id"] == upload_id assert data["name"] == "My Upload" + assert data["collections"] == [uploads_collection_id] + assert sorted(data["tags"]) == ["one", "two"] assert data["resources"] assert data["resources"][0]["path"] assert data["resources"][0]["size"] @@ -189,7 +178,9 @@ def test_get_upload_replay_json(admin_auth_headers, default_org_id): assert "files" not in data -def test_get_upload_replay_json_admin(admin_auth_headers, default_org_id): +def test_get_upload_replay_json_admin( + admin_auth_headers, default_org_id, uploads_collection_id +): r = requests.get( f"{API_PREFIX}/orgs/all/uploads/{upload_id}/replay.json", headers=admin_auth_headers, @@ -200,6 +191,8 @@ def test_get_upload_replay_json_admin(admin_auth_headers, default_org_id): assert data assert data["id"] == upload_id assert data["name"] == "My Upload" + assert data["collections"] == [uploads_collection_id] + assert sorted(data["tags"]) == ["one", "two"] assert data["resources"] assert data["resources"][0]["path"] assert data["resources"][0]["size"] @@ -208,16 +201,20 @@ def test_get_upload_replay_json_admin(admin_auth_headers, default_org_id): assert "files" not in data -def test_replace_upload(admin_auth_headers, default_org_id): - actual_id = do_upload_replace(admin_auth_headers, default_org_id, upload_id) +def test_replace_upload(admin_auth_headers, default_org_id, uploads_collection_id): + actual_id = do_upload_replace( + admin_auth_headers, default_org_id, upload_id, uploads_collection_id + 
) assert upload_id == actual_id -def do_upload_replace(admin_auth_headers, default_org_id, upload_id): +def do_upload_replace( + admin_auth_headers, default_org_id, upload_id, uploads_collection_id +): with open(os.path.join(curr_dir, "data", "example-2.wacz"), "rb") as fh: r = requests.put( - f"{API_PREFIX}/orgs/{default_org_id}/uploads/stream?filename=test.wacz&name=My%20Upload%20Updated&replaceId={upload_id}", + f"{API_PREFIX}/orgs/{default_org_id}/uploads/stream?filename=test.wacz&name=My%20Upload%20Updated&replaceId={upload_id}&collections={uploads_collection_id}", headers=admin_auth_headers, data=read_in_chunks(fh), ) @@ -294,11 +291,27 @@ def test_delete_stream_upload(admin_auth_headers, default_org_id): assert r.json()["deleted"] == True -def test_replace_upload_non_existent(admin_auth_headers, default_org_id): +def test_ensure_deleted(admin_auth_headers, default_org_id): + r = requests.get( + f"{API_PREFIX}/orgs/{default_org_id}/uploads", + headers=admin_auth_headers, + ) + results = r.json() + + for res in results["items"]: + if res["id"] == upload_id: + assert False + + +def test_replace_upload_non_existent( + admin_auth_headers, default_org_id, uploads_collection_id +): global upload_id # same replacement, but now to a non-existent upload - actual_id = do_upload_replace(admin_auth_headers, default_org_id, upload_id) + actual_id = do_upload_replace( + admin_auth_headers, default_org_id, upload_id, uploads_collection_id + ) # new upload_id created assert actual_id != upload_id @@ -306,15 +319,6 @@ def test_replace_upload_non_existent(admin_auth_headers, default_org_id): upload_id = actual_id -def test_delete_stream_upload_2(admin_auth_headers, default_org_id): - r = requests.post( - f"{API_PREFIX}/orgs/{default_org_id}/uploads/delete", - headers=admin_auth_headers, - json={"crawl_ids": [upload_id]}, - ) - assert r.json()["deleted"] == True - - def test_verify_from_upload_resource_count(admin_auth_headers, default_org_id): r = requests.get( 
f"{API_PREFIX}/orgs/{default_org_id}/uploads/{upload_id_2}/replay.json", @@ -429,15 +433,3 @@ def test_delete_form_upload_from_all_crawls(admin_auth_headers, default_org_id): json={"crawl_ids": [upload_id_2]}, ) assert r.json()["deleted"] == True - - -def test_ensure_deleted(admin_auth_headers, default_org_id): - r = requests.get( - f"{API_PREFIX}/orgs/{default_org_id}/uploads", - headers=admin_auth_headers, - ) - results = r.json() - - for res in results["items"]: - if res["id"] in (upload_id_2, upload_id): - assert False