import requests import os from urllib.parse import urljoin from .conftest import API_PREFIX from .utils import read_in_chunks upload_id = None upload_id_2 = None upload_dl_path = None curr_dir = os.path.dirname(os.path.realpath(__file__)) def test_upload_stream(admin_auth_headers, default_org_id): with open(os.path.join(curr_dir, "data", "example.wacz"), "rb") as fh: r = requests.put( f"{API_PREFIX}/orgs/{default_org_id}/uploads/stream?filename=test.wacz&name=My%20Upload¬es=Testing%0AData", headers=admin_auth_headers, data=read_in_chunks(fh), ) assert r.status_code == 200 assert r.json()["added"] global upload_id upload_id = r.json()["id"] def test_list_stream_upload(admin_auth_headers, default_org_id): r = requests.get( f"{API_PREFIX}/orgs/{default_org_id}/uploads", headers=admin_auth_headers, ) results = r.json() assert len(results["items"]) > 0 found = None for res in results["items"]: if res["id"] == upload_id: found = res assert found assert found["name"] == "My Upload" assert found["notes"] == "Testing\nData" assert "files" not in found assert "resources" not in found def test_get_stream_upload(admin_auth_headers, default_org_id): r = requests.get( f"{API_PREFIX}/orgs/{default_org_id}/uploads/{upload_id}/replay.json", headers=admin_auth_headers, ) assert r.status_code == 200 result = r.json() assert "files" not in result upload_dl_path = result["resources"][0]["path"] assert "test-" in result["resources"][0]["name"] assert result["resources"][0]["name"].endswith(".wacz") dl_path = urljoin(API_PREFIX, upload_dl_path) wacz_resp = requests.get(dl_path) actual = wacz_resp.content with open(os.path.join(curr_dir, "data", "example.wacz"), "rb") as fh: expected = fh.read() assert len(actual) == len(expected) assert actual == expected r = requests.get( f"{API_PREFIX}/orgs/{default_org_id}/all-crawls/{upload_id}", headers=admin_auth_headers, ) assert r.status_code == 200 def test_upload_form(admin_auth_headers, default_org_id): with open(os.path.join(curr_dir, "data", "example.wacz"), "rb") as fh: data = fh.read() files = [ ("uploads", ("test.wacz", data, "application/octet-stream")), ("uploads", ("test-2.wacz", data, "application/octet-stream")), ("uploads", ("test.wacz", data, "application/octet-stream")), ] r = requests.put( f"{API_PREFIX}/orgs/{default_org_id}/uploads/formdata?name=test2.wacz", headers=admin_auth_headers, files=files, ) assert r.status_code == 200 assert r.json()["added"] global upload_id_2 upload_id_2 = r.json()["id"] def test_list_uploads(admin_auth_headers, default_org_id): r = requests.get( f"{API_PREFIX}/orgs/{default_org_id}/uploads", headers=admin_auth_headers, ) results = r.json() assert len(results["items"]) > 1 found = None for res in results["items"]: if res["id"] == upload_id_2: found = res assert found assert found["name"] == "test2.wacz" assert "files" not in res assert "resources" not in res def test_collection_uploads(admin_auth_headers, default_org_id): # Create collection with one upload r = requests.post( f"{API_PREFIX}/orgs/{default_org_id}/collections", headers=admin_auth_headers, json={ "crawlIds": [upload_id], "name": "My Test Coll", }, ) assert r.status_code == 200 data = r.json() coll_id = data["id"] assert data["added"] # Test uploads filtered by collection r = requests.get( f"{API_PREFIX}/orgs/{default_org_id}/uploads?collectionId={coll_id}", headers=admin_auth_headers, ) results = r.json() assert len(results["items"]) == 1 assert results["items"][0]["id"] == upload_id # Test all crawls filtered by collection r = requests.get( f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?collectionId={coll_id}", headers=admin_auth_headers, ) results = r.json() assert len(results["items"]) == 1 assert results["items"][0]["id"] == upload_id # Delete Collection r = requests.delete( f"{API_PREFIX}/orgs/{default_org_id}/collections/{coll_id}", headers=admin_auth_headers, ) assert r.status_code == 200 assert r.json()["success"] def test_get_upload_replay_json(admin_auth_headers, default_org_id): r = requests.get( f"{API_PREFIX}/orgs/{default_org_id}/uploads/{upload_id}/replay.json", headers=admin_auth_headers, ) assert r.status_code == 200 data = r.json() assert data assert data["id"] == upload_id assert data["name"] == "My Upload" assert data["resources"] assert data["resources"][0]["path"] assert data["resources"][0]["size"] assert data["resources"][0]["hash"] assert data["errors"] == None assert "files" not in data def test_get_upload_replay_json_admin(admin_auth_headers, default_org_id): r = requests.get( f"{API_PREFIX}/orgs/all/uploads/{upload_id}/replay.json", headers=admin_auth_headers, ) assert r.status_code == 200 data = r.json() assert data assert data["id"] == upload_id assert data["name"] == "My Upload" assert data["resources"] assert data["resources"][0]["path"] assert data["resources"][0]["size"] assert data["resources"][0]["hash"] assert data["errors"] == None assert "files" not in data def test_replace_upload(admin_auth_headers, default_org_id): actual_id = do_upload_replace(admin_auth_headers, default_org_id, upload_id) assert upload_id == actual_id def do_upload_replace(admin_auth_headers, default_org_id, upload_id): with open(os.path.join(curr_dir, "data", "example-2.wacz"), "rb") as fh: r = requests.put( f"{API_PREFIX}/orgs/{default_org_id}/uploads/stream?filename=test.wacz&name=My%20Upload%20Updated&replaceId={upload_id}", headers=admin_auth_headers, data=read_in_chunks(fh), ) assert r.status_code == 200 assert r.json()["added"] actual_id = r.json()["id"] r = requests.get( f"{API_PREFIX}/orgs/{default_org_id}/uploads/{actual_id}/replay.json", headers=admin_auth_headers, ) assert r.status_code == 200 result = r.json() # only one file, previous file removed assert len(result["resources"]) == 1 dl_path = urljoin(API_PREFIX, result["resources"][0]["path"]) wacz_resp = requests.get(dl_path) actual = wacz_resp.content with open(os.path.join(curr_dir, "data", "example-2.wacz"), "rb") as fh: expected = fh.read() assert len(actual) == len(expected) assert actual == expected return actual_id def test_update_upload_metadata(admin_auth_headers, default_org_id): r = requests.get( f"{API_PREFIX}/orgs/{default_org_id}/uploads/{upload_id}", headers=admin_auth_headers, ) assert r.status_code == 200 data = r.json() assert data["name"] == "My Upload Updated" assert not data["tags"] assert not data["notes"] # Submit patch request to update name, tags, and notes UPDATED_NAME = "New Upload Name" UPDATED_TAGS = ["wr-test-1-updated", "wr-test-2-updated"] UPDATED_NOTES = "Lorem ipsum test note." r = requests.patch( f"{API_PREFIX}/orgs/{default_org_id}/uploads/{upload_id}", headers=admin_auth_headers, json={"tags": UPDATED_TAGS, "notes": UPDATED_NOTES, "name": UPDATED_NAME}, ) assert r.status_code == 200 data = r.json() assert data["updated"] # Verify update was successful r = requests.get( f"{API_PREFIX}/orgs/{default_org_id}/uploads/{upload_id}", headers=admin_auth_headers, ) assert r.status_code == 200 data = r.json() assert sorted(data["tags"]) == sorted(UPDATED_TAGS) assert data["notes"] == UPDATED_NOTES assert data["name"] == UPDATED_NAME def test_delete_stream_upload(admin_auth_headers, default_org_id): r = requests.post( f"{API_PREFIX}/orgs/{default_org_id}/uploads/delete", headers=admin_auth_headers, json={"crawl_ids": [upload_id]}, ) assert r.json()["deleted"] == True def test_replace_upload_non_existent(admin_auth_headers, default_org_id): global upload_id # same replacement, but now to a non-existent upload actual_id = do_upload_replace(admin_auth_headers, default_org_id, upload_id) # new upload_id created assert actual_id != upload_id upload_id = actual_id def test_delete_stream_upload_2(admin_auth_headers, default_org_id): r = requests.post( f"{API_PREFIX}/orgs/{default_org_id}/uploads/delete", headers=admin_auth_headers, json={"crawl_ids": [upload_id]}, ) assert r.json()["deleted"] == True def test_verify_from_upload_resource_count(admin_auth_headers, default_org_id): r = requests.get( f"{API_PREFIX}/orgs/{default_org_id}/uploads/{upload_id_2}/replay.json", headers=admin_auth_headers, ) assert r.status_code == 200 result = r.json() assert "files" not in result assert len(result["resources"]) == 3 r = requests.get( f"{API_PREFIX}/orgs/{default_org_id}/all-crawls/{upload_id_2}", headers=admin_auth_headers, ) assert r.status_code == 200 def test_list_all_crawls(admin_auth_headers, default_org_id): """Test that /all-crawls lists crawls and uploads before deleting uploads""" r = requests.get( f"{API_PREFIX}/orgs/{default_org_id}/all-crawls", headers=admin_auth_headers, ) assert r.status_code == 200 data = r.json() items = data["items"] assert len(items) == data["total"] crawls = [item for item in items if item["type"] == "crawl"] assert len(crawls) > 0 uploads = [item for item in items if item["type"] == "upload"] assert len(uploads) > 0 for item in items: assert item["type"] in ("crawl", "upload") if item["type"] == "crawl": assert item["firstSeed"] assert item["seedCount"] assert item.get("name") or item.get("name") == "" assert item["id"] assert item["userid"] assert item["oid"] == default_org_id assert item["started"] assert item["finished"] assert item["state"] def test_get_upload_from_all_crawls(admin_auth_headers, default_org_id): """Test that /all-crawls lists crawls and uploads before deleting uploads""" r = requests.get( f"{API_PREFIX}/orgs/{default_org_id}/all-crawls/{upload_id_2}", headers=admin_auth_headers, ) assert r.status_code == 200 data = r.json() assert data["name"] == "test2.wacz" assert "files" not in data assert data["resources"] def test_get_upload_replay_json_from_all_crawls(admin_auth_headers, default_org_id): r = requests.get( f"{API_PREFIX}/orgs/{default_org_id}/all-crawls/{upload_id_2}/replay.json", headers=admin_auth_headers, ) assert r.status_code == 200 data = r.json() assert data assert data["id"] == upload_id_2 assert data["name"] == "test2.wacz" assert data["resources"] assert data["resources"][0]["path"] assert data["resources"][0]["size"] assert data["resources"][0]["hash"] assert data["errors"] == None assert "files" not in data def test_get_upload_replay_json_admin_from_all_crawls( admin_auth_headers, default_org_id ): r = requests.get( f"{API_PREFIX}/orgs/all/all-crawls/{upload_id_2}/replay.json", headers=admin_auth_headers, ) assert r.status_code == 200 data = r.json() assert data assert data["id"] == upload_id_2 assert data["name"] == "test2.wacz" assert data["resources"] assert data["resources"][0]["path"] assert data["resources"][0]["size"] assert data["resources"][0]["hash"] assert data["errors"] == None assert "files" not in data def test_delete_form_upload_from_all_crawls(admin_auth_headers, default_org_id): r = requests.post( f"{API_PREFIX}/orgs/{default_org_id}/all-crawls/delete", headers=admin_auth_headers, json={"crawl_ids": [upload_id_2]}, ) assert r.json()["deleted"] == True def test_ensure_deleted(admin_auth_headers, default_org_id): r = requests.get( f"{API_PREFIX}/orgs/{default_org_id}/uploads", headers=admin_auth_headers, ) results = r.json() for res in results["items"]: if res["id"] in (upload_id_2, upload_id): assert False