Add collections and tags to upload API endpoints (#993)
* Add collections and tags to uploads
* Fix order of deletion check test
* Re-add tags to UploadedCrawl model after rebase
* Fix Users model heading
This commit is contained in:
parent
4014d98243
commit
9f32aa697b
@ -437,6 +437,7 @@ class UploadedCrawl(BaseCrawl):
|
||||
type: str = Field("upload", const=True)
|
||||
|
||||
name: str
|
||||
tags: Optional[List[str]] = []
|
||||
|
||||
|
||||
# ============================================================================
|
||||
@ -777,7 +778,7 @@ class ProfileCreateUpdate(BaseModel):
|
||||
|
||||
# ============================================================================
|
||||
|
||||
### PROFILES ###
|
||||
### USERS ###
|
||||
|
||||
|
||||
# ============================================================================
|
||||
|
@ -4,6 +4,7 @@ import uuid
|
||||
import hashlib
|
||||
import os
|
||||
import base64
|
||||
from urllib.parse import unquote
|
||||
|
||||
from io import BufferedReader
|
||||
from typing import Optional, List
|
||||
@ -46,6 +47,8 @@ class UploadOps(BaseCrawlOps):
|
||||
filename: str,
|
||||
name: Optional[str],
|
||||
notes: Optional[str],
|
||||
collections: Optional[List[UUID4]],
|
||||
tags: Optional[List[str]],
|
||||
org: Organization,
|
||||
user: User,
|
||||
replaceId: Optional[str],
|
||||
@ -92,7 +95,9 @@ class UploadOps(BaseCrawlOps):
|
||||
except Exception as exc:
|
||||
print("replace file deletion failed", exc)
|
||||
|
||||
return await self._create_upload(files, name, notes, id_, org, user)
|
||||
return await self._create_upload(
|
||||
files, name, notes, collections, tags, id_, org, user
|
||||
)
|
||||
|
||||
# pylint: disable=too-many-arguments, too-many-locals
|
||||
async def upload_formdata(
|
||||
@ -100,6 +105,8 @@ class UploadOps(BaseCrawlOps):
|
||||
uploads: List[UploadFile],
|
||||
name: Optional[str],
|
||||
notes: Optional[str],
|
||||
collections: Optional[List[UUID4]],
|
||||
tags: Optional[List[str]],
|
||||
org: Organization,
|
||||
user: User,
|
||||
):
|
||||
@ -117,9 +124,13 @@ class UploadOps(BaseCrawlOps):
|
||||
)
|
||||
files.append(file_reader.file_prep.get_crawl_file())
|
||||
|
||||
return await self._create_upload(files, name, notes, id_, org, user)
|
||||
return await self._create_upload(
|
||||
files, name, notes, collections, tags, id_, org, user
|
||||
)
|
||||
|
||||
async def _create_upload(self, files, name, notes, id_, org, user):
|
||||
async def _create_upload(
|
||||
self, files, name, notes, collections, tags, id_, org, user
|
||||
):
|
||||
now = dt_now()
|
||||
# ts_now = now.strftime("%Y%m%d%H%M%S")
|
||||
# crawl_id = f"upload-{ts_now}-{str(id_)[:12]}"
|
||||
@ -127,10 +138,16 @@ class UploadOps(BaseCrawlOps):
|
||||
|
||||
file_size = sum(file_.size for file_ in files)
|
||||
|
||||
collection_uuids = []
|
||||
for coll in collections:
|
||||
collection_uuids.append(uuid.UUID(coll))
|
||||
|
||||
uploaded = UploadedCrawl(
|
||||
id=crawl_id,
|
||||
name=name or "New Upload @ " + str(now),
|
||||
notes=notes,
|
||||
collections=collection_uuids,
|
||||
tags=tags,
|
||||
userid=user.id,
|
||||
oid=org.id,
|
||||
files=files,
|
||||
@ -224,10 +241,24 @@ def init_uploads_api(app, mdb, users, crawl_manager, crawl_configs, orgs, user_d
|
||||
uploads: List[UploadFile] = File(...),
|
||||
name: Optional[str] = "",
|
||||
notes: Optional[str] = "",
|
||||
collections: Optional[str] = "",
|
||||
tags: Optional[str] = "",
|
||||
org: Organization = Depends(org_crawl_dep),
|
||||
user: User = Depends(user_dep),
|
||||
):
|
||||
return await ops.upload_formdata(uploads, name, notes, org, user)
|
||||
name = unquote(name)
|
||||
notes = unquote(notes)
|
||||
colls_list = []
|
||||
if collections:
|
||||
colls_list = unquote(collections).split(",")
|
||||
|
||||
tags_list = []
|
||||
if tags:
|
||||
tags_list = unquote(tags).split(",")
|
||||
|
||||
return await ops.upload_formdata(
|
||||
uploads, name, notes, colls_list, tags_list, org, user
|
||||
)
|
||||
|
||||
@app.put("/orgs/{oid}/uploads/stream", tags=["uploads"])
|
||||
async def upload_stream(
|
||||
@ -235,12 +266,32 @@ def init_uploads_api(app, mdb, users, crawl_manager, crawl_configs, orgs, user_d
|
||||
filename: str,
|
||||
name: Optional[str] = "",
|
||||
notes: Optional[str] = "",
|
||||
collections: Optional[str] = "",
|
||||
tags: Optional[str] = "",
|
||||
replaceId: Optional[str] = "",
|
||||
org: Organization = Depends(org_crawl_dep),
|
||||
user: User = Depends(user_dep),
|
||||
):
|
||||
name = unquote(name)
|
||||
notes = unquote(notes)
|
||||
colls_list = []
|
||||
if collections:
|
||||
colls_list = unquote(collections).split(",")
|
||||
|
||||
tags_list = []
|
||||
if tags:
|
||||
tags_list = unquote(tags).split(",")
|
||||
|
||||
return await ops.upload_stream(
|
||||
request.stream(), filename, name, notes, org, user, replaceId
|
||||
request.stream(),
|
||||
filename,
|
||||
name,
|
||||
notes,
|
||||
colls_list,
|
||||
tags_list,
|
||||
org,
|
||||
user,
|
||||
replaceId,
|
||||
)
|
||||
|
||||
@app.get("/orgs/{oid}/uploads", tags=["uploads"], response_model=PaginatedResponse)
|
||||
|
@ -329,3 +329,14 @@ def auto_add_crawl_id(crawler_auth_headers, default_org_id, auto_add_collection_
|
||||
@pytest.fixture(scope="session")
def auto_add_config_id(auto_add_crawl_id):
    """Return the module-level ``_auto_add_config_id`` value.

    The ``auto_add_crawl_id`` fixture is requested but its value is not used
    here; presumably it is listed so that the global has been populated
    before this fixture reads it -- TODO confirm against the fixture above.
    """
    config_id = _auto_add_config_id
    return config_id
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def uploads_collection_id(crawler_auth_headers, default_org_id):
    """Create the "Upload test collection" collection and return its id.

    POSTs to the org's collections endpoint using the crawler's auth
    headers, asserts that the request succeeded with HTTP 200, and returns
    the ``id`` field of the JSON response. Session-scoped, so the
    collection is created once per test session.
    """
    collections_url = f"{API_PREFIX}/orgs/{default_org_id}/collections"
    payload = {"name": "Upload test collection"}

    response = requests.post(
        collections_url,
        headers=crawler_auth_headers,
        json=payload,
    )

    assert response.status_code == 200
    return response.json()["id"]
|
||||
|
@ -13,10 +13,10 @@ upload_dl_path = None
|
||||
curr_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
|
||||
|
||||
def test_upload_stream(admin_auth_headers, default_org_id):
|
||||
def test_upload_stream(admin_auth_headers, default_org_id, uploads_collection_id):
|
||||
with open(os.path.join(curr_dir, "data", "example.wacz"), "rb") as fh:
|
||||
r = requests.put(
|
||||
f"{API_PREFIX}/orgs/{default_org_id}/uploads/stream?filename=test.wacz&name=My%20Upload&notes=Testing%0AData",
|
||||
f"{API_PREFIX}/orgs/{default_org_id}/uploads/stream?filename=test.wacz&name=My%20Upload&notes=Testing%0AData&collections={uploads_collection_id}&tags=one%2Ctwo",
|
||||
headers=admin_auth_headers,
|
||||
data=read_in_chunks(fh),
|
||||
)
|
||||
@ -28,7 +28,7 @@ def test_upload_stream(admin_auth_headers, default_org_id):
|
||||
upload_id = r.json()["id"]
|
||||
|
||||
|
||||
def test_list_stream_upload(admin_auth_headers, default_org_id):
|
||||
def test_list_stream_upload(admin_auth_headers, default_org_id, uploads_collection_id):
|
||||
r = requests.get(
|
||||
f"{API_PREFIX}/orgs/{default_org_id}/uploads",
|
||||
headers=admin_auth_headers,
|
||||
@ -46,17 +46,20 @@ def test_list_stream_upload(admin_auth_headers, default_org_id):
|
||||
assert found
|
||||
assert found["name"] == "My Upload"
|
||||
assert found["notes"] == "Testing\nData"
|
||||
assert found["collections"] == [uploads_collection_id]
|
||||
assert sorted(found["tags"]) == ["one", "two"]
|
||||
assert "files" not in found
|
||||
assert "resources" not in found
|
||||
|
||||
|
||||
def test_get_stream_upload(admin_auth_headers, default_org_id):
|
||||
def test_get_stream_upload(admin_auth_headers, default_org_id, uploads_collection_id):
|
||||
r = requests.get(
|
||||
f"{API_PREFIX}/orgs/{default_org_id}/uploads/{upload_id}/replay.json",
|
||||
headers=admin_auth_headers,
|
||||
)
|
||||
assert r.status_code == 200
|
||||
result = r.json()
|
||||
assert uploads_collection_id in result["collections"]
|
||||
assert "files" not in result
|
||||
upload_dl_path = result["resources"][0]["path"]
|
||||
assert "test-" in result["resources"][0]["name"]
|
||||
@ -79,7 +82,7 @@ def test_get_stream_upload(admin_auth_headers, default_org_id):
|
||||
assert r.status_code == 200
|
||||
|
||||
|
||||
def test_upload_form(admin_auth_headers, default_org_id):
|
||||
def test_upload_form(admin_auth_headers, default_org_id, uploads_collection_id):
|
||||
with open(os.path.join(curr_dir, "data", "example.wacz"), "rb") as fh:
|
||||
data = fh.read()
|
||||
|
||||
@ -90,7 +93,7 @@ def test_upload_form(admin_auth_headers, default_org_id):
|
||||
]
|
||||
|
||||
r = requests.put(
|
||||
f"{API_PREFIX}/orgs/{default_org_id}/uploads/formdata?name=test2.wacz",
|
||||
f"{API_PREFIX}/orgs/{default_org_id}/uploads/formdata?name=test2.wacz&collections={uploads_collection_id}&tags=three%2Cfour",
|
||||
headers=admin_auth_headers,
|
||||
files=files,
|
||||
)
|
||||
@ -102,7 +105,7 @@ def test_upload_form(admin_auth_headers, default_org_id):
|
||||
upload_id_2 = r.json()["id"]
|
||||
|
||||
|
||||
def test_list_uploads(admin_auth_headers, default_org_id):
|
||||
def test_list_uploads(admin_auth_headers, default_org_id, uploads_collection_id):
|
||||
r = requests.get(
|
||||
f"{API_PREFIX}/orgs/{default_org_id}/uploads",
|
||||
headers=admin_auth_headers,
|
||||
@ -119,58 +122,42 @@ def test_list_uploads(admin_auth_headers, default_org_id):
|
||||
|
||||
assert found
|
||||
assert found["name"] == "test2.wacz"
|
||||
assert found["collections"] == [uploads_collection_id]
|
||||
assert sorted(found["tags"]) == ["four", "three"]
|
||||
|
||||
assert "files" not in res
|
||||
assert "resources" not in res
|
||||
|
||||
|
||||
def test_collection_uploads(admin_auth_headers, default_org_id):
|
||||
# Create collection with one upload
|
||||
r = requests.post(
|
||||
f"{API_PREFIX}/orgs/{default_org_id}/collections",
|
||||
headers=admin_auth_headers,
|
||||
json={
|
||||
"crawlIds": [upload_id],
|
||||
"name": "My Test Coll",
|
||||
},
|
||||
)
|
||||
assert r.status_code == 200
|
||||
data = r.json()
|
||||
coll_id = data["id"]
|
||||
assert data["added"]
|
||||
|
||||
def test_collection_uploads(admin_auth_headers, default_org_id, uploads_collection_id):
|
||||
# Test uploads filtered by collection
|
||||
r = requests.get(
|
||||
f"{API_PREFIX}/orgs/{default_org_id}/uploads?collectionId={coll_id}",
|
||||
f"{API_PREFIX}/orgs/{default_org_id}/uploads?collectionId={uploads_collection_id}",
|
||||
headers=admin_auth_headers,
|
||||
)
|
||||
|
||||
results = r.json()
|
||||
|
||||
assert len(results["items"]) == 1
|
||||
assert results["items"][0]["id"] == upload_id
|
||||
assert len(results["items"]) == 2
|
||||
assert results["items"][0]["id"] in (upload_id, upload_id_2)
|
||||
assert results["items"][1]["id"] in (upload_id, upload_id_2)
|
||||
|
||||
# Test all crawls filtered by collection
|
||||
r = requests.get(
|
||||
f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?collectionId={coll_id}",
|
||||
f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?collectionId={uploads_collection_id}",
|
||||
headers=admin_auth_headers,
|
||||
)
|
||||
|
||||
results = r.json()
|
||||
|
||||
assert len(results["items"]) == 1
|
||||
assert results["items"][0]["id"] == upload_id
|
||||
|
||||
# Delete Collection
|
||||
r = requests.delete(
|
||||
f"{API_PREFIX}/orgs/{default_org_id}/collections/{coll_id}",
|
||||
headers=admin_auth_headers,
|
||||
)
|
||||
assert r.status_code == 200
|
||||
assert r.json()["success"]
|
||||
assert len(results["items"]) == 2
|
||||
assert results["items"][0]["id"] in (upload_id, upload_id_2)
|
||||
assert results["items"][1]["id"] in (upload_id, upload_id_2)
|
||||
|
||||
|
||||
def test_get_upload_replay_json(admin_auth_headers, default_org_id):
|
||||
def test_get_upload_replay_json(
|
||||
admin_auth_headers, default_org_id, uploads_collection_id
|
||||
):
|
||||
r = requests.get(
|
||||
f"{API_PREFIX}/orgs/{default_org_id}/uploads/{upload_id}/replay.json",
|
||||
headers=admin_auth_headers,
|
||||
@ -181,6 +168,8 @@ def test_get_upload_replay_json(admin_auth_headers, default_org_id):
|
||||
assert data
|
||||
assert data["id"] == upload_id
|
||||
assert data["name"] == "My Upload"
|
||||
assert data["collections"] == [uploads_collection_id]
|
||||
assert sorted(data["tags"]) == ["one", "two"]
|
||||
assert data["resources"]
|
||||
assert data["resources"][0]["path"]
|
||||
assert data["resources"][0]["size"]
|
||||
@ -189,7 +178,9 @@ def test_get_upload_replay_json(admin_auth_headers, default_org_id):
|
||||
assert "files" not in data
|
||||
|
||||
|
||||
def test_get_upload_replay_json_admin(admin_auth_headers, default_org_id):
|
||||
def test_get_upload_replay_json_admin(
|
||||
admin_auth_headers, default_org_id, uploads_collection_id
|
||||
):
|
||||
r = requests.get(
|
||||
f"{API_PREFIX}/orgs/all/uploads/{upload_id}/replay.json",
|
||||
headers=admin_auth_headers,
|
||||
@ -200,6 +191,8 @@ def test_get_upload_replay_json_admin(admin_auth_headers, default_org_id):
|
||||
assert data
|
||||
assert data["id"] == upload_id
|
||||
assert data["name"] == "My Upload"
|
||||
assert data["collections"] == [uploads_collection_id]
|
||||
assert sorted(data["tags"]) == ["one", "two"]
|
||||
assert data["resources"]
|
||||
assert data["resources"][0]["path"]
|
||||
assert data["resources"][0]["size"]
|
||||
@ -208,16 +201,20 @@ def test_get_upload_replay_json_admin(admin_auth_headers, default_org_id):
|
||||
assert "files" not in data
|
||||
|
||||
|
||||
def test_replace_upload(admin_auth_headers, default_org_id):
|
||||
actual_id = do_upload_replace(admin_auth_headers, default_org_id, upload_id)
|
||||
def test_replace_upload(admin_auth_headers, default_org_id, uploads_collection_id):
|
||||
actual_id = do_upload_replace(
|
||||
admin_auth_headers, default_org_id, upload_id, uploads_collection_id
|
||||
)
|
||||
|
||||
assert upload_id == actual_id
|
||||
|
||||
|
||||
def do_upload_replace(admin_auth_headers, default_org_id, upload_id):
|
||||
def do_upload_replace(
|
||||
admin_auth_headers, default_org_id, upload_id, uploads_collection_id
|
||||
):
|
||||
with open(os.path.join(curr_dir, "data", "example-2.wacz"), "rb") as fh:
|
||||
r = requests.put(
|
||||
f"{API_PREFIX}/orgs/{default_org_id}/uploads/stream?filename=test.wacz&name=My%20Upload%20Updated&replaceId={upload_id}",
|
||||
f"{API_PREFIX}/orgs/{default_org_id}/uploads/stream?filename=test.wacz&name=My%20Upload%20Updated&replaceId={upload_id}&collections={uploads_collection_id}",
|
||||
headers=admin_auth_headers,
|
||||
data=read_in_chunks(fh),
|
||||
)
|
||||
@ -294,11 +291,27 @@ def test_delete_stream_upload(admin_auth_headers, default_org_id):
|
||||
assert r.json()["deleted"] == True
|
||||
|
||||
|
||||
def test_replace_upload_non_existent(admin_auth_headers, default_org_id):
|
||||
def test_ensure_deleted(admin_auth_headers, default_org_id):
    """Check that the module-level ``upload_id`` is absent from the uploads list.

    Fetches the org's uploads listing and fails if any returned item has an
    ``id`` equal to ``upload_id``.
    """
    response = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/uploads",
        headers=admin_auth_headers,
    )
    listing = response.json()

    # any() stops at the first match, mirroring the original loop's
    # fail-on-first-hit behavior.
    assert not any(item["id"] == upload_id for item in listing["items"])
|
||||
|
||||
|
||||
def test_replace_upload_non_existent(
|
||||
admin_auth_headers, default_org_id, uploads_collection_id
|
||||
):
|
||||
global upload_id
|
||||
|
||||
# same replacement, but now to a non-existent upload
|
||||
actual_id = do_upload_replace(admin_auth_headers, default_org_id, upload_id)
|
||||
actual_id = do_upload_replace(
|
||||
admin_auth_headers, default_org_id, upload_id, uploads_collection_id
|
||||
)
|
||||
|
||||
# new upload_id created
|
||||
assert actual_id != upload_id
|
||||
@ -306,15 +319,6 @@ def test_replace_upload_non_existent(admin_auth_headers, default_org_id):
|
||||
upload_id = actual_id
|
||||
|
||||
|
||||
def test_delete_stream_upload_2(admin_auth_headers, default_org_id):
|
||||
r = requests.post(
|
||||
f"{API_PREFIX}/orgs/{default_org_id}/uploads/delete",
|
||||
headers=admin_auth_headers,
|
||||
json={"crawl_ids": [upload_id]},
|
||||
)
|
||||
assert r.json()["deleted"] == True
|
||||
|
||||
|
||||
def test_verify_from_upload_resource_count(admin_auth_headers, default_org_id):
|
||||
r = requests.get(
|
||||
f"{API_PREFIX}/orgs/{default_org_id}/uploads/{upload_id_2}/replay.json",
|
||||
@ -429,15 +433,3 @@ def test_delete_form_upload_from_all_crawls(admin_auth_headers, default_org_id):
|
||||
json={"crawl_ids": [upload_id_2]},
|
||||
)
|
||||
assert r.json()["deleted"] == True
|
||||
|
||||
|
||||
def test_ensure_deleted(admin_auth_headers, default_org_id):
|
||||
r = requests.get(
|
||||
f"{API_PREFIX}/orgs/{default_org_id}/uploads",
|
||||
headers=admin_auth_headers,
|
||||
)
|
||||
results = r.json()
|
||||
|
||||
for res in results["items"]:
|
||||
if res["id"] in (upload_id_2, upload_id):
|
||||
assert False
|
||||
|
Loading…
Reference in New Issue
Block a user