Use collection uuid as id (instead of name) (#855)

Also ensure name is not empty by adding minimum length of 1

Co-authored-by: Ilya Kreymer <ikreymer@gmail.com>
This commit is contained in:
Tessa Walsh 2023-05-19 09:03:48 -04:00 committed by GitHub
parent d07204e59d
commit f482831d53
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 146 additions and 89 deletions

View File

@ -7,7 +7,7 @@ from typing import Optional, List
import pymongo
from fastapi import Depends, HTTPException
from pydantic import BaseModel, UUID4
from pydantic import BaseModel, UUID4, Field
from .db import BaseMongoModel
from .orgs import Organization
@ -18,7 +18,7 @@ from .pagination import DEFAULT_PAGE_SIZE, paginated_format
class Collection(BaseMongoModel):
"""Org collection structure"""
name: str
name: str = Field(..., min_length=1)
oid: UUID4
@ -31,7 +31,7 @@ class Collection(BaseMongoModel):
class CollIn(BaseModel):
"""Collection Passed in By User"""
name: str
name: str = Field(..., min_length=1)
description: Optional[str]
crawlIds: Optional[List[str]] = []
@ -40,17 +40,11 @@ class CollIn(BaseModel):
class UpdateColl(BaseModel):
"""Update collection"""
name: Optional[str]
crawlIds: Optional[List[str]] = []
description: Optional[str]
# ============================================================================
class RenameColl(BaseModel):
"""Rename collection"""
name: str
# ============================================================================
class CollectionOps:
"""ops for working with named collections of crawls"""
@ -77,8 +71,9 @@ class CollectionOps:
):
"""Add new collection"""
crawl_ids = crawl_ids if crawl_ids else []
coll_id = uuid.uuid4()
coll = Collection(
id=uuid.uuid4(),
id=coll_id,
oid=oid,
name=name,
crawlIds=crawl_ids,
@ -86,66 +81,60 @@ class CollectionOps:
)
try:
await self.collections.insert_one(coll.to_dict())
return {"added": name}
return {"added": {"id": coll_id, "name": name}}
except pymongo.errors.DuplicateKeyError:
# pylint: disable=raise-missing-from
raise HTTPException(status_code=400, detail="collection_already_exists")
raise HTTPException(status_code=400, detail="collection_name_taken")
async def update_collection(self, oid: uuid.UUID, name: str, update: UpdateColl):
async def update_collection(self, coll_id: uuid.UUID, update: UpdateColl):
"""Update collection"""
query = update.dict(exclude_unset=True)
if len(query) == 0:
raise HTTPException(status_code=400, detail="no_update_data")
result = await self.collections.find_one_and_update(
{"name": name, "oid": oid},
{"$set": query},
return_document=pymongo.ReturnDocument.AFTER,
)
try:
result = await self.collections.find_one_and_update(
{"_id": coll_id},
{"$set": query},
return_document=pymongo.ReturnDocument.AFTER,
)
except pymongo.errors.DuplicateKeyError:
# pylint: disable=raise-missing-from
raise HTTPException(status_code=400, detail="collection_name_taken")
if not result:
raise HTTPException(status_code=404, detail="collection_not_found")
return result
return Collection.from_dict(result)
async def rename_collection(self, oid: uuid.UUID, name: str, new_name: str):
"""Rename collection"""
result = await self.collections.find_one_and_update(
{"name": name, "oid": oid},
{"$set": {"name": new_name}},
)
if not result:
raise HTTPException(status_code=404, detail="collection_not_found")
return new_name
async def add_crawl_to_collection(self, oid: uuid.UUID, name: str, crawl_id: str):
async def add_crawl_to_collection(self, coll_id: uuid.UUID, crawl_id: str):
"""Add crawl to collection"""
result = await self.collections.find_one_and_update(
{"name": name, "oid": oid},
{"_id": coll_id},
{"$push": {"crawlIds": crawl_id}},
return_document=pymongo.ReturnDocument.AFTER,
)
if not result:
raise HTTPException(status_code=404, detail="collection_not_found")
return result
async def remove_crawl_from_collection(
self, oid: uuid.UUID, name: str, crawl_id: str
):
return Collection.from_dict(result)
async def remove_crawl_from_collection(self, coll_id: uuid.UUID, crawl_id: str):
"""Remove crawl from collection"""
result = await self.collections.find_one_and_update(
{"name": name, "oid": oid},
{"_id": coll_id},
{"$pull": {"crawlIds": crawl_id}},
return_document=pymongo.ReturnDocument.AFTER,
)
if not result:
raise HTTPException(status_code=404, detail="collection_not_found")
return result
async def get_collection(self, oid: uuid.UUID, name: str):
"""Get collection by org + name"""
res = await self.collections.find_one({"name": name, "oid": oid})
return Collection.from_dict(result)
async def get_collection(self, coll_id: uuid.UUID):
"""Get collection by id"""
res = await self.collections.find_one({"_id": coll_id})
return Collection.from_dict(res) if res else None
async def find_collections(self, oid: uuid.UUID, names: List[str]):
@ -184,10 +173,10 @@ class CollectionOps:
return collections, total
async def get_collection_crawls(self, oid: uuid.UUID, name: str):
"""Find collection and get all crawls by collection name per org"""
async def get_collection_crawls(self, coll_id: uuid.UUID, oid: uuid.UUID):
"""Find collection and get all crawl resources"""
coll = await self.get_collection(oid, name)
coll = await self.get_collection(coll_id)
if not coll:
raise HTTPException(status_code=404, detail="collection_not_found")
@ -209,7 +198,7 @@ class CollectionOps:
# pylint: disable=too-many-locals
def init_collections_api(app, mdb, crawls, orgs, crawl_manager):
"""init collections api"""
# pylint: disable=invalid-name
# pylint: disable=invalid-name, unused-argument
colls = CollectionOps(mdb, crawls, crawl_manager, orgs)
@ -224,7 +213,10 @@ def init_collections_api(app, mdb, crawls, orgs, crawl_manager):
org.id, new_coll.name, new_coll.crawlIds, new_coll.description
)
@app.get("/orgs/{oid}/collections", tags=["collections"])
@app.get(
"/orgs/{oid}/collections",
tags=["collections"],
)
async def list_collection_all(
org: Organization = Depends(org_viewer_dep),
pageSize: int = DEFAULT_PAGE_SIZE,
@ -235,7 +227,10 @@ def init_collections_api(app, mdb, crawls, orgs, crawl_manager):
)
return paginated_format(collections, total, page, pageSize)
@app.get("/orgs/{oid}/collections/$all", tags=["collections"])
@app.get(
"/orgs/{oid}/collections/$all",
tags=["collections"],
)
async def get_collection_all(org: Organization = Depends(org_viewer_dep)):
results = {}
try:
@ -252,10 +247,15 @@ def init_collections_api(app, mdb, crawls, orgs, crawl_manager):
return results
@app.get("/orgs/{oid}/collections/{name}", tags=["collections"])
async def get_collection(name: str, org: Organization = Depends(org_viewer_dep)):
@app.get(
"/orgs/{oid}/collections/{coll_id}",
tags=["collections"],
)
async def get_collection_crawls(
coll_id: uuid.UUID, org: Organization = Depends(org_viewer_dep)
):
try:
results = await colls.get_collection_crawls(org.id, name)
results = await colls.get_collection_crawls(coll_id, org.id)
except Exception as exc:
# pylint: disable=raise-missing-from
@ -265,29 +265,36 @@ def init_collections_api(app, mdb, crawls, orgs, crawl_manager):
return results
@app.post("/orgs/{oid}/collections/{name}/update", tags=["collections"])
@app.post(
"/orgs/{oid}/collections/{coll_id}/update",
tags=["collections"],
response_model=Collection,
)
async def update_collection(
name: str, update: UpdateColl, org: Organization = Depends(org_crawl_dep)
coll_id: uuid.UUID,
update: UpdateColl,
org: Organization = Depends(org_crawl_dep),
):
return await colls.update_collection(org.id, name, update)
return await colls.update_collection(coll_id, update)
@app.post("/orgs/{oid}/collections/{name}/rename", tags=["collections"])
async def rename_collection(
name: str, rename: RenameColl, org: Organization = Depends(org_crawl_dep)
):
await colls.rename_collection(org.id, name, rename.name)
return {"renamed": rename.name}
@app.get("/orgs/{oid}/collections/{name}/add", tags=["collections"])
@app.get(
"/orgs/{oid}/collections/{coll_id}/add",
tags=["collections"],
response_model=Collection,
)
async def add_crawl_to_collection(
crawlId: str, name: str, org: Organization = Depends(org_crawl_dep)
crawlId: str, coll_id: uuid.UUID, org: Organization = Depends(org_crawl_dep)
):
return await colls.add_crawl_to_collection(org.id, name, crawlId)
return await colls.add_crawl_to_collection(coll_id, crawlId)
@app.get("/orgs/{oid}/collections/{name}/remove", tags=["collections"])
@app.get(
"/orgs/{oid}/collections/{coll_id}/remove",
tags=["collections"],
response_model=Collection,
)
async def remove_crawl_from_collection(
crawlId: str, name: str, org: Organization = Depends(org_crawl_dep)
crawlId: str, coll_id: uuid.UUID, org: Organization = Depends(org_crawl_dep)
):
return await colls.remove_crawl_from_collection(org.id, name, crawlId)
return await colls.remove_crawl_from_collection(coll_id, crawlId)
return colls

View File

@ -7,8 +7,8 @@ import asyncio
import sys
from fastapi import FastAPI
from fastapi.routing import APIRouter
from fastapi.responses import JSONResponse
from fastapi.routing import APIRouter
from .db import init_db, update_and_prepare_db

View File

@ -7,6 +7,8 @@ UPDATED_NAME = "Updated tést cöllection"
SECOND_COLLECTION_NAME = "second-collection"
DESCRIPTION = "Test description"
_coll_id = None
def test_create_collection(
crawler_auth_headers, default_org_id, crawler_crawl_id, admin_crawl_id
@ -21,14 +23,46 @@ def test_create_collection(
)
assert r.status_code == 200
data = r.json()
assert data["added"] == COLLECTION_NAME
assert data["added"]["name"] == COLLECTION_NAME
global _coll_id
_coll_id = data["added"]["id"]
def test_create_collection_taken_name(
crawler_auth_headers, default_org_id, crawler_crawl_id, admin_crawl_id
):
r = requests.post(
f"{API_PREFIX}/orgs/{default_org_id}/collections",
headers=crawler_auth_headers,
json={
"crawlIds": [crawler_crawl_id],
"name": COLLECTION_NAME,
},
)
assert r.status_code == 400
assert r.json()["detail"] == "collection_name_taken"
def test_create_collection_empty_name(
crawler_auth_headers, default_org_id, crawler_crawl_id, admin_crawl_id
):
r = requests.post(
f"{API_PREFIX}/orgs/{default_org_id}/collections",
headers=crawler_auth_headers,
json={
"crawlIds": [crawler_crawl_id],
"name": "",
},
)
assert r.status_code == 422
def test_update_collection(
crawler_auth_headers, default_org_id, crawler_crawl_id, admin_crawl_id
):
r = requests.post(
f"{API_PREFIX}/orgs/{default_org_id}/collections/{COLLECTION_NAME}/update",
f"{API_PREFIX}/orgs/{default_org_id}/collections/{_coll_id}/update",
headers=crawler_auth_headers,
json={
"crawlIds": [crawler_crawl_id, admin_crawl_id],
@ -37,6 +71,7 @@ def test_update_collection(
)
assert r.status_code == 200
data = r.json()
assert data["id"] == _coll_id
assert data["name"] == COLLECTION_NAME
assert data["description"] == DESCRIPTION
assert sorted(data["crawlIds"]) == sorted([admin_crawl_id, crawler_crawl_id])
@ -46,24 +81,52 @@ def test_rename_collection(
crawler_auth_headers, default_org_id, crawler_crawl_id, admin_crawl_id
):
r = requests.post(
f"{API_PREFIX}/orgs/{default_org_id}/collections/{COLLECTION_NAME}/rename",
f"{API_PREFIX}/orgs/{default_org_id}/collections/{_coll_id}/update",
headers=crawler_auth_headers,
json={"name": UPDATED_NAME},
)
assert r.status_code == 200
data = r.json()
assert data["renamed"] == UPDATED_NAME
assert data["id"] == _coll_id
assert data["name"] == UPDATED_NAME
def test_rename_collection_taken_name(
crawler_auth_headers, default_org_id, crawler_crawl_id, admin_crawl_id
):
# Add second collection
r = requests.post(
f"{API_PREFIX}/orgs/{default_org_id}/collections",
headers=crawler_auth_headers,
json={
"crawlIds": [crawler_crawl_id],
"name": SECOND_COLLECTION_NAME,
},
)
assert r.status_code == 200
data = r.json()
assert data["added"]["name"] == SECOND_COLLECTION_NAME
# Try to rename first coll to second collection's name
r = requests.post(
f"{API_PREFIX}/orgs/{default_org_id}/collections/{_coll_id}/update",
headers=crawler_auth_headers,
json={"name": SECOND_COLLECTION_NAME},
)
assert r.status_code == 400
assert r.json()["detail"] == "collection_name_taken"
def test_remove_crawl_from_collection(
crawler_auth_headers, default_org_id, crawler_crawl_id, admin_crawl_id
):
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/collections/{UPDATED_NAME}/remove?crawlId={admin_crawl_id}",
f"{API_PREFIX}/orgs/{default_org_id}/collections/{_coll_id}/remove?crawlId={admin_crawl_id}",
headers=crawler_auth_headers,
)
assert r.status_code == 200
data = r.json()
assert data["id"] == _coll_id
assert data["crawlIds"] == [crawler_crawl_id]
@ -71,11 +134,12 @@ def test_add_crawl_to_collection(
crawler_auth_headers, default_org_id, crawler_crawl_id, admin_crawl_id
):
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/collections/{UPDATED_NAME}/add?crawlId={admin_crawl_id}",
f"{API_PREFIX}/orgs/{default_org_id}/collections/{_coll_id}/add?crawlId={admin_crawl_id}",
headers=crawler_auth_headers,
)
assert r.status_code == 200
data = r.json()
assert data["id"] == _coll_id
assert sorted(data["crawlIds"]) == sorted([admin_crawl_id, crawler_crawl_id])
@ -83,7 +147,7 @@ def test_get_collection(
crawler_auth_headers, default_org_id, crawler_crawl_id, admin_crawl_id
):
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/collections/{UPDATED_NAME}",
f"{API_PREFIX}/orgs/{default_org_id}/collections/{_coll_id}",
headers=crawler_auth_headers,
)
assert r.status_code == 200
@ -100,20 +164,6 @@ def test_get_collection(
def test_list_collections(
crawler_auth_headers, default_org_id, crawler_crawl_id, admin_crawl_id
):
# Add second collection
r = requests.post(
f"{API_PREFIX}/orgs/{default_org_id}/collections",
headers=crawler_auth_headers,
json={
"crawlIds": [crawler_crawl_id],
"name": SECOND_COLLECTION_NAME,
},
)
assert r.status_code == 200
data = r.json()
assert data["added"] == SECOND_COLLECTION_NAME
# Test endpoint
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/collections", headers=crawler_auth_headers
)