Add public API endpoint for public collections (#2174)

Fixes #1051 

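If no org with the provided slug exists, or the org has not enabled its
public profile, return the same 404 response with a detail of
"public_profile_not_found" to prevent people from using the public endpoint
to determine whether an org exists. An org with an enabled public profile
but no public collections returns its profile with an empty collection list.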

Endpoint is `GET /api/public-collections/<org-slug>` (no auth needed) to
avoid collisions with existing org and collection endpoints.
This commit is contained in:
Tessa Walsh 2024-11-27 10:34:25 -05:00 committed by sua yoo
parent 42ebfd303d
commit 190bdeb868
No known key found for this signature in database
GPG Key ID: 5AD1B4C02D4F0567
5 changed files with 207 additions and 34 deletions

View File

@ -30,6 +30,9 @@ from .models import (
UpdatedResponse,
SuccessResponse,
CollectionSearchValuesResponse,
OrgPublicCollections,
PublicOrgDetails,
CollAccessType,
)
from .utils import dt_now
@ -395,6 +398,30 @@ class CollectionOps:
)
await self.update_crawl_collections(crawl_id)
async def get_org_public_collections(self, org_slug: str):
    """Return public profile details and public collections for an org.

    The org is looked up by slug. Any lookup failure, as well as an org
    whose public profile is not enabled, produces the same 404 response
    ("public_profile_not_found") so that callers cannot tell the two
    cases apart.
    """
    profile_missing = HTTPException(
        status_code=404, detail="public_profile_not_found"
    )

    try:
        org = await self.orgs.get_org_by_slug(org_slug)
    # Deliberately broad: every lookup failure maps to the same 404
    # pylint: disable=broad-exception-caught
    except Exception:
        # pylint: disable=raise-missing-from
        raise profile_missing

    if not org.enablePublicProfile:
        raise profile_missing

    # list_collections returns a pair; only the first element is used here
    public_colls, _ = await self.list_collections(
        org.id, access=CollAccessType.PUBLIC
    )

    return OrgPublicCollections(
        org=PublicOrgDetails(
            name=org.name,
            # Unset public fields are exposed as empty strings
            description=org.publicDescription or "",
            url=org.publicUrl or "",
        ),
        collections=public_colls,
    )
# ============================================================================
# pylint: disable=too-many-locals
@ -582,4 +609,12 @@ def init_collections_api(app, mdb, orgs, storage_ops, event_webhook_ops):
):
return await colls.download_collection(coll_id, org)
# Registered directly on the app and keyed by org slug; the handler declares
# no auth dependency, so it can be called without credentials.
@app.get(
    "/public-collections/{org_slug}",
    tags=["collections"],
    response_model=OrgPublicCollections,
)
async def get_org_public_collections(org_slug: str):
    # Thin wrapper: all lookup and 404 handling lives in the ops method
    public_listing = await colls.get_org_public_collections(org_slug)
    return public_listing
return colls

View File

@ -1152,6 +1152,24 @@ class RenameOrg(BaseModel):
slug: Optional[str] = None
# ============================================================================
class PublicOrgDetails(BaseModel):
    """Model for org details that are available in public profile"""

    # Name of the org (required)
    name: str
    # Public description of the org; empty string when none is set
    description: str = ""
    # Public URL of the org; empty string when none is set
    url: str = ""
# ============================================================================
class OrgPublicCollections(BaseModel):
    """Model for listing public collections in org"""

    # Public profile details of the org that owns the collections
    org: PublicOrgDetails
    # Collections of the org with public access; empty when there are none
    collections: List[CollOut] = []
# ============================================================================
class OrgStorageRefs(BaseModel):
"""Input model for setting primary storage + optional replicas"""
@ -1381,10 +1399,12 @@ class OrgReadOnlyUpdate(BaseModel):
# ============================================================================
class OrgListPublicCollectionsUpdate(BaseModel):
"""Organization listPublicCollections update"""
class OrgPublicProfileUpdate(BaseModel):
"""Organization enablePublicProfile update"""
listPublicCollections: bool
enablePublicProfile: Optional[bool] = None
publicDescription: Optional[str] = None
publicUrl: Optional[str] = None
# ============================================================================
@ -1455,7 +1475,9 @@ class OrgOut(BaseMongoModel):
allowedProxies: list[str] = []
crawlingDefaults: Optional[CrawlConfigDefaults] = None
listPublicCollections: bool = False
enablePublicProfile: bool = False
publicDescription: str = ""
publicUrl: str = ""
# ============================================================================
@ -1512,7 +1534,9 @@ class Organization(BaseMongoModel):
allowedProxies: list[str] = []
crawlingDefaults: Optional[CrawlConfigDefaults] = None
listPublicCollections: bool = False
enablePublicProfile: bool = False
publicDescription: Optional[str] = None
publicUrl: Optional[str] = None
def is_owner(self, user):
"""Check if user is owner"""

View File

@ -78,7 +78,7 @@ from .models import (
RemovedResponse,
OrgSlugsResponse,
OrgImportResponse,
OrgListPublicCollectionsUpdate,
OrgPublicProfileUpdate,
)
from .pagination import DEFAULT_PAGE_SIZE, paginated_format
from .utils import (
@ -295,6 +295,14 @@ class OrgOps:
return Organization.from_dict(res)
async def get_org_by_slug(self, slug: str) -> Organization:
    """Get an org by slug

    Raises HTTPException (400, "invalid_org_slug") when no org document
    matches the given slug.
    """
    org_doc = await self.orgs.find_one({"slug": slug})
    if org_doc:
        return Organization.from_dict(org_doc)

    raise HTTPException(status_code=400, detail="invalid_org_slug")
async def get_default_org(self) -> Organization:
"""Get default organization"""
res = await self.orgs.find_one({"default": True})
@ -998,13 +1006,18 @@ class OrgOps:
)
return res is not None
async def update_list_public_collections(
self, org: Organization, list_public_collections: bool
async def update_public_profile(
self, org: Organization, update: OrgPublicProfileUpdate
):
"""Update listPublicCollections field on organization"""
"""Update or enable/disable organization's public profile"""
query = update.dict(exclude_unset=True)
if len(query) == 0:
raise HTTPException(status_code=400, detail="no_update_data")
res = await self.orgs.find_one_and_update(
{"_id": org.id},
{"$set": {"listPublicCollections": list_public_collections}},
{"$set": query},
)
return res is not None
@ -1565,15 +1578,15 @@ def init_orgs_api(
return {"updated": True}
@router.post(
"/list-public-collections",
"/public-profile",
tags=["organizations", "collections"],
response_model=UpdatedResponse,
)
async def update_list_public_collections(
update: OrgListPublicCollectionsUpdate,
async def update_public_profile(
update: OrgPublicProfileUpdate,
org: Organization = Depends(org_owner_dep),
):
await ops.update_list_public_collections(org, update.listPublicCollections)
await ops.update_public_profile(org, update)
return {"updated": True}

View File

@ -4,7 +4,7 @@ import os
from zipfile import ZipFile, ZIP_STORED
from tempfile import TemporaryFile
from .conftest import API_PREFIX
from .conftest import API_PREFIX, NON_DEFAULT_ORG_NAME, NON_DEFAULT_ORG_SLUG
from .utils import read_in_chunks
COLLECTION_NAME = "Test collection"
@ -15,6 +15,7 @@ DESCRIPTION = "Test description"
_coll_id = None
_second_coll_id = None
_public_coll_id = None
upload_id = None
modified = None
@ -66,6 +67,7 @@ def test_create_public_collection(
assert data["added"]
assert data["name"] == PUBLIC_COLLECTION_NAME
global _public_coll_id
_public_coll_id = data["id"]
# Verify that it is public
@ -725,6 +727,123 @@ def test_filter_sort_collections(
assert r.json()["detail"] == "invalid_sort_direction"
def test_list_public_collections(
    crawler_auth_headers,
    admin_auth_headers,
    default_org_id,
    non_default_org_id,
    crawler_crawl_id,
    admin_crawl_id,
):
    """Exercise the unauthenticated public collections endpoint on the default org.

    Checks that the endpoint returns 404 "public_profile_not_found" while the
    org's public profile is disabled, that enabling the profile makes the org
    details and both public collections visible, and that an unknown slug
    yields the same 404.

    Relies on the module-level `_public_coll_id` having been set by
    test_create_public_collection earlier in this module.
    """
    # Create new public collection
    r = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/collections",
        headers=crawler_auth_headers,
        json={
            "crawlIds": [crawler_crawl_id],
            "name": "Second public collection",
            "access": "public",
        },
    )
    assert r.status_code == 200
    second_public_coll_id = r.json()["id"]

    # Get default org slug
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}",
        headers=crawler_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    org_slug = data["slug"]
    org_name = data["name"]

    # Verify that public profile isn't enabled
    assert data["enablePublicProfile"] is False
    assert data["publicDescription"] == ""
    assert data["publicUrl"] == ""

    # Try listing public collections without org public profile enabled
    r = requests.get(f"{API_PREFIX}/public-collections/{org_slug}")
    assert r.status_code == 404
    assert r.json()["detail"] == "public_profile_not_found"

    # Enable public profile on org
    public_description = "This is a test public org!"
    public_url = "https://example.com"

    r = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/public-profile",
        headers=admin_auth_headers,
        json={
            "enablePublicProfile": True,
            "publicDescription": public_description,
            "publicUrl": public_url,
        },
    )
    assert r.status_code == 200
    assert r.json()["updated"]

    # Confirm the profile update is reflected on the org itself
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}",
        headers=admin_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data["enablePublicProfile"]
    assert data["publicDescription"] == public_description
    assert data["publicUrl"] == public_url

    # List public collections with no auth (public profile now enabled)
    r = requests.get(f"{API_PREFIX}/public-collections/{org_slug}")
    assert r.status_code == 200
    data = r.json()

    org_data = data["org"]
    assert org_data["name"] == org_name
    assert org_data["description"] == public_description
    assert org_data["url"] == public_url

    # Expect exactly the earlier public collection plus the one created above
    collections = data["collections"]
    assert len(collections) == 2
    for collection in collections:
        assert collection["id"] in (_public_coll_id, second_public_coll_id)
        assert collection["access"] == "public"

    # Test non-existing slug - it should return a 404 but not reveal
    # whether or not an org exists with that slug
    r = requests.get(f"{API_PREFIX}/public-collections/nonexistentslug")
    assert r.status_code == 404
    assert r.json()["detail"] == "public_profile_not_found"
def test_list_public_collections_no_colls(non_default_org_id, admin_auth_headers):
    """Exercise the public collections endpoint on the non-default org.

    While the org's public profile is disabled the endpoint must return the
    same 404 as for a missing org. Once the profile is enabled, the endpoint
    must return the org name with an empty collections list.
    """
    # Test existing org that's not public - should return same 404 as
    # if org doesn't exist
    r = requests.get(f"{API_PREFIX}/public-collections/{NON_DEFAULT_ORG_SLUG}")
    assert r.status_code == 404
    assert r.json()["detail"] == "public_profile_not_found"

    # Enable public profile on org with zero public collections
    r = requests.post(
        f"{API_PREFIX}/orgs/{non_default_org_id}/public-profile",
        headers=admin_auth_headers,
        json={
            "enablePublicProfile": True,
        },
    )
    assert r.status_code == 200
    assert r.json()["updated"]

    # List public collections with no auth - should still get profile even
    # with no public collections
    r = requests.get(f"{API_PREFIX}/public-collections/{NON_DEFAULT_ORG_SLUG}")
    assert r.status_code == 200
    data = r.json()
    assert data["org"]["name"] == NON_DEFAULT_ORG_NAME
    assert data["collections"] == []
def test_delete_collection(crawler_auth_headers, default_org_id, crawler_crawl_id):
# Delete second collection
r = requests.delete(

View File

@ -17,7 +17,7 @@ invite_email = "test-user@example.com"
def test_ensure_only_one_default_org(admin_auth_headers):
r = requests.get(f"{API_PREFIX}/orgs", headers=admin_auth_headers)
data = r.json()
assert data["total"] == 1
assert data["total"] == 2
orgs = data["items"]
default_orgs = [org for org in orgs if org["default"]]
@ -697,24 +697,6 @@ def test_update_read_only(admin_auth_headers, default_org_id):
assert data["readOnlyReason"] == ""
def test_update_list_public_collections(admin_auth_headers, default_org_id):
# Test that default is false
r = requests.get(f"{API_PREFIX}/orgs/{default_org_id}", headers=admin_auth_headers)
assert r.json()["listPublicCollections"] is False
# Update
r = requests.post(
f"{API_PREFIX}/orgs/{default_org_id}/list-public-collections",
headers=admin_auth_headers,
json={"listPublicCollections": True},
)
assert r.json()["updated"]
# Test update is reflected in GET response
r = requests.get(f"{API_PREFIX}/orgs/{default_org_id}", headers=admin_auth_headers)
assert r.json()["listPublicCollections"]
def test_sort_orgs(admin_auth_headers):
# Create a few new orgs for testing
r = requests.post(