Add slugs to org backend (#1250)

- Add slug field with uniqueness constraint to Organization
- Use python-slugify to generate slug from name and import that in migration
- Require name in all /rename and org creation requests
- Auto-generate slug for new org with no slug or when /rename is called w/o a slug
- Auto-generate slug for 'default-org' based on name

- Add /api/orgs/slugs GET endpoint to return all slugs in use

- tests: extend backend test-requirements.txt from requirements to allow testing slugify
- tests: move get_redis_crawl_stats() to avoid extra dependency in utils
This commit is contained in:
Tessa Walsh 2023-10-10 21:30:09 -04:00 committed by GitHub
parent 16e7a1d0a2
commit 266afdf8d9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 138 additions and 29 deletions

View File

@ -79,7 +79,7 @@ jobs:
python-version: '3.9'
- name: Install Python Libs
run: pip install pytest requests
run: pip install -r ./backend/test-requirements.txt
- name: Wait for all pods to be ready
run: kubectl wait --for=condition=ready pod --all --timeout=240s

View File

@ -70,7 +70,7 @@ jobs:
python-version: '3.9'
- name: Install Python Libs
run: pip install pytest requests
run: pip install -r ./backend/test-requirements.txt
- name: Wait for all pods to be ready
run: kubectl wait --for=condition=ready pod --all --timeout=240s

View File

@ -58,7 +58,7 @@ jobs:
python-version: '3.9'
- name: Install Python Libs
run: pip install pytest requests
run: pip install -r ./backend/test-requirements.txt
- name: Wait for all pods to be ready
run: sudo microk8s kubectl wait --for=condition=ready pod --all --timeout=240s

View File

@ -15,7 +15,7 @@ from pymongo.errors import InvalidName
from .migrations import BaseMigration
CURR_DB_VERSION = "0018"
CURR_DB_VERSION = "0019"
# ============================================================================

View File

@ -0,0 +1,33 @@
"""
Migration 0019 - Organization slug
"""
from btrixcloud.migrations import BaseMigration
from btrixcloud.utils import slug_from_name
MIGRATION_VERSION = "0019"
class Migration(BaseMigration):
    """Migration 0019: backfill a slug on every org that lacks one."""

    def __init__(self, mdb, migration_version=MIGRATION_VERSION):
        super().__init__(mdb, migration_version)

    async def migrate_up(self):
        """Perform migration up.

        Derive a slug from each existing org's name and store it, one org
        at a time so a single failure doesn't abort the whole migration.
        """
        # pylint: disable=duplicate-code
        orgs_collection = self.mdb["organizations"]
        # matches docs where slug is missing or explicitly None
        cursor = orgs_collection.find({"slug": {"$eq": None}})
        async for org_doc in cursor:
            org_id = org_doc["_id"]
            new_slug = slug_from_name(org_doc["name"])
            try:
                await orgs_collection.find_one_and_update(
                    {"_id": org_id}, {"$set": {"slug": new_slug}}
                )
            # pylint: disable=broad-exception-caught
            except Exception as err:
                # log and continue with the remaining orgs
                print(f"Error adding slug to org {org_id}: {err}", flush=True)

View File

@ -612,6 +612,7 @@ class RenameOrg(BaseModel):
"""Request to invite another user"""
name: str
slug: Optional[str] = None
# ============================================================================
@ -664,6 +665,7 @@ class Organization(BaseMongoModel):
id: UUID4
name: str
slug: str
users: Dict[str, UserRole]
@ -751,6 +753,7 @@ class OrgOut(BaseMongoModel):
id: UUID4
name: str
slug: str
users: Optional[Dict[str, Any]]
usage: Optional[Dict[str, int]]
crawlExecSeconds: Optional[Dict[str, int]]

View File

@ -19,12 +19,12 @@ import humanize
from pydantic import BaseModel, Field
from kubernetes.utils import parse_quantity
from redis import asyncio as exceptions
from .utils import (
from_k8s_date,
to_k8s_date,
dt_now,
get_redis_crawl_stats,
)
from .k8sapi import K8sAPI
@ -1075,10 +1075,26 @@ class BtrixOperator(K8sAPI):
return False
async def get_redis_crawl_stats(self, redis, crawl_id):
    """Read page counts and archive sizes for a crawl from its redis keys.

    Returns a (stats, sizes) pair: stats has "found"/"done"/"size" totals,
    sizes is the raw per-file size hash from redis.
    """
    try:
        # crawler >0.9.0 stores the done count as a plain value
        done_count = int(await redis.get(f"{crawl_id}:d") or 0)
    except exceptions.ResponseError:
        # crawler <=0.9.0 kept done pages in a list, so count its length
        done_count = await redis.llen(f"{crawl_id}:d")

    found_count = await redis.scard(f"{crawl_id}:s")
    size_map = await redis.hgetall(f"{crawl_id}:size")
    total_size = sum(int(value) for value in size_map.values())
    return {"found": found_count, "done": done_count, "size": total_size}, size_map
async def update_crawl_state(self, redis, crawl, status, pods, done):
"""update crawl state and check if crawl is now done"""
results = await redis.hgetall(f"{crawl.id}:status")
stats, sizes = await get_redis_crawl_stats(redis, crawl.id)
stats, sizes = await self.get_redis_crawl_stats(redis, crawl.id)
# need to add size of previously completed WACZ files as well!
stats["size"] += status.filesAddedSize

View File

@ -33,6 +33,7 @@ from .models import (
PaginatedResponse,
)
from .pagination import DEFAULT_PAGE_SIZE, paginated_format
from .utils import slug_from_name
DEFAULT_ORG = os.environ.get("DEFAULT_ORG", "My Organization")
@ -61,7 +62,8 @@ class OrgOps:
"""init lookup index"""
while True:
try:
return await self.orgs.create_index("name", unique=True)
await self.orgs.create_index("name", unique=True)
return await self.orgs.create_index("slug", unique=True)
# pylint: disable=duplicate-code
except AutoReconnect:
print(
@ -92,6 +94,7 @@ class OrgOps:
org = Organization(
id=id_,
name=org_name,
slug=slug_from_name(org_name),
users={str(user.id): UserRole.OWNER},
storage=DefaultStorage(name=storage_name, path=storage_path),
)
@ -162,6 +165,7 @@ class OrgOps:
print("Default organization already exists - skipping", flush=True)
else:
default_org.name = DEFAULT_ORG
default_org.slug = slug_from_name(DEFAULT_ORG)
await self.update(default_org)
print(f'Default organization renamed to "{DEFAULT_ORG}"', flush=True)
return
@ -171,6 +175,7 @@ class OrgOps:
org = Organization(
id=id_,
name=DEFAULT_ORG,
slug=slug_from_name(DEFAULT_ORG),
users={},
storage=DefaultStorage(name=storage_name, path=storage_path),
default=True,
@ -392,6 +397,11 @@ class OrgOps:
"publicCollectionsCount": public_collections_count,
}
async def get_all_org_slugs(self):
    """Return every slug currently in use, wrapped for the API response."""
    # distinct() collapses duplicates, though slugs are unique-indexed anyway
    return {"slugs": await self.orgs.distinct("slug", {})}
# ============================================================================
# pylint: disable=too-many-statements
@ -479,9 +489,15 @@ def init_orgs_api(app, mdb, user_manager, invites, user_dep):
id_ = uuid.uuid4()
storage_path = str(id_) + "/"
slug = new_org.slug
if not slug:
slug = slug_from_name(new_org.name)
org = Organization(
id=id_,
name=new_org.name,
slug=slug,
users={},
storage=DefaultStorage(name="default", path=storage_path),
)
@ -502,6 +518,11 @@ def init_orgs_api(app, mdb, user_manager, invites, user_dep):
org: Organization = Depends(org_owner_dep),
):
org.name = rename.name
if rename.slug:
org.slug = rename.slug
else:
org.slug = slug_from_name(rename.name)
try:
await ops.update(org)
except DuplicateKeyError:
@ -649,4 +670,8 @@ def init_orgs_api(app, mdb, user_manager, invites, user_dep):
async def get_org_metrics(org: Organization = Depends(org_dep)):
return await ops.get_org_metrics(org)
@app.get("/orgs/slugs", tags=["organizations"])
async def get_all_org_slugs():
return await ops.get_all_org_slugs()
return ops

View File

@ -10,7 +10,7 @@ import atexit
from datetime import datetime
from redis import asyncio as exceptions
from slugify import slugify
def get_templates_dir():
@ -38,23 +38,6 @@ def ts_now():
return str(dt_now())
async def get_redis_crawl_stats(redis, crawl_id):
    """Collect page and size stats for a crawl from its redis keys.

    Returns a (stats, sizes) pair: stats totals found/done pages and
    archive bytes; sizes is the raw per-file size hash.
    """
    try:
        # crawler >0.9.0 stores the done count as a plain value
        done_count = int(await redis.get(f"{crawl_id}:d") or 0)
    except exceptions.ResponseError:
        # crawler <=0.9.0 kept done pages in a list, so count its length
        done_count = await redis.llen(f"{crawl_id}:d")

    found_count = await redis.scard(f"{crawl_id}:s")
    size_map = await redis.hgetall(f"{crawl_id}:size")
    total_size = sum(int(value) for value in size_map.values())
    return {"found": found_count, "done": done_count, "size": total_size}, size_map
def run_once_lock(name):
"""run once lock via temp directory
- if dir doesn't exist, return true
@ -109,3 +92,8 @@ def is_bool(stri: Optional[str]) -> bool:
if stri:
return stri.lower() in ("true", "1", "yes")
return False
def slug_from_name(name: str) -> str:
    """Build a URL-safe slug from an org name.

    Apostrophes are removed first so "User's" becomes "users"
    rather than "user-s".
    """
    cleaned = name.replace("'", "")
    return slugify(cleaned)

View File

@ -15,3 +15,4 @@ pathvalidate
https://github.com/ikreymer/stream-zip/archive/refs/heads/stream-uncompress.zip
boto3
backoff>=2.2.1
python-slugify>=8.0.1

View File

@ -0,0 +1,4 @@
-r requirements.txt
pytest
requests

View File

@ -69,7 +69,7 @@ def non_default_org_id(admin_auth_headers):
r = requests.post(
f"{API_PREFIX}/orgs/create",
headers=admin_auth_headers,
json={"name": NON_DEFAULT_ORG_NAME},
json={"name": NON_DEFAULT_ORG_NAME, "slug": "non-default-org"},
)
assert r.status_code == 200

View File

@ -50,7 +50,8 @@ def test_get_org_crawler(crawler_auth_headers, default_org_id):
def test_rename_org(admin_auth_headers, default_org_id):
UPDATED_NAME = "updated org name"
rename_data = {"name": UPDATED_NAME}
UPDATED_SLUG = "updated-org-name"
rename_data = {"name": UPDATED_NAME, "slug": UPDATED_SLUG}
r = requests.post(
f"{API_PREFIX}/orgs/{default_org_id}/rename",
headers=admin_auth_headers,
@ -61,11 +62,12 @@ def test_rename_org(admin_auth_headers, default_org_id):
data = r.json()
assert data["updated"]
# Verify that name is now updated.
# Verify that name and slug are now updated.
r = requests.get(f"{API_PREFIX}/orgs/{default_org_id}", headers=admin_auth_headers)
assert r.status_code == 200
data = r.json()
assert data["name"] == UPDATED_NAME
assert data["slug"] == UPDATED_SLUG
def test_create_org(admin_auth_headers):
@ -73,7 +75,7 @@ def test_create_org(admin_auth_headers):
r = requests.post(
f"{API_PREFIX}/orgs/create",
headers=admin_auth_headers,
json={"name": NEW_ORG_NAME},
json={"name": NEW_ORG_NAME, "slug": "new-org"},
)
assert r.status_code == 200
@ -389,3 +391,21 @@ def test_org_metrics(crawler_auth_headers, default_org_id):
assert data["workflowsQueuedCount"] >= 0
assert data["collectionsCount"] > 0
assert data["publicCollectionsCount"] >= 0
def test_get_org_slugs(admin_auth_headers):
    """Verify /orgs/slugs agrees with the slugs shown by the /orgs listing."""
    # Gather the org count and per-org slugs from the paginated listing
    listing_resp = requests.get(f"{API_PREFIX}/orgs", headers=admin_auth_headers)
    assert listing_resp.status_code == 200
    listing = listing_resp.json()
    total_orgs = listing["total"]
    expected_slugs = [org["slug"] for org in listing["items"]]

    # The dedicated slugs endpoint should return exactly those slugs
    slugs_resp = requests.get(f"{API_PREFIX}/orgs/slugs", headers=admin_auth_headers)
    assert slugs_resp.status_code == 200
    returned_slugs = slugs_resp.json()["slugs"]
    assert len(returned_slugs) == total_orgs
    assert all(slug in expected_slugs for slug in returned_slugs)

View File

@ -0,0 +1,19 @@
"""utils tests"""
import pytest
from btrixcloud.utils import slug_from_name
@pytest.mark.parametrize(
    "name,expected_slug",
    [
        # basic lowercasing and space-to-hyphen conversion
        ("Default org", "default-org"),
        # apostrophes are stripped before slugifying, not turned into hyphens
        ("User's org", "users-org"),
        ("User's @ org", "users-org"),
        # accented characters are transliterated to plain ASCII
        ("Org with åccénted charactêrs", "org-with-accented-characters"),
        ("Org with åccénted! charactêrs@!", "org-with-accented-characters"),
        # emoji and other non-alphanumeric symbols are dropped entirely
        ("cATs! 🐈🐈‍⬛", "cats"),
    ],
)
def test_slug_from_name(name: str, expected_slug: str):
    """slug_from_name produces a URL-safe slug for each sample org name."""
    assert slug_from_name(name) == expected_slug