Fix migration to avoid duplicate collection slugs and names (#2318)

Follow-up to #2301 

Updates the 0039 migration to ensure collection slugs and names are
unique by:
- Removing all existing indexes.
- Setting `slug` to a random value so every document starts out unique.
- Adding a unique index on the `slug` field.
- Attempting to set each slug from the collection name using `slug_from_name()`.
- If the update is rejected as a duplicate, appending `-<counter>` to the slug
  and ` <counter>` to the name (see the sketch after this list).
- Now that names should also be unique, adding a unique index on the `name` field.
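
A minimal standalone sketch of the suffixing behavior described above; `slugify()` here is a simplified stand-in for `slug_from_name()` and an in-memory set stands in for the unique index (illustration only, not the migration's actual code):

```python
import re


def slugify(name: str) -> str:
    """simplified stand-in for btrixcloud.utils.slug_from_name()"""
    return re.sub(r"[^a-z0-9]+", "-", name.lower()).strip("-")


def dedup(name: str, taken: set[str]) -> tuple[str, str]:
    """return (slug, name), appending '-<counter>' / ' <counter>' on collision"""
    base = slugify(name)
    slug, count = base, 1
    while slug in taken:
        count += 1
        slug = f"{base}-{count}"
    taken.add(slug)
    return slug, (name if count == 1 else f"{name} {count}")


taken: set[str] = set()
print(dedup("My Crawls", taken))  # ('my-crawls', 'My Crawls')
print(dedup("My Crawls", taken))  # ('my-crawls-2', 'My Crawls 2')
```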

---------

Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
Ilya Kreymer 2025-01-21 14:23:32 -08:00 committed by GitHub
parent 6797b41de0
commit 28d39d8c4d
2 changed files with 58 additions and 6 deletions


@@ -3,6 +3,7 @@ BaseMigration class to subclass in each migration module
 """
 import os
+import traceback
 
 from pymongo.errors import OperationFailure
@@ -73,6 +74,7 @@ class BaseMigration:
                 await self.set_db_version()
             except OperationFailure as err:
                 print(f"Error running migration {self.migration_version}: {err}")
+                traceback.print_exc()
                 return False
 
         else:


@@ -2,10 +2,15 @@
 Migration 0039 -- collection slugs
 """
 from uuid import UUID
 
+from pymongo.errors import DuplicateKeyError
+from pymongo.collation import Collation
+import pymongo
+
 from btrixcloud.migrations import BaseMigration
 from btrixcloud.utils import slug_from_name
 
 
 MIGRATION_VERSION = "0039"
@@ -16,23 +21,68 @@ class Migration(BaseMigration):
     def __init__(self, mdb, **kwargs):
         super().__init__(mdb, migration_version=MIGRATION_VERSION)
 
+    async def dedup_slug(
+        self, name: str, slug_base: str, coll_id: UUID, colls_mdb
+    ) -> None:
+        """attempt to set slug, if duplicate, append suffix until a valid slug is found
+        also update original name with same suffix"""
+        slug = slug_base
+        count = 1
+
+        while True:
+            try:
+                await colls_mdb.find_one_and_update(
+                    {"_id": coll_id},
+                    {"$set": {"slug": slug}},
+                )
+                break
+            except DuplicateKeyError:
+                # pylint: disable=raise-missing-from
+                count += 1
+                slug = f"{slug_base}-{count}"
+
+        if count > 1:
+            print(f"Duplicate collection name '{name}' set to '{name} {count}'")
+            await colls_mdb.find_one_and_update(
+                {"_id": coll_id}, {"$set": {"name": f"{name} {count}"}}
+            )
+
     async def migrate_up(self):
         """Perform migration up.
 
         Add slug to collections that don't have one yet, based on name
         """
         colls_mdb = self.mdb["collections"]
 
-        async for coll_raw in colls_mdb.find({"slug": None}):
+        case_insensitive_collation = Collation(locale="en", strength=1)
+
+        await colls_mdb.drop_indexes()
+
+        # set slug to random value to ensure uniqueness
+        await colls_mdb.update_many(
+            {}, [{"$set": {"slug": {"$toString": {"$rand": {}}}}}]
+        )
+
+        await colls_mdb.create_index(
+            [("oid", pymongo.ASCENDING), ("slug", pymongo.ASCENDING)],
+            unique=True,
+            collation=case_insensitive_collation,
+        )
+
+        async for coll_raw in colls_mdb.find({}):
             coll_id = coll_raw["_id"]
             try:
-                await colls_mdb.find_one_and_update(
-                    {"_id": coll_id},
-                    {"$set": {"slug": slug_from_name(coll_raw.get("name", ""))}},
-                )
+                name = coll_raw.get("name", "")
+                slug = slug_from_name(name)
+                await self.dedup_slug(name, slug, coll_id, colls_mdb)
             # pylint: disable=broad-exception-caught
             except Exception as err:
                 print(
                     f"Error saving slug for collection {coll_id}: {err}",
                     flush=True,
                 )
+
+        await colls_mdb.create_index(
+            [("oid", pymongo.ASCENDING), ("name", pymongo.ASCENDING)],
+            unique=True,
+            collation=case_insensitive_collation,
+        )
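
After the migration runs, the resulting indexes and slug uniqueness can be spot-checked with a short pymongo sketch. The connection URL and database name below are placeholders for a local deployment, not values taken from this repo:

```python
from pymongo import MongoClient

# placeholder connection string and database name -- adjust for your deployment
client = MongoClient("mongodb://localhost:27017")
colls = client["your-btrix-db"]["collections"]

# the migration should have left unique indexes on (oid, slug) and (oid, name)
for idx in colls.list_indexes():
    print(idx["name"], idx["key"], "unique" if idx.get("unique") else "")

# approximate case-insensitive duplicate check per org
# (the indexes themselves use an "en", strength=1 collation)
pipeline = [
    {
        "$group": {
            "_id": {"oid": "$oid", "slug": {"$toLower": "$slug"}},
            "n": {"$sum": 1},
        }
    },
    {"$match": {"n": {"$gt": 1}}},
]
print("duplicate slugs:", list(colls.aggregate(pipeline)))
```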