Fix migration to avoid duplicate collection slugs and names (#2318)

Follow-up to #2301 

Updates the 0039 migration to ensure collection slugs and names are
unique by:
- Removing all indexes.
- Setting `slug` to a random value.
- Adding a unique index on the `slug` field.
- Attempting to set the slug from the name using `slug_from_name()`.
- If the slug is rejected as a duplicate, appending `-<counter>` to the end
  of the slug and ` <counter>` to the end of the name.
- Now that names should also be unique, adding a unique index on the `name` field.

---------

Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
This commit is contained in:
Ilya Kreymer 2025-01-21 14:23:32 -08:00 committed by GitHub
parent 6797b41de0
commit 28d39d8c4d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 58 additions and 6 deletions

View File

@ -3,6 +3,7 @@ BaseMigration class to subclass in each migration module
""" """
import os import os
import traceback
from pymongo.errors import OperationFailure from pymongo.errors import OperationFailure
@ -73,6 +74,7 @@ class BaseMigration:
await self.set_db_version() await self.set_db_version()
except OperationFailure as err: except OperationFailure as err:
print(f"Error running migration {self.migration_version}: {err}") print(f"Error running migration {self.migration_version}: {err}")
traceback.print_exc()
return False return False
else: else:

View File

@ -2,10 +2,15 @@
Migration 0039 -- collection slugs Migration 0039 -- collection slugs
""" """
from uuid import UUID
from pymongo.errors import DuplicateKeyError
from pymongo.collation import Collation
import pymongo
from btrixcloud.migrations import BaseMigration from btrixcloud.migrations import BaseMigration
from btrixcloud.utils import slug_from_name from btrixcloud.utils import slug_from_name
MIGRATION_VERSION = "0039" MIGRATION_VERSION = "0039"
@ -16,23 +21,68 @@ class Migration(BaseMigration):
def __init__(self, mdb, **kwargs):
    # Extra keyword arguments are accepted but not used by this migration;
    # only the db handle and this module's version are passed to the base class.
    super().__init__(mdb, migration_version=MIGRATION_VERSION)
async def dedup_slug(
    self, name: str, slug_base: str, coll_id: UUID, colls_mdb
) -> None:
    """Assign a slug to one collection, retrying with a numeric suffix.

    The first attempt uses ``slug_base`` as-is. Each time the update is
    rejected with ``DuplicateKeyError`` the counter is incremented and
    ``<slug_base>-<counter>`` is tried instead (so the first suffix used
    is ``-2``). If any suffix was needed, the collection's name is also
    rewritten to ``<name> <counter>`` so the name mirrors the slug.
    """
    suffix = 1
    candidate = slug_base
    assigned = False

    while not assigned:
        try:
            await colls_mdb.find_one_and_update(
                {"_id": coll_id},
                {"$set": {"slug": candidate}},
            )
            assigned = True
        except DuplicateKeyError:
            # slug already taken: bump the counter and try the next one
            suffix += 1
            candidate = f"{slug_base}-{suffix}"

    if suffix == 1:
        # first attempt succeeded, name stays untouched
        return

    renamed = f"{name} {suffix}"
    print(f"Duplicate collection name '{name}' set to '{renamed}'")
    await colls_mdb.find_one_and_update(
        {"_id": coll_id}, {"$set": {"name": renamed}}
    )
async def migrate_up(self):
    """Perform migration up.

    Regenerate the slug of every collection from its name, making slugs
    and names unique:

    1. Drop all indexes on the collections collection.
    2. Overwrite every slug with a random string so the unique slug
       index can be created.
    3. Create a unique, case-insensitive index on (oid, slug).
    4. For each collection, derive a slug from its name and store it via
       ``dedup_slug``, which appends a counter to slug and name when the
       slug is already taken.
    5. Create a unique, case-insensitive index on (oid, name).

    A failure on an individual collection is printed and skipped, leaving
    that collection with its random slug.
    """
    colls_mdb = self.mdb["collections"]
    case_insensitive_collation = Collation(locale="en", strength=1)

    await colls_mdb.drop_indexes()

    # set slug to random value to ensure uniqueness
    await colls_mdb.update_many(
        {}, [{"$set": {"slug": {"$toString": {"$rand": {}}}}}]
    )

    await colls_mdb.create_index(
        [("oid", pymongo.ASCENDING), ("slug", pymongo.ASCENDING)],
        unique=True,
        collation=case_insensitive_collation,
    )

    async for coll_raw in colls_mdb.find({}):
        coll_id = coll_raw["_id"]
        try:
            name = coll_raw.get("name", "")
            slug = slug_from_name(name)
            await self.dedup_slug(name, slug, coll_id, colls_mdb)
        # pylint: disable=broad-exception-caught
        except Exception as err:
            print(
                f"Error saving slug for collection {coll_id}: {err}",
                flush=True,
            )

    # names were de-duplicated together with slugs above, so the unique
    # name index is created last
    await colls_mdb.create_index(
        [("oid", pymongo.ASCENDING), ("name", pymongo.ASCENDING)],
        unique=True,
        collation=case_insensitive_collation,
    )