From 28d39d8c4d5f697a7fec2fa0afadcde5f9d6b257 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Tue, 21 Jan 2025 14:23:32 -0800 Subject: [PATCH] Fix migration to avoid duplicate collection slugs and names (#2318) Follow-up to #2301 Updates the 0039 migration to ensure collection slugs and names are unique by: - Removing all indexes - Setting `slug` to random value - Adding unique index to `slug` field. - Attempting to set slug from name using `slug_from_name()` - If rejected due to duplicate, append `-{count}` at end of slug. Also update name with ` {count}`. - Now that names should also be unique, add unique index on name field. --------- Co-authored-by: Tessa Walsh --- backend/btrixcloud/migrations/__init__.py | 2 + .../migrations/migration_0039_coll_slugs.py | 62 +++++++++++++++++-- 2 files changed, 58 insertions(+), 6 deletions(-) diff --git a/backend/btrixcloud/migrations/__init__.py b/backend/btrixcloud/migrations/__init__.py index e983072b..b8c3b323 100644 --- a/backend/btrixcloud/migrations/__init__.py +++ b/backend/btrixcloud/migrations/__init__.py @@ -3,6 +3,7 @@ BaseMigration class to subclass in each migration module """ import os +import traceback from pymongo.errors import OperationFailure @@ -73,6 +74,7 @@ class BaseMigration: await self.set_db_version() except OperationFailure as err: print(f"Error running migration {self.migration_version}: {err}") + traceback.print_exc() return False else: diff --git a/backend/btrixcloud/migrations/migration_0039_coll_slugs.py b/backend/btrixcloud/migrations/migration_0039_coll_slugs.py index 6fa4cd01..8bd65ffc 100644 --- a/backend/btrixcloud/migrations/migration_0039_coll_slugs.py +++ b/backend/btrixcloud/migrations/migration_0039_coll_slugs.py @@ -2,10 +2,15 @@ Migration 0039 -- collection slugs """ +from uuid import UUID + +from pymongo.errors import DuplicateKeyError +from pymongo.collation import Collation +import pymongo + from btrixcloud.migrations import BaseMigration from btrixcloud.utils import slug_from_name - 
MIGRATION_VERSION = "0039" @@ -16,23 +21,68 @@ class Migration(BaseMigration): def __init__(self, mdb, **kwargs): super().__init__(mdb, migration_version=MIGRATION_VERSION) + async def dedup_slug( + self, name: str, slug_base: str, coll_id: UUID, colls_mdb + ) -> None: + """attempt to set slug, if duplicate, append suffix until a valid slug is found + also update original name with same suffix""" + slug = slug_base + count = 1 + + while True: + try: + await colls_mdb.find_one_and_update( + {"_id": coll_id}, + {"$set": {"slug": slug}}, + ) + break + except DuplicateKeyError: + # pylint: disable=raise-missing-from + count += 1 + slug = f"{slug_base}-{count}" + + if count > 1: + print(f"Duplicate collection name '{name}' set to '{name} {count}'") + await colls_mdb.find_one_and_update( + {"_id": coll_id}, {"$set": {"name": f"{name} {count}"}} + ) + async def migrate_up(self): """Perform migration up. Add slug to collections that don't have one yet, based on name """ colls_mdb = self.mdb["collections"] + case_insensitive_collation = Collation(locale="en", strength=1) - async for coll_raw in colls_mdb.find({"slug": None}): + await colls_mdb.drop_indexes() + + # set slug to random value to ensure uniqueness + await colls_mdb.update_many( + {}, [{"$set": {"slug": {"$toString": {"$rand": {}}}}}] + ) + + await colls_mdb.create_index( + [("oid", pymongo.ASCENDING), ("slug", pymongo.ASCENDING)], + unique=True, + collation=case_insensitive_collation, + ) + + async for coll_raw in colls_mdb.find({}): coll_id = coll_raw["_id"] try: - await colls_mdb.find_one_and_update( - {"_id": coll_id}, - {"$set": {"slug": slug_from_name(coll_raw.get("name", ""))}}, - ) + name = coll_raw.get("name", "") + slug = slug_from_name(name) + await self.dedup_slug(name, slug, coll_id, colls_mdb) # pylint: disable=broad-exception-caught except Exception as err: print( f"Error saving slug for collection {coll_id}: {err}", flush=True, ) + + await colls_mdb.create_index( + [("oid", pymongo.ASCENDING), 
("name", pymongo.ASCENDING)], + unique=True, + collation=case_insensitive_collation, + )