remove deleted collections from crawlconfigs (#2615)
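Simplified version of #2608: adds a remove_collection_from_all_configs() method to CrawlConfigOps so that deleting a collection also removes its ID from every workflow's autoAddCollections list. Both the crawl and crawl config updates are now also filtered by the org ID. Tests are updated to verify the removal.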
This commit is contained in:
parent
86e35e358d
commit
8a713155ef
@ -626,11 +626,16 @@ class BaseCrawlOps:
|
||||
{"$pull": {"collectionIds": collection_id}},
|
||||
)
|
||||
|
||||
async def remove_collection_from_all_crawls(self, collection_id: UUID):
|
||||
async def remove_collection_from_all_crawls(
|
||||
self, collection_id: UUID, org: Organization
|
||||
):
|
||||
"""Remove collection id from all crawls it's currently in."""
|
||||
await self.crawls.update_many(
|
||||
{"collectionIds": collection_id},
|
||||
{"$pull": {"collectionIds": collection_id}},
|
||||
await asyncio.gather(
|
||||
self.crawls.update_many(
|
||||
{"oid": org.id, "collectionIds": collection_id},
|
||||
{"$pull": {"collectionIds": collection_id}},
|
||||
),
|
||||
self.crawl_configs.remove_collection_from_all_configs(collection_id, org),
|
||||
)
|
||||
|
||||
# pylint: disable=too-many-branches, invalid-name, too-many-statements
|
||||
|
@ -621,7 +621,7 @@ class CollectionOps:
|
||||
|
||||
async def delete_collection(self, coll_id: UUID, org: Organization):
|
||||
"""Delete collection and remove from associated crawls."""
|
||||
await self.crawl_ops.remove_collection_from_all_crawls(coll_id)
|
||||
await self.crawl_ops.remove_collection_from_all_crawls(coll_id, org)
|
||||
|
||||
result = await self.collections.delete_one({"_id": coll_id, "oid": org.id})
|
||||
if result.deleted_count < 1:
|
||||
|
@ -924,6 +924,15 @@ class CrawlConfigOps:
|
||||
|
||||
return crawl_config.config
|
||||
|
||||
async def remove_collection_from_all_configs(
|
||||
self, coll_id: UUID, org: Organization
|
||||
):
|
||||
"""remove collection from all autoAddCollection list"""
|
||||
await self.crawl_configs.update_many(
|
||||
{"oid": org.id, "autoAddCollections": coll_id},
|
||||
{"$pull": {"autoAddCollections": coll_id}},
|
||||
)
|
||||
|
||||
async def get_crawl_config_tags(self, org):
|
||||
"""get distinct tags from all crawl configs for this org"""
|
||||
tags = await self.crawl_configs.distinct("tags", {"oid": org.id})
|
||||
|
@ -94,7 +94,7 @@ def test_create_collection(
|
||||
assert data["defaultThumbnailName"] == default_thumbnail_name
|
||||
assert data["allowPublicDownload"]
|
||||
|
||||
assert data["topPageHosts"] == [{'count': 3, 'host': 'webrecorder.net'}]
|
||||
assert data["topPageHosts"] == [{"count": 3, "host": "webrecorder.net"}]
|
||||
|
||||
|
||||
def test_create_public_collection(
|
||||
@ -313,7 +313,7 @@ def test_add_remove_crawl_from_collection(
|
||||
assert data["tags"] == ["wr-test-2", "wr-test-1"]
|
||||
assert data["dateEarliest"]
|
||||
assert data["dateLatest"]
|
||||
assert data["topPageHosts"] == [{'count': 7, 'host': 'webrecorder.net'}]
|
||||
assert data["topPageHosts"] == [{"count": 7, "host": "webrecorder.net"}]
|
||||
|
||||
# Verify it was added
|
||||
r = requests.get(
|
||||
|
@ -68,3 +68,33 @@ def test_workflow_crawl_auto_added_subsequent_runs(
|
||||
assert r.status_code == 200
|
||||
new_crawl_count = r.json()["crawlCount"]
|
||||
assert new_crawl_count == crawl_count + 1
|
||||
|
||||
|
||||
def test_workflow_autoadd_collection_removed_on_delete(
|
||||
default_org_id, auto_add_config_id, crawler_auth_headers, auto_add_collection_id
|
||||
):
|
||||
r = requests.get(
|
||||
f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/{auto_add_config_id}",
|
||||
headers=crawler_auth_headers,
|
||||
)
|
||||
assert r.status_code == 200
|
||||
|
||||
data = r.json()
|
||||
assert data["autoAddCollections"] == [auto_add_collection_id]
|
||||
|
||||
# Delete Collection
|
||||
r = requests.delete(
|
||||
f"{API_PREFIX}/orgs/{default_org_id}/collections/{auto_add_collection_id}",
|
||||
headers=crawler_auth_headers,
|
||||
)
|
||||
assert r.status_code == 200
|
||||
assert r.json()["success"]
|
||||
|
||||
r = requests.get(
|
||||
f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/{auto_add_config_id}",
|
||||
headers=crawler_auth_headers,
|
||||
)
|
||||
assert r.status_code == 200
|
||||
|
||||
data = r.json()
|
||||
assert data["autoAddCollections"] == []
|
||||
|
Loading…
Reference in New Issue
Block a user