remove deleted collections from crawlconfigs (#2615)
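Simplified version of #2608: adds a remove_collection_from_all_configs() method to CrawlConfigOps and scopes the removal (from both crawls and crawl configs) to the collection's org. Updates the tests to ensure a deleted collection is removed from each workflow's autoAddCollections.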
This commit is contained in:
parent
86e35e358d
commit
8a713155ef
@ -626,11 +626,16 @@ class BaseCrawlOps:
|
|||||||
{"$pull": {"collectionIds": collection_id}},
|
{"$pull": {"collectionIds": collection_id}},
|
||||||
)
|
)
|
||||||
|
|
||||||
async def remove_collection_from_all_crawls(self, collection_id: UUID):
|
async def remove_collection_from_all_crawls(
|
||||||
|
self, collection_id: UUID, org: Organization
|
||||||
|
):
|
||||||
"""Remove collection id from all crawls it's currently in."""
|
"""Remove collection id from all crawls it's currently in."""
|
||||||
await self.crawls.update_many(
|
await asyncio.gather(
|
||||||
{"collectionIds": collection_id},
|
self.crawls.update_many(
|
||||||
{"$pull": {"collectionIds": collection_id}},
|
{"oid": org.id, "collectionIds": collection_id},
|
||||||
|
{"$pull": {"collectionIds": collection_id}},
|
||||||
|
),
|
||||||
|
self.crawl_configs.remove_collection_from_all_configs(collection_id, org),
|
||||||
)
|
)
|
||||||
|
|
||||||
# pylint: disable=too-many-branches, invalid-name, too-many-statements
|
# pylint: disable=too-many-branches, invalid-name, too-many-statements
|
||||||
|
@ -621,7 +621,7 @@ class CollectionOps:
|
|||||||
|
|
||||||
async def delete_collection(self, coll_id: UUID, org: Organization):
|
async def delete_collection(self, coll_id: UUID, org: Organization):
|
||||||
"""Delete collection and remove from associated crawls."""
|
"""Delete collection and remove from associated crawls."""
|
||||||
await self.crawl_ops.remove_collection_from_all_crawls(coll_id)
|
await self.crawl_ops.remove_collection_from_all_crawls(coll_id, org)
|
||||||
|
|
||||||
result = await self.collections.delete_one({"_id": coll_id, "oid": org.id})
|
result = await self.collections.delete_one({"_id": coll_id, "oid": org.id})
|
||||||
if result.deleted_count < 1:
|
if result.deleted_count < 1:
|
||||||
|
@ -924,6 +924,15 @@ class CrawlConfigOps:
|
|||||||
|
|
||||||
return crawl_config.config
|
return crawl_config.config
|
||||||
|
|
||||||
|
async def remove_collection_from_all_configs(
|
||||||
|
self, coll_id: UUID, org: Organization
|
||||||
|
):
|
||||||
|
"""remove collection from all autoAddCollection list"""
|
||||||
|
await self.crawl_configs.update_many(
|
||||||
|
{"oid": org.id, "autoAddCollections": coll_id},
|
||||||
|
{"$pull": {"autoAddCollections": coll_id}},
|
||||||
|
)
|
||||||
|
|
||||||
async def get_crawl_config_tags(self, org):
|
async def get_crawl_config_tags(self, org):
|
||||||
"""get distinct tags from all crawl configs for this org"""
|
"""get distinct tags from all crawl configs for this org"""
|
||||||
tags = await self.crawl_configs.distinct("tags", {"oid": org.id})
|
tags = await self.crawl_configs.distinct("tags", {"oid": org.id})
|
||||||
|
@ -94,7 +94,7 @@ def test_create_collection(
|
|||||||
assert data["defaultThumbnailName"] == default_thumbnail_name
|
assert data["defaultThumbnailName"] == default_thumbnail_name
|
||||||
assert data["allowPublicDownload"]
|
assert data["allowPublicDownload"]
|
||||||
|
|
||||||
assert data["topPageHosts"] == [{'count': 3, 'host': 'webrecorder.net'}]
|
assert data["topPageHosts"] == [{"count": 3, "host": "webrecorder.net"}]
|
||||||
|
|
||||||
|
|
||||||
def test_create_public_collection(
|
def test_create_public_collection(
|
||||||
@ -313,7 +313,7 @@ def test_add_remove_crawl_from_collection(
|
|||||||
assert data["tags"] == ["wr-test-2", "wr-test-1"]
|
assert data["tags"] == ["wr-test-2", "wr-test-1"]
|
||||||
assert data["dateEarliest"]
|
assert data["dateEarliest"]
|
||||||
assert data["dateLatest"]
|
assert data["dateLatest"]
|
||||||
assert data["topPageHosts"] == [{'count': 7, 'host': 'webrecorder.net'}]
|
assert data["topPageHosts"] == [{"count": 7, "host": "webrecorder.net"}]
|
||||||
|
|
||||||
# Verify it was added
|
# Verify it was added
|
||||||
r = requests.get(
|
r = requests.get(
|
||||||
|
@ -68,3 +68,33 @@ def test_workflow_crawl_auto_added_subsequent_runs(
|
|||||||
assert r.status_code == 200
|
assert r.status_code == 200
|
||||||
new_crawl_count = r.json()["crawlCount"]
|
new_crawl_count = r.json()["crawlCount"]
|
||||||
assert new_crawl_count == crawl_count + 1
|
assert new_crawl_count == crawl_count + 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_workflow_autoadd_collection_removed_on_delete(
|
||||||
|
default_org_id, auto_add_config_id, crawler_auth_headers, auto_add_collection_id
|
||||||
|
):
|
||||||
|
r = requests.get(
|
||||||
|
f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/{auto_add_config_id}",
|
||||||
|
headers=crawler_auth_headers,
|
||||||
|
)
|
||||||
|
assert r.status_code == 200
|
||||||
|
|
||||||
|
data = r.json()
|
||||||
|
assert data["autoAddCollections"] == [auto_add_collection_id]
|
||||||
|
|
||||||
|
# Delete Collection
|
||||||
|
r = requests.delete(
|
||||||
|
f"{API_PREFIX}/orgs/{default_org_id}/collections/{auto_add_collection_id}",
|
||||||
|
headers=crawler_auth_headers,
|
||||||
|
)
|
||||||
|
assert r.status_code == 200
|
||||||
|
assert r.json()["success"]
|
||||||
|
|
||||||
|
r = requests.get(
|
||||||
|
f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/{auto_add_config_id}",
|
||||||
|
headers=crawler_auth_headers,
|
||||||
|
)
|
||||||
|
assert r.status_code == 200
|
||||||
|
|
||||||
|
data = r.json()
|
||||||
|
assert data["autoAddCollections"] == []
|
||||||
|
Loading…
Reference in New Issue
Block a user