browsertrix/backend/test/test_workflow_auto_add_to_collection.py
Ilya Kreymer 8a713155ef
remove deleted collections from crawlconfigs (#2615)
simplified version of #2608, add a remove_collection_from_all_configs() in CrawlConfigs, also check org.
update tests to ensure removal
2025-05-20 18:38:40 -07:00

101 lines
2.9 KiB
Python

import requests
import time
from .conftest import API_PREFIX
def test_workflow_crawl_auto_added_to_collection(
    crawler_auth_headers,
    default_org_id,
    auto_add_collection_id,
    auto_add_crawl_id,
):
    """Check that the crawl's replay.json lists the auto-add collection.

    Fetches replay.json for ``auto_add_crawl_id`` and expects a 200 response
    whose ``collectionIds`` contains ``auto_add_collection_id``.
    """
    replay_url = (
        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{auto_add_crawl_id}/replay.json"
    )
    response = requests.get(replay_url, headers=crawler_auth_headers)

    assert response.status_code == 200

    # The crawl must report membership in the collection.
    collection_ids = response.json()["collectionIds"]
    assert auto_add_collection_id in collection_ids
def test_workflow_crawl_auto_added_subsequent_runs(
    crawler_auth_headers,
    default_org_id,
    auto_add_collection_id,
    auto_add_crawl_id,
    auto_add_config_id,
):
    """Re-run the workflow and check the new crawl lands in the collection.

    Records the collection's current ``crawlCount``, starts another run of
    the workflow, waits for that crawl to reach the "complete" state, then
    verifies that the crawl lists the collection in ``collectionIds`` and
    that the collection's ``crawlCount`` grew by exactly one.

    ``auto_add_crawl_id`` is requested as a fixture but not referenced in the
    body -- presumably it guarantees the first crawl has already run; verify
    against conftest.
    """
    # Upper bound on the wait so that a crawl which never reaches "complete"
    # fails this test with a clear message instead of hanging the suite.
    max_wait_seconds = 600
    poll_interval_seconds = 5

    collection_url = (
        f"{API_PREFIX}/orgs/{default_org_id}/collections/{auto_add_collection_id}"
    )

    r = requests.get(collection_url, headers=crawler_auth_headers)
    assert r.status_code == 200
    crawl_count = r.json()["crawlCount"]

    # Run workflow again and make sure new crawl is also in collection
    # and crawl count has been incremented.
    r = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/{auto_add_config_id}/run",
        headers=crawler_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data.get("started")
    crawl_id = data["started"]

    replay_url = f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawl_id}/replay.json"

    # Poll until the crawl completes or the deadline passes. A monotonic
    # clock is used so wall-clock adjustments cannot distort the timeout.
    deadline = time.monotonic() + max_wait_seconds
    while True:
        r = requests.get(replay_url, headers=crawler_auth_headers)
        data = r.json()
        state = data["state"]
        if state == "complete":
            break
        assert time.monotonic() < deadline, (
            f"crawl {crawl_id} did not complete within {max_wait_seconds}s "
            f"(last state: {state!r})"
        )
        time.sleep(poll_interval_seconds)

    # Fetch the finished crawl once more and check collection membership.
    r = requests.get(replay_url, headers=crawler_auth_headers)
    assert r.status_code == 200
    assert auto_add_collection_id in r.json()["collectionIds"]

    # The collection should now count exactly one more crawl than before.
    r = requests.get(collection_url, headers=crawler_auth_headers)
    assert r.status_code == 200
    new_crawl_count = r.json()["crawlCount"]
    assert new_crawl_count == crawl_count + 1
def test_workflow_autoadd_collection_removed_on_delete(
    default_org_id, auto_add_config_id, crawler_auth_headers, auto_add_collection_id
):
    """Check that deleting a collection clears it from the workflow config.

    Before the delete, the crawl config's ``autoAddCollections`` must be
    exactly ``[auto_add_collection_id]``; after a successful delete of that
    collection, it must be an empty list.
    """

    def fetch_auto_add_collections():
        # Read the crawl config and return its auto-add collection list.
        config_response = requests.get(
            f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/{auto_add_config_id}",
            headers=crawler_auth_headers,
        )
        assert config_response.status_code == 200
        config = config_response.json()
        return config["autoAddCollections"]

    assert fetch_auto_add_collections() == [auto_add_collection_id]

    # Delete Collection
    delete_response = requests.delete(
        f"{API_PREFIX}/orgs/{default_org_id}/collections/{auto_add_collection_id}",
        headers=crawler_auth_headers,
    )
    assert delete_response.status_code == 200
    assert delete_response.json()["success"]

    # The workflow should no longer reference the deleted collection.
    assert fetch_auto_add_collections() == []