diff --git a/backend/test_nightly/conftest.py b/backend/test_nightly/conftest.py
index 23d983da..7041299c 100644
--- a/backend/test_nightly/conftest.py
+++ b/backend/test_nightly/conftest.py
@@ -203,3 +203,25 @@ def large_crawl_finished(admin_auth_headers, default_org_id, large_crawl_id):
             time.sleep(30)
             break
         time.sleep(5)
+
+
+@pytest.fixture(scope="session")
+def timeout_crawl(admin_auth_headers, default_org_id):
+    # Start crawl
+    crawl_data = {
+        "runNow": True,
+        "name": "Crawl with crawl timeout",
+        "crawlTimeout": 30,
+        "config": {
+            "seeds": [{"url": "https://webrecorder.net/"}],
+            "scopeType": "domain",
+            "limit": 100,
+        },
+    }
+    r = requests.post(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/",
+        headers=admin_auth_headers,
+        json=crawl_data,
+    )
+    data = r.json()
+    return data["run_now_job"]
diff --git a/backend/test_nightly/test_crawl_timeout.py b/backend/test_nightly/test_crawl_timeout.py
new file mode 100644
index 00000000..44eed942
--- /dev/null
+++ b/backend/test_nightly/test_crawl_timeout.py
@@ -0,0 +1,27 @@
+import requests
+import time
+
+from .conftest import API_PREFIX
+
+
+def test_crawl_timeout(admin_auth_headers, default_org_id, timeout_crawl):
+    # Verify that crawl has started
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{timeout_crawl}/replay.json",
+        headers=admin_auth_headers,
+    )
+    assert r.status_code == 200
+    data = r.json()
+    assert data["state"] in ("starting", "running")
+
+    # Wait some time to let crawl start, hit timeout, and gracefully stop
+    time.sleep(180)
+
+    # Verify crawl was stopped
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{timeout_crawl}/replay.json",
+        headers=admin_auth_headers,
+    )
+    assert r.status_code == 200
+    data = r.json()
+    assert data["state"] == "partial_complete"
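
The fixed `time.sleep(180)` makes the test timing-sensitive in both directions: it fails if the graceful stop takes longer than three minutes, and it always waits the full three minutes even when the crawl stops early. A minimal sketch of a polling alternative, assuming the same fixtures and `replay.json` endpoint used in the diff above (the `wait_for_crawl_state` helper is hypothetical, not part of this PR):

```python
import time

import requests

from .conftest import API_PREFIX


def wait_for_crawl_state(org_id, crawl_id, headers, states, timeout=180, interval=5):
    """Poll the crawl's replay.json until its state is in `states` or `timeout` elapses."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        r = requests.get(
            f"{API_PREFIX}/orgs/{org_id}/crawls/{crawl_id}/replay.json",
            headers=headers,
        )
        assert r.status_code == 200
        state = r.json()["state"]
        if state in states:
            return state
        time.sleep(interval)
    raise AssertionError(f"crawl did not reach {states} within {timeout}s")


# Usage in test_crawl_timeout, replacing the fixed sleep:
#     state = wait_for_crawl_state(
#         default_org_id, timeout_crawl, admin_auth_headers, {"partial_complete"}
#     )
#     assert state == "partial_complete"
```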