Add crawl timeout nightly test (#762)

Tessa Walsh 2023-04-11 22:36:18 -04:00 committed by GitHub
parent 85b6a05419
commit 1ad82a63e6
2 changed files with 49 additions and 0 deletions

@@ -203,3 +203,25 @@ def large_crawl_finished(admin_auth_headers, default_org_id, large_crawl_id):
            time.sleep(30)
            break
        time.sleep(5)
@pytest.fixture(scope="session")
def timeout_crawl(admin_auth_headers, default_org_id):
# Start crawl
crawl_data = {
"runNow": True,
"name": "Crawl with crawl timeout",
"crawlTimeout": 30,
"config": {
"seeds": [{"url": "https://webrecorder.net/"}],
"scopeType": "domain",
"limit": 100,
},
}
r = requests.post(
f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/",
headers=admin_auth_headers,
json=crawl_data,
)
data = r.json()
return data["run_now_job"]

@@ -0,0 +1,27 @@
import requests
import time

from .conftest import API_PREFIX


def test_crawl_timeout(admin_auth_headers, default_org_id, timeout_crawl):
    # Verify that crawl has started
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{timeout_crawl}/replay.json",
        headers=admin_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data["state"] in ("starting", "running")

    # Wait some time to let crawl start, hit timeout, and gracefully stop
    time.sleep(180)

    # Verify crawl was stopped
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{timeout_crawl}/replay.json",
        headers=admin_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data["state"] == "partial_complete"