Add crawl timeout nightly test (#762)

Tessa Walsh 2023-04-11 22:36:18 -04:00 committed by GitHub
parent 85b6a05419
commit 1ad82a63e6
2 changed files with 49 additions and 0 deletions

@@ -203,3 +203,25 @@ def large_crawl_finished(admin_auth_headers, default_org_id, large_crawl_id):
            time.sleep(30)
            break
        time.sleep(5)
@pytest.fixture(scope="session")
def timeout_crawl(admin_auth_headers, default_org_id):
# Start crawl
crawl_data = {
"runNow": True,
"name": "Crawl with crawl timeout",
"crawlTimeout": 30,
"config": {
"seeds": [{"url": "https://webrecorder.net/"}],
"scopeType": "domain",
"limit": 100,
},
}
r = requests.post(
f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/",
headers=admin_auth_headers,
json=crawl_data,
)
data = r.json()
return data["run_now_job"]

@@ -0,0 +1,27 @@
import requests
import time

from .conftest import API_PREFIX


def test_crawl_timeout(admin_auth_headers, default_org_id, timeout_crawl):
    # Verify that crawl has started
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{timeout_crawl}/replay.json",
        headers=admin_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data["state"] in ("starting", "running")

    # Wait some time to let crawl start, hit timeout, and gracefully stop
    time.sleep(180)

    # Verify crawl was stopped
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{timeout_crawl}/replay.json",
        headers=admin_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data["state"] == "partial_complete"