Related to https://github.com/webrecorder/browsertrix-cloud/issues/1620

Follow-up to https://github.com/webrecorder/browsertrix-cloud/pull/1621, which didn't seem to fix the problem. I'm giving the test much more time here in the hope that this resolves it (since it's a nightly test, runtime shouldn't be such a pressing concern).
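Concretely, "much more time" means letting the nightly test poll the crawl state for roughly 30 attempts at 20-second intervals, i.e. about 10 minutes, before giving up. A minimal sketch of that pattern as a standalone helper (the poll_until name and its defaults are illustrative only, not something this PR adds):

import time

def poll_until(check, max_attempts=30, interval=20):
    """Call check() until it returns truthy or the attempt budget is exhausted."""
    # With these defaults this waits roughly 30 * 20s = 10 minutes in total,
    # which is the budget used by test_crawl_timeout below.
    for _ in range(max_attempts):
        if check():
            return True
        time.sleep(interval)
    return False

The loops in the updated test below are inline versions of this pattern.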
import requests
import time

from .conftest import API_PREFIX
from .utils import verify_file_replicated


def test_crawl_timeout(admin_auth_headers, default_org_id, timeout_crawl):
    # Verify that crawl has started
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{timeout_crawl}/replay.json",
        headers=admin_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data["state"] in ("starting", "running")

    attempts = 0
    while True:
        # Try for 10 minutes (30 attempts at 20-second intervals) before failing
        if attempts > 30:
            assert False, "Crawl did not complete within 10 minutes"

        r = requests.get(
            f"{API_PREFIX}/orgs/{default_org_id}/crawls/{timeout_crawl}/replay.json",
            headers=admin_auth_headers,
        )
        if r.json()["state"] == "complete":
            break

        time.sleep(20)
        attempts += 1


def test_crawl_files_replicated(admin_auth_headers, default_org_id, timeout_crawl):
    # Allow time for the replication job to be created and listed
    time.sleep(20)

    # Verify replication job was successful
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/jobs?sortBy=started&sortDirection=1&jobType=create-replica",
        headers=admin_auth_headers,
    )
    assert r.status_code == 200
    latest_job = r.json()["items"][0]
    assert latest_job["type"] == "create-replica"
    job_id = latest_job["id"]

    # Wait for the replication job to finish, checking every 10 seconds
    attempts = 0
    while attempts < 5:
        r = requests.get(
            f"{API_PREFIX}/orgs/{default_org_id}/jobs/{job_id}",
            headers=admin_auth_headers,
        )
        assert r.status_code == 200
        job = r.json()
        finished = job.get("finished")
        if not finished:
            attempts += 1
            time.sleep(10)
            continue

        assert job["success"]
        break

    # Assert file was updated
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{timeout_crawl}/replay.json",
        headers=admin_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    files = data.get("resources")
    assert files
    for file_ in files:
        assert file_["numReplicas"] == 1

    # Verify replica is stored
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/jobs/{job_id}", headers=admin_auth_headers
    )
    assert r.status_code == 200
    data = r.json()
    verify_file_replicated(data["file_path"])