Related to https://github.com/webrecorder/browsertrix-cloud/issues/1620. Follow-up to https://github.com/webrecorder/browsertrix-cloud/pull/1621, which didn't seem to fix the problem. I'm giving the tests much more time here in the hopes that that solves it (since these are nightly tests, runtime shouldn't be such a pressing issue).
```python
import requests
import time

from .conftest import API_PREFIX
from .utils import verify_file_replicated


def test_crawl_timeout(admin_auth_headers, default_org_id, timeout_crawl):
    # Verify that crawl has started
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{timeout_crawl}/replay.json",
        headers=admin_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data["state"] in ("starting", "running")

    # Poll every 20 seconds, trying for 10 minutes before failing
    attempts = 0
    while True:
        if attempts > 30:
            assert False, "Crawl did not complete within 10 minutes"

        r = requests.get(
            f"{API_PREFIX}/orgs/{default_org_id}/crawls/{timeout_crawl}/replay.json",
            headers=admin_auth_headers,
        )
        if r.json()["state"] == "complete":
            break
        time.sleep(20)
        attempts += 1


def test_crawl_files_replicated(admin_auth_headers, default_org_id, timeout_crawl):
    time.sleep(20)

    # Verify replication job was successful
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/jobs?sortBy=started&sortDirection=1&jobType=create-replica",
        headers=admin_auth_headers,
    )
    assert r.status_code == 200
    latest_job = r.json()["items"][0]
    assert latest_job["type"] == "create-replica"
    job_id = latest_job["id"]

    attempts = 0
    while attempts < 5:
        r = requests.get(
            f"{API_PREFIX}/orgs/{default_org_id}/jobs/{job_id}",
            headers=admin_auth_headers,
        )
        assert r.status_code == 200
        job = r.json()
        # Check the freshly fetched job, not the stale initial listing
        finished = job.get("finished")
        if not finished:
            attempts += 1
            time.sleep(10)
            continue

        assert job["success"]
        break
    else:
        # Loop exhausted without the job finishing
        assert False, "Replication job did not finish"

    # Assert a replica was registered for each crawl file
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{timeout_crawl}/replay.json",
        headers=admin_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    files = data.get("resources")
    assert files
    for file_ in files:
        assert file_["numReplicas"] == 1

    # Verify replica is stored
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/jobs/{job_id}", headers=admin_auth_headers
    )
    assert r.status_code == 200
    data = r.json()
    verify_file_replicated(data["file_path"])
```
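Both tests implement the same poll-with-deadline pattern inline, with the time budget spread across an attempt counter and a sleep interval. As a possible follow-up, that pattern could be factored into a small helper so the budget is stated directly; the sketch below is illustrative only, and the `wait_for` name, the default values, and the `condition` callback are assumptions, not part of this PR.

```python
import time
from typing import Callable


def wait_for(
    condition: Callable[[], bool],
    timeout: float = 600.0,   # total budget in seconds (10 minutes here)
    interval: float = 20.0,   # pause between polls
) -> bool:
    """Poll condition() every `interval` seconds until it returns True
    or `timeout` seconds have elapsed. Returns True on success."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        if condition():
            return True
        time.sleep(interval)
    return False
```

With a helper like this, the crawl-completion loop above could reduce to something like `assert wait_for(lambda: get_crawl_state() == "complete")` (where `get_crawl_state` is a hypothetical wrapper around the `replay.json` request), making the 10-minute allowance a single explicit `timeout` argument that's easy to raise again if the nightly tests stay flaky.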