- all-crawls list endpoint filters now conform to 'Standardize list controls for archived items #1025' and URL-decode values before passing them in
- Uploads list endpoint now includes all all-crawls filters relevant to uploads
- An all-crawls/search-values endpoint is added to support searching across all archived item types (a usage sketch follows this list)
- Crawl configuration names are now copied to the crawl when the crawl is created, and crawl names and descriptions are now editable via the backend API (note: frontend changes will also be needed to make them editable via the UI)
- Migration added to copy existing config names for active configs into their associated crawls; this migration has been tested in a local deployment
- New statuses generate-wacz, uploading-wacz, and pending-wait are added to tests where relevant to ensure they pass
- Test coverage added for all new all-crawls endpoints, filters, and sort values
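A rough sketch of how the new endpoints might be exercised from the test suite is shown below. Only the all-crawls and all-crawls/search-values paths come from the changes above; the default_org_id fixture, the "name" filter parameter, the shape of the list response ("items", "id"), and the PATCH route for editing a crawl are illustrative assumptions rather than confirmed API details.

import requests

from .conftest import API_PREFIX


def test_all_crawls_list_controls_sketch(default_org_id, admin_auth_headers):
    # List archived items of all types; the "name" filter key and its
    # URL-encoded value are assumptions used for illustration only.
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?name=Concurrent%20Crawl",
        headers=admin_auth_headers,
    )
    assert r.status_code == 200
    items = r.json().get("items", [])

    # Fetch values that support searching across all archived item types.
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/all-crawls/search-values",
        headers=admin_auth_headers,
    )
    assert r.status_code == 200

    # Edit a crawl's name and description via the backend API
    # (the exact route and payload shape are assumptions).
    if items:
        crawl_id = items[0]["id"]
        r = requests.patch(
            f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawl_id}",
            headers=admin_auth_headers,
            json={"name": "Renamed Crawl", "description": "Updated description"},
        )
        assert r.status_code == 200

The concurrent crawl limit tests below show the new generate-wacz, uploading-wacz, and pending-wait statuses in use.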
import requests
import time

from .conftest import API_PREFIX

crawl_id_a = None
crawl_id_b = None


def test_set_concurrent_crawl_limit(org_with_quotas, admin_auth_headers):
    r = requests.post(
        f"{API_PREFIX}/orgs/{org_with_quotas}/quotas",
        headers=admin_auth_headers,
        json={"maxConcurrentCrawls": 1},
    )
    data = r.json()
    assert data.get("updated") == True


def test_run_two_only_one_concurrent(org_with_quotas, admin_auth_headers):
    global crawl_id_a
    crawl_id_a = run_crawl(org_with_quotas, admin_auth_headers)
    time.sleep(1)

    global crawl_id_b
    crawl_id_b = run_crawl(org_with_quotas, admin_auth_headers)

    while get_crawl_status(org_with_quotas, crawl_id_a, admin_auth_headers) in (
        "starting",
        "waiting_capacity",
    ):
        time.sleep(2)

    assert get_crawl_status(org_with_quotas, crawl_id_a, admin_auth_headers) in (
        "running",
        "generate-wacz",
        "uploading-wacz",
        "pending-wait",
    )

    while (
        get_crawl_status(org_with_quotas, crawl_id_b, admin_auth_headers) == "starting"
    ):
        time.sleep(2)

    assert (
        get_crawl_status(org_with_quotas, crawl_id_b, admin_auth_headers)
        == "waiting_org_limit"
    )


def test_cancel_and_run_other(org_with_quotas, admin_auth_headers):
    r = requests.post(
        f"{API_PREFIX}/orgs/{org_with_quotas}/crawls/{crawl_id_a}/cancel",
        headers=admin_auth_headers,
    )
    data = r.json()
    assert data["success"] == True

    while (
        get_crawl_status(org_with_quotas, crawl_id_a, admin_auth_headers) != "canceled"
    ):
        time.sleep(2)

    while (
        get_crawl_status(org_with_quotas, crawl_id_b, admin_auth_headers)
        == "waiting_org_limit"
    ):
        time.sleep(5)

    assert get_crawl_status(org_with_quotas, crawl_id_b, admin_auth_headers) in (
        "starting",
        "running",
        "waiting_capacity",
        "generate-wacz",
        "uploading-wacz",
        "pending-wait",
    )

    # cancel second crawl as well
    r = requests.post(
        f"{API_PREFIX}/orgs/{org_with_quotas}/crawls/{crawl_id_b}/cancel",
        headers=admin_auth_headers,
    )
    data = r.json()
    assert data["success"] == True


def run_crawl(org_id, headers):
    crawl_data = {
        "runNow": True,
        "name": "Concurrent Crawl",
        "config": {
            "seeds": [{"url": "https://specs.webrecorder.net/"}],
            "limit": 1,
        },
    }
    r = requests.post(
        f"{API_PREFIX}/orgs/{org_id}/crawlconfigs/",
        headers=headers,
        json=crawl_data,
    )
    data = r.json()

    return data["run_now_job"]


def get_crawl_status(org_id, crawl_id, headers):
    r = requests.get(
        f"{API_PREFIX}/orgs/{org_id}/crawls/{crawl_id}/replay.json",
        headers=headers,
    )
    data = r.json()
    return data["state"]