browsertrix/backend/test_nightly/test_concurrent_crawl_limit.py
Tessa Walsh 7ff57ce6b5
Backend: standardize search values, filters, and sorting for archived items (#1039)
- all-crawls list endpoint filters now conform to 'Standardize list controls for archived items #1025' and URL-decode values before passing them in
- Uploads list endpoint now includes all all-crawls filters relevant to uploads
- An all-crawls/search-values endpoint is added to support searching across all archived item types (see the usage sketch below)
- Crawl configuration names are now copied to the crawl when the crawl is created, and crawl names and descriptions are now editable via the backend API (note: this will require frontend changes as well to make them editable via the UI; see the test sketch at the end of this file)
- Migration added to copy existing config names for active configs into their associated crawls. This migration has been tested in a local deployment
- The new statuses generate-wacz, uploading-wacz, and pending-wait are added to tests where relevant to ensure that they pass
- Test coverage added for all new all-crawls endpoints, filters, and sort values
2023-08-04 09:56:52 -07:00
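
The new all-crawls/search-values endpoint and standardized list filters described above could be exercised with helpers in the same style as the test file below. This is a minimal sketch only: the endpoint path comes from the commit message, but the response shape and the "name" filter parameter are assumptions and may not match the actual API.

import requests

from .conftest import API_PREFIX


def get_all_crawls_search_values(org_id, headers):
    # Sketch: query the new search-values endpoint covering all archived item types.
    # The response shape (e.g. "names", "firstSeeds") is an assumption.
    r = requests.get(
        f"{API_PREFIX}/orgs/{org_id}/all-crawls/search-values",
        headers=headers,
    )
    assert r.status_code == 200
    return r.json()


def list_all_crawls_by_name(org_id, name, headers):
    # Sketch: filter the all-crawls list endpoint by name.
    # The "name" query parameter is an assumption based on the commit message;
    # requests URL-encodes the value, matching the URL-decoding noted above.
    r = requests.get(
        f"{API_PREFIX}/orgs/{org_id}/all-crawls",
        headers=headers,
        params={"name": name},
    )
    assert r.status_code == 200
    return r.json().get("items", [])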


import requests
import time

from .conftest import API_PREFIX

crawl_id_a = None
crawl_id_b = None


def test_set_concurrent_crawl_limit(org_with_quotas, admin_auth_headers):
    # Limit the org to a single concurrent crawl via the quotas endpoint
    r = requests.post(
        f"{API_PREFIX}/orgs/{org_with_quotas}/quotas",
        headers=admin_auth_headers,
        json={"maxConcurrentCrawls": 1},
    )
    data = r.json()
    assert data.get("updated") == True


def test_run_two_only_one_concurrent(org_with_quotas, admin_auth_headers):
    global crawl_id_a
    crawl_id_a = run_crawl(org_with_quotas, admin_auth_headers)
    time.sleep(1)

    global crawl_id_b
    crawl_id_b = run_crawl(org_with_quotas, admin_auth_headers)

    # Wait for the first crawl to move past its startup states
    while get_crawl_status(org_with_quotas, crawl_id_a, admin_auth_headers) in (
        "starting",
        "waiting_capacity",
    ):
        time.sleep(2)

    assert get_crawl_status(org_with_quotas, crawl_id_a, admin_auth_headers) in (
        "running",
        "generate-wacz",
        "uploading-wacz",
        "pending-wait",
    )

    # The second crawl should be held back by the org concurrency limit
    while (
        get_crawl_status(org_with_quotas, crawl_id_b, admin_auth_headers) == "starting"
    ):
        time.sleep(2)

    assert (
        get_crawl_status(org_with_quotas, crawl_id_b, admin_auth_headers)
        == "waiting_org_limit"
    )


def test_cancel_and_run_other(org_with_quotas, admin_auth_headers):
    # Cancel the first crawl so the queued crawl can start
    r = requests.post(
        f"{API_PREFIX}/orgs/{org_with_quotas}/crawls/{crawl_id_a}/cancel",
        headers=admin_auth_headers,
    )
    data = r.json()
    assert data["success"] == True

    while (
        get_crawl_status(org_with_quotas, crawl_id_a, admin_auth_headers) != "canceled"
    ):
        time.sleep(2)

    # Once the first crawl is canceled, the second should leave waiting_org_limit
    while (
        get_crawl_status(org_with_quotas, crawl_id_b, admin_auth_headers)
        == "waiting_org_limit"
    ):
        time.sleep(5)

    assert get_crawl_status(org_with_quotas, crawl_id_b, admin_auth_headers) in (
        "starting",
        "running",
        "waiting_capacity",
        "generate-wacz",
        "uploading-wacz",
        "pending-wait",
    )

    # cancel second crawl as well
    r = requests.post(
        f"{API_PREFIX}/orgs/{org_with_quotas}/crawls/{crawl_id_b}/cancel",
        headers=admin_auth_headers,
    )
    data = r.json()
    assert data["success"] == True


def run_crawl(org_id, headers):
    # Create a workflow that runs immediately and return the id of the new crawl
    crawl_data = {
        "runNow": True,
        "name": "Concurrent Crawl",
        "config": {
            "seeds": [{"url": "https://specs.webrecorder.net/"}],
            "limit": 1,
        },
    }
    r = requests.post(
        f"{API_PREFIX}/orgs/{org_id}/crawlconfigs/",
        headers=headers,
        json=crawl_data,
    )
    data = r.json()
    return data["run_now_job"]


def get_crawl_status(org_id, crawl_id, headers):
    # Fetch the crawl's current state from its replay.json endpoint
    r = requests.get(
        f"{API_PREFIX}/orgs/{org_id}/crawls/{crawl_id}/replay.json",
        headers=headers,
    )
    data = r.json()
    return data["state"]