browsertrix/backend/test_nightly/test_concurrent_crawl_limit.py
Tessa Walsh 38f32f11ea
Enforce quota and hard cap for monthly execution minutes (#1284)
Fixes #1261 Closes #1092

The quota for monthly execution minutes is treated as a hard cap. Once it
is exceeded, an alert is displayed indicating that the org has exceeded its
monthly execution minutes, the user is unable to start new crawls, and any
running crawls are stopped (a minimal sketch of this check follows below).

An execution minutes meter bar is also added to the Org Dashboard and
displayed if a quota is set. More detail is in #1305, which was
merged into this branch.
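As a rough illustration of the hard-cap rule described above, a minimal
sketch, assuming the operator tracks used execution seconds per month; the
function name and signature are hypothetical, not the actual operator code:

```python
# Hypothetical helper illustrating the hard cap: once used execution time
# meets or exceeds the monthly quota, new crawls are blocked and running
# crawls are stopped. Names here are illustrative only.
def exec_minutes_quota_reached(used_exec_seconds: int, quota_minutes: int) -> bool:
    if quota_minutes <= 0:
        # No quota configured: never block
        return False
    return used_exec_seconds >= quota_minutes * 60
```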

## Changes

- Enable superadmins to set `maxExecMinutesPerMonth` in the orgs list quotas
  (see the client-side sketch after this list)
- Enforce the quota by stopping crawls in the operator once it is reached
- Show an alert banner once the execution time quota is hit
- Once the quota is hit, disable Run Crawl buttons in the frontend, return a
  403 with `exec_minutes_quota_reached` detail in the backend from the
  crawl config `/run` endpoint, and don't run new workflows on creation
  (similar to the storage quota)
- Display execution time for crawls in the crawl details overview
- Show the execution minutes meter on the dashboard (from #1305)
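
The quota itself is set through the same quotas endpoint the test below
exercises for `maxConcurrentCrawls`. A minimal client-side sketch, assuming
a local backend at `API_PREFIX` and the `/run` path shown; the success
response fields (everything other than the 403 `detail`) are assumptions:

```python
import requests

API_PREFIX = "http://localhost:9871/api"  # assumed local backend URL


def set_exec_minutes_quota(org_id, headers, minutes):
    # Same quotas endpoint the test below uses for maxConcurrentCrawls
    r = requests.post(
        f"{API_PREFIX}/orgs/{org_id}/quotas",
        headers=headers,
        json={"maxExecMinutesPerMonth": minutes},
    )
    return r.json().get("updated")


def run_workflow_or_none(org_id, cid, headers):
    # Per the changes above, /run returns 403 with an
    # `exec_minutes_quota_reached` detail once the quota is hit
    r = requests.post(
        f"{API_PREFIX}/orgs/{org_id}/crawlconfigs/{cid}/run",
        headers=headers,
    )
    if r.status_code == 403 and r.json().get("detail") == "exec_minutes_quota_reached":
        return None  # quota exhausted; caller should surface the alert state
    r.raise_for_status()
    return r.json()
```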

---------
Co-authored-by: Henry Wilkinson <henry@wilkinson.graphics>
Co-authored-by: Ilya Kreymer <ikreymer@gmail.com>
Co-authored-by: sua yoo <sua@webrecorder.org>
2023-10-26 15:38:51 -07:00


import requests
import time

from .conftest import API_PREFIX
from .utils import get_crawl_status

crawl_id_a = None
crawl_id_b = None

def test_set_concurrent_crawl_limit(org_with_quotas, admin_auth_headers):
    # Set the org's concurrent crawl quota to 1 so only one crawl runs at a time
    r = requests.post(
        f"{API_PREFIX}/orgs/{org_with_quotas}/quotas",
        headers=admin_auth_headers,
        json={"maxConcurrentCrawls": 1},
    )
    data = r.json()
    assert data.get("updated") == True

def test_run_two_only_one_concurrent(org_with_quotas, admin_auth_headers):
    # Start two crawls in the same org; with the limit set to 1,
    # the second should be queued behind the first
    global crawl_id_a
    crawl_id_a = run_crawl(org_with_quotas, admin_auth_headers)
    time.sleep(1)

    global crawl_id_b
    crawl_id_b = run_crawl(org_with_quotas, admin_auth_headers)

    # Wait for the first crawl to leave its startup states
    while get_crawl_status(org_with_quotas, crawl_id_a, admin_auth_headers) in (
        "starting",
        "waiting_capacity",
    ):
        time.sleep(2)

    assert get_crawl_status(org_with_quotas, crawl_id_a, admin_auth_headers) in (
        "running",
        "generate-wacz",
        "uploading-wacz",
        "pending-wait",
    )

    # The second crawl should be held back by the org concurrency limit
    while (
        get_crawl_status(org_with_quotas, crawl_id_b, admin_auth_headers) == "starting"
    ):
        time.sleep(2)

    assert (
        get_crawl_status(org_with_quotas, crawl_id_b, admin_auth_headers)
        == "waiting_org_limit"
    )

def test_cancel_and_run_other(org_with_quotas, admin_auth_headers):
    # Cancel the first crawl; the queued crawl should then be allowed to start
    r = requests.post(
        f"{API_PREFIX}/orgs/{org_with_quotas}/crawls/{crawl_id_a}/cancel",
        headers=admin_auth_headers,
    )
    data = r.json()
    assert data["success"] == True

    while (
        get_crawl_status(org_with_quotas, crawl_id_a, admin_auth_headers) != "canceled"
    ):
        time.sleep(2)

    while (
        get_crawl_status(org_with_quotas, crawl_id_b, admin_auth_headers)
        == "waiting_org_limit"
    ):
        time.sleep(5)

    assert get_crawl_status(org_with_quotas, crawl_id_b, admin_auth_headers) in (
        "starting",
        "running",
        "waiting_capacity",
        "generate-wacz",
        "uploading-wacz",
        "pending-wait",
    )

    # Cancel second crawl as well to leave the org in a clean state
    r = requests.post(
        f"{API_PREFIX}/orgs/{org_with_quotas}/crawls/{crawl_id_b}/cancel",
        headers=admin_auth_headers,
    )
    data = r.json()
    assert data["success"] == True

def run_crawl(org_id, headers):
    # Create a workflow with runNow=True, which starts a crawl immediately
    # and returns the new crawl's id in the `run_now_job` field
    crawl_data = {
        "runNow": True,
        "name": "Concurrent Crawl",
        "config": {
            "seeds": [{"url": "https://specs.webrecorder.net/"}],
            "limit": 1,
        },
    }
    r = requests.post(
        f"{API_PREFIX}/orgs/{org_id}/crawlconfigs/",
        headers=headers,
        json=crawl_data,
    )
    data = r.json()
    return data["run_now_job"]