browsertrix/backend/test_nightly/test_execution_minutes_quota.py
Ilya Kreymer dfba4b3940
Replace partial_complete -> stopped_by_user or stopped_quota_reached + operator edge cases (#1368)
- Adds two new crawl finished states, stopped_by_user and
stopped_quota_reached
- Tracks other possible 'stop reasons' in the operator, though without making
them distinct states for now.
- Updates the frontend with 'Stopped by User' and 'Stopped: Time Quota
Reached', shown with the same icon as the current partial_complete
- Adds a migration of partial_complete to either stopped_by_user or
complete, since no historical quota data is available (see the sketch after
this list)
- Addresses an edge case in scaling: if the crawl never scaled up (no redis
entry, no pod), automatically scale down
- Addresses an edge case in status: if a crawl is somehow 'canceled' but not
deleted, immediately delete the crawl object and begin finalizing.
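
The migration bullet above boils down to a couple of bulk updates. A minimal
sketch of that idea, assuming pymongo-style access and that crawl documents
carry a 'stopping' flag when a user requested the stop; the collection and
field names here are illustrative, not the verified Browsertrix schema:

from pymongo import MongoClient

def migrate_partial_complete(crawls):
    # Crawls that were explicitly asked to stop become stopped_by_user;
    # with no historical quota data available, every other
    # partial_complete crawl is treated as complete.
    crawls.update_many(
        {"state": "partial_complete", "stopping": True},
        {"$set": {"state": "stopped_by_user"}},
    )
    crawls.update_many(
        {"state": "partial_complete"},
        {"$set": {"state": "complete"}},
    )

if __name__ == "__main__":
    # Assumed local MongoDB instance and database/collection names
    migrate_partial_complete(MongoClient()["crawl_db"]["crawls"])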

---------
Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
2023-11-14 11:17:16 -08:00


import math
import requests
import time
from datetime import datetime

from .conftest import API_PREFIX
from .utils import get_crawl_status

EXEC_MINS_QUOTA = 1
EXEC_MINS_ALLOWED_OVERAGE = 10
EXEC_MINS_HARD_CAP = EXEC_MINS_QUOTA + EXEC_MINS_ALLOWED_OVERAGE

config_id = None


def test_set_execution_mins_quota(org_with_quotas, admin_auth_headers):
    r = requests.post(
        f"{API_PREFIX}/orgs/{org_with_quotas}/quotas",
        headers=admin_auth_headers,
        json={"maxExecMinutesPerMonth": EXEC_MINS_QUOTA},
    )
    data = r.json()
    assert data.get("updated") == True


def test_crawl_stopped_when_quota_reached(org_with_quotas, admin_auth_headers):
    # Run crawl
    global config_id
    crawl_id, config_id = run_crawl(org_with_quotas, admin_auth_headers)
    time.sleep(1)

    # Wait for the crawl to start
    while get_crawl_status(org_with_quotas, crawl_id, admin_auth_headers) in (
        "starting",
        "waiting_capacity",
    ):
        time.sleep(2)

    # Wait for the crawl to finish
    while get_crawl_status(org_with_quotas, crawl_id, admin_auth_headers) in (
        "running",
        "generate-wacz",
        "uploading-wacz",
        "pending-wait",
    ):
        time.sleep(2)

    # Ensure that crawl was stopped by quota
    assert (
        get_crawl_status(org_with_quotas, crawl_id, admin_auth_headers)
        == "stopped_quota_reached"
    )

    time.sleep(5)

    # Ensure crawl execution seconds went over quota
    r = requests.get(
        f"{API_PREFIX}/orgs/{org_with_quotas}/crawls/{crawl_id}/replay.json",
        headers=admin_auth_headers,
    )
    data = r.json()
    execution_seconds = data["crawlExecSeconds"]
    assert math.floor(execution_seconds / 60) >= EXEC_MINS_QUOTA

    time.sleep(5)

    # Ensure we can't start another crawl when over the quota
    r = requests.post(
        f"{API_PREFIX}/orgs/{org_with_quotas}/crawlconfigs/{config_id}/run",
        headers=admin_auth_headers,
    )
    assert r.status_code == 403
    assert r.json()["detail"] == "exec_minutes_quota_reached"


def run_crawl(org_id, headers):
    # Create a crawl config that runs immediately and return
    # (crawl_id, config_id) from the creation response
    crawl_data = {
        "runNow": True,
        "name": "Execution Mins Quota",
        "config": {
            "seeds": [{"url": "https://webrecorder.net/"}],
            "extraHops": 1,
        },
    }
    r = requests.post(
        f"{API_PREFIX}/orgs/{org_id}/crawlconfigs/",
        headers=headers,
        json=crawl_data,
    )
    data = r.json()

    return data["run_now_job"], data["id"]