browsertrix/backend/test_nightly/test_execution_minutes_quota.py
Tessa Walsh be41c48c27
Add extra and gifted execution minutes (#1361)
Fixes #1358 

- Adds `extraExecMinutes` and `giftedExecMinutes` org quotas, which are
not reset monthly but are updatable amounts that carry across months
- Adds `quotaUpdate` field to `Organization` to track when quotas were
updated with timestamp
- Adds `extraExecMinutesAvailable` and `giftedExecMinutesAvailable`
fields to `Organization` to help with tracking available time left
(includes tested migration to initialize these to 0)
- Modifies org backend to track time across multiple categories, using
monthlyExecSeconds, then giftedExecSeconds, then extraExecSeconds.
All time is also written into crawlExecSeconds, which is now the monthly
total and also contains any overage time above the quotas
- Updates Dashboard crawling meter to include all types of execution
time if `extraExecMinutes` and/or `giftedExecMinutes` are set above 0
- Updates Dashboard Usage History table to include all types of
execution time (only displaying columns that have data)
- Adds backend nightly test to check handling of quotas and execution
time
- Includes migration to add new fields and copy crawlExecSeconds to
monthlyExecSeconds for previous months

Co-authored-by: emma <hi@emma.cafe>
2023-12-07 14:34:37 -05:00

186 lines
5.3 KiB
Python

import math
import requests
import time
from datetime import datetime
from typing import Dict
from .conftest import API_PREFIX
from .utils import get_crawl_status
# Quota values exercised by these tests. Quotas are set via the API in
# minutes; the org endpoints report usage/availability in seconds, so a
# *_SECS_QUOTA companion is derived for each.
EXEC_MINS_QUOTA = 1
EXEC_SECS_QUOTA = EXEC_MINS_QUOTA * 60

GIFTED_MINS_QUOTA = 3
GIFTED_SECS_QUOTA = GIFTED_MINS_QUOTA * 60

EXTRA_MINS_QUOTA = 5
EXTRA_SECS_QUOTA = EXTRA_MINS_QUOTA * 60

# Crawl config id created by the first crawl test and reused by later
# tests in this module (tests here are order-dependent).
config_id = None
def test_set_execution_mins_quota(org_with_quotas, admin_auth_headers):
    """Set the monthly execution-minutes quota on the test org.

    Later tests in this module rely on this quota being in place before
    the first crawl runs.
    """
    r = requests.post(
        f"{API_PREFIX}/orgs/{org_with_quotas}/quotas",
        headers=admin_auth_headers,
        json={"maxExecMinutesPerMonth": EXEC_MINS_QUOTA},
    )
    # Fail fast on an HTTP error instead of a confusing failure when
    # parsing the response body below.
    assert r.status_code == 200
    data = r.json()
    assert data.get("updated") is True
def test_crawl_stopped_when_quota_reached(org_with_quotas, admin_auth_headers):
    """Run a crawl and verify it is stopped once the monthly quota is hit.

    Also verifies that recorded execution time meets/exceeds the quota and
    that starting another crawl is rejected while over quota.
    """
    # Run crawl; remember its workflow id for the later tests in this module
    global config_id
    crawl_id, config_id = run_crawl(org_with_quotas, admin_auth_headers)
    time.sleep(1)

    # Wait for the crawl to leave its startup states
    while get_crawl_status(org_with_quotas, crawl_id, admin_auth_headers) in (
        "starting",
        "waiting_capacity",
    ):
        time.sleep(2)

    # Wait for the crawl to finish running and finalizing
    while get_crawl_status(org_with_quotas, crawl_id, admin_auth_headers) in (
        "running",
        "generate-wacz",
        "uploading-wacz",
        "pending-wait",
    ):
        time.sleep(2)

    # Ensure that crawl was stopped by quota
    assert (
        get_crawl_status(org_with_quotas, crawl_id, admin_auth_headers)
        == "stopped_quota_reached"
    )

    # Give the backend time to finalize execution-time accounting
    time.sleep(5)

    # Ensure crawl execution seconds went over quota
    r = requests.get(
        f"{API_PREFIX}/orgs/{org_with_quotas}/crawls/{crawl_id}/replay.json",
        headers=admin_auth_headers,
    )
    data = r.json()
    execution_seconds = data["crawlExecSeconds"]
    assert math.floor(execution_seconds / 60) >= EXEC_MINS_QUOTA

    time.sleep(5)

    # Ensure we can't start another crawl when over the quota
    r = requests.post(
        f"{API_PREFIX}/orgs/{org_with_quotas}/crawlconfigs/{config_id}/run",
        headers=admin_auth_headers,
    )
    assert r.status_code == 403
    assert r.json()["detail"] == "exec_minutes_quota_reached"
def test_set_execution_mins_extra_quotas(org_with_quotas, admin_auth_headers):
    """Grant extra and gifted execution minutes and verify org bookkeeping.

    After the grant, both pools should be fully available and untouched,
    while monthly usage already exceeds the base quota from the previous
    crawl test in this module.
    """
    r = requests.post(
        f"{API_PREFIX}/orgs/{org_with_quotas}/quotas",
        headers=admin_auth_headers,
        json={
            "maxExecMinutesPerMonth": EXEC_MINS_QUOTA,
            "extraExecMinutes": EXTRA_MINS_QUOTA,
            "giftedExecMinutes": GIFTED_MINS_QUOTA,
        },
    )
    # Fail fast on an HTTP error instead of a confusing failure when
    # parsing the response body below.
    assert r.status_code == 200
    data = r.json()
    assert data.get("updated") is True

    # Ensure org data looks as we expect
    r = requests.get(
        f"{API_PREFIX}/orgs/{org_with_quotas}",
        headers=admin_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    # Newly granted time is fully available and nothing has been drawn yet
    assert data["extraExecSecondsAvailable"] == EXTRA_SECS_QUOTA
    assert data["giftedExecSecondsAvailable"] == GIFTED_SECS_QUOTA
    assert data["extraExecSeconds"] == {}
    assert data["giftedExecSeconds"] == {}
    # Monthly usage already exceeds the base quota from the earlier crawl
    assert get_total_exec_seconds(data["crawlExecSeconds"]) >= EXEC_SECS_QUOTA
    # Every quota change is recorded with a timestamp and its payload
    assert len(data["quotaUpdates"])
    for update in data["quotaUpdates"]:
        assert update["modified"]
        assert update["update"]
def test_crawl_stopped_when_quota_reached_with_extra(
    org_with_quotas, admin_auth_headers
):
    """Verify a crawl consumes gifted then extra time and stops at the quota.

    Depends on the earlier tests in this module having set quotas, created
    the workflow (module-level config_id), and used up the monthly quota.
    """
    # Run crawl
    r = requests.post(
        f"{API_PREFIX}/orgs/{org_with_quotas}/crawlconfigs/{config_id}/run",
        headers=admin_auth_headers,
    )
    assert r.status_code == 200
    crawl_id = r.json()["started"]

    # Wait for the crawl to leave its startup states
    while get_crawl_status(org_with_quotas, crawl_id, admin_auth_headers) in (
        "starting",
        "waiting_capacity",
    ):
        time.sleep(2)

    # Wait for the crawl to finish running and finalizing
    while get_crawl_status(org_with_quotas, crawl_id, admin_auth_headers) in (
        "running",
        "generate-wacz",
        "uploading-wacz",
        "pending-wait",
    ):
        time.sleep(2)

    # Ensure that crawl was stopped by quota
    assert (
        get_crawl_status(org_with_quotas, crawl_id, admin_auth_headers)
        == "stopped_quota_reached"
    )

    # Give the backend time to finalize execution-time accounting
    time.sleep(5)

    # Ensure org data looks as we expect: both bonus pools exhausted, with
    # usage recorded against gifted time (exactly) and extra time (which
    # absorbs any overage), and monthly usage over the base quota
    r = requests.get(
        f"{API_PREFIX}/orgs/{org_with_quotas}",
        headers=admin_auth_headers,
    )
    data = r.json()
    assert data["extraExecSecondsAvailable"] == 0
    assert data["giftedExecSecondsAvailable"] == 0
    assert get_total_exec_seconds(data["extraExecSeconds"]) >= EXTRA_SECS_QUOTA
    assert get_total_exec_seconds(data["giftedExecSeconds"]) == GIFTED_SECS_QUOTA
    assert get_total_exec_seconds(data["crawlExecSeconds"]) >= EXEC_SECS_QUOTA

    time.sleep(5)

    # Ensure we can't start another crawl when over the quota
    r = requests.post(
        f"{API_PREFIX}/orgs/{org_with_quotas}/crawlconfigs/{config_id}/run",
        headers=admin_auth_headers,
    )
    assert r.status_code == 403
    assert r.json()["detail"] == "exec_minutes_quota_reached"
def run_crawl(org_id, headers):
    """Create a run-now crawl workflow in the org.

    Returns a (crawl_id, config_id) tuple for the newly started crawl
    and its workflow.
    """
    payload = {
        "runNow": True,
        "name": "Execution Mins Quota",
        "config": {
            "seeds": [{"url": "https://webrecorder.net/"}],
            "extraHops": 1,
        },
    }
    resp = requests.post(
        f"{API_PREFIX}/orgs/{org_id}/crawlconfigs/",
        headers=headers,
        json=payload,
    )
    body = resp.json()
    return body["run_now_job"], body["id"]
def get_total_exec_seconds(execSeconds: Dict[str, int]) -> int:
    """Return total execution seconds summed across all entries.

    Args:
        execSeconds: mapping of period key (presumably YYYY-M month
            strings — confirm against the org API) to seconds used.
    """
    # sum() consumes the values view directly; no intermediate list needed
    return sum(execSeconds.values())