browsertrix/backend/test_nightly/test_storage_quota.py
Tessa Walsh d38abbca7f
Standardize handling of storage and execution time quotas (#1969)
Fixes #1968 

Changes:
- `stopped_quota_reached` and `skipped_quota_reached` migrated to new
values that indicate which quota was reached
- Before a crawl is run, the operator checks whether the storage or exec
mins quota is reached and, if so, fails the crawl with the appropriate
state, `skipped_storage_quota_reached` or `skipped_time_quota_reached`
- While crawls are running, the operator checks if the exec mins quota
is reached or if the combined size of all running crawls would put the
org over its storage quota once uploaded; if so, the crawl is stopped
gracefully and given the `stopped_storage_quota_reached` or
`stopped_time_quota_reached` state as appropriate (see the sketch below)
- Adds new nightly tests for enforcing storage quota
2024-07-25 12:49:11 -07:00
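
The pre-launch and mid-crawl checks described above can be summarized in a
small self-contained sketch. This is illustrative Python only, not the actual
Browsertrix operator code: the `Org` fields and the function names
(`pre_launch_state`, `running_stop_state`) are assumptions made for clarity;
only the state strings come from the change itself.

from dataclasses import dataclass
from typing import Optional


@dataclass
class Org:
    bytes_stored: int        # bytes already stored for this org
    storage_quota: int       # 0 means no storage quota set
    exec_mins_used: int      # execution minutes used this month
    exec_mins_quota: int     # 0 means no execution-minutes quota set


def pre_launch_state(org: Org) -> Optional[str]:
    """Before a crawl runs: return a skipped_* state if a quota is
    already reached, else None (crawl may proceed)."""
    if org.storage_quota and org.bytes_stored >= org.storage_quota:
        return "skipped_storage_quota_reached"
    if org.exec_mins_quota and org.exec_mins_used >= org.exec_mins_quota:
        return "skipped_time_quota_reached"
    return None


def running_stop_state(org: Org, running_crawls_bytes: int) -> Optional[str]:
    """While crawls run: return a stopped_* state if exec mins are
    exhausted or if uploading the running crawls' data would put the
    org over its storage quota, else None (keep crawling)."""
    if org.storage_quota and org.bytes_stored + running_crawls_bytes >= org.storage_quota:
        return "stopped_storage_quota_reached"
    if org.exec_mins_quota and org.exec_mins_used >= org.exec_mins_quota:
        return "stopped_time_quota_reached"
    return None

Per the description above, the real mid-crawl check accounts for the size of
all currently running crawls, so the storage quota is enforced before their
WACZs are uploaded rather than after.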


import requests
import time

from .conftest import API_PREFIX
from .utils import get_crawl_status

STORAGE_QUOTA_KB = 5
STORAGE_QUOTA_BYTES = STORAGE_QUOTA_KB * 1000

config_id = None


def run_crawl(org_id, headers):
    """Create a crawl workflow that runs immediately; return (crawl_id, config_id)."""
    crawl_data = {
        "runNow": True,
        "name": "Storage Quota",
        "config": {
            "seeds": [{"url": "https://webrecorder.net/"}],
            "extraHops": 1,
        },
    }
    r = requests.post(
        f"{API_PREFIX}/orgs/{org_id}/crawlconfigs/",
        headers=headers,
        json=crawl_data,
    )
    data = r.json()

    return data["run_now_job"], data["id"]


def test_storage_quota(org_with_quotas, admin_auth_headers):
    # Set a very small storage quota so the next crawl will exceed it
    r = requests.post(
        f"{API_PREFIX}/orgs/{org_with_quotas}/quotas",
        headers=admin_auth_headers,
        json={"storageQuota": STORAGE_QUOTA_BYTES},
    )
    assert r.status_code == 200
    assert r.json()["updated"]


def test_crawl_stopped_when_storage_quota_reached(org_with_quotas, admin_auth_headers):
    # Run crawl
    global config_id
    crawl_id, config_id = run_crawl(org_with_quotas, admin_auth_headers)
    time.sleep(1)

    # Wait for the crawl to start
    while get_crawl_status(org_with_quotas, crawl_id, admin_auth_headers) in (
        "starting",
        "waiting_capacity",
    ):
        time.sleep(2)

    # Wait for the crawl to finish
    while get_crawl_status(org_with_quotas, crawl_id, admin_auth_headers) in (
        "running",
        "generate-wacz",
        "uploading-wacz",
        "pending-wait",
    ):
        time.sleep(2)

    # Ensure that crawl was stopped by quota
    assert (
        get_crawl_status(org_with_quotas, crawl_id, admin_auth_headers)
        == "stopped_storage_quota_reached"
    )

    time.sleep(10)

    # Ensure crawl storage went over the quota
    r = requests.get(
        f"{API_PREFIX}/orgs/{org_with_quotas}",
        headers=admin_auth_headers,
    )
    data = r.json()
    bytes_stored = data["bytesStored"]
    assert bytes_stored >= STORAGE_QUOTA_BYTES

    time.sleep(5)

    # Ensure we can't start another crawl when over the quota
    r = requests.post(
        f"{API_PREFIX}/orgs/{org_with_quotas}/crawlconfigs/{config_id}/run",
        headers=admin_auth_headers,
    )
    assert r.status_code == 403
    assert r.json()["detail"] == "storage_quota_reached"


def test_unset_quotas(org_with_quotas, admin_auth_headers):
    # Unset quotas by setting them back to 0 so later tests aren't affected
    r = requests.post(
        f"{API_PREFIX}/orgs/{org_with_quotas}/quotas",
        headers=admin_auth_headers,
        json={"maxExecMinutesPerMonth": 0, "storageQuota": 0},
    )
    assert r.status_code == 200
    assert r.json()["updated"]