Fix nightly tests: modify kubectl exec syntax for creating new minio bucket (#2097)
Fixes #2096. Newer kubectl releases require the command to run in the container to be separated from kubectl's own arguments with `--`, so the bucket-creation step now uses `kubectl exec ... -- mkdir /data/replica-0`.

For an example failing test run, see: https://github.com/webrecorder/browsertrix/actions/runs/11121185534/job/30899729448

Co-authored-by: Ilya Kreymer <ikreymer@users.noreply.github.com>
This commit is contained in:
parent 1b1819ba5a
commit f7426cc46a
.github/workflows/k3d-nightly-ci.yaml

@@ -79,7 +79,7 @@ jobs:
         run: kubectl wait --for=condition=ready pod --all --timeout=240s
 
       - name: Create Extra Test Buckets
-        run: kubectl exec -i deployment/local-minio -c minio mkdir /data/replica-0
+        run: kubectl exec -i deployment/local-minio -c minio -- mkdir /data/replica-0
 
       - name: Run Tests
         run: pytest -vv ./backend/test_nightly/test_*.py
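The fix above is just the `--` separator: kubectl stops parsing its own flags there, and everything after it is executed inside the `minio` container. A minimal sketch of the same bucket-creation call driven from Python via subprocess (illustration only, not part of this commit; it assumes kubectl and the `local-minio` deployment are reachable):

```python
import subprocess

# kubectl stops parsing its own arguments at "--"; the remainder
# ("mkdir /data/replica-0") runs inside the minio container. With MinIO's
# filesystem backend, a top-level directory under /data appears as a bucket,
# so a plain mkdir is enough to create the replica-0 test bucket.
subprocess.run(
    [
        "kubectl", "exec", "-i", "deployment/local-minio", "-c", "minio",
        "--", "mkdir", "/data/replica-0",
    ],
    check=True,
)
```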
@@ -592,8 +592,9 @@ class CrawlConfigOps:
         update_query: dict[str, object] = {}
 
         running_crawl = await self.get_running_crawl(cid)
-        # only look up last finished crawl if no crawls running, otherwise
-        # lastCrawl* stats are already for running crawl
+
+        # If crawl is running, lastCrawl* stats are already for running crawl,
+        # so there's nothing to update other than size and crawl count
         if not running_crawl:
             match_query = {
                 "cid": cid,
@@ -603,26 +604,36 @@ class CrawlConfigOps:
             last_crawl = await self.crawls.find_one(
                 match_query, sort=[("finished", pymongo.DESCENDING)]
             )
-        else:
-            last_crawl = None
 
-        if last_crawl:
-            last_crawl_finished = last_crawl.get("finished")
+            # Update to reflect last crawl
+            if last_crawl:
+                last_crawl_finished = last_crawl.get("finished")
 
-            update_query["lastCrawlId"] = str(last_crawl.get("_id"))
-            update_query["lastCrawlStartTime"] = last_crawl.get("started")
-            update_query["lastStartedBy"] = last_crawl.get("userid")
-            update_query["lastStartedByName"] = last_crawl.get("userName")
-            update_query["lastCrawlTime"] = last_crawl_finished
-            update_query["lastCrawlState"] = last_crawl.get("state")
-            update_query["lastCrawlSize"] = sum(
-                file_.get("size", 0) for file_ in last_crawl.get("files", [])
-            )
-            update_query["lastCrawlStopping"] = False
-            update_query["isCrawlRunning"] = False
+                update_query["lastCrawlId"] = str(last_crawl.get("_id"))
+                update_query["lastCrawlStartTime"] = last_crawl.get("started")
+                update_query["lastStartedBy"] = last_crawl.get("userid")
+                update_query["lastStartedByName"] = last_crawl.get("userName")
+                update_query["lastCrawlTime"] = last_crawl_finished
+                update_query["lastCrawlState"] = last_crawl.get("state")
+                update_query["lastCrawlSize"] = sum(
+                    file_.get("size", 0) for file_ in last_crawl.get("files", [])
+                )
+                update_query["lastCrawlStopping"] = False
+                update_query["isCrawlRunning"] = False
 
-            if last_crawl_finished:
-                update_query["lastRun"] = last_crawl_finished
+                if last_crawl_finished:
+                    update_query["lastRun"] = last_crawl_finished
+            # If no last crawl exists and no running crawl, reset stats
+            else:
+                update_query["lastCrawlId"] = None
+                update_query["lastCrawlStartTime"] = None
+                update_query["lastStartedBy"] = None
+                update_query["lastStartedByName"] = None
+                update_query["lastCrawlTime"] = None
+                update_query["lastCrawlState"] = None
+                update_query["lastCrawlSize"] = 0
+                update_query["lastRun"] = None
+                update_query["isCrawlRunning"] = False
 
         result = await self.crawl_configs.find_one_and_update(
             {"_id": cid, "inactive": {"$ne": True}},
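The hunk ends just as `update_query` is handed to `find_one_and_update`. As a rough sketch of how such a dict of recomputed `lastCrawl*` fields is typically applied with Motor/PyMongo's `$set` (an illustration under assumptions, not the repository's actual call, which likely combines this with other operators such as the size and crawl-count increments):

```python
from pymongo import ReturnDocument


async def apply_stats_update(crawl_configs, cid, update_query):
    # MongoDB rejects an empty "$set" document, so skip the write when there
    # is nothing to update (e.g. a crawl is currently running).
    if not update_query:
        return None
    return await crawl_configs.find_one_and_update(
        {"_id": cid, "inactive": {"$ne": True}},
        {"$set": update_query},
        return_document=ReturnDocument.AFTER,
    )
```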
@@ -1,4 +1,5 @@
 import requests
+import time
 
 from .conftest import API_PREFIX
 
@@ -70,6 +71,8 @@ def test_crawlconfig_crawl_stats(admin_auth_headers, default_org_id, crawl_confi
     data = r.json()
     assert data["deleted"]
 
+    time.sleep(10)
+
     # Verify crawl stats from /crawlconfigs
     r = requests.get(
         f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/{crawl_config_id}",
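The new `time.sleep(10)` gives the backend time to recompute the crawlconfig's `lastCrawl*` stats after the crawl deletion before the test reads them back. For comparison, a polling helper one might use instead of a fixed sleep (a sketch only, not part of this commit; it assumes a completed reset leaves `lastCrawlId` at `None`, matching the backend change above):

```python
import time
import requests


def wait_for_stats_reset(api_prefix, org_id, cid, headers, timeout=30):
    # Poll the crawlconfig endpoint until lastCrawlId has been cleared,
    # instead of sleeping for a fixed interval.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        r = requests.get(
            f"{api_prefix}/orgs/{org_id}/crawlconfigs/{cid}", headers=headers
        )
        if r.status_code == 200 and r.json().get("lastCrawlId") is None:
            return r.json()
        time.sleep(2)
    raise TimeoutError("crawlconfig stats were not recomputed in time")
```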
@@ -8,18 +8,19 @@ from .conftest import API_PREFIX
 from .utils import get_crawl_status
 
 
-STORAGE_QUOTA_KB = 5
-STORAGE_QUOTA_BYTES = STORAGE_QUOTA_KB * 1000
+STORAGE_QUOTA_MB_TO_INCREASE = 5
+STORAGE_QUOTA_BYTES_INC = STORAGE_QUOTA_MB_TO_INCREASE * 1000 * 1000
 
 config_id = None
 
+storage_quota = None
 
 def run_crawl(org_id, headers):
     crawl_data = {
         "runNow": True,
         "name": "Storage Quota",
         "config": {
-            "seeds": [{"url": "https://webrecorder.net/"}],
+            "seeds": [{"url": "https://specs.webrecorder.net/"}],
             "extraHops": 1,
         },
     }
@@ -34,10 +35,22 @@ def run_crawl(org_id, headers):
 
 
 def test_storage_quota(org_with_quotas, admin_auth_headers):
+    # Get current storage usage
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{org_with_quotas}",
+        headers=admin_auth_headers,
+    )
+    assert r.status_code == 200
+    bytes_stored = r.json()["bytesStored"]
+
+    global storage_quota
+    storage_quota = bytes_stored + STORAGE_QUOTA_BYTES_INC
+
+    # Set storage quota higher than bytesStored
     r = requests.post(
         f"{API_PREFIX}/orgs/{org_with_quotas}/quotas",
         headers=admin_auth_headers,
-        json={"storageQuota": STORAGE_QUOTA_BYTES},
+        json={"storageQuota": storage_quota},
     )
     assert r.status_code == 200
     assert r.json()["updated"]
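Worked example of the new quota math: instead of a fixed 5 KB quota, the test now reads the org's current `bytesStored` and sets the quota 5 MB above it. With hypothetical numbers:

```python
STORAGE_QUOTA_MB_TO_INCREASE = 5
STORAGE_QUOTA_BYTES_INC = STORAGE_QUOTA_MB_TO_INCREASE * 1000 * 1000  # 5_000_000 bytes

bytes_stored = 123_456_789  # hypothetical current org usage from the /orgs endpoint
storage_quota = bytes_stored + STORAGE_QUOTA_BYTES_INC  # 128_456_789 bytes
```

Presumably this keeps the quota reachable regardless of how much data the org has already stored from earlier nightly tests.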
@@ -49,9 +62,12 @@ def test_crawl_stopped_when_storage_quota_reached(org_with_quotas, admin_auth_he
     crawl_id, config_id = run_crawl(org_with_quotas, admin_auth_headers)
     time.sleep(1)
 
+    assert crawl_id
+
     while get_crawl_status(org_with_quotas, crawl_id, admin_auth_headers) in (
         "starting",
         "waiting_capacity",
+        "waiting_org_limit",
     ):
         time.sleep(2)
 
@@ -63,14 +79,11 @@ def test_crawl_stopped_when_storage_quota_reached(org_with_quotas, admin_auth_he
     ):
         time.sleep(2)
 
-    # Ensure that crawl was stopped by quota
     assert (
         get_crawl_status(org_with_quotas, crawl_id, admin_auth_headers)
        == "stopped_storage_quota_reached"
     )
 
-    time.sleep(10)
-
     # Ensure crawl storage went over quota
     r = requests.get(
         f"{API_PREFIX}/orgs/{org_with_quotas}",
@@ -78,7 +91,7 @@ def test_crawl_stopped_when_storage_quota_reached(org_with_quotas, admin_auth_he
     )
     data = r.json()
     bytes_stored = data["bytesStored"]
-    assert bytes_stored >= STORAGE_QUOTA_BYTES
+    assert bytes_stored >= storage_quota
 
     time.sleep(5)
 