* k8s local deployment work:
  - make it easier to deploy w/o ingress by setting 'local_service_port' (suggested port 30870; see the sketch after this list)
  - if using local minio, ensure file endpoints are set to /data/ and that /data/ proxies correctly to the local bucket
  - if not using minio, ensure file endpoints point to the correct access / endpoint url
  - setup should work with docker desktop, minikube, microk8s and k3s!
  - nginx chart: bump nginx memory limit to 20Mi
  - nginx image: rename 00-default-override-resolver-config -> 00-browsertrix-nginx-init for clarity
  - nginx image: use default nginx.conf, pin to nginx 1.23.2
  - mongo: re-add readiness probe, bump connect wait timeout (needed for ci)
  - config: set superadmin username to 'admin'
  - config schema: set 'name' as required
  - add sample chart values overrides:
    - chart values: local-config.yaml for running locally with 'local_service_port'
    - chart values: add microk8s-hosted.yaml for configuring a hosted microk8s setup
    - chart values: add microk8s-ci.yaml for ci tests
  - ci: remove docker swarm tests
  - ci: add microk8s integration tests: launching the cluster, logging in, running a crawl of example.com, downloading/checking the WACZ (test script below)
  - bump to 1.1.0-beta.2
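The integration test script below hard-codes the suggested port. As a minimal sketch (hypothetical, not part of the chart or CI), the same base URLs could be derived from whichever 'local_service_port' a deployment actually uses, with a quick reachability check before running the full suite; the LOCAL_SERVICE_PORT environment variable and the empty-login probe are assumptions introduced here, not existing config:

import os

import requests

# Hypothetical smoke check, not part of the repo: 30870 is only the suggested
# 'local_service_port', so allow overriding it via an environment variable.
local_service_port = int(os.environ.get("LOCAL_SERVICE_PORT", "30870"))
host_prefix = f"http://127.0.0.1:{local_service_port}"
api_prefix = f"{host_prefix}/api"

if __name__ == "__main__":
    # The frontend should answer directly on the local service port once deployed.
    r = requests.get(host_prefix)
    print("frontend:", r.status_code)

    # An empty login attempt should still reach the backend through the /api proxy
    # (expect a 4xx validation error rather than a connection failure).
    r = requests.post(f"{api_prefix}/auth/jwt/login", data={"username": "", "password": ""})
    print("api:", r.status_code)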
import requests
import hashlib
import time
import io
import zipfile


# Base URL of the local deployment, reachable on the 'local_service_port' set in the chart values.
host_prefix = "http://127.0.0.1:30870"
api_prefix = f"{host_prefix}/api"


# State shared between the test functions, which run in file order.
access_token = None
headers = None
archive_id = None

crawl_id = None

wacz_path = None
wacz_size = None
wacz_hash = None

wacz_content = None


def test_login():
    # Log in as the superadmin and keep the bearer token for all later requests.
    username = "admin@example.com"
    password = "PASSW0RD!"
    r = requests.post(
        f"{api_prefix}/auth/jwt/login",
        data={"username": username, "password": password, "grant_type": "password"},
    )
    assert r.status_code == 200
    data = r.json()

    assert data["token_type"] == "bearer"

    global access_token
    access_token = data["access_token"]

    global headers
    headers = {"Authorization": f"Bearer {access_token}"}


def test_list_archives():
    # The superadmin's default archive should be the only one present.
    r = requests.get(f"{api_prefix}/archives", headers=headers)
    data = r.json()

    assert len(data["archives"]) == 1
    assert data["archives"][0]["id"]

    global archive_id
    archive_id = data["archives"][0]["id"]

    assert data["archives"][0]["name"] == "admin's Archive"


def test_create_new_config():
    # Create a crawl config for example.com and start a crawl immediately.
    crawl_data = {
        "runNow": True,
        "name": "Test Crawl",
        "config": {"seeds": ["https://example.com/"]},
    }
    r = requests.post(
        f"{api_prefix}/archives/{archive_id}/crawlconfigs/",
        headers=headers,
        json=crawl_data,
    )

    assert r.status_code == 200

    data = r.json()
    assert data["added"]
    assert data["run_now_job"]

    global crawl_id
    crawl_id = data["run_now_job"]


def test_wait_for_complete():
    # Poll the crawl until it completes, then record the WACZ path, size and hash.
    print("")
    print("---- Running Crawl ----")

    while True:
        r = requests.get(
            f"{api_prefix}/archives/{archive_id}/crawls/{crawl_id}.json",
            headers=headers,
        )
        data = r.json()
        assert (
            data["state"] == "starting"
            or data["state"] == "running"
            or data["state"] == "complete"
        ), data["state"]
        if data["state"] == "complete":
            break

        time.sleep(5)

    assert len(data["resources"]) == 1
    assert data["resources"][0]["path"]

    global wacz_path
    global wacz_size
    global wacz_hash
    wacz_path = data["resources"][0]["path"]
    wacz_size = data["resources"][0]["size"]
    wacz_hash = data["resources"][0]["hash"]


def test_download_wacz():
    # Download the WACZ and check its size and sha256 digest against the crawl metadata.
    r = requests.get(host_prefix + wacz_path)
    assert r.status_code == 200
    assert len(r.content) == wacz_size

    h = hashlib.sha256()
    h.update(r.content)
    assert h.hexdigest() == wacz_hash, (h.hexdigest(), wacz_hash)

    global wacz_content
    wacz_content = r.content


def test_verify_wacz():
    # Open the WACZ as a zip and confirm pages.jsonl records the seed URL.
    b = io.BytesIO(wacz_content)
    z = zipfile.ZipFile(b)

    assert "pages/pages.jsonl" in z.namelist()

    pages = z.open("pages/pages.jsonl").read().decode("utf-8")
    assert '"https://example.com/"' in pages