diff --git a/.github/workflows/k3d-nightly-ci.yaml b/.github/workflows/k3d-nightly-ci.yaml
index d27925af..9d08ab3c 100644
--- a/.github/workflows/k3d-nightly-ci.yaml
+++ b/.github/workflows/k3d-nightly-ci.yaml
@@ -7,6 +7,9 @@ on:
 
   workflow_dispatch:
 
+env:
+  ECHO_SERVER_HOST_URL: http://host.k3d.internal:18080
+
 jobs:
   collect-test-modules:
     runs-on: ubuntu-latest
diff --git a/backend/test/conftest.py b/backend/test/conftest.py
index abc7234b..cc4f30df 100644
--- a/backend/test/conftest.py
+++ b/backend/test/conftest.py
@@ -1,7 +1,6 @@
 import os
 import pytest
 import requests
-import socket
 import subprocess
 import time
 from typing import Dict
@@ -691,7 +690,7 @@ def prepare_browser_for_profile_commit(
                 break
             time.sleep(5)
         except:
-            pass
+            time.sleep(5)
 
         attempts += 1
 
diff --git a/backend/test/test_org.py b/backend/test/test_org.py
index 5c41a0fa..8d768751 100644
--- a/backend/test/test_org.py
+++ b/backend/test/test_org.py
@@ -485,87 +485,6 @@ def test_delete_invite_by_email(admin_auth_headers, non_default_org_id):
     assert data["detail"] == "invite_not_found"
 
 
-def test_update_event_webhook_urls_org_admin(admin_auth_headers, default_org_id):
-    # Verify no URLs are configured
-    r = requests.get(
-        f"{API_PREFIX}/orgs/{default_org_id}",
-        headers=admin_auth_headers,
-    )
-    assert r.status_code == 200
-    data = r.json()
-    if data.get("webhooks"):
-        webhooks = data.get("webhooks")
-        assert webhooks.get("crawlStarted") is None
-        assert webhooks.get("crawlFinished") is None
-        assert webhooks.get("crawlDeleted") is None
-        assert webhooks.get("uploadFinished") is None
-        assert webhooks.get("uploadDeleted") is None
-        assert webhooks.get("addedToCollection") is None
-        assert webhooks.get("removedFromCollection") is None
-        assert webhooks.get("collectionDeleted") is None
-
-    # Set URLs and verify
-    CRAWL_STARTED_URL = "https://example.com/crawl/started"
-    CRAWL_FINISHED_URL = "https://example.com/crawl/finished"
-    CRAWL_DELETED_URL = "https://example.com/crawl/deleted"
-    UPLOAD_FINISHED_URL = "https://example.com/upload/finished"
-    UPLOAD_DELETED_URL = "https://example.com/upload/deleted"
-    COLL_ADDED_URL = "https://example.com/coll/added"
-    COLL_REMOVED_URL = "http://example.com/coll/removed"
-    COLL_DELETED_URL = "http://example.com/coll/deleted"
-
-    r = requests.post(
-        f"{API_PREFIX}/orgs/{default_org_id}/event-webhook-urls",
-        headers=admin_auth_headers,
-        json={
-            "crawlStarted": CRAWL_STARTED_URL,
-            "crawlFinished": CRAWL_FINISHED_URL,
-            "crawlDeleted": CRAWL_DELETED_URL,
-            "uploadFinished": UPLOAD_FINISHED_URL,
-            "uploadDeleted": UPLOAD_DELETED_URL,
-            "addedToCollection": COLL_ADDED_URL,
-            "removedFromCollection": COLL_REMOVED_URL,
-            "collectionDeleted": COLL_DELETED_URL,
-        },
-    )
-    assert r.status_code == 200
-    assert r.json()["updated"]
-
-    r = requests.get(
-        f"{API_PREFIX}/orgs/{default_org_id}",
-        headers=admin_auth_headers,
-    )
-    assert r.status_code == 200
-    data = r.json()
-    urls = data["webhookUrls"]
-    assert urls["crawlStarted"] == CRAWL_STARTED_URL
-    assert urls["crawlFinished"] == CRAWL_FINISHED_URL
-    assert urls["crawlDeleted"] == CRAWL_DELETED_URL
-
-    assert urls["uploadFinished"] == UPLOAD_FINISHED_URL
-    assert urls["uploadDeleted"] == UPLOAD_DELETED_URL
-
-    assert urls["addedToCollection"] == COLL_ADDED_URL
-    assert urls["removedFromCollection"] == COLL_REMOVED_URL
-    assert urls["collectionDeleted"] == COLL_DELETED_URL
-
-
-def test_update_event_webhook_urls_org_crawler(crawler_auth_headers, default_org_id):
-    r = requests.post(
-        f"{API_PREFIX}/orgs/{default_org_id}/event-webhook-urls",
-        headers=crawler_auth_headers,
-        json={
-            "crawlStarted": "https://example.com/crawlstarted",
-            "crawlFinished": "https://example.com/crawlfinished",
-            "uploadFinished": "https://example.com/uploadfinished",
-            "addedToCollection": "https://example.com/added",
-            "removedFromCollection": "https://example.com/removed",
-        },
-    )
-    assert r.status_code == 403
-    assert r.json()["detail"] == "User does not have permission to perform this action"
-
-
 def test_org_metrics(crawler_auth_headers, default_org_id):
     r = requests.get(
         f"{API_PREFIX}/orgs/{default_org_id}/metrics",
diff --git a/backend/test/test_uploads.py b/backend/test/test_uploads.py
index 18a78800..1dd88b52 100644
--- a/backend/test/test_uploads.py
+++ b/backend/test/test_uploads.py
@@ -308,7 +308,7 @@ def test_uploads_collection_updated(
     assert data["totalSize"] > 0
     assert data["dateEarliest"]
     assert data["dateLatest"]
-    assert data["modified"] > data["created"]
+    assert data["modified"] >= data["created"]
 
 
 def test_replace_upload(
diff --git a/backend/test/test_z_delete_org.py b/backend/test/test_z_delete_org.py
index fbb9b0f0..ecf4910a 100644
--- a/backend/test/test_z_delete_org.py
+++ b/backend/test/test_z_delete_org.py
@@ -1,4 +1,5 @@
 import requests
+import time
 
 from .conftest import API_PREFIX
 
@@ -39,7 +40,7 @@ def test_recalculate_org_storage(admin_auth_headers, default_org_id):
             time.sleep(10)
         except:
-            pass
+            time.sleep(10)
 
         attempts += 1
@@ -112,7 +113,7 @@ def test_delete_org_superadmin(admin_auth_headers, default_org_id):
             time.sleep(10)
         except:
-            pass
+            time.sleep(10)
 
         attempts += 1
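Note on the `except: pass` → `except: time.sleep(...)` changes above (the same pattern recurs in `backend/test_nightly/test_org_deletion.py` further down): all of these loops poll an API endpoint until an operation finishes. A condensed sketch of the loop shape follows; `MAX_ATTEMPTS`, `url`, `headers`, and the break condition are illustrative stand-ins, not values from the diff:

    import time

    import requests

    MAX_ATTEMPTS = 18  # stand-in; each test defines its own limit

    def poll_until_done(url, headers):
        attempts = 0
        while attempts < MAX_ATTEMPTS:
            try:
                r = requests.get(url, headers=headers)
                # Stand-in condition; the real tests check org/crawl state.
                if r.json().get("state") in ("complete", "deleted"):
                    break
                time.sleep(10)
            except:
                # Previously `pass`: an exception (e.g. a connection error
                # while pods restart mid-deletion) skipped the sleep, so
                # repeated failures burned through all attempts with no
                # delay between retries.
                time.sleep(10)

            attempts += 1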
diff --git a/backend/test_nightly/data/example.wacz b/backend/test_nightly/data/example.wacz
new file mode 100644
index 00000000..840227ef
Binary files /dev/null and b/backend/test_nightly/data/example.wacz differ
diff --git a/backend/test_nightly/echo_server.py b/backend/test_nightly/echo_server.py
new file mode 100644
index 00000000..0da8715a
--- /dev/null
+++ b/backend/test_nightly/echo_server.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python3
+"""
+A web server to record POST requests and return them on a GET request
+"""
+from http.server import HTTPServer, BaseHTTPRequestHandler
+import json
+
+BIND_HOST = "0.0.0.0"
+PORT = 18080
+
+post_bodies = []
+
+
+class EchoServerHTTPRequestHandler(BaseHTTPRequestHandler):
+    def do_GET(self):
+        self.send_response(200)
+        self.end_headers()
+        self.wfile.write(json.dumps({"post_bodies": post_bodies}).encode("utf-8"))
+
+    def do_POST(self):
+        content_length = int(self.headers.get("content-length", 0))
+        body = self.rfile.read(content_length)
+        self.send_response(200)
+        if self.path.endswith("/portalUrl"):
+            self.send_header("Content-Type", "application/json")
+            self.end_headers()
+            self.wfile.write(
+                json.dumps({"portalUrl": "https://portal.example.com/path/"}).encode(
+                    "utf-8"
+                )
+            )
+        else:
+            self.end_headers()
+
+        post_bodies.append(json.loads(body.decode("utf-8").replace("'", '"')))
+
+
+httpd = HTTPServer((BIND_HOST, PORT), EchoServerHTTPRequestHandler)
+httpd.serve_forever()
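For reference, a minimal sketch of how a test can exercise this server once it is running (the localhost URL matches BIND_HOST/PORT above; the specific paths and bodies are illustrative):

    import requests

    # POST a webhook-style JSON body, then read back everything recorded
    # so far via GET; the server replies with {"post_bodies": [...]}.
    requests.post("http://localhost:18080/crawlFinished", json={"event": "crawlFinished"})
    recorded = requests.get("http://localhost:18080/").json()["post_bodies"]
    assert {"event": "crawlFinished"} in recorded

    # Paths ending in /portalUrl additionally get a JSON response body.
    r = requests.post("http://localhost:18080/watch/portalUrl", json={"event": "crawlStarted"})
    assert r.json()["portalUrl"] == "https://portal.example.com/path/"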
diff --git a/backend/test_nightly/test_org_deletion.py b/backend/test_nightly/test_org_deletion.py
index c8379936..291ba01b 100644
--- a/backend/test_nightly/test_org_deletion.py
+++ b/backend/test_nightly/test_org_deletion.py
@@ -168,7 +168,9 @@ def test_delete_org_crawl_running(
             time.sleep(10)
         except:
-            pass
+            time.sleep(10)
+
+
         attempts += 1
@@ -214,7 +216,7 @@ def test_delete_org_qa_running(
             time.sleep(10)
         except:
-            pass
+            time.sleep(10)
 
         attempts += 1
@@ -260,7 +262,7 @@ def test_delete_org_profile_running(
             time.sleep(10)
         except:
-            pass
+            time.sleep(10)
 
         attempts += 1
diff --git a/backend/test/test_webhooks.py b/backend/test_nightly/test_webhooks.py
similarity index 69%
rename from backend/test/test_webhooks.py
rename to backend/test_nightly/test_webhooks.py
index fad3244f..f617c72d 100644
--- a/backend/test/test_webhooks.py
+++ b/backend/test_nightly/test_webhooks.py
@@ -1,7 +1,9 @@
 import json
 import os
+import subprocess
 import time
 
+import pytest
 import requests
 
 from .conftest import API_PREFIX
@@ -20,8 +22,150 @@
 ECHO_SERVER_URL_FROM_K8S = os.environ.get(
     "ECHO_SERVER_HOST_URL", "http://host.docker.internal:18080"
 )
+FAILED_STATES = ["canceled", "failed", "skipped_quota_reached"]
 
-def test_list_webhook_events(admin_auth_headers, default_org_id):
+SUCCESSFUL_STATES = ["complete", "stopped_by_user", "stopped_quota_reached"]
+
+FINISHED_STATES = [*FAILED_STATES, *SUCCESSFUL_STATES]
+
+
+@pytest.fixture(scope="function")
+def echo_server():
+    print("Echo server starting", flush=True)
+    p = subprocess.Popen(["python3", os.path.join(curr_dir, "echo_server.py")])
+    print("Echo server started", flush=True)
+    time.sleep(1)
+    yield p
+    time.sleep(10)
+    print("Echo server terminating", flush=True)
+    p.terminate()
+    print("Echo server terminated", flush=True)
+
+
+@pytest.fixture(scope="session")
+def all_crawls_crawl_id(crawler_auth_headers, default_org_id):
+    # Start crawl.
+    crawl_data = {
+        "runNow": True,
+        "name": "All Crawls Test Crawl",
+        "description": "Lorem ipsum",
+        "config": {
+            "seeds": [{"url": "https://webrecorder.net/"}],
+            "exclude": "community",
+            "limit": 3,
+        },
+    }
+    r = requests.post(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/",
+        headers=crawler_auth_headers,
+        json=crawl_data,
+    )
+    data = r.json()
+    crawl_id = data["run_now_job"]
+
+    # Wait for it to complete and then return crawl ID
+    while True:
+        r = requests.get(
+            f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawl_id}/replay.json",
+            headers=crawler_auth_headers,
+        )
+        data = r.json()
+        if data["state"] in FINISHED_STATES:
+            break
+        time.sleep(5)
+
+    # Add description to crawl
+    r = requests.patch(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawl_id}",
+        headers=crawler_auth_headers,
+        json={"description": "Lorem ipsum"},
+    )
+    assert r.status_code == 200
+    return crawl_id
+
+
+def test_update_event_webhook_urls_org_admin(admin_auth_headers, default_org_id):
+    # Verify no URLs are configured
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}",
+        headers=admin_auth_headers,
+    )
+    assert r.status_code == 200
+    data = r.json()
+    if data.get("webhooks"):
+        webhooks = data.get("webhooks")
+        assert webhooks.get("crawlStarted") is None
+        assert webhooks.get("crawlFinished") is None
+        assert webhooks.get("crawlDeleted") is None
+        assert webhooks.get("uploadFinished") is None
+        assert webhooks.get("uploadDeleted") is None
+        assert webhooks.get("addedToCollection") is None
+        assert webhooks.get("removedFromCollection") is None
+        assert webhooks.get("collectionDeleted") is None
+
+    # Set URLs and verify
+    CRAWL_STARTED_URL = "https://example.com/crawl/started"
+    CRAWL_FINISHED_URL = "https://example.com/crawl/finished"
+    CRAWL_DELETED_URL = "https://example.com/crawl/deleted"
+    UPLOAD_FINISHED_URL = "https://example.com/upload/finished"
+    UPLOAD_DELETED_URL = "https://example.com/upload/deleted"
+    COLL_ADDED_URL = "https://example.com/coll/added"
+    COLL_REMOVED_URL = "http://example.com/coll/removed"
+    COLL_DELETED_URL = "http://example.com/coll/deleted"
+
+    r = requests.post(
+        f"{API_PREFIX}/orgs/{default_org_id}/event-webhook-urls",
+        headers=admin_auth_headers,
+        json={
+            "crawlStarted": CRAWL_STARTED_URL,
+            "crawlFinished": CRAWL_FINISHED_URL,
+            "crawlDeleted": CRAWL_DELETED_URL,
+            "uploadFinished": UPLOAD_FINISHED_URL,
+            "uploadDeleted": UPLOAD_DELETED_URL,
+            "addedToCollection": COLL_ADDED_URL,
+            "removedFromCollection": COLL_REMOVED_URL,
+            "collectionDeleted": COLL_DELETED_URL,
+        },
+    )
+    assert r.status_code == 200
+    assert r.json()["updated"]
+
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}",
+        headers=admin_auth_headers,
+    )
+    assert r.status_code == 200
+    data = r.json()
+    urls = data["webhookUrls"]
+    assert urls["crawlStarted"] == CRAWL_STARTED_URL
+    assert urls["crawlFinished"] == CRAWL_FINISHED_URL
+    assert urls["crawlDeleted"] == CRAWL_DELETED_URL
+
+    assert urls["uploadFinished"] == UPLOAD_FINISHED_URL
+    assert urls["uploadDeleted"] == UPLOAD_DELETED_URL
+
+    assert urls["addedToCollection"] == COLL_ADDED_URL
+    assert urls["removedFromCollection"] == COLL_REMOVED_URL
+    assert urls["collectionDeleted"] == COLL_DELETED_URL
+
+
+def test_update_event_webhook_urls_org_crawler(crawler_auth_headers, default_org_id):
+    r = requests.post(
+        f"{API_PREFIX}/orgs/{default_org_id}/event-webhook-urls",
+        headers=crawler_auth_headers,
+        json={
+            "crawlStarted": "https://example.com/crawlstarted",
+            "crawlFinished": "https://example.com/crawlfinished",
+            "uploadFinished": "https://example.com/uploadfinished",
+            "addedToCollection": "https://example.com/added",
+            "removedFromCollection": "https://example.com/removed",
+        },
+    )
+    assert r.status_code == 403
+    assert r.json()["detail"] == "User does not have permission to perform this action"
+
+
+def test_list_webhook_events(admin_auth_headers, default_org_id, crawl_id_wr):
     # Verify that webhook URLs have been set in previous tests
     r = requests.get(
         f"{API_PREFIX}/orgs/{default_org_id}",
@@ -40,6 +184,8 @@ def test_list_webhook_events(admin_auth_headers, default_org_id):
     assert urls["collectionDeleted"]
 
     # Verify list endpoint works as expected
+    # At this point we expect webhook attempts to fail since they're not
+    # configured against a valid endpoint
     r = requests.get(
         f"{API_PREFIX}/orgs/{default_org_id}/webhooks",
         headers=admin_auth_headers,
@@ -62,7 +208,7 @@
     assert _webhook_event_id
 
 
-def test_get_webhook_event(admin_auth_headers, default_org_id):
+def test_get_webhook_event(admin_auth_headers, default_org_id, crawl_id_wr):
     r = requests.get(
         f"{API_PREFIX}/orgs/{default_org_id}/webhooks/{_webhook_event_id}",
         headers=admin_auth_headers,
@@ -99,7 +245,7 @@
     assert len(body["itemIds"]) >= 1
 
 
-def test_retry_webhook_event(admin_auth_headers, default_org_id):
+def test_retry_webhook_event(admin_auth_headers, default_org_id, crawl_id_wr):
     # Expect to fail because we haven't set up URLs that accept webhooks
     r = requests.post(
         f"{API_PREFIX}/orgs/{default_org_id}/webhooks/{_webhook_event_id}/retry",
@@ -175,6 +321,7 @@ def test_webhooks_sent(
         "autoAddCollections": [webhooks_coll_id],
         "config": {
             "seeds": [{"url": "https://webrecorder.net/"}],
+            "limit": 2,
         },
     }
     r = requests.post(
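Taken together: once the `echo_server` fixture is running and the org's webhook URLs point at `ECHO_SERVER_URL_FROM_K8S`, tests like `test_webhooks_sent` can confirm delivery by polling the echo server's GET endpoint. A sketch of that verification step follows; the helper name, timeout, and localhost URL are illustrative, not part of the diff:

    import time

    import requests

    def wait_for_event(event_type, timeout=60):
        # Poll the echo server until a recorded POST body with the given
        # event type shows up, or fail after `timeout` seconds.
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            bodies = requests.get("http://localhost:18080/").json()["post_bodies"]
            for body in bodies:
                if body.get("event") == event_type:
                    return body
            time.sleep(5)
        raise AssertionError(f"no {event_type} webhook received in {timeout}s")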