Fixes #1307 Fixes #1132 Related to #1306 Deleted webhook notifications include the org id and item/collection id. This PR also includes API docs for the new webhooks and extends the existing tests to account for the new webhooks. This PR also does some additional cleanup for existing webhooks: - Remove `downloadUrls` from item finished webhook bodies - Rename collection webhook body `downloadUrls` to `downloadUrl`, since we only ever have one per collection - Fix API docs for existing webhooks, one of which had the wrong response body
343 lines
10 KiB
Python
343 lines
10 KiB
Python
import json
|
|
import os
|
|
import time
|
|
|
|
import requests
|
|
|
|
from .conftest import API_PREFIX
|
|
from .utils import read_in_chunks
|
|
|
|
_webhook_event_id = None
|
|
|
|
curr_dir = os.path.dirname(os.path.realpath(__file__))
|
|
|
|
ECHO_SERVER_URL = "http://localhost:18080"
|
|
|
|
# Pull address to echo server running on host from CI env var.
|
|
# If not set, default to host.docker.internal (for local testing with
|
|
# Docker Desktop).
|
|
ECHO_SERVER_URL_FROM_K8S = os.environ.get(
|
|
"ECHO_SERVER_HOST_URL", "http://host.docker.internal:18080"
|
|
)
|
|
|
|
|
|
def test_list_webhook_events(admin_auth_headers, default_org_id):
    """Verify webhook URLs are configured and the list endpoint returns events.

    Also stashes one event id in the module-level ``_webhook_event_id`` for
    the follow-up get/retry tests.
    """
    # Webhook URLs should have been configured by earlier tests.
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}",
        headers=admin_auth_headers,
    )
    assert r.status_code == 200
    urls = r.json()["webhookUrls"]
    for event_name in (
        "crawlStarted",
        "crawlFinished",
        "crawlDeleted",
        "uploadFinished",
        "uploadDeleted",
        "addedToCollection",
        "removedFromCollection",
        "collectionDeleted",
    ):
        assert urls[event_name]

    # The list endpoint should return at least one stored notification.
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/webhooks",
        headers=admin_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data["total"] > 0
    for event in data["items"]:
        assert event["id"]
        assert event["event"]
        assert event["oid"]
        assert event["body"]
        # No receiver accepts webhooks yet, so exactly one failed attempt.
        assert event["success"] is False
        assert event["attempts"] == 1
        assert event["created"]
        assert event["lastAttempted"]

    # Remember one event id for the subsequent get/retry tests.
    global _webhook_event_id
    _webhook_event_id = data["items"][0]["id"]
    assert _webhook_event_id
|
|
|
|
|
|
def test_get_webhook_event(admin_auth_headers, default_org_id):
    """Fetch a single webhook event by id and validate its body per event type.

    Uses the module-level ``_webhook_event_id`` saved by
    ``test_list_webhook_events``.
    """
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/webhooks/{_webhook_event_id}",
        headers=admin_auth_headers,
    )
    assert r.status_code == 200
    item = r.json()

    assert item["id"]
    assert item["oid"]
    # No receiver accepts webhooks yet, so the single attempt failed.
    assert item["success"] is False
    assert item["attempts"] == 1
    assert item["created"]
    assert item["lastAttempted"]

    body = item["body"]
    assert body

    event = item["event"]
    assert event

    if event in ("crawlFinished", "uploadFinished"):
        assert len(body["resources"]) >= 1
        assert body["itemId"]

    # Fix: was `event in ("crawlStarted")` — ("crawlStarted") is a plain
    # string, not a tuple, so `in` did a substring test instead of equality.
    elif event == "crawlStarted":
        assert len(body.get("resources", [])) == 0
        assert body["itemId"]

    elif event in ("addedToCollection", "removedFromCollection"):
        assert len(body.get("resources", [])) == 0
        assert body["downloadUrl"]
        assert body["collectionId"]
        assert len(body["itemIds"]) >= 1
|
|
|
|
|
|
def test_retry_webhook_event(admin_auth_headers, default_org_id):
    """Retry a failed webhook delivery and confirm the attempt count grows."""
    # Expected to fail again: no URL that accepts webhooks is configured yet.
    retry_url = (
        f"{API_PREFIX}/orgs/{default_org_id}/webhooks/{_webhook_event_id}/retry"
    )
    r = requests.post(retry_url, headers=admin_auth_headers)
    assert r.status_code == 200
    assert r.json()["success"]

    # Delivery retries use exponential backoff; wait for them to exhaust.
    time.sleep(90)

    # The stored event should now show a second failed attempt.
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/webhooks/{_webhook_event_id}",
        headers=admin_auth_headers,
    )
    assert r.status_code == 200
    event = r.json()

    for field in ("id", "event", "oid", "body", "created", "lastAttempted"):
        assert event[field]
    assert event["success"] is False
    assert event["attempts"] == 2
|
|
|
|
|
|
def test_webhooks_sent(
    admin_auth_headers,
    default_org_id,
    all_crawls_crawl_id,
    echo_server,
):
    """End-to-end check that every configured webhook event type is delivered.

    Points all event-webhook URLs at a local echo server, then drives a full
    lifecycle (crawl, upload, collection add/remove, deletions) and finally
    reads the echo server's captured POST bodies to verify each event type
    was sent with the expected payload fields.

    NOTE(review): the ``echo_server`` fixture presumably starts/provides the
    echo server at ECHO_SERVER_URL — confirm in conftest.
    """
    # Reconfigure event webhooks to use echo server
    r = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/event-webhook-urls",
        headers=admin_auth_headers,
        json={
            "crawlStarted": ECHO_SERVER_URL_FROM_K8S,
            "crawlFinished": ECHO_SERVER_URL_FROM_K8S,
            "crawlDeleted": ECHO_SERVER_URL_FROM_K8S,
            "uploadFinished": ECHO_SERVER_URL_FROM_K8S,
            "uploadDeleted": ECHO_SERVER_URL_FROM_K8S,
            "addedToCollection": ECHO_SERVER_URL_FROM_K8S,
            "removedFromCollection": ECHO_SERVER_URL_FROM_K8S,
            "collectionDeleted": ECHO_SERVER_URL_FROM_K8S,
        },
    )
    assert r.status_code == 200
    assert r.json()["updated"]

    # Create collection with all_crawls_crawl_id already in it
    # (triggers an addedToCollection webhook)
    r = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/collections",
        headers=admin_auth_headers,
        json={
            "name": "Event webhooks test collection",
            "crawlIds": [all_crawls_crawl_id],
        },
    )
    assert r.status_code == 200
    webhooks_coll_id = r.json()["id"]
    assert webhooks_coll_id

    # Create and run workflow that adds crawl to collection
    # (triggers crawlStarted, crawlFinished, and addedToCollection webhooks)
    crawl_data = {
        "runNow": True,
        "name": "Webhook crawl test",
        "autoAddCollections": [webhooks_coll_id],
        "config": {
            "seeds": [{"url": "https://webrecorder.net/"}],
        },
    }
    r = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/",
        headers=admin_auth_headers,
        json=crawl_data,
    )
    assert r.status_code == 200
    data = r.json()
    webhooks_config_id = data["id"]
    assert webhooks_config_id
    webhooks_crawl_id = data["run_now_job"]

    # Wait for crawl to complete
    while True:
        r = requests.get(
            f"{API_PREFIX}/orgs/{default_org_id}/crawls/{webhooks_crawl_id}/replay.json",
            headers=admin_auth_headers,
        )
        data = r.json()
        if data["state"] == "complete":
            break
        time.sleep(5)

    # Create upload and add to collection
    # (triggers uploadFinished and addedToCollection webhooks)
    with open(os.path.join(curr_dir, "data", "example.wacz"), "rb") as fh:
        r = requests.put(
            f"{API_PREFIX}/orgs/{default_org_id}/uploads/stream?filename=webhookstest.wacz&name=Webhooks%20Upload&collections={webhooks_coll_id}",
            headers=admin_auth_headers,
            data=read_in_chunks(fh),
        )

    assert r.status_code == 200
    data = r.json()
    assert data["added"]
    webhooks_upload_id = data["id"]

    # Remove upload from collection (triggers removedFromCollection)
    r = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/collections/{webhooks_coll_id}/remove",
        json={"crawlIds": [webhooks_upload_id]},
        headers=admin_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data["id"]

    # Delete upload (triggers uploadDeleted)
    r = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/uploads/delete",
        json={"crawl_ids": [webhooks_upload_id]},
        headers=admin_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data["deleted"]

    # Remove crawls from collection (triggers removedFromCollection)
    r = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/collections/{webhooks_coll_id}/remove",
        json={"crawlIds": [webhooks_crawl_id, all_crawls_crawl_id]},
        headers=admin_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data["id"]

    # Delete crawl (triggers crawlDeleted)
    r = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/crawls/delete",
        json={"crawl_ids": [webhooks_crawl_id]},
        headers=admin_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data["deleted"]

    # Delete collection (triggers collectionDeleted)
    r = requests.delete(
        f"{API_PREFIX}/orgs/{default_org_id}/collections/{webhooks_coll_id}",
        headers=admin_auth_headers,
    )
    assert r.status_code == 200

    # Wait to ensure async notifications are all sent
    time.sleep(30)

    # Send GET request to echo server to retrieve and verify POSTed data
    r = requests.get(ECHO_SERVER_URL)
    assert r.status_code == 200

    data = r.json()

    # Tally each event type seen among the echoed POST bodies.
    crawl_started_count = 0
    crawl_finished_count = 0
    crawl_deleted_count = 0
    upload_finished_count = 0
    upload_deleted_count = 0
    added_to_collection_count = 0
    removed_from_collection_count = 0
    collection_deleted_count = 0

    for post in data["post_bodies"]:
        assert post["orgId"]
        event = post["event"]
        assert event

        if event == "crawlStarted":
            crawl_started_count += 1
            assert post["itemId"]
            assert post["scheduled"] in (True, False)
            # crawlStarted bodies must not carry resources
            assert post.get("resources") is None

        elif event == "crawlFinished":
            crawl_finished_count += 1
            assert post["itemId"]
            assert post["state"]
            assert post["resources"]

        elif event == "crawlDeleted":
            crawl_deleted_count += 1
            assert post["itemId"]

        elif event == "uploadFinished":
            upload_finished_count += 1
            assert post["itemId"]
            assert post["state"]
            assert post["resources"]
            # downloadUrls was removed from item-finished bodies
            assert post.get("downloadUrls") is None

        elif event == "uploadDeleted":
            upload_deleted_count += 1
            assert post["itemId"]

        elif event == "addedToCollection":
            added_to_collection_count += 1
            # singular downloadUrl: one URL per collection
            assert post["downloadUrl"]
            assert post.get("resources") is None
            assert post["itemIds"]
            assert post["collectionId"]

        elif event == "removedFromCollection":
            removed_from_collection_count += 1
            assert post["downloadUrl"]
            assert post.get("resources") is None
            assert post["itemIds"]
            assert post["collectionId"]

        elif event == "collectionDeleted":
            collection_deleted_count += 1
            assert post["collectionId"]

    # Allow for some variability here due to timing of crawls
    assert crawl_started_count >= 1
    assert crawl_finished_count >= 1
    assert crawl_deleted_count == 1
    assert upload_finished_count == 1
    assert upload_deleted_count == 1
    assert added_to_collection_count >= 2
    assert removed_from_collection_count == 2
    assert collection_deleted_count == 1

    # Check that we've had expected number of successful webhook notifications
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/webhooks?success=True",
        headers=admin_auth_headers,
    )
    assert r.status_code == 200
    assert r.json()["total"] >= 7
|