browsertrix/backend/test/test_profiles.py
Tessa Walsh 7e5d742fd1
Backend: Add modified field and track created/modifier users for profiles (#1820)
This PR introduces backend changes that add the following fields to the
Profile model:
- `modified`
- `modifiedBy`
- `modifiedByName`
- `createdBy`
- `createdByName`

Modified fields are set to the same as the created fields when the
resource is created, and changed when the profile is updated (profile
itself or metadata).

The list profiles endpoint now also supports `sortBy` and
`sortDirection` options. The endpoint defaults to sorting by `modified`
in descending order, but can also sort on `created` and `name`.

Tests have also been updated to reflect all new behavior.
2024-05-28 17:25:22 -04:00

501 lines
16 KiB
Python

import time
from typing import Dict
from uuid import UUID
import requests
import pytest
from .conftest import API_PREFIX, FINISHED_STATES
# Name and description the first test profile is created with
PROFILE_NAME = "Test profile"
PROFILE_DESC = "Profile used for backend tests"
# Name and description the first profile is changed to by the update tests
PROFILE_NAME_UPDATED = "Updated test profile"
PROFILE_DESC_UPDATED = "Updated profile used for backend tests"
# Name and description the second test profile is created with
PROFILE_2_NAME = "Second test profile"
PROFILE_2_DESC = "Second profile used to test list endpoint"
def prepare_browser_for_profile_commit(
    browser_id: str, headers: Dict[str, str], oid: UUID
) -> None:
    """Exercise a profile browser so that it can be committed to a profile.

    Pings the browser, checks the browser info endpoint, navigates the
    browser to a new URL and finally pings it repeatedly until it reports
    success or the attempts are used up.

    Args:
        browser_id: Id of the profile browser to prepare.
        headers: Auth headers sent with every request.
        oid: Id of the org the browser belongs to.

    Raises:
        AssertionError: If the first ping, the browser info request or the
            navigate request does not return the expected response. A
            missing field in the browser info raises KeyError instead.
    """
    browser_url = f"{API_PREFIX}/orgs/{oid}/profiles/browser/{browser_id}"

    # Ping to make sure it doesn't expire
    r = requests.post(f"{browser_url}/ping", headers=headers)
    assert r.status_code == 200
    data = r.json()
    assert data.get("success")
    # An empty list of origins is acceptable, a missing key is not
    assert data.get("origins") or data.get("origins") == []

    # Verify browser seems good
    r = requests.get(browser_url, headers=headers)
    assert r.status_code == 200
    data = r.json()
    assert data["url"]
    assert data["path"]
    assert data["password"]
    assert data["auth_bearer"]
    assert data["scale"]
    assert data["oid"] == oid

    # Navigate to new URL
    r = requests.post(
        f"{browser_url}/navigate",
        headers=headers,
        json={"url": "https://webrecorder.net/tools"},
    )
    assert r.status_code == 200
    assert r.json()["success"]

    # Ping browser until ready
    max_attempts = 20
    for _ in range(max_attempts):
        try:
            r = requests.post(f"{browser_url}/ping", headers=headers)
            if r.json()["success"]:
                break
        except Exception:
            # Deliberately best-effort: a failed ping only costs one attempt.
            # Catching Exception (not a bare except) keeps Ctrl-C working.
            pass
        # Wait between attempts after failures as well as after a
        # "not ready" reply, so errors don't burn all attempts instantly.
        time.sleep(5)
@pytest.fixture(scope="module")
def profile_id(admin_auth_headers, default_org_id, profile_browser_id):
    """Create the first test profile from a profile browser and return its id.

    The create request is retried every 5 seconds until the response reports
    the profile as added. Once 300 seconds have passed, the error from the
    latest failed attempt is raised instead of retrying again.
    """
    prepare_browser_for_profile_commit(
        profile_browser_id, admin_auth_headers, default_org_id
    )

    # Create profile
    start_time = time.monotonic()
    time_limit = 300

    while True:
        try:
            r = requests.post(
                f"{API_PREFIX}/orgs/{default_org_id}/profiles",
                headers=admin_auth_headers,
                json={
                    "browserid": profile_browser_id,
                    "name": PROFILE_NAME,
                    "description": PROFILE_DESC,
                },
                timeout=10,
            )
            assert r.status_code == 200
            data = r.json()
            if data.get("added"):
                assert data["storageQuotaReached"] in (True, False)
                return data["id"]
            # "waiting_for_browser" or any other reply without "added" counts
            # as a failed attempt, so the time limit below applies to it too
            # and the loop can never spin forever.
            raise AssertionError(f"Profile not added yet: {data}")
        except Exception:
            # Exception rather than a bare except, so KeyboardInterrupt and
            # SystemExit still abort the test run immediately.
            if time.monotonic() - start_time > time_limit:
                raise
            time.sleep(5)
@pytest.fixture(scope="module")
def profile_config_id(admin_auth_headers, default_org_id, profile_id):
    """Check the new profile, then create a workflow using it and return its id.

    The profile is fetched and its fields are verified, including that no
    crawl configs reference it yet. A crawl config that uses the profile is
    then created (without running it) and its id is returned.
    """
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/profiles/{profile_id}",
        headers=admin_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data["id"] == profile_id
    assert data["name"] == PROFILE_NAME
    assert data["description"] == PROFILE_DESC
    assert data["userid"]
    assert data["oid"] == default_org_id
    # An empty list of origins is acceptable, a missing key is not
    assert data.get("origins") or data.get("origins") == []
    assert data["created"]
    assert data["createdBy"]
    assert data["createdByName"] == "admin"

    # Right after creation the modified fields are expected to be populated
    assert data["modified"]
    assert data["modifiedBy"]
    assert data["modifiedByName"] == "admin"

    assert not data["baseid"]

    resource = data["resource"]
    assert resource
    assert resource["filename"]
    assert resource["hash"]
    assert resource["size"]
    assert resource["storage"]
    assert resource["storage"]["name"]
    assert resource.get("replicas") or resource.get("replicas") == []

    # No workflow uses the profile yet
    assert data.get("crawlconfigs") == []

    # Use profile in a workflow
    r = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/",
        headers=admin_auth_headers,
        json={
            "runNow": False,
            "name": "Profile Test Crawl",
            "description": "Crawl using browser profile",
            "config": {
                "seeds": [{"url": "https://webrecorder.net/"}],
                "exclude": "community",
            },
            "profileid": profile_id,
        },
    )
    # Fail here with the status code instead of with a KeyError on "id" below
    assert r.status_code == 200
    data = r.json()
    return data["id"]
@pytest.fixture(scope="module")
def profile_2_id(admin_auth_headers, default_org_id, profile_browser_2_id):
    """Create the second test profile from a profile browser and return its id.

    The create request is retried every 5 seconds until the response reports
    the profile as added. Once 300 seconds have passed, the error from the
    latest failed attempt is raised instead of retrying again.
    """
    prepare_browser_for_profile_commit(
        profile_browser_2_id, admin_auth_headers, default_org_id
    )

    # Create profile
    start_time = time.monotonic()
    time_limit = 300

    while True:
        try:
            r = requests.post(
                f"{API_PREFIX}/orgs/{default_org_id}/profiles",
                headers=admin_auth_headers,
                json={
                    "browserid": profile_browser_2_id,
                    "name": PROFILE_2_NAME,
                    "description": PROFILE_2_DESC,
                },
                timeout=10,
            )
            assert r.status_code == 200
            data = r.json()
            if data.get("added"):
                assert data["storageQuotaReached"] in (True, False)
                return data["id"]
            # "waiting_for_browser" or any other reply without "added" counts
            # as a failed attempt, so the time limit below applies to it too
            # and the loop can never spin forever.
            raise AssertionError(f"Profile not added yet: {data}")
        except Exception:
            # Exception rather than a bare except, so KeyboardInterrupt and
            # SystemExit still abort the test run immediately.
            if time.monotonic() - start_time > time_limit:
                raise
            time.sleep(5)
def test_commit_browser_to_new_profile(admin_auth_headers, default_org_id, profile_id):
    """The profile_id fixture must have produced a non-empty profile id."""
    # All of the work happens in the fixture; this only checks its result
    assert profile_id
def test_get_profile(admin_auth_headers, default_org_id, profile_id, profile_config_id):
    """Fetch the first profile and check that it now lists the test workflow.

    The whole check is retried once per second. After 10 seconds the failure
    from the latest attempt is raised.
    """
    start_time = time.monotonic()
    time_limit = 10

    # Check get endpoint again and check that crawlconfigs is updated
    while True:
        try:
            r = requests.get(
                f"{API_PREFIX}/orgs/{default_org_id}/profiles/{profile_id}",
                headers=admin_auth_headers,
            )
            assert r.status_code == 200
            data = r.json()
            assert data["id"] == profile_id
            assert data["name"] == PROFILE_NAME
            assert data["description"] == PROFILE_DESC
            assert data["userid"]
            assert data["oid"] == default_org_id
            # An empty list of origins is acceptable, a missing key is not
            assert data.get("origins") or data.get("origins") == []
            assert data["created"]
            assert data["createdBy"]
            assert data["createdByName"] == "admin"
            assert data["modified"]
            assert data["modifiedBy"]
            assert data["modifiedByName"] == "admin"
            assert not data["baseid"]

            resource = data["resource"]
            assert resource
            assert resource["filename"]
            assert resource["hash"]
            assert resource["size"]
            assert resource["storage"]
            assert resource["storage"]["name"]
            assert resource.get("replicas") or resource.get("replicas") == []

            # Exactly the workflow created by the profile_config_id fixture
            crawl_configs = data.get("crawlconfigs")
            assert crawl_configs
            assert len(crawl_configs) == 1
            assert crawl_configs[0]["id"] == profile_config_id
            assert crawl_configs[0]["name"] == "Profile Test Crawl"
            break
        except Exception:
            # Exception rather than a bare except, so KeyboardInterrupt and
            # SystemExit are not retried.
            if time.monotonic() - start_time > time_limit:
                raise
            time.sleep(1)
def test_commit_second_profile(profile_2_id):
    """The profile_2_id fixture must have produced a non-empty profile id."""
    # All of the work happens in the fixture; this only checks its result
    assert profile_2_id
def _assert_listed_profile(profile, expected_id, expected_name, expected_desc, oid):
    """Check the fields of one item returned by the list profiles endpoint."""
    assert profile["id"] == expected_id
    assert profile["name"] == expected_name
    assert profile["description"] == expected_desc
    assert profile["userid"]
    assert profile["oid"] == oid
    # An empty origins list is fine. Both sides must look at the profile
    # itself: the enclosing list response has no "origins" key to compare.
    assert profile.get("origins") or profile.get("origins") == []
    assert profile["created"]
    assert profile["createdBy"]
    assert profile["createdByName"] == "admin"
    assert profile["modified"]
    assert profile["modifiedBy"]
    assert profile["modifiedByName"] == "admin"
    assert not profile["baseid"]

    resource = profile["resource"]
    assert resource
    assert resource["filename"]
    assert resource["hash"]
    assert resource["size"]
    assert resource["storage"]
    assert resource["storage"]["name"]
    assert resource.get("replicas") or resource.get("replicas") == []


def test_list_profiles(admin_auth_headers, default_org_id, profile_id, profile_2_id):
    """List profiles with default options and check both profiles and their order.

    The whole check is retried once per second. After 10 seconds the failure
    from the latest attempt is raised.
    """
    start_time = time.monotonic()
    time_limit = 10

    # Check list endpoint, retrying until both profiles show up as expected
    while True:
        try:
            r = requests.get(
                f"{API_PREFIX}/orgs/{default_org_id}/profiles",
                headers=admin_auth_headers,
            )
            assert r.status_code == 200
            data = r.json()
            assert data["total"] == 2

            profiles = data["items"]
            assert len(profiles) == 2

            # Second profile should be listed first by default because it was
            # modified more recently
            _assert_listed_profile(
                profiles[0],
                profile_2_id,
                PROFILE_2_NAME,
                PROFILE_2_DESC,
                default_org_id,
            )

            # First profile should be listed second by default because it was
            # modified less recently
            _assert_listed_profile(
                profiles[1],
                profile_id,
                PROFILE_NAME,
                PROFILE_DESC,
                default_org_id,
            )
            break
        except Exception:
            # Exception rather than a bare except, so KeyboardInterrupt and
            # SystemExit are not retried.
            if time.monotonic() - start_time > time_limit:
                raise
            time.sleep(1)
def test_update_profile_metadata(crawler_auth_headers, default_org_id, profile_id):
    """Rename the first profile as the crawler user and check the tracking fields.

    After the update, ``modified`` must have advanced and ``modifiedByName``
    must be the crawler user, while the ``created*`` fields stay unchanged.
    """
    profile_url = f"{API_PREFIX}/orgs/{default_org_id}/profiles/{profile_id}"

    # Remember the timestamps from before the update
    before_resp = requests.get(profile_url, headers=crawler_auth_headers)
    assert before_resp.status_code == 200
    before = before_resp.json()
    original_created = before["created"]
    original_modified = before["modified"]

    # Change name and description
    new_metadata = {
        "name": PROFILE_NAME_UPDATED,
        "description": PROFILE_DESC_UPDATED,
    }
    patch_resp = requests.patch(
        profile_url, headers=crawler_auth_headers, json=new_metadata
    )
    assert patch_resp.status_code == 200
    assert patch_resp.json()["updated"]

    time.sleep(5)

    # Read the profile back and confirm the new values
    after_resp = requests.get(profile_url, headers=crawler_auth_headers)
    assert after_resp.status_code == 200
    after = after_resp.json()
    assert after["id"] == profile_id
    assert after["name"] == PROFILE_NAME_UPDATED
    assert after["description"] == PROFILE_DESC_UPDATED

    # modified must move forward, created must not change
    assert after["modified"] > original_modified
    assert after["modifiedBy"]
    assert after["modifiedByName"] == "new-crawler"

    assert after["created"] == original_created
    assert after["createdBy"]
    assert after["createdByName"] == "admin"
def test_commit_browser_to_existing_profile(
    admin_auth_headers, default_org_id, profile_browser_3_id, profile_id
):
    """Commit a third browser to the first profile and check the tracking fields.

    After the commit, ``modified`` must have advanced and ``modifiedByName``
    must be the admin user, while the ``created*`` fields stay unchanged.
    """
    profile_url = f"{API_PREFIX}/orgs/{default_org_id}/profiles/{profile_id}"

    # Remember the timestamps from before the commit
    before_resp = requests.get(profile_url, headers=admin_auth_headers)
    assert before_resp.status_code == 200
    before = before_resp.json()
    original_created = before["created"]
    original_modified = before["modified"]

    prepare_browser_for_profile_commit(
        profile_browser_3_id, admin_auth_headers, default_org_id
    )

    # Commit the new browser to the existing profile
    commit_payload = {
        "browserid": profile_browser_3_id,
        "name": PROFILE_NAME_UPDATED,
        "description": PROFILE_DESC_UPDATED,
    }
    patch_resp = requests.patch(
        profile_url, headers=admin_auth_headers, json=commit_payload
    )
    assert patch_resp.status_code == 200
    assert patch_resp.json()["updated"]

    time.sleep(5)

    # modified must move forward, created must not change
    after_resp = requests.get(profile_url, headers=admin_auth_headers)
    assert after_resp.status_code == 200
    after = after_resp.json()
    assert after["modified"] > original_modified
    assert after["modifiedBy"]
    assert after["modifiedByName"] == "admin"

    assert after["created"] == original_created
    assert after["createdBy"]
    assert after["createdByName"] == "admin"
@pytest.mark.parametrize(
    "sort_by,sort_direction,profile_1_index,profile_2_index",
    [
        # Modified, descending
        ("modified", -1, 0, 1),
        # Modified, ascending
        ("modified", 1, 1, 0),
        # Created, descending
        ("created", -1, 1, 0),
        # Created, ascending
        ("created", 1, 0, 1),
        # Name, descending
        ("name", -1, 0, 1),
        # Name, ascending
        ("name", 1, 1, 0),
    ],
)
def test_sort_profiles(
    admin_auth_headers,
    default_org_id,
    profile_id,
    profile_2_id,
    sort_by,
    sort_direction,
    profile_1_index,
    profile_2_index,
):
    """List profiles with explicit sort options and check the resulting order.

    ``profile_1_index`` and ``profile_2_index`` are the positions at which
    the first and second profile are expected in the returned items. The
    first profile is expected under its updated name, so this test presumably
    relies on the update tests having run before it.

    The whole check is retried once per second. After 10 seconds the failure
    from the latest attempt is raised.
    """
    start_time = time.monotonic()
    time_limit = 10

    while True:
        try:
            r = requests.get(
                f"{API_PREFIX}/orgs/{default_org_id}/profiles",
                # Let requests build and encode the query string
                params={"sortBy": sort_by, "sortDirection": sort_direction},
                headers=admin_auth_headers,
            )
            assert r.status_code == 200
            data = r.json()
            assert data["total"] == 2

            profiles = data["items"]
            assert len(profiles) == 2

            profile_1 = profiles[profile_1_index]
            assert profile_1["id"] == profile_id
            assert profile_1["name"] == PROFILE_NAME_UPDATED

            profile_2 = profiles[profile_2_index]
            assert profile_2["id"] == profile_2_id
            assert profile_2["name"] == PROFILE_2_NAME
            break
        except Exception:
            # Exception rather than a bare except, so KeyboardInterrupt and
            # SystemExit are not retried.
            if time.monotonic() - start_time > time_limit:
                raise
            time.sleep(1)
def test_delete_profile(admin_auth_headers, default_org_id, profile_2_id):
    """Delete the second profile and check that it is gone afterwards.

    After a successful delete, both fetching the profile and deleting it a
    second time must answer 404 with the detail ``profile_not_found``.
    """
    profile_url = f"{API_PREFIX}/orgs/{default_org_id}/profiles/{profile_2_id}"

    # First delete succeeds
    delete_resp = requests.delete(profile_url, headers=admin_auth_headers)
    assert delete_resp.status_code == 200
    assert delete_resp.json()["success"]

    # A GET and then a repeated DELETE must both report the profile missing
    for send_request in (requests.get, requests.delete):
        missing_resp = send_request(profile_url, headers=admin_auth_headers)
        assert missing_resp.status_code == 404
        assert missing_resp.json()["detail"] == "profile_not_found"