This PR introduces backend changes that add the following fields to the Profile model:

- `modified`
- `modifiedBy`
- `modifiedByName`
- `createdBy`
- `createdByName`

The modified fields are set to the same values as the created fields when the resource is created, and are changed whenever the profile is updated (either the profile itself or its metadata).

The list profiles endpoint now also supports `sortBy` and `sortDirection` options. The endpoint defaults to sorting by `modified` in descending order, but can also sort on `created` and `name`.

Tests have also been updated to reflect all new behavior.
501 lines
16 KiB
Python
501 lines
16 KiB
Python
import time
|
|
from typing import Dict
|
|
from uuid import UUID
|
|
|
|
import requests
|
|
import pytest
|
|
|
|
from .conftest import API_PREFIX, FINISHED_STATES
|
|
|
|
|
|
# Name/description the first profile is created with
PROFILE_NAME = "Test profile"
PROFILE_DESC = "Profile used for backend tests"

# Name/description sent when the first profile is updated
PROFILE_NAME_UPDATED = "Updated test profile"
PROFILE_DESC_UPDATED = "Updated profile used for backend tests"

# Name/description the second profile is created with
PROFILE_2_NAME = "Second test profile"
PROFILE_2_DESC = "Second profile used to test list endpoint"
|
|
|
|
|
|
def prepare_browser_for_profile_commit(
    browser_id: str, headers: Dict[str, str], oid: UUID
) -> None:
    """Exercise a profile browser so that it can be committed to a profile.

    Pings the browser, checks the fields returned by the browser GET
    endpoint, navigates the browser to a new URL and then polls the ping
    endpoint until it reports success or the attempts are used up.

    The final polling phase is best-effort: if the browser never reports
    success the function still returns normally.

    Args:
        browser_id: id of the profile browser, used in the request URLs.
        headers: headers (auth) sent with every request.
        oid: org id used in the request URLs and expected in the ``oid``
            field of the browser response.

    Raises:
        AssertionError: if the initial ping, the browser lookup or the
            navigate request does not return the expected response.
    """
    browser_url = f"{API_PREFIX}/orgs/{oid}/profiles/browser/{browser_id}"

    # Ping to make sure it doesn't expire
    r = requests.post(f"{browser_url}/ping", headers=headers)
    assert r.status_code == 200
    data = r.json()
    assert data.get("success")
    assert data.get("origins") or data.get("origins") == []

    # Verify browser seems good
    r = requests.get(browser_url, headers=headers)
    assert r.status_code == 200
    data = r.json()
    assert data["url"]
    assert data["path"]
    assert data["password"]
    assert data["auth_bearer"]
    assert data["scale"]
    assert data["oid"] == oid

    # Navigate to new URL
    r = requests.post(
        f"{browser_url}/navigate",
        headers=headers,
        json={"url": "https://webrecorder.net/tools"},
    )
    assert r.status_code == 200
    assert r.json()["success"]

    # Ping browser until ready
    max_attempts = 20
    for _ in range(max_attempts):
        try:
            r = requests.post(f"{browser_url}/ping", headers=headers)
            if r.json()["success"]:
                break
        except Exception:
            # Best-effort polling: a failed or malformed ping only costs one
            # attempt. Catching Exception (not a bare except) lets
            # KeyboardInterrupt/SystemExit abort the wait.
            pass
        # Wait between attempts, including after a failed request, so that
        # errors do not burn through all attempts back-to-back.
        time.sleep(5)
|
|
|
|
|
|
@pytest.fixture(scope="module")
def profile_id(admin_auth_headers, default_org_id, profile_browser_id):
    """Commit ``profile_browser_id`` to a new profile and return the profile id.

    The create request is retried every 5 seconds until the response reports
    ``added``. Failed attempts (request errors, non-200 responses) and
    responses that do not yet report ``added`` (for example the
    ``waiting_for_browser`` detail) are both bounded by the same time limit.

    Raises:
        TimeoutError: if the API keeps answering without ``added`` for longer
            than the time limit.
        Exception: the error of the last failed attempt, once the time limit
            has been exceeded.
    """
    prepare_browser_for_profile_commit(
        profile_browser_id, admin_auth_headers, default_org_id
    )

    # Create profile
    start_time = time.monotonic()
    time_limit = 300
    while True:
        try:
            r = requests.post(
                f"{API_PREFIX}/orgs/{default_org_id}/profiles",
                headers=admin_auth_headers,
                json={
                    "browserid": profile_browser_id,
                    "name": PROFILE_NAME,
                    "description": PROFILE_DESC,
                },
                timeout=10,
            )
            assert r.status_code == 200
            data = r.json()
            if data.get("added"):
                assert data["storageQuotaReached"] in (True, False)
                return data["id"]
        except Exception:
            # Retry failed attempts until the time limit, then surface the
            # last error. Not a bare except, so Ctrl-C still aborts the wait.
            if time.monotonic() - start_time > time_limit:
                raise
        else:
            # Valid response but the profile is not added yet (e.g.
            # "waiting_for_browser"): enforce the time limit here as well so
            # the fixture cannot wait forever.
            if time.monotonic() - start_time > time_limit:
                raise TimeoutError(
                    f"profile was not created within {time_limit}s, "
                    f"last response: {data}"
                )
        time.sleep(5)
|
|
|
|
|
|
@pytest.fixture(scope="module")
def profile_config_id(admin_auth_headers, default_org_id, profile_id):
    """Create a workflow that uses the first profile and return the workflow id.

    Before creating the workflow, the profile is fetched and checked: it must
    carry the original name/description, the created/modified audit fields,
    a stored resource, and an empty ``crawlconfigs`` list.
    """
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/profiles/{profile_id}",
        headers=admin_auth_headers,
    )
    assert r.status_code == 200
    profile = r.json()

    # Fields with an exact expected value
    expected_values = {
        "id": profile_id,
        "name": PROFILE_NAME,
        "description": PROFILE_DESC,
        "oid": default_org_id,
        "createdByName": "admin",
        "modifiedByName": "admin",
    }
    for key, value in expected_values.items():
        assert profile[key] == value

    # Fields that only need to be present and truthy
    for key in ("userid", "created", "createdBy", "modified", "modifiedBy"):
        assert profile[key]

    origins = profile.get("origins")
    assert origins or origins == []
    assert not profile["baseid"]

    resource = profile["resource"]
    assert resource
    for key in ("filename", "hash", "size", "storage"):
        assert resource[key]
    assert resource["storage"]["name"]
    replicas = resource.get("replicas")
    assert replicas or replicas == []

    assert profile.get("crawlconfigs") == []

    # Use profile in a workflow
    workflow = {
        "runNow": False,
        "name": "Profile Test Crawl",
        "description": "Crawl using browser profile",
        "config": {
            "seeds": [{"url": "https://webrecorder.net/"}],
            "exclude": "community",
        },
        "profileid": profile_id,
    }
    r = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/",
        headers=admin_auth_headers,
        json=workflow,
    )
    return r.json()["id"]
|
|
|
|
|
|
@pytest.fixture(scope="module")
def profile_2_id(admin_auth_headers, default_org_id, profile_browser_2_id):
    """Commit ``profile_browser_2_id`` to a second profile and return its id.

    The create request is retried every 5 seconds until the response reports
    ``added``. Failed attempts (request errors, non-200 responses) and
    responses that do not yet report ``added`` (for example the
    ``waiting_for_browser`` detail) are both bounded by the same time limit.

    Raises:
        TimeoutError: if the API keeps answering without ``added`` for longer
            than the time limit.
        Exception: the error of the last failed attempt, once the time limit
            has been exceeded.
    """
    prepare_browser_for_profile_commit(
        profile_browser_2_id, admin_auth_headers, default_org_id
    )

    # Create profile
    start_time = time.monotonic()
    time_limit = 300
    while True:
        try:
            r = requests.post(
                f"{API_PREFIX}/orgs/{default_org_id}/profiles",
                headers=admin_auth_headers,
                json={
                    "browserid": profile_browser_2_id,
                    "name": PROFILE_2_NAME,
                    "description": PROFILE_2_DESC,
                },
                timeout=10,
            )
            assert r.status_code == 200
            data = r.json()
            if data.get("added"):
                assert data["storageQuotaReached"] in (True, False)
                return data["id"]
        except Exception:
            # Retry failed attempts until the time limit, then surface the
            # last error. Not a bare except, so Ctrl-C still aborts the wait.
            if time.monotonic() - start_time > time_limit:
                raise
        else:
            # Valid response but the profile is not added yet (e.g.
            # "waiting_for_browser"): enforce the time limit here as well so
            # the fixture cannot wait forever.
            if time.monotonic() - start_time > time_limit:
                raise TimeoutError(
                    f"profile was not created within {time_limit}s, "
                    f"last response: {data}"
                )
        time.sleep(5)
|
|
|
|
|
|
def test_commit_browser_to_new_profile(admin_auth_headers, default_org_id, profile_id):
|
|
assert profile_id
|
|
|
|
|
|
def test_get_profile(admin_auth_headers, default_org_id, profile_id, profile_config_id):
    """Fetch the first profile and check its fields and linked workflow.

    The request and assertions are retried once per second for up to
    ``time_limit`` seconds; after that the last failure is re-raised.
    """
    start_time = time.monotonic()
    time_limit = 10
    # Check get endpoint again and check that crawlconfigs is updated
    while True:
        try:
            r = requests.get(
                f"{API_PREFIX}/orgs/{default_org_id}/profiles/{profile_id}",
                headers=admin_auth_headers,
            )
            assert r.status_code == 200
            data = r.json()
            assert data["id"] == profile_id
            assert data["name"] == PROFILE_NAME
            assert data["description"] == PROFILE_DESC
            assert data["userid"]
            assert data["oid"] == default_org_id
            assert data.get("origins") or data.get("origins") == []
            assert data["created"]
            assert data["createdBy"]
            assert data["createdByName"] == "admin"
            assert data["modified"]
            assert data["modifiedBy"]
            assert data["modifiedByName"] == "admin"
            assert not data["baseid"]

            resource = data["resource"]
            assert resource
            assert resource["filename"]
            assert resource["hash"]
            assert resource["size"]
            assert resource["storage"]
            assert resource["storage"]["name"]
            assert resource.get("replicas") or resource.get("replicas") == []

            # The workflow created by the profile_config_id fixture must now
            # be listed on the profile
            crawl_configs = data.get("crawlconfigs")
            assert crawl_configs
            assert len(crawl_configs) == 1
            assert crawl_configs[0]["id"] == profile_config_id
            assert crawl_configs[0]["name"] == "Profile Test Crawl"
            break
        except Exception:
            # Retry assertion/request failures until the time limit. Not a
            # bare except, so KeyboardInterrupt/SystemExit are not swallowed.
            if time.monotonic() - start_time > time_limit:
                raise
            time.sleep(1)
|
|
|
|
|
|
def test_commit_second_profile(profile_2_id):
|
|
assert profile_2_id
|
|
|
|
|
|
def test_list_profiles(admin_auth_headers, default_org_id, profile_id, profile_2_id):
    """List profiles with default sorting and check both entries in order.

    The request and assertions are retried once per second for up to
    ``time_limit`` seconds; after that the last failure is re-raised.
    """
    # Expected (id, name, description) in listing order. The second profile
    # should be listed first by default because it was modified more
    # recently; the first profile comes second because it was modified less
    # recently.
    expected_profiles = (
        (profile_2_id, PROFILE_2_NAME, PROFILE_2_DESC),
        (profile_id, PROFILE_NAME, PROFILE_DESC),
    )

    start_time = time.monotonic()
    time_limit = 10
    # Retry the list endpoint until it returns both profiles as expected
    while True:
        try:
            r = requests.get(
                f"{API_PREFIX}/orgs/{default_org_id}/profiles",
                headers=admin_auth_headers,
            )
            assert r.status_code == 200
            data = r.json()
            assert data["total"] == 2

            profiles = data["items"]
            assert len(profiles) == 2

            for profile, (exp_id, exp_name, exp_desc) in zip(
                profiles, expected_profiles
            ):
                assert profile["id"] == exp_id
                assert profile["name"] == exp_name
                assert profile["description"] == exp_desc
                assert profile["userid"]
                assert profile["oid"] == default_org_id
                # Check the profile's own origins; an empty list is valid.
                # (Falling back to the list response's "origins" key, which
                # is not the profile's value, would reject an empty list.)
                assert profile.get("origins") or profile.get("origins") == []
                assert profile["created"]
                assert profile["createdBy"]
                assert profile["createdByName"] == "admin"
                assert profile["modified"]
                assert profile["modifiedBy"]
                assert profile["modifiedByName"] == "admin"
                assert not profile["baseid"]

                resource = profile["resource"]
                assert resource
                assert resource["filename"]
                assert resource["hash"]
                assert resource["size"]
                assert resource["storage"]
                assert resource["storage"]["name"]
                assert resource.get("replicas") or resource.get("replicas") == []

            break
        except Exception:
            # Retry assertion/request failures until the time limit. Not a
            # bare except, so KeyboardInterrupt/SystemExit are not swallowed.
            if time.monotonic() - start_time > time_limit:
                raise
            time.sleep(1)
|
|
|
|
|
|
def test_update_profile_metadata(crawler_auth_headers, default_org_id, profile_id):
    """Patch the profile's name/description and check the audit fields.

    After the update, ``modified`` must be greater than before and
    ``modifiedByName`` must be "new-crawler", while ``created`` and
    ``createdByName`` stay unchanged.
    """
    profile_url = f"{API_PREFIX}/orgs/{default_org_id}/profiles/{profile_id}"

    # Get original created/modified times
    before = requests.get(profile_url, headers=crawler_auth_headers)
    assert before.status_code == 200
    before_data = before.json()
    original_created = before_data["created"]
    original_modified = before_data["modified"]

    # Update name and description
    changes = {
        "name": PROFILE_NAME_UPDATED,
        "description": PROFILE_DESC_UPDATED,
    }
    update = requests.patch(profile_url, headers=crawler_auth_headers, json=changes)
    assert update.status_code == 200
    assert update.json()["updated"]

    time.sleep(5)

    # Verify update
    after = requests.get(profile_url, headers=crawler_auth_headers)
    assert after.status_code == 200
    updated = after.json()
    assert updated["id"] == profile_id
    assert updated["name"] == PROFILE_NAME_UPDATED
    assert updated["description"] == PROFILE_DESC_UPDATED

    # Ensure modified was updated but created was not
    assert updated["modified"] > original_modified
    assert updated["modifiedBy"]
    assert updated["modifiedByName"] == "new-crawler"

    assert updated["created"] == original_created
    assert updated["createdBy"]
    assert updated["createdByName"] == "admin"
|
|
|
|
|
|
def test_commit_browser_to_existing_profile(
    admin_auth_headers, default_org_id, profile_browser_3_id, profile_id
):
    """Commit a third browser to the existing profile and check audit fields.

    After the commit, ``modified`` must be greater than before, while
    ``created`` stays unchanged; both name fields must be "admin".
    """
    profile_url = f"{API_PREFIX}/orgs/{default_org_id}/profiles/{profile_id}"

    # Get original modified time
    before = requests.get(profile_url, headers=admin_auth_headers)
    assert before.status_code == 200
    before_data = before.json()
    original_created = before_data["created"]
    original_modified = before_data["modified"]

    prepare_browser_for_profile_commit(
        profile_browser_3_id, admin_auth_headers, default_org_id
    )

    # Commit new browser to existing profile
    commit_body = {
        "browserid": profile_browser_3_id,
        "name": PROFILE_NAME_UPDATED,
        "description": PROFILE_DESC_UPDATED,
    }
    commit = requests.patch(profile_url, headers=admin_auth_headers, json=commit_body)
    assert commit.status_code == 200
    assert commit.json()["updated"]

    time.sleep(5)

    # Ensure modified was updated but created was not
    after = requests.get(profile_url, headers=admin_auth_headers)
    assert after.status_code == 200
    updated = after.json()
    assert updated["modified"] > original_modified
    assert updated["modifiedBy"]
    assert updated["modifiedByName"] == "admin"

    assert updated["created"] == original_created
    assert updated["createdBy"]
    assert updated["createdByName"] == "admin"
|
|
|
|
|
|
@pytest.mark.parametrize(
    "sort_by,sort_direction,profile_1_index,profile_2_index",
    [
        # Modified, descending
        ("modified", -1, 0, 1),
        # Modified, ascending
        ("modified", 1, 1, 0),
        # Created, descending
        ("created", -1, 1, 0),
        # Created, ascending
        ("created", 1, 0, 1),
        # Name, descending
        ("name", -1, 0, 1),
        # Name, ascending
        ("name", 1, 1, 0),
    ],
)
def test_sort_profiles(
    admin_auth_headers,
    default_org_id,
    profile_id,
    profile_2_id,
    sort_by,
    sort_direction,
    profile_1_index,
    profile_2_index,
):
    """List profiles with explicit sort options and check the item order.

    ``profile_1_index`` / ``profile_2_index`` are the positions at which the
    first and second profile are expected in the returned ``items`` for the
    given ``sort_by`` / ``sort_direction``.

    The request and assertions are retried once per second for up to
    ``time_limit`` seconds; after that the last failure is re-raised.
    """
    start_time = time.monotonic()
    time_limit = 10
    while True:
        try:
            r = requests.get(
                f"{API_PREFIX}/orgs/{default_org_id}/profiles",
                # Let requests build and encode the query string
                params={"sortBy": sort_by, "sortDirection": sort_direction},
                headers=admin_auth_headers,
            )
            assert r.status_code == 200
            data = r.json()
            assert data["total"] == 2

            profiles = data["items"]
            assert len(profiles) == 2

            profile_1 = profiles[profile_1_index]
            assert profile_1["id"] == profile_id
            assert profile_1["name"] == PROFILE_NAME_UPDATED

            profile_2 = profiles[profile_2_index]
            assert profile_2["id"] == profile_2_id
            assert profile_2["name"] == PROFILE_2_NAME

            break
        except Exception:
            # Retry assertion/request failures until the time limit. Not a
            # bare except, so KeyboardInterrupt/SystemExit are not swallowed.
            if time.monotonic() - start_time > time_limit:
                raise
            time.sleep(1)
|
|
|
|
|
|
def test_delete_profile(admin_auth_headers, default_org_id, profile_2_id):
    """Delete the second profile and check that it is gone afterwards.

    The first DELETE must succeed; a following GET and a second DELETE must
    both answer 404 with the ``profile_not_found`` detail.
    """
    profile_url = f"{API_PREFIX}/orgs/{default_org_id}/profiles/{profile_2_id}"

    # Delete second profile
    deleted = requests.delete(profile_url, headers=admin_auth_headers)
    assert deleted.status_code == 200
    assert deleted.json()["success"]

    # Verify profile has been deleted, then try to delete it again and
    # verify we get a 404 both times
    for send in (requests.get, requests.delete):
        missing = send(profile_url, headers=admin_auth_headers)
        assert missing.status_code == 404
        assert missing.json()["detail"] == "profile_not_found"
|