558 lines
		
	
	
		
			18 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			558 lines
		
	
	
		
			18 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import time
 | |
| from typing import Dict
 | |
| from uuid import UUID
 | |
| 
 | |
| import requests
 | |
| import pytest
 | |
| 
 | |
| from .conftest import API_PREFIX, FINISHED_STATES
 | |
| 
 | |
| 
 | |
# Name and description the first profile is created with
PROFILE_NAME = "Test profile"
PROFILE_DESC = "Profile used for backend tests"

# Name and description the first profile is changed to by the update tests
PROFILE_NAME_UPDATED = "Updated test profile"
PROFILE_DESC_UPDATED = "Updated profile used for backend tests"

# Name and description the second profile is created with
PROFILE_2_NAME = "Second test profile"
PROFILE_2_DESC = "Second profile used to test list endpoint"
 | |
| 
 | |
| 
 | |
def prepare_browser_for_profile_commit(
    browser_id: str, headers: Dict[str, str], oid: UUID
) -> None:
    """Exercise a profile browser so that it can be committed to a profile.

    Pings the browser, checks the fields returned by the browser info
    endpoint, navigates the browser to a new URL and finally polls the ping
    endpoint until it reports success or the attempts are used up.

    Args:
        browser_id: id of the profile browser to prepare.
        headers: auth headers sent with every request.
        oid: id of the org the browser belongs to.

    Raises:
        AssertionError: if the initial ping, the browser info check or the
            navigate request does not return the expected response.
    """
    # Ping to make sure it doesn't expire
    r = requests.post(
        f"{API_PREFIX}/orgs/{oid}/profiles/browser/{browser_id}/ping",
        headers=headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data.get("success")
    assert data.get("origins") or data.get("origins") == []

    # Verify browser seems good
    r = requests.get(
        f"{API_PREFIX}/orgs/{oid}/profiles/browser/{browser_id}",
        headers=headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data["url"]
    assert data["path"]
    assert data["password"]
    assert data["auth_bearer"]
    assert data["scale"]
    assert data["oid"] == oid

    # Navigate to new URL
    r = requests.post(
        f"{API_PREFIX}/orgs/{oid}/profiles/browser/{browser_id}/navigate",
        headers=headers,
        json={"url": "https://webrecorder.net/tools"},
    )
    assert r.status_code == 200
    assert r.json()["success"]

    # Ping browser until ready. This stays best-effort: running out of
    # attempts is not an error here, the callers retry the commit themselves.
    max_attempts = 20
    for _ in range(max_attempts):
        try:
            r = requests.post(
                f"{API_PREFIX}/orgs/{oid}/profiles/browser/{browser_id}/ping",
                headers=headers,
            )
            ready = r.json()["success"]
        except Exception:
            # Exception (not a bare except) so Ctrl-C / SystemExit still abort
            ready = False
        if ready:
            break
        # Wait after every unsuccessful attempt, including failed requests,
        # so the attempts are not used up instantly while the browser restarts
        time.sleep(5)
 | |
| 
 | |
| 
 | |
@pytest.fixture(scope="module")
def profile_id(admin_auth_headers, default_org_id, profile_browser_id):
    """Commit the first profile browser to a new profile and return its id.

    The create request is repeated every 5 seconds until the response
    reports ``added``. Once ``time_limit`` seconds have passed, the last
    failure is re-raised, or a ``TimeoutError`` is raised if the API kept
    answering without creating the profile.
    """
    prepare_browser_for_profile_commit(
        profile_browser_id, admin_auth_headers, default_org_id
    )

    # Create profile
    start_time = time.monotonic()
    time_limit = 300
    while True:
        try:
            r = requests.post(
                f"{API_PREFIX}/orgs/{default_org_id}/profiles",
                headers=admin_auth_headers,
                json={
                    "browserid": profile_browser_id,
                    "name": PROFILE_NAME,
                    "description": PROFILE_DESC,
                },
                timeout=10,
            )
            assert r.status_code == 200
            data = r.json()
            if data.get("added"):
                assert data["storageQuotaReached"] in (True, False)
                return data["id"]
        except Exception:
            # Exception (not a bare except) so Ctrl-C / SystemExit still abort
            if time.monotonic() - start_time > time_limit:
                raise
        else:
            # Valid response but no profile yet (e.g. "waiting_for_browser"):
            # enforce the time limit here too so the loop cannot run forever
            if time.monotonic() - start_time > time_limit:
                raise TimeoutError(
                    f"profile was not created within {time_limit} seconds, "
                    f"last response: {data}"
                )
        time.sleep(5)
 | |
| 
 | |
| 
 | |
@pytest.fixture(scope="module")
def profile_config_id(admin_auth_headers, default_org_id, profile_id):
    """Check the freshly created profile, then create a workflow that uses it.

    Returns the id of the created crawl config.
    """
    resp = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/profiles/{profile_id}",
        headers=admin_auth_headers,
    )
    assert resp.status_code == 200
    profile = resp.json()

    # Metadata of the new profile
    assert profile["id"] == profile_id
    assert profile["name"] == PROFILE_NAME
    assert profile["description"] == PROFILE_DESC
    assert profile["userid"]
    assert profile["oid"] == default_org_id
    origins = profile.get("origins")
    assert origins or origins == []
    assert profile["created"]
    assert profile["createdBy"]
    assert profile["createdByName"] == "admin"
    assert profile["modified"]
    assert profile["modifiedBy"]
    assert profile["modifiedByName"] == "admin"
    assert not profile["baseid"]

    # Stored profile file
    resource = profile["resource"]
    assert resource
    for field in ("filename", "hash", "size", "storage"):
        assert resource[field]
    assert resource["storage"]["name"]
    replicas = resource.get("replicas")
    assert replicas or replicas == []

    # No workflow refers to the profile yet
    assert profile.get("crawlconfigs") == []

    # Use profile in a workflow
    workflow_payload = {
        "runNow": False,
        "name": "Profile Test Crawl",
        "description": "Crawl using browser profile",
        "config": {
            "seeds": [{"url": "https://webrecorder.net/"}],
            "exclude": "community",
        },
        "profileid": profile_id,
    }
    resp = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/",
        headers=admin_auth_headers,
        json=workflow_payload,
    )
    workflow = resp.json()
    return workflow["id"]
 | |
| 
 | |
| 
 | |
@pytest.fixture(scope="module")
def profile_2_id(admin_auth_headers, default_org_id, profile_browser_2_id):
    """Commit the second profile browser to a new profile and return its id.

    The create request is repeated every 5 seconds until the response
    reports ``added``. Once ``time_limit`` seconds have passed, the last
    failure is re-raised, or a ``TimeoutError`` is raised if the API kept
    answering without creating the profile.
    """
    prepare_browser_for_profile_commit(
        profile_browser_2_id, admin_auth_headers, default_org_id
    )

    # Create profile
    start_time = time.monotonic()
    time_limit = 300
    while True:
        try:
            r = requests.post(
                f"{API_PREFIX}/orgs/{default_org_id}/profiles",
                headers=admin_auth_headers,
                json={
                    "browserid": profile_browser_2_id,
                    "name": PROFILE_2_NAME,
                    "description": PROFILE_2_DESC,
                },
                timeout=10,
            )
            assert r.status_code == 200
            data = r.json()
            if data.get("added"):
                assert data["storageQuotaReached"] in (True, False)
                return data["id"]
        except Exception:
            # Exception (not a bare except) so Ctrl-C / SystemExit still abort
            if time.monotonic() - start_time > time_limit:
                raise
        else:
            # Valid response but no profile yet (e.g. "waiting_for_browser"):
            # enforce the time limit here too so the loop cannot run forever
            if time.monotonic() - start_time > time_limit:
                raise TimeoutError(
                    f"profile was not created within {time_limit} seconds, "
                    f"last response: {data}"
                )
        time.sleep(5)
 | |
| 
 | |
| 
 | |
def test_commit_browser_to_new_profile(admin_auth_headers, default_org_id, profile_id):
    """Pass when the ``profile_id`` fixture yielded a truthy profile id."""
    has_profile = bool(profile_id)
    assert has_profile
 | |
| 
 | |
| 
 | |
def test_get_profile(admin_auth_headers, default_org_id, profile_id, profile_config_id):
    """Check the profile detail endpoint once a workflow uses the profile.

    The request and all checks are retried once per second; after
    ``time_limit`` seconds the last failure is re-raised.
    """
    start_time = time.monotonic()
    time_limit = 10
    # Check get endpoint again and check that crawlconfigs is updated
    while True:
        try:
            r = requests.get(
                f"{API_PREFIX}/orgs/{default_org_id}/profiles/{profile_id}",
                headers=admin_auth_headers,
            )
            assert r.status_code == 200
            data = r.json()
            assert data["id"] == profile_id
            assert data["name"] == PROFILE_NAME
            assert data["description"] == PROFILE_DESC
            assert data["userid"]
            assert data["oid"] == default_org_id
            assert data.get("origins") or data.get("origins") == []
            assert data["created"]
            assert data["createdBy"]
            assert data["createdByName"] == "admin"
            assert data["modified"]
            assert data["modifiedBy"]
            assert data["modifiedByName"] == "admin"
            assert not data["baseid"]

            resource = data["resource"]
            assert resource
            assert resource["filename"]
            assert resource["hash"]
            assert resource["size"]
            assert resource["storage"]
            assert resource["storage"]["name"]
            assert resource.get("replicas") or resource.get("replicas") == []

            # The workflow created by the profile_config_id fixture must be listed
            crawl_configs = data.get("crawlconfigs")
            assert crawl_configs
            assert len(crawl_configs) == 1
            assert crawl_configs[0]["id"] == profile_config_id
            assert crawl_configs[0]["name"] == "Profile Test Crawl"
            assert crawl_configs[0]["firstSeed"] == "https://webrecorder.net/"
            assert crawl_configs[0]["seedCount"] == 1
            break
        except Exception:
            # Exception (not a bare except) so Ctrl-C / SystemExit still abort
            if time.monotonic() - start_time > time_limit:
                raise
            time.sleep(1)
 | |
| 
 | |
| 
 | |
def test_commit_second_profile(profile_2_id):
    """Pass when the ``profile_2_id`` fixture yielded a truthy profile id."""
    has_profile = bool(profile_2_id)
    assert has_profile
 | |
| 
 | |
| 
 | |
def _assert_listed_profile(profile, expected_id, expected_name, expected_desc, oid):
    """Assert that one item of the profile list carries the expected metadata."""
    assert profile["id"] == expected_id
    assert profile["name"] == expected_name
    assert profile["description"] == expected_desc
    assert profile["userid"]
    assert profile["oid"] == oid
    # Check the item's own origins; an empty list is acceptable
    origins = profile.get("origins")
    assert origins or origins == []
    assert profile["created"]
    assert profile["createdBy"]
    assert profile["createdByName"] == "admin"
    assert profile["modified"]
    assert profile["modifiedBy"]
    assert profile["modifiedByName"] == "admin"
    assert not profile["baseid"]

    resource = profile["resource"]
    assert resource
    assert resource["filename"]
    assert resource["hash"]
    assert resource["size"]
    assert resource["storage"]
    assert resource["storage"]["name"]
    replicas = resource.get("replicas")
    assert replicas or replicas == []


def test_list_profiles(admin_auth_headers, default_org_id, profile_id, profile_2_id):
    """Check that the list endpoint returns both profiles in default order.

    The request and all checks are retried once per second; after
    ``time_limit`` seconds the last failure is re-raised.
    """
    start_time = time.monotonic()
    time_limit = 10
    # Check list endpoint until both profiles are returned as expected
    while True:
        try:
            r = requests.get(
                f"{API_PREFIX}/orgs/{default_org_id}/profiles",
                headers=admin_auth_headers,
            )
            assert r.status_code == 200
            data = r.json()
            assert data["total"] == 2

            profiles = data["items"]
            assert len(profiles) == 2

            # Second profile should be listed first by default because it was
            # modified more recently
            _assert_listed_profile(
                profiles[0],
                profile_2_id,
                PROFILE_2_NAME,
                PROFILE_2_DESC,
                default_org_id,
            )

            # First profile should be listed second by default because it was
            # modified less recently
            _assert_listed_profile(
                profiles[1],
                profile_id,
                PROFILE_NAME,
                PROFILE_DESC,
                default_org_id,
            )

            break
        except Exception:
            # Exception (not a bare except) so Ctrl-C / SystemExit still abort
            if time.monotonic() - start_time > time_limit:
                raise
            time.sleep(1)
 | |
| 
 | |
| 
 | |
def test_update_profile_metadata(crawler_auth_headers, default_org_id, profile_id):
    """Rename the first profile with the crawler headers and check the result.

    After the update the name and description must have changed, ``modified``
    must be later than before, and ``created`` must be unchanged.
    """
    profile_url = f"{API_PREFIX}/orgs/{default_org_id}/profiles/{profile_id}"

    # Get original created/modified times
    before_resp = requests.get(profile_url, headers=crawler_auth_headers)
    assert before_resp.status_code == 200
    before = before_resp.json()
    original_created = before["created"]
    original_modified = before["modified"]

    # Update name and description
    new_metadata = {
        "name": PROFILE_NAME_UPDATED,
        "description": PROFILE_DESC_UPDATED,
    }
    patch_resp = requests.patch(
        profile_url, headers=crawler_auth_headers, json=new_metadata
    )
    assert patch_resp.status_code == 200
    assert patch_resp.json()["updated"]

    time.sleep(5)

    # Verify update
    after_resp = requests.get(profile_url, headers=crawler_auth_headers)
    assert after_resp.status_code == 200
    after = after_resp.json()
    assert after["id"] == profile_id
    assert after["name"] == PROFILE_NAME_UPDATED
    assert after["description"] == PROFILE_DESC_UPDATED

    # Ensure modified was updated but created was not
    assert after["modified"] > original_modified
    assert after["modifiedBy"]
    assert after["modifiedByName"] == "new-crawler"

    assert after["created"] == original_created
    assert after["createdBy"]
    assert after["createdByName"] == "admin"
 | |
| 
 | |
| 
 | |
def test_commit_browser_to_existing_profile(
    admin_auth_headers, default_org_id, profile_browser_3_id, profile_id
):
    """Commit a third browser to the existing first profile.

    After the commit ``modified`` must be later than before while ``created``
    stays unchanged.
    """
    profile_url = f"{API_PREFIX}/orgs/{default_org_id}/profiles/{profile_id}"

    # Get original created/modified times
    before_resp = requests.get(profile_url, headers=admin_auth_headers)
    assert before_resp.status_code == 200
    before = before_resp.json()
    original_created = before["created"]
    original_modified = before["modified"]

    prepare_browser_for_profile_commit(
        profile_browser_3_id, admin_auth_headers, default_org_id
    )

    # Commit new browser to existing profile
    commit_payload = {
        "browserid": profile_browser_3_id,
        "name": PROFILE_NAME_UPDATED,
        "description": PROFILE_DESC_UPDATED,
    }
    patch_resp = requests.patch(
        profile_url, headers=admin_auth_headers, json=commit_payload
    )
    assert patch_resp.status_code == 200
    assert patch_resp.json()["updated"]

    time.sleep(5)

    # Ensure modified was updated but created was not
    after_resp = requests.get(profile_url, headers=admin_auth_headers)
    assert after_resp.status_code == 200
    after = after_resp.json()
    assert after["modified"] > original_modified
    assert after["modifiedBy"]
    assert after["modifiedByName"] == "admin"

    assert after["created"] == original_created
    assert after["createdBy"]
    assert after["createdByName"] == "admin"
 | |
| 
 | |
| 
 | |
@pytest.mark.parametrize(
    "sort_by,sort_direction,profile_1_index,profile_2_index",
    [
        # Modified, descending
        ("modified", -1, 0, 1),
        # Modified, ascending
        ("modified", 1, 1, 0),
        # Created, descending
        ("created", -1, 1, 0),
        # Created, ascending
        ("created", 1, 0, 1),
        # Name, descending
        ("name", -1, 0, 1),
        # Name, ascending
        ("name", 1, 1, 0),
        # URL, descending
        ("url", -1, 0, 1),
        # URL, ascending
        ("url", 1, 1, 0),
    ],
)
def test_sort_profiles(
    admin_auth_headers,
    default_org_id,
    profile_id,
    profile_2_id,
    sort_by,
    sort_direction,
    profile_1_index,
    profile_2_index,
):
    """Check the position of both profiles for each sort field and direction.

    ``profile_1_index`` / ``profile_2_index`` are the expected positions of
    the first and second profile in the returned list. The request and checks
    are retried once per second; after ``time_limit`` seconds the last
    failure is re-raised.
    """
    start_time = time.monotonic()
    time_limit = 10
    while True:
        try:
            r = requests.get(
                f"{API_PREFIX}/orgs/{default_org_id}/profiles",
                # Let requests build the query string instead of formatting it by hand
                params={"sortBy": sort_by, "sortDirection": sort_direction},
                headers=admin_auth_headers,
            )
            assert r.status_code == 200
            data = r.json()
            assert data["total"] == 2

            profiles = data["items"]
            assert len(profiles) == 2

            profile_1 = profiles[profile_1_index]
            assert profile_1["id"] == profile_id
            assert profile_1["name"] == PROFILE_NAME_UPDATED

            profile_2 = profiles[profile_2_index]
            assert profile_2["id"] == profile_2_id
            assert profile_2["name"] == PROFILE_2_NAME

            break
        except Exception:
            # Exception (not a bare except) so Ctrl-C / SystemExit still abort
            if time.monotonic() - start_time > time_limit:
                raise
            time.sleep(1)
 | |
| 
 | |
| 
 | |
def test_delete_profile(admin_auth_headers, default_org_id, profile_2_id):
    """Delete the second profile and check that it is gone afterwards."""
    profile_url = f"{API_PREFIX}/orgs/{default_org_id}/profiles/{profile_2_id}"

    # Delete second profile
    deleted = requests.delete(profile_url, headers=admin_auth_headers)
    assert deleted.status_code == 200
    assert deleted.json()["success"]

    # Verify profile has been deleted, then try to delete it again:
    # both the GET and the second DELETE must answer with a 404
    for send in (requests.get, requests.delete):
        missing = send(profile_url, headers=admin_auth_headers)
        assert missing.status_code == 404
        assert missing.json()["detail"] == "profile_not_found"
 | |
| 
 | |
| 
 | |
def test_create_profile_read_only_org(
    admin_auth_headers, default_org_id, profile_browser_4_id
):
    """Check that creating a profile in a read-only org is rejected with 403.

    The org is switched to read-only for the duration of the test and is
    switched back afterwards even if the test fails, so that a failure here
    does not leave the org read-only.
    """
    # Set org to read-only
    r = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/read-only",
        headers=admin_auth_headers,
        json={"readOnly": True, "readOnlyReason": "For testing purposes"},
    )
    assert r.json()["updated"]

    try:
        prepare_browser_for_profile_commit(
            profile_browser_4_id, admin_auth_headers, default_org_id
        )

        # Try to create profile, verify we get 403 forbidden
        start_time = time.monotonic()
        time_limit = 300
        while True:
            try:
                r = requests.post(
                    f"{API_PREFIX}/orgs/{default_org_id}/profiles",
                    headers=admin_auth_headers,
                    json={
                        "browserid": profile_browser_4_id,
                        "name": "uncreatable",
                        "description": "because org is read-only",
                    },
                    timeout=10,
                )
                detail = r.json().get("detail")
                if detail == "org_set_to_read_only":
                    assert r.status_code == 403
                    break
            except Exception:
                # Exception (not a bare except) so Ctrl-C / SystemExit still abort
                if time.monotonic() - start_time > time_limit:
                    raise
            else:
                # Any other answer (e.g. "waiting_for_browser"): enforce the
                # time limit here too so the loop cannot run forever
                if time.monotonic() - start_time > time_limit:
                    raise TimeoutError(
                        f"no read-only rejection within {time_limit} seconds, "
                        f"last detail: {detail}"
                    )
            time.sleep(5)
    finally:
        # Set readOnly back to false on org
        r = requests.post(
            f"{API_PREFIX}/orgs/{default_org_id}/read-only",
            headers=admin_auth_headers,
            json={"readOnly": False},
        )
        assert r.json()["updated"]
 |