- Remove globals from profile, uploads, and qa test modules in favor of fixtures - Add retries to fix intermittent test failures due to timing --------- Co-authored-by: Tessa Walsh <tessa@bitarchivist.net> Co-authored-by: Ilya Kreymer <ikreymer@gmail.com>
		
			
				
	
	
		
			377 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			377 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import time
 | |
| from typing import Dict
 | |
| from uuid import UUID
 | |
| 
 | |
| import requests
 | |
| import pytest
 | |
| 
 | |
| from .conftest import API_PREFIX, FINISHED_STATES
 | |
| 
 | |
| 
 | |
# Name/description the first test profile is created with.
PROFILE_NAME = "Test profile"
PROFILE_DESC = "Profile used for backend tests"

# Values the first profile is renamed to by the metadata-update tests.
PROFILE_NAME_UPDATED = "Updated test profile"
PROFILE_DESC_UPDATED = "Updated profile used for backend tests"

# Name/description of the second profile, used by the list and delete tests.
PROFILE_2_NAME = "Second test profile"
PROFILE_2_DESC = "Second profile used to test list endpoint"
 | |
| 
 | |
| 
 | |
def prepare_browser_for_profile_commit(
    browser_id: str, headers: Dict[str, str], oid: UUID
) -> None:
    """Exercise a profile browser so that it can be committed to a profile.

    Pings the browser, checks the metadata returned for it, navigates it to a
    new URL and then pings it repeatedly until a ping reports success.

    Args:
        browser_id: Id of the profile browser to prepare.
        headers: Auth headers sent with every request.
        oid: Id of the org owning the browser; compared against the ``oid``
            field of the browser metadata.

    Raises:
        AssertionError: If the initial ping, the metadata check or the
            navigate call does not return the expected response.
    """
    # Ping to make sure it doesn't expire
    r = requests.post(
        f"{API_PREFIX}/orgs/{oid}/profiles/browser/{browser_id}/ping",
        headers=headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data.get("success")
    assert data.get("origins") or data.get("origins") == []

    # Verify browser seems good
    r = requests.get(
        f"{API_PREFIX}/orgs/{oid}/profiles/browser/{browser_id}",
        headers=headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data["url"]
    assert data["path"]
    assert data["password"]
    assert data["auth_bearer"]
    assert data["scale"]
    assert data["oid"] == oid

    # Navigate to new URL
    r = requests.post(
        f"{API_PREFIX}/orgs/{oid}/profiles/browser/{browser_id}/navigate",
        headers=headers,
        json={"url": "https://webrecorder.net/tools"},
    )
    assert r.status_code == 200
    assert r.json()["success"]

    # Ping browser until ready. This stays best-effort: running out of
    # attempts is not an error here, the caller's own retry loop decides.
    max_attempts = 20
    for _ in range(max_attempts):
        try:
            r = requests.post(
                f"{API_PREFIX}/orgs/{oid}/profiles/browser/{browser_id}/ping",
                headers=headers,
            )
            if r.json()["success"]:
                break
        except (requests.RequestException, ValueError, KeyError, TypeError):
            # Connection problems or an unexpected body: try again. Only
            # these are swallowed so Ctrl-C / SystemExit still propagate.
            pass
        # Pause after every unsuccessful attempt, including failed requests,
        # so a broken endpoint is not hammered in a tight loop.
        time.sleep(5)
 | |
| 
 | |
| 
 | |
@pytest.fixture(scope="module")
def profile_id(admin_auth_headers, default_org_id, profile_browser_id):
    """Commit the first profile browser to a new profile and return its id.

    The create request is repeated every 5 seconds until the response
    reports ``added``. Failed requests, failed assertions and responses
    that are not ``added`` yet (e.g. ``waiting_for_browser``) are all
    retried until the time limit is exceeded.

    Raises:
        TimeoutError: If no ``added`` response arrives within the time limit.
        Exception: The last request/assertion error, if the time limit is
            exceeded while attempts are still failing.
    """
    prepare_browser_for_profile_commit(
        profile_browser_id, admin_auth_headers, default_org_id
    )

    # Create profile
    start_time = time.monotonic()
    time_limit = 300
    while True:
        try:
            r = requests.post(
                f"{API_PREFIX}/orgs/{default_org_id}/profiles",
                headers=admin_auth_headers,
                json={
                    "browserid": profile_browser_id,
                    "name": PROFILE_NAME,
                    "description": PROFILE_DESC,
                },
                timeout=10,
            )
            assert r.status_code == 200
            data = r.json()
            if data.get("added"):
                assert data["storageQuotaReached"] in (True, False)
                return data["id"]
        except Exception:
            # Not a bare except: KeyboardInterrupt/SystemExit must not be
            # swallowed by the retry loop.
            if time.monotonic() - start_time > time_limit:
                raise
        else:
            # Valid response but the profile was not added yet. Enforce the
            # time limit here too, otherwise a browser that never becomes
            # ready would keep this loop running forever.
            if time.monotonic() - start_time > time_limit:
                raise TimeoutError(
                    f"profile was not created within {time_limit} seconds"
                )
        time.sleep(5)
 | |
| 
 | |
| 
 | |
@pytest.fixture(scope="module")
def profile_config_id(admin_auth_headers, default_org_id, profile_id):
    """Check the new profile, attach it to a workflow, return the workflow id.

    The profile is fetched and validated first (it must not be referenced by
    any crawl config yet); a crawl config using the profile is then created
    and the ``id`` from the create response is returned.
    """
    profile_resp = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/profiles/{profile_id}",
        headers=admin_auth_headers,
    )
    assert profile_resp.status_code == 200

    profile = profile_resp.json()
    assert profile["id"] == profile_id
    assert profile["name"] == PROFILE_NAME
    assert profile["description"] == PROFILE_DESC
    assert profile["userid"]
    assert profile["oid"] == default_org_id
    origins = profile.get("origins")
    assert origins or origins == []
    assert profile["created"]
    assert not profile["baseid"]

    # The stored profile file must be fully described.
    resource = profile["resource"]
    assert resource
    assert resource["filename"]
    assert resource["hash"]
    assert resource["size"]
    storage = resource["storage"]
    assert storage
    assert storage["name"]
    replicas = resource.get("replicas")
    assert replicas or replicas == []

    # Nothing uses the profile yet.
    assert profile.get("crawlconfigs") == []

    # Use profile in a workflow
    workflow = {
        "runNow": False,
        "name": "Profile Test Crawl",
        "description": "Crawl using browser profile",
        "config": {
            "seeds": [{"url": "https://webrecorder.net/"}],
            "exclude": "community",
        },
        "profileid": profile_id,
    }
    create_resp = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/",
        headers=admin_auth_headers,
        json=workflow,
    )
    return create_resp.json()["id"]
 | |
| 
 | |
| 
 | |
@pytest.fixture(scope="module")
def profile_2_id(admin_auth_headers, default_org_id, profile_browser_2_id):
    """Commit the second profile browser to a new profile and return its id.

    The create request is repeated every 5 seconds until the response
    reports ``added``. Failed requests, failed assertions and responses
    that are not ``added`` yet (e.g. ``waiting_for_browser``) are all
    retried until the time limit is exceeded.

    Raises:
        TimeoutError: If no ``added`` response arrives within the time limit.
        Exception: The last request/assertion error, if the time limit is
            exceeded while attempts are still failing.
    """
    prepare_browser_for_profile_commit(
        profile_browser_2_id, admin_auth_headers, default_org_id
    )

    # Create profile
    start_time = time.monotonic()
    time_limit = 300
    while True:
        try:
            r = requests.post(
                f"{API_PREFIX}/orgs/{default_org_id}/profiles",
                headers=admin_auth_headers,
                json={
                    "browserid": profile_browser_2_id,
                    "name": PROFILE_2_NAME,
                    "description": PROFILE_2_DESC,
                },
                timeout=10,
            )
            assert r.status_code == 200
            data = r.json()
            if data.get("added"):
                assert data["storageQuotaReached"] in (True, False)
                return data["id"]
        except Exception:
            # Not a bare except: KeyboardInterrupt/SystemExit must not be
            # swallowed by the retry loop.
            if time.monotonic() - start_time > time_limit:
                raise
        else:
            # Valid response but the profile was not added yet. Enforce the
            # time limit here too, otherwise a browser that never becomes
            # ready would keep this loop running forever.
            if time.monotonic() - start_time > time_limit:
                raise TimeoutError(
                    f"second profile was not created within {time_limit} seconds"
                )
        time.sleep(5)
 | |
| 
 | |
| 
 | |
def test_commit_browser_to_new_profile(admin_auth_headers, default_org_id, profile_id):
    """The ``profile_id`` fixture does the commit; here we only need its result."""
    assert profile_id
 | |
| 
 | |
| 
 | |
def test_get_profile(admin_auth_headers, default_org_id, profile_id, profile_config_id):
    """Fetch the profile and check that it now lists the workflow using it.

    The whole check is retried once per second; after the time limit the
    last failure is re-raised.
    """
    start_time = time.monotonic()
    time_limit = 10
    # Check get endpoint again and check that crawlconfigs is updated
    while True:
        try:
            r = requests.get(
                f"{API_PREFIX}/orgs/{default_org_id}/profiles/{profile_id}",
                headers=admin_auth_headers,
            )
            assert r.status_code == 200
            data = r.json()
            assert data["id"] == profile_id
            assert data["name"] == PROFILE_NAME
            assert data["description"] == PROFILE_DESC
            assert data["userid"]
            assert data["oid"] == default_org_id
            assert data.get("origins") or data.get("origins") == []
            assert data["created"]
            assert not data["baseid"]

            resource = data["resource"]
            assert resource
            assert resource["filename"]
            assert resource["hash"]
            assert resource["size"]
            assert resource["storage"]
            assert resource["storage"]["name"]
            assert resource.get("replicas") or resource.get("replicas") == []

            crawl_configs = data.get("crawlconfigs")
            assert crawl_configs
            assert len(crawl_configs) == 1
            assert crawl_configs[0]["id"] == profile_config_id
            assert crawl_configs[0]["name"] == "Profile Test Crawl"
            break
        except Exception:
            # Retry on any ordinary failure, but let KeyboardInterrupt and
            # SystemExit through (a bare except would swallow them).
            if time.monotonic() - start_time > time_limit:
                raise
            time.sleep(1)
 | |
| 
 | |
| 
 | |
def test_commit_second_profile(profile_2_id):
    """The ``profile_2_id`` fixture does the commit; here we only need its result."""
    assert profile_2_id
 | |
| 
 | |
| 
 | |
def _assert_listed_profile(profile, expected_id, expected_name, expected_desc, oid):
    """Assert that one item of the profile list has the expected metadata."""
    assert profile["id"] == expected_id
    assert profile["name"] == expected_name
    assert profile["description"] == expected_desc
    assert profile["userid"]
    assert profile["oid"] == oid
    # Check the profile's own origins; the list envelope has no such field.
    origins = profile.get("origins")
    assert origins or origins == []
    assert profile["created"]
    assert not profile["baseid"]

    resource = profile["resource"]
    assert resource
    assert resource["filename"]
    assert resource["hash"]
    assert resource["size"]
    assert resource["storage"]
    assert resource["storage"]["name"]
    assert resource.get("replicas") or resource.get("replicas") == []


def test_list_profiles(admin_auth_headers, default_org_id, profile_id, profile_2_id):
    """List the org's profiles and check that both test profiles are returned.

    The whole check is retried once per second; after the time limit the
    last failure is re-raised.
    """
    start_time = time.monotonic()
    time_limit = 10
    # Check the list endpoint until both profiles show up as expected
    while True:
        try:
            r = requests.get(
                f"{API_PREFIX}/orgs/{default_org_id}/profiles",
                headers=admin_auth_headers,
            )
            assert r.status_code == 200
            data = r.json()
            assert data["total"] == 2

            profiles = data["items"]
            assert len(profiles) == 2

            # A missing id raises KeyError, which is retried like any
            # other failure.
            profiles_by_id = {profile["id"]: profile for profile in profiles}

            _assert_listed_profile(
                profiles_by_id[profile_id],
                profile_id,
                PROFILE_NAME,
                PROFILE_DESC,
                default_org_id,
            )
            _assert_listed_profile(
                profiles_by_id[profile_2_id],
                profile_2_id,
                PROFILE_2_NAME,
                PROFILE_2_DESC,
                default_org_id,
            )
            break
        except Exception:
            # Retry on any ordinary failure, but let KeyboardInterrupt and
            # SystemExit through (a bare except would swallow them).
            if time.monotonic() - start_time > time_limit:
                raise
            time.sleep(1)
 | |
| 
 | |
| 
 | |
def test_delete_profile(admin_auth_headers, default_org_id, profile_2_id):
    """Delete the second profile, then confirm it can no longer be found."""
    profile_url = f"{API_PREFIX}/orgs/{default_org_id}/profiles/{profile_2_id}"

    # Delete second profile
    resp = requests.delete(profile_url, headers=admin_auth_headers)
    assert resp.status_code == 200
    assert resp.json()["success"]

    # Fetching the deleted profile and then deleting it a second time must
    # both answer 404 / profile_not_found.
    for call in (requests.get, requests.delete):
        resp = call(profile_url, headers=admin_auth_headers)
        assert resp.status_code == 404
        assert resp.json()["detail"] == "profile_not_found"
 | |
| 
 | |
| 
 | |
def test_update_profile_metadata(admin_auth_headers, default_org_id, profile_id):
    """Rename the first profile and check that the new metadata is returned."""
    profile_url = f"{API_PREFIX}/orgs/{default_org_id}/profiles/{profile_id}"
    new_metadata = {
        "name": PROFILE_NAME_UPDATED,
        "description": PROFILE_DESC_UPDATED,
    }

    # Update name and description
    patch_resp = requests.patch(
        profile_url, headers=admin_auth_headers, json=new_metadata
    )
    assert patch_resp.status_code == 200
    assert patch_resp.json()["updated"]

    # Wait before reading the profile back
    time.sleep(5)

    # Verify update
    get_resp = requests.get(profile_url, headers=admin_auth_headers)
    assert get_resp.status_code == 200
    profile = get_resp.json()
    assert profile["id"] == profile_id
    assert profile["name"] == PROFILE_NAME_UPDATED
    assert profile["description"] == PROFILE_DESC_UPDATED
 | |
| 
 | |
| 
 | |
def test_commit_browser_to_existing_profile(
    admin_auth_headers, default_org_id, profile_browser_3_id, profile_id
):
    """Commit a third profile browser on top of the already existing profile."""
    prepare_browser_for_profile_commit(
        profile_browser_3_id, admin_auth_headers, default_org_id
    )

    # Commit new browser to existing profile
    commit_payload = {
        "browserid": profile_browser_3_id,
        "name": PROFILE_NAME_UPDATED,
        "description": PROFILE_DESC_UPDATED,
    }
    resp = requests.patch(
        f"{API_PREFIX}/orgs/{default_org_id}/profiles/{profile_id}",
        headers=admin_auth_headers,
        json=commit_payload,
    )
    assert resp.status_code == 200
    assert resp.json()["updated"]
 |