* Rename archives to orgs and aid to oid on backend
* Rename archive to org and aid to oid in frontend
* Remove translation artifact
* Rename team -> organization
* Add database migrations and run once on startup
* Apply the new by_one_worker decorator to other asyncio tasks as well, so heavy tasks are not run in every worker (see the sketch after this list)
* Run black, pylint, and husky via pre-commit
* Set db version and use it in migrations
* Update and prepare database in a single task
* Migrate k8s configmaps
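The by_one_worker decorator itself is not included in this excerpt, and the real implementation may differ. As a rough, hypothetical sketch of the idea only: a decorator could take an exclusive, non-blocking file lock so that the first backend worker process to acquire it runs a startup task (such as the new database migrations) while every other worker skips it. The lock path and the update_and_prepare_db name below are illustrative assumptions, not code from this commit.

# Hypothetical sketch only -- not the actual browsertrix-cloud implementation.
# Idea: take an exclusive, non-blocking file lock; the first worker process to
# acquire it runs the task, every other worker returns immediately.
import fcntl
import functools


def by_one_worker(lock_path):
    """Run the decorated async startup task in only one worker process."""

    def decorator(func):
        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            lock_file = open(lock_path, "w")
            try:
                # Raises BlockingIOError in every worker except the one that
                # grabbed the lock first; the lock is held while the task runs.
                fcntl.flock(lock_file, fcntl.LOCK_EX | fcntl.LOCK_NB)
            except BlockingIOError:
                return None
            return await func(*args, **kwargs)

        return wrapper

    return decorator


@by_one_worker("/tmp/btrix-db-update.lock")  # lock path is an assumption
async def update_and_prepare_db():
    """E.g. run the database migrations once on startup."""

Note that a per-process file lock like this only guards workers on the same host; in a multi-node deployment a shared database flag or leader election would serve the same purpose.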
"""End-to-end API tests: list orgs, create and run a crawl config, then
download and verify the resulting WACZ."""

import requests
import hashlib
import time
import io
import zipfile

from .conftest import API_PREFIX, HOST_PREFIX

# WACZ details shared between tests, populated once the crawl completes
wacz_path = None
wacz_size = None
wacz_hash = None

wacz_content = None


def test_list_orgs(admin_auth_headers, default_org_id):
    r = requests.get(f"{API_PREFIX}/orgs", headers=admin_auth_headers)
    data = r.json()

    orgs = data["orgs"]
    assert len(orgs) > 0

    org_ids = []
    for org in orgs:
        org_ids.append(org["id"])
    assert default_org_id in org_ids


def test_create_new_config(admin_auth_headers, default_org_id):
    crawl_data = {
        "runNow": True,
        "name": "Test Crawl",
        "config": {"seeds": ["https://webrecorder.net/"]},
    }
    r = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/",
        headers=admin_auth_headers,
        json=crawl_data,
    )

    assert r.status_code == 200

    data = r.json()
    assert data["added"]
    assert data["run_now_job"]


def test_wait_for_complete(admin_auth_headers, default_org_id, admin_crawl_id):
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{admin_crawl_id}/replay.json",
        headers=admin_auth_headers,
    )
    data = r.json()
    assert data["state"] == "complete"

    assert len(data["resources"]) == 1
    assert data["resources"][0]["path"]

    assert data["tags"] == ["wr-test-1", "wr-test-2"]

    # Record the WACZ details reported by the API for the later download tests
    global wacz_path
    global wacz_size
    global wacz_hash
    wacz_path = data["resources"][0]["path"]
    wacz_size = data["resources"][0]["size"]
    wacz_hash = data["resources"][0]["hash"]


def test_crawl_info(admin_auth_headers, default_org_id, admin_crawl_id):
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{admin_crawl_id}",
        headers=admin_auth_headers,
    )
    data = r.json()
    assert data["fileSize"] == wacz_size


def test_download_wacz():
    r = requests.get(HOST_PREFIX + wacz_path)
    assert r.status_code == 200
    assert len(r.content) == wacz_size

    # Downloaded file must match the hash reported by the API
    h = hashlib.sha256()
    h.update(r.content)
    assert h.hexdigest() == wacz_hash, (h.hexdigest(), wacz_hash)

    global wacz_content
    wacz_content = r.content


def test_verify_wacz():
    b = io.BytesIO(wacz_content)
    z = zipfile.ZipFile(b)

    assert "pages/pages.jsonl" in z.namelist()

    # The page list should include the crawled seed URL
    pages = z.open("pages/pages.jsonl").read().decode("utf-8")
    assert '"https://webrecorder.net/"' in pages