* basecrawl refactor: make crawls db more generic, supporting different types of 'base crawls': crawls, uploads, manual archives
  - move shared functionality to basecrawl.py
  - create a base BaseCrawl object, which contains start/finish time, metadata, and a files array
  - create BaseCrawlOps, a base class for CrawlOps, which supports base crawl deletion, querying, and collection add/remove

* uploads api (part of #929; a usage sketch follows this message):
  - new UploadedCrawl object which extends BaseCrawl and adds name and description
  - support multipart form data upload to /uploads/formdata
  - support streaming upload of a single file via /uploads/stream, using botocore multipart upload to upload to the s3-endpoint in parts
  - require a 'filename' param to set the upload filename for streaming uploads (form data names are used otherwise)
  - sanitize the filename, and place uploads in /uploads/<uuid>/<sanitized-filename>-<random>.wacz
  - uploads have internal id 'upload-<uuid>'
  - create the UploadedCrawl object with CrawlFiles pointing to the newly uploaded files, and set state to 'complete'
  - handle upload failures by aborting the multipart upload
  - ensure uploads are added within the org bucket path
  - return id / added when adding a new UploadedCrawl
  - support listing, deleting, and PATCH on /uploads
  - support upload details via /replay.json for replay
  - add support for 'replaceId=<id>', which removes all previous files in the upload after the new upload succeeds; if replaceId doesn't exist, create a new upload (stream endpoint only so far)
  - support patching upload metadata: notes, tags, and name on uploads (UpdateUpload extends UpdateCrawl and adds 'name')

* base crawls api:
  - add /all-crawls list and delete endpoints for all crawl types (without resources)
  - support /all-crawls/<id>/replay.json with resources
  - use the ListCrawlOut model for the /all-crawls list endpoint
  - extend BaseCrawlOut from ListCrawlOut, adding a type field
  - use 'type: crawl' for crawls and 'type: upload' for uploads
  - migration: ensure all previous crawl objects missing a type are set to 'type: crawl'
  - indexes: add db indices on the 'type' field, plus compound indices of 'type' with oid, cid, finished, and state

* tests:
  - add tests for multipart and streaming uploads, listing uploads, and deleting uploads
  - add sample WACZ files for upload testing: 'example.wacz' and 'example-2.wacz'

* collections: support adding and removing both crawls and uploads via base crawl
  - include collection_ids in the /all-crawls list
  - collections replay.json can include both crawls and uploads

bump version to 1.6.0-beta.2

---------

Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
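For orientation, here is a minimal sketch of how the new endpoints fit together: streaming a WACZ to /uploads/stream, then seeing the upload alongside crawls via /all-crawls. This is not code from the PR; the deployment URL, org id, auth token, and chunk size are placeholder assumptions, and error handling is elided.

import requests

API_PREFIX = "https://example.com/api"       # assumed deployment URL
ORG_ID = "<org-uuid>"                        # placeholder org id
HEADERS = {"Authorization": "Bearer <jwt>"}  # placeholder auth header

def read_in_chunks(fh, chunk_size=256 * 1024):
    # Yield fixed-size chunks so requests streams the request body
    while True:
        chunk = fh.read(chunk_size)
        if not chunk:
            break
        yield chunk

# Stream a single WACZ; the 'filename' param is required on this endpoint
with open("example.wacz", "rb") as fh:
    r = requests.put(
        f"{API_PREFIX}/orgs/{ORG_ID}/uploads/stream?filename=example.wacz",
        headers=HEADERS,
        data=read_in_chunks(fh),
    )
r.raise_for_status()
upload_id = r.json()["id"]  # internal id of the form 'upload-<uuid>'

# Uploads now appear next to crawls in the combined listing
r = requests.get(f"{API_PREFIX}/orgs/{ORG_ID}/all-crawls", headers=HEADERS)
for item in r.json()["items"]:
    print(item["type"], item["id"])  # 'type' is 'crawl' or 'upload'

The same upload id can then be passed to the collection add/remove endpoints, exactly as the tests below do with crawl ids.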
Python · 539 lines · 16 KiB
import requests
import os

from .conftest import API_PREFIX
from .utils import read_in_chunks

COLLECTION_NAME = "Test collection"
UPDATED_NAME = "Updated tést cöllection"
SECOND_COLLECTION_NAME = "second-collection"
DESCRIPTION = "Test description"

_coll_id = None
_second_coll_id = None
upload_id = None
modified = None

curr_dir = os.path.dirname(os.path.realpath(__file__))

def test_create_collection(
    crawler_auth_headers, default_org_id, crawler_crawl_id, admin_crawl_id
):
    r = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/collections",
        headers=crawler_auth_headers,
        json={
            "crawlIds": [crawler_crawl_id],
            "name": COLLECTION_NAME,
        },
    )
    assert r.status_code == 200
    data = r.json()
    assert data["added"]
    assert data["name"] == COLLECTION_NAME

    global _coll_id
    _coll_id = data["id"]

    # Verify crawl in collection
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/replay.json",
        headers=crawler_auth_headers,
    )
    assert _coll_id in r.json()["collections"]

def test_create_collection_taken_name(
    crawler_auth_headers, default_org_id, crawler_crawl_id, admin_crawl_id
):
    r = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/collections",
        headers=crawler_auth_headers,
        json={
            "crawlIds": [crawler_crawl_id],
            "name": COLLECTION_NAME,
        },
    )
    assert r.status_code == 400
    assert r.json()["detail"] == "collection_name_taken"

def test_create_collection_empty_name(
    crawler_auth_headers, default_org_id, crawler_crawl_id, admin_crawl_id
):
    r = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/collections",
        headers=crawler_auth_headers,
        json={
            "crawlIds": [crawler_crawl_id],
            "name": "",
        },
    )
    assert r.status_code == 422

def test_update_collection(
    crawler_auth_headers, default_org_id, crawler_crawl_id, admin_crawl_id
):
    r = requests.patch(
        f"{API_PREFIX}/orgs/{default_org_id}/collections/{_coll_id}",
        headers=crawler_auth_headers,
        json={
            "description": DESCRIPTION,
        },
    )
    assert r.status_code == 200
    assert r.json()["updated"]

    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/collections/{_coll_id}",
        headers=crawler_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()

    assert data["id"] == _coll_id
    assert data["name"] == COLLECTION_NAME
    assert data["description"] == DESCRIPTION
    assert data["crawlCount"] == 1
    assert data["pageCount"] > 0
    global modified
    modified = data["modified"]
    assert modified

def test_rename_collection(
    crawler_auth_headers, default_org_id, crawler_crawl_id, admin_crawl_id
):
    r = requests.patch(
        f"{API_PREFIX}/orgs/{default_org_id}/collections/{_coll_id}",
        headers=crawler_auth_headers,
        json={
            "name": UPDATED_NAME,
        },
    )
    assert r.status_code == 200
    assert r.json()["updated"]

    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/collections/{_coll_id}",
        headers=crawler_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()

    assert data["id"] == _coll_id
    assert data["name"] == UPDATED_NAME
    assert data["modified"] >= modified

def test_rename_collection_taken_name(
    crawler_auth_headers, default_org_id, crawler_crawl_id, admin_crawl_id
):
    # Add second collection
    r = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/collections",
        headers=crawler_auth_headers,
        json={
            "crawlIds": [crawler_crawl_id],
            "name": SECOND_COLLECTION_NAME,
        },
    )
    assert r.status_code == 200
    data = r.json()
    assert data["added"]
    assert data["name"] == SECOND_COLLECTION_NAME

    global _second_coll_id
    _second_coll_id = data["id"]

    # Try to rename first coll to second collection's name
    r = requests.patch(
        f"{API_PREFIX}/orgs/{default_org_id}/collections/{_coll_id}",
        headers=crawler_auth_headers,
        json={"name": SECOND_COLLECTION_NAME},
    )
    assert r.status_code == 400
    assert r.json()["detail"] == "collection_name_taken"

def test_add_remove_crawl_from_collection(
    crawler_auth_headers, default_org_id, crawler_crawl_id, admin_crawl_id
):
    # Add crawl
    r = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/collections/{_coll_id}/add",
        json={"crawlIds": [admin_crawl_id]},
        headers=crawler_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data["id"] == _coll_id
    assert data["crawlCount"] == 2
    assert data["pageCount"] > 0
    assert data["modified"] >= modified
    assert data["tags"] == ["wr-test-2", "wr-test-1"]

    # Verify it was added
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{admin_crawl_id}/replay.json",
        headers=crawler_auth_headers,
    )
    assert _coll_id in r.json()["collections"]

    # Remove crawls
    r = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/collections/{_coll_id}/remove",
        json={"crawlIds": [admin_crawl_id, crawler_crawl_id]},
        headers=crawler_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data["id"] == _coll_id
    assert data["crawlCount"] == 0
    assert data["pageCount"] == 0
    assert data["modified"] >= modified
    assert data.get("tags", []) == []

    # Verify they were removed
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{admin_crawl_id}/replay.json",
        headers=crawler_auth_headers,
    )
    assert _coll_id not in r.json()["collections"]

    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/replay.json",
        headers=crawler_auth_headers,
    )
    assert _coll_id not in r.json()["collections"]

    # Add crawls back for further tests
    r = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/collections/{_coll_id}/add",
        json={"crawlIds": [admin_crawl_id, crawler_crawl_id]},
        headers=crawler_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data["id"] == _coll_id
    assert data["crawlCount"] == 2
    assert data["pageCount"] > 0
    assert data["modified"] >= modified
    assert data["tags"] == ["wr-test-2", "wr-test-1"]

def test_get_collection(crawler_auth_headers, default_org_id):
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/collections/{_coll_id}",
        headers=crawler_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data["id"] == _coll_id
    assert data["name"] == UPDATED_NAME
    assert data["oid"] == default_org_id
    assert data["description"] == DESCRIPTION
    assert data["crawlCount"] == 2
    assert data["pageCount"] > 0
    assert data["modified"] >= modified
    assert data["tags"] == ["wr-test-2", "wr-test-1"]

def test_get_collection_replay(
    crawler_auth_headers, default_org_id, crawler_crawl_id, admin_crawl_id
):
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/collections/{_coll_id}/replay.json",
        headers=crawler_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data["id"] == _coll_id
    assert data["name"] == UPDATED_NAME
    assert data["oid"] == default_org_id
    assert data["description"] == DESCRIPTION
    assert data["crawlCount"] == 2
    assert data["pageCount"] > 0
    assert data["modified"] >= modified
    assert data["tags"] == ["wr-test-2", "wr-test-1"]

    resources = data["resources"]
    assert resources
    for resource in resources:
        assert resource["name"]
        assert resource["path"]
        assert resource["size"]

def test_add_upload_to_collection(crawler_auth_headers, default_org_id):
    with open(os.path.join(curr_dir, "data", "example.wacz"), "rb") as fh:
        r = requests.put(
            f"{API_PREFIX}/orgs/{default_org_id}/uploads/stream?filename=test-upload.wacz",
            headers=crawler_auth_headers,
            data=read_in_chunks(fh),
        )

    assert r.status_code == 200
    assert r.json()["added"]

    global upload_id
    upload_id = r.json()["id"]

    # Add upload
    r = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/collections/{_coll_id}/add",
        json={"crawlIds": [upload_id]},
        headers=crawler_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data["id"] == _coll_id
    assert data["crawlCount"] == 3
    assert data["pageCount"] > 0
    assert data["modified"]
    assert data["tags"] == ["wr-test-2", "wr-test-1"]

    # Verify it was added
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/uploads/{upload_id}/replay.json",
        headers=crawler_auth_headers,
    )
    assert _coll_id in r.json()["collections"]

def test_list_collections(
    crawler_auth_headers, default_org_id, crawler_crawl_id, admin_crawl_id
):
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/collections", headers=crawler_auth_headers
    )
    assert r.status_code == 200
    data = r.json()
    assert data["total"] == 2

    items = data["items"]
    assert len(items) == 2

    first_coll = [coll for coll in items if coll["name"] == UPDATED_NAME][0]
    assert first_coll["id"]
    assert first_coll["name"] == UPDATED_NAME
    assert first_coll["oid"] == default_org_id
    assert first_coll["description"] == DESCRIPTION
    assert first_coll["crawlCount"] == 3
    assert first_coll["pageCount"] > 0
    assert first_coll["modified"]
    assert first_coll["tags"] == ["wr-test-2", "wr-test-1"]

    second_coll = [coll for coll in items if coll["name"] == SECOND_COLLECTION_NAME][0]
    assert second_coll["id"]
    assert second_coll["name"] == SECOND_COLLECTION_NAME
    assert second_coll["oid"] == default_org_id
    assert second_coll.get("description") is None
    assert second_coll["crawlCount"] == 1
    assert second_coll["pageCount"] > 0
    assert second_coll["modified"]
    assert second_coll["tags"] == ["wr-test-2"]

def test_remove_upload_from_collection(crawler_auth_headers, default_org_id):
    # Remove upload
    r = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/collections/{_coll_id}/remove",
        json={"crawlIds": [upload_id]},
        headers=crawler_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data["id"] == _coll_id
    assert data["crawlCount"] == 2
    assert data["pageCount"] > 0
    assert data["modified"] >= modified
    assert data.get("tags") == ["wr-test-2", "wr-test-1"]

    # Verify it was removed
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/uploads/{upload_id}/replay.json",
        headers=crawler_auth_headers,
    )
    assert _coll_id not in r.json()["collections"]

def test_filter_sort_collections(
    crawler_auth_headers, default_org_id, crawler_crawl_id, admin_crawl_id
):
    # Test filtering by name
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/collections?name={SECOND_COLLECTION_NAME}",
        headers=crawler_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data["total"] == 1

    items = data["items"]
    assert len(items) == 1

    coll = items[0]
    assert coll["id"]
    assert coll["name"] == SECOND_COLLECTION_NAME
    assert coll["oid"] == default_org_id
    assert coll.get("description") is None

    # Test filtering by name prefix
    name_prefix = SECOND_COLLECTION_NAME[0:4]
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/collections?namePrefix={name_prefix}",
        headers=crawler_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data["total"] == 1

    items = data["items"]
    assert len(items) == 1

    coll = items[0]
    assert coll["id"]
    assert coll["name"] == SECOND_COLLECTION_NAME
    assert coll["oid"] == default_org_id
    assert coll.get("description") is None

    # Test filtering by name prefix (case insensitive)
    name_prefix = name_prefix.upper()
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/collections?namePrefix={name_prefix}",
        headers=crawler_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data["total"] == 1

    items = data["items"]
    assert len(items) == 1

    coll = items[0]
    assert coll["id"]
    assert coll["name"] == SECOND_COLLECTION_NAME
    assert coll["oid"] == default_org_id
    assert coll.get("description") is None

    # Test sorting by name, ascending (default)
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/collections?sortBy=name",
        headers=crawler_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data["total"] == 2

    items = data["items"]
    assert items[0]["name"] == SECOND_COLLECTION_NAME
    assert items[1]["name"] == UPDATED_NAME

    # Test sorting by name, descending
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/collections?sortBy=name&sortDirection=-1",
        headers=crawler_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data["total"] == 2

    items = data["items"]
    assert items[0]["name"] == UPDATED_NAME
    assert items[1]["name"] == SECOND_COLLECTION_NAME

    # Test sorting by description, ascending (default)
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/collections?sortBy=description",
        headers=crawler_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data["total"] == 2

    items = data["items"]
    assert items[0]["name"] == SECOND_COLLECTION_NAME
    assert items[0].get("description") is None
    assert items[1]["name"] == UPDATED_NAME
    assert items[1]["description"] == DESCRIPTION

    # Test sorting by description, descending
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/collections?sortBy=description&sortDirection=-1",
        headers=crawler_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data["total"] == 2

    items = data["items"]
    assert items[0]["name"] == UPDATED_NAME
    assert items[0]["description"] == DESCRIPTION
    assert items[1]["name"] == SECOND_COLLECTION_NAME
    assert items[1].get("description") is None

    # Test sorting by modified, ascending
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/collections?sortBy=modified",
        headers=crawler_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data["total"] == 2

    items = data["items"]
    assert items[0]["modified"] <= items[1]["modified"]

    # Test sorting by modified, descending
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/collections?sortBy=modified&sortDirection=-1",
        headers=crawler_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data["total"] == 2

    items = data["items"]
    assert items[0]["modified"] >= items[1]["modified"]

def test_delete_collection(crawler_auth_headers, default_org_id, crawler_crawl_id):
    # Delete second collection
    r = requests.delete(
        f"{API_PREFIX}/orgs/{default_org_id}/collections/{_second_coll_id}",
        headers=crawler_auth_headers,
    )
    assert r.status_code == 200
    assert r.json()["success"]

    # Verify collection id was removed from crawl
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/replay.json",
        headers=crawler_auth_headers,
    )
    assert _second_coll_id not in r.json()["collections"]

    # Make a new empty (no crawls) collection and delete it
    r = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/collections",
        headers=crawler_auth_headers,
        json={
            "name": "To delete",
            "description": "Deleting a collection with no crawls should work.",
        },
    )
    assert r.status_code == 200
    data = r.json()
    assert data["added"]
    coll_id = data["id"]

    r = requests.delete(
        f"{API_PREFIX}/orgs/{default_org_id}/collections/{coll_id}",
        headers=crawler_auth_headers,
    )
    assert r.status_code == 200
    assert r.json()["success"]