Backend: standardize search values, filters, and sorting for archived items (#1039)

- The all-crawls list endpoint filters now conform to 'Standardize list controls for archived items' (#1025) and URL-decode values before passing them in
- The uploads list endpoint now supports all all-crawls filters relevant to uploads
- An all-crawls/search-values endpoint is added to support searching across all archived item types (see the usage sketch after this list)
- Crawl configuration names are now copied to the crawl when the crawl is created, and crawl names and descriptions are now editable via the backend API (note: frontend changes will also be needed to make them editable via the UI)
- Migration added to copy existing config names for active configs into their associated crawls. This migration has been tested in a local deployment
- The new crawl states generate-wacz, uploading-wacz, and pending-wait are added to tests where relevant so that they continue to pass
- Test coverage added for all new all-crawls endpoints, filters, and sort values
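
For reference, a minimal sketch of how the new list filters, the search-values endpoint, and crawl renaming might be exercised from a client. It assumes an existing deployment and auth token; API_PREFIX, AUTH_HEADERS, ORG_ID, and the example filter values are illustrative placeholders, not part of this commit.

import requests

API_PREFIX = "https://app.example.com/api"          # assumed deployment URL
AUTH_HEADERS = {"Authorization": "Bearer <token>"}   # assumed auth headers
ORG_ID = "<org-uuid>"                                # assumed org id

# List archived items of all types, filtered and sorted. Filter values are
# percent-encoded on the wire by requests and URL-decoded by the backend.
params = {
    "name": "Crawler User Test Crawl",
    "firstSeed": "https://webrecorder.net/",
    "crawlType": "crawl",        # "crawl" or "upload"
    "sortBy": "fileSize",        # "started", "finished", or "fileSize"
    "sortDirection": 1,          # 1 = ascending, -1 = descending
}
r = requests.get(
    f"{API_PREFIX}/orgs/{ORG_ID}/all-crawls", headers=AUTH_HEADERS, params=params
)
items = r.json()["items"]

# Fetch distinct names, descriptions, first seeds, and crawl ids for
# populating search/autocomplete controls across all archived item types.
r = requests.get(
    f"{API_PREFIX}/orgs/{ORG_ID}/all-crawls/search-values", headers=AUTH_HEADERS
)
search_values = r.json()
# e.g. {"names": [...], "descriptions": [...], "firstSeeds": [...], "crawlIds": [...]}

# Rename a crawl and update its description via the backend API.
crawl_id = items[0]["id"] if items else "<crawl-id>"
r = requests.patch(
    f"{API_PREFIX}/orgs/{ORG_ID}/crawls/{crawl_id}",
    headers=AUTH_HEADERS,
    json={"name": "Updated crawl name", "description": "Updated description"},
)
assert r.status_code == 200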
Tessa Walsh 2023-08-04 12:56:52 -04:00 committed by GitHub
parent 9236a07800
commit 7ff57ce6b5
11 changed files with 505 additions and 36 deletions

View File

@ -5,6 +5,7 @@ import uuid
import os
from datetime import timedelta
from typing import Optional, List, Union
import urllib.parse
from pydantic import UUID4
from fastapi import HTTPException, Depends
@ -196,16 +197,11 @@ class BaseCrawlOps:
config = await self.crawl_configs.get_crawl_config(
crawl.cid, org, active_only=False
)
if config:
if not crawl.name:
crawl.name = config.name
if config.config.seeds:
if add_first_seed:
first_seed = config.config.seeds[0]
crawl.firstSeed = first_seed.url
crawl.seedCount = len(config.config.seeds)
if config and config.config.seeds:
if add_first_seed:
first_seed = config.config.seeds[0]
crawl.firstSeed = first_seed.url
crawl.seedCount = len(config.config.seeds)
if hasattr(crawl, "profileid") and crawl.profileid:
crawl.profileName = await self.crawl_configs.profiles.get_profile_name(
@ -327,7 +323,7 @@ class BaseCrawlOps:
{"$pull": {"collections": collection_id}},
)
# pylint: disable=too-many-branches
# pylint: disable=too-many-branches, invalid-name
async def list_all_base_crawls(
self,
org: Optional[Organization] = None,
@ -336,12 +332,14 @@ class BaseCrawlOps:
description: str = None,
collection_id: str = None,
states: Optional[List[str]] = None,
first_seed: Optional[str] = None,
type_: Optional[str] = None,
cid: Optional[UUID4] = None,
cls_type: Union[CrawlOut, CrawlOutWithResources] = CrawlOut,
page_size: int = DEFAULT_PAGE_SIZE,
page: int = 1,
sort_by: str = None,
sort_direction: int = -1,
type_=None,
):
"""List crawls of all types from the db"""
# Zero-index page for query
@ -367,7 +365,15 @@ class BaseCrawlOps:
# validated_states = [value for value in state if value in ALL_CRAWL_STATES]
query["state"] = {"$in": states}
aggregate = [{"$match": query}, {"$unset": "errors"}]
if cid:
query["cid"] = cid
aggregate = [
{"$match": query},
{"$set": {"firstSeedObject": {"$arrayElemAt": ["$config.seeds", 0]}}},
{"$set": {"firstSeed": "$firstSeedObject.url"}},
{"$unset": ["firstSeedObject", "errors"]},
]
if not resources:
aggregate.extend([{"$unset": ["files"]}])
@ -375,6 +381,9 @@ class BaseCrawlOps:
if name:
aggregate.extend([{"$match": {"name": name}}])
if first_seed:
aggregate.extend([{"$match": {"firstSeed": first_seed}}])
if description:
aggregate.extend([{"$match": {"description": description}}])
@ -382,7 +391,7 @@ class BaseCrawlOps:
aggregate.extend([{"$match": {"collections": {"$in": [collection_id]}}}])
if sort_by:
if sort_by not in ("started", "finished"):
if sort_by not in ("started", "finished", "fileSize"):
raise HTTPException(status_code=400, detail="invalid_sort_by")
if sort_direction not in (1, -1):
raise HTTPException(status_code=400, detail="invalid_sort_direction")
@ -447,13 +456,40 @@ class BaseCrawlOps:
return {"deleted": True}
async def get_all_crawl_search_values(self, org: Organization):
"""List unique names, first seeds, and descriptions from all captures in org"""
names = await self.crawls.distinct("name", {"oid": org.id})
descriptions = await self.crawls.distinct("description", {"oid": org.id})
crawl_ids = await self.crawls.distinct("_id", {"oid": org.id})
cids = await self.crawls.distinct("cid", {"oid": org.id})
# Remove empty strings
names = [name for name in names if name]
descriptions = [description for description in descriptions if description]
# Get first seeds
first_seeds = set()
for cid in cids:
if not cid:
continue
config = await self.crawl_configs.get_crawl_config(cid, org)
first_seed = config.config.seeds[0]
first_seeds.add(first_seed.url)
return {
"names": names,
"descriptions": descriptions,
"firstSeeds": list(first_seeds),
"crawlIds": list(crawl_ids),
}
# ============================================================================
def init_base_crawls_api(
app, mdb, users, crawl_manager, crawl_config_ops, orgs, user_dep
):
"""base crawls api"""
# pylint: disable=invalid-name, duplicate-code, too-many-arguments
# pylint: disable=invalid-name, duplicate-code, too-many-arguments, too-many-locals
ops = BaseCrawlOps(mdb, users, crawl_config_ops, crawl_manager)
@ -472,12 +508,28 @@ def init_base_crawls_api(
userid: Optional[UUID4] = None,
name: Optional[str] = None,
state: Optional[str] = None,
firstSeed: Optional[str] = None,
description: Optional[str] = None,
collectionId: Optional[UUID4] = None,
crawlType: Optional[str] = None,
cid: Optional[UUID4] = None,
sortBy: Optional[str] = "finished",
sortDirection: Optional[int] = -1,
):
states = state.split(",") if state else None
if firstSeed:
firstSeed = urllib.parse.unquote(firstSeed)
if name:
name = urllib.parse.unquote(name)
if description:
description = urllib.parse.unquote(description)
if crawlType and crawlType not in ("crawl", "upload"):
raise HTTPException(status_code=400, detail="invalid_crawl_type")
crawls, total = await ops.list_all_base_crawls(
org,
userid=userid,
@ -485,6 +537,9 @@ def init_base_crawls_api(
description=description,
collection_id=collectionId,
states=states,
first_seed=firstSeed,
type_=crawlType,
cid=cid,
page_size=pageSize,
page=page,
sort_by=sortBy,
@ -492,6 +547,12 @@ def init_base_crawls_api(
)
return paginated_format(crawls, total, page, pageSize)
@app.get("/orgs/{oid}/all-crawls/search-values", tags=["all-crawls"])
async def get_all_crawls_search_values(
org: Organization = Depends(org_viewer_dep),
):
return await ops.get_all_crawl_search_values(org)
@app.get(
"/orgs/{oid}/all-crawls/{crawl_id}",
tags=["all-crawls"],

View File

@ -68,11 +68,15 @@ class CrawlOps(BaseCrawlOps):
await self.crawls.create_index(
[("type", pymongo.HASHED), ("state", pymongo.DESCENDING)]
)
await self.crawls.create_index(
[("type", pymongo.HASHED), ("fileSize", pymongo.DESCENDING)]
)
await self.crawls.create_index([("finished", pymongo.DESCENDING)])
await self.crawls.create_index([("oid", pymongo.HASHED)])
await self.crawls.create_index([("cid", pymongo.HASHED)])
await self.crawls.create_index([("state", pymongo.HASHED)])
await self.crawls.create_index([("fileSize", pymongo.DESCENDING)])
async def list_crawls(
self,
@ -127,15 +131,6 @@ class CrawlOps(BaseCrawlOps):
{"$set": {"firstSeedObject": {"$arrayElemAt": ["$config.seeds", 0]}}},
{"$set": {"firstSeed": "$firstSeedObject.url"}},
{"$unset": ["firstSeedObject", "errors"]},
{
"$lookup": {
"from": "crawl_configs",
"localField": "cid",
"foreignField": "_id",
"as": "crawlConfig",
},
},
{"$set": {"name": {"$arrayElemAt": ["$crawlConfig.name", 0]}}},
]
if not resources:
@ -154,7 +149,12 @@ class CrawlOps(BaseCrawlOps):
aggregate.extend([{"$match": {"collections": {"$in": [collection_id]}}}])
if sort_by:
if sort_by not in ("started", "finished", "fileSize", "firstSeed"):
if sort_by not in (
"started",
"finished",
"fileSize",
"firstSeed",
):
raise HTTPException(status_code=400, detail="invalid_sort_by")
if sort_direction not in (1, -1):
raise HTTPException(status_code=400, detail="invalid_sort_direction")
@ -545,6 +545,7 @@ async def add_new_crawl(
manual=manual,
started=started,
tags=crawlconfig.tags,
name=crawlconfig.name,
)
try:

View File

@ -15,7 +15,7 @@ from pymongo.errors import InvalidName
from .migrations import BaseMigration
CURR_DB_VERSION = "0012"
CURR_DB_VERSION = "0013"
# ============================================================================

View File

@ -0,0 +1,42 @@
"""
Migration 0013 - Copy config name to crawls
"""
from btrixcloud.migrations import BaseMigration
MIGRATION_VERSION = "0013"
class Migration(BaseMigration):
"""Migration class."""
def __init__(self, mdb, migration_version=MIGRATION_VERSION):
super().__init__(mdb, migration_version)
async def migrate_up(self):
"""Perform migration up.
Copy crawl config names to associated crawls.
"""
# pylint: disable=duplicate-code
crawls = self.mdb["crawls"]
crawl_configs = self.mdb["crawl_configs"]
configs = [res async for res in crawl_configs.find({"inactive": {"$ne": True}})]
if not configs:
return
for config in configs:
config_id = config["_id"]
try:
if not config.get("name"):
continue
await crawls.update_many(
{"cid": config_id}, {"$set": {"name": config.get("name")}}
)
# pylint: disable=broad-exception-caught
except Exception as err:
print(
f"Unable to set name for crawls from with config {config_id}: {err}",
flush=True,
)

View File

@ -296,6 +296,8 @@ class BaseCrawl(BaseMongoModel):
started: datetime
finished: Optional[datetime]
name: Optional[str]
state: str
stats: Optional[Dict[str, int]]
@ -368,7 +370,9 @@ class CrawlOutWithResources(CrawlOut):
class UpdateCrawl(BaseModel):
"""Update crawl"""
tags: Optional[List[str]] = []
name: Optional[str]
description: Optional[str]
tags: Optional[List[str]]
description: Optional[str]
@ -433,7 +437,6 @@ class UploadedCrawl(BaseCrawl):
type: str = Field("upload", const=True)
name: str
tags: Optional[List[str]] = []
@ -441,8 +444,6 @@ class UploadedCrawl(BaseCrawl):
class UpdateUpload(UpdateCrawl):
"""Update modal that also includes name"""
name: Optional[str]
# ============================================================================

View File

@ -299,6 +299,7 @@ def init_uploads_api(app, mdb, users, crawl_manager, crawl_configs, orgs, user_d
org: Organization = Depends(org_viewer_dep),
pageSize: int = DEFAULT_PAGE_SIZE,
page: int = 1,
state: Optional[str] = None,
userid: Optional[UUID4] = None,
name: Optional[str] = None,
description: Optional[str] = None,
@ -306,9 +307,18 @@ def init_uploads_api(app, mdb, users, crawl_manager, crawl_configs, orgs, user_d
sortBy: Optional[str] = "finished",
sortDirection: Optional[int] = -1,
):
states = state.split(",") if state else None
if name:
name = unquote(name)
if description:
description = unquote(description)
uploads, total = await ops.list_all_base_crawls(
org,
userid=userid,
states=states,
name=name,
description=description,
page_size=pageSize,

View File

@ -18,6 +18,7 @@ CRAWLER_PW = "crawlerPASSWORD!"
_admin_config_id = None
_crawler_config_id = None
_auto_add_config_id = None
_all_crawls_config_id = None
NON_DEFAULT_ORG_NAME = "Non-default org"
@ -118,6 +119,12 @@ def admin_config_id(admin_crawl_id):
return _admin_config_id
@pytest.fixture(scope="session")
def admin_userid(admin_auth_headers):
r = requests.get(f"{API_PREFIX}/users/me", headers=admin_auth_headers)
return r.json()["id"]
@pytest.fixture(scope="session")
def viewer_auth_headers(admin_auth_headers, default_org_id):
requests.post(
@ -331,6 +338,54 @@ def auto_add_config_id(auto_add_crawl_id):
return _auto_add_config_id
@pytest.fixture(scope="session")
def all_crawls_crawl_id(crawler_auth_headers, default_org_id):
# Start crawl.
crawl_data = {
"runNow": True,
"name": "All Crawls Test Crawl",
"description": "Lorem ipsum",
"config": {
"seeds": [{"url": "https://webrecorder.net/"}],
},
}
r = requests.post(
f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/",
headers=crawler_auth_headers,
json=crawl_data,
)
data = r.json()
global _all_crawls_config_id
_all_crawls_config_id = data["id"]
crawl_id = data["run_now_job"]
# Wait for it to complete and then return crawl ID
while True:
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawl_id}/replay.json",
headers=crawler_auth_headers,
)
data = r.json()
if data["state"] == "complete":
break
time.sleep(5)
# Add description to crawl
r = requests.patch(
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawl_id}",
headers=crawler_auth_headers,
json={"description": "Lorem ipsum"},
)
assert r.status_code == 200
return crawl_id
@pytest.fixture(scope="session")
def all_crawls_config_id(all_crawls_crawl_id):
return _all_crawls_config_id
@pytest.fixture(scope="session")
def uploads_collection_id(crawler_auth_headers, default_org_id):
r = requests.post(

View File

@ -191,10 +191,11 @@ def test_update_crawl(admin_auth_headers, default_org_id, admin_crawl_id):
# Submit patch request to update tags and description
UPDATED_TAGS = ["wr-test-1-updated", "wr-test-2-updated"]
UPDATED_DESC = "Lorem ipsum test note."
UPDATED_NAME = "Updated crawl name"
r = requests.patch(
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{admin_crawl_id}",
headers=admin_auth_headers,
json={"tags": UPDATED_TAGS, "description": UPDATED_DESC},
json={"tags": UPDATED_TAGS, "description": UPDATED_DESC, "name": UPDATED_NAME},
)
assert r.status_code == 200
data = r.json()
@ -209,6 +210,7 @@ def test_update_crawl(admin_auth_headers, default_org_id, admin_crawl_id):
data = r.json()
assert sorted(data["tags"]) == sorted(UPDATED_TAGS)
assert data["description"] == UPDATED_DESC
assert data["name"] == UPDATED_NAME
# Verify deleting works as well
r = requests.patch(

View File

@ -48,7 +48,14 @@ def test_cancel_crawl(default_org_id, crawler_auth_headers):
data = get_crawl(default_org_id, crawler_auth_headers, crawl_id)
while data["state"] in ("running", "waiting_capacity"):
while data["state"] in (
"starting",
"running",
"waiting_capacity",
"generate-wacz",
"uploading-wacz",
"pending-wait",
):
time.sleep(5)
data = get_crawl(default_org_id, crawler_auth_headers, crawl_id)
@ -88,7 +95,14 @@ def test_start_crawl_and_stop_immediately(
)
assert r.json()["lastCrawlStopping"] == True
while data["state"] in ("starting", "running", "waiting_capacity"):
while data["state"] in (
"starting",
"running",
"waiting_capacity",
"generate-wacz",
"uploading-wacz",
"pending-wait",
):
time.sleep(5)
data = get_crawl(default_org_id, crawler_auth_headers, crawl_id)
@ -149,7 +163,12 @@ def test_stop_crawl_partial(
)
assert r.json()["lastCrawlStopping"] == True
while data["state"] == "running":
while data["state"] in (
"running",
"generate-wacz",
"uploading-wacz",
"pending-wait",
):
time.sleep(5)
data = get_crawl(default_org_id, crawler_auth_headers, crawl_id)

View File

@ -9,6 +9,8 @@ upload_id = None
upload_id_2 = None
upload_dl_path = None
_coll_id = None
curr_dir = os.path.dirname(os.path.realpath(__file__))
@ -371,6 +373,275 @@ def test_list_all_crawls(admin_auth_headers, default_org_id):
assert item["state"]
def test_get_all_crawls_by_name(admin_auth_headers, default_org_id):
"""Test filtering /all-crawls by name"""
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?name=test2.wacz",
headers=admin_auth_headers,
)
assert r.status_code == 200
data = r.json()
assert data["total"] == 1
items = data["items"]
assert items[0]["id"] == upload_id_2
assert items[0]["name"] == "test2.wacz"
crawl_name = "Crawler User Test Crawl"
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?name={crawl_name}",
headers=admin_auth_headers,
)
assert r.status_code == 200
data = r.json()
assert data["total"] == 3
for item in data["items"]:
assert item["name"] == crawl_name
def test_get_all_crawls_by_first_seed(
admin_auth_headers, default_org_id, crawler_crawl_id
):
"""Test filtering /all-crawls by first seed"""
first_seed = "https://webrecorder.net/"
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?firstSeed={first_seed}",
headers=admin_auth_headers,
)
assert r.status_code == 200
data = r.json()
assert data["total"] == 3
for item in data["items"]:
assert item["firstSeed"] == first_seed
def test_get_all_crawls_by_type(admin_auth_headers, default_org_id, admin_crawl_id):
"""Test filtering /all-crawls by crawl type"""
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?crawlType=crawl",
headers=admin_auth_headers,
)
assert r.status_code == 200
data = r.json()
assert data["total"] == 3
for item in data["items"]:
assert item["type"] == "crawl"
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?crawlType=upload",
headers=admin_auth_headers,
)
assert r.status_code == 200
data = r.json()
assert data["total"] == 3
for item in data["items"]:
assert item["type"] == "upload"
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?crawlType=invalid",
headers=admin_auth_headers,
)
assert r.status_code == 400
assert r.json()["detail"] == "invalid_crawl_type"
def test_get_all_crawls_by_user(admin_auth_headers, default_org_id, crawler_userid):
"""Test filtering /all-crawls by userid"""
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?userid={crawler_userid}",
headers=admin_auth_headers,
)
assert r.status_code == 200
data = r.json()
assert data["total"] == 4
for item in data["items"]:
assert item["userid"] == crawler_userid
def test_get_all_crawls_by_cid(
admin_auth_headers, default_org_id, all_crawls_config_id
):
"""Test filtering /all-crawls by cid"""
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?cid={all_crawls_config_id}",
headers=admin_auth_headers,
)
assert r.status_code == 200
data = r.json()
assert data["total"] == 1
assert data["items"][0]["cid"] == all_crawls_config_id
def test_get_all_crawls_by_state(admin_auth_headers, default_org_id, admin_crawl_id):
"""Test filtering /all-crawls by cid"""
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?state=complete,partial_complete",
headers=admin_auth_headers,
)
assert r.status_code == 200
data = r.json()
assert data["total"] == 5
items = data["items"]
for item in items:
assert item["state"] in ("complete", "partial_complete")
def test_get_all_crawls_by_collection_id(
admin_auth_headers, default_org_id, admin_config_id, all_crawls_crawl_id
):
"""Test filtering /all-crawls by collection id"""
# Create collection and add upload to it
r = requests.post(
f"{API_PREFIX}/orgs/{default_org_id}/collections",
headers=admin_auth_headers,
json={
"crawlIds": [all_crawls_crawl_id],
"name": "all-crawls collection",
},
)
assert r.status_code == 200
global _coll_id
_coll_id = r.json()["id"]
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?collectionId={_coll_id}",
headers=admin_auth_headers,
)
assert r.status_code == 200
assert r.json()["total"] == 1
assert r.json()["items"][0]["id"] == all_crawls_crawl_id
def test_sort_all_crawls(admin_auth_headers, default_org_id, admin_crawl_id):
# Sort by started, descending (default)
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=started",
headers=admin_auth_headers,
)
data = r.json()
assert data["total"] == 7
items = data["items"]
assert len(items) == 7
last_created = None
for crawl in items:
if last_created:
assert crawl["started"] <= last_created
last_created = crawl["started"]
# Sort by started, ascending
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=started&sortDirection=1",
headers=admin_auth_headers,
)
data = r.json()
items = data["items"]
last_created = None
for crawl in items:
if last_created:
assert crawl["started"] >= last_created
last_created = crawl["started"]
# Sort by finished
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=finished",
headers=admin_auth_headers,
)
data = r.json()
items = data["items"]
last_finished = None
for crawl in items:
if not crawl["finished"]:
continue
if last_finished:
assert crawl["finished"] <= last_finished
last_finished = crawl["finished"]
# Sort by finished, ascending
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=finished&sortDirection=1",
headers=admin_auth_headers,
)
data = r.json()
items = data["items"]
last_finished = None
for crawl in items:
if not crawl["finished"]:
continue
if last_finished:
assert crawl["finished"] >= last_finished
last_finished = crawl["finished"]
# Sort by fileSize
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=fileSize",
headers=admin_auth_headers,
)
data = r.json()
items = data["items"]
last_size = None
for crawl in items:
if last_size:
assert crawl["fileSize"] <= last_size
last_size = crawl["fileSize"]
# Sort by fileSize, ascending
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=fileSize&sortDirection=1",
headers=admin_auth_headers,
)
data = r.json()
items = data["items"]
last_size = None
for crawl in items:
if last_size:
assert crawl["fileSize"] >= last_size
last_size = crawl["fileSize"]
# Invalid sort value
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=invalid",
headers=admin_auth_headers,
)
assert r.status_code == 400
assert r.json()["detail"] == "invalid_sort_by"
# Invalid sort_direction value
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/all-crawls?sortBy=started&sortDirection=0",
headers=admin_auth_headers,
)
assert r.status_code == 400
assert r.json()["detail"] == "invalid_sort_direction"
def test_all_crawls_search_values(admin_auth_headers, default_org_id):
"""Test that all-crawls search values return expected results"""
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/all-crawls/search-values",
headers=admin_auth_headers,
)
data = r.json()
assert len(data["names"]) == 5
expected_names = [
"Crawler User Test Crawl",
"My Upload Updated",
"test2.wacz",
"All Crawls Test Crawl",
]
for expected_name in expected_names:
assert expected_name in data["names"]
assert sorted(data["descriptions"]) == ["Lorem ipsum"]
assert sorted(data["firstSeeds"]) == ["https://webrecorder.net/"]
assert len(data["crawlIds"]) == 7
def test_get_upload_from_all_crawls(admin_auth_headers, default_org_id):
"""Test that /all-crawls lists crawls and uploads before deleting uploads"""
r = requests.get(

View File

@ -31,8 +31,11 @@ def test_run_two_only_one_concurrent(org_with_quotas, admin_auth_headers):
):
time.sleep(2)
assert (
get_crawl_status(org_with_quotas, crawl_id_a, admin_auth_headers) == "running"
assert get_crawl_status(org_with_quotas, crawl_id_a, admin_auth_headers) in (
"running",
"generate-wacz",
"uploading-wacz",
"pending-wait",
)
while (
@ -68,6 +71,10 @@ def test_cancel_and_run_other(org_with_quotas, admin_auth_headers):
assert get_crawl_status(org_with_quotas, crawl_id_b, admin_auth_headers) in (
"starting",
"running",
"waiting_capacity",
"generate-wacz",
"uploading-wacz",
"pending-wait",
)
# cancel second crawl as well