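# Tests for crawl workflow (crawlconfig) creation and the
# crawlconfigs/search-values endpoint, which should return de-duplicated
# workflow names, descriptions, and first seed URLs.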
import requests

from .conftest import API_PREFIX

NAME_1 = "Workflow 1"
NAME_2 = "Workflow 2"

DESCRIPTION_1 = "Description 1"
DESCRIPTION_2 = "Description 2"

FIRST_SEED_1 = "https://one.example.com"
FIRST_SEED_2 = "https://two.example.com"


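# Helper: build a minimal workflow (crawlconfig) payload; runNow is False so
# creating the config does not start a crawl (run_now_job stays None).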
def get_sample_crawl_data(name, description, first_seed):
    return {
        "runNow": False,
        "name": name,
        "config": {"seeds": [{"url": first_seed}]},
        "description": description,
    }


def test_create_new_config_1(admin_auth_headers, default_org_id):
    r = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/",
        headers=admin_auth_headers,
        json=get_sample_crawl_data(NAME_1, DESCRIPTION_1, FIRST_SEED_1),
    )
    assert r.status_code == 200
    data = r.json()
    assert data["added"]
    assert data["run_now_job"] == None


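# The extra names, descriptions, and the webrecorder.net seed asserted below
# presumably come from the workflows created by the shared fixtures in
# conftest (the admin and crawler user test crawls).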
def test_get_search_values_1(admin_auth_headers, default_org_id):
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/search-values",
        headers=admin_auth_headers,
    )
    data = r.json()
    assert sorted(data["names"]) == sorted(
        [NAME_1, "Admin Test Crawl", "Crawler User Test Crawl"]
    )
    assert sorted(data["descriptions"]) == sorted(
        ["Admin Test Crawl description", "crawler test crawl", DESCRIPTION_1]
    )
    assert sorted(data["firstSeeds"]) == sorted(
        ["https://webrecorder.net/", FIRST_SEED_1]
    )


def test_create_new_config_2(admin_auth_headers, default_org_id):
    r = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/",
        headers=admin_auth_headers,
        json=get_sample_crawl_data(NAME_2, DESCRIPTION_2, FIRST_SEED_2),
    )
    assert r.status_code == 200
    data = r.json()
    assert data["added"]
    assert data["run_now_job"] == None


def test_get_search_values_2(admin_auth_headers, default_org_id):
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/search-values",
        headers=admin_auth_headers,
    )
    data = r.json()
    assert sorted(data["names"]) == sorted(
        [NAME_1, NAME_2, "Admin Test Crawl", "Crawler User Test Crawl"]
    )
    assert sorted(data["descriptions"]) == sorted(
        [
            "Admin Test Crawl description",
            "crawler test crawl",
            DESCRIPTION_1,
            DESCRIPTION_2,
        ]
    )
    assert sorted(data["firstSeeds"]) == sorted(
        ["https://webrecorder.net/", FIRST_SEED_1, FIRST_SEED_2]
    )


def test_create_new_config_3_duplicates(admin_auth_headers, default_org_id):
    """Add some duplicate values to ensure they aren't duplicated in response"""
    r = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/",
        headers=admin_auth_headers,
        json=get_sample_crawl_data(NAME_1, DESCRIPTION_2, FIRST_SEED_1),
    )
    assert r.status_code == 200
    data = r.json()
    assert data["added"]
    assert data["run_now_job"] == None


def test_get_search_values_3(admin_auth_headers, default_org_id):
    """Test we still only get unique values"""
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/search-values",
        headers=admin_auth_headers,
    )
    data = r.json()
    assert sorted(data["names"]) == sorted(
        [NAME_1, NAME_2, "Admin Test Crawl", "Crawler User Test Crawl"]
    )
    assert sorted(data["descriptions"]) == sorted(
        [
            "Admin Test Crawl description",
            "crawler test crawl",
            DESCRIPTION_1,
            DESCRIPTION_2,
        ]
    )
    assert sorted(data["firstSeeds"]) == sorted(
        ["https://webrecorder.net/", FIRST_SEED_1, FIRST_SEED_2]
    )