API filters by user + crawl collection ids (#462)
backend: object filtering:
- add filtering of crawls, crawl configs, and profiles by a userid= query arg, fixes #460
- add filtering of crawls by crawl config via a cid= query arg, fixes #400
- tests: add a test_filter_results suite covering filtering of crawls and crawl configs by user; also create a user with 'crawler' permissions and run a second crawl as that user
This commit is contained in:
parent 7b5d82936d · commit 5efeaa58b1
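
For context, a rough sketch of how the new query parameters can be exercised against the API. This is illustrative only: the host, archive id, user id, config id, and token below are placeholders, and the profiles path assumes the profiles router is mounted under /archives/{aid}/profiles like the other routers; only the query args themselves come from the diff.

    import requests

    # Placeholder values for illustration only.
    API_PREFIX = "http://localhost:9871/api"
    AID = "11111111-1111-1111-1111-111111111111"       # archive id
    USERID = "22222222-2222-2222-2222-222222222222"    # a user's id
    CID = "33333333-3333-3333-3333-333333333333"       # a crawl config id
    HEADERS = {"Authorization": "Bearer <jwt-token>"}  # assumed valid login token

    # Crawl configs created by one user (new userid= filter, #460).
    requests.get(f"{API_PREFIX}/archives/{AID}/crawlconfigs?userid={USERID}", headers=HEADERS)

    # Crawls started by one user, or started from one crawl config (new userid= / cid= filters, #400).
    requests.get(f"{API_PREFIX}/archives/{AID}/crawls?userid={USERID}", headers=HEADERS)
    requests.get(f"{API_PREFIX}/archives/{AID}/crawls?cid={CID}", headers=HEADERS)

    # Browser profiles created by one user (new userid= filter).
    requests.get(f"{API_PREFIX}/archives/{AID}/profiles?userid={USERID}", headers=HEADERS)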
@@ -366,7 +366,10 @@ class CrawlConfigOps:
         return {"success": True}
 
     async def get_crawl_configs(
-        self, archive: Archive, tags: Optional[List[str]] = None
+        self,
+        archive: Archive,
+        userid: Optional[UUID4] = None,
+        tags: Optional[List[str]] = None,
     ):
         """Get all crawl configs for an archive is a member of"""
         match_query = {"aid": archive.id, "inactive": {"$ne": True}}
@@ -374,6 +377,9 @@ class CrawlConfigOps:
         if tags:
             match_query["tags"] = {"$all": tags}
 
+        if userid:
+            match_query["userid"] = userid
+
         # pylint: disable=duplicate-code
         cursor = self.crawl_configs.aggregate(
             [
@@ -599,9 +605,10 @@ def init_crawl_config_api(
     @router.get("", response_model=CrawlConfigsResponse)
     async def get_crawl_configs(
         archive: Archive = Depends(archive_crawl_dep),
+        userid: Optional[UUID4] = None,
         tag: Union[List[str], None] = Query(default=None),
     ):
-        return await ops.get_crawl_configs(archive, tag)
+        return await ops.get_crawl_configs(archive, userid=userid, tags=tag)
 
     @router.get("/tags")
     async def get_crawl_config_tags(archive: Archive = Depends(archive_crawl_dep)):
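
To make the filter composition above concrete: when both userid and tags are supplied, get_crawl_configs ends up handing a $match filter shaped like the dict below to the aggregation. A minimal sketch with hypothetical placeholder values; only the fields visible in the hunks are included.

    from uuid import UUID

    archive_id = UUID("11111111-1111-1111-1111-111111111111")  # placeholder
    userid = UUID("22222222-2222-2222-2222-222222222222")      # placeholder

    match_query = {
        "aid": archive_id,            # always scoped to the requesting archive
        "inactive": {"$ne": True},    # hide deactivated configs
        "tags": {"$all": ["tag-1"]},  # only configs carrying every requested tag
        "userid": userid,             # new: only configs created by this user
    }
    print(match_query)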
@@ -173,6 +173,7 @@ class CrawlOps:
         archive: Optional[Archive] = None,
         cid: uuid.UUID = None,
         collid: uuid.UUID = None,
+        userid: uuid.UUID = None,
         crawl_id: str = None,
         exclude_files=True,
         running_only=False,
@@ -191,6 +192,9 @@ class CrawlOps:
         if collid:
             query["colls"] = collid
 
+        if userid:
+            query["userid"] = userid
+
         if running_only:
             query["state"] = {"$in": ["running", "starting", "stopping"]}
 
@@ -573,15 +577,31 @@ def init_crawls_api(
     archive_crawl_dep = archives.archive_crawl_dep
 
     @app.get("/archives/all/crawls", tags=["crawls"], response_model=ListCrawls)
-    async def list_crawls_admin(user: User = Depends(user_dep)):
+    async def list_crawls_admin(
+        user: User = Depends(user_dep),
+        userid: Optional[UUID4] = None,
+        cid: Optional[UUID4] = None,
+    ):
         if not user.is_superuser:
             raise HTTPException(status_code=403, detail="Not Allowed")
 
-        return ListCrawls(crawls=await ops.list_crawls(None, running_only=True))
+        return ListCrawls(
+            crawls=await ops.list_crawls(
+                None, userid=userid, cid=cid, running_only=True
+            )
+        )
 
     @app.get("/archives/{aid}/crawls", tags=["crawls"], response_model=ListCrawls)
-    async def list_crawls(archive: Archive = Depends(archive_viewer_dep)):
-        return ListCrawls(crawls=await ops.list_crawls(archive))
+    async def list_crawls(
+        archive: Archive = Depends(archive_viewer_dep),
+        userid: Optional[UUID4] = None,
+        cid: Optional[UUID4] = None,
+    ):
+        return ListCrawls(
+            crawls=await ops.list_crawls(
+                archive, userid=userid, cid=cid, running_only=False
+            )
+        )
 
     @app.post(
         "/archives/{aid}/crawls/{crawl_id}/cancel",
@@ -646,7 +666,6 @@ def init_crawls_api(
             raise HTTPException(status_code=403, detail="Not Allowed")
 
         crawls = await ops.list_crawls(crawl_id=crawl_id)
-        print("crawls", crawls)
        if len(crawls) < 1:
             raise HTTPException(status_code=404, detail="crawl_not_found")
 
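
Because the new filters are declared as Optional[UUID4] query parameters, FastAPI handles parsing and validation before the handlers run: omitting the parameter leaves it None, a well-formed v4 UUID is converted, and a malformed value is rejected with a 422. A minimal, self-contained sketch (not the project's actual app; endpoint and field names are made up) demonstrating that behavior:

    from typing import Optional

    from fastapi import FastAPI
    from fastapi.testclient import TestClient
    from pydantic import UUID4

    app = FastAPI()


    @app.get("/crawls")
    async def list_crawls(userid: Optional[UUID4] = None, cid: Optional[UUID4] = None):
        # Echo back what was parsed so the behavior is easy to see.
        return {"userid": str(userid) if userid else None, "cid": str(cid) if cid else None}


    client = TestClient(app)
    assert client.get("/crawls").json() == {"userid": None, "cid": None}
    assert client.get(
        "/crawls", params={"userid": "9f1c4e0a-1111-4222-8333-444444444444"}
    ).status_code == 200
    assert client.get("/crawls", params={"userid": "not-a-uuid"}).status_code == 422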
@@ -253,9 +253,13 @@ class ProfileOps:
 
         return {"success": True}
 
-    async def list_profiles(self, archive: Archive):
+    async def list_profiles(self, archive: Archive, userid: Optional[UUID4] = None):
         """list all profiles"""
-        cursor = self.profiles.find({"aid": archive.id})
+        query = {"aid": archive.id}
+        if userid:
+            query["userid"] = userid
+
+        cursor = self.profiles.find(query)
         results = await cursor.to_list(length=1000)
         return [Profile.from_dict(res) for res in results]
 
@@ -395,8 +399,9 @@ def init_profiles_api(mdb, crawl_manager, archive_ops, user_dep):
     @router.get("", response_model=List[Profile])
     async def list_profiles(
         archive: Archive = Depends(archive_crawl_dep),
+        userid: Optional[UUID4] = None,
     ):
-        return await ops.list_profiles(archive)
+        return await ops.list_profiles(archive, userid)
 
     @router.post("", response_model=Profile)
     async def commit_browser_to_new(
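
The list_profiles change follows the same pattern as the other query builders: start from the archive scope, add userid only when given, then run a plain find with a bounded to_list. A minimal, self-contained sketch of that Motor pattern, assuming a local MongoDB and hypothetical database/collection names:

    import asyncio
    from typing import Optional
    from uuid import UUID

    from motor.motor_asyncio import AsyncIOMotorClient


    async def list_profiles(profiles, archive_id: UUID, userid: Optional[UUID] = None):
        # Same filter shape as ProfileOps.list_profiles above.
        query = {"aid": archive_id}
        if userid:
            query["userid"] = userid
        cursor = profiles.find(query)
        return await cursor.to_list(length=1000)


    async def main():
        client = AsyncIOMotorClient("mongodb://localhost:27017")  # assumed local instance
        profiles = client["example-db"]["profiles"]               # hypothetical names
        docs = await list_profiles(
            profiles,
            archive_id=UUID("11111111-1111-1111-1111-111111111111"),
            userid=UUID("22222222-2222-2222-2222-222222222222"),
        )
        print(len(docs))


    asyncio.run(main())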
@@ -12,6 +12,12 @@ ADMIN_PW = "PASSW0RD!"
 VIEWER_USERNAME = "viewer@example.com"
 VIEWER_PW = "viewerPASSW0RD!"
 
+CRAWLER_USERNAME = "crawler@example.com"
+CRAWLER_PW = "crawlerPASSWORD!"
+
+_admin_config_id = None
+_crawler_config_id = None
+
 
 @pytest.fixture(scope="session")
 def admin_auth_headers():
@@ -58,6 +64,10 @@ def admin_crawl_id(admin_auth_headers, admin_aid):
         json=crawl_data,
     )
     data = r.json()
+
+    global _admin_config_id
+    _admin_config_id = data["added"]
+
     crawl_id = data["run_now_job"]
     # Wait for it to complete and then return crawl ID
     while True:
@@ -71,6 +81,11 @@ def admin_crawl_id(admin_auth_headers, admin_aid):
         time.sleep(5)
 
 
+@pytest.fixture(scope="session")
+def admin_config_id(admin_crawl_id):
+    return _admin_config_id
+
+
 @pytest.fixture(scope="session")
 def viewer_auth_headers(admin_auth_headers, admin_aid):
     requests.post(
@@ -90,8 +105,74 @@ def viewer_auth_headers(admin_auth_headers, admin_aid):
             "password": VIEWER_PW,
             "grant_type": "password",
         },
         headers=admin_auth_headers,
     )
     data = r.json()
     access_token = data.get("access_token")
     return {"Authorization": f"Bearer {access_token}"}
+
+
+@pytest.fixture(scope="session")
+def crawler_auth_headers(admin_auth_headers, admin_aid):
+    requests.post(
+        f"{API_PREFIX}/archives/{admin_aid}/add-user",
+        json={
+            "email": CRAWLER_USERNAME,
+            "password": CRAWLER_PW,
+            "name": "new-crawler",
+            "role": 20,
+        },
+        headers=admin_auth_headers,
+    )
+    r = requests.post(
+        f"{API_PREFIX}/auth/jwt/login",
+        data={
+            "username": CRAWLER_USERNAME,
+            "password": CRAWLER_PW,
+            "grant_type": "password",
+        },
+    )
+    data = r.json()
+    access_token = data.get("access_token")
+    return {"Authorization": f"Bearer {access_token}"}
+
+
+@pytest.fixture(scope="session")
+def crawler_userid(crawler_auth_headers):
+    r = requests.get(f"{API_PREFIX}/users/me", headers=crawler_auth_headers)
+    return r.json()["id"]
+
+
+@pytest.fixture(scope="session")
+def crawler_crawl_id(crawler_auth_headers, admin_aid):
+    # Start crawl.
+    crawl_data = {
+        "runNow": True,
+        "name": "Crawler User Test Crawl",
+        "config": {"seeds": ["https://webrecorder.net/"], "limit": 1},
+    }
+    r = requests.post(
+        f"{API_PREFIX}/archives/{admin_aid}/crawlconfigs/",
+        headers=crawler_auth_headers,
+        json=crawl_data,
+    )
+    data = r.json()
+
+    global _crawler_config_id
+    _crawler_config_id = data["added"]
+
+    crawl_id = data["run_now_job"]
+    # Wait for it to complete and then return crawl ID
+    while True:
+        r = requests.get(
+            f"{API_PREFIX}/archives/{admin_aid}/crawls/{crawl_id}/replay.json",
+            headers=crawler_auth_headers,
+        )
+        data = r.json()
+        if data["state"] == "complete":
+            return crawl_id
+        time.sleep(5)
+
+
+@pytest.fixture(scope="session")
+def crawler_config_id(crawler_crawl_id):
+    return _crawler_config_id
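
The new fixtures record the created crawl config ids in module-level globals (_admin_config_id, _crawler_config_id) so that a small follow-up fixture can return them without creating anything else; depending on the crawl fixture guarantees the global is populated first. A minimal, self-contained sketch of that pattern with hypothetical names and no HTTP:

    import pytest

    _config_id = None


    @pytest.fixture(scope="session")
    def crawl_id():
        # Stand-in for a fixture that creates a config, starts a crawl,
        # records the config id as a side effect, and returns the crawl id.
        global _config_id
        _config_id = "config-123"
        return "crawl-456"


    @pytest.fixture(scope="session")
    def config_id(crawl_id):
        # Requesting crawl_id ensures the global above has been set.
        return _config_id


    def test_ids(crawl_id, config_id):
        assert crawl_id == "crawl-456"
        assert config_id == "config-123"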
@@ -5,6 +5,7 @@ from .conftest import API_PREFIX
 new_cid_1 = None
 new_cid_2 = None
 
+
 def get_sample_crawl_data(tags):
     return {
         "runNow": False,
@@ -13,11 +14,12 @@ def get_sample_crawl_data(tags):
         "tags": tags,
     }
 
+
 def test_create_new_config_1(admin_auth_headers, admin_aid):
     r = requests.post(
         f"{API_PREFIX}/archives/{admin_aid}/crawlconfigs/",
         headers=admin_auth_headers,
-        json=get_sample_crawl_data(["tag-1", "tag-2"])
+        json=get_sample_crawl_data(["tag-1", "tag-2"]),
     )
 
     assert r.status_code == 200
@@ -29,6 +31,7 @@ def test_create_new_config_1(admin_auth_headers, admin_aid):
     global new_cid_1
     new_cid_1 = data["added"]
 
+
 def test_get_config_1(admin_auth_headers, admin_aid):
     r = requests.get(
         f"{API_PREFIX}/archives/{admin_aid}/crawlconfigs/{new_cid_1}",
@@ -36,6 +39,7 @@ def test_get_config_1(admin_auth_headers, admin_aid):
     )
     assert r.json()["tags"] == ["tag-1", "tag-2"]
 
+
 def test_get_config_by_tag_1(admin_auth_headers, admin_aid):
     r = requests.get(
         f"{API_PREFIX}/archives/{admin_aid}/crawlconfigs/tags",
@@ -43,11 +47,12 @@ def test_get_config_by_tag_1(admin_auth_headers, admin_aid):
     )
     assert r.json() == ["tag-1", "tag-2"]
 
+
 def test_create_new_config_2(admin_auth_headers, admin_aid):
     r = requests.post(
         f"{API_PREFIX}/archives/{admin_aid}/crawlconfigs/",
         headers=admin_auth_headers,
-        json=get_sample_crawl_data(["tag-3", "tag-0"])
+        json=get_sample_crawl_data(["tag-3", "tag-0"]),
     )
 
     assert r.status_code == 200
@@ -59,6 +64,7 @@ def test_create_new_config_2(admin_auth_headers, admin_aid):
     global new_cid_2
     new_cid_2 = data["added"]
 
+
 def test_get_config_by_tag_2(admin_auth_headers, admin_aid):
     r = requests.get(
         f"{API_PREFIX}/archives/{admin_aid}/crawlconfigs/tags",
@@ -66,11 +72,10 @@ def test_get_config_by_tag_2(admin_auth_headers, admin_aid):
     )
     assert r.json() == ["tag-0", "tag-1", "tag-2", "tag-3"]
 
 
 def test_get_config_2(admin_auth_headers, admin_aid):
     r = requests.get(
         f"{API_PREFIX}/archives/{admin_aid}/crawlconfigs/{new_cid_2}",
         headers=admin_auth_headers,
     )
     assert r.json()["tags"] == ["tag-3", "tag-0"]
-
backend/test/test_filter_results.py (new file, 72 additions)
@@ -0,0 +1,72 @@
+import requests
+
+from .conftest import API_PREFIX
+
+
+def get_sample_crawl_data():
+    return {
+        "runNow": False,
+        "name": "Test Crawl",
+        "config": {"seeds": ["https://example.com/"]},
+    }
+
+
+def test_create_new_config_crawler_user(crawler_auth_headers, admin_aid):
+    r = requests.post(
+        f"{API_PREFIX}/archives/{admin_aid}/crawlconfigs/",
+        headers=crawler_auth_headers,
+        json=get_sample_crawl_data(),
+    )
+
+    assert r.status_code == 200
+
+    data = r.json()
+    assert data["added"]
+    assert data["run_now_job"] == None
+
+
+def test_get_config_by_user(crawler_auth_headers, admin_aid, crawler_userid):
+    r = requests.get(
+        f"{API_PREFIX}/archives/{admin_aid}/crawlconfigs?userid={crawler_userid}",
+        headers=crawler_auth_headers,
+    )
+    assert len(r.json()["crawlConfigs"]) == 1
+
+
+def test_ensure_crawl_and_admin_user_crawls(
+    admin_aid, crawler_auth_headers, crawler_crawl_id, admin_crawl_id
+):
+    assert crawler_crawl_id
+    assert admin_crawl_id
+    r = requests.get(
+        f"{API_PREFIX}/archives/{admin_aid}/crawls",
+        headers=crawler_auth_headers,
+    )
+    assert len(r.json()["crawls"]) == 2
+
+
+def test_get_crawl_job_by_user(
+    crawler_auth_headers, admin_aid, crawler_userid, crawler_crawl_id
+):
+    r = requests.get(
+        f"{API_PREFIX}/archives/{admin_aid}/crawls?userid={crawler_userid}",
+        headers=crawler_auth_headers,
+    )
+    assert len(r.json()["crawls"]) == 1
+
+
+def test_get_crawl_job_by_config(
+    crawler_auth_headers, admin_aid, admin_config_id, crawler_config_id
+):
+
+    r = requests.get(
+        f"{API_PREFIX}/archives/{admin_aid}/crawls?cid={admin_config_id}",
+        headers=crawler_auth_headers,
+    )
+    assert len(r.json()["crawls"]) == 1
+
+    r = requests.get(
+        f"{API_PREFIX}/archives/{admin_aid}/crawls?cid={crawler_config_id}",
+        headers=crawler_auth_headers,
+    )
+    assert len(r.json()["crawls"]) == 1
@@ -8,9 +8,13 @@ def test_admin_get_archive_crawls(admin_auth_headers, admin_aid, admin_crawl_id):
         f"{API_PREFIX}/archives/{admin_aid}/crawls", headers=admin_auth_headers
     )
     data = r.json()
-    assert len(data["crawls"]) > 0
-    assert data["crawls"][0]["id"] == admin_crawl_id
-    assert data["crawls"][0]["aid"] == admin_aid
+    crawls = data["crawls"]
+    crawl_ids = []
+    assert len(crawls) > 0
+    for crawl in crawls:
+        assert crawl["aid"] == admin_aid
+        crawl_ids.append(crawl["id"])
+    assert admin_crawl_id in crawl_ids
 
 
 def test_viewer_get_archive_crawls(viewer_auth_headers, admin_aid, admin_crawl_id):
@@ -20,9 +24,10 @@ def test_viewer_get_archive_crawls(viewer_auth_headers, admin_aid, admin_crawl_id):
     data = r.json()
     crawls = data["crawls"]
     crawl_ids = []
-    for crawl in crawls:
-        crawl_ids.append(crawl["id"])
     assert len(crawls) > 0
+    for crawl in crawls:
+        assert crawl["aid"] == admin_aid
+        crawl_ids.append(crawl["id"])
     assert admin_crawl_id in crawl_ids
 
 