Backend: Add modified field and track created/modifier users for profiles (#1820)

This PR introduces backend changes that add the following fields to the
Profile model:
- `modified`
- `modifiedBy`
- `modifiedByName`
- `createdBy`
- `createdByName`

Modified fields are set to the same as the created fields when the
resource is created, and changed when the profile is updated (profile
itself or metadata).

The list profiles endpoint now also supports `sortBy` and
`sortDirection` options. The endpoint defaults to sorting by `modified`
in descending order, but can also sort on `created` and `name`.

Tests have also been updated to reflect all new behavior.
This commit is contained in:
Tessa Walsh 2024-05-28 17:25:22 -04:00 committed by GitHub
parent 6a94c64fa2
commit 7e5d742fd1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 283 additions and 84 deletions

View File

@ -1183,6 +1183,12 @@ class Profile(BaseMongoModel):
resource: Optional[ProfileFile]
created: Optional[datetime]
createdBy: Optional[UUID] = None
createdByName: Optional[str] = None
modified: Optional[datetime] = None
modifiedBy: Optional[UUID] = None
modifiedByName: Optional[str] = None
baseid: Optional[UUID] = None
crawlerChannel: Optional[str]

View File

@ -1,6 +1,6 @@
""" Profile Management """
from typing import Optional, TYPE_CHECKING, Any, cast
from typing import Optional, TYPE_CHECKING, Any, cast, Dict, List
from datetime import datetime
from uuid import UUID, uuid4
import os
@ -155,12 +155,25 @@ class ProfileOps:
self,
browser_commit: ProfileCreate,
storage: StorageRef,
user: User,
metadata: dict,
profileid: Optional[UUID] = None,
existing_profile: Optional[Profile] = None,
) -> dict[str, Any]:
"""commit profile and shutdown profile browser"""
if not profileid:
# pylint: disable=too-many-locals
now = datetime.utcnow().replace(microsecond=0, tzinfo=None)
if existing_profile:
profileid = existing_profile.id
created = existing_profile.created
created_by = existing_profile.createdBy
created_by_name = existing_profile.createdByName
else:
profileid = uuid4()
created = now
created_by = user.id
created_by_name = user.name if user.name else user.email
filename_data = {"filename": f"profiles/profile-{profileid}.tar.gz"}
@ -200,7 +213,12 @@ class ProfileOps:
id=profileid,
name=browser_commit.name,
description=browser_commit.description,
created=datetime.utcnow().replace(microsecond=0, tzinfo=None),
created=created,
createdBy=created_by,
createdByName=created_by_name,
modified=now,
modifiedBy=user.id,
modifiedByName=user.name if user.name else user.email,
origins=json["origins"],
resource=profile_file,
userid=UUID(metadata.get("btrix.user")),
@ -225,9 +243,17 @@ class ProfileOps:
"storageQuotaReached": quota_reached,
}
async def update_profile_metadata(self, profileid: UUID, update: ProfileUpdate):
async def update_profile_metadata(
self, profileid: UUID, update: ProfileUpdate, user: User
):
"""Update name and description metadata only on existing profile"""
query = {"name": update.name}
query = {
"name": update.name,
"modified": datetime.utcnow().replace(microsecond=0, tzinfo=None),
"modifiedBy": user.id,
"modifiedByName": user.name if user.name else user.email,
}
if update.description is not None:
query["description"] = update.description
@ -244,22 +270,55 @@ class ProfileOps:
userid: Optional[UUID] = None,
page_size: int = DEFAULT_PAGE_SIZE,
page: int = 1,
sort_by: str = "modified",
sort_direction: int = -1,
):
"""list all profiles"""
# pylint: disable=too-many-locals
# Zero-index page for query
page = page - 1
skip = page_size * page
query = {"oid": org.id}
match_query = {"oid": org.id}
if userid:
query["userid"] = userid
match_query["userid"] = userid
total = await self.profiles.count_documents(query)
aggregate: List[Dict[str, Any]] = [{"$match": match_query}]
cursor = self.profiles.find(query, skip=skip, limit=page_size)
results = await cursor.to_list(length=page_size)
profiles = [Profile.from_dict(res) for res in results]
if sort_by:
if sort_by not in ("modified", "created", "name"):
raise HTTPException(status_code=400, detail="invalid_sort_by")
if sort_direction not in (1, -1):
raise HTTPException(status_code=400, detail="invalid_sort_direction")
aggregate.extend([{"$sort": {sort_by: sort_direction}}])
aggregate.extend(
[
{
"$facet": {
"items": [
{"$skip": skip},
{"$limit": page_size},
],
"total": [{"$count": "count"}],
}
},
]
)
cursor = self.profiles.aggregate(aggregate)
results = await cursor.to_list(length=1)
result = results[0]
items = result["items"]
try:
total = int(result["total"][0]["count"])
except (IndexError, ValueError):
total = 0
profiles = [Profile.from_dict(res) for res in items]
return profiles, total
async def get_profile(self, profileid: UUID, org: Optional[Organization] = None):
@ -414,9 +473,16 @@ def init_profiles_api(
userid: Optional[UUID] = None,
pageSize: int = DEFAULT_PAGE_SIZE,
page: int = 1,
sortBy: str = "modified",
sortDirection: int = -1,
):
profiles, total = await ops.list_profiles(
org, userid, page_size=pageSize, page=page
org,
userid,
page_size=pageSize,
page=page,
sort_by=sortBy,
sort_direction=sortDirection,
)
return paginated_format(profiles, total, page, pageSize)
@ -424,19 +490,21 @@ def init_profiles_api(
async def commit_browser_to_new(
browser_commit: ProfileCreate,
org: Organization = Depends(org_crawl_dep),
user: User = Depends(user_dep),
):
metadata = await browser_get_metadata(browser_commit.browserid, org)
return await ops.commit_to_profile(browser_commit, org.storage, metadata)
return await ops.commit_to_profile(browser_commit, org.storage, user, metadata)
@router.patch("/{profileid}")
async def commit_browser_to_existing(
browser_commit: ProfileUpdate,
profileid: UUID,
org: Organization = Depends(org_crawl_dep),
user: User = Depends(user_dep),
):
if not browser_commit.browserid:
await ops.update_profile_metadata(profileid, browser_commit)
await ops.update_profile_metadata(profileid, browser_commit, user)
else:
metadata = await browser_get_metadata(browser_commit.browserid, org)
@ -449,8 +517,9 @@ def init_profiles_api(
crawlerChannel=profile.crawlerChannel,
),
storage=org.storage,
user=user,
metadata=metadata,
profileid=profileid,
existing_profile=profile,
)
return {"updated": True}

View File

@ -122,6 +122,11 @@ def profile_config_id(admin_auth_headers, default_org_id, profile_id):
assert data["oid"] == default_org_id
assert data.get("origins") or data.get("origins") == []
assert data["created"]
assert data["createdBy"]
assert data["createdByName"] == "admin"
assert data["modified"]
assert data["modifiedBy"]
assert data["modifiedByName"] == "admin"
assert not data["baseid"]
resource = data["resource"]
@ -212,6 +217,11 @@ def test_get_profile(admin_auth_headers, default_org_id, profile_id, profile_con
assert data["oid"] == default_org_id
assert data.get("origins") or data.get("origins") == []
assert data["created"]
assert data["createdBy"]
assert data["createdByName"] == "admin"
assert data["modified"]
assert data["modifiedBy"]
assert data["modifiedByName"] == "admin"
assert not data["baseid"]
resource = data["resource"]
@ -256,9 +266,34 @@ def test_list_profiles(admin_auth_headers, default_org_id, profile_id, profile_2
profiles = data["items"]
assert len(profiles) == 2
profile_1 = [
profile for profile in profiles if profile["id"] == profile_id
][0]
# Second profile should be listed first by default because it was
# modified more recently
profile_2 = profiles[0]
assert profile_2["id"] == profile_2_id
assert profile_2["name"] == PROFILE_2_NAME
assert profile_2["description"] == PROFILE_2_DESC
assert profile_2["userid"]
assert profile_2["oid"] == default_org_id
assert profile_2.get("origins") or data.get("origins") == []
assert profile_2["created"]
assert profile_2["createdBy"]
assert profile_2["createdByName"] == "admin"
assert profile_2["modified"]
assert profile_2["modifiedBy"]
assert profile_2["modifiedByName"] == "admin"
assert not profile_2["baseid"]
resource = profile_2["resource"]
assert resource
assert resource["filename"]
assert resource["hash"]
assert resource["size"]
assert resource["storage"]
assert resource["storage"]["name"]
assert resource.get("replicas") or resource.get("replicas") == []
# First profile should be listed second by default because it was
# modified less recently
profile_1 = profiles[1]
assert profile_1["id"] == profile_id
assert profile_1["name"] == PROFILE_NAME
assert profile_1["description"] == PROFILE_DESC
@ -266,6 +301,11 @@ def test_list_profiles(admin_auth_headers, default_org_id, profile_id, profile_2
assert profile_1["oid"] == default_org_id
assert profile_1.get("origins") or data.get("origins") == []
assert profile_1["created"]
assert profile_1["createdBy"]
assert profile_1["createdByName"] == "admin"
assert profile_1["modified"]
assert profile_1["modifiedBy"]
assert profile_1["modifiedByName"] == "admin"
assert not profile_1["baseid"]
resource = profile_1["resource"]
assert resource
@ -276,25 +316,157 @@ def test_list_profiles(admin_auth_headers, default_org_id, profile_id, profile_2
assert resource["storage"]["name"]
assert resource.get("replicas") or resource.get("replicas") == []
profile_2 = [
profile for profile in profiles if profile["id"] == profile_2_id
][0]
break
except:
if time.monotonic() - start_time > time_limit:
raise
time.sleep(1)
def test_update_profile_metadata(crawler_auth_headers, default_org_id, profile_id):
# Get original created/modified times
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/profiles/{profile_id}",
headers=crawler_auth_headers,
)
assert r.status_code == 200
data = r.json()
original_created = data["created"]
original_modified = data["modified"]
# Update name and description
r = requests.patch(
f"{API_PREFIX}/orgs/{default_org_id}/profiles/{profile_id}",
headers=crawler_auth_headers,
json={
"name": PROFILE_NAME_UPDATED,
"description": PROFILE_DESC_UPDATED,
},
)
assert r.status_code == 200
assert r.json()["updated"]
time.sleep(5)
# Verify update
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/profiles/{profile_id}",
headers=crawler_auth_headers,
)
assert r.status_code == 200
data = r.json()
assert data["id"] == profile_id
assert data["name"] == PROFILE_NAME_UPDATED
assert data["description"] == PROFILE_DESC_UPDATED
# Ensure modified was updated but created was not
assert data["modified"] > original_modified
assert data["modifiedBy"]
assert data["modifiedByName"] == "new-crawler"
assert data["created"] == original_created
assert data["createdBy"]
assert data["createdByName"] == "admin"
def test_commit_browser_to_existing_profile(
admin_auth_headers, default_org_id, profile_browser_3_id, profile_id
):
# Get original modified time
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/profiles/{profile_id}",
headers=admin_auth_headers,
)
assert r.status_code == 200
data = r.json()
original_created = data["created"]
original_modified = data["modified"]
prepare_browser_for_profile_commit(
profile_browser_3_id, admin_auth_headers, default_org_id
)
# Commit new browser to existing profile
r = requests.patch(
f"{API_PREFIX}/orgs/{default_org_id}/profiles/{profile_id}",
headers=admin_auth_headers,
json={
"browserid": profile_browser_3_id,
"name": PROFILE_NAME_UPDATED,
"description": PROFILE_DESC_UPDATED,
},
)
assert r.status_code == 200
assert r.json()["updated"]
time.sleep(5)
# Ensure modified was updated but created was not
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/profiles/{profile_id}",
headers=admin_auth_headers,
)
assert r.status_code == 200
data = r.json()
assert data["modified"] > original_modified
assert data["modifiedBy"]
assert data["modifiedByName"] == "admin"
assert data["created"] == original_created
assert data["createdBy"]
assert data["createdByName"] == "admin"
@pytest.mark.parametrize(
"sort_by,sort_direction,profile_1_index,profile_2_index",
[
# Modified, descending
("modified", -1, 0, 1),
# Modified, ascending
("modified", 1, 1, 0),
# Created, descending
("created", -1, 1, 0),
# Created, ascending
("created", 1, 0, 1),
# Name, descending
("name", -1, 0, 1),
# Name, ascending
("name", 1, 1, 0),
],
)
def test_sort_profiles(
admin_auth_headers,
default_org_id,
profile_id,
profile_2_id,
sort_by,
sort_direction,
profile_1_index,
profile_2_index,
):
start_time = time.monotonic()
time_limit = 10
while True:
try:
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/profiles?sortBy={sort_by}&sortDirection={sort_direction}",
headers=admin_auth_headers,
)
assert r.status_code == 200
data = r.json()
assert data["total"] == 2
profiles = data["items"]
assert len(profiles) == 2
profile_1 = profiles[profile_1_index]
assert profile_1["id"] == profile_id
assert profile_1["name"] == PROFILE_NAME_UPDATED
profile_2 = profiles[profile_2_index]
assert profile_2["id"] == profile_2_id
assert profile_2["name"] == PROFILE_2_NAME
assert profile_2["description"] == PROFILE_2_DESC
assert profile_2["userid"]
assert profile_2["oid"] == default_org_id
assert profile_2.get("origins") or data.get("origins") == []
assert profile_2["created"]
assert not profile_2["baseid"]
resource = profile_2["resource"]
assert resource
assert resource["filename"]
assert resource["hash"]
assert resource["size"]
assert resource["storage"]
assert resource["storage"]["name"]
assert resource.get("replicas") or resource.get("replicas") == []
break
except:
if time.monotonic() - start_time > time_limit:
@ -326,51 +498,3 @@ def test_delete_profile(admin_auth_headers, default_org_id, profile_2_id):
)
assert r.status_code == 404
assert r.json()["detail"] == "profile_not_found"
def test_update_profile_metadata(admin_auth_headers, default_org_id, profile_id):
# Update name and description
r = requests.patch(
f"{API_PREFIX}/orgs/{default_org_id}/profiles/{profile_id}",
headers=admin_auth_headers,
json={
"name": PROFILE_NAME_UPDATED,
"description": PROFILE_DESC_UPDATED,
},
)
assert r.status_code == 200
assert r.json()["updated"]
time.sleep(5)
# Verify update
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/profiles/{profile_id}",
headers=admin_auth_headers,
)
assert r.status_code == 200
data = r.json()
assert data["id"] == profile_id
assert data["name"] == PROFILE_NAME_UPDATED
assert data["description"] == PROFILE_DESC_UPDATED
def test_commit_browser_to_existing_profile(
admin_auth_headers, default_org_id, profile_browser_3_id, profile_id
):
prepare_browser_for_profile_commit(
profile_browser_3_id, admin_auth_headers, default_org_id
)
# Commit new browser to existing profile
r = requests.patch(
f"{API_PREFIX}/orgs/{default_org_id}/profiles/{profile_id}",
headers=admin_auth_headers,
json={
"browserid": profile_browser_3_id,
"name": PROFILE_NAME_UPDATED,
"description": PROFILE_DESC_UPDATED,
},
)
assert r.status_code == 200
assert r.json()["updated"]