Remove workflows from GET profile endpoint + add inUse flag instead (#2703)

Connected to #2661 

- Removes crawl workflows from being returned as part of the profile
response.
- Frontend: removes display of workflows in profile details.
- Adds 'inUse' flag to all profile responses to indicate profile is in
use by at least one workflow
- Adds 'profileid' as possible filter for workflows search in
preparation for filtering by profile id (#2708)
- Make 'profile_in_use' a proper error (returning 400) on profile
delete.

---------

Co-authored-by: Ilya Kreymer <ikreymer@gmail.com>
This commit is contained in:
Tessa Walsh 2025-07-02 19:44:12 -04:00 committed by GitHub
parent b915e734d1
commit 5b4fee73e6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 81 additions and 203 deletions

View File

@ -25,7 +25,6 @@ from .models import (
ConfigRevision,
CrawlConfig,
CrawlConfigOut,
CrawlConfigProfileOut,
CrawlOut,
UpdateCrawlConfig,
Organization,
@ -597,6 +596,7 @@ class CrawlConfigOps:
page: int = 1,
created_by: Optional[UUID] = None,
modified_by: Optional[UUID] = None,
profileid: Optional[UUID] = None,
first_seed: Optional[str] = None,
name: Optional[str] = None,
description: Optional[str] = None,
@ -607,7 +607,7 @@ class CrawlConfigOps:
sort_direction: int = -1,
) -> tuple[list[CrawlConfigOut], int]:
"""Get all crawl configs for an organization is a member of"""
# pylint: disable=too-many-locals,too-many-branches
# pylint: disable=too-many-locals,too-many-branches,too-many-statements
# Zero-index page for query
page = page - 1
skip = page * page_size
@ -623,6 +623,9 @@ class CrawlConfigOps:
if modified_by:
match_query["modifiedBy"] = modified_by
if profileid:
match_query["profileid"] = profileid
if name:
match_query["name"] = name
@ -708,25 +711,12 @@ class CrawlConfigOps:
return configs, total
async def get_crawl_config_info_for_profile(
self, profileid: UUID, org: Organization
) -> list[CrawlConfigProfileOut]:
"""Return all crawl configs that are associated with a given profileid"""
query = {"profileid": profileid, "inactive": {"$ne": True}}
if org:
query["oid"] = org.id
results = []
cursor = self.crawl_configs.find(query, projection=["_id"])
workflows = await cursor.to_list(length=1000)
for workflow_dict in workflows:
workflow_out = await self.get_crawl_config_out(
workflow_dict.get("_id"), org
)
results.append(CrawlConfigProfileOut.from_dict(workflow_out.to_dict()))
return results
async def is_profile_in_use(self, profileid: UUID, org: Organization) -> bool:
    """Check whether any active workflow in the org references this profile.

    A workflow counts as active when its ``inactive`` field is absent or not
    True. Returns True as soon as one matching crawl config exists, False
    otherwise.
    """
    match = {
        "profileid": profileid,
        "inactive": {"$ne": True},
        "oid": org.id,
    }
    existing = await self.crawl_configs.find_one(match)
    return existing is not None
async def get_running_crawl(self, cid: UUID) -> Optional[CrawlOut]:
"""Return the id of currently running crawl for this config, if any"""
@ -1371,6 +1361,7 @@ def init_crawl_config_api(
# createdBy, kept as userid for API compatibility
userid: Optional[UUID] = None,
modifiedBy: Optional[UUID] = None,
profileid: Optional[UUID] = None,
firstSeed: Optional[str] = None,
name: Optional[str] = None,
description: Optional[str] = None,
@ -1394,6 +1385,7 @@ def init_crawl_config_api(
org,
created_by=userid,
modified_by=modifiedBy,
profileid=profileid,
first_seed=firstSeed,
name=name,
description=description,

View File

@ -514,15 +514,6 @@ class CrawlConfigOut(CrawlConfigCore, CrawlConfigAdditional):
lastStartedByName: Optional[str] = None
# ============================================================================
class CrawlConfigProfileOut(BaseMongoModel):
"""Crawl Config basic info for profiles"""
name: str
firstSeed: str
seedCount: int
# ============================================================================
class UpdateCrawlConfig(BaseModel):
"""Update crawl config name, crawl schedule, or tags"""
@ -2319,12 +2310,7 @@ class Profile(BaseMongoModel):
crawlerChannel: Optional[str] = None
proxyId: Optional[str] = None
# ============================================================================
class ProfileWithCrawlConfigs(Profile):
"""Profile with list of crawlconfigs using this profile"""
crawlconfigs: List[CrawlConfigProfileOut] = []
inUse: bool = False
# ============================================================================

View File

@ -13,7 +13,6 @@ import aiohttp
from .pagination import DEFAULT_PAGE_SIZE, paginated_format
from .models import (
Profile,
ProfileWithCrawlConfigs,
ProfileFile,
UrlIn,
ProfileLaunchBrowserIn,
@ -31,7 +30,6 @@ from .models import (
SuccessResponseStorageQuota,
ProfilePingResponse,
ProfileBrowserGetUrlResponse,
CrawlConfigProfileOut,
)
from .utils import dt_now
@ -353,33 +351,20 @@ class ProfileOps:
profiles = [Profile.from_dict(res) for res in items]
return profiles, total
async def get_profile(
self, profileid: UUID, org: Optional[Organization] = None
) -> Profile:
async def get_profile(self, profileid: UUID, org: Organization) -> Profile:
"""get profile by id and org"""
query: dict[str, object] = {"_id": profileid}
if org:
query["oid"] = org.id
query: dict[str, object] = {"_id": profileid, "oid": org.id}
res = await self.profiles.find_one(query)
if not res:
raise HTTPException(status_code=404, detail="profile_not_found")
return Profile.from_dict(res)
async def get_profile_with_configs(
self, profileid: UUID, org: Organization
) -> ProfileWithCrawlConfigs:
"""get profile for api output, with crawlconfigs"""
profile = await self.get_profile(profileid, org)
crawlconfigs = await self.get_crawl_configs_for_profile(profileid, org)
return ProfileWithCrawlConfigs(crawlconfigs=crawlconfigs, **profile.dict())
profile = Profile.from_dict(res)
profile.inUse = await self.crawlconfigs.is_profile_in_use(profileid, org)
return profile
async def get_profile_storage_path_and_proxy(
self, profileid: UUID, org: Optional[Organization] = None
self, profileid: UUID, org: Organization
) -> tuple[str, str]:
"""return profile path filename (relative path) for given profile id and org"""
try:
@ -392,9 +377,7 @@ class ProfileOps:
return "", ""
async def get_profile_name(
self, profileid: UUID, org: Optional[Organization] = None
) -> str:
async def get_profile_name(self, profileid: UUID, org: Organization) -> str:
"""return profile for given profile id and org"""
try:
profile = await self.get_profile(profileid, org)
@ -405,25 +388,14 @@ class ProfileOps:
return ""
async def get_crawl_configs_for_profile(
self, profileid: UUID, org: Organization
) -> list[CrawlConfigProfileOut]:
"""Get list of crawl configs with basic info for that use a particular profile"""
crawlconfig_info = await self.crawlconfigs.get_crawl_config_info_for_profile(
profileid, org
)
return crawlconfig_info
async def delete_profile(
self, profileid: UUID, org: Organization
) -> dict[str, Any]:
"""delete profile, if not used in active crawlconfig"""
profile = await self.get_profile_with_configs(profileid, org)
profile = await self.get_profile(profileid, org)
if len(profile.crawlconfigs) > 0:
return {"error": "in_use", "crawlconfigs": profile.crawlconfigs}
if profile.inUse:
raise HTTPException(status_code=400, detail="profile_in_use")
query: dict[str, object] = {"_id": profileid}
if org:
@ -571,7 +543,7 @@ def init_profiles_api(
else:
metadata = await browser_get_metadata(browser_commit.browserid, org)
profile = await ops.get_profile(profileid)
profile = await ops.get_profile(profileid, org)
await ops.commit_to_profile(
browser_commit=ProfileCreate(
browserid=browser_commit.browserid,
@ -588,12 +560,12 @@ def init_profiles_api(
return {"updated": True}
@router.get("/{profileid}", response_model=ProfileWithCrawlConfigs)
@router.get("/{profileid}", response_model=Profile)
async def get_profile(
profileid: UUID,
org: Organization = Depends(org_crawl_dep),
):
return await ops.get_profile_with_configs(profileid, org)
return await ops.get_profile(profileid, org)
@router.delete("/{profileid}", response_model=SuccessResponseStorageQuota)
async def delete_profile(

View File

@ -144,8 +144,6 @@ def profile_config_id(admin_auth_headers, default_org_id, profile_id):
assert resource["storage"]["name"]
assert resource.get("replicas") or resource.get("replicas") == []
assert data.get("crawlconfigs") == []
# Use profile in a workflow
r = requests.post(
f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/",
@ -207,7 +205,7 @@ def test_commit_browser_to_new_profile(admin_auth_headers, default_org_id, profi
def test_get_profile(admin_auth_headers, default_org_id, profile_id, profile_config_id):
start_time = time.monotonic()
time_limit = 10
# Check get endpoint again and check that crawlconfigs is updated
# Check get endpoint again and check that inUse is updated
while True:
try:
r = requests.get(
@ -239,13 +237,8 @@ def test_get_profile(admin_auth_headers, default_org_id, profile_id, profile_con
assert resource["storage"]["name"]
assert resource.get("replicas") or resource.get("replicas") == []
crawl_configs = data.get("crawlconfigs")
assert crawl_configs
assert len(crawl_configs) == 1
assert crawl_configs[0]["id"] == profile_config_id
assert crawl_configs[0]["name"] == "Profile Test Crawl"
assert crawl_configs[0]["firstSeed"] == "https://webrecorder.net/"
assert crawl_configs[0]["seedCount"] == 1
assert "crawlconfigs" not in data
assert data["inUse"] == True
break
except:
if time.monotonic() - start_time > time_limit:
@ -260,7 +253,6 @@ def test_commit_second_profile(profile_2_id):
def test_list_profiles(admin_auth_headers, default_org_id, profile_id, profile_2_id):
start_time = time.monotonic()
time_limit = 10
# Check get endpoint again and check that crawlconfigs is updated
while True:
try:
r = requests.get(

View File

@ -1,12 +1,12 @@
import { localized, msg, str } from "@lit/localize";
import { html, nothing, type TemplateResult } from "lit";
import { html, nothing } from "lit";
import { customElement, property, query, state } from "lit/decorators.js";
import { ifDefined } from "lit/directives/if-defined.js";
import { when } from "lit/directives/when.js";
import capitalize from "lodash/fp/capitalize";
import queryString from "query-string";
import type { Profile, ProfileWorkflow } from "./types";
import type { Profile } from "./types";
import { BtrixElement } from "@/classes/BtrixElement";
import type { Dialog } from "@/components/ui/dialog";
@ -16,7 +16,6 @@ import { pageNav } from "@/layouts/pageHeader";
import { isApiError } from "@/utils/api";
import { maxLengthValidator } from "@/utils/form";
import { isArchivingDisabled } from "@/utils/orgs";
import { pluralOf } from "@/utils/pluralize";
import { richText } from "@/utils/rich-text";
const DESCRIPTION_MAXLENGTH = 500;
@ -263,17 +262,6 @@ export class BrowserProfilesDetail extends BtrixElement {
>
</section>
<section class="mb-7">
<h2 class="mb-2 text-lg font-medium leading-none">
${msg("Crawl Workflows")}${this.profile?.crawlconfigs?.length
? html`<span class="font-normal text-neutral-500">
(${this.localize.number(this.profile.crawlconfigs.length)})
</span>`
: nothing}
</h2>
${this.renderCrawlWorkflows()}
</section>
<btrix-dialog id="discardChangesDialog" .label=${msg("Cancel Editing?")}>
${msg(
"Are you sure you want to discard changes to this browser profile?",
@ -323,52 +311,6 @@ export class BrowserProfilesDetail extends BtrixElement {
return pageNav(breadcrumbs);
}
private renderCrawlWorkflows() {
if (this.profile?.crawlconfigs?.length) {
return html`<ul>
${this.profile.crawlconfigs.map(
(workflow) => html`
<li
class="border-x border-b first:rounded-t first:border-t last:rounded-b"
>
<a
class="block p-2 transition-colors focus-within:bg-neutral-50 hover:bg-neutral-50"
href=${`${this.navigate.orgBasePath}/workflows/${workflow.id}`}
@click=${this.navigate.link}
>
${this.renderWorkflowName(workflow)}
</a>
</li>
`,
)}
</ul>`;
}
return html`<div class="rounded border p-5 text-center text-neutral-400">
${msg("Not used in any crawl workflows.")}
</div>`;
}
private renderWorkflowName(workflow: ProfileWorkflow) {
if (workflow.name)
return html`<span class="truncate">${workflow.name}</span>`;
if (!workflow.firstSeed)
return html`<span class="truncate font-mono">${workflow.id}</span>
<span class="text-neutral-400">${msg("(no name)")}</span>`;
const remainder = workflow.seedCount - 1;
let nameSuffix: string | TemplateResult<1> = "";
if (remainder) {
nameSuffix = html`<span class="ml-2 text-neutral-500"
>+${this.localize.number(remainder, { notation: "compact" })}
${pluralOf("URLs", remainder)}</span
>`;
}
return html`
<span class="primaryUrl truncate">${workflow.firstSeed}</span
>${nameSuffix}
`;
}
private readonly renderVisitedSites = () => {
return html`
<section class="flex-grow-1 flex flex-col lg:w-[60ch]">
@ -612,36 +554,36 @@ export class BrowserProfilesDetail extends BtrixElement {
const profileName = this.profile!.name;
try {
const data = await this.api.fetch<Profile & { error: boolean }>(
await this.api.fetch<Profile>(
`/orgs/${this.orgId}/profiles/${this.profile!.id}`,
{
method: "DELETE",
},
);
if (data.error && data.crawlconfigs) {
this.notify.toast({
message: msg(
html`Could not delete <strong>${profileName}</strong>, in use by
<strong
>${data.crawlconfigs.map(({ name }) => name).join(", ")}</strong
>. Please remove browser profile from Workflow to continue.`,
),
variant: "warning",
duration: 15000,
});
} else {
this.navigate.to(`${this.navigate.orgBasePath}/browser-profiles`);
this.navigate.to(`${this.navigate.orgBasePath}/browser-profiles`);
this.notify.toast({
message: msg(html`Deleted <strong>${profileName}</strong>.`),
variant: "success",
icon: "check2-circle",
});
}
} catch (e) {
this.notify.toast({
message: msg("Sorry, couldn't delete browser profile at this time."),
message: msg(html`Deleted <strong>${profileName}</strong>.`),
variant: "success",
icon: "check2-circle",
});
} catch (e) {
let message = msg(
html`Sorry, couldn't delete browser profile at this time.`,
);
if (isApiError(e)) {
if (e.message === "profile_in_use") {
message = msg(
html`Could not delete <strong>${profileName}</strong>, currently in
use. Please remove browser profile from all crawl workflows to
continue.`,
);
}
}
this.notify.toast({
message: message,
variant: "danger",
icon: "exclamation-octagon",
id: "browser-profile-error",

View File

@ -23,6 +23,7 @@ import type {
APISortQuery,
} from "@/types/api";
import type { Browser } from "@/types/browser";
import { isApiError } from "@/utils/api";
import { html } from "@/utils/LiteElement";
import { isArchivingDisabled } from "@/utils/orgs";
import { tw } from "@/utils/tailwind";
@ -382,40 +383,40 @@ export class BrowserProfilesList extends BtrixElement {
private async deleteProfile(profile: Profile) {
try {
const data = await this.api.fetch<Profile & { error?: boolean }>(
await this.api.fetch<{ error?: boolean }>(
`/orgs/${this.orgId}/profiles/${profile.id}`,
{
method: "DELETE",
},
);
if (data.error && data.crawlconfigs) {
this.notify.toast({
message: msg(
html`Could not delete <strong>${profile.name}</strong>, in use by
<strong
>${data.crawlconfigs.map(({ name }) => name).join(", ")}</strong
>. Please remove browser profile from Workflow to continue.`,
),
variant: "warning",
duration: 15000,
});
} else {
this.notify.toast({
message: msg(html`Deleted <strong>${profile.name}</strong>.`),
variant: "success",
icon: "check2-circle",
id: "browser-profile-deleted-status",
});
void this.fetchBrowserProfiles();
}
} catch (e) {
this.notify.toast({
message: msg("Sorry, couldn't delete browser profile at this time."),
message: msg(html`Deleted <strong>${profile.name}</strong>.`),
variant: "success",
icon: "check2-circle",
id: "browser-profile-deleted-status",
});
void this.fetchBrowserProfiles();
} catch (e) {
let message = msg(
html`Sorry, couldn't delete browser profile at this time.`,
);
if (isApiError(e)) {
if (e.message === "profile_in_use") {
message = msg(
html`Could not delete <strong>${profile.name}</strong>, currently in
use. Please remove browser profile from all crawl workflows to
continue.`,
);
}
}
this.notify.toast({
message: message,
variant: "danger",
icon: "exclamation-octagon",
id: "browser-profile-deleted-status",
id: "browser-profile-error",
});
}
}

View File

@ -113,13 +113,6 @@ export type ProfileReplica = {
custom?: boolean;
};
export type ProfileWorkflow = {
id: string;
name: string;
firstSeed: string;
seedCount: number;
};
export type Profile = {
id: string;
name: string;
@ -132,7 +125,7 @@ export type Profile = {
profileId: string;
baseProfileName: string;
oid: string;
crawlconfigs?: ProfileWorkflow[];
inUse: boolean;
resource?: {
name: string;
path: string;