Remove workflows from GET profile endpoint + add inUse flag instead (#2703)
Connected to #2661

- Removes crawl workflows from being returned as part of the profile response.
- Frontend: removes display of workflows in profile details.
- Adds an 'inUse' flag to all profile responses to indicate the profile is in use by at least one workflow.
- Adds 'profileid' as a possible filter for the workflows search, in preparation for filtering by profile id (#2708).
- Makes 'profile_in_use' a proper error (returning 400) on profile delete.

---------

Co-authored-by: Ilya Kreymer <ikreymer@gmail.com>
parent b915e734d1
commit 5b4fee73e6

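
For reference, a minimal sketch of the changed API surface described in the commit message. The endpoint paths, the `inUse` flag, the `profileid` query filter, and the `profile_in_use` error detail come from the diff below; `API_PREFIX`, the ids, and the auth headers are illustrative placeholders.

import requests

# Illustrative placeholders; real values come from your deployment and login.
API_PREFIX = "http://localhost:30870/api"
org_id = "<org-uuid>"
profile_id = "<profile-uuid>"
headers = {"Authorization": "Bearer <token>"}

# GET profile no longer embeds "crawlconfigs"; it carries an "inUse" flag instead.
profile = requests.get(
    f"{API_PREFIX}/orgs/{org_id}/profiles/{profile_id}", headers=headers
).json()
assert "crawlconfigs" not in profile
print("profile in use:", profile["inUse"])

# Workflows can now be filtered by the profile they reference.
workflows = requests.get(
    f"{API_PREFIX}/orgs/{org_id}/crawlconfigs/",
    params={"profileid": profile_id},
    headers=headers,
).json()

# Deleting a profile that is still in use now fails with a 400 "profile_in_use" error.
r = requests.delete(
    f"{API_PREFIX}/orgs/{org_id}/profiles/{profile_id}", headers=headers
)
if r.status_code == 400:
    assert r.json()["detail"] == "profile_in_use"
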
@@ -25,7 +25,6 @@ from .models import (
     ConfigRevision,
     CrawlConfig,
     CrawlConfigOut,
-    CrawlConfigProfileOut,
     CrawlOut,
     UpdateCrawlConfig,
     Organization,
@@ -597,6 +596,7 @@ class CrawlConfigOps:
         page: int = 1,
         created_by: Optional[UUID] = None,
         modified_by: Optional[UUID] = None,
+        profileid: Optional[UUID] = None,
         first_seed: Optional[str] = None,
         name: Optional[str] = None,
         description: Optional[str] = None,
@@ -607,7 +607,7 @@ class CrawlConfigOps:
         sort_direction: int = -1,
     ) -> tuple[list[CrawlConfigOut], int]:
         """Get all crawl configs for an organization is a member of"""
-        # pylint: disable=too-many-locals,too-many-branches
+        # pylint: disable=too-many-locals,too-many-branches,too-many-statements
         # Zero-index page for query
         page = page - 1
         skip = page * page_size
@@ -623,6 +623,9 @@ class CrawlConfigOps:
         if modified_by:
             match_query["modifiedBy"] = modified_by
 
+        if profileid:
+            match_query["profileid"] = profileid
+
         if name:
             match_query["name"] = name
 
@@ -708,25 +711,12 @@ class CrawlConfigOps:
 
         return configs, total
 
-    async def get_crawl_config_info_for_profile(
-        self, profileid: UUID, org: Organization
-    ) -> list[CrawlConfigProfileOut]:
-        """Return all crawl configs that are associated with a given profileid"""
-        query = {"profileid": profileid, "inactive": {"$ne": True}}
-        if org:
-            query["oid"] = org.id
-
-        results = []
-
-        cursor = self.crawl_configs.find(query, projection=["_id"])
-        workflows = await cursor.to_list(length=1000)
-        for workflow_dict in workflows:
-            workflow_out = await self.get_crawl_config_out(
-                workflow_dict.get("_id"), org
-            )
-            results.append(CrawlConfigProfileOut.from_dict(workflow_out.to_dict()))
-
-        return results
+    async def is_profile_in_use(self, profileid: UUID, org: Organization) -> bool:
+        """return true/false if any active workflows exist with given profile"""
+        res = await self.crawl_configs.find_one(
+            {"profileid": profileid, "inactive": {"$ne": True}, "oid": org.id}
+        )
+        return res is not None
 
     async def get_running_crawl(self, cid: UUID) -> Optional[CrawlOut]:
         """Return the id of currently running crawl for this config, if any"""
@@ -1371,6 +1361,7 @@ def init_crawl_config_api(
         # createdBy, kept as userid for API compatibility
         userid: Optional[UUID] = None,
         modifiedBy: Optional[UUID] = None,
+        profileid: Optional[UUID] = None,
        firstSeed: Optional[str] = None,
         name: Optional[str] = None,
         description: Optional[str] = None,
@@ -1394,6 +1385,7 @@ def init_crawl_config_api(
             org,
             created_by=userid,
             modified_by=modifiedBy,
+            profileid=profileid,
             first_seed=firstSeed,
             name=name,
             description=description,
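
For context, the new is_profile_in_use helper above replaces the old per-profile workflow listing with a single existence check. Below is a standalone sketch of that pattern, assuming placeholder Motor connection details, database, and collection names rather than the real btrixcloud wiring.

import asyncio
from uuid import UUID

from motor.motor_asyncio import AsyncIOMotorClient


async def profile_in_use(crawl_configs, profileid: UUID, oid: UUID) -> bool:
    # One find_one() answers "does any active workflow still reference this
    # profile?" without materializing the matching workflows.
    res = await crawl_configs.find_one(
        {"profileid": profileid, "inactive": {"$ne": True}, "oid": oid}
    )
    return res is not None


async def main() -> None:
    client = AsyncIOMotorClient("mongodb://localhost:27017")  # placeholder URL
    crawl_configs = client["example_db"]["crawl_configs"]  # placeholder names
    print(await profile_in_use(crawl_configs, UUID(int=1), UUID(int=2)))


if __name__ == "__main__":
    asyncio.run(main())
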
@@ -514,15 +514,6 @@ class CrawlConfigOut(CrawlConfigCore, CrawlConfigAdditional):
     lastStartedByName: Optional[str] = None
 
 
-# ============================================================================
-class CrawlConfigProfileOut(BaseMongoModel):
-    """Crawl Config basic info for profiles"""
-
-    name: str
-    firstSeed: str
-    seedCount: int
-
-
 # ============================================================================
 class UpdateCrawlConfig(BaseModel):
     """Update crawl config name, crawl schedule, or tags"""
@@ -2319,12 +2310,7 @@ class Profile(BaseMongoModel):
     crawlerChannel: Optional[str] = None
     proxyId: Optional[str] = None
 
-
-# ============================================================================
-class ProfileWithCrawlConfigs(Profile):
-    """Profile with list of crawlconfigs using this profile"""
-
-    crawlconfigs: List[CrawlConfigProfileOut] = []
+    inUse: bool = False
 
 
 # ============================================================================
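
The model changes above mean every profile response now serializes a boolean inUse (defaulting to False) and no longer embeds a workflow list. A minimal, self-contained Pydantic sketch of that shape — not the actual btrixcloud models, with the field set trimmed for illustration:

from typing import Optional

from pydantic import BaseModel


class ProfileResponse(BaseModel):
    """Illustrative stand-in for the slimmed-down profile response."""

    id: str
    name: str
    description: Optional[str] = None
    inUse: bool = False  # replaces the old embedded "crawlconfigs" list


print(ProfileResponse(id="a1b2c3", name="My Profile", inUse=True))
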
@@ -13,7 +13,6 @@ import aiohttp
 from .pagination import DEFAULT_PAGE_SIZE, paginated_format
 from .models import (
     Profile,
-    ProfileWithCrawlConfigs,
     ProfileFile,
     UrlIn,
     ProfileLaunchBrowserIn,
@@ -31,7 +30,6 @@ from .models import (
     SuccessResponseStorageQuota,
     ProfilePingResponse,
     ProfileBrowserGetUrlResponse,
-    CrawlConfigProfileOut,
 )
 from .utils import dt_now
 
@@ -353,33 +351,20 @@ class ProfileOps:
         profiles = [Profile.from_dict(res) for res in items]
         return profiles, total
 
-    async def get_profile(
-        self, profileid: UUID, org: Optional[Organization] = None
-    ) -> Profile:
+    async def get_profile(self, profileid: UUID, org: Organization) -> Profile:
         """get profile by id and org"""
-        query: dict[str, object] = {"_id": profileid}
-        if org:
-            query["oid"] = org.id
+        query: dict[str, object] = {"_id": profileid, "oid": org.id}
 
         res = await self.profiles.find_one(query)
         if not res:
             raise HTTPException(status_code=404, detail="profile_not_found")
 
-        return Profile.from_dict(res)
-
-    async def get_profile_with_configs(
-        self, profileid: UUID, org: Organization
-    ) -> ProfileWithCrawlConfigs:
-        """get profile for api output, with crawlconfigs"""
-
-        profile = await self.get_profile(profileid, org)
-
-        crawlconfigs = await self.get_crawl_configs_for_profile(profileid, org)
-
-        return ProfileWithCrawlConfigs(crawlconfigs=crawlconfigs, **profile.dict())
+        profile = Profile.from_dict(res)
+        profile.inUse = await self.crawlconfigs.is_profile_in_use(profileid, org)
+        return profile
 
     async def get_profile_storage_path_and_proxy(
-        self, profileid: UUID, org: Optional[Organization] = None
+        self, profileid: UUID, org: Organization
     ) -> tuple[str, str]:
         """return profile path filename (relative path) for given profile id and org"""
         try:
@@ -392,9 +377,7 @@ class ProfileOps:
 
         return "", ""
 
-    async def get_profile_name(
-        self, profileid: UUID, org: Optional[Organization] = None
-    ) -> str:
+    async def get_profile_name(self, profileid: UUID, org: Organization) -> str:
         """return profile for given profile id and org"""
         try:
             profile = await self.get_profile(profileid, org)
@@ -405,25 +388,14 @@ class ProfileOps:
 
         return ""
 
-    async def get_crawl_configs_for_profile(
-        self, profileid: UUID, org: Organization
-    ) -> list[CrawlConfigProfileOut]:
-        """Get list of crawl configs with basic info for that use a particular profile"""
-
-        crawlconfig_info = await self.crawlconfigs.get_crawl_config_info_for_profile(
-            profileid, org
-        )
-
-        return crawlconfig_info
-
     async def delete_profile(
         self, profileid: UUID, org: Organization
     ) -> dict[str, Any]:
         """delete profile, if not used in active crawlconfig"""
-        profile = await self.get_profile_with_configs(profileid, org)
+        profile = await self.get_profile(profileid, org)
 
-        if len(profile.crawlconfigs) > 0:
-            return {"error": "in_use", "crawlconfigs": profile.crawlconfigs}
+        if profile.inUse:
+            raise HTTPException(status_code=400, detail="profile_in_use")
 
         query: dict[str, object] = {"_id": profileid}
         if org:
@@ -571,7 +543,7 @@ def init_profiles_api(
 
         else:
             metadata = await browser_get_metadata(browser_commit.browserid, org)
-            profile = await ops.get_profile(profileid)
+            profile = await ops.get_profile(profileid, org)
             await ops.commit_to_profile(
                 browser_commit=ProfileCreate(
                     browserid=browser_commit.browserid,
@@ -588,12 +560,12 @@ def init_profiles_api(
 
         return {"updated": True}
 
-    @router.get("/{profileid}", response_model=ProfileWithCrawlConfigs)
+    @router.get("/{profileid}", response_model=Profile)
     async def get_profile(
         profileid: UUID,
         org: Organization = Depends(org_crawl_dep),
     ):
-        return await ops.get_profile_with_configs(profileid, org)
+        return await ops.get_profile(profileid, org)
 
     @router.delete("/{profileid}", response_model=SuccessResponseStorageQuota)
     async def delete_profile(
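
A standalone FastAPI sketch of the delete behaviour introduced above: a profile still referenced by a workflow now yields a proper 400 response with detail "profile_in_use" instead of a 200 payload carrying an "error" key. The in-memory set below is purely illustrative and not part of the real code.

from fastapi import FastAPI, HTTPException

app = FastAPI()

PROFILES_IN_USE = {"profile-a"}  # pretend profile-a is referenced by a workflow


@app.delete("/profiles/{profileid}")
async def delete_profile(profileid: str) -> dict:
    if profileid in PROFILES_IN_USE:
        # Same shape as the backend change: FastAPI renders this as
        # HTTP 400 with body {"detail": "profile_in_use"}.
        raise HTTPException(status_code=400, detail="profile_in_use")
    return {"success": True}
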
@@ -144,8 +144,6 @@ def profile_config_id(admin_auth_headers, default_org_id, profile_id):
     assert resource["storage"]["name"]
     assert resource.get("replicas") or resource.get("replicas") == []
 
-    assert data.get("crawlconfigs") == []
-
     # Use profile in a workflow
     r = requests.post(
         f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/",
@@ -207,7 +205,7 @@ def test_commit_browser_to_new_profile(admin_auth_headers, default_org_id, profi
 def test_get_profile(admin_auth_headers, default_org_id, profile_id, profile_config_id):
     start_time = time.monotonic()
     time_limit = 10
-    # Check get endpoint again and check that crawlconfigs is updated
+    # Check get endpoint again and check that inUse is updated
     while True:
         try:
             r = requests.get(
@@ -239,13 +237,8 @@ def test_get_profile(admin_auth_headers, default_org_id, profile_id, profile_con
             assert resource["storage"]["name"]
             assert resource.get("replicas") or resource.get("replicas") == []
 
-            crawl_configs = data.get("crawlconfigs")
-            assert crawl_configs
-            assert len(crawl_configs) == 1
-            assert crawl_configs[0]["id"] == profile_config_id
-            assert crawl_configs[0]["name"] == "Profile Test Crawl"
-            assert crawl_configs[0]["firstSeed"] == "https://webrecorder.net/"
-            assert crawl_configs[0]["seedCount"] == 1
+            assert "crawlconfigs" not in data
+            assert data["inUse"] == True
             break
         except:
             if time.monotonic() - start_time > time_limit:
@@ -260,7 +253,6 @@ def test_commit_second_profile(profile_2_id):
 def test_list_profiles(admin_auth_headers, default_org_id, profile_id, profile_2_id):
     start_time = time.monotonic()
     time_limit = 10
-    # Check get endpoint again and check that crawlconfigs is updated
     while True:
         try:
             r = requests.get(
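
The updated test above polls the profile endpoint until the backend reports the new flag. Here is a hedged sketch of that polling pattern, with API_PREFIX, ids, and headers as placeholders standing in for the fixtures the real suite provides.

import time

import requests

API_PREFIX = "http://localhost:30870/api"  # placeholder


def wait_for_profile_in_use(org_id, profile_id, headers, time_limit=10):
    start_time = time.monotonic()
    while True:
        r = requests.get(
            f"{API_PREFIX}/orgs/{org_id}/profiles/{profile_id}", headers=headers
        )
        data = r.json()
        # The response should no longer embed workflows, only the inUse flag.
        if r.status_code == 200 and "crawlconfigs" not in data and data.get("inUse"):
            return data
        if time.monotonic() - start_time > time_limit:
            raise AssertionError("profile never reported inUse=True")
        time.sleep(0.5)
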
@@ -1,12 +1,12 @@
 import { localized, msg, str } from "@lit/localize";
-import { html, nothing, type TemplateResult } from "lit";
+import { html, nothing } from "lit";
 import { customElement, property, query, state } from "lit/decorators.js";
 import { ifDefined } from "lit/directives/if-defined.js";
 import { when } from "lit/directives/when.js";
 import capitalize from "lodash/fp/capitalize";
 import queryString from "query-string";
 
-import type { Profile, ProfileWorkflow } from "./types";
+import type { Profile } from "./types";
 
 import { BtrixElement } from "@/classes/BtrixElement";
 import type { Dialog } from "@/components/ui/dialog";
@@ -16,7 +16,6 @@ import { pageNav } from "@/layouts/pageHeader";
 import { isApiError } from "@/utils/api";
 import { maxLengthValidator } from "@/utils/form";
 import { isArchivingDisabled } from "@/utils/orgs";
-import { pluralOf } from "@/utils/pluralize";
 import { richText } from "@/utils/rich-text";
 
 const DESCRIPTION_MAXLENGTH = 500;
@@ -263,17 +262,6 @@ export class BrowserProfilesDetail extends BtrixElement {
           >
         </section>
 
-        <section class="mb-7">
-          <h2 class="mb-2 text-lg font-medium leading-none">
-            ${msg("Crawl Workflows")}${this.profile?.crawlconfigs?.length
-              ? html`<span class="font-normal text-neutral-500">
-                  (${this.localize.number(this.profile.crawlconfigs.length)})
-                </span>`
-              : nothing}
-          </h2>
-          ${this.renderCrawlWorkflows()}
-        </section>
-
         <btrix-dialog id="discardChangesDialog" .label=${msg("Cancel Editing?")}>
           ${msg(
             "Are you sure you want to discard changes to this browser profile?",
@@ -323,52 +311,6 @@ export class BrowserProfilesDetail extends BtrixElement {
     return pageNav(breadcrumbs);
   }
 
-  private renderCrawlWorkflows() {
-    if (this.profile?.crawlconfigs?.length) {
-      return html`<ul>
-        ${this.profile.crawlconfigs.map(
-          (workflow) => html`
-            <li
-              class="border-x border-b first:rounded-t first:border-t last:rounded-b"
-            >
-              <a
-                class="block p-2 transition-colors focus-within:bg-neutral-50 hover:bg-neutral-50"
-                href=${`${this.navigate.orgBasePath}/workflows/${workflow.id}`}
-                @click=${this.navigate.link}
-              >
-                ${this.renderWorkflowName(workflow)}
-              </a>
-            </li>
-          `,
-        )}
-      </ul>`;
-    }
-
-    return html`<div class="rounded border p-5 text-center text-neutral-400">
-      ${msg("Not used in any crawl workflows.")}
-    </div>`;
-  }
-
-  private renderWorkflowName(workflow: ProfileWorkflow) {
-    if (workflow.name)
-      return html`<span class="truncate">${workflow.name}</span>`;
-    if (!workflow.firstSeed)
-      return html`<span class="truncate font-mono">${workflow.id}</span>
-        <span class="text-neutral-400">${msg("(no name)")}</span>`;
-    const remainder = workflow.seedCount - 1;
-    let nameSuffix: string | TemplateResult<1> = "";
-    if (remainder) {
-      nameSuffix = html`<span class="ml-2 text-neutral-500"
-        >+${this.localize.number(remainder, { notation: "compact" })}
-        ${pluralOf("URLs", remainder)}</span
-      >`;
-    }
-    return html`
-      <span class="primaryUrl truncate">${workflow.firstSeed}</span
-      >${nameSuffix}
-    `;
-  }
-
   private readonly renderVisitedSites = () => {
     return html`
       <section class="flex-grow-1 flex flex-col lg:w-[60ch]">
@@ -612,36 +554,36 @@ export class BrowserProfilesDetail extends BtrixElement {
     const profileName = this.profile!.name;
 
     try {
-      const data = await this.api.fetch<Profile & { error: boolean }>(
+      await this.api.fetch<Profile>(
         `/orgs/${this.orgId}/profiles/${this.profile!.id}`,
         {
           method: "DELETE",
         },
       );
 
-      if (data.error && data.crawlconfigs) {
-        this.notify.toast({
-          message: msg(
-            html`Could not delete <strong>${profileName}</strong>, in use by
-              <strong
-                >${data.crawlconfigs.map(({ name }) => name).join(", ")}</strong
-              >. Please remove browser profile from Workflow to continue.`,
-          ),
-          variant: "warning",
-          duration: 15000,
-        });
-      } else {
-        this.navigate.to(`${this.navigate.orgBasePath}/browser-profiles`);
+      this.navigate.to(`${this.navigate.orgBasePath}/browser-profiles`);
 
-        this.notify.toast({
-          message: msg(html`Deleted <strong>${profileName}</strong>.`),
-          variant: "success",
-          icon: "check2-circle",
-        });
-      }
-    } catch (e) {
       this.notify.toast({
-        message: msg("Sorry, couldn't delete browser profile at this time."),
+        message: msg(html`Deleted <strong>${profileName}</strong>.`),
+        variant: "success",
+        icon: "check2-circle",
+      });
+    } catch (e) {
+      let message = msg(
+        html`Sorry, couldn't delete browser profile at this time.`,
+      );
+
+      if (isApiError(e)) {
+        if (e.message === "profile_in_use") {
+          message = msg(
+            html`Could not delete <strong>${profileName}</strong>, currently in
+              use. Please remove browser profile from all crawl workflows to
+              continue.`,
+          );
+        }
+      }
+      this.notify.toast({
+        message: message,
         variant: "danger",
         icon: "exclamation-octagon",
         id: "browser-profile-error",
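
The frontend change above distinguishes the "profile_in_use" API error from generic delete failures. For a non-browser API client, the equivalent branching looks roughly like the sketch below; the base URL, ids, and headers are placeholders.

import requests


def delete_profile(base_url: str, org_id: str, profile_id: str, headers: dict) -> str:
    r = requests.delete(
        f"{base_url}/orgs/{org_id}/profiles/{profile_id}", headers=headers
    )
    if r.ok:
        return "deleted"
    detail = None
    try:
        detail = r.json().get("detail")
    except ValueError:
        pass
    if r.status_code == 400 and detail == "profile_in_use":
        return "profile is still in use by at least one crawl workflow"
    return f"delete failed ({detail or r.status_code})"
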
@@ -23,6 +23,7 @@ import type {
   APISortQuery,
 } from "@/types/api";
 import type { Browser } from "@/types/browser";
+import { isApiError } from "@/utils/api";
 import { html } from "@/utils/LiteElement";
 import { isArchivingDisabled } from "@/utils/orgs";
 import { tw } from "@/utils/tailwind";
@@ -382,40 +383,40 @@ export class BrowserProfilesList extends BtrixElement {
 
   private async deleteProfile(profile: Profile) {
     try {
-      const data = await this.api.fetch<Profile & { error?: boolean }>(
+      await this.api.fetch<{ error?: boolean }>(
         `/orgs/${this.orgId}/profiles/${profile.id}`,
         {
           method: "DELETE",
         },
       );
 
-      if (data.error && data.crawlconfigs) {
-        this.notify.toast({
-          message: msg(
-            html`Could not delete <strong>${profile.name}</strong>, in use by
-              <strong
-                >${data.crawlconfigs.map(({ name }) => name).join(", ")}</strong
-              >. Please remove browser profile from Workflow to continue.`,
-          ),
-          variant: "warning",
-          duration: 15000,
-        });
-      } else {
-        this.notify.toast({
-          message: msg(html`Deleted <strong>${profile.name}</strong>.`),
-          variant: "success",
-          icon: "check2-circle",
-          id: "browser-profile-deleted-status",
-        });
-
-        void this.fetchBrowserProfiles();
-      }
-    } catch (e) {
       this.notify.toast({
-        message: msg("Sorry, couldn't delete browser profile at this time."),
+        message: msg(html`Deleted <strong>${profile.name}</strong>.`),
+        variant: "success",
+        icon: "check2-circle",
+        id: "browser-profile-deleted-status",
+      });
+
+      void this.fetchBrowserProfiles();
+    } catch (e) {
+      let message = msg(
+        html`Sorry, couldn't delete browser profile at this time.`,
+      );
+
+      if (isApiError(e)) {
+        if (e.message === "profile_in_use") {
+          message = msg(
+            html`Could not delete <strong>${profile.name}</strong>, currently in
+              use. Please remove browser profile from all crawl workflows to
+              continue.`,
+          );
+        }
+      }
+      this.notify.toast({
+        message: message,
         variant: "danger",
         icon: "exclamation-octagon",
-        id: "browser-profile-deleted-status",
+        id: "browser-profile-error",
       });
     }
   }

@@ -113,13 +113,6 @@ export type ProfileReplica = {
   custom?: boolean;
 };
 
-export type ProfileWorkflow = {
-  id: string;
-  name: string;
-  firstSeed: string;
-  seedCount: number;
-};
-
 export type Profile = {
   id: string;
   name: string;
@@ -132,7 +125,7 @@ export type Profile = {
   profileId: string;
   baseProfileName: string;
   oid: string;
-  crawlconfigs?: ProfileWorkflow[];
+  inUse: boolean;
   resource?: {
     name: string;
     path: string;