Remove workflows from GET profile endpoint + add inUse flag instead (#2703)

Connected to #2661 

- Removes crawl workflows from being returned as part of the profile
response.
- Frontend: removes display of workflows in profile details.
- Adds 'inUse' flag to all profile responses to indicate profile is in
use by at least one workflow
- Adds 'profileid' as possible filter for workflows search in
preparation for filtering by profile id (#2708)
- Make 'profile_in_use' a proper error (returning 400) on profile
delete.

---------

Co-authored-by: Ilya Kreymer <ikreymer@gmail.com>
This commit is contained in:
Tessa Walsh 2025-07-02 19:44:12 -04:00 committed by GitHub
parent b915e734d1
commit 5b4fee73e6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 81 additions and 203 deletions

View File

@ -25,7 +25,6 @@ from .models import (
ConfigRevision,
CrawlConfig,
CrawlConfigOut,
CrawlConfigProfileOut,
CrawlOut,
UpdateCrawlConfig,
Organization,
@ -597,6 +596,7 @@ class CrawlConfigOps:
page: int = 1,
created_by: Optional[UUID] = None,
modified_by: Optional[UUID] = None,
profileid: Optional[UUID] = None,
first_seed: Optional[str] = None,
name: Optional[str] = None,
description: Optional[str] = None,
@ -607,7 +607,7 @@ class CrawlConfigOps:
sort_direction: int = -1,
) -> tuple[list[CrawlConfigOut], int]:
"""Get all crawl configs for an organization is a member of"""
# pylint: disable=too-many-locals,too-many-branches
# pylint: disable=too-many-locals,too-many-branches,too-many-statements
# Zero-index page for query
page = page - 1
skip = page * page_size
@ -623,6 +623,9 @@ class CrawlConfigOps:
if modified_by:
match_query["modifiedBy"] = modified_by
if profileid:
match_query["profileid"] = profileid
if name:
match_query["name"] = name
@ -708,25 +711,12 @@ class CrawlConfigOps:
return configs, total
async def get_crawl_config_info_for_profile(
self, profileid: UUID, org: Organization
) -> list[CrawlConfigProfileOut]:
"""Return all crawl configs that are associated with a given profileid"""
query = {"profileid": profileid, "inactive": {"$ne": True}}
if org:
query["oid"] = org.id
results = []
cursor = self.crawl_configs.find(query, projection=["_id"])
workflows = await cursor.to_list(length=1000)
for workflow_dict in workflows:
workflow_out = await self.get_crawl_config_out(
workflow_dict.get("_id"), org
)
results.append(CrawlConfigProfileOut.from_dict(workflow_out.to_dict()))
return results
async def is_profile_in_use(self, profileid: UUID, org: Organization) -> bool:
    """Check whether any active workflow in the org references this profile.

    A workflow counts as active when its ``inactive`` field is absent or not
    True. Returns True as soon as one matching crawl config exists, False
    otherwise.
    """
    match = {
        "profileid": profileid,
        "inactive": {"$ne": True},
        "oid": org.id,
    }
    existing = await self.crawl_configs.find_one(match)
    return existing is not None
async def get_running_crawl(self, cid: UUID) -> Optional[CrawlOut]:
"""Return the id of currently running crawl for this config, if any"""
@ -1371,6 +1361,7 @@ def init_crawl_config_api(
# createdBy, kept as userid for API compatibility
userid: Optional[UUID] = None,
modifiedBy: Optional[UUID] = None,
profileid: Optional[UUID] = None,
firstSeed: Optional[str] = None,
name: Optional[str] = None,
description: Optional[str] = None,
@ -1394,6 +1385,7 @@ def init_crawl_config_api(
org,
created_by=userid,
modified_by=modifiedBy,
profileid=profileid,
first_seed=firstSeed,
name=name,
description=description,

View File

@ -514,15 +514,6 @@ class CrawlConfigOut(CrawlConfigCore, CrawlConfigAdditional):
lastStartedByName: Optional[str] = None
# ============================================================================
class CrawlConfigProfileOut(BaseMongoModel):
"""Crawl Config basic info for profiles"""
name: str
firstSeed: str
seedCount: int
# ============================================================================
class UpdateCrawlConfig(BaseModel):
"""Update crawl config name, crawl schedule, or tags"""
@ -2319,12 +2310,7 @@ class Profile(BaseMongoModel):
crawlerChannel: Optional[str] = None
proxyId: Optional[str] = None
# ============================================================================
class ProfileWithCrawlConfigs(Profile):
"""Profile with list of crawlconfigs using this profile"""
crawlconfigs: List[CrawlConfigProfileOut] = []
inUse: bool = False
# ============================================================================

View File

@ -13,7 +13,6 @@ import aiohttp
from .pagination import DEFAULT_PAGE_SIZE, paginated_format
from .models import (
Profile,
ProfileWithCrawlConfigs,
ProfileFile,
UrlIn,
ProfileLaunchBrowserIn,
@ -31,7 +30,6 @@ from .models import (
SuccessResponseStorageQuota,
ProfilePingResponse,
ProfileBrowserGetUrlResponse,
CrawlConfigProfileOut,
)
from .utils import dt_now
@ -353,33 +351,20 @@ class ProfileOps:
profiles = [Profile.from_dict(res) for res in items]
return profiles, total
async def get_profile(
self, profileid: UUID, org: Optional[Organization] = None
) -> Profile:
async def get_profile(self, profileid: UUID, org: Organization) -> Profile:
"""get profile by id and org"""
query: dict[str, object] = {"_id": profileid}
if org:
query["oid"] = org.id
query: dict[str, object] = {"_id": profileid, "oid": org.id}
res = await self.profiles.find_one(query)
if not res:
raise HTTPException(status_code=404, detail="profile_not_found")
return Profile.from_dict(res)
async def get_profile_with_configs(
self, profileid: UUID, org: Organization
) -> ProfileWithCrawlConfigs:
"""get profile for api output, with crawlconfigs"""
profile = await self.get_profile(profileid, org)
crawlconfigs = await self.get_crawl_configs_for_profile(profileid, org)
return ProfileWithCrawlConfigs(crawlconfigs=crawlconfigs, **profile.dict())
profile = Profile.from_dict(res)
profile.inUse = await self.crawlconfigs.is_profile_in_use(profileid, org)
return profile
async def get_profile_storage_path_and_proxy(
self, profileid: UUID, org: Optional[Organization] = None
self, profileid: UUID, org: Organization
) -> tuple[str, str]:
"""return profile path filename (relative path) for given profile id and org"""
try:
@ -392,9 +377,7 @@ class ProfileOps:
return "", ""
async def get_profile_name(
self, profileid: UUID, org: Optional[Organization] = None
) -> str:
async def get_profile_name(self, profileid: UUID, org: Organization) -> str:
"""return profile for given profile id and org"""
try:
profile = await self.get_profile(profileid, org)
@ -405,25 +388,14 @@ class ProfileOps:
return ""
async def get_crawl_configs_for_profile(
self, profileid: UUID, org: Organization
) -> list[CrawlConfigProfileOut]:
"""Get list of crawl configs with basic info for that use a particular profile"""
crawlconfig_info = await self.crawlconfigs.get_crawl_config_info_for_profile(
profileid, org
)
return crawlconfig_info
async def delete_profile(
self, profileid: UUID, org: Organization
) -> dict[str, Any]:
"""delete profile, if not used in active crawlconfig"""
profile = await self.get_profile_with_configs(profileid, org)
profile = await self.get_profile(profileid, org)
if len(profile.crawlconfigs) > 0:
return {"error": "in_use", "crawlconfigs": profile.crawlconfigs}
if profile.inUse:
raise HTTPException(status_code=400, detail="profile_in_use")
query: dict[str, object] = {"_id": profileid}
if org:
@ -571,7 +543,7 @@ def init_profiles_api(
else:
metadata = await browser_get_metadata(browser_commit.browserid, org)
profile = await ops.get_profile(profileid)
profile = await ops.get_profile(profileid, org)
await ops.commit_to_profile(
browser_commit=ProfileCreate(
browserid=browser_commit.browserid,
@ -588,12 +560,12 @@ def init_profiles_api(
return {"updated": True}
@router.get("/{profileid}", response_model=ProfileWithCrawlConfigs)
@router.get("/{profileid}", response_model=Profile)
async def get_profile(
profileid: UUID,
org: Organization = Depends(org_crawl_dep),
):
return await ops.get_profile_with_configs(profileid, org)
return await ops.get_profile(profileid, org)
@router.delete("/{profileid}", response_model=SuccessResponseStorageQuota)
async def delete_profile(

View File

@ -144,8 +144,6 @@ def profile_config_id(admin_auth_headers, default_org_id, profile_id):
assert resource["storage"]["name"]
assert resource.get("replicas") or resource.get("replicas") == []
assert data.get("crawlconfigs") == []
# Use profile in a workflow
r = requests.post(
f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/",
@ -207,7 +205,7 @@ def test_commit_browser_to_new_profile(admin_auth_headers, default_org_id, profi
def test_get_profile(admin_auth_headers, default_org_id, profile_id, profile_config_id):
start_time = time.monotonic()
time_limit = 10
# Check get endpoint again and check that crawlconfigs is updated
# Check get endpoint again and check that inUse is updated
while True:
try:
r = requests.get(
@ -239,13 +237,8 @@ def test_get_profile(admin_auth_headers, default_org_id, profile_id, profile_con
assert resource["storage"]["name"]
assert resource.get("replicas") or resource.get("replicas") == []
crawl_configs = data.get("crawlconfigs")
assert crawl_configs
assert len(crawl_configs) == 1
assert crawl_configs[0]["id"] == profile_config_id
assert crawl_configs[0]["name"] == "Profile Test Crawl"
assert crawl_configs[0]["firstSeed"] == "https://webrecorder.net/"
assert crawl_configs[0]["seedCount"] == 1
assert "crawlconfigs" not in data
assert data["inUse"] == True
break
except:
if time.monotonic() - start_time > time_limit:
@ -260,7 +253,6 @@ def test_commit_second_profile(profile_2_id):
def test_list_profiles(admin_auth_headers, default_org_id, profile_id, profile_2_id):
start_time = time.monotonic()
time_limit = 10
# Check get endpoint again and check that crawlconfigs is updated
while True:
try:
r = requests.get(

View File

@ -1,12 +1,12 @@
import { localized, msg, str } from "@lit/localize";
import { html, nothing, type TemplateResult } from "lit";
import { html, nothing } from "lit";
import { customElement, property, query, state } from "lit/decorators.js";
import { ifDefined } from "lit/directives/if-defined.js";
import { when } from "lit/directives/when.js";
import capitalize from "lodash/fp/capitalize";
import queryString from "query-string";
import type { Profile, ProfileWorkflow } from "./types";
import type { Profile } from "./types";
import { BtrixElement } from "@/classes/BtrixElement";
import type { Dialog } from "@/components/ui/dialog";
@ -16,7 +16,6 @@ import { pageNav } from "@/layouts/pageHeader";
import { isApiError } from "@/utils/api";
import { maxLengthValidator } from "@/utils/form";
import { isArchivingDisabled } from "@/utils/orgs";
import { pluralOf } from "@/utils/pluralize";
import { richText } from "@/utils/rich-text";
const DESCRIPTION_MAXLENGTH = 500;
@ -263,17 +262,6 @@ export class BrowserProfilesDetail extends BtrixElement {
>
</section>
<section class="mb-7">
<h2 class="mb-2 text-lg font-medium leading-none">
${msg("Crawl Workflows")}${this.profile?.crawlconfigs?.length
? html`<span class="font-normal text-neutral-500">
(${this.localize.number(this.profile.crawlconfigs.length)})
</span>`
: nothing}
</h2>
${this.renderCrawlWorkflows()}
</section>
<btrix-dialog id="discardChangesDialog" .label=${msg("Cancel Editing?")}>
${msg(
"Are you sure you want to discard changes to this browser profile?",
@ -323,52 +311,6 @@ export class BrowserProfilesDetail extends BtrixElement {
return pageNav(breadcrumbs);
}
private renderCrawlWorkflows() {
if (this.profile?.crawlconfigs?.length) {
return html`<ul>
${this.profile.crawlconfigs.map(
(workflow) => html`
<li
class="border-x border-b first:rounded-t first:border-t last:rounded-b"
>
<a
class="block p-2 transition-colors focus-within:bg-neutral-50 hover:bg-neutral-50"
href=${`${this.navigate.orgBasePath}/workflows/${workflow.id}`}
@click=${this.navigate.link}
>
${this.renderWorkflowName(workflow)}
</a>
</li>
`,
)}
</ul>`;
}
return html`<div class="rounded border p-5 text-center text-neutral-400">
${msg("Not used in any crawl workflows.")}
</div>`;
}
private renderWorkflowName(workflow: ProfileWorkflow) {
if (workflow.name)
return html`<span class="truncate">${workflow.name}</span>`;
if (!workflow.firstSeed)
return html`<span class="truncate font-mono">${workflow.id}</span>
<span class="text-neutral-400">${msg("(no name)")}</span>`;
const remainder = workflow.seedCount - 1;
let nameSuffix: string | TemplateResult<1> = "";
if (remainder) {
nameSuffix = html`<span class="ml-2 text-neutral-500"
>+${this.localize.number(remainder, { notation: "compact" })}
${pluralOf("URLs", remainder)}</span
>`;
}
return html`
<span class="primaryUrl truncate">${workflow.firstSeed}</span
>${nameSuffix}
`;
}
private readonly renderVisitedSites = () => {
return html`
<section class="flex-grow-1 flex flex-col lg:w-[60ch]">
@ -612,36 +554,36 @@ export class BrowserProfilesDetail extends BtrixElement {
const profileName = this.profile!.name;
try {
const data = await this.api.fetch<Profile & { error: boolean }>(
await this.api.fetch<Profile>(
`/orgs/${this.orgId}/profiles/${this.profile!.id}`,
{
method: "DELETE",
},
);
if (data.error && data.crawlconfigs) {
this.notify.toast({
message: msg(
html`Could not delete <strong>${profileName}</strong>, in use by
<strong
>${data.crawlconfigs.map(({ name }) => name).join(", ")}</strong
>. Please remove browser profile from Workflow to continue.`,
),
variant: "warning",
duration: 15000,
});
} else {
this.navigate.to(`${this.navigate.orgBasePath}/browser-profiles`);
this.navigate.to(`${this.navigate.orgBasePath}/browser-profiles`);
this.notify.toast({
message: msg(html`Deleted <strong>${profileName}</strong>.`),
variant: "success",
icon: "check2-circle",
});
}
} catch (e) {
this.notify.toast({
message: msg("Sorry, couldn't delete browser profile at this time."),
message: msg(html`Deleted <strong>${profileName}</strong>.`),
variant: "success",
icon: "check2-circle",
});
} catch (e) {
let message = msg(
html`Sorry, couldn't delete browser profile at this time.`,
);
if (isApiError(e)) {
if (e.message === "profile_in_use") {
message = msg(
html`Could not delete <strong>${profileName}</strong>, currently in
use. Please remove browser profile from all crawl workflows to
continue.`,
);
}
}
this.notify.toast({
message: message,
variant: "danger",
icon: "exclamation-octagon",
id: "browser-profile-error",

View File

@ -23,6 +23,7 @@ import type {
APISortQuery,
} from "@/types/api";
import type { Browser } from "@/types/browser";
import { isApiError } from "@/utils/api";
import { html } from "@/utils/LiteElement";
import { isArchivingDisabled } from "@/utils/orgs";
import { tw } from "@/utils/tailwind";
@ -382,40 +383,40 @@ export class BrowserProfilesList extends BtrixElement {
private async deleteProfile(profile: Profile) {
try {
const data = await this.api.fetch<Profile & { error?: boolean }>(
await this.api.fetch<{ error?: boolean }>(
`/orgs/${this.orgId}/profiles/${profile.id}`,
{
method: "DELETE",
},
);
if (data.error && data.crawlconfigs) {
this.notify.toast({
message: msg(
html`Could not delete <strong>${profile.name}</strong>, in use by
<strong
>${data.crawlconfigs.map(({ name }) => name).join(", ")}</strong
>. Please remove browser profile from Workflow to continue.`,
),
variant: "warning",
duration: 15000,
});
} else {
this.notify.toast({
message: msg(html`Deleted <strong>${profile.name}</strong>.`),
variant: "success",
icon: "check2-circle",
id: "browser-profile-deleted-status",
});
void this.fetchBrowserProfiles();
}
} catch (e) {
this.notify.toast({
message: msg("Sorry, couldn't delete browser profile at this time."),
message: msg(html`Deleted <strong>${profile.name}</strong>.`),
variant: "success",
icon: "check2-circle",
id: "browser-profile-deleted-status",
});
void this.fetchBrowserProfiles();
} catch (e) {
let message = msg(
html`Sorry, couldn't delete browser profile at this time.`,
);
if (isApiError(e)) {
if (e.message === "profile_in_use") {
message = msg(
html`Could not delete <strong>${profile.name}</strong>, currently in
use. Please remove browser profile from all crawl workflows to
continue.`,
);
}
}
this.notify.toast({
message: message,
variant: "danger",
icon: "exclamation-octagon",
id: "browser-profile-deleted-status",
id: "browser-profile-error",
});
}
}

View File

@ -113,13 +113,6 @@ export type ProfileReplica = {
custom?: boolean;
};
export type ProfileWorkflow = {
id: string;
name: string;
firstSeed: string;
seedCount: number;
};
export type Profile = {
id: string;
name: string;
@ -132,7 +125,7 @@ export type Profile = {
profileId: string;
baseProfileName: string;
oid: string;
crawlconfigs?: ProfileWorkflow[];
inUse: boolean;
resource?: {
name: string;
path: string;