From ce5b52f8af1db856d70a9f4fec4931b60fdac444 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Wed, 23 Aug 2023 10:38:36 -0400 Subject: [PATCH] Add and enforce org maxPagesPerCrawl quota (#1044) --- backend/btrixcloud/crawlconfigs.py | 5 +++++ backend/btrixcloud/models.py | 1 + backend/btrixcloud/orgs.py | 14 ++++++++++++ frontend/src/components/orgs-list.ts | 6 +++++- frontend/src/pages/org/workflow-editor.ts | 26 +++++++++++++++++++---- 5 files changed, 47 insertions(+), 5 deletions(-) diff --git a/backend/btrixcloud/crawlconfigs.py b/backend/btrixcloud/crawlconfigs.py index 111da68a..6e5667f8 100644 --- a/backend/btrixcloud/crawlconfigs.py +++ b/backend/btrixcloud/crawlconfigs.py @@ -134,6 +134,11 @@ class CrawlConfigOps: data["created"] = datetime.utcnow().replace(microsecond=0, tzinfo=None) data["modified"] = data["created"] + # Ensure page limit is below org maxPagesPerCrawl if set + max_pages = await self.org_ops.get_max_pages_per_crawl(org) + if max_pages > 0: + data["config"]["limit"] = max_pages + data["profileid"], profile_filename = await self._lookup_profile( config.profileid, org ) diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py index 09f82d45..e82294c5 100644 --- a/backend/btrixcloud/models.py +++ b/backend/btrixcloud/models.py @@ -608,6 +608,7 @@ class OrgQuotas(BaseModel): """Organization quotas (settable by superadmin)""" maxConcurrentCrawls: Optional[int] = 0 + maxPagesPerCrawl: Optional[int] = 0 # ============================================================================ diff --git a/backend/btrixcloud/orgs.py b/backend/btrixcloud/orgs.py index c6f89af1..d62bd201 100644 --- a/backend/btrixcloud/orgs.py +++ b/backend/btrixcloud/orgs.py @@ -239,6 +239,10 @@ class OrgOps: org_owners.append(key) return org_owners + async def get_max_pages_per_crawl(self, org: Organization): + """Return org-specific max pages per crawl setting or 0.""" + return await get_max_pages_per_crawl(self.orgs, org.id) + # 
============================================================================ async def inc_org_stats(orgs, oid, duration): @@ -258,6 +262,16 @@ async def get_max_concurrent_crawls(orgs, oid): return 0 +# ============================================================================ +async def get_max_pages_per_crawl(orgs, oid): + """return max allowed pages per crawl, if any""" + org = await orgs.find_one({"_id": oid}) + if org: + org = Organization.from_dict(org) + return org.quotas.maxPagesPerCrawl + return 0 + + # ============================================================================ # pylint: disable=too-many-statements def init_orgs_api(app, mdb, user_manager, invites, user_dep: User): diff --git a/frontend/src/components/orgs-list.ts b/frontend/src/components/orgs-list.ts index c18235fd..d47f3c52 100644 --- a/frontend/src/components/orgs-list.ts +++ b/frontend/src/components/orgs-list.ts @@ -50,9 +50,13 @@ export class OrgsList extends LiteElement { @sl-request-close=${() => (this.currOrg = null)} > ${Object.entries(this.currOrg.quotas).map(([key, value]) => { + const label = + key === "maxConcurrentCrawls" + ? 
msg("Max Concurrent Crawls") + : msg("Max Pages Per Crawl"); return html` ) { + async willUpdate(changedProperties: Map) { if (changedProperties.has("authState") && this.authState) { - this.fetchAPIDefaults(); + await this.fetchAPIDefaults(); + if (this.orgId) { + await this.fetchOrgQuotaDefaults(); + } } if (changedProperties.get("initialWorkflow") && this.initialWorkflow) { this.initializeEditor(); @@ -359,7 +362,7 @@ export class CrawlConfigEditor extends LiteElement { } } if (changedProperties.get("orgId") && this.orgId) { - this.fetchTags(); + await this.fetchTags(); } } @@ -2226,7 +2229,7 @@ https://archiveweb.page/images/${"logo.svg"}`} if (!resp.ok) { throw new Error(resp.statusText); } - const orgDefaults = { + let orgDefaults = { ...this.orgDefaults, }; const data = await resp.json(); @@ -2244,6 +2247,21 @@ https://archiveweb.page/images/${"logo.svg"}`} console.debug(e); } } + + private async fetchOrgQuotaDefaults() { + try { + const data = await this.apiFetch(`/orgs/${this.orgId}`, this.authState!); + let orgDefaults = { + ...this.orgDefaults, + }; + if (data.quotas.maxPagesPerCrawl && data.quotas.maxPagesPerCrawl > 0) { + orgDefaults.maxPagesPerCrawl = data.quotas.maxPagesPerCrawl; + } + this.orgDefaults = orgDefaults; + } catch (e: any) { + console.debug(e); + } + } } customElements.define("btrix-workflow-editor", CrawlConfigEditor);