Add and enforce org maxPagesPerCrawl quota (#1044)

This commit is contained in:
Tessa Walsh 2023-08-23 10:38:36 -04:00 committed by GitHub
parent 54cf4f23e4
commit ce5b52f8af
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 47 additions and 5 deletions

View File

@ -134,6 +134,11 @@ class CrawlConfigOps:
data["created"] = datetime.utcnow().replace(microsecond=0, tzinfo=None) data["created"] = datetime.utcnow().replace(microsecond=0, tzinfo=None)
data["modified"] = data["created"] data["modified"] = data["created"]
# Overwrite page limit with org maxPagesPerCrawl quota, if set
max_pages = await self.org_ops.get_max_pages_per_crawl(org)
if max_pages > 0:
data["config"]["limit"] = max_pages
data["profileid"], profile_filename = await self._lookup_profile( data["profileid"], profile_filename = await self._lookup_profile(
config.profileid, org config.profileid, org
) )

View File

@ -608,6 +608,7 @@ class OrgQuotas(BaseModel):
"""Organization quotas (settable by superadmin)""" """Organization quotas (settable by superadmin)"""
maxConcurrentCrawls: Optional[int] = 0 maxConcurrentCrawls: Optional[int] = 0
maxPagesPerCrawl: Optional[int] = 0
# ============================================================================ # ============================================================================

View File

@ -239,6 +239,10 @@ class OrgOps:
org_owners.append(key) org_owners.append(key)
return org_owners return org_owners
async def get_max_pages_per_crawl(self, org: Organization):
    """Look up this org's maxPagesPerCrawl quota (0 when unset).

    Thin wrapper around the module-level helper, bound to this
    instance's orgs collection.
    """
    quota = await get_max_pages_per_crawl(self.orgs, org.id)
    return quota
# ============================================================================ # ============================================================================
async def inc_org_stats(orgs, oid, duration): async def inc_org_stats(orgs, oid, duration):
@ -258,6 +262,16 @@ async def get_max_concurrent_crawls(orgs, oid):
return 0 return 0
# ============================================================================
async def get_max_pages_per_crawl(orgs, oid):
    """Return the org's maxPagesPerCrawl quota, or 0 if org not found.

    A value of 0 means no org-level page limit is configured (the
    OrgQuotas default); callers treat only values > 0 as a limit.
    """
    org = await orgs.find_one({"_id": oid})
    if org:
        org = Organization.from_dict(org)
        return org.quotas.maxPagesPerCrawl
    return 0
# ============================================================================ # ============================================================================
# pylint: disable=too-many-statements # pylint: disable=too-many-statements
def init_orgs_api(app, mdb, user_manager, invites, user_dep: User): def init_orgs_api(app, mdb, user_manager, invites, user_dep: User):

View File

@ -50,9 +50,13 @@ export class OrgsList extends LiteElement {
@sl-request-close=${() => (this.currOrg = null)} @sl-request-close=${() => (this.currOrg = null)}
> >
${Object.entries(this.currOrg.quotas).map(([key, value]) => { ${Object.entries(this.currOrg.quotas).map(([key, value]) => {
const label =
key === "maxConcurrentCrawls"
? msg("Max Concurrent Crawls")
: msg("Max Pages Per Crawl");
return html` <sl-input return html` <sl-input
name=${key} name=${key}
label=${msg("Max Concurrent Crawls")} label=${label}
value=${value} value=${value}
type="number" type="number"
@sl-input="${this.onUpdateQuota}" @sl-input="${this.onUpdateQuota}"

View File

@ -333,9 +333,12 @@ export class CrawlConfigEditor extends LiteElement {
}); });
} }
willUpdate(changedProperties: Map<string, any>) { async willUpdate(changedProperties: Map<string, any>) {
if (changedProperties.has("authState") && this.authState) { if (changedProperties.has("authState") && this.authState) {
this.fetchAPIDefaults(); await this.fetchAPIDefaults();
if (this.orgId) {
await this.fetchOrgQuotaDefaults();
}
} }
if (changedProperties.get("initialWorkflow") && this.initialWorkflow) { if (changedProperties.get("initialWorkflow") && this.initialWorkflow) {
this.initializeEditor(); this.initializeEditor();
@ -359,7 +362,7 @@ export class CrawlConfigEditor extends LiteElement {
} }
} }
if (changedProperties.get("orgId") && this.orgId) { if (changedProperties.get("orgId") && this.orgId) {
this.fetchTags(); await this.fetchTags();
} }
} }
@ -2226,7 +2229,7 @@ https://archiveweb.page/images/${"logo.svg"}`}
if (!resp.ok) { if (!resp.ok) {
throw new Error(resp.statusText); throw new Error(resp.statusText);
} }
const orgDefaults = { let orgDefaults = {
...this.orgDefaults, ...this.orgDefaults,
}; };
const data = await resp.json(); const data = await resp.json();
@ -2244,6 +2247,21 @@ https://archiveweb.page/images/${"logo.svg"}`}
console.debug(e); console.debug(e);
} }
} }
/**
 * Fetch org-level quota settings and fold them into `orgDefaults`
 * (currently only `maxPagesPerCrawl`). Failures are logged and ignored
 * so the editor still works with built-in defaults.
 */
private async fetchOrgQuotaDefaults() {
  try {
    const data = await this.apiFetch(`/orgs/${this.orgId}`, this.authState!);
    // `orgDefaults` is never reassigned below, so `const` is correct;
    // a fresh object is assigned (matching fetchAPIDefaults) so the
    // property change is observable.
    const orgDefaults = {
      ...this.orgDefaults,
    };
    // Optional chaining guards against a missing `quotas` object;
    // a quota of 0 (or unset) means "no org-level page limit".
    const maxPages = data.quotas?.maxPagesPerCrawl;
    if (maxPages > 0) {
      orgDefaults.maxPagesPerCrawl = maxPages;
    }
    this.orgDefaults = orgDefaults;
  } catch (e: any) {
    console.debug(e);
  }
}
} }
customElements.define("btrix-workflow-editor", CrawlConfigEditor); customElements.define("btrix-workflow-editor", CrawlConfigEditor);