Add and enforce org maxPagesPerCrawl quota (#1044)

Tessa Walsh 2023-08-23 10:38:36 -04:00 committed by GitHub
parent 54cf4f23e4
commit ce5b52f8af
5 changed files with 47 additions and 5 deletions

View File

@@ -134,6 +134,11 @@ class CrawlConfigOps:
data["created"] = datetime.utcnow().replace(microsecond=0, tzinfo=None)
data["modified"] = data["created"]
# Enforce org maxPagesPerCrawl quota, if set, by overriding the config page limit
max_pages = await self.org_ops.get_max_pages_per_crawl(org)
if max_pages > 0:
data["config"]["limit"] = max_pages
data["profileid"], profile_filename = await self._lookup_profile(
config.profileid, org
)
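The enforcement above simply replaces the workflow's configured page limit whenever the org quota is non-zero. A minimal sketch of that behavior, using a hypothetical standalone `enforce_page_limit` helper that is not part of this commit:

```python
def enforce_page_limit(config: dict, max_pages: int) -> dict:
    """Hypothetical helper mirroring the enforcement above: a non-zero
    org quota replaces whatever page limit the workflow requested."""
    if max_pages > 0:
        config["limit"] = max_pages
    return config


# The workflow's own limit only survives when no quota is set (max_pages == 0)
assert enforce_page_limit({"limit": 500}, 100) == {"limit": 100}
assert enforce_page_limit({"limit": 500}, 0) == {"limit": 500}
```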

View File

@@ -608,6 +608,7 @@ class OrgQuotas(BaseModel):
"""Organization quotas (settable by superadmin)"""
maxConcurrentCrawls: Optional[int] = 0
maxPagesPerCrawl: Optional[int] = 0
# ============================================================================
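For reference, the quota model reads roughly as below; this is a self-contained sketch assuming pydantic, with the field names taken from the diff and the example values invented for illustration.

```python
from typing import Optional

from pydantic import BaseModel


class OrgQuotas(BaseModel):
    """Organization quotas (settable by superadmin); 0 means no quota."""

    maxConcurrentCrawls: Optional[int] = 0
    maxPagesPerCrawl: Optional[int] = 0


# Unset quotas fall back to 0, which the backend treats as "no limit"
quotas = OrgQuotas(maxPagesPerCrawl=100)
assert quotas.maxPagesPerCrawl == 100
assert quotas.maxConcurrentCrawls == 0
```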

View File

@@ -239,6 +239,10 @@ class OrgOps:
org_owners.append(key)
return org_owners
async def get_max_pages_per_crawl(self, org: Organization):
"""Return org-specific max pages per crawl setting or 0."""
return await get_max_pages_per_crawl(self.orgs, org.id)
# ============================================================================
async def inc_org_stats(orgs, oid, duration):
@@ -258,6 +262,16 @@ async def get_max_concurrent_crawls(orgs, oid):
return 0
# ============================================================================
async def get_max_pages_per_crawl(orgs, oid):
"""return max allowed concurrent crawls, if any"""
org = await orgs.find_one({"_id": oid})
if org:
org = Organization.from_dict(org)
return org.quotas.maxPagesPerCrawl
return 0
# ============================================================================
# pylint: disable=too-many-statements
def init_orgs_api(app, mdb, user_manager, invites, user_dep: User):
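To illustrate the contract of `get_max_pages_per_crawl` (return the org's quota, or 0 when the org is missing or the quota is unset), here is a hedged sketch with a `FakeOrgs` stub standing in for the Mongo motor collection; the stub and the sample documents are assumptions for the example only.

```python
import asyncio


class FakeOrgs:
    """Stand-in for the Mongo collection queried by get_max_pages_per_crawl."""

    def __init__(self, docs):
        self.docs = docs

    async def find_one(self, query):
        return self.docs.get(query["_id"])


async def get_max_pages_per_crawl_sketch(orgs, oid):
    """Simplified lookup: return the org quota, or 0 if unset or missing."""
    org = await orgs.find_one({"_id": oid})
    if org:
        return org.get("quotas", {}).get("maxPagesPerCrawl", 0)
    return 0


orgs = FakeOrgs({"org-1": {"quotas": {"maxPagesPerCrawl": 50}}})
assert asyncio.run(get_max_pages_per_crawl_sketch(orgs, "org-1")) == 50
assert asyncio.run(get_max_pages_per_crawl_sketch(orgs, "org-2")) == 0
```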

View File

@@ -50,9 +50,13 @@ export class OrgsList extends LiteElement {
@sl-request-close=${() => (this.currOrg = null)}
>
${Object.entries(this.currOrg.quotas).map(([key, value]) => {
const label =
key === "maxConcurrentCrawls"
? msg("Max Concurrent Crawls")
: msg("Max Pages Per Crawl");
return html` <sl-input
name=${key}
- label=${msg("Max Concurrent Crawls")}
+ label=${label}
value=${value}
type="number"
@sl-input="${this.onUpdateQuota}"

View File

@@ -333,9 +333,12 @@ export class CrawlConfigEditor extends LiteElement {
});
}
- willUpdate(changedProperties: Map<string, any>) {
+ async willUpdate(changedProperties: Map<string, any>) {
if (changedProperties.has("authState") && this.authState) {
- this.fetchAPIDefaults();
+ await this.fetchAPIDefaults();
if (this.orgId) {
await this.fetchOrgQuotaDefaults();
}
}
if (changedProperties.get("initialWorkflow") && this.initialWorkflow) {
this.initializeEditor();
@@ -359,7 +362,7 @@
}
}
if (changedProperties.get("orgId") && this.orgId) {
- this.fetchTags();
+ await this.fetchTags();
}
}
@@ -2226,7 +2229,7 @@ https://archiveweb.page/images/${"logo.svg"}`}
if (!resp.ok) {
throw new Error(resp.statusText);
}
- const orgDefaults = {
+ let orgDefaults = {
...this.orgDefaults,
};
const data = await resp.json();
@@ -2244,6 +2247,21 @@
console.debug(e);
}
}
private async fetchOrgQuotaDefaults() {
try {
const data = await this.apiFetch(`/orgs/${this.orgId}`, this.authState!);
let orgDefaults = {
...this.orgDefaults,
};
if (data.quotas.maxPagesPerCrawl && data.quotas.maxPagesPerCrawl > 0) {
orgDefaults.maxPagesPerCrawl = data.quotas.maxPagesPerCrawl;
}
this.orgDefaults = orgDefaults;
} catch (e: any) {
console.debug(e);
}
}
}
customElements.define("btrix-workflow-editor", CrawlConfigEditor);
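On the frontend, `fetchOrgQuotaDefaults` reads `quotas.maxPagesPerCrawl` from the `/orgs/${this.orgId}` response and overlays it onto the editor defaults only when it is positive. A rough Python sketch of that merge, with the response shape inferred from the diff rather than a documented API contract, and the default value invented for illustration:

```python
def merge_org_quota_defaults(org_defaults: dict, org_response: dict) -> dict:
    """Mirror of fetchOrgQuotaDefaults: copy the defaults, then overlay a
    positive maxPagesPerCrawl quota from the org response, if present."""
    merged = dict(org_defaults)
    max_pages = org_response.get("quotas", {}).get("maxPagesPerCrawl")
    if max_pages and max_pages > 0:
        merged["maxPagesPerCrawl"] = max_pages
    return merged


defaults = {"maxPagesPerCrawl": 50000}  # illustrative default, not the real one
assert merge_org_quota_defaults(defaults, {"quotas": {"maxPagesPerCrawl": 100}}) == {
    "maxPagesPerCrawl": 100
}
assert merge_org_quota_defaults(defaults, {"quotas": {}}) == defaults
```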