Allow users to set additional page time limits (#744)

This commit is contained in:
sua yoo 2023-04-05 20:06:46 -07:00 committed by GitHub
parent 72967a0381
commit 91c2c1ad62
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 270 additions and 130 deletions

View File

@ -8,6 +8,7 @@ import ISO6391 from "iso-639-1";
import LiteElement, { html } from "../utils/LiteElement"; import LiteElement, { html } from "../utils/LiteElement";
import type { CrawlConfig, Seed, SeedConfig } from "../pages/org/types"; import type { CrawlConfig, Seed, SeedConfig } from "../pages/org/types";
import { humanizeSchedule } from "../utils/cron"; import { humanizeSchedule } from "../utils/cron";
import { RelativeDuration } from "./relative-duration";
/** /**
* Usage: * Usage:
@ -30,9 +31,10 @@ export class ConfigDetails extends LiteElement {
hideTags = false; hideTags = false;
@state() @state()
private orgDefaults = { private orgDefaults?: {
behaviorTimeoutMinutes: Infinity, pageLoadTimeoutSeconds?: number;
maxPagesPerCrawl: Infinity, behaviorTimeoutSeconds?: number;
maxPagesPerCrawl?: number;
}; };
private readonly scopeTypeLabels: Record< private readonly scopeTypeLabels: Record<
@ -55,7 +57,25 @@ export class ConfigDetails extends LiteElement {
render() { render() {
const crawlConfig = this.crawlConfig; const crawlConfig = this.crawlConfig;
const exclusions = crawlConfig?.config.exclude || []; const seedsConfig = crawlConfig?.config;
const exclusions = seedsConfig?.exclude || [];
const maxPages = seedsConfig?.seeds[0]?.limit ?? seedsConfig?.limit;
/**
 * Builds the display value for a time-based setting.
 *
 * @param valueSeconds - Configured value. Any falsy value (0, null,
 *   undefined) is treated as "not set" and defers to the fallback.
 * @param fallbackValue - Value shown, greyed out and suffixed with
 *   "(default)", when nothing is configured. `Infinity` is shown as
 *   "Unlimited".
 * @returns The humanized duration, a greyed-out default label, or
 *   `undefined` when there is neither a value nor a numeric fallback.
 */
const renderTimeLimit = (
  valueSeconds?: number | null,
  fallbackValue?: number
) => {
  // Values are multiplied by 1000 before being handed to
  // RelativeDuration.humanize.
  if (valueSeconds) {
    return RelativeDuration.humanize(valueSeconds * 1000, { verbose: true });
  }

  if (typeof fallbackValue !== "number") {
    return undefined;
  }

  const fallbackLabel =
    fallbackValue === Infinity
      ? msg("Unlimited")
      : RelativeDuration.humanize(fallbackValue * 1000, {
          verbose: true,
        });

  return html`<span class="text-neutral-400"
    >${fallbackLabel}
    ${msg("(default)")}</span
  >`;
};
return html` return html`
<section id="crawler-settings" class="mb-8"> <section id="crawler-settings" class="mb-8">
@ -85,16 +105,51 @@ export class ConfigDetails extends LiteElement {
() => this.renderSetting(msg("Exclusions"), msg("None")) () => this.renderSetting(msg("Exclusions"), msg("None"))
)} )}
${this.renderSetting( ${this.renderSetting(
msg("Page Time Limit"), msg("Max Pages"),
crawlConfig?.config.behaviorTimeout when(
? msg(str`${crawlConfig?.config.behaviorTimeout / 60} minute(s)`) maxPages,
: msg("None") () => msg(str`${maxPages!.toLocaleString()} pages`),
() =>
this.orgDefaults?.maxPagesPerCrawl
? html`<span class="text-neutral-400"
>${msg(
str`${this.orgDefaults.maxPagesPerCrawl.toLocaleString()} pages`
)}
${msg("(default)")}</span
>`
: undefined
)
)}
${this.renderSetting(
msg("Page Load Timeout"),
renderTimeLimit(
crawlConfig?.config.pageLoadTimeout,
this.orgDefaults?.pageLoadTimeoutSeconds ?? Infinity
)
)}
${this.renderSetting(
msg("Page Behavior Timeout"),
renderTimeLimit(
crawlConfig?.config.behaviorTimeout,
this.orgDefaults?.behaviorTimeoutSeconds ?? Infinity
)
)}
${this.renderSetting(
msg("Auto-Scroll Behavior"),
crawlConfig?.config.behaviors &&
!crawlConfig.config.behaviors.includes("autoscroll")
? msg("Disabled")
: html`<span class="text-neutral-400"
>${msg("Enabled (default)")}</span
>`
)}
${this.renderSetting(
msg("Delay Before Next Page"),
renderTimeLimit(crawlConfig?.config.pageExtraDelay, 0)
)} )}
${this.renderSetting( ${this.renderSetting(
msg("Crawl Time Limit"), msg("Crawl Time Limit"),
crawlConfig?.crawlTimeout renderTimeLimit(crawlConfig?.crawlTimeout, Infinity)
? msg(str`${crawlConfig?.crawlTimeout / 60} minute(s)`)
: msg("None")
)} )}
${this.renderSetting(msg("Crawler Instances"), crawlConfig?.scale)} ${this.renderSetting(msg("Crawler Instances"), crawlConfig?.scale)}
</btrix-desc-list> </btrix-desc-list>
@ -212,10 +267,9 @@ export class ConfigDetails extends LiteElement {
const crawlConfig = this.crawlConfig!; const crawlConfig = this.crawlConfig!;
const seedsConfig = crawlConfig.config; const seedsConfig = crawlConfig.config;
const additionalUrlList = seedsConfig.seeds.slice(1); const additionalUrlList = seedsConfig.seeds.slice(1);
let primarySeedConfig: SeedConfig | Seed = seedsConfig; const primarySeedConfig: SeedConfig | Seed = seedsConfig;
let primarySeedUrl = seedsConfig.seeds[0].url; const primarySeedUrl = seedsConfig.seeds[0].url;
const includeUrlList = primarySeedConfig.include || seedsConfig.include; const includeUrlList = primarySeedConfig.include || seedsConfig.include;
const maxPages = primarySeedConfig.limit ?? seedsConfig.limit;
return html` return html`
${this.renderSetting(msg("Primary Seed URL"), primarySeedUrl, true)} ${this.renderSetting(msg("Primary Seed URL"), primarySeedUrl, true)}
${this.renderSetting( ${this.renderSetting(
@ -255,19 +309,6 @@ export class ConfigDetails extends LiteElement {
: msg("None"), : msg("None"),
true true
)} )}
${this.renderSetting(
msg("Max Pages"),
when(
maxPages,
() => msg(str`${maxPages} page(s)`),
() =>
this.orgDefaults.maxPagesPerCrawl < Infinity
? msg(
str`Maximum Allowed (${this.orgDefaults.maxPagesPerCrawl.toLocaleString()} pages)`
)
: undefined
)
)}
`; `;
}; };
@ -292,7 +333,7 @@ export class ConfigDetails extends LiteElement {
} else if (typeof value === "boolean") { } else if (typeof value === "boolean") {
content = value ? msg("Yes") : msg("No"); content = value ? msg("Yes") : msg("No");
} else if (typeof value !== "number" && !value) { } else if (typeof value !== "number" && !value) {
content = html`<span class="text-neutral-300" content = html`<span class="text-neutral-400"
>${msg("Not specified")}</span >${msg("Not specified")}</span
>`; >`;
} }
@ -304,7 +345,6 @@ export class ConfigDetails extends LiteElement {
} }
private async fetchAPIDefaults() { private async fetchAPIDefaults() {
const orgDefaults = { ...this.orgDefaults };
try { try {
const resp = await fetch("/api/settings", { const resp = await fetch("/api/settings", {
headers: { "Content-Type": "application/json" }, headers: { "Content-Type": "application/json" },
@ -312,17 +352,22 @@ export class ConfigDetails extends LiteElement {
if (!resp.ok) { if (!resp.ok) {
throw new Error(resp.statusText); throw new Error(resp.statusText);
} }
const orgDefaults = {
...this.orgDefaults,
};
const data = await resp.json(); const data = await resp.json();
if (data.defaultBehaviorTimeSeconds) { if (data.defaultBehaviorTimeSeconds > 0) {
orgDefaults.behaviorTimeoutMinutes = orgDefaults.behaviorTimeoutSeconds = data.defaultBehaviorTimeSeconds;
data.defaultBehaviorTimeSeconds / 60; }
if (data.defaultPageLoadTimeSeconds > 0) {
orgDefaults.pageLoadTimeoutSeconds = data.defaultPageLoadTimeSeconds;
} }
if (data.maxPagesPerCrawl > 0) { if (data.maxPagesPerCrawl > 0) {
orgDefaults.maxPagesPerCrawl = data.maxPagesPerCrawl; orgDefaults.maxPagesPerCrawl = data.maxPagesPerCrawl;
} }
this.orgDefaults = orgDefaults;
} catch (e: any) { } catch (e: any) {
console.debug(e); console.debug(e);
} }
this.orgDefaults = orgDefaults;
} }
} }

View File

@ -16,8 +16,6 @@ import compact from "lodash/fp/compact";
import { mergeDeep } from "immutable"; import { mergeDeep } from "immutable";
import flow from "lodash/fp/flow"; import flow from "lodash/fp/flow";
import uniq from "lodash/fp/uniq"; import uniq from "lodash/fp/uniq";
import RegexColorize from "regex-colorize";
import ISO6391 from "iso-639-1";
import Fuse from "fuse.js"; import Fuse from "fuse.js";
import LiteElement, { html } from "../../utils/LiteElement"; import LiteElement, { html } from "../../utils/LiteElement";
@ -78,7 +76,9 @@ type FormState = {
includeLinkedPages: boolean; includeLinkedPages: boolean;
customIncludeUrlList: string; customIncludeUrlList: string;
crawlTimeoutMinutes: number | null; crawlTimeoutMinutes: number | null;
pageTimeoutMinutes: number | null; behaviorTimeoutSeconds: number | null;
pageLoadTimeoutSeconds: number | null;
pageExtraDelaySeconds: number | null;
scopeType: WorkflowParams["config"]["scopeType"]; scopeType: WorkflowParams["config"]["scopeType"];
exclusions: WorkflowParams["config"]["exclude"]; exclusions: WorkflowParams["config"]["exclude"];
pageLimit: WorkflowParams["config"]["limit"]; pageLimit: WorkflowParams["config"]["limit"];
@ -99,6 +99,7 @@ type FormState = {
browserProfile: Profile | null; browserProfile: Profile | null;
tags: Tags; tags: Tags;
description: WorkflowParams["description"]; description: WorkflowParams["description"];
disableAutoscrollBehavior: boolean;
}; };
const getDefaultProgressState = (hasConfigId = false): ProgressState => { const getDefaultProgressState = (hasConfigId = false): ProgressState => {
@ -144,7 +145,9 @@ const getDefaultFormState = (): FormState => ({
includeLinkedPages: false, includeLinkedPages: false,
customIncludeUrlList: "", customIncludeUrlList: "",
crawlTimeoutMinutes: null, crawlTimeoutMinutes: null,
pageTimeoutMinutes: null, behaviorTimeoutSeconds: null,
pageLoadTimeoutSeconds: null,
pageExtraDelaySeconds: null,
scopeType: "host", scopeType: "host",
exclusions: [], exclusions: [],
pageLimit: undefined, pageLimit: undefined,
@ -165,6 +168,7 @@ const getDefaultFormState = (): FormState => ({
browserProfile: null, browserProfile: null,
tags: [], tags: [],
description: null, description: null,
disableAutoscrollBehavior: false,
}); });
const defaultProgressState = getDefaultProgressState(); const defaultProgressState = getDefaultProgressState();
const orderedTabNames = STEPS.filter( const orderedTabNames = STEPS.filter(
@ -191,8 +195,12 @@ const urlListToArray = flow(
(str: string) => (str.length ? str.trim().split(/\s+/g) : []), (str: string) => (str.length ? str.trim().split(/\s+/g) : []),
trimArray trimArray
); );
const DEFAULT_BEHAVIOR_TIMEOUT_MINUTES = 5; const DEFAULT_BEHAVIORS = [
const DEFAULT_MAX_PAGES_PER_CRAWL = Infinity; "autoscroll",
"autoplay",
"autofetch",
"siteSpecific",
];
@localized() @localized()
export class CrawlConfigEditor extends LiteElement { export class CrawlConfigEditor extends LiteElement {
@ -221,9 +229,10 @@ export class CrawlConfigEditor extends LiteElement {
private progressState!: ProgressState; private progressState!: ProgressState;
@state() @state()
private orgDefaults = { private orgDefaults?: {
behaviorTimeoutMinutes: DEFAULT_BEHAVIOR_TIMEOUT_MINUTES, behaviorTimeoutSeconds?: number;
maxPagesPerCrawl: DEFAULT_MAX_PAGES_PER_CRAWL, pageLoadTimeoutSeconds?: number;
maxPagesPerCrawl?: number;
}; };
@state() @state()
@ -394,8 +403,9 @@ export class CrawlConfigEditor extends LiteElement {
return null; return null;
} }
private getInitialFormState(): FormState | {} { private getInitialFormState(): FormState {
if (!this.initialWorkflow) return {}; const defaultFormState = getDefaultFormState();
if (!this.initialWorkflow) return defaultFormState;
const formState: Partial<FormState> = {}; const formState: Partial<FormState> = {};
const seedsConfig = this.initialWorkflow.config; const seedsConfig = this.initialWorkflow.config;
const { seeds } = seedsConfig; const { seeds } = seedsConfig;
@ -456,36 +466,46 @@ export class CrawlConfigEditor extends LiteElement {
if (this.initialWorkflow.tags?.length) { if (this.initialWorkflow.tags?.length) {
formState.tags = this.initialWorkflow.tags; formState.tags = this.initialWorkflow.tags;
} }
if (typeof this.initialWorkflow.crawlTimeout === "number") { const secondsToMinutes = (value: any, fallback: number | null) => {
formState.crawlTimeoutMinutes = this.initialWorkflow.crawlTimeout / 60; if (typeof value === "number" && value > 0) return value / 60;
} return fallback;
if (typeof seedsConfig.behaviorTimeout === "number") { };
formState.pageTimeoutMinutes = seedsConfig.behaviorTimeout / 60;
}
return { return {
primarySeedUrl: "", primarySeedUrl: defaultFormState.primarySeedUrl,
urlList: "", urlList: defaultFormState.urlList,
customIncludeUrlList: "", customIncludeUrlList: defaultFormState.customIncludeUrlList,
crawlTimeoutMinutes: null, crawlTimeoutMinutes: secondsToMinutes(
pageTimeoutMinutes: null, this.initialWorkflow.crawlTimeout,
defaultFormState.crawlTimeoutMinutes
),
behaviorTimeoutSeconds:
seedsConfig.behaviorTimeout ?? defaultFormState.behaviorTimeoutSeconds,
pageLoadTimeoutSeconds:
seedsConfig.pageLoadTimeout ?? defaultFormState.pageLoadTimeoutSeconds,
pageExtraDelaySeconds:
seedsConfig.pageExtraDelay ?? defaultFormState.pageExtraDelaySeconds,
scale: this.initialWorkflow.scale, scale: this.initialWorkflow.scale,
blockAds: this.initialWorkflow.config.blockAds, blockAds: this.initialWorkflow.config.blockAds,
lang: this.initialWorkflow.config.lang, lang: this.initialWorkflow.config.lang,
scheduleType: "none", scheduleType: defaultFormState.scheduleType,
runNow: false, scheduleFrequency: defaultFormState.scheduleFrequency,
runNow: defaultFormState.runNow,
tags: this.initialWorkflow.tags, tags: this.initialWorkflow.tags,
jobName: this.initialWorkflow.name || "", jobName: this.initialWorkflow.name || defaultFormState.jobName,
description: this.initialWorkflow.description, description: this.initialWorkflow.description,
browserProfile: this.initialWorkflow.profileid browserProfile: this.initialWorkflow.profileid
? ({ id: this.initialWorkflow.profileid } as Profile) ? ({ id: this.initialWorkflow.profileid } as Profile)
: null, : defaultFormState.browserProfile,
scopeType: primarySeedConfig.scopeType as FormState["scopeType"], scopeType: primarySeedConfig.scopeType as FormState["scopeType"],
exclusions: seedsConfig.exclude, exclusions: seedsConfig.exclude,
includeLinkedPages: Boolean( includeLinkedPages:
primarySeedConfig.extraHops || seedsConfig.extraHops Boolean(primarySeedConfig.extraHops || seedsConfig.extraHops) ?? true,
), pageLimit:
pageLimit: this.initialWorkflow.config.limit ?? undefined, this.initialWorkflow.config.limit ?? defaultFormState.pageLimit,
disableAutoscrollBehavior: this.initialWorkflow.config.behaviors
? !this.initialWorkflow.config.behaviors.includes("autoscroll")
: defaultFormState.disableAutoscrollBehavior,
...formState, ...formState,
}; };
} }
@ -1154,7 +1174,100 @@ https://archiveweb.page/images/${"logo.svg"}`}
urlListToArray(this.formState.urlList).length + urlListToArray(this.formState.urlList).length +
(this.jobType === "seed-crawl" ? 1 : 0) (this.jobType === "seed-crawl" ? 1 : 0)
); );
/**
 * Input handler for numeric `sl-input` fields that have `min` and/or `max`.
 *
 * Waits for the input to finish updating, then sets its help text: a
 * message naming the violated bound when the input reports itself invalid
 * and the value is outside `min`/`max`, otherwise an empty string.
 */
const onInputMinMax = async (e: CustomEvent) => {
  const inputEl = e.target as SlInput;
  await inputEl.updateComplete;

  // Resolves the help text for the input's current state.
  const describeViolation = (): string => {
    if (!inputEl.invalid) {
      return "";
    }
    const value = +inputEl.value;
    const { min, max } = inputEl;
    // A falsy bound is skipped, matching an input with no bound set.
    if (min && value < +min) {
      return msg(
        str`Must be more than minimum of ${(+min).toLocaleString()}`
      );
    }
    if (max && value > +max) {
      return msg(
        str`Must be less than maximum of ${(+max).toLocaleString()}`
      );
    }
    // Invalid for a reason other than min/max: no range message.
    return "";
  };

  inputEl.helpText = describeViolation();
};
return html` return html`
${this.renderSectionHeading(msg("Limit Per Page"))}
${this.renderFormCol(html`
<sl-input
name="pageLoadTimeoutSeconds"
type="number"
label=${msg("Page Load Timeout")}
placeholder=${this.orgDefaults?.pageLoadTimeoutSeconds
? msg(
str`Default: ${this.orgDefaults.pageLoadTimeoutSeconds.toLocaleString()}`
)
: "Default: Unlimited"}
value=${ifDefined(this.formState.pageLoadTimeoutSeconds ?? undefined)}
min="0"
@sl-input=${onInputMinMax}
>
<span slot="suffix">${msg("seconds")}</span>
</sl-input>
`)}
${this.renderHelpTextCol(
msg(
`Limits amount of time to wait for a page to load. Behaviors will run after this timeout only if the page is partially or fully loaded.`
)
)}
${this.renderFormCol(html`
<sl-input
name="behaviorTimeoutSeconds"
type="number"
label=${msg("Behavior Timeout")}
placeholder=${this.orgDefaults?.behaviorTimeoutSeconds
? msg(
str`Default: ${this.orgDefaults.behaviorTimeoutSeconds.toLocaleString()}`
)
: msg("Unlimited")}
value=${ifDefined(this.formState.behaviorTimeoutSeconds ?? undefined)}
min="0"
@sl-input=${onInputMinMax}
>
<span slot="suffix">${msg("seconds")}</span>
</sl-input>
`)}
${this.renderHelpTextCol(
msg(`Limits how long behaviors can run on each page.`)
)}
${this.renderFormCol(html`<sl-checkbox
name="disableAutoscrollBehavior"
?checked=${this.formState.disableAutoscrollBehavior}
>
${msg("Disable Auto-Scroll Behavior")}
</sl-checkbox>`)}
${this.renderHelpTextCol(
msg(
`Prevents browser from automatically scrolling until the end of the page.`
),
false
)}
${this.renderFormCol(html`
<sl-input
name="pageExtraDelaySeconds"
type="number"
label=${msg("Delay Before Next Page")}
placeholder=${"Default: 0"}
value=${ifDefined(this.formState.pageExtraDelaySeconds ?? undefined)}
min="0"
>
<span slot="suffix">${msg("seconds")}</span>
</sl-input>
`)}
${this.renderHelpTextCol(
msg(
`Waits on the page after behaviors are complete before moving onto the next page. Can be helpful for rate limiting.`
)
)}
${this.renderSectionHeading(msg("Limit Per Crawl"))}
${this.renderFormCol(html` ${this.renderFormCol(html`
<sl-mutation-observer <sl-mutation-observer
attr="min" attr="min"
@ -1176,35 +1289,20 @@ https://archiveweb.page/images/${"logo.svg"}`}
type="number" type="number"
value=${this.formState.pageLimit || ""} value=${this.formState.pageLimit || ""}
min=${minPages} min=${minPages}
max=${this.orgDefaults.maxPagesPerCrawl} max=${ifDefined(
placeholder=${this.orgDefaults.maxPagesPerCrawl === Infinity this.orgDefaults?.maxPagesPerCrawl &&
? msg("Unlimited") this.orgDefaults.maxPagesPerCrawl < Infinity
: msg( ? this.orgDefaults.maxPagesPerCrawl
str`Maximum Allowed (${this.orgDefaults.maxPagesPerCrawl.toLocaleString()})` : undefined
)} )}
@sl-input=${async (e: CustomEvent) => { placeholder=${this.orgDefaults?.maxPagesPerCrawl
const inputEl = e.target as SlInput; ? this.orgDefaults.maxPagesPerCrawl === Infinity
await inputEl.updateComplete; ? msg("Default: Unlimited")
let helpText = ""; : msg(
if (inputEl.invalid) { str`Default: ${this.orgDefaults.maxPagesPerCrawl.toLocaleString()}`
const value = +inputEl.value; )
if (value < minPages) { : ""}
helpText = @sl-input=${onInputMinMax}
minPages === 1
? msg(
str`Minimum ${minPages.toLocaleString()} page per crawl`
)
: msg(
str`Minimum ${minPages.toLocaleString()} pages per crawl`
);
} else if (value > this.orgDefaults.maxPagesPerCrawl) {
helpText = msg(
str`Maximum ${this.orgDefaults.maxPagesPerCrawl.toLocaleString()} pages per crawl`
);
}
}
inputEl.helpText = helpText;
}}
> >
<span slot="suffix">${msg("pages")}</span> <span slot="suffix">${msg("pages")}</span>
</sl-input> </sl-input>
@ -1214,33 +1312,12 @@ https://archiveweb.page/images/${"logo.svg"}`}
msg(`Adds a hard limit on the number of pages msg(`Adds a hard limit on the number of pages
that will be crawled.`) that will be crawled.`)
)} )}
${this.renderFormCol(html`
<sl-input
name="pageTimeoutMinutes"
type="number"
label=${msg("Page Time Limit")}
placeholder=${msg("Unlimited")}
value=${ifDefined(
this.formState.pageTimeoutMinutes ??
this.orgDefaults.behaviorTimeoutMinutes
)}
?disabled=${this.orgDefaults.behaviorTimeoutMinutes === undefined}
min="1"
required
>
<span slot="suffix">${msg("minutes")}</span>
</sl-input>
`)}
${this.renderHelpTextCol(
msg(`Adds a hard time limit for how long the crawler can spend on a
single webpage.`)
)}
${this.renderFormCol(html` ${this.renderFormCol(html`
<sl-input <sl-input
name="crawlTimeoutMinutes" name="crawlTimeoutMinutes"
label=${msg("Crawl Time Limit")} label=${msg("Crawl Time Limit")}
value=${this.formState.crawlTimeoutMinutes || ""} value=${this.formState.crawlTimeoutMinutes || ""}
placeholder=${msg("Unlimited")} placeholder=${msg("Default: Unlimited")}
min="0" min="0"
type="number" type="number"
> >
@ -1549,8 +1626,8 @@ https://archiveweb.page/images/${"logo.svg"}`}
? msg( ? msg(
"There are issues with this Workflow. Please go through previous steps and fix all issues to continue." "There are issues with this Workflow. Please go through previous steps and fix all issues to continue."
) )
: msg(html`There is an issue with this Workflow:<br /><br />Crawl : msg(html`There is an issue with this Workflow:<br /><br />Crawl URL(s)
URL(s) required in required in
<a href="${crawlSetupUrl}" class="bold underline hover:no-underline" <a href="${crawlSetupUrl}" class="bold underline hover:no-underline"
>Crawl Setup</a >Crawl Setup</a
>. <br /><br /> >. <br /><br />
@ -1953,21 +2030,27 @@ https://archiveweb.page/images/${"logo.svg"}`}
...(this.jobType === "seed-crawl" ...(this.jobType === "seed-crawl"
? this.parseSeededConfig() ? this.parseSeededConfig()
: this.parseUrlListConfig()), : this.parseUrlListConfig()),
behaviorTimeout: behaviorTimeout: +(this.formState.behaviorTimeoutSeconds || ""),
(this.formState.pageTimeoutMinutes ?? pageLoadTimeout: +(this.formState.pageLoadTimeoutSeconds || ""),
this.orgDefaults.behaviorTimeoutMinutes ?? pageExtraDelay: +(this.formState.pageExtraDelaySeconds || ""),
DEFAULT_BEHAVIOR_TIMEOUT_MINUTES) * 60,
limit: this.formState.pageLimit ? +this.formState.pageLimit : undefined, limit: this.formState.pageLimit ? +this.formState.pageLimit : undefined,
lang: this.formState.lang || "", lang: this.formState.lang || "",
blockAds: this.formState.blockAds, blockAds: this.formState.blockAds,
exclude: trimArray(this.formState.exclusions), exclude: trimArray(this.formState.exclusions),
behaviors: (this.formState.disableAutoscrollBehavior
? DEFAULT_BEHAVIORS.slice(1)
: DEFAULT_BEHAVIORS
).join(","),
}, },
}; };
return config; return config;
} }
private parseUrlListConfig(): NewCrawlConfigParams["config"] { private parseUrlListConfig(): Pick<
NewCrawlConfigParams["config"],
"seeds" | "scopeType" | "extraHops"
> {
const config = { const config = {
seeds: urlListToArray(this.formState.urlList).map((seedUrl) => { seeds: urlListToArray(this.formState.urlList).map((seedUrl) => {
const newSeed: Seed = { url: seedUrl, scopeType: "page" }; const newSeed: Seed = { url: seedUrl, scopeType: "page" };
@ -1980,7 +2063,10 @@ https://archiveweb.page/images/${"logo.svg"}`}
return config; return config;
} }
private parseSeededConfig(): NewCrawlConfigParams["config"] { private parseSeededConfig(): Pick<
NewCrawlConfigParams["config"],
"seeds" | "scopeType"
> {
const primarySeedUrl = this.formState.primarySeedUrl; const primarySeedUrl = this.formState.primarySeedUrl;
const includeUrlList = this.formState.customIncludeUrlList const includeUrlList = this.formState.customIncludeUrlList
? urlListToArray(this.formState.customIncludeUrlList) ? urlListToArray(this.formState.customIncludeUrlList)
@ -2003,7 +2089,7 @@ https://archiveweb.page/images/${"logo.svg"}`}
: [], : [],
extraHops: this.formState.includeLinkedPages ? 1 : 0, extraHops: this.formState.includeLinkedPages ? 1 : 0,
}; };
const config: SeedConfig = { const config = {
seeds: [primarySeed, ...additionalSeedUrlList], seeds: [primarySeed, ...additionalSeedUrlList],
scopeType: additionalSeedUrlList.length scopeType: additionalSeedUrlList.length
? "page" ? "page"
@ -2043,7 +2129,6 @@ https://archiveweb.page/images/${"logo.svg"}`}
} }
private async fetchAPIDefaults() { private async fetchAPIDefaults() {
const orgDefaults = { ...this.orgDefaults };
try { try {
const resp = await fetch("/api/settings", { const resp = await fetch("/api/settings", {
headers: { "Content-Type": "application/json" }, headers: { "Content-Type": "application/json" },
@ -2051,18 +2136,23 @@ https://archiveweb.page/images/${"logo.svg"}`}
if (!resp.ok) { if (!resp.ok) {
throw new Error(resp.statusText); throw new Error(resp.statusText);
} }
const orgDefaults = {
...this.orgDefaults,
};
const data = await resp.json(); const data = await resp.json();
if (data.defaultBehaviorTimeSeconds) { if (data.defaultBehaviorTimeSeconds > 0) {
orgDefaults.behaviorTimeoutMinutes = orgDefaults.behaviorTimeoutSeconds = data.defaultBehaviorTimeSeconds;
data.defaultBehaviorTimeSeconds / 60; }
if (data.defaultPageLoadTimeSeconds > 0) {
orgDefaults.pageLoadTimeoutSeconds = data.defaultPageLoadTimeSeconds;
} }
if (data.maxPagesPerCrawl > 0) { if (data.maxPagesPerCrawl > 0) {
orgDefaults.maxPagesPerCrawl = data.maxPagesPerCrawl; orgDefaults.maxPagesPerCrawl = data.maxPagesPerCrawl;
} }
this.orgDefaults = orgDefaults;
} catch (e: any) { } catch (e: any) {
console.debug(e); console.debug(e);
} }
this.orgDefaults = orgDefaults;
} }
} }

View File

@ -20,6 +20,9 @@ const defaultValue = {
seeds: [], seeds: [],
scopeType: "prefix", scopeType: "prefix",
exclude: [""], exclude: [""],
behaviorTimeout: null,
pageLoadTimeout: null,
pageExtraDelay: null,
}, },
tags: [], tags: [],
crawlTimeout: null, crawlTimeout: null,

View File

@ -20,10 +20,12 @@ export type SeedConfig = Pick<
Seed, Seed,
"scopeType" | "include" | "exclude" | "limit" | "extraHops" "scopeType" | "include" | "exclude" | "limit" | "extraHops"
> & { > & {
seeds: (Seed)[]; seeds: Seed[];
lang?: string | null; lang?: string | null;
blockAds?: boolean; blockAds?: boolean;
behaviorTimeout?: number | null; behaviorTimeout: number | null;
pageLoadTimeout: number | null;
pageExtraDelay: number | null;
behaviors?: string | null; behaviors?: string | null;
extraHops?: number | null; extraHops?: number | null;
}; };