Add post load delay to Browsertrix (#1700)
Fixes #1699. Adds a post-load delay setting to: the backend `RawCrawlConfig` model; the frontend (workflow editor and config details component); and the workflow setup docs.
This commit is contained in:
parent
9609ff4194
commit
80008a2853
@ -252,6 +252,7 @@ class RawCrawlConfig(BaseModel):
|
||||
behaviorTimeout: Optional[int]
|
||||
pageLoadTimeout: Optional[int]
|
||||
pageExtraDelay: Optional[int] = 0
|
||||
postLoadDelay: Optional[int] = 0
|
||||
|
||||
workers: Optional[int] = None
|
||||
|
||||
|
@ -134,6 +134,10 @@ Increasing the amount of crawler instances will speed up crawls by using additio
|
||||
|
||||
Limits the amount of elapsed time to wait for a page to load. Behaviors will run after this timeout only if the page is partially or fully loaded.
|
||||
|
||||
### Delay After Page Load
|
||||
|
||||
Waits on the page for a set number of seconds after the initial HTML page load before moving on to next steps such as link extraction and behaviors. This can be useful for pages that are slow to load their contents.
|
||||
|
||||
### Behavior Timeout
|
||||
|
||||
Limits the amount of elapsed time behaviors have to complete.
|
||||
|
@ -164,6 +164,10 @@ export class ConfigDetails extends LiteElement {
|
||||
this.orgDefaults?.pageLoadTimeoutSeconds ?? Infinity,
|
||||
),
|
||||
)}
|
||||
${this.renderSetting(
|
||||
msg("Delay After Page Load"),
|
||||
renderTimeLimit(crawlConfig?.config.postLoadDelay, 0),
|
||||
)}
|
||||
${this.renderSetting(
|
||||
msg("Page Behavior Timeout"),
|
||||
renderTimeLimit(
|
||||
|
@ -105,6 +105,7 @@ type FormState = {
|
||||
behaviorTimeoutSeconds: number | null;
|
||||
pageLoadTimeoutSeconds: number | null;
|
||||
pageExtraDelaySeconds: number | null;
|
||||
postLoadDelaySeconds: number | null;
|
||||
maxCrawlSizeGB: number;
|
||||
maxScopeDepth: number | null;
|
||||
scopeType: WorkflowParams["config"]["scopeType"];
|
||||
@ -184,6 +185,7 @@ const getDefaultFormState = (): FormState => ({
|
||||
behaviorTimeoutSeconds: null,
|
||||
pageLoadTimeoutSeconds: null,
|
||||
pageExtraDelaySeconds: null,
|
||||
postLoadDelaySeconds: null,
|
||||
maxScopeDepth: null,
|
||||
scopeType: "host",
|
||||
exclusions: [],
|
||||
@ -580,6 +582,8 @@ export class CrawlConfigEditor extends LiteElement {
|
||||
seedsConfig.pageLoadTimeout ?? defaultFormState.pageLoadTimeoutSeconds,
|
||||
pageExtraDelaySeconds:
|
||||
seedsConfig.pageExtraDelay ?? defaultFormState.pageExtraDelaySeconds,
|
||||
postLoadDelaySeconds:
|
||||
seedsConfig.postLoadDelay ?? defaultFormState.postLoadDelaySeconds,
|
||||
maxScopeDepth: primarySeedConfig.depth ?? defaultFormState.maxScopeDepth,
|
||||
scale: this.initialWorkflow.scale,
|
||||
blockAds: this.initialWorkflow.config.blockAds,
|
||||
@ -1585,6 +1589,24 @@ https://archiveweb.page/images/${"logo.svg"}`}
|
||||
`Limits amount of time to wait for a page to load. Behaviors will run after this timeout only if the page is partially or fully loaded.`,
|
||||
),
|
||||
)}
|
||||
${this.renderFormCol(html`
|
||||
<sl-input
|
||||
name="postLoadDelaySeconds"
|
||||
type="number"
|
||||
inputmode="numeric"
|
||||
label=${msg("Delay After Page Load")}
|
||||
placeholder=${"Default: 0"}
|
||||
value=${ifDefined(this.formState.postLoadDelaySeconds ?? undefined)}
|
||||
min="0"
|
||||
>
|
||||
<span slot="suffix">${msg("seconds")}</span>
|
||||
</sl-input>
|
||||
`)}
|
||||
${this.renderHelpTextCol(
|
||||
msg(
|
||||
`Waits on the page after initial HTML page load prior to moving on to next steps such as link extraction and behaviors. Can be useful with pages that are slow to load page contents.`,
|
||||
),
|
||||
)}
|
||||
${this.renderFormCol(html`
|
||||
<sl-input
|
||||
name="behaviorTimeoutSeconds"
|
||||
@ -2445,6 +2467,7 @@ https://archiveweb.page/images/${"logo.svg"}`}
|
||||
behaviorTimeout: this.formState.behaviorTimeoutSeconds,
|
||||
pageLoadTimeout: this.formState.pageLoadTimeoutSeconds,
|
||||
pageExtraDelay: this.formState.pageExtraDelaySeconds,
|
||||
postLoadDelay: this.formState.postLoadDelaySeconds,
|
||||
userAgent: this.formState.userAgent,
|
||||
limit: this.formState.pageLimit,
|
||||
lang: this.formState.lang || "",
|
||||
|
@ -24,6 +24,7 @@ const defaultValue = {
|
||||
behaviorTimeout: null,
|
||||
pageLoadTimeout: null,
|
||||
pageExtraDelay: null,
|
||||
postLoadDelay: null,
|
||||
useSitemap: false,
|
||||
failOnFailedSeed: false,
|
||||
userAgent: null,
|
||||
|
@ -26,6 +26,7 @@ export type SeedConfig = Expand<
|
||||
behaviorTimeout: number | null;
|
||||
pageLoadTimeout: number | null;
|
||||
pageExtraDelay: number | null;
|
||||
postLoadDelay: number | null;
|
||||
behaviors?: string | null;
|
||||
extraHops?: number | null;
|
||||
useSitemap?: boolean;
|
||||
|
Loading…
Reference in New Issue
Block a user