Reorder Limits section (#966)
* Reorder Limits section - Minor text change to section names - "Limit Per Page" → "Per-Page Limits" - "Limit Per Crawl" → "Per-Crawl Limits" * Reorder limits section in documentation
This commit is contained in:
parent
fd310f620a
commit
d9e73fcbc3
@ -98,6 +98,18 @@ This can be useful for avoiding crawler traps — sites that may automatically g
|
||||
|
||||
## Limits
|
||||
|
||||
### Max Pages
|
||||
|
||||
Adds a hard limit on the number of pages that will be crawled. The crawl will be gracefully stopped after this limit is reached.
|
||||
|
||||
### Crawl Time Limit
|
||||
|
||||
The crawl will be gracefully stopped after this set period of time.
|
||||
|
||||
### Crawler Instances
|
||||
|
||||
Increasing the number of crawler instances will speed up crawls by using additional browser windows to capture more pages in parallel. This will also increase the amount of traffic sent to the website and may result in a higher chance of getting rate limited.
|
||||
|
||||
### Page Load Timeout
|
||||
|
||||
Limits the amount of time to wait for a page to load. Behaviors will run after this timeout only if the page is partially or fully loaded.
|
||||
@ -114,18 +126,6 @@ When enabled, the browser will automatically scroll to the end of the page.
|
||||
|
||||
Waits on the page for a set period of time after any behaviors have finished running. This can be helpful to avoid rate limiting; however, it will slow down your crawl.
|
||||
|
||||
### Max Pages
|
||||
|
||||
Adds a hard limit on the number of pages that will be crawled. The crawl will be gracefully stopped after this limit is reached.
|
||||
|
||||
### Crawl Time Limit
|
||||
|
||||
The crawl will be gracefully stopped after this set period of time.
|
||||
|
||||
### Crawler Instances
|
||||
|
||||
Increasing the number of crawler instances will speed up crawls by using additional browser windows to capture more pages in parallel. This will also increase the amount of traffic sent to the website and may result in a higher chance of getting rate limited.
|
||||
|
||||
## Browser Settings
|
||||
|
||||
### Browser Profile
|
||||
|
@ -1236,82 +1236,7 @@ https://archiveweb.page/images/${"logo.svg"}`}
|
||||
inputEl.helpText = helpText;
|
||||
};
|
||||
return html`
|
||||
${this.renderSectionHeading(msg("Limit Per Page"))}
|
||||
${this.renderFormCol(html`
|
||||
<sl-input
|
||||
name="pageLoadTimeoutSeconds"
|
||||
type="number"
|
||||
inputmode="numeric"
|
||||
label=${msg("Page Load Timeout")}
|
||||
placeholder=${this.orgDefaults?.pageLoadTimeoutSeconds
|
||||
? msg(
|
||||
str`Default: ${this.orgDefaults.pageLoadTimeoutSeconds.toLocaleString()}`
|
||||
)
|
||||
: "Default: Unlimited"}
|
||||
value=${ifDefined(this.formState.pageLoadTimeoutSeconds ?? undefined)}
|
||||
min="0"
|
||||
@sl-input=${onInputMinMax}
|
||||
>
|
||||
<span slot="suffix">${msg("seconds")}</span>
|
||||
</sl-input>
|
||||
`)}
|
||||
${this.renderHelpTextCol(
|
||||
msg(
|
||||
`Limits amount of time to wait for a page to load. Behaviors will run after this timeout only if the page is partially or fully loaded.`
|
||||
)
|
||||
)}
|
||||
${this.renderFormCol(html`
|
||||
<sl-input
|
||||
name="behaviorTimeoutSeconds"
|
||||
type="number"
|
||||
inputmode="numeric"
|
||||
label=${msg("Behavior Timeout")}
|
||||
placeholder=${this.orgDefaults?.behaviorTimeoutSeconds
|
||||
? msg(
|
||||
str`Default: ${this.orgDefaults.behaviorTimeoutSeconds.toLocaleString()}`
|
||||
)
|
||||
: msg("Unlimited")}
|
||||
value=${ifDefined(this.formState.behaviorTimeoutSeconds ?? undefined)}
|
||||
min="0"
|
||||
@sl-input=${onInputMinMax}
|
||||
>
|
||||
<span slot="suffix">${msg("seconds")}</span>
|
||||
</sl-input>
|
||||
`)}
|
||||
${this.renderHelpTextCol(
|
||||
msg(`Limits how long behaviors can run on each page.`)
|
||||
)}
|
||||
${this.renderFormCol(html`<sl-checkbox
|
||||
name="autoscrollBehavior"
|
||||
?checked=${this.formState.autoscrollBehavior}
|
||||
>
|
||||
${msg("Auto-Scroll Behavior")}
|
||||
</sl-checkbox>`)}
|
||||
${this.renderHelpTextCol(
|
||||
msg(
|
||||
`When enabled the browser will automatically scroll to the end of the page.`
|
||||
),
|
||||
false
|
||||
)}
|
||||
${this.renderFormCol(html`
|
||||
<sl-input
|
||||
name="pageExtraDelaySeconds"
|
||||
type="number"
|
||||
inputmode="numeric"
|
||||
label=${msg("Delay Before Next Page")}
|
||||
placeholder=${"Default: 0"}
|
||||
value=${ifDefined(this.formState.pageExtraDelaySeconds ?? undefined)}
|
||||
min="0"
|
||||
>
|
||||
<span slot="suffix">${msg("seconds")}</span>
|
||||
</sl-input>
|
||||
`)}
|
||||
${this.renderHelpTextCol(
|
||||
msg(
|
||||
`Waits on the page after behaviors are complete before moving onto the next page. Can be helpful for rate limiting.`
|
||||
)
|
||||
)}
|
||||
${this.renderSectionHeading(msg("Limit Per Crawl"))}
|
||||
${this.renderSectionHeading(msg("Per-Crawl Limits"))}
|
||||
${this.renderFormCol(html`
|
||||
<sl-mutation-observer
|
||||
attr="min"
|
||||
@ -1392,6 +1317,81 @@ https://archiveweb.page/images/${"logo.svg"}`}
|
||||
msg(`Increasing parallel crawler instances can speed up crawls, but may
|
||||
increase the chances of getting rate limited.`)
|
||||
)}
|
||||
${this.renderSectionHeading(msg("Per-Page Limits"))}
|
||||
${this.renderFormCol(html`
|
||||
<sl-input
|
||||
name="pageLoadTimeoutSeconds"
|
||||
type="number"
|
||||
inputmode="numeric"
|
||||
label=${msg("Page Load Timeout")}
|
||||
placeholder=${this.orgDefaults?.pageLoadTimeoutSeconds
|
||||
? msg(
|
||||
str`Default: ${this.orgDefaults.pageLoadTimeoutSeconds.toLocaleString()}`
|
||||
)
|
||||
: "Default: Unlimited"}
|
||||
value=${ifDefined(this.formState.pageLoadTimeoutSeconds ?? undefined)}
|
||||
min="0"
|
||||
@sl-input=${onInputMinMax}
|
||||
>
|
||||
<span slot="suffix">${msg("seconds")}</span>
|
||||
</sl-input>
|
||||
`)}
|
||||
${this.renderHelpTextCol(
|
||||
msg(
|
||||
`Limits amount of time to wait for a page to load. Behaviors will run after this timeout only if the page is partially or fully loaded.`
|
||||
)
|
||||
)}
|
||||
${this.renderFormCol(html`
|
||||
<sl-input
|
||||
name="behaviorTimeoutSeconds"
|
||||
type="number"
|
||||
inputmode="numeric"
|
||||
label=${msg("Behavior Timeout")}
|
||||
placeholder=${this.orgDefaults?.behaviorTimeoutSeconds
|
||||
? msg(
|
||||
str`Default: ${this.orgDefaults.behaviorTimeoutSeconds.toLocaleString()}`
|
||||
)
|
||||
: msg("Unlimited")}
|
||||
value=${ifDefined(this.formState.behaviorTimeoutSeconds ?? undefined)}
|
||||
min="0"
|
||||
@sl-input=${onInputMinMax}
|
||||
>
|
||||
<span slot="suffix">${msg("seconds")}</span>
|
||||
</sl-input>
|
||||
`)}
|
||||
${this.renderHelpTextCol(
|
||||
msg(`Limits how long behaviors can run on each page.`)
|
||||
)}
|
||||
${this.renderFormCol(html`<sl-checkbox
|
||||
name="autoscrollBehavior"
|
||||
?checked=${this.formState.autoscrollBehavior}
|
||||
>
|
||||
${msg("Auto-Scroll Behavior")}
|
||||
</sl-checkbox>`)}
|
||||
${this.renderHelpTextCol(
|
||||
msg(
|
||||
`When enabled the browser will automatically scroll to the end of the page.`
|
||||
),
|
||||
false
|
||||
)}
|
||||
${this.renderFormCol(html`
|
||||
<sl-input
|
||||
name="pageExtraDelaySeconds"
|
||||
type="number"
|
||||
inputmode="numeric"
|
||||
label=${msg("Delay Before Next Page")}
|
||||
placeholder=${"Default: 0"}
|
||||
value=${ifDefined(this.formState.pageExtraDelaySeconds ?? undefined)}
|
||||
min="0"
|
||||
>
|
||||
<span slot="suffix">${msg("seconds")}</span>
|
||||
</sl-input>
|
||||
`)}
|
||||
${this.renderHelpTextCol(
|
||||
msg(
|
||||
`Waits on the page after behaviors are complete before moving onto the next page. Can be helpful for rate limiting.`
|
||||
)
|
||||
)}
|
||||
`;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user