feat: Add behaviors section to workflow form (#2464)

- Moves "Per-Page Limits" fields to new "Page Behavior" section
- Fixes workflow settings closing tags with refactor to how sections are
rendered
- Updates user guide with behaviors documentation

---------

Co-authored-by: Henry Wilkinson <henry@wilkinson.graphics>
This commit is contained in:
sua yoo 2025-03-11 11:40:20 -07:00 committed by GitHub
parent a42d83c9f6
commit ac1236f15b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 272 additions and 219 deletions

View File

@ -12,7 +12,7 @@ The status of an archived item depends on its type. Uploads will always have the
| Status | Description |
| ---- | ---- |
| <span class="status-success">:bootstrap-check-circle-fill: Complete</span> | The crawl completed according to the workflow's settings. Workflows with [limits](workflow-setup.md#limits) set may stop running before they capture every queued page, but the resulting archived item will still be marked as "Complete". |
| <span class="status-success">:bootstrap-check-circle-fill: Complete</span> | The crawl completed according to the workflow's settings. Workflows with [crawl limits](workflow-setup.md#crawl-limits) set may stop running before they capture every queued page, but the resulting archived item will still be marked as "Complete". |
| <span class="status-neutral">:bootstrap-dash-square-fill: Stopped</span> | The crawl workflow was _stopped_ gracefully by a user and data is saved. |
| <span class="status-neutral">:bootstrap-exclamation-square-fill: Stopped: Reason</span> | A workflow limit (listed as the reason) was reached and data is saved. |
| <span class="status-warning">:bootstrap-x-octagon-fill: Canceled</span> | The crawl workflow was _canceled_ by a user, no data is saved. |

View File

@ -129,7 +129,7 @@ This can be useful for avoiding crawler traps — sites that may automatically g
e.g: If `#!regex \babout\/?\b` is entered, `example.com/about/` will not be crawled; however, `example.com/aboutme/` will be crawled.
## Limits
## Crawl Limits
Enforce maximum limits on your crawl.
@ -145,7 +145,36 @@ The crawl will be gracefully stopped after this set period of elapsed time.
The crawl will be gracefully stopped after reaching this set size in GB.
### Page Load Timeout
## Page Behavior
Customize how and when the browser performs specific operations on a page.
**Built-in Behaviors**
Behaviors are browser operations that can be enabled for additional page interactivity.
### Autoscroll
When enabled, the browser will automatically scroll to the end of the page.
### Autoclick
When enabled, the browser will automatically click on all link-like elements.
When clicking a link-like element that would normally result in navigation, autoclick will only record the click and prevent navigation away from the current page.
??? Info "Autoclick use cases"
This behavior can be helpful for:
- Websites that use anchor links (`<a>`) in non-standard ways, such as by using JavaScript in place of the standard `href` attribute to create a hyperlink.
- Websites that use `<a>` in place of a `<button>` to reveal in-page content.
**Page Timing**
Page timing gives you more granular control over how long the browser should stay on a page and when behaviors should run on a page. Add limits to decrease the amount of time the browser spends on a page, and add delays to increase the amount of time the browser waits on a page. Adding delays will increase the total amount of time spent on a crawl and may impact your overall crawl minutes.
### Page Load Limit
Limits the amount of elapsed time to wait for a page to load. Behaviors will run after this timeout only if the page is partially or fully loaded.
@ -153,23 +182,13 @@ Limits amount of elapsed time to wait for a page to load. Behaviors will run aft
Waits on the page after initial HTML page load for a set number of seconds prior to moving on to next steps such as link extraction and behaviors. Can be useful with pages that are slow to load page contents.
### Behavior Timeout
### Behavior Limit
Limits the amount of elapsed time that behaviors have to complete.
### Autoscroll Behavior
When enabled, the browser will automatically scroll to the end of the page.
### Autoclick Behavior
When enabled, the browser will automatically click on all links, even if they're empty or don't navigate to another page.
This can be helpful for web applications that use JavaScript to handle navigation and don't link to things properly with `href=""` attributes.
### Delay Before Next Page
Waits on the page for a set period of elapsed time after any behaviors have finished running. This can be helpful to avoid rate limiting however it will slow down your crawl.
Waits on the page for a set number of seconds before unloading the current page. If any [behaviors](#autoscroll) are enabled, this delay will take place after all behaviors have finished running. This can be helpful to avoid rate limiting.
## Browser Settings

View File

@ -1,6 +1,6 @@
import { localized, msg, str } from "@lit/localize";
import ISO6391 from "iso-639-1";
import { html, nothing } from "lit";
import { html, nothing, type TemplateResult } from "lit";
import { customElement, property, state } from "lit/decorators.js";
import { when } from "lit/directives/when.js";
import { html as staticHtml, unsafeStatic } from "lit/static-html.js";
@ -9,6 +9,7 @@ import RegexColorize from "regex-colorize";
import { BtrixElement } from "@/classes/BtrixElement";
import type { CrawlConfig, Seed, SeedConfig } from "@/pages/org/types";
import { labelFor } from "@/strings/crawl-workflows/labels";
import scopeTypeLabel from "@/strings/crawl-workflows/scopeType";
import sectionStrings from "@/strings/crawl-workflows/section";
import type { Collection } from "@/types/collection";
@ -61,7 +62,6 @@ export class ConfigDetails extends BtrixElement {
render() {
const crawlConfig = this.crawlConfig;
const seedsConfig = crawlConfig?.config;
const renderTimeLimit = (
valueSeconds?: number | null,
fallbackValue?: number,
@ -99,12 +99,11 @@ export class ConfigDetails extends BtrixElement {
};
return html`
<section id="crawler-settings" class="mb-8">
<btrix-section-heading style="--margin: var(--sl-spacing-medium)">
<h4>${sectionStrings.scope}</h4>
</btrix-section-heading>
<btrix-desc-list>
${when(
${this.renderSection({
id: "crawler-settings",
heading: sectionStrings.scope,
renderDescItems: (seedsConfig) =>
when(
seedsConfig,
(config) => html`
${this.renderSetting(
@ -121,10 +120,12 @@ export class ConfigDetails extends BtrixElement {
? this.renderConfirmUrlListSettings(config)
: this.renderConfirmSeededSettings(config)}
`,
)}
<btrix-section-heading style="--margin: var(--sl-spacing-medium)">
<h4>${sectionStrings.perCrawlLimits}</h4>
</btrix-section-heading>
),
})}
${this.renderSection({
id: "crawl-limits",
heading: sectionStrings.limits,
renderDescItems: (seedsConfig) => html`
${this.renderSetting(
msg("Max Pages"),
when(seedsConfig && this.seeds, (seeds) => {
@ -148,62 +149,64 @@ export class ConfigDetails extends BtrixElement {
)}
${this.renderSetting(
msg("Crawl Time Limit"),
renderTimeLimit(crawlConfig?.crawlTimeout, Infinity),
renderTimeLimit(this.crawlConfig?.crawlTimeout, Infinity),
)}
${this.renderSetting(
msg("Crawl Size Limit"),
renderSize(crawlConfig?.maxCrawlSize),
renderSize(this.crawlConfig?.maxCrawlSize),
)}
<btrix-section-heading style="--margin: var(--sl-spacing-medium)">
<h4>${sectionStrings.perPageLimits}</h4>
</btrix-section-heading>
`,
})}
${this.renderSection({
id: "browser-behaviors",
heading: sectionStrings.behaviors,
renderDescItems: (seedsConfig) => html`
${this.renderSetting(
msg("Page Load Timeout"),
renderTimeLimit(
crawlConfig?.config.pageLoadTimeout,
this.orgDefaults?.pageLoadTimeoutSeconds ?? Infinity,
),
)}
${this.renderSetting(
msg("Delay After Page Load"),
renderTimeLimit(crawlConfig?.config.postLoadDelay, 0),
)}
${this.renderSetting(
msg("Behavior Timeout"),
renderTimeLimit(
crawlConfig?.config.behaviorTimeout,
this.orgDefaults?.behaviorTimeoutSeconds ?? Infinity,
),
)}
${this.renderSetting(
msg("Autoscroll Behavior"),
crawlConfig?.config.behaviors &&
!crawlConfig.config.behaviors.includes("autoscroll")
labelFor.autoscrollBehavior,
seedsConfig?.behaviors &&
!seedsConfig.behaviors.includes("autoscroll")
? msg("Disabled")
: html`<span class="text-neutral-400"
>${msg("Enabled (default)")}</span
>`,
)}
${this.renderSetting(
msg("Autoclick Behavior"),
crawlConfig?.config.behaviors &&
crawlConfig.config.behaviors.includes("autoclick")
labelFor.autoclickBehavior,
seedsConfig?.behaviors &&
seedsConfig.behaviors.includes("autoclick")
? msg("Enabled")
: html`<span class="text-neutral-400"
>${msg("Disabled (default)")}</span
>`,
)}
${this.renderSetting(
msg("Delay Before Next Page"),
renderTimeLimit(crawlConfig?.config.pageExtraDelay, 0),
labelFor.pageLoadTimeoutSeconds,
renderTimeLimit(
seedsConfig?.pageLoadTimeout,
this.orgDefaults?.pageLoadTimeoutSeconds ?? Infinity,
),
)}
</btrix-desc-list>
</section>
<section id="browser-settings" class="mb-8">
<btrix-section-heading style="--margin: var(--sl-spacing-medium)">
<h4>${sectionStrings.browserSettings}</h4>
</btrix-section-heading>
<btrix-desc-list>
${this.renderSetting(
// Label must describe the postLoadDelay value rendered on this row
// ("Delay After Page Load"), not the page load limit shown on the row above.
labelFor.postLoadDelaySeconds,
renderTimeLimit(seedsConfig?.postLoadDelay, 0),
)}
${this.renderSetting(
labelFor.behaviorTimeoutSeconds,
renderTimeLimit(
seedsConfig?.behaviorTimeout,
this.orgDefaults?.behaviorTimeoutSeconds ?? Infinity,
),
)}
${this.renderSetting(
labelFor.pageExtraDelaySeconds,
renderTimeLimit(seedsConfig?.pageExtraDelay, 0),
)}
`,
})}
${this.renderSection({
id: "browser-settings",
heading: sectionStrings.browserSettings,
renderDescItems: (seedsConfig) => html`
${this.renderSetting(
msg("Browser Profile"),
when(
@ -238,32 +241,31 @@ export class ConfigDetails extends BtrixElement {
)}
${this.renderSetting(
msg("Block Ads by Domain"),
crawlConfig?.config.blockAds,
seedsConfig?.blockAds,
)}
${this.renderSetting(
msg("User Agent"),
crawlConfig?.config.userAgent
? crawlConfig.config.userAgent
seedsConfig?.userAgent
? seedsConfig.userAgent
: html`<span class="text-neutral-400"
>${msg("Browser User Agent (default)")}</span
>`,
)}
${crawlConfig?.config.lang
${seedsConfig?.lang
? this.renderSetting(
msg("Language"),
ISO6391.getName(crawlConfig.config.lang),
ISO6391.getName(seedsConfig.lang),
)
: nothing}
${crawlConfig?.proxyId
? this.renderSetting(msg("Proxy"), capitalize(crawlConfig.proxyId))
: nothing}
</btrix-desc-list>
</section>
<section id="crawl-scheduling" class="mb-8">
<btrix-section-heading style="--margin: var(--sl-spacing-medium)">
<h4>${sectionStrings.scheduling}</h4>
</btrix-section-heading>
<btrix-desc-list>
`,
})}
${this.renderSection({
id: "crawl-scheduling",
heading: sectionStrings.scheduling,
renderDescItems: () => html`
${this.renderSetting(
msg("Crawl Schedule Type"),
crawlConfig?.schedule
@ -278,54 +280,72 @@ export class ConfigDetails extends BtrixElement {
: undefined,
),
)}
`,
})}
${when(!this.hideMetadata, () =>
this.renderSection({
id: "crawl-metadata",
heading: sectionStrings.metadata,
renderDescItems: () => html`
${this.renderSetting(msg("Name"), crawlConfig?.name)}
${this.renderSetting(
msg("Description"),
crawlConfig?.description
? html`
<p class="max-w-prose font-sans">
${crawlConfig.description}
</p>
`
: undefined,
)}
${this.renderSetting(
msg("Tags"),
crawlConfig?.tags.length
? crawlConfig.tags.map(
(tag) =>
html`<btrix-tag class="mr-2 mt-1">${tag}</btrix-tag>`,
)
: [],
)}
${this.renderSetting(
msg("Collections"),
this.collections.length
? this.collections.map(
(coll) =>
html`<sl-tag class="mr-2 mt-1" variant="neutral">
${coll.name}
<span class="font-monostyle pl-1 text-xs">
(${this.localize.number(coll.crawlCount)}
${pluralOf("items", coll.crawlCount)})
</span>
</sl-tag>`,
)
: undefined,
)}
`,
}),
)}
`;
}
/**
 * Renders one settings section: a `<section>` element with the given `id`,
 * a `<btrix-section-heading>` showing `heading`, and a `<btrix-desc-list>`
 * whose contents come from `renderDescItems`.
 *
 * @param id - Value for the `<section>` element's `id` attribute.
 * @param heading - Text rendered inside the section's `<h4>`.
 * @param renderDescItems - Callback that produces the description list items;
 *   it is called with `this.crawlConfig?.config`, which may be undefined.
 *
 * NOTE(review): in this view the template also contains a trailing
 * `hideMetadata` / "crawl-metadata" block after `</section>`; it looks like
 * pre-refactor markup carried over by the diff — confirm before relying on it.
 */
private renderSection({
id,
heading,
renderDescItems,
}: {
id: string;
heading: string;
renderDescItems: (seedsConfig?: CrawlConfig["config"]) => TemplateResult;
}) {
return html`
<section id=${id} class="mb-8">
<btrix-section-heading style="--margin: var(--sl-spacing-medium)">
<h4>${heading}</h4>
</btrix-section-heading>
<btrix-desc-list>
${renderDescItems(this.crawlConfig?.config)}
</btrix-desc-list>
</section>
${this.hideMetadata
? nothing
: html`
<section id="crawl-metadata" class="mb-8">
<btrix-section-heading style="--margin: var(--sl-spacing-medium)">
<h4>${msg("Metadata")}</h4>
</btrix-section-heading>
<btrix-desc-list>
${this.renderSetting(msg("Name"), crawlConfig?.name)}
${this.renderSetting(
msg("Description"),
crawlConfig?.description
? html`
<p class="max-w-prose font-sans">
${crawlConfig.description}
</p>
`
: undefined,
)}
${this.renderSetting(
msg("Tags"),
crawlConfig?.tags.length
? crawlConfig.tags.map(
(tag) =>
html`<btrix-tag class="mr-2 mt-1">${tag}</btrix-tag>`,
)
: [],
)}
${this.renderSetting(
msg("Collections"),
this.collections.length
? this.collections.map(
(coll) =>
html`<sl-tag class="mr-2 mt-1" variant="neutral">
${coll.name}
<span class="font-monostyle pl-1 text-xs">
(${this.localize.number(coll.crawlCount)}
${pluralOf("items", coll.crawlCount)})
</span>
</sl-tag>`,
)
: undefined,
)}
</btrix-desc-list>
</section>
`}
`;
}

View File

@ -57,6 +57,7 @@ import { infoCol, inputCol } from "@/layouts/columns";
import { pageSectionsWithNav } from "@/layouts/pageSectionsWithNav";
import { panel } from "@/layouts/panel";
import infoTextStrings from "@/strings/crawl-workflows/infoText";
import { labelFor } from "@/strings/crawl-workflows/labels";
import scopeTypeLabels from "@/strings/crawl-workflows/scopeType";
import sectionStrings from "@/strings/crawl-workflows/section";
import { AnalyticsTrackEvent } from "@/trackEvents";
@ -85,6 +86,7 @@ import {
getDefaultFormState,
getInitialFormState,
getServerDefaults,
SECTIONS,
type FormState,
type WorkflowDefaults,
} from "@/utils/workflow";
@ -96,13 +98,7 @@ type NewCrawlConfigParams = WorkflowParams & {
};
};
const STEPS = [
"crawlSetup",
"crawlLimits",
"browserSettings",
"crawlScheduling",
"crawlMetadata",
] as const;
const STEPS = SECTIONS;
type StepName = (typeof STEPS)[number];
type TabState = {
completed: boolean;
@ -123,7 +119,7 @@ const formName = "newJobConfig" as const;
const panelSuffix = "--panel" as const;
const getDefaultProgressState = (hasConfigId = false): ProgressState => {
let activeTab: StepName = "crawlSetup";
let activeTab: StepName = "scope";
if (window.location.hash) {
const hashValue = window.location.hash.slice(1);
@ -136,8 +132,12 @@ const getDefaultProgressState = (hasConfigId = false): ProgressState => {
activeTab,
// TODO Mark as completed only if form section has data
tabs: {
crawlSetup: { error: false, completed: hasConfigId },
crawlLimits: {
scope: { error: false, completed: hasConfigId },
limits: {
error: false,
completed: hasConfigId,
},
behaviors: {
error: false,
completed: hasConfigId,
},
@ -145,11 +145,11 @@ const getDefaultProgressState = (hasConfigId = false): ProgressState => {
error: false,
completed: hasConfigId,
},
crawlScheduling: {
scheduling: {
error: false,
completed: hasConfigId,
},
crawlMetadata: {
metadata: {
error: false,
completed: hasConfigId,
},
@ -242,13 +242,7 @@ export class WorkflowEditor extends BtrixElement {
private readonly validateNameMax = maxLengthValidator(50);
private readonly validateDescriptionMax = maxLengthValidator(350);
private readonly tabLabels: Record<StepName, string> = {
crawlSetup: sectionStrings.scope,
crawlLimits: msg("Limits"),
browserSettings: sectionStrings.browserSettings,
crawlScheduling: sectionStrings.scheduling,
crawlMetadata: msg("Metadata"),
};
private readonly tabLabels = sectionStrings;
private get formHasError() {
return (
@ -1086,28 +1080,8 @@ https://archiveweb.page/images/${"logo.svg"}`}
urlListToArray(this.formState.urlList).length +
(isPageScopeType(this.formState.scopeType) ? 0 : 1),
);
const onInputMinMax = async (e: CustomEvent) => {
const inputEl = e.target as SlInput;
await inputEl.updateComplete;
let helpText = "";
if (!inputEl.checkValidity()) {
const value = +inputEl.value;
const min = inputEl.min;
const max = inputEl.max;
if (min && value < +min) {
helpText = msg(
str`Must be more than minimum of ${this.localize.number(+min)}`,
);
} else if (max && value > +max) {
helpText = msg(
str`Must be less than maximum of ${this.localize.number(+max)}`,
);
}
}
inputEl.helpText = helpText;
};
return html`
${this.renderSectionHeading(sectionStrings.perCrawlLimits)}
${inputCol(html`
<sl-mutation-observer
attr="min"
@ -1137,7 +1111,7 @@ https://archiveweb.page/images/${"logo.svg"}`}
: undefined,
)}
placeholder=${defaultLabel(this.orgDefaults.maxPagesPerCrawl)}
@sl-input=${onInputMinMax}
@sl-input=${this.onInputMinMax}
>
<span slot="suffix">${msg("pages")}</span>
</sl-input>
@ -1172,17 +1146,49 @@ https://archiveweb.page/images/${"logo.svg"}`}
</sl-input>
`)}
${this.renderHelpTextCol(infoTextStrings["maxCrawlSizeGB"])}
${this.renderSectionHeading(sectionStrings.perPageLimits)}
`;
}
private renderBehaviors() {
return html`
${this.renderSectionHeading(msg("Built-in Behaviors"))}
${inputCol(
html`<sl-checkbox
name="autoscrollBehavior"
?checked=${this.formState.autoscrollBehavior}
>
${labelFor.autoscrollBehavior}
</sl-checkbox>`,
)}
${this.renderHelpTextCol(
msg(`Automatically scroll to the end of the page.`),
false,
)}
${inputCol(
html`<sl-checkbox
name="autoclickBehavior"
?checked=${this.formState.autoclickBehavior}
>
${labelFor.autoclickBehavior}
</sl-checkbox>`,
)}
${this.renderHelpTextCol(
msg(
`Automatically click on all link-like elements. Useful for capturing in-page interactions or for clicking links without navigating away from the page.`,
),
false,
)}
${this.renderSectionHeading(msg("Page Timing"))}
${inputCol(html`
<sl-input
name="pageLoadTimeoutSeconds"
type="number"
inputmode="numeric"
label=${msg("Page Load Timeout")}
label=${labelFor.pageLoadTimeoutSeconds}
placeholder=${defaultLabel(this.orgDefaults.pageLoadTimeoutSeconds)}
value=${ifDefined(this.formState.pageLoadTimeoutSeconds ?? undefined)}
min="0"
@sl-input=${onInputMinMax}
@sl-input=${this.onInputMinMax}
>
<span slot="suffix">${msg("seconds")}</span>
</sl-input>
@ -1193,7 +1199,7 @@ https://archiveweb.page/images/${"logo.svg"}`}
name="postLoadDelaySeconds"
type="number"
inputmode="numeric"
label=${msg("Delay After Page Load")}
label=${labelFor.postLoadDelaySeconds}
placeholder=${defaultLabel(0)}
value=${ifDefined(this.formState.postLoadDelaySeconds ?? undefined)}
min="0"
@ -1207,50 +1213,22 @@ https://archiveweb.page/images/${"logo.svg"}`}
name="behaviorTimeoutSeconds"
type="number"
inputmode="numeric"
label=${msg("Behavior Timeout")}
label=${labelFor.behaviorTimeoutSeconds}
placeholder=${defaultLabel(this.orgDefaults.behaviorTimeoutSeconds)}
value=${ifDefined(this.formState.behaviorTimeoutSeconds ?? undefined)}
min="0"
@sl-input=${onInputMinMax}
@sl-input=${this.onInputMinMax}
>
<span slot="suffix">${msg("seconds")}</span>
</sl-input>
`)}
${this.renderHelpTextCol(infoTextStrings["behaviorTimeoutSeconds"])}
${inputCol(
html`<sl-checkbox
name="autoscrollBehavior"
?checked=${this.formState.autoscrollBehavior}
>
${msg("Autoscroll behavior")}
</sl-checkbox>`,
)}
${this.renderHelpTextCol(
msg(
`When enabled the browser will automatically scroll to the end of the page.`,
),
false,
)}
${inputCol(
html`<sl-checkbox
name="autoclickBehavior"
?checked=${this.formState.autoclickBehavior}
>
${msg("Autoclick behavior")}
</sl-checkbox>`,
)}
${this.renderHelpTextCol(
msg(
`When enabled the browser will automatically click on links that don't navigate to other pages.`,
),
false,
)}
${inputCol(html`
<sl-input
name="pageExtraDelaySeconds"
type="number"
inputmode="numeric"
label=${msg("Delay Before Next Page")}
label=${labelFor.pageExtraDelaySeconds}
placeholder=${defaultLabel(0)}
value=${ifDefined(this.formState.pageExtraDelaySeconds ?? undefined)}
min="0"
@ -1262,7 +1240,7 @@ https://archiveweb.page/images/${"logo.svg"}`}
`;
}
private renderCrawlBehaviors() {
private renderBrowserSettings() {
if (!this.formState.lang) throw new Error("missing formstate.lang");
return html`
${inputCol(html`
@ -1601,35 +1579,59 @@ https://archiveweb.page/images/${"logo.svg"}`}
required?: boolean;
}[] = [
{
name: "crawlSetup",
name: "scope",
desc: msg("Specify the range and depth of your crawl."),
render: this.renderScope,
required: true,
},
{
name: "crawlLimits",
desc: msg("Enforce maximum limits on your crawl."),
name: "limits",
desc: msg("Limit the size and duration of the crawl."),
render: this.renderCrawlLimits,
},
{
name: "browserSettings",
desc: msg(
"Configure the browser that's used to visit URLs during the crawl.",
),
render: this.renderCrawlBehaviors,
name: "behaviors",
desc: msg("Customize how the browser loads and interacts with a page."),
render: this.renderBehaviors,
},
{
name: "crawlScheduling",
name: "browserSettings",
desc: msg("Configure the browser used to crawl."),
render: this.renderBrowserSettings,
},
{
name: "scheduling",
desc: msg("Schedule recurring crawls."),
render: this.renderJobScheduling,
},
{
name: "crawlMetadata",
name: "metadata",
desc: msg("Describe and organize crawls from this workflow."),
render: this.renderJobMetadata,
},
];
/**
 * `sl-input` handler for numeric inputs. After the input element has finished
 * updating, sets its help text to a localized out-of-range message when the
 * value falls below `min` or above `max`, and clears the help text otherwise.
 *
 * @param e - Event whose target is the `SlInput` being edited.
 */
private readonly onInputMinMax = async (e: CustomEvent) => {
  const field = e.target as SlInput;
  await field.updateComplete;

  // Resolve the message with guard clauses; "" clears any previous help text.
  const describeRangeError = (): string => {
    if (field.checkValidity()) return "";

    const entered = +field.value;
    const { min, max } = field;

    if (min && entered < +min) {
      return msg(
        str`Must be more than minimum of ${this.localize.number(+min)}`,
      );
    }
    if (max && entered > +max) {
      return msg(
        str`Must be less than maximum of ${this.localize.number(+max)}`,
      );
    }
    // Invalid for a reason other than the min/max range: show no message.
    return "";
  };

  field.helpText = describeRangeError();
};
private changeScopeType(value: FormState["scopeType"]) {
const prevScopeType = this.formState.scopeType;
const formState: Partial<FormState> = {
@ -1799,7 +1801,7 @@ https://archiveweb.page/images/${"logo.svg"}`}
if (e.detail.valid === false || !table.checkValidity()) {
this.updateProgressState({
tabs: {
crawlSetup: { error: true },
scope: { error: true },
},
});
} else {

View File

@ -101,7 +101,7 @@ export class OrgSettingsCrawlWorkflows extends BtrixElement {
></btrix-queue-exclusion-table>
`,
};
const perCrawlLimits = {
const limits = {
crawlTimeoutMinutes: html`
<sl-input
size="small"
@ -139,7 +139,7 @@ export class OrgSettingsCrawlWorkflows extends BtrixElement {
</sl-input>
`,
};
const perPageLimits = {
const behaviors = {
pageLoadTimeoutSeconds: html`
<sl-input
size="small"
@ -255,8 +255,8 @@ export class OrgSettingsCrawlWorkflows extends BtrixElement {
return {
scope,
perCrawlLimits,
perPageLimits,
limits,
behaviors,
browserSettings,
};
}

View File

@ -0,0 +1,10 @@
import { msg } from "@lit/localize";
/**
 * Localized labels for workflow behavior and page-timing fields, keyed by
 * form field name. Every entry goes through `msg()` so it is available for
 * translation.
 */
export const labelFor = {
  autoscrollBehavior: msg("Autoscroll"),
  autoclickBehavior: msg("Autoclick"),
  pageLoadTimeoutSeconds: msg("Page Load Limit"),
  postLoadDelaySeconds: msg("Delay After Page Load"),
  // Wrapped in msg() like its siblings; a bare literal would never be localized.
  behaviorTimeoutSeconds: msg("Behavior Limit"),
  pageExtraDelaySeconds: msg("Delay Before Next Page"),
};

View File

@ -4,10 +4,11 @@ import { type SectionsEnum } from "@/utils/workflow";
/** Localized display name for each workflow section, keyed by `SectionsEnum`. */
const section: Record<SectionsEnum, string> = {
scope: msg("Scope"),
perCrawlLimits: msg("Per-Crawl Limits"),
perPageLimits: msg("Per-Page Limits"),
limits: msg("Crawl Limits"),
behaviors: msg("Page Behavior"),
browserSettings: msg("Browser Settings"),
scheduling: msg("Scheduling"),
metadata: msg("Metadata"),
};
export default section;

View File

@ -25,10 +25,11 @@ export const BYTES_PER_GB = 1e9;
/**
 * Workflow section identifiers. Declared `as const` so the literal values can
 * be used to build `sectionsEnum` and the `SectionsEnum` type.
 */
export const SECTIONS = [
"scope",
"perCrawlLimits",
"perPageLimits",
"limits",
"behaviors",
"browserSettings",
"scheduling",
"metadata",
] as const;
export const sectionsEnum = z.enum(SECTIONS);
export type SectionsEnum = z.infer<typeof sectionsEnum>;