feat: Add custom behaviors to org crawling defaults (#2546)
Resolves https://github.com/webrecorder/browsertrix/issues/2513

## Changes

- Allows org admins to set custom behaviors as crawling defaults
- Shows warning text if both autoscroll/autoclick and custom behaviors are enabled
- Refactors `infoTextStrings` -> `infoTextFor` to match other label/string matchers

---------

Co-authored-by: Ilya Kreymer <ikreymer@gmail.com>
This commit is contained in:
parent
0a0d2d04d3
commit
7c6bae8d61
@ -67,7 +67,7 @@ import type {
|
||||
import { infoCol, inputCol } from "@/layouts/columns";
|
||||
import { pageSectionsWithNav } from "@/layouts/pageSectionsWithNav";
|
||||
import { panel } from "@/layouts/panel";
|
||||
import infoTextStrings from "@/strings/crawl-workflows/infoText";
|
||||
import { infoTextFor } from "@/strings/crawl-workflows/infoText";
|
||||
import { labelFor } from "@/strings/crawl-workflows/labels";
|
||||
import scopeTypeLabels from "@/strings/crawl-workflows/scopeType";
|
||||
import sectionStrings from "@/strings/crawl-workflows/section";
|
||||
@ -758,10 +758,7 @@ export class WorkflowEditor extends BtrixElement {
|
||||
@btrix-change=${this.handleChangeRegex}
|
||||
></btrix-queue-exclusion-table>
|
||||
`)}
|
||||
${this.renderHelpTextCol(
|
||||
infoTextStrings["exclusions"],
|
||||
false,
|
||||
)}
|
||||
${this.renderHelpTextCol(infoTextFor["exclusions"], false)}
|
||||
</div>
|
||||
</btrix-details>
|
||||
</div>
|
||||
@ -1176,7 +1173,7 @@ https://archiveweb.page/images/${"logo.svg"}`}
|
||||
)}
|
||||
${this.renderHelpTextCol(
|
||||
html`
|
||||
${infoTextStrings["selectLinks"]}
|
||||
${infoTextFor["selectLinks"]}
|
||||
<br /><br />
|
||||
${msg(
|
||||
html`If none are specified, the crawler will default to
|
||||
@ -1235,7 +1232,7 @@ https://archiveweb.page/images/${"logo.svg"}`}
|
||||
</sl-input>
|
||||
</sl-mutation-observer>
|
||||
`)}
|
||||
${this.renderHelpTextCol(infoTextStrings["pageLimit"])}
|
||||
${this.renderHelpTextCol(infoTextFor["pageLimit"])}
|
||||
${inputCol(html`
|
||||
<sl-input
|
||||
name="crawlTimeoutMinutes"
|
||||
@ -1249,7 +1246,7 @@ https://archiveweb.page/images/${"logo.svg"}`}
|
||||
<span slot="suffix">${msg("minutes")}</span>
|
||||
</sl-input>
|
||||
`)}
|
||||
${this.renderHelpTextCol(infoTextStrings["crawlTimeoutMinutes"])}
|
||||
${this.renderHelpTextCol(infoTextFor["crawlTimeoutMinutes"])}
|
||||
${inputCol(html`
|
||||
<sl-input
|
||||
name="maxCrawlSizeGB"
|
||||
@ -1263,19 +1260,34 @@ https://archiveweb.page/images/${"logo.svg"}`}
|
||||
<span slot="suffix">${msg("GB")}</span>
|
||||
</sl-input>
|
||||
`)}
|
||||
${this.renderHelpTextCol(infoTextStrings["maxCrawlSizeGB"])}
|
||||
${this.renderHelpTextCol(infoTextFor["maxCrawlSizeGB"])}
|
||||
`;
|
||||
}
|
||||
|
||||
private renderPageBehavior() {
|
||||
const behaviorOverrideWarning = html`
|
||||
<span slot="help-text" class="text-warning-600">
|
||||
<sl-icon
|
||||
name="exclamation-triangle"
|
||||
class="align-[-.175em] text-sm"
|
||||
></sl-icon>
|
||||
${msg("May be overridden by custom behaviors.")}
|
||||
</span>
|
||||
`;
|
||||
|
||||
return html`
|
||||
${this.renderSectionHeading(labelFor.behaviors)}
|
||||
${inputCol(
|
||||
html`<sl-checkbox
|
||||
name="autoscrollBehavior"
|
||||
class="part-[form-control-help-text]:mt-1.5"
|
||||
?checked=${this.formState.autoscrollBehavior}
|
||||
>
|
||||
${labelFor.autoscrollBehavior}
|
||||
${when(
|
||||
this.formState.autoscrollBehavior && this.formState.customBehavior,
|
||||
() => behaviorOverrideWarning,
|
||||
)}
|
||||
</sl-checkbox>`,
|
||||
)}
|
||||
${this.renderHelpTextCol(
|
||||
@ -1285,9 +1297,14 @@ https://archiveweb.page/images/${"logo.svg"}`}
|
||||
${inputCol(
|
||||
html`<sl-checkbox
|
||||
name="autoclickBehavior"
|
||||
class="part-[form-control-help-text]:mt-1.5"
|
||||
?checked=${this.formState.autoclickBehavior}
|
||||
>
|
||||
${labelFor.autoclickBehavior}
|
||||
${when(
|
||||
this.formState.autoclickBehavior && this.formState.customBehavior,
|
||||
() => behaviorOverrideWarning,
|
||||
)}
|
||||
</sl-checkbox>
|
||||
|
||||
${when(
|
||||
@ -1360,7 +1377,7 @@ https://archiveweb.page/images/${"logo.svg"}`}
|
||||
<span slot="suffix">${msg("seconds")}</span>
|
||||
</sl-input>
|
||||
`)}
|
||||
${this.renderHelpTextCol(infoTextStrings["pageLoadTimeoutSeconds"])}
|
||||
${this.renderHelpTextCol(infoTextFor["pageLoadTimeoutSeconds"])}
|
||||
${inputCol(html`
|
||||
<sl-input
|
||||
name="postLoadDelaySeconds"
|
||||
@ -1374,7 +1391,7 @@ https://archiveweb.page/images/${"logo.svg"}`}
|
||||
<span slot="suffix">${msg("seconds")}</span>
|
||||
</sl-input>
|
||||
`)}
|
||||
${this.renderHelpTextCol(infoTextStrings["postLoadDelaySeconds"])}
|
||||
${this.renderHelpTextCol(infoTextFor["postLoadDelaySeconds"])}
|
||||
${inputCol(html`
|
||||
<sl-input
|
||||
name="behaviorTimeoutSeconds"
|
||||
@ -1389,7 +1406,7 @@ https://archiveweb.page/images/${"logo.svg"}`}
|
||||
<span slot="suffix">${msg("seconds")}</span>
|
||||
</sl-input>
|
||||
`)}
|
||||
${this.renderHelpTextCol(infoTextStrings["behaviorTimeoutSeconds"])}
|
||||
${this.renderHelpTextCol(infoTextFor["behaviorTimeoutSeconds"])}
|
||||
${inputCol(html`
|
||||
<sl-input
|
||||
name="pageExtraDelaySeconds"
|
||||
@ -1403,7 +1420,7 @@ https://archiveweb.page/images/${"logo.svg"}`}
|
||||
<span slot="suffix">${msg("seconds")}</span>
|
||||
</sl-input>
|
||||
`)}
|
||||
${this.renderHelpTextCol(infoTextStrings["pageExtraDelaySeconds"])}
|
||||
${this.renderHelpTextCol(infoTextFor["pageExtraDelaySeconds"])}
|
||||
`;
|
||||
}
|
||||
|
||||
@ -1433,12 +1450,7 @@ https://archiveweb.page/images/${"logo.svg"}`}
|
||||
`,
|
||||
)} `,
|
||||
)}
|
||||
${this.renderHelpTextCol(
|
||||
msg(
|
||||
`Enable custom page actions with behavior scripts. You can specify any publicly accessible URL or public Git repository.`,
|
||||
),
|
||||
false,
|
||||
)}
|
||||
${this.renderHelpTextCol(infoTextFor.customBehavior, false)}
|
||||
`;
|
||||
}
|
||||
|
||||
@ -1454,7 +1466,7 @@ https://archiveweb.page/images/${"logo.svg"}`}
|
||||
})}
|
||||
></btrix-select-browser-profile>
|
||||
`)}
|
||||
${this.renderHelpTextCol(infoTextStrings["browserProfile"])}
|
||||
${this.renderHelpTextCol(infoTextFor["browserProfile"])}
|
||||
${this.proxies?.servers.length
|
||||
? [
|
||||
inputCol(html`
|
||||
@ -1470,7 +1482,7 @@ https://archiveweb.page/images/${"logo.svg"}`}
|
||||
})}
|
||||
></btrix-select-crawler-proxy>
|
||||
`),
|
||||
this.renderHelpTextCol(infoTextStrings["proxyId"]),
|
||||
this.renderHelpTextCol(infoTextFor["proxyId"]),
|
||||
]
|
||||
: nothing}
|
||||
${inputCol(html`
|
||||
@ -1517,14 +1529,14 @@ https://archiveweb.page/images/${"logo.svg"}`}
|
||||
></btrix-select-crawler>
|
||||
`)}
|
||||
${this.showCrawlerChannels
|
||||
? this.renderHelpTextCol(infoTextStrings["crawlerChannel"])
|
||||
? this.renderHelpTextCol(infoTextFor["crawlerChannel"])
|
||||
: html``}
|
||||
${inputCol(html`
|
||||
<sl-checkbox name="blockAds" ?checked=${this.formState.blockAds}>
|
||||
${msg("Block ads by domain")}
|
||||
</sl-checkbox>
|
||||
`)}
|
||||
${this.renderHelpTextCol(infoTextStrings["blockAds"], false)}
|
||||
${this.renderHelpTextCol(infoTextFor["blockAds"], false)}
|
||||
${inputCol(html`
|
||||
<sl-input
|
||||
name="userAgent"
|
||||
@ -1535,7 +1547,7 @@ https://archiveweb.page/images/${"logo.svg"}`}
|
||||
>
|
||||
</sl-input>
|
||||
`)}
|
||||
${this.renderHelpTextCol(infoTextStrings["userAgent"])}
|
||||
${this.renderHelpTextCol(infoTextFor["userAgent"])}
|
||||
${inputCol(html`
|
||||
<btrix-language-select
|
||||
.value=${this.formState.lang as LanguageCode}
|
||||
@ -1548,7 +1560,7 @@ https://archiveweb.page/images/${"logo.svg"}`}
|
||||
<span slot="label">${msg("Language")}</span>
|
||||
</btrix-language-select>
|
||||
`)}
|
||||
${this.renderHelpTextCol(infoTextStrings["lang"])}
|
||||
${this.renderHelpTextCol(infoTextFor["lang"])}
|
||||
`;
|
||||
}
|
||||
|
||||
@ -2191,10 +2203,7 @@ https://archiveweb.page/images/${"logo.svg"}`}
|
||||
|
||||
// TODO Move away from manual validation check
|
||||
// See https://github.com/webrecorder/browsertrix/issues/2536
|
||||
if (
|
||||
this.formState.autoclickBehavior &&
|
||||
this.clickSelector
|
||||
) {
|
||||
if (this.formState.autoclickBehavior && this.clickSelector) {
|
||||
if (!this.clickSelector.checkValidity()) {
|
||||
this.clickSelector.reportValidity();
|
||||
return;
|
||||
@ -2434,7 +2443,9 @@ https://archiveweb.page/images/${"logo.svg"}`}
|
||||
selectLinks: this.linkSelectorTable?.value.length
|
||||
? this.linkSelectorTable.value
|
||||
: DEFAULT_SELECT_LINKS,
|
||||
customBehaviors: this.customBehaviorsTable?.value || [],
|
||||
customBehaviors:
|
||||
(this.formState.customBehavior && this.customBehaviorsTable?.value) ||
|
||||
[],
|
||||
clickSelector:
|
||||
this.formState.clickSelector || DEFAULT_AUTOCLICK_SELECTOR,
|
||||
},
|
||||
|
@ -12,11 +12,14 @@ import { BtrixElement } from "@/classes/BtrixElement";
|
||||
import type { LanguageSelect } from "@/components/ui/language-select";
|
||||
import type { SelectCrawlerProxy } from "@/components/ui/select-crawler-proxy";
|
||||
import { proxiesContext, type ProxiesContext } from "@/context/org";
|
||||
import type { CustomBehaviorsTable } from "@/features/crawl-workflows/custom-behaviors-table";
|
||||
import type { QueueExclusionTable } from "@/features/crawl-workflows/queue-exclusion-table";
|
||||
import { columns, type Cols } from "@/layouts/columns";
|
||||
import infoTextStrings from "@/strings/crawl-workflows/infoText";
|
||||
import { infoTextFor } from "@/strings/crawl-workflows/infoText";
|
||||
import { labelFor } from "@/strings/crawl-workflows/labels";
|
||||
import sectionStrings from "@/strings/crawl-workflows/section";
|
||||
import { crawlingDefaultsSchema, type CrawlingDefaults } from "@/types/org";
|
||||
import { formValidator } from "@/utils/form";
|
||||
import {
|
||||
appDefaults,
|
||||
BYTES_PER_GB,
|
||||
@ -32,14 +35,10 @@ type Field = Record<FieldName, TemplateResult<1> | undefined>;
|
||||
|
||||
const PLACEHOLDER_EXCLUSIONS = [""]; // Add empty slot
|
||||
|
||||
function section(section: SectionsEnum | "exclusions", cols: Cols) {
|
||||
function section(section: SectionsEnum, cols: Cols) {
|
||||
return html`
|
||||
<section class="p-5">
|
||||
<btrix-section-heading
|
||||
>${section === "exclusions"
|
||||
? msg("Exclusions")
|
||||
: sectionStrings[section]}</btrix-section-heading
|
||||
>
|
||||
<btrix-section-heading>${sectionStrings[section]}</btrix-section-heading>
|
||||
${columns(cols)}
|
||||
</section>
|
||||
`;
|
||||
@ -63,6 +62,9 @@ export class OrgSettingsCrawlWorkflows extends BtrixElement {
|
||||
@query("btrix-queue-exclusion-table")
|
||||
exclusionTable?: QueueExclusionTable | null;
|
||||
|
||||
@query("btrix-custom-behaviors-table")
|
||||
customBehaviorsTable?: CustomBehaviorsTable | null;
|
||||
|
||||
@query("btrix-language-select")
|
||||
languageSelect?: LanguageSelect | null;
|
||||
|
||||
@ -72,6 +74,8 @@ export class OrgSettingsCrawlWorkflows extends BtrixElement {
|
||||
@query('sl-button[type="submit"]')
|
||||
submitButton?: SlButton | null;
|
||||
|
||||
private readonly checkFormValidity = formValidator(this);
|
||||
|
||||
connectedCallback() {
|
||||
super.connectedCallback();
|
||||
|
||||
@ -140,6 +144,13 @@ export class OrgSettingsCrawlWorkflows extends BtrixElement {
|
||||
`,
|
||||
};
|
||||
const behaviors = {
|
||||
customBehavior: html`
|
||||
<label class="form-label text-xs">${labelFor.customBehaviors}</label>
|
||||
<btrix-custom-behaviors-table
|
||||
.customBehaviors=${orgDefaults.customBehaviors || []}
|
||||
editable
|
||||
></btrix-custom-behaviors-table>
|
||||
`,
|
||||
pageLoadTimeoutSeconds: html`
|
||||
<sl-input
|
||||
size="small"
|
||||
@ -258,7 +269,7 @@ export class OrgSettingsCrawlWorkflows extends BtrixElement {
|
||||
limits,
|
||||
behaviors,
|
||||
browserSettings,
|
||||
};
|
||||
} as const;
|
||||
}
|
||||
|
||||
private renderWorkflowDefaults() {
|
||||
@ -270,10 +281,11 @@ export class OrgSettingsCrawlWorkflows extends BtrixElement {
|
||||
section(
|
||||
sectionName as SectionsEnum,
|
||||
Object.entries(fields)
|
||||
.filter(([, field]) => field as unknown)
|
||||
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
|
||||
.filter(([, field]) => field)
|
||||
.map(([fieldName, field]) => [
|
||||
field,
|
||||
infoTextStrings[fieldName as FieldName],
|
||||
infoTextFor[fieldName as keyof typeof infoTextFor],
|
||||
]),
|
||||
),
|
||||
),
|
||||
@ -292,6 +304,31 @@ export class OrgSettingsCrawlWorkflows extends BtrixElement {
|
||||
e.preventDefault();
|
||||
|
||||
const form = e.target as HTMLFormElement;
|
||||
|
||||
// Wait for custom behaviors validation to finish
|
||||
// TODO Move away from manual validation check
|
||||
// See https://github.com/webrecorder/browsertrix/issues/2536
|
||||
if (this.customBehaviorsTable) {
|
||||
if (!this.customBehaviorsTable.checkValidity()) {
|
||||
this.customBehaviorsTable.reportValidity();
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
await this.customBehaviorsTable.taskComplete;
|
||||
} catch {
|
||||
this.customBehaviorsTable.reportValidity();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
const isValid = await this.checkFormValidity(form);
|
||||
|
||||
if (!isValid) {
|
||||
form.reportValidity();
|
||||
return;
|
||||
}
|
||||
|
||||
const values = serialize(form) as Record<string, string>;
|
||||
const parseNumber = (value: string) => (value ? Number(value) : undefined);
|
||||
const parsedValues: CrawlingDefaults = {
|
||||
@ -312,6 +349,7 @@ export class OrgSettingsCrawlWorkflows extends BtrixElement {
|
||||
userAgent: values.userAgent,
|
||||
lang: this.languageSelect?.value || undefined,
|
||||
exclude: this.exclusionTable?.exclusions?.filter((v) => v) || [],
|
||||
customBehaviors: this.customBehaviorsTable?.value || [],
|
||||
};
|
||||
|
||||
// Set null or empty strings to undefined
|
||||
|
@ -160,6 +160,7 @@ export class WorkflowsNew extends LiteElement {
|
||||
userAgent: org.crawlingDefaults?.userAgent,
|
||||
blockAds: org.crawlingDefaults?.blockAds,
|
||||
lang: org.crawlingDefaults?.lang,
|
||||
customBehaviors: org.crawlingDefaults?.customBehaviors,
|
||||
},
|
||||
crawlTimeout: org.crawlingDefaults?.crawlTimeout,
|
||||
maxCrawlSize: org.crawlingDefaults?.maxCrawlSize,
|
||||
|
@ -5,7 +5,7 @@ import { type FormState } from "@/utils/workflow";
|
||||
|
||||
type Field = keyof FormState;
|
||||
|
||||
const infoText: Partial<Record<Field, string | TemplateResult>> = {
|
||||
export const infoTextFor = {
|
||||
exclusions: msg(
|
||||
"Specify exclusion rules for what pages should not be visited.",
|
||||
),
|
||||
@ -72,6 +72,9 @@ const infoText: Partial<Record<Field, string | TemplateResult>> = {
|
||||
>
|
||||
to find URLs that are defined in custom HTML attributes.`,
|
||||
),
|
||||
};
|
||||
customBehavior: msg(
|
||||
`Enable custom page actions with behavior scripts. You can specify any publicly accessible URL or public Git repository.`,
|
||||
),
|
||||
} as const satisfies Partial<Record<Field, string | TemplateResult>>;
|
||||
|
||||
export default infoText;
|
||||
export default infoTextFor;
|
||||
|
@ -47,6 +47,7 @@ export const crawlingDefaultsSchema = z.object({
|
||||
lang: z.string().optional(),
|
||||
userAgent: z.string().optional(),
|
||||
exclude: z.array(z.string()),
|
||||
customBehaviors: z.array(z.string()),
|
||||
});
|
||||
export type CrawlingDefaults = z.infer<typeof crawlingDefaultsSchema>;
|
||||
|
||||
|
@ -244,6 +244,10 @@ export function getInitialFormState(params: {
|
||||
return fallback;
|
||||
};
|
||||
|
||||
const enableCustomBehaviors = Boolean(
|
||||
params.initialWorkflow.config.customBehaviors.length,
|
||||
);
|
||||
|
||||
return {
|
||||
...defaultFormState,
|
||||
primarySeedUrl: defaultFormState.primarySeedUrl,
|
||||
@ -294,13 +298,15 @@ export function getInitialFormState(params: {
|
||||
params.initialWorkflow.config.limit ?? defaultFormState.pageLimit,
|
||||
autoscrollBehavior: params.initialWorkflow.config.behaviors
|
||||
? params.initialWorkflow.config.behaviors.includes(Behavior.AutoScroll)
|
||||
: defaultFormState.autoscrollBehavior,
|
||||
: enableCustomBehaviors
|
||||
? false
|
||||
: defaultFormState.autoscrollBehavior,
|
||||
autoclickBehavior: params.initialWorkflow.config.behaviors
|
||||
? params.initialWorkflow.config.behaviors.includes(Behavior.AutoClick)
|
||||
: defaultFormState.autoclickBehavior,
|
||||
customBehavior: Boolean(
|
||||
params.initialWorkflow.config.customBehaviors.length,
|
||||
),
|
||||
: enableCustomBehaviors
|
||||
? false
|
||||
: defaultFormState.autoclickBehavior,
|
||||
customBehavior: enableCustomBehaviors,
|
||||
selectLinks: params.initialWorkflow.config.selectLinks,
|
||||
clickSelector: params.initialWorkflow.config.clickSelector,
|
||||
userAgent:
|
||||
|
Loading…
Reference in New Issue
Block a user