import type { LitElement, TemplateResult } from "lit";
import { html as staticHtml, unsafeStatic } from "lit/static-html.js";
import type {
  SlCheckbox,
  SlInput,
  SlRadio,
  SlRadioGroup,
  SlSelect,
  SlTextarea,
} from "@shoelace-style/shoelace";
import { state, property, query, queryAsync } from "lit/decorators.js";
import { when } from "lit/directives/when.js";
import { msg, localized, str } from "@lit/localize";
import { ifDefined } from "lit/directives/if-defined.js";
import compact from "lodash/fp/compact";
import { mergeDeep } from "immutable";
import flow from "lodash/fp/flow";
import uniq from "lodash/fp/uniq";
import RegexColorize from "regex-colorize";
import ISO6391 from "iso-639-1";

import LiteElement, { html } from "../../utils/LiteElement";
import { regexEscape } from "../../utils/string";
import type { AuthState } from "../../utils/AuthService";
import {
  getUTCSchedule,
  humanizeSchedule,
  humanizeNextDate,
  getScheduleInterval,
  getNextDate,
} from "../../utils/cron";
import type { Tab } from "../../components/tab-list";
import type {
  ExclusionRemoveEvent,
  ExclusionChangeEvent,
} from "../../components/queue-exclusion-table";
import type { TimeInputChangeEvent } from "../../components/time-input";
import type { Tags, TagsChangeEvent } from "../../components/tag-input";
import type {
  CrawlConfigParams,
  Profile,
  InitialCrawlConfig,
  JobType,
} from "./types";

/** Payload sent to the API when saving a crawl config. */
type NewCrawlConfigParams = CrawlConfigParams & {
  runNow: boolean;
  oldId?: string;
};

/** Ordered wizard steps; the last entry is the review/submit step. */
const STEPS = [
  "crawlSetup",
  "browserSettings",
  "crawlScheduling",
  "crawlInformation",
  "confirmSettings",
] as const;
type StepName = typeof STEPS[number];
type TabState = {
  enabled: boolean;
  completed: boolean;
  error: boolean;
};
type Tabs = Record<StepName, TabState>;
type ProgressState = {
  currentStep: StepName;
  activeTab: StepName;
  tabs: Tabs;
};
type FormState = {
  primarySeedUrl: string;
  urlList: string;
  includeLinkedPages: boolean;
  allowedExternalUrlList: string;
  crawlTimeoutMinutes: number | null;
  pageTimeoutMinutes: number | null;
  scopeType: CrawlConfigParams["config"]["scopeType"];
  exclusions: CrawlConfigParams["config"]["exclude"];
  pageLimit: CrawlConfigParams["config"]["limit"];
  scale: CrawlConfigParams["scale"];
  blockAds: CrawlConfigParams["config"]["blockAds"];
  lang: CrawlConfigParams["config"]["lang"];
  scheduleType: "now" | "date" | "cron" | "none";
  scheduleFrequency: "daily" | "weekly" | "monthly";
  scheduleDayOfMonth: number;
  scheduleDayOfWeek: number;
  scheduleTime: {
    hour: number;
    minute: number;
    period: "AM" | "PM";
  };
  runNow: boolean;
  jobName: CrawlConfigParams["name"];
  browserProfile: Profile | null;
  tags: Tags;
};

/**
 * Build the initial wizard progress state.
 *
 * @param hasConfigId - `true` when editing an existing config: every tab is
 *   enabled and marked completed, and the active tab is taken from the URL
 *   hash when the hash names a known step.
 */
const getDefaultProgressState = (hasConfigId = false): ProgressState => {
  let activeTab: StepName = "crawlSetup";
  if (hasConfigId && window.location.hash) {
    const hashValue = window.location.hash.slice(1);
    if ((STEPS as readonly string[]).includes(hashValue)) {
      activeTab = hashValue as StepName;
    }
  }

  return {
    activeTab,
    currentStep: hasConfigId ? "confirmSettings" : "crawlSetup",
    tabs: {
      crawlSetup: { enabled: true, error: false, completed: hasConfigId },
      browserSettings: {
        enabled: hasConfigId,
        error: false,
        completed: hasConfigId,
      },
      crawlScheduling: {
        enabled: hasConfigId,
        error: false,
        completed: hasConfigId,
      },
      crawlInformation: {
        enabled: hasConfigId,
        error: false,
        completed: hasConfigId,
      },
      confirmSettings: {
        enabled: hasConfigId,
        error: false,
        completed: hasConfigId,
      },
    },
  };
};

/** Build the form state used for a brand-new crawl config. */
const getDefaultFormState = (): FormState => ({
  primarySeedUrl: "",
  urlList: "",
  includeLinkedPages: false,
  allowedExternalUrlList: "",
  crawlTimeoutMinutes: null,
  pageTimeoutMinutes: null,
  scopeType: "host",
  exclusions: [],
  pageLimit: undefined,
  scale: 1,
  blockAds: true,
  lang: undefined,
  scheduleType: "now",
  scheduleFrequency: "weekly",
  scheduleDayOfMonth: new Date().getDate(),
  scheduleDayOfWeek: new Date().getDay(),
  scheduleTime: {
    hour: 12,
    minute: 0,
    period: "AM",
  },
  runNow: false,
  jobName: "",
  browserProfile: null,
  tags: [],
});

const defaultProgressState = getDefaultProgressState();
const orderedTabNames = STEPS.filter(
  (stepName) => defaultProgressState.tabs[stepName as StepName]
) as StepName[];

/**
 * Short weekday labels in the runtime's default locale, indexed like
 * `Date.prototype.getDay()` (0 = Sunday).
 */
function getLocalizedWeekDays() {
  // TODO accept locale from locale-picker
  const { format } = new Intl.DateTimeFormat(undefined, {
    weekday: "short",
    timeZone: "UTC",
  });
  // Format fixed UTC dates instead of offsetting "now" by multiples of 24h,
  // which can land on the wrong day around a DST change.
  // 2017-01-01 was a Sunday.
  return Array.from({ length: 7 }, (_, day) =>
    format(new Date(Date.UTC(2017, 0, 1 + day)))
  );
}

/**
 * Loose URL check. The pattern is not anchored, so it tests whether `url`
 * contains something URL-like rather than validating the whole string.
 */
function validURL(url: string) {
  return /((([A-Za-z]{3,9}:(?:\/\/)?)(?:[\-;:&=\+\$,\w]+@)?[A-Za-z0-9\.\-]+|(?:www\.|[\-;:&=\+\$,\w]+@)[A-Za-z0-9\.\-]+)((?:\/[\+~%\/\.\w\-_]*)?\??(?:[\-\+=&;%@\.\w_]*)#?(?:[\.\!\/\\\w]*))?)/.test(
    url
  );
}

/** De-duplicate exclusions, then drop falsy (empty) entries. */
const trimExclusions = flow(uniq, compact);

/**
 * Split a comma- and/or whitespace-separated list into its entries.
 * Empty entries (blank input, trailing comma) are dropped so callers never
 * see an empty-string URL.
 */
const urlListToArray = (str: string) =>
  str.replace(/,/g, " ").split(/\s+/).filter(Boolean);

/**
 * Multi-step editor for creating or updating a crawl config.
 *
 * NOTE(review): the `html` templates below contain no element markup and some
 * contain stray event-handler text; the markup looks like it was stripped
 * from this copy of the file -- confirm against version control. Template text
 * is kept as found, and locals that only the missing markup would use are
 * retained.
 */
@localized()
export class CrawlConfigEditor extends LiteElement {
  @property({ type: Object })
  authState!: AuthState;

  @property({ type: String })
  archiveId!: string;

  @property({ type: String })
  configId?: string;

  @property({ type: String })
  jobType?: JobType;

  @property({ type: Object })
  initialCrawlConfig?: InitialCrawlConfig;

  @state()
  private isSubmitting = false;

  @state()
  private progressState!: ProgressState;

  @state()
  private formState!: FormState;

  @state()
  private serverError?: TemplateResult | string;

  /** `true` when any tab is currently flagged with an error. */
  private get formHasError() {
    return Object.values(this.progressState.tabs).some(({ error }) => error);
  }

  /** Schedule derived from the form's frequency, day and time fields. */
  private get utcSchedule() {
    return getUTCSchedule({
      interval: this.formState.scheduleFrequency,
      dayOfMonth: this.formState.scheduleDayOfMonth,
      dayOfWeek: this.formState.scheduleDayOfWeek,
      ...this.formState.scheduleTime,
    });
  }

  private readonly daysOfWeek = getLocalizedWeekDays();

  private readonly scopeTypeLabels: Record<FormState["scopeType"], string> = {
    prefix: msg("Path Begins with This URL"),
    host: msg("Pages on This Domain"),
    domain: msg("Pages on This Domain & Subdomains"),
    "page-spa": msg("Single Page App (In-Page Links Only)"),
    page: msg("Page"),
    custom: msg("Custom"),
    any: msg("Any"),
  };

  private readonly scheduleTypeLabels: Record<
    FormState["scheduleType"],
    string
  > = {
    now: msg("Run Immediately on Save"),
    date: msg("Run on a Specific Date & Time"),
    cron: msg("Run on a Recurring Basis"),
    none: msg("No Schedule"),
  };

  private readonly scheduleFrequencyLabels: Record<
    FormState["scheduleFrequency"],
    string
  > = {
    daily: msg("Daily"),
    weekly: msg("Weekly"),
    monthly: msg("Monthly"),
  };

  @query('form[name="newJobConfig"]')
  formElem!: HTMLFormElement;

  @queryAsync("btrix-tab-panel[aria-hidden=false]")
  activeTabPanel!: Promise<HTMLElement | null>;

  connectedCallback(): void {
    // State must exist before the first render triggered by super.
    this.initializeEditor();
    super.connectedCallback();
  }

  willUpdate(changedProperties: Map<string, unknown>) {
    // `get` returns the previous value, so these branches only run when the
    // property had a truthy value before this update.
    if (
      changedProperties.get("initialCrawlConfig") &&
      this.initialCrawlConfig
    ) {
      this.initializeEditor();
    }
    if (changedProperties.get("formState") && this.formState) {
      this.handleFormStateChange();
    }
  }

  updated(changedProperties: Map<string, unknown>) {
    if (changedProperties.get("progressState") && this.progressState) {
      this.handleProgressStateChange(
        changedProperties.get("progressState") as ProgressState
      );
    }
  }

  /** Reset progress and form state from defaults plus `initialCrawlConfig`. */
  private initializeEditor() {
    this.progressState = getDefaultProgressState(Boolean(this.configId));
    const formState: FormState = {
      ...getDefaultFormState(),
      ...this.getInitialFormState(),
    };
    if (!formState.lang) {
      formState.lang = this.getInitialLang();
    }
    if (!formState.exclusions?.length) {
      formState.exclusions = [""]; // Add empty slot
    }
    this.formState = formState;
  }

  /**
   * Primary language subtag of the browser language ("en-US" -> "en"), or
   * `null` when the browser reports none.
   */
  private getInitialLang() {
    // Default to current user browser language
    const browserLanguage = window.navigator.language;
    if (browserLanguage) {
      // split() also handles tags with no region ("en"), where
      // slice(0, indexOf("-")) would cut off the last character.
      return browserLanguage.split("-")[0];
    }
    return null;
  }

  /** Map `initialCrawlConfig` (when set) onto form state fields. */
  private getInitialFormState(): Partial<FormState> {
    if (!this.initialCrawlConfig) return {};
    const formState: Partial<FormState> = {};
    const { seeds, scopeType } = this.initialCrawlConfig.config;
    if (this.initialCrawlConfig.jobType === "seed-crawl") {
      const firstSeed = seeds[0];
      // A config without seeds leaves the default (empty) seed URL in place.
      if (firstSeed) {
        formState.primarySeedUrl =
          typeof firstSeed === "string" ? firstSeed : firstSeed.url;
      }
    } else {
      // Treat "custom" like URL list
      formState.urlList = seeds
        .map((seed) => (typeof seed === "string" ? seed : seed.url))
        .join("\n");

      if (this.initialCrawlConfig.jobType === "custom") {
        formState.scopeType = scopeType || "page";
      }
    }
    if (this.initialCrawlConfig.schedule) {
      formState.scheduleType = "cron";
      formState.scheduleFrequency = getScheduleInterval(
        this.initialCrawlConfig.schedule
      );
      const nextDate = getNextDate(this.initialCrawlConfig.schedule);
      // Keep the default day/time fields if no next date can be computed.
      if (nextDate) {
        formState.scheduleDayOfMonth = nextDate.getDate();
        formState.scheduleDayOfWeek = nextDate.getDay();
        const hours = nextDate.getHours();
        formState.scheduleTime = {
          hour: hours % 12 || 12,
          minute: nextDate.getMinutes(),
          period: hours > 11 ? "PM" : "AM",
        };
      }
    } else {
      if (this.configId) {
        formState.scheduleType = "none";
      } else {
        formState.scheduleType = "now";
      }
    }

    if (this.initialCrawlConfig.tags?.length) {
      formState.tags = this.initialCrawlConfig.tags;
    }

    return {
      jobName: this.initialCrawlConfig.name,
      browserProfile: this.initialCrawlConfig.profileid
        ? ({ id: this.initialCrawlConfig.profileid } as Profile)
        : undefined,
      scopeType: this.initialCrawlConfig.config
        .scopeType as FormState["scopeType"],
      exclusions: this.initialCrawlConfig.config.exclude,
      includeLinkedPages: Boolean(this.initialCrawlConfig.config.extraHops),
      ...formState,
    };
  }

  render() {
    const tabLabels: Record<StepName, string> = {
      crawlSetup: msg("Crawl Setup"),
      browserSettings: msg("Browser Settings"),
      crawlScheduling: msg("Crawl Scheduling"),
      crawlInformation: msg("Crawl Information"),
      confirmSettings: msg("Confirm Settings"),
    };

    // The url-list case is wrapped in an arrow so `renderUrlListSetup` always
    // gets its default `isCustom = false`, whatever `when` passes its callback.
    return html`

${tabLabels[this.progressState.activeTab]}

${msg( html`Fields marked with * are required` )}

${orderedTabNames.map((tabName) =>
      this.renderNavItem(tabName, tabLabels[tabName])
    )} ${this.renderPanelContent(
      html` ${when(this.jobType === "url-list", () =>
        this.renderUrlListSetup()
      )} ${when(
        this.jobType === "seed-crawl",
        this.renderSeededCrawlSetup
      )} ${when(this.jobType === "custom", () =>
        this.renderUrlListSetup(true)
      )} `,
      { isFirst: true }
    )} ${this.renderPanelContent(this.renderCrawlBehaviors())} ${this.renderPanelContent(this.renderJobScheduling())} ${this.renderPanelContent(this.renderJobInformation())} ${this.renderPanelContent(
      this.renderConfirmSettings(),
      {
        isLast: true,
      }
    )}
`;
  }

  private renderNavItem(tabName: StepName, content: TemplateResult | string) {
    const isActive = tabName === this.progressState.activeTab;
    const isConfirmSettings = tabName === "confirmSettings";
    const { error: isInvalid, completed } = this.progressState.tabs[tabName];
    const iconProps = {
      name: "circle",
      library: "default",
      class: "text-neutral-300",
    };
    if (isConfirmSettings) {
      iconProps.name = "info-circle";
      iconProps.class = "text-base";
    } else {
      if (isInvalid) {
        iconProps.name = "exclamation-circle";
        iconProps.class = "text-danger";
      } else if (isActive) {
        iconProps.name = "pencil-circle-dashed";
        iconProps.library = "app";
        iconProps.class = "text-base";
      } else if (completed) {
        iconProps.name = "check-circle";
        iconProps.class = "text-success";
      }
    }

    const { enabled } = this.progressState.tabs[tabName];
    // The confirm tab is also reachable once crawl setup has been completed.
    const isEnabled = isConfirmSettings
      ? this.progressState.tabs.confirmSettings.enabled ||
        this.progressState.tabs.crawlSetup.completed
      : enabled;

    return html` ${content} `;
  }

  private renderPanelContent(
    content: TemplateResult,
    { isFirst = false, isLast = false } = {}
  ) {
    return html`
${content} ${when(this.serverError, () =>
      this.renderErrorAlert(this.serverError!)
    )}
${this.renderFooter({ isFirst, isLast })}
`;
  }

  private renderFooter({ isFirst = false, isLast = false }) {
    return html`
${isFirst
      ? html` ${this.configId ? msg("Cancel") : msg("Start Over")} `
      : html` ${msg("Previous Step")} `} ${when(
      this.configId,
      () => html`
${when(
        !isLast,
        () => html` ${msg("Next")} `
      )} ${msg("Save Changes")}
`,
      () =>
        isLast
          ? html` ${this.formState.runNow
              ? msg("Save & Run Crawl")
              : msg("Save & Schedule Crawl")} `
          : html`
${msg("Next Step")} { if (this.hasRequiredFields()) { this.updateProgressState({ activeTab: "confirmSettings", currentStep: "confirmSettings", tabs: { crawlSetup: { completed: true } }, }); } else { this.nextStep(); } }} > ${msg("Confirm & Save")}
`
    )}
`;
  }

  private renderSectionHeading(content: TemplateResult | string) {
    return html`

${content}

`;
  }

  private renderFormCol = (content: TemplateResult) => {
    return html`
${content}
`;
  };

  private renderHelpTextCol(content: TemplateResult, padTop = true) {
    return html`
${content}
`;
  }

  private renderUrlListSetup = (isCustom = false) => {
    return html` ${this.renderFormCol(
      html` { const inputEl = e.target as SlInput; await inputEl.updateComplete; if ( inputEl.invalid && !urlListToArray(inputEl.value).some((url) => !validURL(url)) ) { inputEl.setCustomValidity(""); inputEl.helpText = ""; } }} @sl-blur=${async (
        e: Event
      ) => {
        const inputEl = e.target as SlInput;
        await inputEl.updateComplete;
        if (
          inputEl.value &&
          urlListToArray(inputEl.value).some((url) => !validURL(url))
        ) {
          const text = msg("Please fix invalid URL in list.");
          inputEl.invalid = true;
          inputEl.helpText = text;
          inputEl.setCustomValidity(text);
        }
      }} > `
    )} ${this.renderHelpTextCol(
      html`The crawler will visit and record each URL listed in the order defined here.`
    )} ${when(
      isCustom,
      () => html` ${this.renderFormCol(
        html` this.updateFormState({ scopeType: (e.target as HTMLSelectElement) .value as FormState["scopeType"], })} > ${this.scopeTypeLabels["prefix"]} ${this.scopeTypeLabels["host"]} ${this.scopeTypeLabels["domain"]} ${msg("Advanced Options")} ${this.scopeTypeLabels["page-spa"]} ${this.scopeTypeLabels["page"]} ${this.scopeTypeLabels["custom"]} ${this.scopeTypeLabels["any"]} `
      )} ${this.renderHelpTextCol(
        html`Tells the crawler which pages it can visit.`
      )} `
    )} ${this.renderFormCol(
      html` ${msg("Include Linked Pages")} `
    )} ${this.renderHelpTextCol(
      html`If checked, the crawler will visit pages one link away from a Crawl URL.`,
      false
    )} ${when(
      this.formState.includeLinkedPages || this.jobType === "custom",
      () => html` ${this.renderSectionHeading(msg("Page Limits"))} ${this.renderFormCol(
        html` this.updateFormState({ exclusions: [""], })} > ${msg("Add More")} `
      )} ${this.renderHelpTextCol(
        html`Specify exclusion rules for what pages should not be visited. 
Exclusions apply to all URLs.`
      )} `
    )} ${this.renderCrawlScale()} `;
  };

  private renderSeededCrawlSetup = () => {
    const urlPlaceholder = "https://example.com";
    let exampleUrl = new URL(urlPlaceholder);
    if (this.formState.primarySeedUrl) {
      try {
        exampleUrl = new URL(this.formState.primarySeedUrl);
      } catch {
        // Seed URL is not parseable (yet): keep the placeholder for examples.
      }
    }
    const exampleHost = exampleUrl.host;
    const exampleProtocol = exampleUrl.protocol;
    const examplePathname = exampleUrl.pathname.replace(/\/$/, "");
    const exampleDomain = `${exampleProtocol}//${exampleHost}`;

    let helpText: TemplateResult | string;

    switch (this.formState.scopeType) {
      case "prefix":
        helpText = msg(
          html`Will crawl all page URLs that begin with ${exampleDomain}${examplePathname}, e.g. ${exampleDomain}${examplePathname}/path/page.html`
        );
        break;
      case "host":
        helpText = msg(
          html`Will crawl all pages on ${exampleHost} and ignore pages on any subdomains.`
        );
        break;
      case "domain":
        helpText = msg(
          html`Will crawl all pages on ${exampleHost} and subdomain.${exampleHost}.`
        );
        break;
      case "page-spa":
        helpText = msg(
          html`Will only visit ${exampleDomain}${examplePathname} and links that stay within the same URL, e.g. hash anchor links: ${exampleDomain}${examplePathname}#example-page`
        );
        break;
      default:
        helpText = "";
        break;
    }

    return html` ${this.renderFormCol(
      html` { const inputEl = e.target as SlInput; await inputEl.updateComplete; if (inputEl.invalid && validURL(inputEl.value)) { inputEl.setCustomValidity(""); inputEl.helpText = ""; } }} @sl-blur=${async (
        e: Event
      ) => {
        const inputEl = e.target as SlInput;
        await inputEl.updateComplete;
        if (inputEl.value && !validURL(inputEl.value)) {
          const text = msg("Please enter a valid URL.");
          inputEl.invalid = true;
          inputEl.helpText = text;
          inputEl.setCustomValidity(text);
        }
      }} > `
    )} ${this.renderHelpTextCol(html`The starting point of your crawl.`)} ${this.renderFormCol(
      html` this.updateFormState({ scopeType: (e.target as HTMLSelectElement) .value as FormState["scopeType"], })} >
${helpText}
${this.scopeTypeLabels["prefix"]} ${this.scopeTypeLabels["host"]} ${this.scopeTypeLabels["domain"]} ${msg("Advanced Options")} ${this.scopeTypeLabels["page-spa"]}
`
    )} ${this.renderHelpTextCol(
      html`Tells the crawler which pages it can visit.`
    )} ${this.renderSectionHeading(msg("Additional Pages"))} ${this.renderFormCol(
      html` `
    )} ${this.renderHelpTextCol(
      html`Crawl pages outside of Crawl Scope that begin with these URLs.`
    )} ${this.renderFormCol(
      html` ${msg("Include Any Linked Page (“one hop out”)")} `
    )} ${this.renderHelpTextCol(
      html`If checked, the crawler will visit pages one link away outside of Crawl Scope.`,
      false
    )} ${this.renderSectionHeading(msg("Page Limits"))} ${this.renderFormCol(
      html` ${msg("pages")} `
    )} ${this.renderHelpTextCol(
      html`Adds a hard limit on the number of pages that will be crawled.`
    )} ${this.renderFormCol(
      html` this.updateFormState({ exclusions: [""], })} > ${msg("Add More")} `
    )} ${this.renderHelpTextCol(
      html`Specify exclusion rules for what pages should not be visited.`
    )} ${this.renderCrawlScale()} `;
  };

  private renderCrawlScale() {
    return html` ${this.renderSectionHeading(msg("Crawl Limits"))} ${this.renderFormCol(
      html` ${msg("minutes")} `
    )} ${this.renderHelpTextCol(
      html`Gracefully stop the crawler after a specified time limit.`
    )} ${this.renderFormCol(
      html` this.updateFormState({ scale: +(e.target as SlCheckbox).value, })} > 1 2 3 `
    )} ${this.renderHelpTextCol(
      html`Increasing parallel crawler instances will speed up crawls, but take up more system resources.`
    )} `;
  }

  private renderCrawlBehaviors() {
    return html` ${this.renderFormCol(
      html` this.updateFormState({ browserProfile: e.detail.value, })} > `
    )} ${this.renderHelpTextCol(
      html`Choose a custom profile to make use of saved cookies and logged-in accounts.`
    )} ${this.renderFormCol(
      html` ${msg("Block Ads by Domain")} `
    )} ${this.renderHelpTextCol(
      html`Blocks advertising content from being loaded. 
Uses Steven Black’s Hosts file.`,
      false
    )} ${this.renderFormCol(
      html` this.updateFormState({ lang: e.detail.item.value, })} @sl-clear=${() => {
        this.updateFormState({
          lang: null,
        });
      }} > ${msg("Language")} `
    )} ${this.renderHelpTextCol(
      html`Websites that observe the browser’s language setting may serve content in that language if available.`
    )} ${this.renderSectionHeading(msg("On-Page Behavior"))} ${this.renderFormCol(
      html` ${msg("minutes")} `
    )} ${this.renderHelpTextCol(
      html`Adds a hard time limit for how long the crawler can spend on a single webpage.`
    )} `;
  }

  private renderJobScheduling() {
    return html` ${this.renderFormCol(
      html` this.updateFormState({ scheduleType: (e.target as SlRadio) .value as FormState["scheduleType"], runNow: (e.target as SlRadio).value === "now", })} > ${this.scheduleTypeLabels["now"]} ${this.scheduleTypeLabels["cron"]} ${this.scheduleTypeLabels["none"]} `
    )} ${this.renderHelpTextCol(
      html`Should a crawl run immediately when setup is complete, on a set day, or on a recurring schedule?`
    )} ${when(this.formState.scheduleType === "cron", this.renderScheduleCron)} `;
  }

  private renderScheduleCron = () => {
    const utcSchedule = this.utcSchedule;
    return html` ${this.renderSectionHeading(msg("Set Schedule"))} ${this.renderFormCol(
      html` this.updateFormState({ scheduleFrequency: (e.target as HTMLSelectElement) .value as FormState["scheduleFrequency"], })} > ${this.scheduleFrequencyLabels["daily"]} ${this.scheduleFrequencyLabels["weekly"]} ${this.scheduleFrequencyLabels["monthly"]} `
    )} ${this.renderHelpTextCol(
      html`Limit the frequency for how often a crawl will run.`
    )} ${when(
      this.formState.scheduleFrequency === "weekly",
      () => html` ${this.renderFormCol(
        html` this.updateFormState({ scheduleDayOfWeek: +(e.target as SlRadioGroup).value, })} > ${this.daysOfWeek.map(
          (label, day) => html`${label}`
        )} `
      )} ${this.renderHelpTextCol(
        html`What day of the week should a crawl run on?`
      )} `
    )} ${when(
      this.formState.scheduleFrequency === "monthly",
      () =>
        html` ${this.renderFormCol(html` `)} ${this.renderHelpTextCol(
          html`What day of the month should a crawl run on?`
        )} `
    )} ${this.renderFormCol(
      html` { this.updateFormState({ scheduleTime: e.detail, }); }} > ${msg("Start Time")}

${msg(
        html`Schedule: ${utcSchedule
          ? humanizeSchedule(utcSchedule)
          : msg("Invalid date")}.`
      )}

${msg(
        html`Next scheduled run: ${utcSchedule
          ? humanizeNextDate(utcSchedule)
          : msg("Invalid date")}.`
      )}

`
    )} ${this.renderHelpTextCol(
      html`A crawl will run at this time in your current timezone.`
    )} ${this.renderFormCol(
      html` ${msg("Also run a crawl immediately on save")} `
    )} ${this.renderHelpTextCol(
      html`If checked, a crawl will run at the time specified above and also once when setup is complete.`,
      false
    )} `;
  };

  private renderJobInformation() {
    const jobNameValue =
      this.formState.jobName ||
      (this.jobType === "seed-crawl" && this.formState.primarySeedUrl) ||
      "";
    return html` ${this.renderFormCol(html` `)} ${this.renderHelpTextCol(
      html`Try to create a unique name to help keep things organized!`
    )} ${this.renderFormCol(
      html` this.updateFormState( { tags: e.detail.tags, }, true )} > `
    )} ${this.renderHelpTextCol(
      html`Create or assign this crawl (and its outputs) to one or more tags to help organize your archived data.`
    )} `;
  }

  private renderErrorAlert(errorMessage: string | TemplateResult) {
    return html`
${errorMessage}
`;
  }

  private renderConfirmSettings = () => {
    const crawlConfig = this.parseConfig();
    return html`
${when(this.formHasError, () =>
      this.renderErrorAlert(
        msg(
          "There are issues with this crawl configuration. Please go through previous steps and fix all issues to continue."
        )
      )
    )} `;
  };

  /** `true` when a crawl name and the job type's seed field are both set. */
  private hasRequiredFields(): boolean {
    if (this.jobType === "seed-crawl") {
      return Boolean(this.formState.jobName && this.formState.primarySeedUrl);
    }
    return Boolean(this.formState.jobName && this.formState.urlList);
  }

  private handleFormStateChange() {
    if (!this.formState.jobName) {
      this.setDefaultJobName();
    }
    const hasRequiredFields = this.hasRequiredFields();
    if (hasRequiredFields && !this.progressState.tabs.crawlSetup.error) {
      this.updateProgressState({
        tabs: {
          crawlSetup: { completed: true },
        },
      });
    }
  }

  /** Scroll the newly active panel into view when its top is off-screen. */
  private async handleProgressStateChange(oldState: ProgressState) {
    const { activeTab } = this.progressState;
    if (oldState.activeTab !== activeTab) {
      const activeTabPanel = (await this.activeTabPanel) as HTMLElement;
      if (activeTabPanel && activeTabPanel.getBoundingClientRect().top < 0) {
        activeTabPanel.scrollIntoView({
          behavior: "smooth",
        });
      }
    }
  }

  private setDefaultJobName() {
    // Set default crawl name based on seed URLs
    if (!this.formState.primarySeedUrl && !this.formState.urlList) {
      return;
    }
    let jobName = "";
    if (this.jobType === "seed-crawl") {
      jobName = this.formState.primarySeedUrl;
    } else {
      const urlList = urlListToArray(this.formState.urlList);
      const firstUrl = urlList[0];
      // Whitespace/comma-only input yields no URLs to name the crawl after.
      if (!firstUrl) return;
      if (urlList.length > 1) {
        const remainder = urlList.length - 1;
        if (remainder === 1) {
          jobName = msg(str`${firstUrl} + ${remainder} more URL`);
        } else {
          jobName = msg(str`${firstUrl} + ${remainder} more URLs`);
        }
      } else {
        jobName = firstUrl;
      }
    }
    this.updateFormState({ jobName });
  }

  private async handleRemoveRegex(e: ExclusionRemoveEvent) {
    const { index } = e.detail;
    const { exclusions } = this.formState;
    if (exclusions) {
      this.updateFormState(
        {
          exclusions: [
            ...exclusions.slice(0, index),
            ...exclusions.slice(index + 1),
          ],
        },
        true
      );
    }

    // Check if we removed an erroring input
    const table = e.target as LitElement;
    await this.updateComplete;
    await table.updateComplete;
    this.syncTabErrorState(table);
  }

  private handleChangeRegex(e: ExclusionChangeEvent) {
    const { regex, index } = e.detail;

    const nextExclusions = [...this.formState.exclusions!];
    nextExclusions[index] = regex;
    this.updateFormState(
      {
        exclusions: nextExclusions,
      },
      true
    );
  }

  private validateOnBlur = async (e: Event) => {
    const el = e.target as SlInput | SlTextarea | SlSelect | SlCheckbox;
    const tagName = el.tagName.toLowerCase();
    if (
      !["sl-input", "sl-textarea", "sl-select", "sl-checkbox"].includes(tagName)
    ) {
      return;
    }
    await el.updateComplete;
    await this.updateComplete;

    const currentTab = this.progressState.activeTab as StepName;
    // Check [data-user-invalid] instead of .invalid property
    // to validate only touched inputs
    if ("userInvalid" in el.dataset) {
      // Partial deep-merge update; the current state object is not mutated.
      this.updateProgressState({ tabs: { [currentTab]: { error: true } } });
    } else if (this.progressState.tabs[currentTab].error) {
      this.syncTabErrorState(el);
    }
  };

  /** Clear the active tab's error flag once its panel has no invalid input. */
  private syncTabErrorState(el: HTMLElement) {
    const currentTab = this.progressState.activeTab as StepName;
    const panelEl = el.closest("btrix-tab-panel");
    // Element is no longer inside a panel: nothing to inspect.
    if (!panelEl) return;
    const hasInvalid = panelEl.querySelector("[data-user-invalid]");

    if (!hasInvalid) {
      this.updateProgressState({ tabs: { [currentTab]: { error: false } } });
    }
  }

  /** Copy a changed Shoelace input's value into the same-named form field. */
  private updateFormStateOnChange(e: Event) {
    const elem = e.target as SlTextarea | SlInput | SlCheckbox;
    const name = elem.name;
    const tagName = elem.tagName.toLowerCase();
    let value: any;
    switch (tagName) {
      case "sl-checkbox":
        value = (elem as SlCheckbox).checked;
        break;
      case "sl-textarea":
        value = elem.value;
        break;
      case "sl-input": {
        if ((elem as SlInput).type === "number") {
          value = +elem.value;
        } else {
          value = elem.value;
        }
        break;
      }
      default:
        return;
    }
    if (name in this.formState) {
      this.updateFormState({
        [name]: value,
      });
    }
  }

  private tabClickHandler = (step: StepName) => (e: MouseEvent) => {
    const tab = e.currentTarget as Tab;
    if (tab.disabled || tab.active) {
      e.preventDefault();
      e.stopPropagation();
      return;
    }
    window.location.hash = step;
    this.updateProgressState({ activeTab: step });
  };

  private backStep() {
    const targetTabIdx = STEPS.indexOf(this.progressState.activeTab!);
    // Index 0 is the first step; there is nothing before it.
    if (targetTabIdx) {
      this.updateProgressState({
        activeTab: STEPS[targetTabIdx - 1] as StepName,
      });
    }
  }

  /** Advance to the next step if the current panel has no invalid inputs. */
  private nextStep() {
    if (!this.checkCurrentPanelValidity()) return;

    const { activeTab, tabs, currentStep } = this.progressState;
    const nextTab: StepName | undefined = STEPS[STEPS.indexOf(activeTab) + 1];
    // Already on the last step: there is no tab to advance to.
    if (!nextTab) return;

    const isFirstTimeEnabled = !tabs[nextTab].enabled;
    this.updateProgressState({
      activeTab: nextTab,
      currentStep: isFirstTimeEnabled ? nextTab : currentStep,
      tabs: {
        [activeTab]: { completed: true },
        ...(isFirstTimeEnabled ? { [nextTab]: { enabled: true } } : {}),
      },
    });
  }

  private checkCurrentPanelValidity = (): boolean => {
    if (!this.formElem) return false;

    const currentTab = this.progressState.activeTab as StepName;
    const activePanel = this.formElem.querySelector(
      `btrix-tab-panel[name="newJobConfig-${currentTab}"]`
    );
    const invalidElems = [...activePanel!.querySelectorAll("[data-invalid]")];

    const hasInvalid = Boolean(invalidElems.length);
    if (hasInvalid) {
      invalidElems.forEach((el) => {
        (el as HTMLInputElement).reportValidity();
      });
    }

    return !hasInvalid;
  };

  private onKeyDown(event: KeyboardEvent) {
    const el = event.target as HTMLElement;
    const tagName = el.tagName.toLowerCase();
    if (tagName !== "sl-input") return;

    const { key } = event;
    if ((el as SlInput).type === "number") {
      // Prevent typing non-numeric keys, but let Ctrl/Cmd chords
      // (copy, paste, select all) through.
      const isShortcut = event.ctrlKey || event.metaKey;
      if (!isShortcut && key.length === 1 && /\D/.test(key)) {
        event.preventDefault();
        return;
      }
    }

    if (
      key === "Enter" &&
      this.progressState.activeTab !== STEPS[STEPS.length - 1]
    ) {
      // Prevent submission by "Enter" keypress if not on last tab
      event.preventDefault();
    }
  }

  private async onSubmit(event: SubmitEvent) {
    event.preventDefault();
    const isValid = this.checkCurrentPanelValidity();
    await this.updateComplete;

    if (!isValid || this.formHasError) {
      return;
    }

    const config = this.parseConfig();

    this.isSubmitting = true;

    try {
      const data = await this.apiFetch(
        `/archives/${this.archiveId}/crawlconfigs/`,
        this.authState!,
        {
          method: "POST",
          body: JSON.stringify(config),
        }
      );

      const crawlId = data.run_now_job;
      let message = msg("Crawl config created.");

      if (crawlId) {
        message = msg("Crawl started with new template.");
      } else if (this.configId) {
        message = msg("Crawl config updated.");
      }

      this.notify({
        message,
        variant: "success",
        icon: "check2-circle",
        duration: 8000,
      });

      if (crawlId) {
        this.navTo(`/archives/${this.archiveId}/crawls/crawl/${crawlId}`);
      } else {
        this.navTo(
          `/archives/${this.archiveId}/crawl-templates/config/${data.added}`
        );
      }
    } catch (e: any) {
      if (e?.isApiError) {
        const isConfigError = ({ loc }: any) =>
          loc.some((v: string) => v === "config");
        if (e.details && e.details.some(isConfigError)) {
          this.serverError = this.formatConfigServerError(e.details);
        } else {
          this.serverError = e.message;
        }
      } else {
        this.serverError = msg("Something unexpected went wrong");
      }
    } finally {
      this.isSubmitting = false;
    }
  }

  private async onReset() {
    this.initializeEditor();
  }

  /**
   * Format `config` related API error returned from server
   */
  private formatConfigServerError(details: any): TemplateResult {
    const detailsWithoutDictError = details.filter(
      ({ type }: any) => type !== "type_error.dict"
    );

    const renderDetail = ({ loc, msg: detailMsg }: any) => html`
  • ${loc.some((v: string) => v === "seeds") &&
    typeof loc[loc.length - 1] === "number"
      ? msg(str`Seed URL ${loc[loc.length - 1] + 1}: `)
      : `${loc[loc.length - 1]}: `} ${detailMsg}
  • `;

    return html` ${msg(
      "Couldn't save crawl config. Please fix the following crawl configuration issues:"
    )}
      ${detailsWithoutDictError.map(renderDetail)}
    `;
  }

  /** Build the API payload from the current form state. */
  private parseConfig(): NewCrawlConfigParams {
    const config: NewCrawlConfigParams = {
      jobType: this.jobType || "custom",
      name: this.formState.jobName || this.formState.primarySeedUrl,
      scale: this.formState.scale,
      profileid: this.formState.browserProfile?.id || null,
      runNow: this.formState.runNow || this.formState.scheduleType === "now",
      schedule: this.formState.scheduleType === "cron" ? this.utcSchedule : "",
      crawlTimeout: this.formState.crawlTimeoutMinutes
        ? this.formState.crawlTimeoutMinutes * 60
        : 0,
      tags: this.formState.tags,
      config: {
        ...(this.jobType === "seed-crawl"
          ? this.parseSeededConfig()
          : this.parseUrlListConfig()),
        behaviorTimeout: this.formState.pageTimeoutMinutes
          ? this.formState.pageTimeoutMinutes * 60
          : 0,
        limit: this.formState.pageLimit ? +this.formState.pageLimit : null,
        extraHops: this.formState.includeLinkedPages ? 1 : 0,
        lang: this.formState.lang || null,
        blockAds: this.formState.blockAds,
        exclude: trimExclusions(this.formState.exclusions),
      },
    };

    if (this.configId) {
      config.oldId = this.configId;
    }

    return config;
  }

  private parseUrlListConfig(): NewCrawlConfigParams["config"] {
    const config = {
      seeds: urlListToArray(this.formState.urlList),
      scopeType: "page" as FormState["scopeType"],
    };

    return config;
  }

  /**
   * Build the seed/scope part of the payload for a seeded crawl. When extra
   * allowed URLs are given, the scope becomes "custom" and the selected scope
   * type is expressed as `include` patterns instead.
   */
  private parseSeededConfig(): NewCrawlConfigParams["config"] {
    const primarySeedUrl = this.formState.primarySeedUrl.replace(/\/$/, "");
    const externalUrlList = this.formState.allowedExternalUrlList
      ? urlListToArray(this.formState.allowedExternalUrlList).map((str) =>
          str.replace(/\/$/, "")
        )
      : [];
    let scopeType = this.formState.scopeType;
    const include: string[] = [];
    if (externalUrlList.length) {
      // This runs during render (confirm panel), where the seed URL may still
      // be empty or malformed, so parsing must not throw.
      let seedUrl: URL | null = null;
      try {
        seedUrl = new URL(primarySeedUrl);
      } catch {
        seedUrl = null;
      }
      scopeType = "custom";

      // Replicate scope type with regex
      switch (this.formState.scopeType) {
        case "prefix":
          include.push(`${regexEscape(primarySeedUrl)}/.*`);
          break;
        case "host":
          if (seedUrl) {
            include.push(`${regexEscape(seedUrl.origin)}/.*`);
          }
          break;
        case "domain":
          if (seedUrl) {
            include.push(
              `${regexEscape(seedUrl.origin)}/.*`,
              // "\\." keeps the backslash in the resulting string so the dot
              // before the host stays a literal dot in the pattern.
              `.*\\.${regexEscape(seedUrl.host)}/.*`
            );
          }
          break;
        default:
          break;
      }

      externalUrlList.forEach((url) => {
        include.push(`${regexEscape(url)}/.*`);
      });
    }
    const config = {
      seeds: [primarySeedUrl],
      scopeType,
      include,
    };
    return config;
  }

  /**
   * Replace `progressState` with a new object that includes `nextState`.
   * Merges deeply by default, so partial per-tab updates are allowed.
   */
  private updateProgressState(
    nextState: {
      activeTab?: ProgressState["activeTab"];
      currentStep?: ProgressState["currentStep"];
      tabs?: {
        [K in StepName]?: Partial<TabState>;
      };
    },
    shallowMerge = false
  ) {
    if (shallowMerge) {
      this.progressState = {
        ...this.progressState,
        ...(nextState as Partial<ProgressState>),
      };
    } else {
      this.progressState = mergeDeep(this.progressState, nextState);
    }
  }

  /**
   * Replace `formState` with a new object that includes `nextState`.
   * Pass `shallowMerge = true` to replace a field wholesale (e.g. arrays)
   * rather than deep-merging it.
   */
  private updateFormState(nextState: Partial<FormState>, shallowMerge = false) {
    if (shallowMerge) {
      this.formState = {
        ...this.formState,
        ...nextState,
      };
    } else {
      this.formState = mergeDeep(this.formState, nextState);
    }
  }
}

customElements.define("btrix-crawl-config-editor", CrawlConfigEditor);