import { state, property } from "lit/decorators.js";
import { ifDefined } from "lit/directives/if-defined.js";
import { msg, localized, str } from "@lit/localize";
import { parse as yamlToJson, stringify as jsonToYaml } from "yaml";
import compact from "lodash/fp/compact";
import merge from "lodash/fp/merge";
import flow from "lodash/fp/flow";
import uniq from "lodash/fp/uniq";

import type {
  ExclusionRemoveEvent,
  ExclusionChangeEvent,
} from "../../components/queue-exclusion-table";
import type { AuthState } from "../../utils/AuthService";
import LiteElement, { html } from "../../utils/LiteElement";
import { ScheduleInterval, humanizeNextDate } from "../../utils/cron";
import type { CrawlConfig, Profile } from "./types";
import { getUTCSchedule } from "../../utils/cron";
import { TemplateResult } from "lit";

/** Payload sent to the server when creating a crawl template. */
type NewCrawlTemplate = {
  id?: string;
  name: string;
  schedule: string;
  runNow: boolean;
  crawlTimeout?: number;
  scale: number;
  config: CrawlConfig;
  profileid: string | null;
};

/** Subset of the template that callers may pre-populate (e.g. when cloning). */
export type InitialCrawlTemplate = Pick<
  NewCrawlTemplate,
  "name" | "config" | "profileid"
>;

/** Values used for any field the caller does not provide. */
const defaultValue = {
  name: "",
  profileid: null,
  config: {
    seeds: [],
    scopeType: "prefix",
    // Show default empty editable rows
    exclude: Array.from({ length: 3 }).map(() => ""),
  },
} as InitialCrawlTemplate;

/** Options for the schedule hour picker: 1 through 12. */
const hours = Array.from({ length: 12 }).map((x, i) => ({
  value: i + 1,
  label: `${i + 1}`,
}));

/** Options for the schedule minute picker: 00 through 59. */
const minutes = Array.from({ length: 60 }).map((x, i) => ({
  value: i,
  label: `${i}`.padStart(2, "0"),
}));

/** Removes duplicate and empty exclusion entries. */
const trimExclusions = flow(uniq, compact);

/**
 * Returns the part of a language tag before the first "-",
 * e.g. "en" for "en-US". Tags without a "-" are returned unchanged.
 */
const getPrimaryLanguage = (languageTag: string): string => {
  const separatorIndex = languageTag.indexOf("-");
  // `indexOf` returns -1 when there is no separator; slicing with -1
  // would drop the last character ("en" -> "e").
  return separatorIndex === -1
    ? languageTag
    : languageTag.slice(0, separatorIndex);
};

/**
 * Splits the raw seed URL field into individual entries.
 * Entries may be separated by whitespace and/or commas; empty entries
 * (e.g. from a trailing comma or an empty field) are dropped.
 */
const parseSeedUrls = (value: FormDataEntryValue | null): string[] =>
  typeof value === "string" ? value.split(/[\s,]+/).filter(Boolean) : [];

/**
 * Form for creating a new crawl template, with an optional YAML editor
 * for the crawl config.
 *
 * Usage:
 * ```ts
 * html`<btrix-crawl-templates-new
 *   .authState=${authState}
 *   .archiveId=${archiveId}
 *   .initialCrawlTemplate=${initialCrawlTemplate}
 * ></btrix-crawl-templates-new>`
 * ```
 *
 * NOTE(review): the `html` templates in this copy of the file contain
 * dangling handler fragments (e.g. `(this.isRunNow = e.target.checked)} >`)
 * with no enclosing element, so the element markup appears to have been
 * lost. The template text is kept as found; restore the markup from the
 * original source before shipping.
 */
@localized()
export class CrawlTemplatesNew extends LiteElement {
  @property({ type: Object })
  authState!: AuthState;

  @property({ type: String })
  archiveId!: string;

  private _initialCrawlTemplate: InitialCrawlTemplate = defaultValue;

  // Use custom property accessor to prevent
  // overriding default crawl template values
  @property({ type: Object })
  get initialCrawlTemplate() {
    return this._initialCrawlTemplate;
  }
  set initialCrawlTemplate(val: any) {
    // `lodash/fp` merge returns a new object, so `defaultValue` is not mutated
    this._initialCrawlTemplate = merge(this._initialCrawlTemplate, val);
  }

  @state()
  private isRunNow: boolean = true;

  @state()
  private scheduleInterval: ScheduleInterval | "" = "";

  /** Schedule local time */
  @state()
  private scheduleTime: { hour: number; minute: number; period: "AM" | "PM" } =
    {
      hour: new Date().getHours() % 12 || 12,
      minute: 0,
      period: new Date().getHours() > 11 ? "PM" : "AM",
    };

  @state()
  private isConfigCodeView: boolean = false;

  /** YAML or stringified JSON config */
  @state()
  private configCode: string = "";

  @state()
  private exclusions: CrawlConfig["exclude"] = defaultValue.config.exclude;

  private browserLanguage: CrawlConfig["lang"] = null;

  @state()
  private isSubmitting: boolean = false;

  @state()
  private browserProfileId?: string | null;

  @state()
  private serverError?: TemplateResult | string;

  @state()
  private exclusionFieldErrorMessage?: string;

  /** Humanized date of the next scheduled crawl, if a schedule is set. */
  private get formattedNextCrawlDate() {
    const utcSchedule = this.getUTCSchedule();

    return this.scheduleInterval ? humanizeNextDate(utcSchedule) : undefined;
  }

  /** Seeds component state from `initialCrawlTemplate` and the browser locale. */
  connectedCallback(): void {
    // Show JSON editor view if complex initial config is specified
    // (e.g. cloning a template) since form UI doesn't support
    // all available fields in the config
    const isComplexConfig = this.initialCrawlTemplate.config.seeds.some(
      (seed: unknown) => typeof seed !== "string"
    );
    if (isComplexConfig) {
      this.isConfigCodeView = true;
    }
    this.configCode = jsonToYaml(this.initialCrawlTemplate.config);
    if (this.initialCrawlTemplate.config.exclude?.length) {
      this.exclusions = this.initialCrawlTemplate.config.exclude;
    }
    this.browserProfileId = this.initialCrawlTemplate.profileid;

    // Default to current user browser language
    const browserLanguage = window.navigator.language;
    if (browserLanguage) {
      this.browserLanguage = getPrimaryLanguage(browserLanguage);
    }

    super.connectedCallback();
  }

  /**
   * Keeps the YAML editor and the exclusion form rows in sync whenever
   * the user toggles between the form view and the code view.
   */
  willUpdate(changedProperties: Map<string, unknown>) {
    // The previous value is only defined once the view has been toggled
    if (changedProperties.get("isConfigCodeView") === undefined) {
      return;
    }

    if (this.isConfigCodeView) {
      this.configCode = jsonToYaml(
        merge(this.initialCrawlTemplate.config, {
          exclude: trimExclusions(this.exclusions),
        })
      );
      return;
    }

    let parsedConfig: CrawlConfig | null;
    try {
      // `parse` yields null for an empty document
      parsedConfig = yamlToJson(this.configCode) as CrawlConfig | null;
    } catch {
      // Invalid YAML: keep the current exclusion rows rather than
      // throwing in the middle of the update cycle
      return;
    }
    const exclude = parsedConfig?.exclude;
    this.exclusions = exclude?.length ? exclude : defaultValue.config.exclude;
  }

  render() {
    return html`
      ${msg("Back to Crawl Templates")}
      ${msg("New Crawl Template")}
      ${msg(
        "Configure a new crawl template. You can choose to run a crawl immediately upon saving this template."
      )}
      ${this.renderBasicSettings()}
      ${this.renderCrawlConfigSettings()}
      ${this.renderScheduleSettings()}
      (this.isRunNow = e.target.checked)} >${msg("Run immediately on save")}
      ${this.serverError ? html`${this.serverError}` : ""}
      ${this.isRunNow ? msg("Save & Run Template") : msg("Save Template")}
    `;
  }

  private renderBasicSettings() {
    return html`
      ${msg("Basic Settings")}
      (this.browserProfileId = e.detail.value ? e.detail.value.id : null)} >
    `;
  }

  private renderScheduleSettings() {
    return html`
      ${msg("Crawl Schedule")}
      (this.scheduleInterval = e.target.value)} >
      ${msg("None")}
      ${msg("Daily")}
      ${msg("Weekly")}
      ${msg("Monthly")}
      ${msg("Time")}
      (this.scheduleTime = { ...this.scheduleTime, hour: +e.target.value, })} >
      ${hours.map(({ value, label }) => html`${label}`)}
      :
      (this.scheduleTime = { ...this.scheduleTime, minute: +e.target.value, })} >
      ${minutes.map(({ value, label }) => html`${label}`)}
      (this.scheduleTime = { ...this.scheduleTime, period: "AM", })} >${msg(
        "AM",
        { desc: "Time AM/PM" }
      )}
      (this.scheduleTime = { ...this.scheduleTime, period: "PM", })} >${msg(
        "PM",
        { desc: "Time AM/PM" }
      )}
      ${this.formattedNextCrawlDate
        ? msg(html`Next scheduled crawl: ${this.formattedNextCrawlDate}`)
        : msg("No crawls scheduled")}
      ${msg("minutes")}
    `;
  }

  private renderCrawlConfigSettings() {
    return html`
      ${msg("Crawl Settings")}
      ${msg("Crawler Instances")}
      ${msg("1")}
      ${msg("2")}
      ${msg("3")}
      (this.browserLanguage = e.detail.item.value)}
      @sl-clear=${() => (this.browserLanguage = null)} >
      ${msg("Language")}
      ${this.isConfigCodeView
        ? msg("Custom Config")
        : msg("Crawl Configuration")}
      (this.isConfigCodeView = e.target.checked)} >
      ${msg("Advanced Editor")}
      ${this.renderSeedsCodeEditor()}
      ${this.renderSeedsForm()}
      ${this.renderExclusionEditor()}
    `;
  }

  private renderSeedsForm() {
    return html`
      Page Page SPA Prefix Host Domain Any
      ${msg("Include External Links (“one hop out”)")}
      ${msg("pages")}
    `;
  }

  private renderExclusionEditor() {
    if (!this.initialCrawlTemplate.config) {
      return;
    }

    return html`
      (this.exclusions = [...(this.exclusions || []), ""])} >
      ${msg("Add More")}
    `;
  }

  private renderSeedsCodeEditor() {
    return html`
      ${msg(
        html`See Browsertrix Crawler docs for all configuration options.`
      )}
      { this.configCode = e.detail.value; }} >
    `;
  }

  /**
   * Builds the request payload from the submitted form and component state.
   *
   * In code view the config is taken from the YAML editor, so this method
   * throws if the editor content is not valid YAML. In form view the
   * config is assembled from the individual form fields.
   */
  private parseTemplate(formData: FormData) {
    const crawlTimeoutMinutes = formData.get("crawlTimeoutMinutes");
    const pageLimit = formData.get("limit");
    const seedUrlsStr = formData.get("seedUrls");
    const scale = formData.get("scale") as string;
    const template: Partial<NewCrawlTemplate> = {
      name: formData.get("name") as string,
      schedule: this.getUTCSchedule(),
      runNow: this.isRunNow,
      // Form value is in minutes; payload value is minutes * 60
      crawlTimeout: crawlTimeoutMinutes ? +crawlTimeoutMinutes * 60 : 0,
      scale: +scale,
      profileid: this.browserProfileId,
    };

    if (this.isConfigCodeView) {
      template.config = yamlToJson(this.configCode) as CrawlConfig;
    } else {
      template.config = {
        seeds: parseSeedUrls(seedUrlsStr),
        scopeType: formData.get("scopeType") as string,
        limit: pageLimit ? +pageLimit : 0,
        extraHops: formData.get("extraHopsOne") ? 1 : 0,
        exclude: trimExclusions(this.exclusions),
        lang: this.browserLanguage || null,
      };
    }

    return template;
  }

  /** Removes the exclusion row at the index given by the event. */
  private handleRemoveRegex(e: ExclusionRemoveEvent) {
    const { index } = e.detail;
    if (!this.exclusions) {
      this.exclusions = defaultValue.config.exclude;
    } else {
      // Copy instead of splicing so the state change is detected
      this.exclusions = [
        ...this.exclusions.slice(0, index),
        ...this.exclusions.slice(index + 1),
      ];
    }
  }

  /** Replaces the exclusion row at the index given by the event. */
  private handleChangeRegex(e: ExclusionChangeEvent) {
    const { regex, index } = e.detail;

    const nextExclusions = [...(this.exclusions ?? [])];
    nextExclusions[index] = regex;
    this.exclusions = nextExclusions;
  }

  /**
   * Validates and submits the form, then navigates to the started crawl
   * or to the newly created template. Failures are surfaced through
   * `serverError`.
   */
  private async onSubmit(event: SubmitEvent) {
    event.preventDefault();
    if (!this.authState) return;

    const form = event.target as HTMLFormElement;
    if (form.querySelector("[invalid]")) {
      return;
    }

    const formData = new FormData(form);

    this.serverError = undefined;
    this.isSubmitting = true;

    try {
      // Parsed inside the `try` so that invalid YAML in the code editor
      // is reported to the user instead of rejecting silently
      const params = this.parseTemplate(formData);
      const data = await this.apiFetch(
        `/archives/${this.archiveId}/crawlconfigs/`,
        this.authState,
        {
          method: "POST",
          body: JSON.stringify(params),
        }
      );

      const crawlId = data.run_now_job;

      this.notify({
        message: crawlId
          ? msg("Crawl started with new template.")
          : msg("Crawl template created."),
        variant: "success",
        icon: "check2-circle",
        duration: 8000,
      });

      if (crawlId) {
        this.navTo(`/archives/${this.archiveId}/crawls/crawl/${crawlId}`);
      } else {
        this.navTo(
          `/archives/${this.archiveId}/crawl-templates/config/${data.added}`
        );
      }
    } catch (e: any) {
      if (e?.isApiError) {
        const isConfigError = ({ loc }: any) =>
          loc.some((v: string) => v === "config");
        if (e.details && e.details.some(isConfigError)) {
          this.serverError = this.formatConfigServerError(e.details);
        } else {
          this.serverError = e.message;
        }
      } else {
        this.serverError = msg("Something unexpected went wrong");
      }
    } finally {
      this.isSubmitting = false;
    }
  }

  /**
   * Format `config` related API error returned from server
   */
  private formatConfigServerError(details: any): TemplateResult {
    const detailsWithoutDictError = details.filter(
      ({ type }: any) => type !== "type_error.dict"
    );

    // Seed errors end in a zero-based index; show it as a one-based position
    const renderDetail = ({ loc, msg: detailMsg }: any) => html`
      ${loc.some((v: string) => v === "seeds") &&
      typeof loc[loc.length - 1] === "number"
        ? msg(str`Seed URL ${loc[loc.length - 1] + 1}: `)
        : `${loc[loc.length - 1]}: `}
      ${detailMsg}
    `;

    return html`
      ${msg(
        "Couldn't save crawl template. Please fix the following crawl configuration issues:"
      )}
      ${detailsWithoutDictError.map(renderDetail)}
    `;
  }

  /** Returns the UTC schedule string, or "" when no interval is selected. */
  private getUTCSchedule(): string {
    if (!this.scheduleInterval) {
      return "";
    }
    const { minute, hour, period } = this.scheduleTime;

    return getUTCSchedule({
      interval: this.scheduleInterval,
      hour,
      minute,
      period,
    });
  }
}

customElements.define("btrix-crawl-templates-new", CrawlTemplatesNew);
${msg( "Configure a new crawl template. You can choose to run a crawl immediately upon saving this template." )}
${msg( html`See Browsertrix Crawler docs for all configuration options.` )}