import { state, property } from "lit/decorators.js";
import { ifDefined } from "lit/directives/if-defined.js";
import { msg, localized, str } from "@lit/localize";
import cronParser from "cron-parser";

import type { AuthState } from "../../utils/AuthService";
import LiteElement, { html } from "../../utils/LiteElement";
import { getLocaleTimeZone } from "../../utils/localization";
import type { CrawlConfig } from "./types";

/** Payload built by the form and POSTed when creating a crawl template. */
export type NewCrawlTemplate = {
  id?: string;
  name: string;
  /** Cron expression in UTC, or an empty string when no schedule is set. */
  schedule: string;
  runNow: boolean;
  /** Crawl timeout in seconds (the form collects minutes). */
  crawlTimeout?: number;
  config: CrawlConfig;
};

/** Defaults used to seed the form state and the initial crawl config. */
const initialValues = {
  name: "",
  runNow: true,
  schedule: "@weekly",
  config: {
    seeds: [],
    scopeType: "prefix",
  },
};

/** Hour options 1..12 for the 12-hour schedule time picker. */
const hours = Array.from({ length: 12 }, (_, i) => ({
  value: i + 1,
  label: `${i + 1}`,
}));

/** Minute options 0..59, labels zero-padded to two digits. */
const minutes = Array.from({ length: 60 }, (_, i) => ({
  value: i,
  label: `${i}`.padStart(2, "0"),
}));

/**
 * Form for creating a new crawl template, registered as the
 * `btrix-crawl-templates-new` custom element.
 *
 * Reads `authState`, `archiveId` and (optionally) `initialCrawlConfig`
 * from its properties.
 *
 * Usage:
 * ```ts
 * <btrix-crawl-templates-new></btrix-crawl-templates-new>
 * ```
 *
 * NOTE(review): several templates below contain event-handler fragments as
 * literal text and no element markup; it looks like markup was lost from
 * this file. Template text is kept verbatim here; confirm against the
 * intended component markup.
 */
@localized()
export class CrawlTemplatesNew extends LiteElement {
  @property({ type: Object })
  authState!: AuthState;

  @property({ type: String })
  archiveId!: string;

  /** Optional config to prefill the form with (e.g. cloning a template). */
  @property({ type: Object })
  initialCrawlConfig?: CrawlConfig;

  @state()
  private isRunNow: boolean = initialValues.runNow;

  /** Empty string means "no schedule". */
  @state()
  private scheduleInterval: "" | "daily" | "weekly" | "monthly" = "weekly";

  /** Schedule local time */
  @state()
  private scheduleTime: { hour: number; minute: number; period: "AM" | "PM" } =
    {
      // Current local hour converted to 12-hour clock (0 -> 12)
      hour: new Date().getHours() % 12 || 12,
      minute: 0,
      period: new Date().getHours() > 11 ? "PM" : "AM",
    };

  @state()
  private isSeedsJsonView: boolean = false;

  /** Pretty-printed JSON of the crawl config; only updated with valid JSON. */
  @state()
  private seedsJson: string = "";

  /** Non-empty while the JSON editor content fails to parse. */
  @state()
  private invalidSeedsJsonMessage: string = "";

  @state()
  private isSubmitting: boolean = false;

  @state()
  private serverError?: string;

  /** IANA time zone name resolved by the runtime's `Intl` implementation. */
  private get timeZone() {
    return Intl.DateTimeFormat().resolvedOptions().timeZone;
  }

  private get timeZoneShortName() {
    return getLocaleTimeZone();
  }

  /**
   * Template for the next crawl date, or `undefined` when no schedule
   * interval is selected.
   */
  private get formattededNextCrawlDate() {
    // NOTE(review): `utcSchedule` is not referenced by the (empty) template
    // below; presumably markup that consumed it is missing. Confirm.
    const utcSchedule = this.getUTCSchedule();
    return this.scheduleInterval ? html`` : undefined;
  }

  connectedCallback(): void {
    // Show JSON editor view if complex initial config is specified
    // (e.g. cloning a template) since form UI doesn't support
    // all available fields in the config
    const isComplexConfig = this.initialCrawlConfig?.seeds.some(
      (seed: unknown) => typeof seed !== "string"
    );
    if (isComplexConfig) {
      this.isSeedsJsonView = true;
    }
    // Fill in defaults for any fields the provided config leaves out
    this.initialCrawlConfig = {
      ...initialValues.config,
      ...this.initialCrawlConfig,
    };
    this.seedsJson = JSON.stringify(this.initialCrawlConfig, null, 2);
    super.connectedCallback();
  }

  render() {
    return html`

${msg("New Crawl Template")}

${msg( "Configure a new crawl template. You can choose to run a crawl immediately upon saving this template." )}

${this.renderBasicSettings()} ${this.renderCrawlConfigSettings()} ${this.renderScheduleSettings()}
${this.serverError ? html`${this.serverError}` : ""}
${msg("Save Crawl Template")}
${this.isRunNow || this.scheduleInterval ? html`
${this.isRunNow ? html`

${msg("A crawl will start immediately on save.")}

` : ""} ${this.scheduleInterval ? html`

${msg( html`Scheduled crawl will run ${this.formattededNextCrawlDate}.` )}

` : ""}
` : ""}
`;
  }

  private renderBasicSettings() {
    return html`

${msg("Basic Settings")}

`;
  }

  private renderScheduleSettings() {
    return html`

${msg("Crawl Schedule")}

(this.scheduleInterval = e.target.value)} > ${msg("None")} ${msg("Daily")} ${msg("Weekly")} ${msg("Monthly")}
${msg("At")} (this.scheduleTime = { ...this.scheduleTime, hour: +e.target.value, })} > ${hours.map( ({ value, label }) => html`${label}` )} : (this.scheduleTime = { ...this.scheduleTime, minute: +e.target.value, })} > ${minutes.map( ({ value, label }) => html`${label}` )} (this.scheduleTime = { ...this.scheduleTime, period: e.target.value, })} > ${msg("AM", { desc: "Time AM/PM" })} ${msg("PM", { desc: "Time AM/PM" })} ${this.timeZoneShortName}
${this.formattededNextCrawlDate ? msg( html`Next scheduled crawl: ${this.formattededNextCrawlDate}` ) : msg("No crawls scheduled")}
(this.isRunNow = e.target.checked)} >${msg("Run immediately on save")} ${msg("minutes")}
`;
  }

  private renderCrawlConfigSettings() {
    return html`

${msg("Crawl Configuration")}

${this.isSeedsJsonView ? msg("Custom Config") : msg("Configure Seeds")}

(this.isSeedsJsonView = e.target.checked)} > ${msg("Use JSON Editor")}
${this.isSeedsJsonView ? this.renderSeedsJson() : this.renderSeedsForm()}
`;
  }

  private renderSeedsForm() {
    return html` Page Page SPA Prefix Host Any ${msg("Include External Links ('one hop out')")} ${msg("pages")} `;
  }

  private renderSeedsJson() {
    return html`

${msg( html`See Browsertrix Crawler docs for all configuration options.` )}

${this.renderSeedsJsonInput()}
${this.invalidSeedsJsonMessage ? html` ${this.invalidSeedsJsonMessage} ` : html` ${msg("Valid JSON")} `}
`;
  }

  private renderSeedsJsonInput() {
    return html` `;
  }

  /**
   * Validate and normalize the JSON editor content.
   *
   * On valid JSON, stores the re-serialized (2-space indented) text and
   * clears any validity error on the event target. On invalid JSON, stores
   * a localized error message and leaves `seedsJson` untouched.
   */
  private updateSeedsJson(e: any) {
    const textarea = e.target;
    const text = textarea.value;
    try {
      const json = JSON.parse(text);
      this.seedsJson = JSON.stringify(json, null, 2);
      this.invalidSeedsJsonMessage = "";
      textarea.setCustomValidity("");
      textarea.reportValidity();
    } catch (err: unknown) {
      const detail = err instanceof Error ? err.message : "";
      // Drop the "JSON.parse: " prefix some engines put on the message
      this.invalidSeedsJsonMessage = detail
        ? msg(str`JSON is invalid: ${detail.replace("JSON.parse: ", "")}`)
        : msg("JSON is invalid.");
    }
  }

  /**
   * Build the request payload from submitted form data and component state.
   *
   * In JSON view the config is taken from `seedsJson`; otherwise it is
   * assembled from the `seedUrls`, `scopeType`, `limit` and `extraHopsOne`
   * form fields.
   */
  private parseTemplate(formData: FormData) {
    const crawlTimeoutMinutes = formData.get("crawlTimeoutMinutes");
    const pageLimit = formData.get("limit");
    const seedUrlsStr = formData.get("seedUrls");
    const template: Partial<NewCrawlTemplate> = {
      name: formData.get("name") as string,
      schedule: this.getUTCSchedule(),
      runNow: this.isRunNow,
      // Form value is in minutes; payload is in seconds
      crawlTimeout: crawlTimeoutMinutes ? +crawlTimeoutMinutes * 60 : 0,
    };

    if (this.isSeedsJsonView) {
      template.config = JSON.parse(this.seedsJson);
    } else {
      // URLs may be separated by commas and/or whitespace. A missing or
      // blank field yields no seeds instead of a single empty-URL seed.
      const rawSeedUrls = typeof seedUrlsStr === "string" ? seedUrlsStr : "";
      template.config = {
        seeds: rawSeedUrls
          .replace(/,/g, " ")
          .split(/\s+/g)
          .filter((url) => url.length > 0)
          .map((url) => ({ url })),
        scopeType: formData.get("scopeType") as string,
        limit: pageLimit ? +pageLimit : 0,
        extraHops: formData.get("extraHopsOne") ? 1 : 0,
      };
    }

    return template;
  }

  /**
   * Handle form submission: POST the new crawl config, then notify and
   * navigate to the crawl templates list on success, or surface the error
   * in `serverError` on failure.
   */
  private async onSubmit(event: {
    detail: { formData: FormData };
    target: any;
  }) {
    if (!this.authState) return;

    if (this.isSeedsJsonView && this.invalidSeedsJsonMessage) {
      // Check JSON validity
      const jsonEditor = event.target.querySelector("#json-editor");
      // Guard: the editor element may not be present in the form
      jsonEditor?.setCustomValidity(msg("Please correct JSON errors."));
      jsonEditor?.reportValidity();
      return;
    }

    const params = this.parseTemplate(event.detail.formData);

    this.serverError = undefined;
    this.isSubmitting = true;

    try {
      const data = await this.apiFetch(
        `/archives/${this.archiveId}/crawlconfigs/`,
        this.authState,
        {
          method: "POST",
          body: JSON.stringify(params),
        }
      );

      this.notify({
        message: data.run_now_job
          ? msg(
              str`Crawl running with new template.
View crawl`
            )
          : msg("Crawl template created."),
        type: "success",
        icon: "check2-circle",
        duration: 10000,
      });

      this.navTo(`/archives/${this.archiveId}/crawl-templates`);
    } catch (e: any) {
      // Errors flagged `isApiError` have their message shown verbatim;
      // anything else gets a generic localized message.
      if (e?.isApiError) {
        this.serverError = e?.message;
      } else {
        this.serverError = msg("Something unexpected went wrong");
      }
    } finally {
      // Always clear the submitting flag, whatever the outcome
      this.isSubmitting = false;
    }
  }

  /**
   * Get schedule as UTC cron job expression
   * https://kubernetes.io/docs/concepts/workloads/controllers/cron-jobs/#cron-schedule-syntax
   *
   * @returns `"<minute> <hour> <day-of-month> * <day-of-week>"` in UTC, or
   *   an empty string when no schedule interval is selected.
   **/
  private getUTCSchedule(): string {
    if (!this.scheduleInterval) {
      return "";
    }

    const { minute, hour, period } = this.scheduleTime;
    const localDate = new Date();

    // Convert 12-hr to 24-hr time: 12 AM -> 0, 12 PM -> 12, other PM -> +12
    const hour24 = (+hour % 12) + (period === "PM" ? 12 : 0);
    localDate.setHours(hour24);
    localDate.setMinutes(+minute);

    // Day fields are taken from today's date after conversion to UTC.
    // NOTE(review): a day-of-month of 29-31 does not occur in every month.
    const dayOfMonth =
      this.scheduleInterval === "monthly" ? localDate.getUTCDate() : "*";
    const dayOfWeek =
      this.scheduleInterval === "weekly" ? localDate.getUTCDay() : "*";
    const month = "*";

    return `${localDate.getUTCMinutes()} ${localDate.getUTCHours()} ${dayOfMonth} ${month} ${dayOfWeek}`;
  }
}

customElements.define("btrix-crawl-templates-new", CrawlTemplatesNew);