import { state, property } from "lit/decorators.js";
import { ifDefined } from "lit/directives/if-defined.js";
import { msg, localized, str } from "@lit/localize";
import cronParser from "cron-parser";
import type { AuthState } from "../../utils/AuthService";
import LiteElement, { html } from "../../utils/LiteElement";
import { getLocaleTimeZone } from "../../utils/localization";
import type { CrawlConfig } from "./types";
/**
 * Shape of a crawl template being created.
 *
 * NOTE(review): presumably the body POSTed to the `crawlconfigs` endpoint by
 * `CrawlTemplatesNew` — confirm against the API.
 */
export type NewCrawlTemplate = {
/** NOTE(review): presumably absent until the template has been saved — confirm. */
id?: string;
/** Template name, read from the form's `name` field. */
name: string;
/** Cron expression in UTC, or an empty string when no schedule is selected. */
schedule: string;
/** Whether a crawl should start as soon as the template is saved. */
runNow: boolean;
/** Crawl time limit; the form collects minutes and multiplies by 60 (presumably seconds — confirm). */
crawlTimeout?: number;
/** Crawl configuration (seeds, scope type, ...). */
config: CrawlConfig;
};
/**
 * Default values for a new crawl template.
 *
 * `config` is merged underneath any caller-supplied initial crawl config, so
 * its key order (`seeds`, then `scopeType`) is kept stable.
 */
const initialValues = {
  name: "",
  runNow: true,
  schedule: "@weekly",
  config: { seeds: [], scopeType: "prefix" },
};
/** Hour options 1 through 12 for the 12-hour schedule time selector. */
const hours = Array.from({ length: 12 }, (_unused, index) => {
  const hour = index + 1;
  return { value: hour, label: String(hour) };
});
/** Minute options 0 through 59, labelled with two digits ("00" ... "59"). */
const minutes = Array.from({ length: 60 }, (_unused, minute) => {
  const label = String(minute).padStart(2, "0");
  return { value: minute, label };
});
/**
 * Form for creating a new crawl template. The template can start a crawl
 * immediately on save and/or run on a daily, weekly or monthly schedule.
 *
 * Usage:
 * ```ts
 * <btrix-crawl-templates-new
 *   .authState=${authState}
 *   .archiveId=${archiveId}
 *   .initialCrawlConfig=${crawlConfig}
 * ></btrix-crawl-templates-new>
 * ```
 *
 * Template literal contents in this class are kept flush-left on purpose:
 * whitespace inside a template literal is part of the rendered (and
 * localized) string.
 *
 * NOTE(review): several templates contain fragments such as
 * `(this.isRunNow = e.target.checked)}` as literal text, which suggests that
 * element markup and event bindings are missing — confirm against the
 * intended markup. Template text is reproduced unchanged here.
 */
@localized()
export class CrawlTemplatesNew extends LiteElement {
  /** Auth state forwarded to `apiFetch`; submitting is a no-op while unset. */
  @property({ type: Object })
  authState!: AuthState;

  /** Archive ID used to build the API and navigation paths. */
  @property({ type: String })
  archiveId!: string;

  /** Optional config to start from; merged over the defaults on connect. */
  @property({ type: Object })
  initialCrawlConfig?: CrawlConfig;

  /** Whether a crawl should start as soon as the template is saved. */
  @state()
  private isRunNow: boolean = initialValues.runNow;

  /** Selected schedule interval; the empty string means "no schedule". */
  @state()
  private scheduleInterval: "" | "daily" | "weekly" | "monthly" = "weekly";

  /** Schedule local time (12-hour clock), defaulting to the current hour */
  @state()
  private scheduleTime: { hour: number; minute: number; period: "AM" | "PM" } =
    {
      hour: new Date().getHours() % 12 || 12,
      minute: 0,
      period: new Date().getHours() > 11 ? "PM" : "AM",
    };

  /** Whether the raw JSON editor is shown instead of the seeds form. */
  @state()
  private isSeedsJsonView = false;

  /** Pretty-printed JSON config edited in the JSON view. */
  @state()
  private seedsJson = "";

  /** Localized parse error for the JSON editor; empty when the JSON is valid. */
  @state()
  private invalidSeedsJsonMessage = "";

  /** True while the create request is in flight. */
  @state()
  private isSubmitting = false;

  /** Error message shown after a failed save. */
  @state()
  private serverError?: string;

  /** Time zone name as resolved by `Intl.DateTimeFormat`. */
  private get timeZone() {
    return Intl.DateTimeFormat().resolvedOptions().timeZone;
  }

  /** Time zone label rendered next to the schedule time. */
  private get timeZoneShortName() {
    return getLocaleTimeZone();
  }

  /**
   * Template for the next scheduled crawl date, or `undefined` when no
   * schedule interval is selected.
   *
   * NOTE(review): the returned template is empty, so no date is actually
   * rendered. Presumably markup that formatted the next run from the UTC
   * schedule is missing — confirm.
   */
  private get formattedNextCrawlDate() {
    return this.scheduleInterval
      ? html``
      : undefined;
  }

  /**
   * Merges the caller-provided config over the defaults and seeds the JSON
   * editor with the result before the element connects.
   */
  connectedCallback(): void {
    // Show JSON editor view if complex initial config is specified
    // (e.g. cloning a template) since form UI doesn't support
    // all available fields in the config
    const isComplexConfig = this.initialCrawlConfig?.seeds.some(
      (seed: unknown) => typeof seed !== "string"
    );
    if (isComplexConfig) {
      this.isSeedsJsonView = true;
    }

    this.initialCrawlConfig = {
      ...initialValues.config,
      ...this.initialCrawlConfig,
    };
    this.seedsJson = JSON.stringify(this.initialCrawlConfig, null, 2);

    super.connectedCallback();
  }

  /** Renders the whole form: settings sections, server error and save summary. */
  render() {
    return html`
${msg("New Crawl Template")}
${msg(
"Configure a new crawl template. You can choose to run a crawl immediately upon saving this template."
)}
${this.renderBasicSettings()} ${this.renderCrawlConfigSettings()}
${this.renderScheduleSettings()}
${this.serverError
? html`
${this.serverError}`
: ""}
${msg("Save Crawl Template")}
${this.isRunNow || this.scheduleInterval
? html`
${this.isRunNow
? html`
${msg("A crawl will start immediately on save.")}
`
: ""}
${this.scheduleInterval
? html`
${msg(
html`Scheduled crawl will run
${this.formattedNextCrawlDate}.`
)}
`
: ""}
`
: ""}
`;
  }

  /** Renders the "Basic Settings" section. */
  private renderBasicSettings() {
    return html`
${msg("Basic Settings")}
`;
  }

  /** Renders the "Crawl Schedule" section (interval, time of day, run-now). */
  private renderScheduleSettings() {
    return html`
${msg("Crawl Schedule")}
(this.scheduleInterval = e.target.value)}
>
${msg("None")}
${msg("Daily")}
${msg("Weekly")}
${msg("Monthly")}
${msg("At")}
(this.scheduleTime = {
...this.scheduleTime,
hour: +e.target.value,
})}
>
${hours.map(
({ value, label }) =>
html`${label}`
)}
:
(this.scheduleTime = {
...this.scheduleTime,
minute: +e.target.value,
})}
>
${minutes.map(
({ value, label }) =>
html`${label}`
)}
(this.scheduleTime = {
...this.scheduleTime,
period: e.target.value,
})}
>
${msg("AM", { desc: "Time AM/PM" })}
${msg("PM", { desc: "Time AM/PM" })}
${this.timeZoneShortName}
${this.formattedNextCrawlDate
? msg(
html`Next scheduled crawl: ${this.formattedNextCrawlDate}`
)
: msg("No crawls scheduled")}
(this.isRunNow = e.target.checked)}
>${msg("Run immediately on save")}
${msg("minutes")}
`;
  }

  /** Renders the "Crawl Configuration" section with either the form or JSON view. */
  private renderCrawlConfigSettings() {
    return html`
${msg("Crawl Configuration")}
${this.isSeedsJsonView
? msg("Custom Config")
: msg("Configure Seeds")}
(this.isSeedsJsonView = e.target.checked)}
>
${msg("Use JSON Editor")}
${this.isSeedsJsonView
? this.renderSeedsJson()
: this.renderSeedsForm()}
`;
  }

  /** Renders the simple seeds form (scope type, external links, page limit). */
  private renderSeedsForm() {
    return html`
Page
Page SPA
Prefix
Host
Any
${msg("Include External Links ('one hop out')")}
${msg("pages")}
`;
  }

  /** Renders the JSON editor together with its validity message. */
  private renderSeedsJson() {
    return html`
${this.renderSeedsJsonInput()}
${this.invalidSeedsJsonMessage
? html`
${this.invalidSeedsJsonMessage}
`
: html` ${msg("Valid JSON")} `}
`;
  }

  /** Renders the JSON editor input. */
  private renderSeedsJsonInput() {
    return html`
`;
  }

  /**
   * Re-parses the JSON editor contents. Valid JSON is stored pretty-printed
   * and clears any validation error; invalid JSON records a localized error
   * message and leaves the stored JSON untouched.
   *
   * @param e Event whose `target` is the JSON editor input.
   */
  private updateSeedsJson(e: any) {
    const textarea = e.target;
    const text = textarea.value;

    try {
      const json = JSON.parse(text);

      this.seedsJson = JSON.stringify(json, null, 2);
      this.invalidSeedsJsonMessage = "";

      textarea.setCustomValidity("");
      textarea.reportValidity();
    } catch (err: unknown) {
      // Distinct name so the parse error does not shadow the event parameter.
      const reason = err instanceof Error ? err.message : "";

      this.invalidSeedsJsonMessage = reason
        ? msg(str`JSON is invalid: ${reason.replace("JSON.parse: ", "")}`)
        : msg("JSON is invalid.");
    }
  }

  /**
   * Builds the request payload from the submitted form data and the current
   * schedule, run-now and JSON editor state.
   *
   * @param formData Form data from the submit event.
   * @returns Template with name, UTC schedule, run-now flag, crawl timeout
   *   (form minutes multiplied by 60, `0` when unset) and crawl config.
   * @throws SyntaxError if the JSON view is active and the stored JSON is invalid.
   */
  private parseTemplate(formData: FormData) {
    const crawlTimeoutMinutes = formData.get("crawlTimeoutMinutes");
    const config: CrawlConfig = this.isSeedsJsonView
      ? JSON.parse(this.seedsJson)
      : this.parseSeedsForm(formData);

    return {
      name: formData.get("name") as string,
      schedule: this.getUTCSchedule(),
      runNow: this.isRunNow,
      crawlTimeout: crawlTimeoutMinutes ? +crawlTimeoutMinutes * 60 : 0,
      config,
    };
  }

  /** Builds the crawl config from the simple seeds form fields. */
  private parseSeedsForm(formData: FormData): CrawlConfig {
    const pageLimit = formData.get("limit");
    const seedUrlsStr = formData.get("seedUrls");
    // The field may be missing entirely; treat that like an empty list.
    const seedUrls = typeof seedUrlsStr === "string" ? seedUrlsStr : "";

    return {
      seeds: seedUrls
        .replace(/,/g, " ")
        .split(/\s+/)
        // Empty input and leading/trailing separators produce empty
        // entries, which must not be sent as seeds.
        .filter((url) => url.length > 0)
        .map((url) => ({ url })),
      scopeType: formData.get("scopeType") as string,
      limit: pageLimit ? +pageLimit : 0,
      extraHops: formData.get("extraHopsOne") ? 1 : 0,
    };
  }

  /**
   * Validates and submits the form: POSTs the new template, shows a success
   * notification and navigates to the crawl templates list. On failure the
   * error is stored in `serverError`.
   *
   * @param event Submit event carrying the form data; `target` is queried
   *   for the `#json-editor` element when the JSON is invalid.
   */
  private async onSubmit(event: {
    detail: { formData: FormData };
    target: any;
  }) {
    if (!this.authState) return;

    if (this.isSeedsJsonView && this.invalidSeedsJsonMessage) {
      // Check JSON validity
      const jsonEditor = event.target.querySelector("#json-editor");
      jsonEditor.setCustomValidity(msg("Please correct JSON errors."));
      jsonEditor.reportValidity();
      return;
    }

    const params = this.parseTemplate(event.detail.formData);

    this.serverError = undefined;
    this.isSubmitting = true;

    try {
      const data = await this.apiFetch(
        `/archives/${this.archiveId}/crawlconfigs/`,
        this.authState,
        {
          method: "POST",
          body: JSON.stringify(params),
        }
      );

      this.notify({
        message: data.run_now_job
          ? msg(
str`Crawl running with new template.
View crawl`
            )
          : msg("Crawl template created."),
        type: "success",
        icon: "check2-circle",
        duration: 10000,
      });

      this.navTo(`/archives/${this.archiveId}/crawl-templates`);
    } catch (e: any) {
      // NOTE(review): `isApiError` is presumably set by `apiFetch` on
      // server-reported errors — confirm.
      if (e?.isApiError) {
        this.serverError = e?.message;
      } else {
        this.serverError = msg("Something unexpected went wrong");
      }
    } finally {
      // Always leave the submitting state, whatever the outcome.
      this.isSubmitting = false;
    }
  }

  /**
   * Get schedule as UTC cron job expression
   * https://kubernetes.io/docs/concepts/workloads/controllers/cron-jobs/#cron-schedule-syntax
   *
   * @returns `"<minute> <hour> <day-of-month> * <day-of-week>"` in UTC, or an
   *   empty string when no schedule interval is selected.
   **/
  private getUTCSchedule(): string {
    if (!this.scheduleInterval) {
      return "";
    }

    const { minute, hour, period } = this.scheduleTime;

    // Convert 12-hr to 24-hr time: 12 AM -> 0, 12 PM -> 12, 1 PM -> 13
    const hour24 = (hour % 12) + (period === "PM" ? 12 : 0);

    // Start from the current local date so the UTC day fields reflect the
    // local-to-UTC shift of the chosen time.
    const localDate = new Date();
    localDate.setHours(hour24);
    localDate.setMinutes(minute);

    // NOTE(review): monthly schedules use today's UTC day of month, so a
    // template created on the 29th-31st will not match every month — confirm
    // this is intended.
    const dayOfMonth =
      this.scheduleInterval === "monthly" ? localDate.getUTCDate() : "*";
    const dayOfWeek =
      this.scheduleInterval === "weekly" ? localDate.getUTCDay() : "*";
    const month = "*";

    return `${localDate.getUTCMinutes()} ${localDate.getUTCHours()} ${dayOfMonth} ${month} ${dayOfWeek}`;
  }
}
// Register the component under the `btrix-crawl-templates-new` tag name.
customElements.define("btrix-crawl-templates-new", CrawlTemplatesNew);