import type { TemplateResult } from "lit";
import { state, property } from "lit/decorators.js";
import { msg, localized, str } from "@lit/localize";
import { when } from "lit/directives/when.js";
import { guard } from "lit/directives/guard.js";
import { styleMap } from "lit/directives/style-map.js";
import { ref } from "lit/directives/ref.js";
import debounce from "lodash/fp/debounce";
import { mergeDeep } from "immutable";
import omit from "lodash/fp/omit";
import groupBy from "lodash/fp/groupBy";
import keyBy from "lodash/fp/keyBy";
import orderBy from "lodash/fp/orderBy";
import flow from "lodash/fp/flow";
import Fuse from "fuse.js";
import queryString from "query-string";
import { serialize } from "@shoelace-style/shoelace/dist/utilities/form.js";
import type { SlMenuItem } from "@shoelace-style/shoelace";

import type {
  CheckboxChangeEvent,
  CheckboxGroupList,
} from "../../components/checkbox-list";
import type { MarkdownChangeEvent } from "../../components/markdown-editor";
import type { AuthState } from "../../utils/AuthService";
import LiteElement, { html } from "../../utils/LiteElement";
import { maxLengthValidator } from "../../utils/form";
import type {
  APIPaginatedList,
  APIPaginationQuery,
  APISortQuery,
} from "../../types/api";
import type { Collection } from "../../types/collection";
import type { Crawl, CrawlState, Workflow } from "../../types/crawler";
import type { PageChangeEvent } from "../../components/pagination";

/** Editor tabs, in display order. The URL #hash selects the active one. */
const TABS = ["crawls", "metadata"] as const;
type Tab = (typeof TABS)[number];

/** Workflow fields that can be used as a search filter. */
type SearchFields = "name" | "firstSeed";
type SearchResult = {
  item: {
    key: SearchFields;
    value: string;
  };
};

type SortField = "lastRun" | "modified" | "created" | "firstSeed";
type SortDirection = "asc" | "desc";

/** Sort options for the workflow list and the direction applied when one is picked. */
const sortableFields: Record<
  SortField,
  { label: string; defaultDirection?: SortDirection }
> = {
  lastRun: {
    label: msg("Latest Crawl"),
    defaultDirection: "desc",
  },
  modified: {
    label: msg("Last Modified"),
    defaultDirection: "desc",
  },
  created: {
    label: msg("Created At"),
    defaultDirection: "desc",
  },
  firstSeed: {
    label: msg("Crawl Start URL"),
    defaultDirection: "asc",
  },
};

/** Crawl states requested when loading the crawls of a workflow. */
const finishedCrawlStates: CrawlState[] = [
  "complete",
  "partial_complete",
  "timed_out",
];

/** Page size used when requesting crawls (per workflow and per collection). */
const WORKFLOW_CRAWL_LIMIT = 100;
const WORKFLOW_PAGE_SIZE = 10;
const CRAWL_PAGE_SIZE = 5;
/** Minimum number of typed characters before search results are shown. */
const MIN_SEARCH_LENGTH = 2;

export type CollectionSubmitEvent = CustomEvent<{
  values: {
    name: string;
    description: string | null;
    crawlIds: string[];
    oldCrawlIds?: string[];
  };
}>;

/**
 * Two-tab editor ("crawls" and "metadata") for creating or updating a
 * collection. Selection and metadata are reported to the parent through
 * the `on-submit` event; this component does not save anything itself.
 *
 * NOTE(review): several `html` templates below contain orphaned fragments
 * such as `}} >`, which suggests the element markup was stripped from this
 * copy of the file. The template text is intentionally left untouched here;
 * confirm against the canonical source before relying on the rendering.
 *
 * @event on-submit
 */
@localized()
export class CollectionEditor extends LiteElement {
  @property({ type: Object })
  authState!: AuthState;

  @property({ type: String })
  orgId!: string;

  @property({ type: Boolean })
  isCrawler?: boolean;

  @property({ type: String })
  collectionId?: string;

  @property({ type: Object })
  metadataValues?: Collection;

  @property({ type: Boolean })
  isSubmitting = false;

  @state()
  private collectionCrawls?: Crawl[];

  // Store crawl IDs to compare later
  private savedCollectionCrawlIds: string[] = [];

  @state()
  private workflows?: APIPaginatedList & {
    items: Workflow[];
  };

  @state()
  private workflowPagination: {
    [workflowId: string]: APIPaginationQuery & {
      items: Workflow[];
    };
  } = {};

  @state()
  private workflowIsLoading: {
    [workflowId: string]: boolean;
  } = {};

  @state()
  private selectedCrawls: {
    [crawlId: string]: Crawl;
  } = {};

  @state()
  private activeTab: Tab = TABS[0];

  @state()
  private orderWorkflowsBy: {
    field: SortField;
    direction: SortDirection;
  } = {
    field: "lastRun",
    direction: sortableFields["lastRun"].defaultDirection!,
  };

  // NOTE(review): the type argument was missing in this copy of the file;
  // reconstructed from usage (only `name` / `firstSeed` are read or written).
  @state()
  private filterWorkflowsBy: Partial<Record<SearchFields, string>> = {};

  @state()
  private searchByValue: string = "";

  @state()
  private searchResultsOpen = false;

  /** True once the search text is long enough to run a search. */
  private get hasSearchStr() {
    return this.searchByValue.length >= MIN_SEARCH_LENGTH;
  }

  /** The search field that currently has a filter value, if any. */
  private get selectedSearchFilterKey(): SearchFields | undefined {
    return (Object.keys(this.fieldLabels) as SearchFields[]).find((key) =>
      Boolean(this.filterWorkflowsBy[key])
    );
  }

  // TODO localize
  private numberFormatter = new Intl.NumberFormat(undefined, {
    notation: "compact",
  });

  // For fuzzy search:
  private fuse = new Fuse([], {
    keys: ["value"],
    shouldSort: false,
    threshold: 0.2, // stricter; default is 0.6
  });

  private validateNameMax = maxLengthValidator(50);

  private readonly fieldLabels: Record<SearchFields, string> = {
    name: msg("Name"),
    firstSeed: msg("Crawl Start URL"),
  };

  private readonly tabLabels: Record<Tab, string> = {
    crawls: msg("Select Crawls"),
    metadata: msg("Metadata"),
  };

  /**
   * Kicks off data fetches in response to property/state changes.
   * Each fetch handles its own errors, so the promises are deliberately
   * not awaited.
   */
  protected async willUpdate(changedProperties: Map<string, unknown>) {
    const orgChanged = changedProperties.has("orgId") && Boolean(this.orgId);

    if (orgChanged) {
      void this.fetchSearchValues();
    }
    if (
      this.orgId &&
      (orgChanged ||
        changedProperties.has("filterWorkflowsBy") ||
        changedProperties.has("orderWorkflowsBy"))
    ) {
      // A new org, filter or sort order invalidates the current page
      // number, so always restart from the first page.
      void this.fetchWorkflows({ page: 1 });
    }
    if (changedProperties.has("collectionId") && this.collectionId) {
      void this.fetchCollectionCrawls();
    }
  }

  connectedCallback(): void {
    // Set initial active section and dialog based on URL #hash value
    this.getActivePanelFromHash();
    super.connectedCallback();
    window.addEventListener("hashchange", this.getActivePanelFromHash);
  }

  disconnectedCallback(): void {
    super.disconnectedCallback();
    window.removeEventListener("hashchange", this.getActivePanelFromHash);
  }

  render() {
    return html`
${guard(
      [this.activeTab],
      () => html`

${this.tabLabels[this.activeTab]}

${TABS.map(this.renderTab)} `
    )} ${this.renderSelectCrawls()} ${guard(
      [this.metadataValues, this.isSubmitting, this.workflowIsLoading],
      this.renderMetadata
    )}
`;
  }

  /** Renders the navigation entry for a single tab. */
  private renderTab = (tab: Tab) => {
    const isActive = tab === this.activeTab;
    const completed = false; // TODO
    const iconProps = {
      name: "circle",
      library: "default",
      class: "text-neutral-400",
    };
    if (isActive) {
      iconProps.name = "pencil-circle-dashed";
      iconProps.library = "app";
      iconProps.class = "text-base";
    } else if (completed) {
      iconProps.name = "check-circle";
    }

    return html` this.goToTab(tab)} > ${this.tabLabels[tab]} `;
  };

  /** Renders the "crawls" tab: crawls in the collection plus the workflow list. */
  private renderSelectCrawls = () => {
    return html`

${msg("Crawls in Collection")}

${guard(
      [
        this.isCrawler,
        this.collectionCrawls,
        this.selectedCrawls,
        this.workflowPagination,
      ],
      this.renderCollectionWorkflowList
    )}

${msg("All Workflows")}

${when(
      this.workflows?.total,
      () => html`
${guard(
        [
          this.searchResultsOpen,
          this.searchByValue,
          this.filterWorkflowsBy,
          this.orderWorkflowsBy,
        ],
        this.renderWorkflowListControls
      )}
`
    )}
${guard(
      [
        this.isCrawler,
        this.workflows,
        this.collectionCrawls,
        this.selectedCrawls,
        this.workflowIsLoading,
      ],
      this.renderWorkflowList
    )}
${when(
      this.workflows?.total,
      () => html` { await this.fetchWorkflows({ page: e.detail.page, }); // Scroll to top of list this.scrollIntoView({ behavior: "smooth" }); }} > `
    )}
${when(
      this.collectionId,
      () => html` isLoading === true )} ?loading=${this.isSubmitting} @click=${this.submitCrawlSelectionChanges} > ${msg("Save Crawl Selection")} `,
      () => html` this.goToTab("metadata")}> ${msg("Enter Metadata")} `
    )}
`;
  };

  /** Renders the "metadata" tab. */
  private renderMetadata = () => {
    return html`
${when(
      !this.collectionId,
      () => html` this.goToTab("crawls")}> ${msg("Select Crawls")} `
    )} isLoading === true )} ?loading=${this.isSubmitting} > ${this.collectionId
      ? msg("Save Metadata")
      : msg("Save Collection")}
`;
  };

  /**
   * Renders the crawls known for the collection, grouped by workflow
   * (`cid`), or a loading / empty state when there is nothing to list.
   */
  private renderCollectionWorkflowList = () => {
    if (this.collectionId && !this.collectionCrawls) {
      return this.renderLoading();
    }
    if (!this.collectionCrawls?.length) {
      return html`
${msg("No Crawls in this Collection, yet")}

${(this.workflows && !this.workflows.total) || !this.isCrawler
        ? msg(
            "Select Workflows or individual Crawls. You can always come back and add Crawls later."
          )
        : msg(
            "Create a Workflow to select Crawls. You can always come back and add Crawls later."
          )}

`;
    }

    const groupedByWorkflow = groupBy("cid")(this.collectionCrawls) as any;

    return html` ${Object.keys(groupedByWorkflow).map((workflowId) =>
      this.renderWorkflowCrawls(
        workflowId,
        orderBy(["finished"])(["desc"])(groupedByWorkflow[workflowId]) as any
      )
    )} `;
  };

  /** Renders one workflow group together with its paginated crawl list. */
  private renderWorkflowCrawls(workflowId: string, crawls: Crawl[]) {
    const selectedCrawlIds = crawls
      .filter(({ id }) => this.selectedCrawls[id])
      .map(({ id }) => id);
    const allChecked = crawls.length === selectedCrawlIds.length;
    // Use latest crawl for workflow information, since we
    // may not have access to workflow details
    const firstCrawl = crawls[0];

    return html` { if (e.detail.checked || !allChecked) { this.selectCrawls(crawls); } else { this.deselectCrawls(crawls); } }} >
${this.renderCrawlName(firstCrawl)}
${crawls.length === 1
      ? msg("1 crawl")
      : msg(str`${this.numberFormatter.format(crawls.length)} crawls`)}
${guard(
      [this.selectedCrawls, this.workflowPagination[workflowId]],
      () => this.renderWorkflowCrawlList(workflowId, crawls)
    )}
`;
  }

  /** Renders the current page (CRAWL_PAGE_SIZE items) of a workflow's crawls. */
  private renderWorkflowCrawlList = (workflowId: string, crawls: Crawl[]) => {
    const { page = 1 } = this.workflowPagination[workflowId] || {};

    return html` ${crawls
      .slice((page - 1) * CRAWL_PAGE_SIZE, page * CRAWL_PAGE_SIZE)
      .map((crawl) => this.renderCrawl(crawl, workflowId))} ${when(
      crawls.length > CRAWL_PAGE_SIZE,
      () => html`
{ this.workflowPagination = mergeDeep(this.workflowPagination, { [workflowId]: { page: e.detail.page }, }); }} >
`
    )} `;
  };

  /** Renders a single crawl entry. */
  private renderCrawl(crawl: Crawl, workflowId?: string) {
    return html` { if (e.detail.checked) { this.selectedCrawls = mergeDeep(this.selectedCrawls, { [crawl.id]: crawl, }); } else { this.selectedCrawls = omit([crawl.id])(this.selectedCrawls) as any; } }} >
${workflowId
      ? html``
      : this.renderSeedsLabel(crawl.firstSeed, crawl.seedCount)}
${this.numberFormatter.format(+(crawl.stats?.done || 0))}
`;
  }

  /** Renders the search box and sort controls above the workflow list. */
  private renderWorkflowListControls = () => {
    return html`
${this.renderSearch()}
${msg("Sort by:")}
 { const field = (e.target as HTMLSelectElement).value as SortField; this.orderWorkflowsBy = { field: field, direction: sortableFields[field].defaultDirection || this.orderWorkflowsBy.direction, }; }} > ${Object.entries(
      sortableFields
    ).map(
      ([value, { label }]) => html` ${label} `
    )} { this.orderWorkflowsBy = { ...this.orderWorkflowsBy, direction: this.orderWorkflowsBy.direction === "asc" ? "desc" : "asc", }; }} >
`;
  };

  /** Renders the workflow search input and its results menu. */
  private renderSearch() {
    return html` { this.searchResultsOpen = false; this.searchByValue = ""; }} @sl-select=${async (
      e: CustomEvent
    ) => {
      this.searchResultsOpen = false;
      const item = e.detail.item as SlMenuItem;
      const key = item.dataset["key"] as SearchFields;
      this.searchByValue = item.value;
      await this.updateComplete;
      this.filterWorkflowsBy = {
        ...this.filterWorkflowsBy,
        [key]: item.value,
      };
    }} > { this.searchResultsOpen = false; this.onSearchInput.cancel(); const { name, firstSeed, ...otherFilters } = this.filterWorkflowsBy; this.filterWorkflowsBy = otherFilters; }} @sl-input=${this
      .onSearchInput} > ${when(
      this.selectedSearchFilterKey,
      () =>
        html`${this.fieldLabels[
          this.selectedSearchFilterKey as SearchFields
        ]}`,
      () => html``
    )} ${this.renderSearchResults()} `;
  }

  /** Renders up to 10 fuzzy-search matches for the current search text. */
  private renderSearchResults() {
    if (!this.hasSearchStr) {
      return html` ${msg("Start typing to view crawl filters.")} `;
    }

    const searchResults = this.fuse.search(this.searchByValue).slice(0, 10);
    if (!searchResults.length) {
      return html` ${msg("No matching crawls found.")} `;
    }

    return html` ${searchResults.map(
      ({ item }: SearchResult) =>
        html` ${this.fieldLabels[item.key]} ${item.value} `
    )} `;
  }

  /** Renders the current page of workflows, or a loading / empty state. */
  private renderWorkflowList = () => {
    if (!this.workflows) {
      return this.renderLoading();
    }

    if (!this.workflows.total) {
      return html`
${when(
        this.isCrawler,
        () => html` ${msg("New Crawl Workflow")} `,
        () => html`

${msg("Your organization doesn't have any Crawl Workflows.")}

`
      )}
`;
    }

    const groupedByWorkflow = groupBy("cid")(this.collectionCrawls) as any;

    return html` ${this.workflows.items.map((workflow) =>
      this.renderWorkflowItem(workflow, groupedByWorkflow[workflow.id])
    )} `;
  };

  /**
   * Renders one workflow row.
   *
   * @param workflow Workflow to render.
   * @param crawls Already-loaded crawls belonging to this workflow.
   */
  private renderWorkflowItem(workflow: Workflow, crawls: Crawl[] = []) {
    const selectedCrawls = crawls.filter(({ id }) => this.selectedCrawls[id]);
    const allChecked = workflow.crawlSuccessfulCount === selectedCrawls.length;

    return html` { if (e.detail.checked || !allChecked) { this.selectWorkflow(workflow.id); } else { this.deselectCrawls(crawls); } }} >
${this.renderWorkflowDetails(workflow)}
${this.workflowIsLoading[workflow.id]
      ? html`
`
      : ""}
`;
  }

  /** Renders the name and crawl count of a workflow. */
  private renderWorkflowDetails(workflow: Workflow) {
    return html`
${this.renderCrawlName(workflow)}
${this.renderCrawlCount(workflow)}
`;
  }

  /** Renders the successful-crawl count, capped at WORKFLOW_CRAWL_LIMIT. */
  private renderCrawlCount(workflow: Workflow) {
    const count = Math.min(WORKFLOW_CRAWL_LIMIT, workflow.crawlSuccessfulCount);
    let message = "";
    if (count === 1) {
      message = msg("1 crawl");
    } else {
      message = msg(str`${this.numberFormatter.format(count)} crawls`);
    }
    return html`${message}${workflow.crawlSuccessfulCount > count
      ? html` `
      : ""}`;
  }

  /**
   * Renders a display name for a workflow or crawl: its `name` if set,
   * otherwise a label built from its first seed, otherwise its `id`.
   */
  private renderCrawlName(item: Workflow | Crawl) {
    if (item.name) return html`${item.name}`;
    if (!item.firstSeed) return html`${item.id}`;
    return this.renderSeedsLabel(
      item.firstSeed,
      (item as Crawl).seedCount || item.config?.seeds.length
    );
  }

  /**
   * Renders the first seed URL followed by a "+N URL(s)" suffix when there
   * is more than one seed.
   *
   * @param firstSeed First seed URL.
   * @param seedCount Total number of seeds; callers may not know it, in
   *   which case no suffix is shown.
   */
  private renderSeedsLabel(firstSeed: string, seedCount?: number) {
    let nameSuffix: any = "";
    const remainder = (seedCount ?? 1) - 1;
    // `> 0` (rather than truthiness) so a count of 0 never renders "+-1 URLs".
    if (remainder > 0) {
      if (remainder === 1) {
        nameSuffix = html`${msg(
          str`+${this.numberFormatter.format(remainder)} URL`
        )}`;
      } else {
        nameSuffix = html`${msg(
          str`+${this.numberFormatter.format(remainder)} URLs`
        )}`;
      }
    }
    return html`
${firstSeed}${nameSuffix}
`;
  }

  private renderLoading = () => html`
`;

  /** Adds the given crawls to the selection. */
  private selectCrawls(crawls: Crawl[]) {
    this.selectedCrawls = mergeDeep(
      this.selectedCrawls,
      keyBy("id")(crawls) as { [crawlId: string]: Crawl }
    );
  }

  /** Removes the given crawls from the selection. */
  private deselectCrawls(crawls: Crawl[]) {
    this.selectedCrawls = omit(crawls.map(({ id }) => id))(
      this.selectedCrawls
    ) as any;
  }

  /** Loads the crawls of a workflow and selects all of them. */
  private async selectWorkflow(workflowId: string) {
    const crawls = await this.fetchWorkflowCrawls(workflowId);
    this.selectCrawls(crawls);
  }

  /**
   * Element callback that prepares a checkbox group for the
   * expand/contract toggle once it has rendered.
   */
  private checkboxGroupUpdated = async (el: any) => {
    await this.updateComplete;
    if (el) {
      await el.updateComplete;
      if (el.classList.contains("offscreen")) {
        // Set up initial position for expand/contract toggle
        el.style.marginTop = `-${el.clientHeight}px`;
        el.style.opacity = "0";
        el.style.pointerEvents = "none";
        el.classList.remove("offscreen");
      }
    }
  };

  /**
   * Expands or collapses the crawl list of a workflow.
   * Does nothing when the expected elements are not in the DOM.
   */
  private toggleWorkflow = async (workflowId: string) => {
    const checkboxGroup = this.querySelector<HTMLElement>(
      `#workflow-${workflowId}-group`
    );
    const listItem = checkboxGroup?.closest<HTMLElement>(
      "btrix-checkbox-list-item"
    );
    const expandBtn = listItem?.querySelector<HTMLElement>(".expandBtn");
    if (!checkboxGroup || !expandBtn) return;

    const expand = !(expandBtn.getAttribute("aria-expanded") === "true");
    expandBtn.setAttribute("aria-expanded", expand.toString());
    checkboxGroup.classList.add("transition-all");

    if (expand) {
      expandBtn.classList.add("rotate-180");
      checkboxGroup.style.marginTop = "0px";
      checkboxGroup.style.opacity = "100%";
      checkboxGroup.style.pointerEvents = "auto";
    } else {
      expandBtn.classList.remove("rotate-180");
      checkboxGroup.style.marginTop = `-${checkboxGroup.clientHeight}px`;
      checkboxGroup.style.opacity = "0";
      checkboxGroup.style.pointerEvents = "none";
    }
  };

  /**
   * Debounced (150 ms) search input handler: stores the trimmed text,
   * opens the results once it is long enough, and drops the active
   * search filter when the text is cleared.
   */
  private onSearchInput = debounce(150)((e: any) => {
    this.searchByValue = e.target.value.trim();

    if (this.searchResultsOpen === false && this.hasSearchStr) {
      this.searchResultsOpen = true;
    }

    if (!this.searchByValue && this.selectedSearchFilterKey) {
      const {
        [this.selectedSearchFilterKey as SearchFields]: _,
        ...otherFilters
      } = this.filterWorkflowsBy;
      this.filterWorkflowsBy = {
        ...otherFilters,
      };
    }
  }) as any;

  /** Emits `on-submit` with the previously saved and currently selected crawl IDs. */
  private async submitCrawlSelectionChanges() {
    this.dispatchEvent(
      new CustomEvent("on-submit", {
        detail: {
          values: {
            oldCrawlIds: this.savedCollectionCrawlIds,
            crawlIds: Object.keys(this.selectedCrawls),
          },
        },
      })
    );
  }

  /**
   * Form submit handler: emits `on-submit` with the serialized form values
   * unless the form contains an element marked `data-invalid`.
   */
  private async onSubmit(event: SubmitEvent) {
    event.preventDefault();
    event.stopPropagation();
    await this.updateComplete;

    const form = event.target as HTMLFormElement;
    if (form.querySelector("[data-invalid]")) {
      return;
    }

    const values = serialize(form);
    if (!this.collectionId) {
      // Crawl IDs can only be saved in new collections
      values.crawlIds = Object.keys(this.selectedCrawls);
    }

    this.dispatchEvent(
      new CustomEvent("on-submit", {
        detail: { values },
      })
    );
  }

  /** Syncs `activeTab` with the URL #hash, defaulting to the first tab. */
  private getActivePanelFromHash = () => {
    const hashValue = window.location.hash.slice(1);
    if ((TABS as readonly string[]).includes(hashValue)) {
      this.activeTab = hashValue as Tab;
    } else {
      this.goToTab(TABS[0], { replace: true });
    }
  };

  /**
   * Activates a tab and reflects it in the URL #hash.
   *
   * @param tab Tab to activate.
   * @param replace When true, replaces the current history entry instead
   *   of pushing a new one.
   */
  private goToTab(tab: Tab, { replace = false } = {}) {
    const path = `${window.location.href.split("#")[0]}#${tab}`;
    if (replace) {
      window.history.replaceState(null, "", path);
    } else {
      window.history.pushState(null, "", path);
    }
    this.activeTab = tab;
  }

  /**
   * Loads a page of workflows into `this.workflows`. Missing `page` /
   * `pageSize` fall back to the currently loaded values, then to defaults.
   * Failures are reported via a notification.
   */
  private async fetchWorkflows(params: APIPaginationQuery = {}) {
    try {
      this.workflows = await this.getWorkflows({
        page: params.page || this.workflows?.page || 1,
        pageSize:
          params.pageSize || this.workflows?.pageSize || WORKFLOW_PAGE_SIZE,
      });
    } catch {
      this.notify({
        message: msg("Sorry, couldn't retrieve Workflows at this time."),
        variant: "danger",
        icon: "exclamation-octagon",
      });
    }
  }

  /** Requests workflows for the org using the current filter and sort order. */
  private async getWorkflows(
    params: APIPaginationQuery
  ): Promise<APIPaginatedList> {
    const query = queryString.stringify({
      ...params,
      ...this.filterWorkflowsBy,
      sortBy: this.orderWorkflowsBy.field,
      sortDirection: this.orderWorkflowsBy.direction === "desc" ? -1 : 1,
    });

    const data: APIPaginatedList = await this.apiFetch(
      `/orgs/${this.orgId}/crawlconfigs?${query}`,
      this.authState!
    );

    return data;
  }

  /**
   * Loads the crawls already in the collection, marks them as selected and
   * remembers their IDs so a later submit can report what changed.
   */
  private async fetchCollectionCrawls() {
    if (!this.collectionId) return;

    try {
      const { items: crawls } = await this.getCrawls({
        collectionId: this.collectionId,
        sortBy: "finished",
        pageSize: WORKFLOW_CRAWL_LIMIT,
      });

      this.selectedCrawls = mergeDeep(
        this.selectedCrawls,
        keyBy("id")(crawls) as { [crawlId: string]: Crawl }
      );
      // TODO remove omit once API removes errors
      this.collectionCrawls = crawls.map(omit("errors")) as Crawl[];
      // Store crawl IDs to compare later
      this.savedCollectionCrawlIds = this.collectionCrawls.map(({ id }) => id);
    } catch {
      this.notify({
        message: msg(
          "Sorry, couldn't retrieve Crawls in Collection at this time."
        ),
        variant: "danger",
        icon: "exclamation-octagon",
      });
    }
  }

  /**
   * Loads the finished crawls of a workflow and merges them (by id) into
   * `collectionCrawls`. Tracks progress in `workflowIsLoading`.
   *
   * @returns The loaded crawls, or an empty array if the request failed.
   */
  private async fetchWorkflowCrawls(workflowId: string): Promise<Crawl[]> {
    this.workflowIsLoading = mergeDeep(this.workflowIsLoading, {
      [workflowId]: true,
    });

    let workflowCrawls: Crawl[] = [];

    try {
      const { items } = await this.getCrawls({
        cid: workflowId,
        state: finishedCrawlStates,
        sortBy: "finished",
        pageSize: WORKFLOW_CRAWL_LIMIT,
      });
      // TODO remove omit once API removes errors
      const crawls = items.map(omit("errors")) as Crawl[];
      this.collectionCrawls = flow(
        keyBy("id"),
        (res) => mergeDeep(keyBy("id")(this.collectionCrawls), res),
        Object.values
      )(crawls) as any;

      workflowCrawls = crawls;
    } catch {
      this.notify({
        message: msg("Sorry, couldn't retrieve Crawl Workflow at this time."),
        variant: "danger",
        icon: "exclamation-octagon",
      });
    }

    this.workflowIsLoading = mergeDeep(this.workflowIsLoading, {
      [workflowId]: false,
    });

    return workflowCrawls;
  }

  /** Requests crawls for the org; array params are sent comma-separated. */
  private async getCrawls(
    params: Partial<{
      cid?: string;
      collectionId?: string;
      state: CrawlState[];
    }> &
      APIPaginationQuery &
      APISortQuery
  ): Promise<APIPaginatedList> {
    const query = queryString.stringify(params || {}, {
      arrayFormat: "comma",
    });
    const data: APIPaginatedList = await this.apiFetch(
      `/orgs/${this.orgId}/crawls?${query}`,
      this.authState!
    );

    return data;
  }

  /**
   * Loads workflow names and first seeds and feeds them to the fuzzy
   * search index. Best-effort: failures are only logged.
   */
  private async fetchSearchValues() {
    try {
      const { names, firstSeeds } = await this.apiFetch(
        `/orgs/${this.orgId}/crawlconfigs/search-values`,
        this.authState!
      );

      // Update search/filter collection
      const toSearchItem =
        (key: SearchFields) =>
        (value: string): SearchResult["item"] => ({
          key,
          value,
        });
      this.fuse.setCollection([
        ...names.map(toSearchItem("name")),
        ...firstSeeds.map(toSearchItem("firstSeed")),
      ] as any);
    } catch (e) {
      console.debug(e);
    }
  }
}

customElements.define("btrix-collection-editor", CollectionEditor);