Add frontend support for auto-adding collections to workflows (#916)

- Adds collections search and list to workflow editor
- Adds collections to workflow details component
- Adds namePrefix filter to backend GET /orgs/{oid}/collections endpoint to support case-insensitive searching of collections
- Adds documentation for new setting

---------

Co-authored-by: Henry Wilkinson <henry@wilkinson.graphics>
This commit is contained in:
Tessa Walsh 2023-06-12 21:18:05 -04:00 committed by GitHub
parent 71e9984e65
commit bd6dc79449
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 469 additions and 3 deletions

View File

@ -203,6 +203,7 @@ class CollectionOps:
sort_by: str = None,
sort_direction: int = 1,
name: Optional[str] = None,
name_prefix: Optional[str] = None,
):
"""List all collections for org"""
# pylint: disable=too-many-locals
@ -215,6 +216,10 @@ class CollectionOps:
if name:
match_query["name"] = name
elif name_prefix:
regex_pattern = f"^{name_prefix}"
match_query["name"] = {"$regex": regex_pattern, "$options": "i"}
aggregate = [{"$match": match_query}]
if sort_by:
@ -384,6 +389,7 @@ def init_collections_api(app, mdb, crawls, orgs, crawl_manager):
sortBy: str = None,
sortDirection: int = 1,
name: Optional[str] = None,
namePrefix: Optional[str] = None,
):
collections, total = await colls.list_collections(
org.id,
@ -392,6 +398,7 @@ def init_collections_api(app, mdb, crawls, orgs, crawl_manager):
sort_by=sortBy,
sort_direction=sortDirection,
name=name,
name_prefix=namePrefix,
)
return paginated_format(collections, total, page, pageSize)

View File

@ -298,6 +298,44 @@ def test_filter_sort_collections(
assert coll["oid"] == default_org_id
assert coll.get("description") is None
# Test filtering by name prefix
name_prefix = SECOND_COLLECTION_NAME[0:4]
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/collections?namePrefix={name_prefix}",
headers=crawler_auth_headers,
)
assert r.status_code == 200
data = r.json()
assert data["total"] == 1
items = data["items"]
assert len(items) == 1
coll = items[0]
assert coll["id"]
assert coll["name"] == SECOND_COLLECTION_NAME
assert coll["oid"] == default_org_id
assert coll.get("description") is None
# Test filtering by name prefix (case insensitive)
name_prefix = name_prefix.upper()
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/collections?namePrefix={name_prefix}",
headers=crawler_auth_headers,
)
assert r.status_code == 200
data = r.json()
assert data["total"] == 1
items = data["items"]
assert len(items) == 1
coll = items[0]
assert coll["id"]
assert coll["name"] == SECOND_COLLECTION_NAME
assert coll["oid"] == default_org_id
assert coll.get("description") is None
# Test sorting by name, ascending (default)
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/collections?sortBy=name",

View File

@ -186,3 +186,7 @@ Leave optional notes about the workflow's configuration.
### Tags
Apply tags to the workflow. Tags applied to the workflow will propagate to every crawl created with it at the time of crawl creation.
### Collection Auto-Add
Search for and specify collections that this crawl workflow should automatically add content to as soon as crawls finish running. Cancelled and Failed crawls will not be automatically added to collections.

View File

@ -5,8 +5,10 @@ import { msg, localized, str } from "@lit/localize";
import RegexColorize from "regex-colorize";
import ISO6391 from "iso-639-1";
import type { AuthState } from "../utils/AuthService";
import LiteElement, { html } from "../utils/LiteElement";
import type { CrawlConfig, Seed, SeedConfig } from "../pages/org/types";
import type { Collection, CollectionList } from "../types/collection";
import { humanizeSchedule } from "../utils/cron";
import { RelativeDuration } from "./relative-duration";
@ -14,12 +16,16 @@ import { RelativeDuration } from "./relative-duration";
* Usage:
* ```ts
* <btrix-config-details
* .authState=${this.authState!}
* .crawlConfig=${this.crawlConfig}
* ></btrix-config-details>
* ```
*/
@localized()
export class ConfigDetails extends LiteElement {
@property({ type: Object })
authState!: AuthState;
@property({ type: Object })
crawlConfig?: CrawlConfig;
@ -37,6 +43,9 @@ export class ConfigDetails extends LiteElement {
maxPagesPerCrawl?: number;
};
@state()
private collections: CollectionList = [];
private readonly scopeTypeLabels: Record<
CrawlConfig["config"]["scopeType"],
string
@ -50,9 +59,10 @@ export class ConfigDetails extends LiteElement {
any: msg("Any"),
};
connectedCallback() {
async connectedCallback() {
super.connectedCallback();
this.fetchAPIDefaults();
await this.fetchCollections();
}
render() {
@ -255,6 +265,20 @@ export class ConfigDetails extends LiteElement {
)
: undefined
)}
${this.renderSetting(
msg("Collections"),
this.collections.length
? this.collections.map(
(coll) =>
html`<sl-tag class="mt-1 mr-2" variant="neutral">
${coll.name}
<span class="pl-1 font-monostyle text-xs">
(${msg(str`${coll.crawlCount} Crawls`)})
</span>
</sl-tag>`
)
: undefined
)}
</btrix-desc-list>
</section>
`;
@ -378,6 +402,43 @@ export class ConfigDetails extends LiteElement {
`;
}
/**
 * Loads details for the workflow's auto-add collections when the crawl
 * config lists any, and shows a danger notification if the lookup fails.
 */
private async fetchCollections() {
  if (!this.crawlConfig?.autoAddCollections) {
    return;
  }

  try {
    await this.getCollections();
  } catch (err: any) {
    // A 404 gets its own message; every other failure shares a generic one.
    const notFound = err.statusCode === 404;
    this.notify({
      message: notFound
        ? msg("Collections not found.")
        : msg("Sorry, couldn't retrieve Collection details at this time."),
      variant: "danger",
      icon: "exclamation-octagon",
    });
  }
}
/**
 * Fetches every collection referenced by `crawlConfig.autoAddCollections`
 * and stores the results in `this.collections`, preserving the order of the
 * configured IDs.
 *
 * The lookups are independent of each other, so they are issued in parallel
 * rather than one at a time. If any request rejects, the rejection propagates
 * to the caller and `this.collections` is left untouched.
 *
 * When the config has no org ID or no auto-add collections, the list is
 * reset to empty.
 */
private async getCollections() {
  const orgId = this.crawlConfig?.oid;
  const collectionIds = this.crawlConfig?.autoAddCollections;

  if (!collectionIds || !orgId) {
    this.collections = [];
    return;
  }

  const responses: Array<Collection | undefined> = await Promise.all(
    collectionIds.map((collectionId) =>
      this.apiFetch(
        `/orgs/${orgId}/collections/${collectionId}`,
        this.authState!
      )
    )
  );

  // Assigning a fresh array to the reactive `collections` state schedules the
  // re-render, so no explicit requestUpdate() call is needed. Empty responses
  // are dropped, as before.
  this.collections = responses.filter(
    (collection): collection is Collection => Boolean(collection)
  );
}
private async fetchAPIDefaults() {
try {
const resp = await fetch("/api/settings", {

View File

@ -0,0 +1,313 @@
import { state, property } from "lit/decorators.js";
import { msg, localized, str } from "@lit/localize";
import { when } from "lit/directives/when.js";
import debounce from "lodash/fp/debounce";
import type { SlMenuItem, SlIconButton } from "@shoelace-style/shoelace";
import queryString from "query-string";
import type { AuthState } from "../../utils/AuthService";
import type { Collection, CollectionList } from "../../types/collection";
import LiteElement, { html } from "../../utils/LiteElement";
import type {
APIPaginatedList,
APIPaginationQuery,
APISortQuery,
} from "../../types/api";
// Page size requested when searching collections by name prefix.
const INITIAL_PAGE_SIZE = 10;
// Minimum number of characters the search box must contain before the
// component considers it a search string.
const MIN_SEARCH_LENGTH = 2;
// Paginated API response whose items are collections.
type CollectionSearchResults = APIPaginatedList & {
items: CollectionList;
};
// Event fired by <btrix-collections-add>; `detail.collections` carries the
// selected collection IDs.
export type CollectionsChangeEvent = CustomEvent<{
collections: string[];
}>;
/**
 * Search-and-select control for choosing the collections that crawls are
 * automatically added to.
 *
 * Usage:
 * ```ts
 * <btrix-collections-add
 *   .authState=${this.authState}
 *   .initialCollections=${[]}
 *   .orgId=${this.orgId}
 *   .configId=${this.configId}
 *   @collections-change=${console.log}
 * ></btrix-collections-add>
 * ```
 * @events collections-change Fired with the complete list of selected
 *   collection IDs whenever a collection is added or removed.
 */
@localized()
export class CollectionsAdd extends LiteElement {
  @property({ type: Object })
  authState!: AuthState;

  /** IDs of the collections that are selected when the element connects. */
  @property({ type: Array })
  initialCollections?: string[];

  @property({ type: String })
  orgId!: string;

  @property({ type: String })
  configId!: string;

  /** Full records of the selected collections, used for rendering. */
  @state()
  private collections: CollectionList = [];

  /** IDs of the selected collections; this is what change events report. */
  @state()
  private collectionIds: string[] = [];

  @state()
  private searchByValue = "";

  @state()
  private searchResults: CollectionList = [];

  @state()
  private searchResultsOpen = false;

  /** Whether the trimmed search text is long enough to run a search. */
  private get hasSearchStr() {
    return this.searchByValue.length >= MIN_SEARCH_LENGTH;
  }

  async connectedCallback() {
    if (this.initialCollections) {
      // Copy so that later edits never mutate the array owned by the parent.
      this.collectionIds = [...this.initialCollections];
    }
    // Connect before loading: a slow or failed lookup must not prevent the
    // element from rendering.
    super.connectedCallback();
    await this.initializeCollectionsFromIds();
  }

  render() {
    return html`
      <div class="form-control form-control--has-label">
        <label
          class="form-control__label"
          part="form-control-label"
          for="input"
        >
          <slot name="label">${msg("Collection Auto-Add")}</slot>
        </label>
        <div class="mb-2 mt-2 p-2 bg-neutral-50 border rounded-lg">
          ${this.renderSearch()}
        </div>
        ${when(
          this.collections.length > 0,
          () => html`
            <div class="mb-2">
              <ul class="contents">
                ${this.collections.map((collection) =>
                  this.renderCollectionItem(collection)
                )}
              </ul>
            </div>
          `,
          () => html`
            <div class="mb-2">
              <p class="text-center text-0-500">
                ${msg("Search for a Collection to auto-add crawls")}
              </p>
            </div>
          `
        )}
      </div>
    `;
  }

  private renderSearch() {
    return html`
      <btrix-combobox
        ?open=${this.searchResultsOpen}
        @request-close=${() => {
          this.searchResultsOpen = false;
          this.searchByValue = "";
        }}
        @sl-select=${(e: CustomEvent) => this.onSelectCollection(e)}
      >
        <sl-input
          size="small"
          placeholder=${msg("Search by Collection name")}
          clearable
          value=${this.searchByValue}
          @sl-clear=${() => {
            this.searchResultsOpen = false;
            this.onSearchInput.cancel();
          }}
          @sl-input=${this.onSearchInput}
        >
          <sl-icon name="search" slot="prefix"></sl-icon>
        </sl-input>
        ${this.renderSearchResults()}
      </btrix-combobox>
    `;
  }

  private renderSearchResults() {
    if (!this.hasSearchStr) {
      return html`
        <sl-menu-item slot="menu-item" disabled
          >${msg("Start typing to search Collections.")}</sl-menu-item
        >
      `;
    }

    if (!this.searchResults.length) {
      return html`
        <sl-menu-item slot="menu-item" disabled
          >${msg("No matching Collections found.")}</sl-menu-item
        >
      `;
    }

    return html`
      ${this.searchResults.map(
        (item: Collection) => html`
          <sl-menu-item class="w-full" slot="menu-item" data-key=${item.id}>
            <div class="flex w-full gap-2 items-center">
              <div class="justify-self-stretch grow truncate">
                ${item.name}
              </div>
              <div
                class="flex-auto text-right text-neutral-500 text-xs font-monostyle"
              >
                ${msg(str`${item.crawlCount} Crawls`)}
              </div>
            </div>
          </sl-menu-item>
        `
      )}
    `;
  }

  private renderCollectionItem(collection: Collection) {
    return html`<li class="mt-1 p-2 pl-5 pr-5 border rounded-sm">
      <div class="flex flex-row gap-2 justify-between items-center">
        <div class="justify-self-stretch grow truncate">
          ${collection.name}
        </div>
        <div class="text-neutral-500 text-xs text-right font-monostyle">
          ${msg(str`${collection.crawlCount} Crawls`)}
        </div>
        <sl-icon-button
          name="x-lg"
          data-key=${collection.id}
          @click=${(e: Event) => this.removeCollection(e)}
        >
        </sl-icon-button>
      </div>
    </li>`;
  }

  /**
   * Handles a pick from the search results: adds the chosen collection to the
   * selection (unless it is already selected) and notifies listeners.
   */
  private async onSelectCollection(e: CustomEvent) {
    this.searchResultsOpen = false;
    const item = e.detail.item as SlMenuItem;
    const collId = item.dataset["key"];
    if (!collId || this.collectionIds.includes(collId)) {
      return;
    }
    const coll = this.searchResults.find(
      (collection) => collection.id === collId
    );
    if (!coll) {
      return;
    }
    // Replace rather than push so the reactive state registers the change.
    this.collections = [...this.collections, coll];
    this.collectionIds = [...this.collectionIds, coll.id];
    await this.dispatchChange();
  }

  /**
   * Removes the collection whose ID is stored in the clicked button's
   * `data-key` attribute and notifies listeners when the selection changed.
   */
  private async removeCollection(event: Event) {
    const target = event.currentTarget as HTMLElement;
    const collectionId = target.getAttribute("data-key");
    if (!collectionId) {
      return;
    }

    const remainingIds = this.collectionIds.filter(
      (id) => id !== collectionId
    );
    const remainingCollections = this.collections.filter(
      (collection) => collection.id !== collectionId
    );
    const changed =
      remainingIds.length !== this.collectionIds.length ||
      remainingCollections.length !== this.collections.length;
    if (!changed) {
      return;
    }

    this.collectionIds = remainingIds;
    this.collections = remainingCollections;
    // Without this the parent form would never learn about the removal.
    await this.dispatchChange();
  }

  /**
   * Debounced handler for typing in the search box. Queries shorter than
   * MIN_SEARCH_LENGTH clear the results without calling the API.
   */
  private onSearchInput = debounce(200)(async (e: Event) => {
    const query = (e.target as HTMLInputElement).value.trim();
    this.searchByValue = query;

    if (!this.hasSearchStr) {
      this.searchResults = [];
      return;
    }
    this.searchResultsOpen = true;

    const data = await this.fetchCollectionsByPrefix(query);
    // A newer keystroke may have replaced the query while this request was
    // in flight; drop the stale response instead of overwriting newer results.
    if (query !== this.searchByValue) {
      return;
    }
    this.searchResults = data?.items.length
      ? this.filterOutSelectedCollections(data.items)
      : [];
  }) as any;

  /** Returns the results that are not already part of the selection. */
  private filterOutSelectedCollections(results: CollectionList) {
    const selectedIds = new Set(this.collections.map((coll) => coll.id));
    return results.filter((result) => !selectedIds.has(result.id));
  }

  /**
   * Fetches the first page of collections whose name starts with
   * `namePrefix`. Shows a notification and resolves to `undefined` on failure.
   */
  private async fetchCollectionsByPrefix(
    namePrefix: string
  ): Promise<CollectionSearchResults | undefined> {
    try {
      return await this.getCollections({
        oid: this.orgId,
        namePrefix: namePrefix,
        sortBy: "name",
        pageSize: INITIAL_PAGE_SIZE,
      });
    } catch {
      this.notify({
        message: msg("Sorry, couldn't retrieve Collections at this time."),
        variant: "danger",
        icon: "exclamation-octagon",
      });
      return undefined;
    }
  }

  /** Requests the org's collection list using `params` as the query string. */
  private async getCollections(
    params: Partial<{
      oid?: string;
      namePrefix?: string;
    }> &
      APIPaginationQuery &
      APISortQuery
  ): Promise<CollectionSearchResults> {
    const query = queryString.stringify(params, {
      arrayFormat: "comma",
    });
    const data: CollectionSearchResults = await this.apiFetch(
      `/orgs/${this.orgId}/collections?${query}`,
      this.authState!
    );
    return data;
  }

  /**
   * Loads the full records for the initially selected collection IDs.
   *
   * Lookups run in parallel and fail independently: a collection that cannot
   * be fetched is skipped and reported once through a notification, so a
   * single bad ID does not hide the others.
   */
  private async initializeCollectionsFromIds() {
    const ids = this.collectionIds;
    if (!ids.length) {
      return;
    }

    const failedIds: string[] = [];
    const fetched: Array<Collection | undefined> = await Promise.all(
      ids.map(async (collId) => {
        try {
          const data: Collection | undefined = await this.apiFetch(
            `/orgs/${this.orgId}/collections/${collId}`,
            this.authState!
          );
          return data;
        } catch {
          failedIds.push(collId);
          return undefined;
        }
      })
    );

    const loaded = fetched.filter((coll): coll is Collection =>
      Boolean(coll)
    );
    // Keep anything the user selected while the lookups were in flight.
    const loadedIds = new Set(loaded.map((coll) => coll.id));
    this.collections = [
      ...loaded,
      ...this.collections.filter((coll) => !loadedIds.has(coll.id)),
    ];

    if (failedIds.length) {
      this.notify({
        message: msg("Sorry, couldn't retrieve Collections at this time."),
        variant: "danger",
        icon: "exclamation-octagon",
      });
    }
  }

  /** Emits `collections-change` with the selected IDs after the next render. */
  private async dispatchChange() {
    await this.updateComplete;
    const changeEvent: CollectionsChangeEvent = new CustomEvent(
      "collections-change",
      {
        detail: { collections: this.collectionIds },
      }
    );
    this.dispatchEvent(changeEvent);
  }
}
// Register the component under the tag name used in templates.
customElements.define("btrix-collections-add", CollectionsAdd);

View File

@ -856,7 +856,8 @@ ${this.crawl?.notes}
if (!this.crawl?.config) return "";
return html`
<btrix-config-details
.crawlConfig=${this.crawl}
.authState=${this.authState!}
.crawlConfig=${{ ...this.crawl, autoAddCollections: this.crawl.collections }}
hideTags
></btrix-config-details>
`;

View File

@ -14,6 +14,7 @@ import "./workflows-list";
import "./workflows-new";
import "./crawl-detail";
import "./crawls-list";
import "./collections-add";
import "./collections-list";
import "./collections-new";
import "./collection-edit";

View File

@ -1146,6 +1146,7 @@ export class WorkflowDetail extends LiteElement {
private renderSettings() {
return html`<section class="border rounded-lg py-3 px-5">
<btrix-config-details
.authState=${this.authState!}
.crawlConfig=${this.workflow}
anchorLinks
></btrix-config-details>

View File

@ -40,6 +40,7 @@ import type {
Tags,
TagsChangeEvent,
} from "../../components/tag-input";
import type { CollectionsChangeEvent } from "./collections-add";
import type {
WorkflowParams,
Profile,
@ -47,6 +48,7 @@ import type {
Seed,
SeedConfig,
} from "./types";
import type { CollectionList } from "../../types/collection";
type NewCrawlConfigParams = WorkflowParams & {
runNow: boolean;
@ -99,6 +101,7 @@ type FormState = {
jobName: WorkflowParams["name"];
browserProfile: Profile | null;
tags: Tags;
autoAddCollections: string[];
description: WorkflowParams["description"];
autoscrollBehavior: boolean;
};
@ -171,6 +174,7 @@ const getDefaultFormState = (): FormState => ({
jobName: "",
browserProfile: null,
tags: [],
autoAddCollections: [],
description: null,
autoscrollBehavior: true,
});
@ -472,6 +476,11 @@ export class CrawlConfigEditor extends LiteElement {
if (this.initialWorkflow.tags?.length) {
formState.tags = this.initialWorkflow.tags;
}
if (this.initialWorkflow.autoAddCollections?.length) {
formState.autoAddCollections = this.initialWorkflow.autoAddCollections;
}
const secondsToMinutes = (value: any, fallback: number | null) => {
if (typeof value === "number" && value > 0) return value / 60;
return fallback;
@ -499,6 +508,7 @@ export class CrawlConfigEditor extends LiteElement {
scheduleFrequency: defaultFormState.scheduleFrequency,
runNow: defaultFormState.runNow,
tags: this.initialWorkflow.tags,
autoAddCollections: this.initialWorkflow.autoAddCollections,
jobName: this.initialWorkflow.name || defaultFormState.jobName,
description: this.initialWorkflow.description,
browserProfile: this.initialWorkflow.profileid
@ -1641,6 +1651,28 @@ https://archiveweb.page/images/${"logo.svg"}`}
msg(`Create or assign this crawl (and its outputs) to one or more tags
to help organize your archived data.`)
)}
${this.renderFormCol(
html`
<btrix-collections-add
.authState=${this.authState}
.initialCollections=${this.formState.autoAddCollections}
.orgId=${this.orgId}
.configId=${this.configId}
@collections-change=${(e: CollectionsChangeEvent) =>
this.updateFormState(
{
autoAddCollections: e.detail.collections,
},
true
)}
></btrix-collections-add>
`
)}
${this.renderHelpTextCol(
msg(`Automatically add crawls from this workflow to one or more collections
as soon as they complete.
Individual crawls can be selected from within the collection later.`)
)}
`;
}
@ -1679,7 +1711,8 @@ https://archiveweb.page/images/${"logo.svg"}`}
const profileName = this.formState.browserProfile?.name;
return html`<btrix-config-details
.crawlConfig=${{ ...crawlConfig, profileName }}
.authState=${this.authState!}
.crawlConfig=${{ ...crawlConfig, profileName, oid: this.orgId }}
>
</btrix-config-details>`;
})}
@ -2060,6 +2093,7 @@ https://archiveweb.page/images/${"logo.svg"}`}
? this.formState.crawlTimeoutMinutes * 60
: null,
tags: this.formState.tags,
autoAddCollections: this.formState.autoAddCollections,
config: {
...(this.jobType === "seed-crawl"
? this.parseSeededConfig()

View File

@ -28,6 +28,7 @@ const defaultValue = {
crawlTimeout: null,
jobType: undefined,
scale: 1,
autoAddCollections: []
} as WorkflowParams;
/**

View File

@ -10,6 +10,8 @@ export type Collection = {
resources: string[];
};
export type CollectionList = Collection[];
export type CollectionSearchValues = {
names: string[];
}

View File

@ -44,6 +44,7 @@ export type WorkflowParams = {
tags: string[];
crawlTimeout: number | null;
description: string | null;
autoAddCollections: string[];
};
export type CrawlConfig = WorkflowParams & {
@ -73,6 +74,7 @@ export type Workflow = CrawlConfig & {
inactive: boolean;
firstSeed: string;
isCrawlRunning: boolean | null;
autoAddCollections: string[];
};
export type Profile = {
@ -120,4 +122,5 @@ export type Crawl = CrawlConfig & {
firstSeed: string;
seedCount: number;
stopping: boolean;
collections: string[];
};