Allow users to set crawl config language (#377)

This commit is contained in:
sua yoo 2022-11-22 15:15:35 -08:00 committed by GitHub
parent 52e6b6199a
commit 003b3c7a78
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 229 additions and 87 deletions

View File

@ -13,6 +13,7 @@
"axios": "^0.22.0",
"color": "^4.0.1",
"fuse.js": "^6.5.3",
"iso-639-1": "^2.1.15",
"lit": "^2.4.1",
"lodash": "^4.17.21",
"path-parser": "^6.1.0",

View File

@ -72,6 +72,9 @@ import("./badge").then(({ Badge }) => {
import("./icon-button").then(({ IconButton }) => {
customElements.define("btrix-icon-button", IconButton);
});
// Register the language dropdown as <btrix-language-select>. The module is
// loaded with a dynamic import() and the element is defined once it resolves.
import("./language-select").then(({ LanguageSelect }) => {
customElements.define("btrix-language-select", LanguageSelect);
});
customElements.define("btrix-alert", Alert);
customElements.define("btrix-input", Input);

View File

@ -0,0 +1,56 @@
import { LitElement, html, css } from "lit";
import { state, property } from "lit/decorators.js";
import { localized, msg } from "@lit/localize";
import sortBy from "lodash/fp/sortBy";
import ISO6391 from "iso-639-1";
import type { LanguageCode } from "iso-639-1";
/** Entry shape the dropdown relies on for each language. */
type LanguageOption = {
  code: LanguageCode;
  name: string;
  nativeName: string;
};

/** Curried lodash/fp sorter that orders entries by their `name` field. */
const byName = sortBy("name");

/**
 * Every language returned by `iso-639-1` for all of its codes, ordered by
 * `name`. Computed once when the module is evaluated.
 */
const languages = byName(
  ISO6391.getLanguages(ISO6391.getAllCodes())
) as unknown as LanguageOption[];
/**
 * Choose language from dropdown
 *
 * Wraps `<sl-select>` and lists every entry of `languages`, using the
 * language code as each item's value.
 *
 * Usage:
 * ```ts
 * <btrix-language-select
 *   value="en"
 *   @sl-select=${console.debug}
 *   @sl-clear=${console.debug}
 * >
 *   <span slot="label">Label</span>
 * </btrix-language-select>
 * ```
 *
 * Properties:
 * - `hoist`: forwarded to the inner `<sl-select>`'s `hoist` attribute.
 * - `value`: language code to show as selected. Leave unset (or `null`) to
 *   show the "Default" placeholder. Pass the stored code when editing an
 *   existing configuration so the current choice is visible.
 */
@localized()
export class LanguageSelect extends LitElement {
  static styles = css`
    sl-select::part(control) {
      box-shadow: var(--sl-shadow-small);
    }

    sl-menu-item:not(:hover) .secondaryText {
      color: var(--sl-color-neutral-400);
    }
  `;

  @property({ type: Boolean })
  hoist = false;

  /** Currently selected language code; `null` or empty means no selection. */
  @property({ type: String })
  value: string | null = null;

  render() {
    // An empty string tells <sl-select> nothing is selected, so the
    // placeholder is shown. Lit only re-applies this binding when the bound
    // value changes, so a selection made by the user is not reset by
    // unrelated re-renders.
    return html`
      <sl-select
        clearable
        placeholder=${msg("Default")}
        .value=${this.value || ""}
        ?hoist=${this.hoist}
      >
        <div slot="label"><slot name="label"></slot></div>
        ${languages.map(
          ({ code, name, nativeName }) => html`
            <sl-menu-item value=${code}>
              ${name} <span class="secondaryText">(${nativeName})</span>
              <code slot="suffix" class="secondaryText">${code}</code>
            </sl-menu-item>
          `
        )}
      </sl-select>
    `;
  }
}

View File

@ -4,6 +4,7 @@ import { ifDefined } from "lit/directives/if-defined.js";
import { msg, localized, str } from "@lit/localize";
import { parse as yamlToJson, stringify as jsonToYaml } from "yaml";
import merge from "lodash/fp/merge";
import ISO6391 from "iso-639-1";
import type { AuthState } from "../../utils/AuthService";
import LiteElement, { html } from "../../utils/LiteElement";
@ -49,6 +50,8 @@ export class CrawlTemplatesDetail extends LiteElement {
@state()
private exclusions: CrawlConfig["exclude"] = [];
private browserLanguage: CrawlConfig["lang"] = null;
@state()
private isSubmittingUpdate: boolean = false;
@ -537,93 +540,46 @@ export class CrawlTemplatesDetail extends LiteElement {
}
private renderConfiguration() {
const seeds = this.crawlTemplate?.config.seeds || [];
const configCodeYaml = jsonToYaml(this.crawlTemplate?.config || {});
return html`
<div class="mb-5">
<div class="text-sm text-0-600">${msg("Browser Profile")}</div>
${this.crawlTemplate
? html`
${this.crawlTemplate.profileid
? html`<a
class="font-medium text-neutral-700 hover:text-neutral-900"
href=${`/archives/${this.archiveId}/browser-profiles/profile/${this.crawlTemplate.profileid}`}
@click=${this.navLink}
>
<sl-icon
class="inline-block align-middle"
name="link-45deg"
></sl-icon>
<span class="inline-block align-middle"
>${this.crawlTemplate.profileName}</span
${this.renderSeedsTable()}
<div class="grid grid-cols-1 md:grid-cols-2 mb-5">
<div class="col-span-1">
<div class="text-sm text-neutral-600">${msg("Browser Profile")}</div>
${this.crawlTemplate
? html`
${this.crawlTemplate.profileid
? html`<a
class="font-medium text-neutral-700 hover:text-neutral-900"
href=${`/archives/${this.archiveId}/browser-profiles/profile/${this.crawlTemplate.profileid}`}
@click=${this.navLink}
>
</a>`
: html`<span class="text-0-400">${msg("None")}</span>`}
`
: ""}
</div>
<div class="mb-5" role="table">
<div
class="hidden md:grid grid-cols-5 gap-4 items-end text-xs md:text-sm text-0-600"
role="row"
>
<span class="col-span-3" role="columnheader">${msg("Seed URL")}</span>
<span class="col-span-1" role="columnheader"
>${msg("Scope Type")}</span
>
<span class="col-span-1" role="columnheader"
>${msg("Page Limit")}</span
>
<sl-icon
class="inline-block align-middle"
name="link-45deg"
></sl-icon>
<span class="inline-block align-middle"
>${this.crawlTemplate.profileName}</span
>
</a>`
: html`<span class="text-neutral-400">${msg("None")}</span>`}
`
: ""}
</div>
<div class="col-span-1">
<div class="text-sm text-neutral-600">${msg("Language")}</div>
${this.crawlTemplate
? html`
${this.crawlTemplate.config.lang
? html`${ISO6391.getName(this.crawlTemplate.config.lang)}`
: html`<span class="text-neutral-400"
>${msg("Default")}</span
>`}
`
: ""}
</div>
<ul role="rowgroup">
${seeds
.slice(0, this.showAllSeedURLs ? undefined : SEED_URLS_MAX)
.map(
(seed, i) =>
html`<li
class="grid grid-cols-5 gap-4 items-baseline py-1 border-zinc-100${i
? " border-t"
: ""}"
role="row"
title=${typeof seed === "string" ? seed : seed.url}
>
<div class="col-span-3 break-all leading-tight" role="cell">
${typeof seed === "string" ? seed : seed.url}
</div>
<span
class="col-span-1 uppercase text-0-500 text-xs"
role="cell"
>${(typeof seed !== "string" && seed.scopeType) ||
this.crawlTemplate?.config.scopeType}</span
>
<span
class="col-span-1 uppercase text-0-500 text-xs font-mono"
role="cell"
>${(typeof seed !== "string" && seed.limit) ||
this.crawlTemplate?.config.limit}</span
>
</li>`
)}
</ul>
${seeds.length > SEED_URLS_MAX
? html`<sl-button
class="mt-2"
variant="neutral"
size="small"
@click=${() => (this.showAllSeedURLs = !this.showAllSeedURLs)}
>
<span class="text-sm">
${this.showAllSeedURLs
? msg("Show less")
: msg(str`Show
${seeds.length - SEED_URLS_MAX}
more`)}
</span>
</sl-button>`
: ""}
</div>
<div class="mb-5">
@ -660,6 +616,74 @@ export class CrawlTemplatesDetail extends LiteElement {
`;
}
/**
 * Render the crawl template's seeds as a table-like list with the columns
 * "Seed URL", "Scope Type" and "Page Limit".
 *
 * A seed is either a plain URL string or an object with a `url`. For object
 * seeds a truthy per-seed `scopeType` / `limit` is shown; otherwise the
 * config-level `scopeType` / `limit` is shown instead.
 *
 * Only the first `SEED_URLS_MAX` seeds are listed unless `showAllSeedURLs`
 * is set. When there are more seeds than that, a button is rendered that
 * toggles `showAllSeedURLs`.
 */
private renderSeedsTable() {
// Fall back to an empty list while the crawl template is not available.
const seeds = this.crawlTemplate?.config.seeds || [];
return html`
<div class="mb-5" role="table">
<div
class="hidden md:grid grid-cols-4 items-end text-xs md:text-sm text-0-600"
role="row"
>
<span class="col-span-2" role="columnheader">${msg("Seed URL")}</span>
<span class="col-span-1" role="columnheader"
>${msg("Scope Type")}</span
>
<span class="col-span-1" role="columnheader"
>${msg("Page Limit")}</span
>
</div>
<ul role="rowgroup">
${seeds
.slice(0, this.showAllSeedURLs ? undefined : SEED_URLS_MAX)
.map(
(seed, i) =>
html`<li
class="grid grid-cols-4 items-baseline py-1 border-zinc-100${i
? " border-t"
: ""}"
role="row"
title=${typeof seed === "string" ? seed : seed.url}
>
<div class="col-span-2 break-all leading-tight" role="cell">
${typeof seed === "string" ? seed : seed.url}
</div>
<span
class="col-span-1 uppercase text-0-500 text-xs"
role="cell"
>${(typeof seed !== "string" && seed.scopeType) ||
this.crawlTemplate?.config.scopeType}</span
>
<span
class="col-span-1 uppercase text-0-500 text-xs font-mono"
role="cell"
>${(typeof seed !== "string" && seed.limit) ||
this.crawlTemplate?.config.limit}</span
>
</li>`
)}
</ul>
${seeds.length > SEED_URLS_MAX
? html`<sl-button
class="mt-2"
variant="neutral"
size="small"
@click=${() => (this.showAllSeedURLs = !this.showAllSeedURLs)}
>
<span class="text-sm">
${this.showAllSeedURLs
? msg("Show less")
: msg(str`Show
${seeds.length - SEED_URLS_MAX}
more`)}
</span>
</sl-button>`
: ""}
</div>
`;
}
private renderEditConfiguration() {
if (!this.crawlTemplate) return;
@ -687,6 +711,31 @@ export class CrawlTemplatesDetail extends LiteElement {
.authState=${this.authState}
></btrix-select-browser-profile>
</div>
<div>
<btrix-language-select
@sl-select=${(e: CustomEvent) =>
(this.browserLanguage = e.detail.item.value)}
@sl-clear=${() => (this.browserLanguage = null)}
@sl-hide=${this.stopProp}
@sl-after-hide=${this.stopProp}
hoist
>
<div slot="label">
<span class="inline-block align-middle">
${msg("Language")}
</span>
<sl-tooltip
content=${msg(
"The browser language setting used when crawling."
)}
><sl-icon
class="inline-block align-middle ml-1 text-neutral-500"
name="info-circle"
></sl-icon
></sl-tooltip>
</div>
</btrix-language-select>
</div>
<div class="flex flex-wrap justify-between">
<h4 class="font-medium">
${this.isConfigCodeView
@ -1155,6 +1204,7 @@ export class CrawlTemplatesDetail extends LiteElement {
limit: pageLimit ? +pageLimit : 0,
extraHops: formData.get("extraHopsOne") ? 1 : 0,
exclude: this.exclusions,
lang: this.browserLanguage,
};
}

View File

@ -22,6 +22,7 @@ type NewCrawlTemplate = {
scale: number;
config: CrawlConfig;
profileid: string | null;
lang: string | null;
};
export type InitialCrawlTemplate = Pick<
@ -97,6 +98,8 @@ export class CrawlTemplatesNew extends LiteElement {
@state()
private exclusions: CrawlConfig["exclude"] = defaultValue.config.exclude;
private browserLanguage: CrawlConfig["lang"] = null;
@state()
private isSubmitting: boolean = false;
@ -340,7 +343,7 @@ export class CrawlTemplatesNew extends LiteElement {
<sl-input
name="crawlTimeoutMinutes"
label=${msg("Time Limit")}
placeholder=${msg("unlimited")}
placeholder=${msg("Unlimited")}
type="number"
>
<span slot="suffix">${msg("minutes")}</span>
@ -359,7 +362,7 @@ export class CrawlTemplatesNew extends LiteElement {
>
<div class="col-span-1">
<sl-select name="scale" value="1">
<label slot="label">
<div slot="label">
<span class="inline-block align-middle">
${msg("Crawler Instances")}
</span>
@ -372,12 +375,34 @@ export class CrawlTemplatesNew extends LiteElement {
name="info-circle"
></sl-icon
></sl-tooltip>
</label>
</div>
<sl-menu-item value="1">${msg("1")}</sl-menu-item>
<sl-menu-item value="2">${msg("2")}</sl-menu-item>
<sl-menu-item value="3">${msg("3")}</sl-menu-item>
</sl-select>
</div>
<div class="col-span-1">
<btrix-language-select
@sl-select=${(e: CustomEvent) =>
(this.browserLanguage = e.detail.item.value)}
@sl-clear=${() => (this.browserLanguage = null)}
>
<div slot="label">
<span class="inline-block align-middle">
${msg("Language")}
</span>
<sl-tooltip
content=${msg(
"The browser language setting used when crawling."
)}
><sl-icon
class="inline-block align-middle ml-1 text-neutral-500"
name="info-circle"
></sl-icon
></sl-tooltip>
</div>
</btrix-language-select>
</div>
<div class="col-span-1 flex justify-between">
<h4 class="font-medium">
${this.isConfigCodeView
@ -443,7 +468,7 @@ export class CrawlTemplatesNew extends LiteElement {
label=${msg("Page Limit")}
type="number"
value=${ifDefined(this.initialCrawlTemplate.config.limit)}
placeholder=${msg("unlimited")}
placeholder=${msg("Unlimited")}
>
<span slot="suffix">${msg("pages")}</span>
</sl-input>
@ -513,6 +538,7 @@ export class CrawlTemplatesNew extends LiteElement {
crawlTimeout: crawlTimeoutMinutes ? +crawlTimeoutMinutes * 60 : 0,
scale: +scale,
profileid: this.browserProfileId,
lang: this.browserLanguage || null,
};
if (this.isConfigCodeView) {

View File

@ -37,6 +37,7 @@ type SeedConfig = {
/**
 * Crawl configuration: the `SeedConfig` fields combined with the seed list
 * and the optional settings below.
 */
export type CrawlConfig = {
  /** Seeds, each either a bare URL string or a `SeedConfig` object with a `url`. */
  seeds: Array<string | ({ url: string } & SeedConfig)>;
  /** Optional list of exclusion strings. */
  exclude?: string[];
  /** Optional browser language used when crawling; `null` or absent means default. */
  lang?: string | null;
} & SeedConfig;
export type CrawlTemplate = {

View File

@ -85,7 +85,7 @@ const theme = css`
/* Elevate select and buttons */
sl-select::part(control),
sl-button::part(base) {
sl-button:not([variant="text"])::part(base) {
box-shadow: var(--sl-shadow-small);
}

View File

@ -3343,6 +3343,11 @@ isexe@^2.0.0:
resolved "https://registry.yarnpkg.com/isexe/-/isexe-2.0.0.tgz#e8fbf374dc556ff8947a10dcb0572d633f2cfa10"
integrity sha1-6PvzdNxVb/iUehDcsFctYz8s+hA=
iso-639-1@^2.1.15:
version "2.1.15"
resolved "https://registry.yarnpkg.com/iso-639-1/-/iso-639-1-2.1.15.tgz#20cf78a4f691aeb802c16f17a6bad7d99271e85d"
integrity sha512-7c7mBznZu2ktfvyT582E2msM+Udc1EjOyhVRE/0ZsjD9LBtWSm23h3PtiRh2a35XoUsTQQjJXaJzuLjXsOdFDg==
isobject@^3.0.1:
version "3.0.1"
resolved "https://registry.yarnpkg.com/isobject/-/isobject-3.0.1.tgz#4e431e92b11a9731636aa1f9c8d1ccbcfdab78df"