Limit URL list entry to maximum URLs (#1242)

- Limits URL list entry to 1,000 URLs
- Limits additional URL list entry to 100 URLs
- Shows first invalid URL in list in error message
- Quick and dirty fix for long URLs wrapping: Show URLs in list on one line, with entire container scrolling
---------

Co-authored-by: Henry Wilkinson <henry@wilkinson.graphics>
This commit is contained in:
sua yoo 2023-10-03 21:02:32 -07:00 committed by GitHub
parent 99ccdf2de8
commit 38efeccc25
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 102 additions and 44 deletions

View File

@ -290,12 +290,13 @@ export class ConfigDetails extends LiteElement {
private renderConfirmUrlListSettings = () => { private renderConfirmUrlListSettings = () => {
const crawlConfig = this.crawlConfig; const crawlConfig = this.crawlConfig;
return html` return html`
${this.renderSetting( ${this.renderSetting(
msg("List of URLs"), msg("List of URLs"),
html` html`
<ul> <ul class="whitespace-nowrap overflow-x-auto overflow-y-hidden">
${this.seeds?.map((seed: Seed) => html` <li>${seed.url}</li> `)} ${this.seeds?.map((seed: Seed) => html`<li>${seed.url}</li>`)}
</ul> </ul>
`, `,
true true
@ -368,7 +369,7 @@ export class ConfigDetails extends LiteElement {
msg("List of Additional URLs"), msg("List of Additional URLs"),
additionalUrlList?.length additionalUrlList?.length
? html` ? html`
<ul> <ul class="whitespace-nowrap overflow-x-auto overflow-y-hidden">
${additionalUrlList.map( ${additionalUrlList.map(
(seed) => (seed) =>
html`<li>${typeof seed === "string" ? seed : seed.url}</li>` html`<li>${typeof seed === "string" ? seed : seed.url}</li>`

View File

@ -201,7 +201,7 @@ function getLocalizedWeekDays() {
} }
function validURL(url: string) { function validURL(url: string) {
return /((([A-Za-z]{3,9}:(?:\/\/)?)(?:[\-;:&=\+\$,\w]+@)?[A-Za-z0-9\.\-]+|(?:www\.|[\-;:&=\+\$,\w]+@)[A-Za-z0-9\.\-]+)((?:\/[\+~%\/\.\w\-_]*)?\??(?:[\-\+=&;%@\.\w_]*)#?(?:[\.\!\/\\\w]*))?)/.test( return /((((https?):(?:\/\/)?)(?:[\-;:&=\+\$,\w]+@)?[A-Za-z0-9\.\-]+|(?:www\.|[\-;:&=\+\$,\w]+@)[A-Za-z0-9\.\-]+)((?:\/[\+~%\/\.\w\-_]*)?\??(?:[\-\+=&;%@\.\w_]*)#?(?:[\.\!\/\\\w]*))?)/.test(
url url
); );
} }
@ -220,6 +220,7 @@ const DEFAULT_BEHAVIORS = [
"siteSpecific", "siteSpecific",
]; ];
const BYTES_PER_GB = 1e9; const BYTES_PER_GB = 1e9;
const URL_LIST_MAX_URLS = 1000;
@localized() @localized()
export class CrawlConfigEditor extends LiteElement { export class CrawlConfigEditor extends LiteElement {
@ -910,6 +911,7 @@ export class CrawlConfigEditor extends LiteElement {
${this.renderFormCol(html` ${this.renderFormCol(html`
<sl-textarea <sl-textarea
name="urlList" name="urlList"
class="textarea-wrap"
label=${msg("List of URLs")} label=${msg("List of URLs")}
rows="10" rows="10"
autocomplete="off" autocomplete="off"
@ -918,34 +920,42 @@ export class CrawlConfigEditor extends LiteElement {
placeholder=${`https://example.com placeholder=${`https://example.com
https://example.com/path`} https://example.com/path`}
required required
@sl-input=${async (e: Event) => { @keyup=${async (e: KeyboardEvent) => {
const inputEl = e.target as SlInput; if (e.key === "Enter") {
await inputEl.updateComplete; const inputEl = e.target as SlInput;
if ( await inputEl.updateComplete;
!inputEl.checkValidity() && if (!inputEl.value) return;
!urlListToArray(inputEl.value).some((url) => !validURL(url)) const { isValid, helpText } = this.validateUrlList(inputEl.value);
) { inputEl.helpText = helpText;
inputEl.setCustomValidity(""); if (isValid) {
inputEl.helpText = ""; inputEl.setCustomValidity("");
} else {
inputEl.setCustomValidity(helpText);
}
} }
}} }}
@sl-blur=${async (e: Event) => { @sl-input=${(e: CustomEvent) => {
const inputEl = e.target as SlInput; const inputEl = e.target as SlInput;
await inputEl.updateComplete; if (!inputEl.value) {
if ( inputEl.helpText = msg("At least 1 URL is required.");
inputEl.value && }
urlListToArray(inputEl.value).some((url) => !validURL(url)) }}
) { @sl-change=${async (e: CustomEvent) => {
const text = msg("Please fix invalid URL in list."); const inputEl = e.target as SlInput;
inputEl.helpText = text; if (!inputEl.value) return;
inputEl.setCustomValidity(text); const { isValid, helpText } = this.validateUrlList(inputEl.value);
inputEl.helpText = helpText;
if (isValid) {
inputEl.setCustomValidity("");
} else {
inputEl.setCustomValidity(helpText);
} }
}} }}
></sl-textarea> ></sl-textarea>
`)} `)}
${this.renderHelpTextCol( ${this.renderHelpTextCol(
msg(`The crawler will visit and record each URL listed in the order msg(str`The crawler will visit and record each URL listed in the order
defined here.`) defined here. You can enter a maximum of ${URL_LIST_MAX_URLS.toLocaleString()} URLs, separated by a new line.`)
)} )}
${when( ${when(
isCustom, isCustom,
@ -1117,6 +1127,7 @@ https://example.com/path`}
} }
const exclusions = trimArray(this.formState.exclusions || []); const exclusions = trimArray(this.formState.exclusions || []);
const additionalUrlList = urlListToArray(this.formState.urlList); const additionalUrlList = urlListToArray(this.formState.urlList);
const maxAdditionalURls = 100;
return html` return html`
${this.renderFormCol(html` ${this.renderFormCol(html`
@ -1311,34 +1322,48 @@ https://example.net`}
value=${this.formState.urlList} value=${this.formState.urlList}
placeholder=${`https://webrecorder.net/blog placeholder=${`https://webrecorder.net/blog
https://archiveweb.page/images/${"logo.svg"}`} https://archiveweb.page/images/${"logo.svg"}`}
@sl-input=${async (e: Event) => { @keyup=${async (e: KeyboardEvent) => {
const inputEl = e.target as SlInput; if (e.key === "Enter") {
await inputEl.updateComplete; const inputEl = e.target as SlInput;
if ( await inputEl.updateComplete;
!inputEl.checkValidity() && if (!inputEl.value) return;
!urlListToArray(inputEl.value).some((url) => !validURL(url)) const { isValid, helpText } = this.validateUrlList(
) { inputEl.value,
inputEl.setCustomValidity(""); maxAdditionalURls
inputEl.helpText = ""; );
inputEl.helpText = helpText;
if (isValid) {
inputEl.setCustomValidity("");
} else {
inputEl.setCustomValidity(helpText);
}
} }
}} }}
@sl-blur=${async (e: Event) => { @sl-input=${(e: CustomEvent) => {
const inputEl = e.target as SlInput; const inputEl = e.target as SlInput;
await inputEl.updateComplete; if (!inputEl.value) {
if ( inputEl.helpText = msg("At least 1 URL is required.");
inputEl.value && }
urlListToArray(inputEl.value).some((url) => !validURL(url)) }}
) { @sl-change=${async (e: CustomEvent) => {
const text = msg("Please fix invalid URL in list."); const inputEl = e.target as SlInput;
inputEl.helpText = text; if (!inputEl.value) return;
inputEl.setCustomValidity(text); const { isValid, helpText } = this.validateUrlList(
inputEl.value,
maxAdditionalURls
);
inputEl.helpText = helpText;
if (isValid) {
inputEl.setCustomValidity("");
} else {
inputEl.setCustomValidity(helpText);
} }
}} }}
></sl-textarea> ></sl-textarea>
`)} `)}
${this.renderHelpTextCol( ${this.renderHelpTextCol(
msg(`The crawler will visit and record each URL listed here. Other msg(str`The crawler will visit and record each URL listed here. Other
links on these pages will not be crawled.`) links on these pages will not be crawled. You can enter up to ${maxAdditionalURls.toLocaleString()} URLs.`)
)} )}
</div> </div>
</btrix-details> </btrix-details>
@ -2232,6 +2257,33 @@ https://archiveweb.page/images/${"logo.svg"}`}
`; `;
} }
/**
 * Validate the raw text of a URL list field.
 *
 * The text is split into entries with `urlListToArray`, then checked in
 * this order:
 *   1. more than `max` entries -> invalid, asks the user to shorten the list;
 *   2. any entry rejected by `validURL` -> invalid, names the first such entry;
 *   3. otherwise -> valid, help text reports how many URLs were entered.
 *
 * @param value - Raw field contents.
 * @param max - Largest number of entries accepted; defaults to
 *   `URL_LIST_MAX_URLS`.
 * @returns `isValid` plus the localized `helpText` to show under the field.
 */
private validateUrlList(
  value: string,
  max = URL_LIST_MAX_URLS
): { isValid: boolean; helpText: string } {
  const urlList = urlListToArray(value);

  // Over the limit: report that first and skip the per-URL format check.
  if (urlList.length > max) {
    return {
      isValid: false,
      helpText: msg(
        str`Please shorten list to ${max.toLocaleString()} or fewer URLs.`
      ),
    };
  }

  // Only the first offending entry is surfaced to the user.
  const invalidUrl = urlList.find((url) => !validURL(url));
  if (invalidUrl) {
    return {
      isValid: false,
      helpText: msg(
        str`Please remove or fix the following invalid URL: ${invalidUrl}`
      ),
    };
  }

  // Everything checks out: show a singular/plural count of entered URLs.
  if (urlList.length === 1) {
    return {
      isValid: true,
      helpText: msg(str`${urlList.length.toLocaleString()} URL entered`),
    };
  }
  return {
    isValid: true,
    helpText: msg(str`${urlList.length.toLocaleString()} URLs entered`),
  };
}
private onTagInput = (e: TagInputEvent) => { private onTagInput = (e: TagInputEvent) => {
const { value } = e.detail; const { value } = e.detail;
if (!value) return; if (!value) return;

View File

@ -232,6 +232,11 @@ const theme = css`
--help-text-align: right; --help-text-align: right;
} }
/* Disable soft wrapping in the internal textarea input so each entry stays on one line, e.g. for URL lists */
.textarea-wrap::part(textarea) {
white-space: pre;
}
/* Aesthetically closer to monospaced font: */ /* Aesthetically closer to monospaced font: */
.font-monostyle { .font-monostyle {
font-family: var(--font-monostyle-family); font-family: var(--font-monostyle-family);