diff --git a/frontend/docs/docs/user-guide/workflow-setup.md b/frontend/docs/docs/user-guide/workflow-setup.md index 0596ac79..cc75711a 100644 --- a/frontend/docs/docs/user-guide/workflow-setup.md +++ b/frontend/docs/docs/user-guide/workflow-setup.md @@ -60,7 +60,7 @@ _Site Crawl_ : This scope will crawl all pages on the domain and any subdomains found. If `example.com` is set as the _Crawl Start URL_, both pages on `example.com` and `subdomain.example.com` will be crawled. `Custom Page Prefix` -: This scope will crawl all pages that begin with the _Crawl Start URL_ as well as pages from any URL that begin with the URLs listed in `Extra URL Prefixes in Scope` +: This scope will crawl the _Crawl Start URL_ and then include only those pages that begin with the URLs listed in _URL Prefixes in Scope_. By default, _URL Prefixes in Scope_ will be prefilled with the prefix of the _Crawl Start URL_ up to and including the last `/`. For example, if `https://example.com/path/page` is set as the _Crawl Start URL_, `https://example.com/path/` will be automatically added to _URL Prefixes in Scope_. This prefix can then be removed or modified as needed. 
### Page URL(s) diff --git a/frontend/src/features/crawl-workflows/workflow-editor.ts b/frontend/src/features/crawl-workflows/workflow-editor.ts index 8305a1ed..9cc5b520 100644 --- a/frontend/src/features/crawl-workflows/workflow-editor.ts +++ b/frontend/src/features/crawl-workflows/workflow-editor.ts @@ -1034,12 +1034,12 @@ https://archiveweb.page/guide`} break; case ScopeType.Custom: helpText = msg( - html`Will crawl all page URLs that begin with + html`Will start with ${exampleDomain}${examplePathname} - or any URL that begins with those specified in - Extra URL Prefixes in Scope`, + and include only URLs that start with the + URL Prefixes in Scope listed below.`, ); break; default: @@ -1092,6 +1092,30 @@ https://archiveweb.page/guide`} true, ); } + if ( + this.formState.primarySeedUrl && + this.formState.scopeType === ScopeType.Custom && + !this.formState.customIncludeUrlList + ) { + let prefixUrl = this.formState.primarySeedUrl; + try { + const startingUrl = new URL(this.formState.primarySeedUrl); + prefixUrl = + startingUrl.origin + + startingUrl.pathname.slice( + 0, + startingUrl.pathname.lastIndexOf("/") + 1, + ); + } catch (e) { + // ignore + } + this.updateFormState( + { + customIncludeUrlList: prefixUrl, + }, + true, + ); + } }} >