feat: Add additional context around workflow job type options (#2032)
- Updates workflow job type copy and adds additional clarifying text
- Changes the "List of URLs" label to "Crawl URL(s)"
- Refactors `NewWorkflowDialog` into a Tailwind element
This commit is contained in:
parent 3605d07547
commit 2ca9632057
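For context on the `NewWorkflowDialog` refactor noted above: the component drops its `static styles` CSS block and extends a shared `TailwindElement` base class so templates can use Tailwind utility classes directly. The sketch below shows one common way such a base class can be built; the actual implementation lives at `@/classes/TailwindElement` in this repo and may differ (the import path and the `?inline` CSS import are assumptions).

```ts
// Hypothetical sketch only: a LitElement base class that injects the compiled
// Tailwind stylesheet into each component's shadow root.
import { LitElement, unsafeCSS } from "lit";
// Assumes a bundler that can import CSS as a string (e.g. a `?inline` query).
import tailwind from "./tailwind.css?inline";

export class TailwindElement extends LitElement {
  // Every subclass gets the utility classes without declaring its own styles.
  static styles = [unsafeCSS(tailwind)];
}
```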
@@ -4,9 +4,13 @@
 The first step in creating a new [crawl workflow](crawl-workflows.md) is to choose what type of crawl you want to run. Crawl types are fixed and cannot be converted or changed later.
 
+### Known URLs
+
 `URL List`{ .badge-blue }
 : The crawler visits every URL specified in a list, and optionally every URL linked on those pages.
 
+### Automated Discovery
+
 `Seeded Crawl`{ .badge-orange }
 : The crawler automatically discovers and archives pages starting from a single seed URL.
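The two crawl types above correspond to the `"url-list"` and `"seed-crawl"` job types used later in this diff (see `SelectJobTypeEvent`). As a rough illustration only, the field names below are assumptions made for the example and not the exact Browsertrix workflow schema; the practical difference is whether you enumerate every URL yourself or give the crawler a starting point:

```ts
// Illustrative sketch of how the two crawl types differ conceptually.
type UrlListWorkflow = {
  jobType: "url-list";
  urls: string[]; // every page to archive is listed explicitly (hypothetical field)
};

type SeededWorkflow = {
  jobType: "seed-crawl";
  seedUrl: string; // a single start URL; pages are discovered from here (hypothetical field)
};
```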
@@ -18,7 +22,7 @@ The first step in creating a new [crawl workflow](crawl-workflows.md) is to choo
 
 **These credentials WILL BE WRITTEN into the archive.** We recommend exercising caution and only archiving with dedicated archival accounts, changing your password or deleting the account when finished.
 
-### List of URLs
+### Crawl URL(s)
 
 `URL List`{ .badge-blue } `Seeded Crawl`{ .badge-orange }
@@ -30,10 +34,10 @@ It is also available under the _Additional URLs_ section for Seeded Crawls where
 
 `URL List`{ .badge-blue }
 
-When enabled, the crawler will visit all the links it finds within each page defined in the _List of URLs_ field.
+When enabled, the crawler will visit all the links it finds within each page defined in the _Crawl URL(s)_ field.
 
 ??? example "Crawling tags & search queries with URL List crawls"
 
-    This setting can be useful for crawling the content of specific tags or search queries. Specify the tag or search query URL(s) in the _List of URLs_ field, e.g: `https://example.com/search?q=tag`, and enable _Include Any Linked Page_ to crawl all the content present on that search query page.
+    This setting can be useful for crawling the content of specific tags or search queries. Specify the tag or search query URL(s) in the _Crawl URL(s)_ field, e.g: `https://example.com/search?q=tag`, and enable _Include Any Linked Page_ to crawl all the content present on that search query page.
 
 ### Fail Crawl on Failed URL
@@ -235,7 +239,7 @@ When enabled, a crawl will run immediately on save as if the `Run Immediately on
 
 ### Name
 
-Allows a custom name to be set for the workflow. If no name is set, the workflow's name will be set to the _Crawl Start URL_. For URL List crawls, the workflow's name will be set to the first URL present in the _List of URLs_ field, with an added `(+x)` where `x` represents the total number of URLs in the list.
+Allows a custom name to be set for the workflow. If no name is set, the workflow's name will be set to the _Crawl Start URL_. For URL List crawls, the workflow's name will be set to the first URL present in the _Crawl URL(s)_ field, with an added `(+x)` where `x` represents the total number of URLs in the list.
 
 ### Description
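The default-name rule described in the _Name_ section above is simple enough to state as code. A minimal sketch (the helper name is made up for illustration, and the single-URL behavior is an assumption):

```ts
// Sketch of the documented rule: first URL plus "(+x)", where x is the total
// number of URLs in the list.
function defaultUrlListWorkflowName(urls: string[]): string {
  if (urls.length === 0) return "";
  if (urls.length === 1) return urls[0]; // assumed: no suffix for a single URL
  return `${urls[0]} (+${urls.length})`;
}

// defaultUrlListWorkflowName(["https://example.com", "https://example.net"])
//   -> "https://example.com (+2)"
```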
@@ -327,7 +327,7 @@ export class ConfigDetails extends LiteElement {
 
         return html`
           ${this.renderSetting(
-            msg("List of URLs"),
+            msg("Crawl URL(s)"),
             html`
               <ul>
                 ${this.seeds?.map(
@@ -1,7 +1,8 @@
 import { localized, msg } from "@lit/localize";
-import { css, html, LitElement } from "lit";
+import { html } from "lit";
 import { customElement, property } from "lit/decorators.js";
 
+import { TailwindElement } from "@/classes/TailwindElement";
 import seededCrawlSvg from "~assets/images/new-crawl-config_Seeded-Crawl.svg";
 import urlListSvg from "~assets/images/new-crawl-config_URL-List.svg";
@@ -12,78 +13,22 @@ export type SelectJobTypeEvent = CustomEvent<"url-list" | "seed-crawl">;
  */
 @localized()
 @customElement("btrix-new-workflow-dialog")
-export class NewWorkflowDialog extends LitElement {
-  // postcss-lit-disable-next-line
-  static styles = css`
-    .title,
-    .container {
-      margin: var(--sl-spacing-large) 0;
-    }
-
-    .container {
-      display: flex;
-      flex-wrap: wrap;
-      gap: var(--sl-spacing-4x-large);
-      justify-content: center;
-    }
-
-    .heading {
-      font-size: var(--sl-font-size-large);
-      font-weight: var(--sl-font-weight-semibold);
-      margin-top: 0;
-      margin-bottom: var(--sl-spacing-small);
-      line-height: 1;
-    }
-
-    .description {
-      color: var(--sl-color-neutral-500);
-      margin: 0;
-    }
-
-    .jobTypeButton {
-      padding: 0.25rem;
-      display: block;
-      width: 16.5rem;
-      cursor: pointer;
-      background: none;
-      text-align: left;
-      border: none;
-      border-radius: 0.75rem;
-    }
-
-    figure {
-      margin: 0;
-      padding: 0;
-    }
-
-    .jobTypeButton:hover .jobTypeImg {
-      transform: scale(1.05);
-    }
-
-    .jobTypeImg {
-      width: 100%;
-      max-height: 9rem;
-      transition-property: transform;
-      transition-timing-function: cubic-bezier(0.4, 0, 0.2, 1);
-      transition-duration: 150ms;
-      margin-bottom: var(--sl-spacing-small);
-    }
-  `;
-
+export class NewWorkflowDialog extends TailwindElement {
   @property({ type: Boolean })
   open = false;
 
   render() {
     return html`
       <btrix-dialog
-        .label=${msg("Choose New Workflow Type")}
+        .label=${msg("What would you like to crawl?")}
        .open=${this.open}
        style="--width: 46rem"
       >
-        <div class="container">
+        <div
+          class="mb-7 mt-5 flex flex-col items-center justify-center gap-7 md:flex-row md:items-start md:gap-16"
+        >
           <button
             tabindex="2"
-            class="jobTypeButton"
+            class="group block w-[16.5rem] text-left"
             @click=${() => {
               this.dispatchEvent(
                 new CustomEvent("select-job-type", {
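One detail worth calling out in the hunk above: the removed `.jobTypeButton:hover .jobTypeImg { transform: scale(1.05); }` rule is replaced by Tailwind's `group` / `group-hover:` pattern, with `group` on the button and `group-hover:scale-105` on the image in the following hunks. A minimal sketch of that pattern in isolation (the class names come from this diff; the image path and the standalone template are illustrative only):

```ts
// Sketch of the hover-zoom pattern used by the refactored buttons.
import { html } from "lit";

export const jobTypeButtonSketch = html`
  <button class="group block w-[16.5rem] text-left">
    <!-- Scales up when any part of the parent "group" button is hovered. -->
    <img
      class="block transition-transform group-hover:scale-105"
      src="/assets/new-crawl-config_URL-List.svg"
    />
  </button>
`;
```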
@@ -93,20 +38,26 @@ export class NewWorkflowDialog extends LitElement {
             }}
           >
             <figure>
-              <img class="jobTypeImg" src=${urlListSvg} />
-              <figcaption>
-                <div class="heading">${msg("URL List")}</div>
-                <p class="description">
+              <img
+                class="block transition-transform group-hover:scale-105"
+                src=${urlListSvg}
+              />
+              <figcaption class="p-1">
+                <div
+                  class="my-2 text-lg font-semibold leading-none transition-colors group-hover:text-primary-700"
+                >
+                  ${msg("Known URLs")}
+                </div>
+                <p class="text-balance leading-normal text-neutral-700">
                   ${msg(
-                    "The crawler visits every URL specified in a list, and optionally every URL linked on those pages.",
+                    "Choose this option to crawl a single page, or if you already know the URL of every page you'd like to crawl.",
                   )}
                 </p>
               </figcaption>
             </figure>
           </button>
           <button
             tabindex="1"
-            class="jobTypeButton"
+            class="group block w-[16.5rem] text-left"
             @click=${() => {
               this.dispatchEvent(
                 new CustomEvent("select-job-type", {
@@ -116,19 +67,106 @@ export class NewWorkflowDialog extends LitElement {
             }}
           >
             <figure>
-              <img class="jobTypeImg" src=${seededCrawlSvg} />
-              <figcaption>
-                <div class="heading">${msg("Seeded Crawl")}</div>
-                <p class="description">
+              <img
+                class="block transition-transform group-hover:scale-105"
+                src=${seededCrawlSvg}
+              />
+              <figcaption class="p-1">
+                <div
+                  class="my-2 text-lg font-semibold leading-none transition-colors group-hover:text-primary-700"
+                >
+                  ${msg("Automated Discovery")}
+                </div>
+                <p class="text-balance leading-normal text-neutral-700">
                   ${msg(
-                    "The crawler automatically discovers and archives pages starting from a single seed URL.",
+                    "Let the crawler automatically discover pages based on a domain or start page that you specify.",
                   )}
                 </p>
               </figcaption>
             </figure>
-          </div>
-        </button>
+          </button>
+        </div>
+        <sl-details
+          summary=${msg("Need help deciding?")}
+          @sl-hide=${this.stopProp}
+          @sl-after-hide=${this.stopProp}
+        >
+          <p class="mb-3">
+            ${msg(
+              html`Choose <strong>Known URLs</strong> (aka a "URL List" crawl
+              type) if:`,
+            )}
+          </p>
+          <ul class="mb-3 list-disc pl-5">
+            <li>${msg("You want to archive a single page on a website")}</li>
+            <li>
+              ${msg("You're archiving just a few specific pages on a website")}
+            </li>
+            <li>
+              ${msg("You have a list of URLs that you can copy-and-paste")}
+            </li>
+          </ul>
+          <p class="mb-3">
+            ${msg(
+              html`A URL list is simpler to configure, since you don't need to
+              worry about configuring the workflow to exclude parts of the
+              website that you may not want to archive.`,
+            )}
+          </p>
+          <p class="mb-3">
+            ${msg(
+              html`Choose <strong>Automated Discovery</strong> (aka a "Seeded
+              Crawl" crawl type) if:`,
+            )}
+          </p>
+          <ul class="mb-3 list-disc pl-5">
+            <li>${msg("You want to archive an entire website")}</li>
+            <li>
+              ${msg(
+                html`You're archiving a subset of a website, like everything
+                under <em>website.com/your-username</em>`,
+              )}
+            </li>
+            <li>
+              ${msg(
+                html`You're archiving a website <em>and</em> external pages
+                linked to from the website`,
+              )}
+            </li>
+          </ul>
+          <p class="mb-3">
+            ${msg(
+              html`Seeded crawls are great for advanced use cases where you
+              don't need to know every single URL that you want to archive. You
+              can configure reasonable crawl limits and page limits so that you
+              don't crawl more than you need to.`,
+            )}
+          </p>
+          <p>
+            ${msg(
+              html`Once you choose a crawl type, you can't go back and change
+              it. Check out the
+              <a
+                class="text-blue-500 hover:text-blue-600"
+                href="https://docs.browsertrix.com/user-guide/workflow-setup/"
+                target="_blank"
+                >crawl workflow setup guide</a
+              >
+              if you still need help deciding on a crawl type, and try our
+              <a
+                class="text-blue-500 hover:text-blue-600"
+                href="https://forum.webrecorder.net/c/help/5"
+                target="_blank"
+                >community help forum</a
+              >.`,
+            )}
+          </p>
+        </sl-details>
       </btrix-dialog>
     `;
   }
+
+  private stopProp(e: Event) {
+    e.stopPropagation();
+  }
 }
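The dialog communicates the user's choice through the `SelectJobTypeEvent` custom event seen above. A minimal sketch of how a parent view might listen for it; the consumer component, its tag name, and the import path are made up for illustration and are not part of this commit:

```ts
import { html, LitElement } from "lit";
import { customElement } from "lit/decorators.js";
// Assumed relative path to the component shown in this diff.
import type { SelectJobTypeEvent } from "./new-workflow-dialog";

@customElement("example-workflows-new")
export class ExampleWorkflowsNew extends LitElement {
  render() {
    return html`
      <btrix-new-workflow-dialog
        ?open=${true}
        @select-job-type=${(e: SelectJobTypeEvent) => {
          // e.detail is "url-list" or "seed-crawl" per the event type above.
          console.log("selected crawl type:", e.detail);
        }}
      ></btrix-new-workflow-dialog>
    `;
  }
}
```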
@@ -951,7 +951,7 @@ export class CrawlConfigEditor extends LiteElement {
         <sl-textarea
           name="urlList"
           class="textarea-wrap"
-          label=${msg("List of URLs")}
+          label=${msg("Crawl URL(s)")}
           rows="10"
           autocomplete="off"
           inputmode="url"
@@ -1364,7 +1364,7 @@ https://example.net`}
       ${this.renderFormCol(html`
         <sl-textarea
           name="urlList"
-          label=${msg("List of URLs")}
+          label=${msg("Crawl URL(s)")}
           rows="3"
           autocomplete="off"
           inputmode="url"
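Both hunks above relabel the same `urlList` textarea, whose placeholder context (`https://example.net`) suggests one URL per line. A small illustrative helper for turning that field's value into a list of seed URLs; this is an assumption about the input format and is not taken from the editor's actual parsing code:

```ts
// Illustrative only: split a multi-line "Crawl URL(s)" value into individual
// seed URLs. The editor's real parsing and validation are not shown in this diff.
function parseUrlList(value: string): string[] {
  return value
    .split("\n")
    .map((line) => line.trim())
    .filter((line) => line.length > 0);
}

// parseUrlList("https://example.com\nhttps://example.net\n")
//   -> ["https://example.com", "https://example.net"]
```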