feat: Add additional context around workflow job type options (#2032)

- Updates workflow job type copy and adds additional clarifying text
- Changes "List of URLs" label to "Crawl URL(s)"
- Refactors `NewWorkflowDialog` into a `TailwindElement`
sua yoo 2024-08-21 14:03:43 -07:00 committed by GitHub
parent 3605d07547
commit 2ca9632057
GPG Key ID: B5690EEEBB952194
4 changed files with 126 additions and 84 deletions


@@ -4,9 +4,13 @@
The first step in creating a new [crawl workflow](crawl-workflows.md) is to choose what type of crawl you want to run. Crawl types are fixed and cannot be converted or changed later.
### Known URLs
`URL List`{ .badge-blue }
: The crawler visits every URL specified in a list, and optionally every URL linked on those pages.
### Automated Discovery
`Seeded Crawl`{ .badge-orange }
: The crawler automatically discovers and archives pages starting from a single seed URL.
@@ -18,7 +22,7 @@ The first step in creating a new [crawl workflow](crawl-workflows.md) is to choo
**These credentials WILL BE WRITTEN into the archive.** We recommend exercising caution and only archiving with dedicated archival accounts, changing your password or deleting the account when finished.
### List of URLs
### Crawl URL(s)
`URL List`{ .badge-blue } `Seeded Crawl`{ .badge-orange }
@@ -30,10 +34,10 @@ It is also available under the _Additional URLs_ section for Seeded Crawls where
`URL List`{ .badge-blue }
When enabled, the crawler will visit all the links it finds within each page defined in the _List of URLs_ field.
When enabled, the crawler will visit all the links it finds within each page defined in the _Crawl URL(s)_ field.
??? example "Crawling tags & search queries with URL List crawls"
This setting can be useful for crawling the content of specific tags or search queries. Specify the tag or search query URL(s) in the _List of URLs_ field, e.g: `https://example.com/search?q=tag`, and enable _Include Any Linked Page_ to crawl all the content present on that search query page.
This setting can be useful for crawling the content of specific tags or search queries. Specify the tag or search query URL(s) in the _Crawl URL(s)_ field, e.g: `https://example.com/search?q=tag`, and enable _Include Any Linked Page_ to crawl all the content present on that search query page.
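For instance (hypothetical URLs), the _Crawl URL(s)_ field for such a workflow might contain one search or tag page per line:

```
https://example.com/search?q=web+archiving
https://example.com/tag/preservation
```

With _Include Any Linked Page_ enabled, every page linked from those result pages is crawled as well.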
### Fail Crawl on Failed URL
@@ -235,7 +239,7 @@ When enabled, a crawl will run immediately on save as if the `Run Immediately on
### Name
Allows a custom name to be set for the workflow. If no name is set, the workflow's name will be set to the _Crawl Start URL_. For URL List crawls, the workflow's name will be set to the first URL present in the _List of URLs_ field, with an added `(+x)` where `x` represents the total number of URLs in the list.
Allows a custom name to be set for the workflow. If no name is set, the workflow's name will be set to the _Crawl Start URL_. For URL List crawls, the workflow's name will be set to the first URL present in the _Crawl URL(s)_ field, with an added `(+x)` where `x` represents the total number of URLs in the list.
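The naming rule above can be sketched as a small function (the function name and the single-URL behavior are assumptions for illustration, not Browsertrix internals):

```typescript
// Sketch of the default-name rule: the first URL in the Crawl URL(s)
// field, plus "(+x)" where x is the total number of URLs in the list.
function defaultWorkflowName(urlList: string[]): string {
  const first = urlList[0] ?? "";
  // Assumed: no "(+x)" suffix when the list holds a single URL.
  return urlList.length > 1 ? `${first} (+${urlList.length})` : first;
}
```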
### Description


@@ -327,7 +327,7 @@ export class ConfigDetails extends LiteElement {
return html`
${this.renderSetting(
msg("List of URLs"),
msg("Crawl URL(s)"),
html`
<ul>
${this.seeds?.map(


@@ -1,7 +1,8 @@
import { localized, msg } from "@lit/localize";
import { css, html, LitElement } from "lit";
import { html } from "lit";
import { customElement, property } from "lit/decorators.js";
import { TailwindElement } from "@/classes/TailwindElement";
import seededCrawlSvg from "~assets/images/new-crawl-config_Seeded-Crawl.svg";
import urlListSvg from "~assets/images/new-crawl-config_URL-List.svg";
@@ -12,78 +13,22 @@ export type SelectJobTypeEvent = CustomEvent<"url-list" | "seed-crawl">;
*/
@localized()
@customElement("btrix-new-workflow-dialog")
export class NewWorkflowDialog extends LitElement {
// postcss-lit-disable-next-line
static styles = css`
.title,
.container {
margin: var(--sl-spacing-large) 0;
}
.container {
display: flex;
flex-wrap: wrap;
gap: var(--sl-spacing-4x-large);
justify-content: center;
}
.heading {
font-size: var(--sl-font-size-large);
font-weight: var(--sl-font-weight-semibold);
margin-top: 0;
margin-bottom: var(--sl-spacing-small);
line-height: 1;
}
.description {
color: var(--sl-color-neutral-500);
margin: 0;
}
.jobTypeButton {
padding: 0.25rem;
display: block;
width: 16.5rem;
cursor: pointer;
background: none;
text-align: left;
border: none;
border-radius: 0.75rem;
}
figure {
margin: 0;
padding: 0;
}
.jobTypeButton:hover .jobTypeImg {
transform: scale(1.05);
}
.jobTypeImg {
width: 100%;
max-height: 9rem;
transition-property: transform;
transition-timing-function: cubic-bezier(0.4, 0, 0.2, 1);
transition-duration: 150ms;
margin-bottom: var(--sl-spacing-small);
}
`;
export class NewWorkflowDialog extends TailwindElement {
@property({ type: Boolean })
open = false;
render() {
return html`
<btrix-dialog
.label=${msg("Choose New Workflow Type")}
.label=${msg("What would you like to crawl?")}
.open=${this.open}
style="--width: 46rem"
>
<div class="container">
<div
class="mb-7 mt-5 flex flex-col items-center justify-center gap-7 md:flex-row md:items-start md:gap-16"
>
<button
tabindex="2"
class="jobTypeButton"
class="group block w-[16.5rem] text-left"
@click=${() => {
this.dispatchEvent(
new CustomEvent("select-job-type", {
@@ -93,20 +38,26 @@ export class NewWorkflowDialog extends LitElement {
}}
>
<figure>
<img class="jobTypeImg" src=${urlListSvg} />
<figcaption>
<div class="heading">${msg("URL List")}</div>
<p class="description">
<img
class="block transition-transform group-hover:scale-105"
src=${urlListSvg}
/>
<figcaption class="p-1">
<div
class="my-2 text-lg font-semibold leading-none transition-colors group-hover:text-primary-700"
>
${msg("Known URLs")}
</div>
<p class="text-balance leading-normal text-neutral-700">
${msg(
"The crawler visits every URL specified in a list, and optionally every URL linked on those pages.",
"Choose this option to crawl a single page, or if you already know the URL of every page you'd like to crawl.",
)}
</p>
</figcaption>
</figure>
</button>
<button
tabindex="1"
class="jobTypeButton"
class="group block w-[16.5rem] text-left"
@click=${() => {
this.dispatchEvent(
new CustomEvent("select-job-type", {
@@ -116,19 +67,106 @@ export class NewWorkflowDialog extends LitElement {
}}
>
<figure>
<img class="jobTypeImg" src=${seededCrawlSvg} />
<figcaption>
<div class="heading">${msg("Seeded Crawl")}</div>
<p class="description">
<img
class="block transition-transform group-hover:scale-105"
src=${seededCrawlSvg}
/>
<figcaption class="p-1">
<div
class="my-2 text-lg font-semibold leading-none transition-colors group-hover:text-primary-700"
>
${msg("Automated Discovery")}
</div>
<p class="text-balance leading-normal text-neutral-700">
${msg(
"The crawler automatically discovers and archives pages starting from a single seed URL.",
"Let the crawler automatically discover pages based on a domain or start page that you specify.",
)}
</p>
</figcaption>
</figure>
</div>
</button>
</button>
</div>
<sl-details
summary=${msg("Need help deciding?")}
@sl-hide=${this.stopProp}
@sl-after-hide=${this.stopProp}
>
<p class="mb-3">
${msg(
html`Choose <strong>Known URLs</strong> (aka a "URL List" crawl
type) if:`,
)}
</p>
<ul class="mb-3 list-disc pl-5">
<li>${msg("You want to archive a single page on a website")}</li>
<li>
${msg("You're archiving just a few specific pages on a website")}
</li>
<li>
${msg("You have a list of URLs that you can copy-and-paste")}
</li>
</ul>
<p class="mb-3">
${msg(
html`A URL list is simpler to configure, since you don't need to
worry about configuring the workflow to exclude parts of the
website that you may not want to archive.`,
)}
</p>
<p class="mb-3">
${msg(
html`Choose <strong>Automated Discovery</strong> (aka a "Seeded
Crawl" crawl type) if:`,
)}
</p>
<ul class="mb-3 list-disc pl-5">
<li>${msg("You want to archive an entire website")}</li>
<li>
${msg(
html`You're archiving a subset of a website, like everything
under <em>website.com/your-username</em>`,
)}
</li>
<li>
${msg(
html`You're archiving a website <em>and</em> external pages
linked to from the website`,
)}
</li>
</ul>
<p class="mb-3">
${msg(
html`Seeded crawls are great for advanced use cases where you
don't need to know every single URL that you want to archive. You
can configure reasonable crawl limits and page limits so that you
don't crawl more than you need to.`,
)}
</p>
<p>
${msg(
html`Once you choose a crawl type, you can't go back and change
it. Check out the
<a
class="text-blue-500 hover:text-blue-600"
href="https://docs.browsertrix.com/user-guide/workflow-setup/"
target="_blank"
>crawl workflow setup guide</a
>
if you still need help deciding on a crawl type, and try our
<a
class="text-blue-500 hover:text-blue-600"
href="https://forum.webrecorder.net/c/help/5"
target="_blank"
>community help forum</a
>.`,
)}
</p>
</sl-details>
</btrix-dialog>
`;
}
/**
 * Stops `sl-hide` and `sl-after-hide` events from the nested
 * `<sl-details>` from bubbling up to the parent `<btrix-dialog>`,
 * where they could be mistaken for the dialog itself closing.
 */
private stopProp(e: Event) {
e.stopPropagation();
}
}
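Since `SelectJobTypeEvent` is a `CustomEvent<"url-list" | "seed-crawl">`, a host component can branch on `event.detail` when the dialog fires `select-job-type`. A minimal sketch, assuming illustrative route strings rather than actual Browsertrix paths:

```typescript
// The dialog dispatches "select-job-type" with detail "url-list" | "seed-crawl".
type JobType = "url-list" | "seed-crawl";

// Illustrative mapping from the selected job type to a new-workflow route;
// the path shape is an assumption, not the app's real routing.
function newWorkflowRoute(jobType: JobType): string {
  return `/workflows/new?jobType=${jobType}`;
}

// In the host component (browser context), wiring might look like:
// dialog.addEventListener("select-job-type", (e: SelectJobTypeEvent) => {
//   navigateTo(newWorkflowRoute(e.detail));
// });
```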


@@ -951,7 +951,7 @@ export class CrawlConfigEditor extends LiteElement {
<sl-textarea
name="urlList"
class="textarea-wrap"
label=${msg("List of URLs")}
label=${msg("Crawl URL(s)")}
rows="10"
autocomplete="off"
inputmode="url"
@@ -1364,7 +1364,7 @@ https://example.net`}
${this.renderFormCol(html`
<sl-textarea
name="urlList"
label=${msg("List of URLs")}
label=${msg("Crawl URL(s)")}
rows="3"
autocomplete="off"
inputmode="url"