feat: Duplicate workflows with seed file (#2744)

Resolves https://github.com/webrecorder/browsertrix/issues/2732

## Changes

Allows users to duplicate workflows with a seed file.
This commit is contained in:
sua yoo 2025-07-22 21:20:12 -07:00 committed by GitHub
parent 795a1a6f58
commit 7df3cb718d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 137 additions and 43 deletions

View File

@ -27,6 +27,7 @@ import type { ProxiesAPIResponse } from "@/types/crawler";
import type { UserOrg } from "@/types/user"; import type { UserOrg } from "@/types/user";
import { isApiError } from "@/utils/api"; import { isApiError } from "@/utils/api";
import type { ViewState } from "@/utils/APIRouter"; import type { ViewState } from "@/utils/APIRouter";
import type { DuplicateWorkflowSettings } from "@/utils/crawl-workflows/settingsForDuplicate";
import { DEFAULT_MAX_SCALE } from "@/utils/crawler"; import { DEFAULT_MAX_SCALE } from "@/utils/crawler";
import { type OrgData } from "@/utils/orgs"; import { type OrgData } from "@/utils/orgs";
import { AppStateService } from "@/utils/state"; import { AppStateService } from "@/utils/state";
@ -542,13 +543,15 @@ export class Org extends BtrixElement {
} }
if (this.orgPath.startsWith("/workflows/new")) { if (this.orgPath.startsWith("/workflows/new")) {
const { workflow, seeds, scopeType } = this.viewStateData || {}; const { workflow, seeds, seedFile, scopeType } = (this.viewStateData ||
{}) satisfies Partial<DuplicateWorkflowSettings>;
return html` <btrix-workflows-new return html` <btrix-workflows-new
class="col-span-5" class="col-span-5"
?isCrawler=${this.appState.isCrawler} ?isCrawler=${this.appState.isCrawler}
.initialWorkflow=${workflow} .initialWorkflow=${workflow}
.initialSeeds=${seeds} .initialSeeds=${seeds}
.initialSeedFile=${seedFile}
scopeType=${ifDefined(scopeType)} scopeType=${ifDefined(scopeType)}
@select-new-dialog=${this.onSelectNewDialog} @select-new-dialog=${this.onSelectNewDialog}
></btrix-workflows-new>`; ></btrix-workflows-new>`;

View File

@ -11,7 +11,7 @@ import { until } from "lit/directives/until.js";
import { when } from "lit/directives/when.js"; import { when } from "lit/directives/when.js";
import queryString from "query-string"; import queryString from "query-string";
import type { Crawl, CrawlLog, Seed, Workflow, WorkflowParams } from "./types"; import type { Crawl, CrawlLog, Seed, Workflow } from "./types";
import { BtrixElement } from "@/classes/BtrixElement"; import { BtrixElement } from "@/classes/BtrixElement";
import type { Alert } from "@/components/ui/alert"; import type { Alert } from "@/components/ui/alert";
@ -29,8 +29,9 @@ import { WorkflowTab } from "@/routes";
import { deleteConfirmation, noData, notApplicable } from "@/strings/ui"; import { deleteConfirmation, noData, notApplicable } from "@/strings/ui";
import type { APIPaginatedList, APIPaginationQuery } from "@/types/api"; import type { APIPaginatedList, APIPaginationQuery } from "@/types/api";
import { type CrawlState } from "@/types/crawlState"; import { type CrawlState } from "@/types/crawlState";
import type { StorageSeedFile } from "@/types/workflow"; import { type StorageSeedFile } from "@/types/workflow";
import { isApiError } from "@/utils/api"; import { isApiError } from "@/utils/api";
import { settingsForDuplicate } from "@/utils/crawl-workflows/settingsForDuplicate";
import { import {
DEFAULT_MAX_SCALE, DEFAULT_MAX_SCALE,
inactiveCrawlStates, inactiveCrawlStates,
@ -2152,7 +2153,7 @@ export class WorkflowDetail extends BtrixElement {
<btrix-config-details <btrix-config-details
.crawlConfig=${this.workflow} .crawlConfig=${this.workflow}
.seeds=${this.seeds?.items} .seeds=${this.seeds?.items}
.seedFile=${this.seedFileTask.value} .seedFile=${this.seedFileTask.value || undefined}
anchorLinks anchorLinks
></btrix-config-details> ></btrix-config-details>
</section>`; </section>`;
@ -2307,27 +2308,46 @@ export class WorkflowDetail extends BtrixElement {
*/ */
private async duplicateConfig() { private async duplicateConfig() {
if (!this.workflow) await this.workflowTask.taskComplete; if (!this.workflow) await this.workflowTask.taskComplete;
if (!this.seeds) await this.seedsTask.taskComplete;
if (this.workflow?.config.seedFileId) {
await this.seedFileTask.taskComplete;
} else {
await this.seedsTask.taskComplete;
}
await this.updateComplete; await this.updateComplete;
if (!this.workflow) return; if (!this.workflow) return;
const workflowParams: WorkflowParams = { const seeds = this.seeds;
...this.workflow,
name: this.workflow.name ? msg(str`${this.workflow.name} Copy`) : "",
};
this.navigate.to(`${this.navigate.orgBasePath}/workflows/new`, { const settings = settingsForDuplicate({
workflow: workflowParams, workflow: this.workflow,
seeds: this.seeds?.items, seeds,
seedFile: this.seedFileTask.value ?? undefined,
}); });
this.navigate.to(`${this.navigate.orgBasePath}/workflows/new`, settings);
if (seeds && seeds.total > seeds.items.length) {
const urlCount = this.localize.number(seeds.items.length);
// This is likely an edge case for old workflows with >1,000 seeds
// or URL list workflows created via API.
this.notify.toast({ this.notify.toast({
message: msg(str`Copied Workflow to new template.`), title: msg(str`Partially copied workflow settings`),
message: msg(str`Only the first ${urlCount} URLs were copied.`),
variant: "warning",
id: "workflow-copied-status",
});
} else {
this.notify.toast({
message: msg("Copied settings to new workflow."),
variant: "success", variant: "success",
icon: "check2-circle", icon: "check2-circle",
id: "workflow-copied-success", id: "workflow-copied-status",
}); });
} }
}
private async delete(): Promise<void> { private async delete(): Promise<void> {
if (!this.workflow) return; if (!this.workflow) return;

View File

@ -12,7 +12,6 @@ import {
type ListWorkflow, type ListWorkflow,
type Seed, type Seed,
type Workflow, type Workflow,
type WorkflowParams,
} from "./types"; } from "./types";
import { BtrixElement } from "@/classes/BtrixElement"; import { BtrixElement } from "@/classes/BtrixElement";
@ -33,8 +32,12 @@ import { WorkflowTab } from "@/routes";
import scopeTypeLabels from "@/strings/crawl-workflows/scopeType"; import scopeTypeLabels from "@/strings/crawl-workflows/scopeType";
import { deleteConfirmation } from "@/strings/ui"; import { deleteConfirmation } from "@/strings/ui";
import type { APIPaginatedList, APIPaginationQuery } from "@/types/api"; import type { APIPaginatedList, APIPaginationQuery } from "@/types/api";
import { NewWorkflowOnlyScopeType } from "@/types/workflow"; import {
NewWorkflowOnlyScopeType,
type StorageSeedFile,
} from "@/types/workflow";
import { isApiError } from "@/utils/api"; import { isApiError } from "@/utils/api";
import { settingsForDuplicate } from "@/utils/crawl-workflows/settingsForDuplicate";
import { isArchivingDisabled } from "@/utils/orgs"; import { isArchivingDisabled } from "@/utils/orgs";
import { tw } from "@/utils/tailwind"; import { tw } from "@/utils/tailwind";
@ -52,8 +55,6 @@ const FILTER_BY_CURRENT_USER_STORAGE_KEY =
const INITIAL_PAGE_SIZE = 10; const INITIAL_PAGE_SIZE = 10;
const POLL_INTERVAL_SECONDS = 10; const POLL_INTERVAL_SECONDS = 10;
const ABORT_REASON_THROTTLE = "throttled"; const ABORT_REASON_THROTTLE = "throttled";
// NOTE Backend pagination max is 1000
const SEEDS_MAX = 1000;
const sortableFields: Record< const sortableFields: Record<
SortField, SortField,
@ -1049,33 +1050,38 @@ export class WorkflowsList extends BtrixElement {
* Create a new template using existing template data * Create a new template using existing template data
*/ */
private async duplicateConfig(workflow: ListWorkflow) { private async duplicateConfig(workflow: ListWorkflow) {
const [fullWorkflow, seeds] = await Promise.all([ const fullWorkflow = await this.getWorkflow(workflow);
this.getWorkflow(workflow), let seeds;
this.getSeeds(workflow), let seedFile;
]);
const workflowParams: WorkflowParams = { if (fullWorkflow.config.seedFileId) {
...fullWorkflow, seedFile = await this.getSeedFile(fullWorkflow.config.seedFileId);
name: workflow.name ? msg(str`${workflow.name} Copy`) : "", } else {
}; seeds = await this.getSeeds(workflow);
}
this.navigate.to(`${this.navigate.orgBasePath}/workflows/new`, { const settings = settingsForDuplicate({
workflow: workflowParams, workflow: fullWorkflow,
seeds: seeds.items, seeds,
seedFile,
}); });
if (seeds.total > SEEDS_MAX) { this.navigate.to(`${this.navigate.orgBasePath}/workflows/new`, settings);
if (seeds && seeds.total > seeds.items.length) {
const urlCount = this.localize.number(seeds.items.length);
// This is likely an edge case for old workflows with >1,000 seeds
// or URL list workflows created via API.
this.notify.toast({ this.notify.toast({
title: msg(str`Partially copied Workflow`), title: msg(str`Partially copied workflow settings`),
message: msg( message: msg(str`The first ${urlCount} URLs were copied.`),
str`Only first ${this.localize.number(SEEDS_MAX)} URLs were copied.`,
),
variant: "warning", variant: "warning",
id: "workflow-copied-status", id: "workflow-copied-status",
}); });
} else { } else {
this.notify.toast({ this.notify.toast({
message: msg(str`Copied Workflow to new template.`), message: msg("Copied settings to new workflow."),
variant: "success", variant: "success",
icon: "check2-circle", icon: "check2-circle",
id: "workflow-copied-status", id: "workflow-copied-status",
@ -1245,4 +1251,11 @@ export class WorkflowsList extends BtrixElement {
); );
return data; return data;
} }
/**
 * Fetch the stored seed file record with the given ID from the current
 * org's `files` endpoint.
 *
 * @param seedFileId ID of the seed file to look up
 * @returns The API response, typed as `StorageSeedFile`
 */
private async getSeedFile(seedFileId: string) {
  const endpoint = `/orgs/${this.orgId}/files/${seedFileId}`;
  return await this.api.fetch<StorageSeedFile>(endpoint);
}
} }

View File

@ -1,6 +1,7 @@
import { localized, msg } from "@lit/localize"; import { localized, msg } from "@lit/localize";
import clsx from "clsx"; import clsx from "clsx";
import { mergeDeep } from "immutable"; import { mergeDeep } from "immutable";
import { html } from "lit";
import { customElement, property } from "lit/decorators.js"; import { customElement, property } from "lit/decorators.js";
import { ifDefined } from "lit/directives/if-defined.js"; import { ifDefined } from "lit/directives/if-defined.js";
import { when } from "lit/directives/when.js"; import { when } from "lit/directives/when.js";
@ -8,9 +9,9 @@ import type { PartialDeep } from "type-fest";
import { ScopeType, type Seed, type WorkflowParams } from "./types"; import { ScopeType, type Seed, type WorkflowParams } from "./types";
import { BtrixElement } from "@/classes/BtrixElement";
import { pageNav, type Breadcrumb } from "@/layouts/pageHeader"; import { pageNav, type Breadcrumb } from "@/layouts/pageHeader";
import { WorkflowScopeType } from "@/types/workflow"; import { WorkflowScopeType, type StorageSeedFile } from "@/types/workflow";
import LiteElement, { html } from "@/utils/LiteElement";
import { tw } from "@/utils/tailwind"; import { tw } from "@/utils/tailwind";
import { import {
DEFAULT_AUTOCLICK_SELECTOR, DEFAULT_AUTOCLICK_SELECTOR,
@ -28,13 +29,16 @@ import {
*/ */
@customElement("btrix-workflows-new") @customElement("btrix-workflows-new")
@localized() @localized()
export class WorkflowsNew extends LiteElement { export class WorkflowsNew extends BtrixElement {
@property({ type: Boolean }) @property({ type: Boolean })
isCrawler!: boolean; isCrawler!: boolean;
@property({ type: Array }) @property({ type: Array })
initialSeeds?: Seed[]; initialSeeds?: Seed[];
@property({ type: Object })
initialSeedFile?: StorageSeedFile;
@property({ type: String }) @property({ type: String })
scopeType?: WorkflowFormState["scopeType"]; scopeType?: WorkflowFormState["scopeType"];
@ -76,7 +80,7 @@ export class WorkflowsNew extends LiteElement {
private renderBreadcrumbs() { private renderBreadcrumbs() {
const breadcrumbs: Breadcrumb[] = [ const breadcrumbs: Breadcrumb[] = [
{ {
href: `${this.orgBasePath}/workflows`, href: `${this.navigate.orgBasePath}/workflows`,
content: msg("Crawl Workflows"), content: msg("Crawl Workflows"),
}, },
{ {
@ -148,6 +152,7 @@ export class WorkflowsNew extends LiteElement {
)} )}
.initialWorkflow=${initialWorkflow} .initialWorkflow=${initialWorkflow}
.initialSeeds=${this.initialSeeds} .initialSeeds=${this.initialSeeds}
.initialSeedFile=${this.initialSeedFile}
></btrix-workflow-editor> ></btrix-workflow-editor>
`; `;
})} })}

View File

@ -0,0 +1,50 @@
/**
 * Combine a workflow with its seeds or seed file into the settings
 * used to pre-fill a duplicate of that workflow.
 */
import { msg, str } from "@lit/localize";
import type { APIPaginatedList } from "@/types/api";
import type {
ScopeType,
Seed,
Workflow,
WorkflowParams,
} from "@/types/crawler";
import {
NewWorkflowOnlyScopeType,
type StorageSeedFile,
} from "@/types/workflow";
/**
 * Settings handed to the "new workflow" view when duplicating an existing
 * workflow. Produced by `settingsForDuplicate` and passed as navigation
 * state to `/workflows/new`.
 */
export type DuplicateWorkflowSettings = {
  /** Workflow parameters used as the initial values of the new workflow. */
  workflow: WorkflowParams;
  /** Scope type to preselect in the new workflow, if any. */
  scopeType?: ScopeType | NewWorkflowOnlyScopeType;
  /** Seeds copied from the source workflow, if any were loaded. */
  seeds?: Seed[];
  /** Seed file of the source workflow, if any was loaded. */
  seedFile?: StorageSeedFile;
};
/**
 * Build the settings needed to start a new workflow as a copy of an
 * existing one.
 *
 * The copy keeps every field of the source workflow, except that a
 * non-empty name gets a localized " Copy" suffix (an empty name stays
 * empty). The page list scope type is selected when a seed file is given
 * or when more than one seed was loaded; otherwise the scope type of the
 * source workflow's config is kept.
 *
 * @param source.workflow Workflow being duplicated
 * @param source.seeds Paginated seeds of the workflow, if loaded
 * @param source.seedFile Seed file of the workflow, if loaded
 * @returns Settings to pass along when navigating to the new workflow view
 */
export function settingsForDuplicate(source: {
  workflow: Workflow;
  seeds?: APIPaginatedList<Seed>;
  seedFile?: StorageSeedFile;
}): DuplicateWorkflowSettings {
  const { workflow: original, seedFile } = source;
  const seedList = source.seeds?.items;

  // Unnamed workflows stay unnamed rather than becoming just " Copy".
  let copyName = "";
  if (original.name) {
    copyName = msg(str`${original.name} Copy`);
  }
  const duplicated: WorkflowParams = { ...original, name: copyName };

  // A seed file or multiple seeds means the copy uses the page list scope.
  const usesPageList = Boolean(seedFile) || (seedList?.length ?? 0) > 1;

  let scopeType: DuplicateWorkflowSettings["scopeType"];
  if (usesPageList) {
    scopeType = NewWorkflowOnlyScopeType.PageList;
  } else {
    scopeType = duplicated.config.scopeType;
  }

  return {
    scopeType,
    workflow: duplicated,
    seeds: seedList,
    seedFile,
  };
}

View File

@ -1,3 +1,6 @@
/**
* TODO Move to utils/crawl-configs/
*/
import { msg, str } from "@lit/localize"; import { msg, str } from "@lit/localize";
import { z } from "zod"; import { z } from "zod";