feat: Duplicate workflows with seed file (#2744)

Resolves https://github.com/webrecorder/browsertrix/issues/2732

## Changes

Allows users to duplicate workflows with a seed file.
This commit is contained in:
sua yoo 2025-07-22 21:20:12 -07:00 committed by GitHub
parent 795a1a6f58
commit 7df3cb718d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 137 additions and 43 deletions

View File

@ -27,6 +27,7 @@ import type { ProxiesAPIResponse } from "@/types/crawler";
import type { UserOrg } from "@/types/user";
import { isApiError } from "@/utils/api";
import type { ViewState } from "@/utils/APIRouter";
import type { DuplicateWorkflowSettings } from "@/utils/crawl-workflows/settingsForDuplicate";
import { DEFAULT_MAX_SCALE } from "@/utils/crawler";
import { type OrgData } from "@/utils/orgs";
import { AppStateService } from "@/utils/state";
@ -542,13 +543,15 @@ export class Org extends BtrixElement {
}
if (this.orgPath.startsWith("/workflows/new")) {
const { workflow, seeds, scopeType } = this.viewStateData || {};
const { workflow, seeds, seedFile, scopeType } = (this.viewStateData ||
{}) satisfies Partial<DuplicateWorkflowSettings>;
return html` <btrix-workflows-new
class="col-span-5"
?isCrawler=${this.appState.isCrawler}
.initialWorkflow=${workflow}
.initialSeeds=${seeds}
.initialSeedFile=${seedFile}
scopeType=${ifDefined(scopeType)}
@select-new-dialog=${this.onSelectNewDialog}
></btrix-workflows-new>`;

View File

@ -11,7 +11,7 @@ import { until } from "lit/directives/until.js";
import { when } from "lit/directives/when.js";
import queryString from "query-string";
import type { Crawl, CrawlLog, Seed, Workflow, WorkflowParams } from "./types";
import type { Crawl, CrawlLog, Seed, Workflow } from "./types";
import { BtrixElement } from "@/classes/BtrixElement";
import type { Alert } from "@/components/ui/alert";
@ -29,8 +29,9 @@ import { WorkflowTab } from "@/routes";
import { deleteConfirmation, noData, notApplicable } from "@/strings/ui";
import type { APIPaginatedList, APIPaginationQuery } from "@/types/api";
import { type CrawlState } from "@/types/crawlState";
import type { StorageSeedFile } from "@/types/workflow";
import { type StorageSeedFile } from "@/types/workflow";
import { isApiError } from "@/utils/api";
import { settingsForDuplicate } from "@/utils/crawl-workflows/settingsForDuplicate";
import {
DEFAULT_MAX_SCALE,
inactiveCrawlStates,
@ -2152,7 +2153,7 @@ export class WorkflowDetail extends BtrixElement {
<btrix-config-details
.crawlConfig=${this.workflow}
.seeds=${this.seeds?.items}
.seedFile=${this.seedFileTask.value}
.seedFile=${this.seedFileTask.value || undefined}
anchorLinks
></btrix-config-details>
</section>`;
@ -2307,26 +2308,45 @@ export class WorkflowDetail extends BtrixElement {
*/
private async duplicateConfig() {
if (!this.workflow) await this.workflowTask.taskComplete;
if (!this.seeds) await this.seedsTask.taskComplete;
if (this.workflow?.config.seedFileId) {
await this.seedFileTask.taskComplete;
} else {
await this.seedsTask.taskComplete;
}
await this.updateComplete;
if (!this.workflow) return;
const workflowParams: WorkflowParams = {
...this.workflow,
name: this.workflow.name ? msg(str`${this.workflow.name} Copy`) : "",
};
const seeds = this.seeds;
this.navigate.to(`${this.navigate.orgBasePath}/workflows/new`, {
workflow: workflowParams,
seeds: this.seeds?.items,
const settings = settingsForDuplicate({
workflow: this.workflow,
seeds,
seedFile: this.seedFileTask.value ?? undefined,
});
this.notify.toast({
message: msg(str`Copied Workflow to new template.`),
variant: "success",
icon: "check2-circle",
id: "workflow-copied-success",
});
this.navigate.to(`${this.navigate.orgBasePath}/workflows/new`, settings);
if (seeds && seeds.total > seeds.items.length) {
const urlCount = this.localize.number(seeds.items.length);
// This is likely an edge case for old workflows with >1,000 seeds
// or URL list workflows created via API.
this.notify.toast({
title: msg(str`Partially copied workflow settings`),
message: msg(str`Only the first ${urlCount} URLs were copied.`),
variant: "warning",
id: "workflow-copied-status",
});
} else {
this.notify.toast({
message: msg("Copied settings to new workflow."),
variant: "success",
icon: "check2-circle",
id: "workflow-copied-status",
});
}
}
private async delete(): Promise<void> {

View File

@ -12,7 +12,6 @@ import {
type ListWorkflow,
type Seed,
type Workflow,
type WorkflowParams,
} from "./types";
import { BtrixElement } from "@/classes/BtrixElement";
@ -33,8 +32,12 @@ import { WorkflowTab } from "@/routes";
import scopeTypeLabels from "@/strings/crawl-workflows/scopeType";
import { deleteConfirmation } from "@/strings/ui";
import type { APIPaginatedList, APIPaginationQuery } from "@/types/api";
import { NewWorkflowOnlyScopeType } from "@/types/workflow";
import {
NewWorkflowOnlyScopeType,
type StorageSeedFile,
} from "@/types/workflow";
import { isApiError } from "@/utils/api";
import { settingsForDuplicate } from "@/utils/crawl-workflows/settingsForDuplicate";
import { isArchivingDisabled } from "@/utils/orgs";
import { tw } from "@/utils/tailwind";
@ -52,8 +55,6 @@ const FILTER_BY_CURRENT_USER_STORAGE_KEY =
const INITIAL_PAGE_SIZE = 10;
const POLL_INTERVAL_SECONDS = 10;
const ABORT_REASON_THROTTLE = "throttled";
// NOTE Backend pagination max is 1000
const SEEDS_MAX = 1000;
const sortableFields: Record<
SortField,
@ -1049,33 +1050,38 @@ export class WorkflowsList extends BtrixElement {
* Create a new template using existing template data
*/
private async duplicateConfig(workflow: ListWorkflow) {
const [fullWorkflow, seeds] = await Promise.all([
this.getWorkflow(workflow),
this.getSeeds(workflow),
]);
const fullWorkflow = await this.getWorkflow(workflow);
let seeds;
let seedFile;
const workflowParams: WorkflowParams = {
...fullWorkflow,
name: workflow.name ? msg(str`${workflow.name} Copy`) : "",
};
if (fullWorkflow.config.seedFileId) {
seedFile = await this.getSeedFile(fullWorkflow.config.seedFileId);
} else {
seeds = await this.getSeeds(workflow);
}
this.navigate.to(`${this.navigate.orgBasePath}/workflows/new`, {
workflow: workflowParams,
seeds: seeds.items,
const settings = settingsForDuplicate({
workflow: fullWorkflow,
seeds,
seedFile,
});
if (seeds.total > SEEDS_MAX) {
this.navigate.to(`${this.navigate.orgBasePath}/workflows/new`, settings);
if (seeds && seeds.total > seeds.items.length) {
const urlCount = this.localize.number(seeds.items.length);
// This is likely an edge case for old workflows with >1,000 seeds
// or URL list workflows created via API.
this.notify.toast({
title: msg(str`Partially copied Workflow`),
message: msg(
str`Only first ${this.localize.number(SEEDS_MAX)} URLs were copied.`,
),
title: msg(str`Partially copied workflow settings`),
message: msg(str`The first ${urlCount} URLs were copied.`),
variant: "warning",
id: "workflow-copied-status",
});
} else {
this.notify.toast({
message: msg(str`Copied Workflow to new template.`),
message: msg("Copied settings to new workflow."),
variant: "success",
icon: "check2-circle",
id: "workflow-copied-status",
@ -1245,4 +1251,11 @@ export class WorkflowsList extends BtrixElement {
);
return data;
}
/**
 * Fetch a stored seed file from the current org's files endpoint.
 *
 * @param seedFileId - ID of the seed file to fetch.
 * @returns The response of the API request, typed as `StorageSeedFile`.
 */
private async getSeedFile(seedFileId: string) {
  const endpoint = `/orgs/${this.orgId}/files/${seedFileId}`;
  return this.api.fetch<StorageSeedFile>(endpoint);
}
}

View File

@ -1,6 +1,7 @@
import { localized, msg } from "@lit/localize";
import clsx from "clsx";
import { mergeDeep } from "immutable";
import { html } from "lit";
import { customElement, property } from "lit/decorators.js";
import { ifDefined } from "lit/directives/if-defined.js";
import { when } from "lit/directives/when.js";
@ -8,9 +9,9 @@ import type { PartialDeep } from "type-fest";
import { ScopeType, type Seed, type WorkflowParams } from "./types";
import { BtrixElement } from "@/classes/BtrixElement";
import { pageNav, type Breadcrumb } from "@/layouts/pageHeader";
import { WorkflowScopeType } from "@/types/workflow";
import LiteElement, { html } from "@/utils/LiteElement";
import { WorkflowScopeType, type StorageSeedFile } from "@/types/workflow";
import { tw } from "@/utils/tailwind";
import {
DEFAULT_AUTOCLICK_SELECTOR,
@ -28,13 +29,16 @@ import {
*/
@customElement("btrix-workflows-new")
@localized()
export class WorkflowsNew extends LiteElement {
export class WorkflowsNew extends BtrixElement {
@property({ type: Boolean })
isCrawler!: boolean;
@property({ type: Array })
initialSeeds?: Seed[];
@property({ type: Object })
initialSeedFile?: StorageSeedFile;
@property({ type: String })
scopeType?: WorkflowFormState["scopeType"];
@ -76,7 +80,7 @@ export class WorkflowsNew extends LiteElement {
private renderBreadcrumbs() {
const breadcrumbs: Breadcrumb[] = [
{
href: `${this.orgBasePath}/workflows`,
href: `${this.navigate.orgBasePath}/workflows`,
content: msg("Crawl Workflows"),
},
{
@ -148,6 +152,7 @@ export class WorkflowsNew extends LiteElement {
)}
.initialWorkflow=${initialWorkflow}
.initialSeeds=${this.initialSeeds}
.initialSeedFile=${this.initialSeedFile}
></btrix-workflow-editor>
`;
})}

View File

@ -0,0 +1,50 @@
/**
* Assemble the settings (workflow params, scope type, seeds or seed file) used to duplicate a workflow
*/
import { msg, str } from "@lit/localize";
import type { APIPaginatedList } from "@/types/api";
import type {
ScopeType,
Seed,
Workflow,
WorkflowParams,
} from "@/types/crawler";
import {
NewWorkflowOnlyScopeType,
type StorageSeedFile,
} from "@/types/workflow";
/**
 * Settings produced by `settingsForDuplicate` and passed as navigation state
 * to the new-workflow route when an existing workflow is duplicated.
 */
export type DuplicateWorkflowSettings = {
// Parameters of the copied workflow (with the name adjusted for the copy).
workflow: WorkflowParams;
// Scope type to preselect; may be a scope type that only exists for new workflows.
scopeType?: ScopeType | NewWorkflowOnlyScopeType;
// Seed URLs copied from the source workflow, when any were provided.
seeds?: Seed[];
// Stored seed file copied from the source workflow, when one was provided.
seedFile?: StorageSeedFile;
};
/**
 * Build the settings used to prefill a new workflow from an existing one.
 *
 * The copy keeps every field of the source workflow except its name, which
 * becomes the localized "<name> Copy" (or an empty string when the source
 * workflow is unnamed).
 *
 * @param options.workflow - Workflow being duplicated.
 * @param options.seeds - Paginated seed list of the workflow, if any.
 * @param options.seedFile - Seed file of the workflow, if any.
 * @returns Workflow params, the scope type to preselect, and the seed
 *   items and/or seed file to carry over.
 */
export function settingsForDuplicate({
  workflow,
  seeds,
  seedFile,
}: {
  workflow: Workflow;
  seeds?: APIPaginatedList<Seed>;
  seedFile?: StorageSeedFile;
}): DuplicateWorkflowSettings {
  const copyName = workflow.name ? msg(str`${workflow.name} Copy`) : "";
  const duplicated: WorkflowParams = { ...workflow, name: copyName };

  const urlList = seeds?.items;
  const hasMultipleSeeds = (urlList?.length ?? 0) > 1;
  // A seed file or more than one seed URL selects the page list scope;
  // otherwise the scope type of the source workflow is kept.
  const usesPageList = Boolean(seedFile) || hasMultipleSeeds;

  return {
    scopeType: usesPageList
      ? NewWorkflowOnlyScopeType.PageList
      : duplicated.config.scopeType,
    workflow: duplicated,
    seeds: urlList,
    seedFile,
  };
}

View File

@ -1,3 +1,6 @@
/**
* TODO Move to utils/crawl-configs/
*/
import { msg, str } from "@lit/localize";
import { z } from "zod";