feat: Apply saved workflow settings to current crawl (#2514)
Resolves https://github.com/webrecorder/browsertrix/issues/2366

## Changes

Allows users to update the currently running crawl with newly saved workflow settings.

## Manual testing

1. Log in as crawler
2. Start a crawl
3. Go to edit workflow. Verify the "Update Crawl" button is shown
4. Click "Update Crawl". Verify the crawl is updated with the new settings

---------

Co-authored-by: Ilya Kreymer <ikreymer@gmail.com>
Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
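For reviewers, the new flag can also be exercised directly against the workflow update endpoint. Below is a minimal sketch using `requests`; the host, endpoint path, and token handling are assumptions based on the existing crawlconfigs API rather than part of this change:

```python
import requests

API = "https://app.example.com/api"  # hypothetical deployment URL
HEADERS = {"Authorization": "Bearer <access-token>"}
ORG_ID = "my-org-uuid"  # placeholder
CID = "my-workflow-uuid"  # placeholder

# Save new settings and apply them to the crawl that is currently running.
# `updateRunning` is the flag added by this PR; `runNow` stays false, so no
# second crawl is started.
resp = requests.patch(
    f"{API}/orgs/{ORG_ID}/crawlconfigs/{CID}",
    headers=HEADERS,
    json={"scale": 2, "updateRunning": True},
)
print(resp.json())
# e.g. {"updated": true, "settings_changed": true, "metadata_changed": false,
#       "updatedRunning": true, ...}
```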
parent c4a7ebce29
commit 1fa43335c0
@@ -387,7 +387,7 @@ class CrawlConfigOps:
     async def update_crawl_config(
         self, cid: UUID, org: Organization, user: User, update: UpdateCrawlConfig
-    ) -> dict[str, bool | str]:
+    ) -> CrawlConfigUpdateResponse:
         # pylint: disable=too-many-locals, too-many-branches, too-many-statements
         """Update name, scale, schedule, and/or tags for an existing crawl config"""
@@ -455,11 +455,9 @@ class CrawlConfigOps:
         run_now = update.runNow

         if not changed and not metadata_changed and not run_now:
-            return {
-                "updated": True,
-                "settings_changed": changed,
-                "metadata_changed": metadata_changed,
-            }
+            return CrawlConfigUpdateResponse(
+                settings_changed=changed, metadata_changed=metadata_changed
+            )

         if changed:
             orig_dict = orig_crawl_config.dict(exclude_unset=True, exclude_none=True)
@@ -498,10 +496,11 @@ class CrawlConfigOps:
                 status_code=404, detail=f"Crawl Config '{cid}' not found"
             )

+        crawlconfig = CrawlConfig.from_dict(result)
+
         # update in crawl manager to change schedule
         if schedule_changed:
             try:
-                crawlconfig = CrawlConfig.from_dict(result)
                 await self.crawl_manager.update_scheduled_job(crawlconfig, str(user.id))

             except Exception as exc:
@@ -511,16 +510,24 @@ class CrawlConfigOps:
                 status_code=404, detail=f"Crawl Config '{cid}' not found"
             )

-        ret: dict[str, bool | str] = {
-            "updated": True,
-            "settings_changed": changed,
-            "metadata_changed": metadata_changed,
-            "storageQuotaReached": self.org_ops.storage_quota_reached(org),
-            "execMinutesQuotaReached": self.org_ops.exec_mins_quota_reached(org),
-        }
+        ret = CrawlConfigUpdateResponse(
+            settings_changed=changed,
+            metadata_changed=metadata_changed,
+            storageQuotaReached=self.org_ops.storage_quota_reached(org),
+            execMinutesQuotaReached=self.org_ops.exec_mins_quota_reached(org),
+        )

         if run_now:
             crawl_id = await self.run_now(cid, org, user)
-            ret["started"] = crawl_id
+            ret.started = crawl_id
+        elif update.updateRunning and changed:
+            running_crawl = await self.get_running_crawl(cid)
+            if running_crawl:
+                await self.crawl_manager.update_running_crawl_config(
+                    running_crawl.id, crawlconfig
+                )
+                ret.updatedRunning = True

         return ret

     async def update_usernames(self, userid: UUID, updated_name: str) -> None:
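To make the branch order in the hunk above easier to scan: `runNow` takes precedence, and the new `updateRunning` path only applies when settings actually changed and a crawl is currently running. A condensed, self-contained restatement (an illustration, not the actual implementation):

```python
def resolve_action(
    run_now: bool, update_running: bool, changed: bool, crawl_is_running: bool
) -> str:
    """Mirrors the if/elif ordering in update_crawl_config above."""
    if run_now:
        # runNow wins: a new crawl is started regardless of updateRunning
        return "start-new-crawl"
    if update_running and changed and crawl_is_running:
        # only patch in place when settings changed and a crawl is live
        return "patch-running-crawl"
    return "save-only"

assert resolve_action(True, True, True, True) == "start-new-crawl"
assert resolve_action(False, True, True, True) == "patch-running-crawl"
assert resolve_action(False, True, False, True) == "save-only"
```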
@@ -249,6 +249,25 @@ class CrawlManager(K8sAPI):
             crawl_id, {"lastConfigUpdate": date_to_str(dt_now())}
         )

+    async def update_running_crawl_config(
+        self, crawl_id: str, crawlconfig: CrawlConfig
+    ):
+        """force update of config for running crawl"""
+        time_now = date_to_str(dt_now())
+
+        # pylint: disable=use-dict-literal
+        patch = dict(
+            crawlerChannel=crawlconfig.crawlerChannel,
+            scale=crawlconfig.scale,
+            timeout=crawlconfig.crawlTimeout,
+            maxCrawlSize=crawlconfig.maxCrawlSize,
+            proxyId=crawlconfig.proxyId or DEFAULT_PROXY_ID,
+            lastConfigUpdate=time_now,
+            restartTime=time_now,
+        )
+
+        return await self._patch_job(crawl_id, patch)
+
     async def create_qa_crawl_job(
         self,
         crawlconfig: CrawlConfig,
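`_patch_job` itself is not part of this diff. As a rough sketch of the idea, an equivalent merge patch of the CrawlJob custom resource via `kubernetes_asyncio` could look like the following; the CRD group/version/plural, namespace, and resource naming are assumptions for illustration. The key point is that bumping `restartTime` signals the operator to restart crawler pods with the updated settings:

```python
from kubernetes_asyncio import client


async def patch_crawljob(api: client.CustomObjectsApi, crawl_id: str, patch: dict):
    # Merge-patch the CrawlJob spec; the operator reconciles the change, and
    # the bumped restartTime triggers a restart of the crawler with the new
    # settings.
    await api.patch_namespaced_custom_object(
        group="btrix.cloud",  # assumed CRD group
        version="v1",  # assumed CRD version
        namespace="crawlers",  # assumed crawler namespace
        plural="crawljobs",
        name=f"crawljob-{crawl_id}",  # assumed naming convention
        body={"spec": patch},
    )
```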
@@ -506,6 +506,7 @@ class UpdateCrawlConfig(BaseModel):
     description: Optional[str] = None
     autoAddCollections: Optional[List[UUID]] = None
     runNow: bool = False
+    updateRunning: bool = False

     # crawl data: revision tracked
     schedule: Optional[str] = None
@@ -578,9 +579,10 @@ class CrawlConfigSearchValues(BaseModel):
 class CrawlConfigUpdateResponse(BaseModel):
     """Response model for updating crawlconfigs"""

-    updated: bool
+    updated: bool = True
     settings_changed: bool
     metadata_changed: bool
+    updatedRunning: bool = False

     storageQuotaReached: Optional[bool] = False
     execMinutesQuotaReached: Optional[bool] = False
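For reference, the full response shape after this change, written out as a standalone Pydantic sketch; the `started` field is inferred from `ret.started = crawl_id` earlier in this diff and is assumed to be defined outside the lines shown in this hunk:

```python
from typing import Optional

from pydantic import BaseModel


class CrawlConfigUpdateResponse(BaseModel):
    """Mirrors the model above, plus the inferred `started` field"""

    updated: bool = True
    settings_changed: bool
    metadata_changed: bool
    updatedRunning: bool = False
    started: Optional[str] = None  # crawl id when runNow starts a crawl (assumed)

    storageQuotaReached: Optional[bool] = False
    execMinutesQuotaReached: Optional[bool] = False


print(CrawlConfigUpdateResponse(settings_changed=True, metadata_changed=False).json())
# {"updated": true, "settings_changed": true, "metadata_changed": false,
#  "updatedRunning": false, "started": null, "storageQuotaReached": false,
#  "execMinutesQuotaReached": false}
```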
@@ -1305,8 +1305,7 @@ def test_custom_behavior_logs(
         if log["context"] == "behaviorScriptCustom":
             assert log["message"] in (
                 "test-stat",
                 "done!",
-                "Using Site-Specific Behavior: TestBehavior",
-                "In Test Behavior!",
             )
             if log["message"] in ("test-stat", "done!"):
                 assert log["details"]["behavior"] == "TestBehavior"
@@ -1314,7 +1313,7 @@ def test_custom_behavior_logs(

             custom_log_line_count += 1

-    assert custom_log_line_count == 3
+    assert custom_log_line_count == 2


 def test_crawls_exclude_behavior_logs(
@@ -110,12 +110,12 @@ import {
   type WorkflowDefaults,
 } from "@/utils/workflow";

-type NewCrawlConfigParams = WorkflowParams & {
-  runNow: boolean;
+type CrawlConfigParams = WorkflowParams & {
   config: WorkflowParams["config"] & {
     seeds: Seed[];
   };
 };
+type WorkflowRunParams = { runNow: boolean; updateRunning?: boolean };

 const STEPS = SECTIONS;
 type StepName = (typeof STEPS)[number];
@@ -626,14 +626,14 @@ export class WorkflowEditor extends BtrixElement {
            type="button"
            ?disabled=${this.isSubmitting}
            ?loading=${this.isSubmitting}
-            @click=${this.save}
+            @click=${() => void this.save()}
          >
            ${msg("Save")}
          </sl-button>
        </sl-tooltip>
        <sl-tooltip
          content=${this.isCrawlRunning
-            ? msg("Crawl is already running")
+            ? msg("Save and apply settings to current crawl")
            : msg("Save and run with new settings")}
          ?disabled=${this.isCrawlRunning === null}
        >
@@ -641,13 +641,13 @@ export class WorkflowEditor extends BtrixElement {
            size="small"
            variant="primary"
            type="submit"
-            ?disabled=${isArchivingDisabled(this.org, true) ||
+            ?disabled=${(!this.isCrawlRunning &&
+              isArchivingDisabled(this.org, true)) ||
            this.isSubmitting ||
-            this.isCrawlRunning ||
            this.isCrawlRunning === null}
            ?loading=${this.isSubmitting || this.isCrawlRunning === null}
          >
-            ${msg(html`Run Crawl`)}
+            ${msg(this.isCrawlRunning ? "Update Crawl" : "Run Crawl")}
          </sl-button>
        </sl-tooltip>
      </footer>
@@ -2191,14 +2191,13 @@ https://archiveweb.page/images/${"logo.svg"}`}
   private async onSubmit(event: SubmitEvent) {
     event.preventDefault();

-    this.updateFormState({
-      runNow: true,
+    void this.save({
+      runNow: !this.isCrawlRunning,
+      updateRunning: Boolean(this.isCrawlRunning),
     });
-
-    void this.save();
   }

-  private async save() {
+  private async save(opts?: WorkflowRunParams) {
     if (!this.formElem) return;

     // TODO Move away from manual validation check
@@ -2235,7 +2234,15 @@ https://archiveweb.page/images/${"logo.svg"}`}
       return;
     }

-    const config = this.parseConfig();
+    const config: CrawlConfigParams & WorkflowRunParams = {
+      ...this.parseConfig(),
+      runNow: Boolean(opts?.runNow),
+    };
+
+    if (this.configId) {
+      config.updateRunning = Boolean(opts?.updateRunning);
+    }

     this.isSubmitting = true;

     try {
|
||||
}
|
||||
}
|
||||
|
||||
private parseConfig(): NewCrawlConfigParams {
|
||||
const config: NewCrawlConfigParams = {
|
||||
private parseConfig(): CrawlConfigParams {
|
||||
const config: CrawlConfigParams = {
|
||||
// Job types are now merged into a single type
|
||||
jobType: "custom",
|
||||
name: this.formState.jobName || "",
|
||||
description: this.formState.description,
|
||||
scale: this.formState.scale,
|
||||
profileid: this.formState.browserProfile?.id || "",
|
||||
runNow: this.formState.runNow,
|
||||
schedule: this.formState.scheduleType === "cron" ? this.utcSchedule : "",
|
||||
crawlTimeout: this.formState.crawlTimeoutMinutes * 60,
|
||||
maxCrawlSize: this.formState.maxCrawlSizeGB * BYTES_PER_GB,
|
||||
@ -2471,7 +2477,7 @@ https://archiveweb.page/images/${"logo.svg"}`}
|
||||
}
|
||||
|
||||
private parseUrlListConfig(): Pick<
|
||||
NewCrawlConfigParams["config"],
|
||||
CrawlConfigParams["config"],
|
||||
"seeds" | "scopeType" | "extraHops" | "useSitemap" | "failOnFailedSeed"
|
||||
> {
|
||||
const config = {
|
||||
@@ -2489,7 +2495,7 @@ https://archiveweb.page/images/${"logo.svg"}`}
   }

   private parseSeededConfig(): Pick<
-    NewCrawlConfigParams["config"],
+    CrawlConfigParams["config"],
     "seeds" | "scopeType" | "useSitemap" | "failOnFailedSeed"
   > {
     const primarySeedUrl = this.formState.primarySeedUrl;
@@ -81,7 +81,6 @@ export type FormState = {
     minute: number;
     period: "AM" | "PM";
   };
-  runNow: boolean;
   jobName: WorkflowParams["name"];
   browserProfile: Profile | null;
   tags: Tags;
@@ -139,7 +138,6 @@ export const getDefaultFormState = (): FormState => ({
     minute: 0,
     period: "AM",
   },
-  runNow: false,
   jobName: "",
   browserProfile: null,
   tags: [],
@@ -275,10 +273,6 @@ export function getInitialFormState(params: {
     lang: params.initialWorkflow.config.lang ?? defaultFormState.lang,
     scheduleType: defaultFormState.scheduleType,
     scheduleFrequency: defaultFormState.scheduleFrequency,
-    runNow:
-      params.org?.storageQuotaReached || params.org?.execMinutesQuotaReached
-        ? false
-        : defaultFormState.runNow,
     tags: params.initialWorkflow.tags,
     autoAddCollections: params.initialWorkflow.autoAddCollections,
     jobName: params.initialWorkflow.name || defaultFormState.jobName,