ensure max crawl size and max crawl timeout values are set to 0 when unused, instead of null (#1167)

- convert None to 0 when creating a CrawlJob
- ensure the frontend sends 0, not null
- make the input model declare these fields as 'int = 0' instead of 'Optional[int] = 0'
This commit is contained in:
Ilya Kreymer 2023-09-13 09:51:26 -07:00 committed by GitHub
parent ab76f0f394
commit 9159c7c914
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 12 additions and 16 deletions

View File

@ -98,8 +98,8 @@ class K8sAPI:
"oid": oid,
"userid": userid,
"scale": scale,
"expire_time": crawl_expire_time,
"max_crawl_size": max_crawl_size,
"expire_time": crawl_expire_time or 0,
"max_crawl_size": max_crawl_size or 0,
"manual": "1" if manual else "0",
}

View File

@ -128,8 +128,8 @@ class CrawlConfigIn(BaseModel):
autoAddCollections: Optional[List[UUID4]] = []
tags: Optional[List[str]] = []
crawlTimeout: Optional[int] = 0
maxCrawlSize: Optional[int] = 0
crawlTimeout: int = 0
maxCrawlSize: int = 0
scale: Optional[conint(ge=1, le=MAX_CRAWL_SCALE)] = 1
crawlFilenameTemplate: Optional[str]

View File

@ -280,7 +280,7 @@ class BtrixOperator(K8sAPI):
started=data.parent["metadata"]["creationTimestamp"],
stopping=spec.get("stopping", False),
expire_time=from_k8s_date(spec.get("expireTime")),
max_crawl_size=int(configmap.get("MAX_CRAWL_SIZE", "0")),
max_crawl_size=int(spec.get("maxCrawlSize") or 0),
scheduled=spec.get("manual") != "1",
)

View File

@ -78,11 +78,11 @@ type FormState = {
includeLinkedPages: boolean;
useSitemap: boolean;
customIncludeUrlList: string;
crawlTimeoutMinutes: number | null;
crawlTimeoutMinutes: number;
behaviorTimeoutSeconds: number | null;
pageLoadTimeoutSeconds: number | null;
pageExtraDelaySeconds: number | null;
maxCrawlSizeGB: number | null;
maxCrawlSizeGB: number;
maxScopeDepth: number | null;
scopeType: WorkflowParams["config"]["scopeType"];
exclusions: WorkflowParams["config"]["exclude"];
@ -153,7 +153,7 @@ const getDefaultFormState = (): FormState => ({
includeLinkedPages: false,
useSitemap: true,
customIncludeUrlList: "",
crawlTimeoutMinutes: null,
crawlTimeoutMinutes: 0,
maxCrawlSizeGB: 0,
behaviorTimeoutSeconds: null,
pageLoadTimeoutSeconds: null,
@ -488,12 +488,12 @@ export class CrawlConfigEditor extends LiteElement {
formState.autoAddCollections = this.initialWorkflow.autoAddCollections;
}
const secondsToMinutes = (value: any, fallback: number | null) => {
const secondsToMinutes = (value: any, fallback: number = 0) => {
if (typeof value === "number" && value > 0) return value / 60;
return fallback;
};
const bytesToGB = (value: any, fallback: number | null) => {
const bytesToGB = (value: any, fallback: number = 0) => {
if (typeof value === "number" && value > 0)
return Math.floor(value / BYTES_PER_GB);
return fallback;
@ -2147,12 +2147,8 @@ https://archiveweb.page/images/${"logo.svg"}`}
profileid: this.formState.browserProfile?.id || "",
runNow: this.formState.runNow || this.formState.scheduleType === "now",
schedule: this.formState.scheduleType === "cron" ? this.utcSchedule : "",
crawlTimeout: this.formState.crawlTimeoutMinutes
? this.formState.crawlTimeoutMinutes * 60
: null,
maxCrawlSize: this.formState.maxCrawlSizeGB
? this.formState.maxCrawlSizeGB * BYTES_PER_GB
: null,
crawlTimeout: this.formState.crawlTimeoutMinutes * 60,
maxCrawlSize: this.formState.maxCrawlSizeGB * BYTES_PER_GB,
tags: this.formState.tags,
autoAddCollections: this.formState.autoAddCollections,
config: {