diff --git a/backend/archives.py b/backend/archives.py
index 12cc507b..87136796 100644
--- a/backend/archives.py
+++ b/backend/archives.py
@@ -16,6 +16,10 @@
 from users import User
 from invites import InvitePending, InviteToArchiveRequest, UserRole
 
+# crawl scale for constraint
+MAX_CRAWL_SCALE = 3
+
+
 # ============================================================================
 class UpdateRole(InviteToArchiveRequest):
     """Update existing role for user"""
diff --git a/backend/crawlconfigs.py b/backend/crawlconfigs.py
index 65725f5b..5e4099ed 100644
--- a/backend/crawlconfigs.py
+++ b/backend/crawlconfigs.py
@@ -9,11 +9,11 @@
 import asyncio
 from datetime import datetime
 
 import pymongo
-from pydantic import BaseModel, UUID4
+from pydantic import BaseModel, UUID4, conint
 from fastapi import APIRouter, Depends, HTTPException
 
 from users import User
-from archives import Archive
+from archives import Archive, MAX_CRAWL_SCALE
 
 from db import BaseMongoModel
@@ -85,7 +85,7 @@ class CrawlConfigIn(BaseModel):
     colls: Optional[List[str]] = []
 
     crawlTimeout: Optional[int] = 0
-    parallel: Optional[int] = 1
+    scale: Optional[conint(ge=1, le=MAX_CRAWL_SCALE)] = 1
 
     oldId: Optional[UUID4]
 
@@ -105,7 +105,7 @@ class CrawlConfig(BaseMongoModel):
     colls: Optional[List[str]] = []
 
     crawlTimeout: Optional[int] = 0
-    parallel: Optional[int] = 1
+    scale: Optional[conint(ge=1, le=MAX_CRAWL_SCALE)] = 1
 
     aid: UUID4
 
@@ -142,11 +142,12 @@ class CrawlConfigsResponse(BaseModel):
 
 
 # ============================================================================
-class UpdateScheduleOrName(BaseModel):
+class UpdateCrawlConfig(BaseModel):
     """ Update crawl config name or crawl schedule """
 
     name: Optional[str]
     schedule: Optional[str]
+    scale: Optional[conint(ge=1, le=MAX_CRAWL_SCALE)]
 
 
 # ============================================================================
@@ -216,9 +217,13 @@ class CrawlConfigOps:
 
         return result, new_name
 
-    async def update_crawl_config(self, cid: uuid.UUID, update: UpdateScheduleOrName):
-        """ Update name and/or schedule for an existing crawl config """
-        if update.schedule is None and update.name is None:
+    async def update_crawl_config(self, cid: uuid.UUID, update: UpdateCrawlConfig):
+        """ Update name, scale and/or schedule for an existing crawl config """
+
+        # set update query
+        query = update.dict(exclude_unset=True, exclude_defaults=True, exclude_none=True)
+
+        if len(query) == 0:
             raise HTTPException(status_code=400, detail="no_update_data")
 
         # update schedule in crawl manager first
@@ -233,15 +238,6 @@ class CrawlConfigOps:
                 status_code=404, detail=f"Crawl Config '{cid}' not found"
             )
 
-        # set update query
-        query = {}
-
-        if update.schedule is not None:
-            query["schedule"] = update.schedule
-
-        if update.name is not None:
-            query["name"] = update.name
-
         # update in db
         if not await self.crawl_configs.find_one_and_update(
             {"_id": cid, "inactive": {"$ne": True}}, {"$set": query}
@@ -426,7 +422,7 @@ def init_crawl_config_api(
 
     @router.patch("/{cid}", dependencies=[Depends(archive_crawl_dep)])
     async def update_crawl_config(
-        update: UpdateScheduleOrName,
+        update: UpdateCrawlConfig,
         cid: str,
     ):
         return await ops.update_crawl_config(uuid.UUID(cid), update)
@@ -434,7 +430,7 @@ def init_crawl_config_api(
     # depcreated: to remove in favor of general patch
     @router.patch("/{cid}/schedule", dependencies=[Depends(archive_crawl_dep)])
     async def update_crawl_schedule(
-        update: UpdateScheduleOrName,
+        update: UpdateCrawlConfig,
         cid: str,
     ):
         return await ops.update_crawl_config(uuid.UUID(cid), update)
diff --git a/backend/crawls.py b/backend/crawls.py
index 91955420..692a2d04 100644
--- a/backend/crawls.py
+++ b/backend/crawls.py
@@ -9,12 +9,12 @@
 from typing import Optional, List, Dict, Union
 from datetime import datetime
 
 from fastapi import Depends, Request, HTTPException
-from pydantic import BaseModel, UUID4
+from pydantic import BaseModel, UUID4, conint
 import pymongo
 import aioredis
 
 from db import BaseMongoModel
-from archives import Archive
+from archives import Archive, MAX_CRAWL_SCALE
 from storages import get_presigned_url
 
@@ -29,7 +29,7 @@ class DeleteCrawlList(BaseModel):
 class CrawlScale(BaseModel):
     """ scale the crawl to N parallel containers """
 
-    scale: int = 1
+    scale: conint(ge=1, le=MAX_CRAWL_SCALE) = 1
 
 
 # ============================================================================
@@ -70,7 +70,7 @@ class Crawl(BaseMongoModel):
     state: str
 
-    scale: int = 1
+    scale: conint(ge=1, le=MAX_CRAWL_SCALE) = 1
 
     completions: Optional[int] = 0
 
     stats: Optional[Dict[str, str]]
diff --git a/backend/k8sman.py b/backend/k8sman.py
index f0938dea..db39731e 100644
--- a/backend/k8sman.py
+++ b/backend/k8sman.py
@@ -201,7 +201,7 @@ class K8SManager:
             labels,
             annotations,
             crawlconfig.crawlTimeout,
-            crawlconfig.parallel,
+            crawlconfig.scale,
         )
 
         spec = client.V1beta1CronJobSpec(
@@ -468,9 +468,6 @@ class K8SManager:
         if not job or job.metadata.labels["btrix.archive"] != aid:
             return "Invalid Crawled"
 
-        if parallelism < 1 or parallelism > 10:
-            return "Invalid Scale: Must be between 1 and 10"
-
         job.spec.parallelism = parallelism
 
         await self.batch_api.patch_namespaced_job(
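
Note on the conint fields: the bounds check moves out of K8SManager (the deleted
1..10 range test) and into the pydantic models, so an out-of-range scale is
rejected before any handler or manager code runs; it also tightens the maximum
from 10 to MAX_CRAWL_SCALE = 3. A minimal sketch of the behavior, using the
pydantic v1 API this diff imports (the model is reconstructed here for
illustration):

    from pydantic import BaseModel, ValidationError, conint

    MAX_CRAWL_SCALE = 3  # mirrors the constant added in backend/archives.py


    class CrawlScale(BaseModel):
        """ scale the crawl to N parallel containers """

        scale: conint(ge=1, le=MAX_CRAWL_SCALE) = 1


    print(CrawlScale(scale=2).scale)  # 2: within bounds, accepted

    try:
        CrawlScale(scale=10)  # out of bounds, raises at the model layer
    except ValidationError as err:
        print(err)  # reports the value must be less than or equal to 3

In a FastAPI route the same failure surfaces as a 422 response, which is why
the hand-rolled check in k8sman.py could be deleted.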
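
Note on the rewritten update_crawl_config: update.dict(exclude_unset=True,
exclude_defaults=True, exclude_none=True) collapses the old field-by-field
"if ... is not None" checks into one expression that keeps only the fields the
caller actually sent, so the same code path handles name, schedule, and the new
scale field. A small sketch of how the $set query comes out (model
reconstructed from the diff, pydantic v1 API):

    from typing import Optional

    from pydantic import BaseModel, conint

    MAX_CRAWL_SCALE = 3


    class UpdateCrawlConfig(BaseModel):
        """ Update crawl config name, schedule and/or scale """

        name: Optional[str]
        schedule: Optional[str]
        scale: Optional[conint(ge=1, le=MAX_CRAWL_SCALE)]


    update = UpdateCrawlConfig(scale=2)
    query = update.dict(exclude_unset=True, exclude_defaults=True, exclude_none=True)
    print(query)  # {'scale': 2}: unset fields never reach the $set query

An empty dict means the request carried no usable fields, which is what
triggers the 400 no_update_data error before any call to the crawl manager or
the database.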