Support for setting scale in crawlconfig (#148)

* backend: scale support:
- add 'scale' field to crawlconfig
- support updating 'scale' field in crawlconfig patch
- add constraint for crawlconfig and crawl scale (currently 1-3; see the validation sketch below)
Ilya Kreymer 2022-02-20 11:27:47 -08:00 committed by GitHub
parent ca626f3c0a
commit ee68a2f64e
4 changed files with 24 additions and 27 deletions
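
Before the per-file diffs, a minimal standalone sketch of the behavior this commit introduces (the `ScaleOnly` model and this snippet are illustrative, not part of the commit): pydantic's `conint(ge=1, le=MAX_CRAWL_SCALE)` rejects any scale outside 1-3 at validation time.

    from typing import Optional
    from pydantic import BaseModel, ValidationError, conint

    MAX_CRAWL_SCALE = 3  # mirrors the constant added below

    class ScaleOnly(BaseModel):
        # the same constraint the commit applies to CrawlConfig.scale
        scale: Optional[conint(ge=1, le=MAX_CRAWL_SCALE)] = 1

    print(ScaleOnly().scale)         # 1 (default)
    print(ScaleOnly(scale=3).scale)  # 3 (upper bound is allowed)

    try:
        ScaleOnly(scale=4)
    except ValidationError as err:
        print(err)  # "ensure this value is less than or equal to 3"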

View File

@@ -16,6 +16,10 @@ from users import User
 from invites import InvitePending, InviteToArchiveRequest, UserRole
 
+# crawl scale for constraint
+MAX_CRAWL_SCALE = 3
+
+
 # ============================================================================
 class UpdateRole(InviteToArchiveRequest):
     """Update existing role for user"""

View File

@@ -9,11 +9,11 @@ import asyncio
 from datetime import datetime
 
 import pymongo
-from pydantic import BaseModel, UUID4
+from pydantic import BaseModel, UUID4, conint
 from fastapi import APIRouter, Depends, HTTPException
 
 from users import User
-from archives import Archive
+from archives import Archive, MAX_CRAWL_SCALE
 
 from db import BaseMongoModel
@@ -85,7 +85,7 @@ class CrawlConfigIn(BaseModel):
     colls: Optional[List[str]] = []
 
     crawlTimeout: Optional[int] = 0
-    parallel: Optional[int] = 1
+    scale: Optional[conint(ge=1, le=MAX_CRAWL_SCALE)] = 1
 
     oldId: Optional[UUID4]
@@ -105,7 +105,7 @@ class CrawlConfig(BaseMongoModel):
     colls: Optional[List[str]] = []
 
     crawlTimeout: Optional[int] = 0
-    parallel: Optional[int] = 1
+    scale: Optional[conint(ge=1, le=MAX_CRAWL_SCALE)] = 1
 
     aid: UUID4
@@ -142,11 +142,12 @@ class CrawlConfigsResponse(BaseModel):
 
 # ============================================================================
-class UpdateScheduleOrName(BaseModel):
+class UpdateCrawlConfig(BaseModel):
     """ Update crawl config name or crawl schedule """
 
     name: Optional[str]
     schedule: Optional[str]
+    scale: Optional[conint(ge=1, le=MAX_CRAWL_SCALE)]
 
 
 # ============================================================================
@@ -216,9 +217,13 @@ class CrawlConfigOps:
 
         return result, new_name
 
-    async def update_crawl_config(self, cid: uuid.UUID, update: UpdateScheduleOrName):
-        """ Update name and/or schedule for an existing crawl config """
-        if update.schedule is None and update.name is None:
+    async def update_crawl_config(self, cid: uuid.UUID, update: UpdateCrawlConfig):
+        """ Update name, scale and/or schedule for an existing crawl config """
+
+        # set update query
+        query = update.dict(exclude_unset=True, exclude_defaults=True, exclude_none=True)
+
+        if len(query) == 0:
             raise HTTPException(status_code=400, detail="no_update_data")
 
         # update schedule in crawl manager first
@@ -233,15 +238,6 @@ class CrawlConfigOps:
                 status_code=404, detail=f"Crawl Config '{cid}' not found"
             )
 
-        # set update query
-        query = {}
-
-        if update.schedule is not None:
-            query["schedule"] = update.schedule
-
-        if update.name is not None:
-            query["name"] = update.name
-
         # update in db
         if not await self.crawl_configs.find_one_and_update(
             {"_id": cid, "inactive": {"$ne": True}}, {"$set": query}
@@ -426,7 +422,7 @@ def init_crawl_config_api(
 
     @router.patch("/{cid}", dependencies=[Depends(archive_crawl_dep)])
     async def update_crawl_config(
-        update: UpdateScheduleOrName,
+        update: UpdateCrawlConfig,
         cid: str,
     ):
         return await ops.update_crawl_config(uuid.UUID(cid), update)
@@ -434,7 +430,7 @@ def init_crawl_config_api(
     # deprecated: to remove in favor of general patch
     @router.patch("/{cid}/schedule", dependencies=[Depends(archive_crawl_dep)])
     async def update_crawl_schedule(
-        update: UpdateScheduleOrName,
+        update: UpdateCrawlConfig,
         cid: str,
     ):
         return await ops.update_crawl_config(uuid.UUID(cid), update)
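
The rewritten update_crawl_config builds the Mongo $set document directly from the request model, so only fields the client actually supplied are written. A standalone sketch of that pattern (the model is redefined here only so the snippet runs on its own):

    from typing import Optional
    from pydantic import BaseModel, conint

    MAX_CRAWL_SCALE = 3

    class UpdateCrawlConfig(BaseModel):
        name: Optional[str]
        schedule: Optional[str]
        scale: Optional[conint(ge=1, le=MAX_CRAWL_SCALE)]

    # only explicitly-set, non-None fields survive into the query
    query = UpdateCrawlConfig(scale=2).dict(
        exclude_unset=True, exclude_defaults=True, exclude_none=True
    )
    print(query)  # {'scale': 2}

    # an empty payload yields an empty query, which the endpoint
    # rejects with HTTP 400 "no_update_data"
    print(UpdateCrawlConfig().dict(
        exclude_unset=True, exclude_defaults=True, exclude_none=True
    ))  # {}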

View File

@@ -9,12 +9,12 @@ from typing import Optional, List, Dict, Union
 from datetime import datetime
 
 from fastapi import Depends, Request, HTTPException
-from pydantic import BaseModel, UUID4
+from pydantic import BaseModel, UUID4, conint
 import pymongo
 import aioredis
 
 from db import BaseMongoModel
-from archives import Archive
+from archives import Archive, MAX_CRAWL_SCALE
 from storages import get_presigned_url
@@ -29,7 +29,7 @@ class DeleteCrawlList(BaseModel):
 class CrawlScale(BaseModel):
     """ scale the crawl to N parallel containers """
 
-    scale: int = 1
+    scale: conint(ge=1, le=MAX_CRAWL_SCALE) = 1
 
 
 # ============================================================================
@@ -70,7 +70,7 @@ class Crawl(BaseMongoModel):
     state: str
 
-    scale: int = 1
+    scale: conint(ge=1, le=MAX_CRAWL_SCALE) = 1
     completions: Optional[int] = 0
 
     stats: Optional[Dict[str, str]]
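
With CrawlScale constrained the same way, FastAPI rejects an out-of-range scale with a 422 before any handler runs. A hedged sketch (the /scale route and this wiring are hypothetical, for illustration only):

    from fastapi import FastAPI
    from fastapi.testclient import TestClient
    from pydantic import BaseModel, conint

    MAX_CRAWL_SCALE = 3

    class CrawlScale(BaseModel):
        """ scale the crawl to N parallel containers """
        scale: conint(ge=1, le=MAX_CRAWL_SCALE) = 1

    app = FastAPI()

    @app.post("/scale")  # hypothetical route, not one of the repo's endpoints
    async def set_scale(crawl_scale: CrawlScale):
        return {"scale": crawl_scale.scale}

    client = TestClient(app)
    print(client.post("/scale", json={"scale": 2}).status_code)   # 200
    print(client.post("/scale", json={"scale": 10}).status_code)  # 422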

View File

@@ -201,7 +201,7 @@ class K8SManager:
             labels,
             annotations,
             crawlconfig.crawlTimeout,
-            crawlconfig.parallel,
+            crawlconfig.scale,
         )
 
         spec = client.V1beta1CronJobSpec(
@@ -468,9 +468,6 @@ class K8SManager:
         if not job or job.metadata.labels["btrix.archive"] != aid:
             return "Invalid Crawled"
 
-        if parallelism < 1 or parallelism > 10:
-            return "Invalid Scale: Must be between 1 and 10"
-
         job.spec.parallelism = parallelism
 
         await self.batch_api.patch_namespaced_job(
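
Note that the hand-rolled 1-10 range check is deleted rather than moved: validation now lives in the pydantic models, so MAX_CRAWL_SCALE is the single source of truth and K8SManager can apply the value directly. A hedged sketch of that read-modify-patch step, assuming kubernetes_asyncio (function name and wiring are illustrative, not the repo's exact code):

    from kubernetes_asyncio import client, config

    async def scale_crawl_job(name: str, namespace: str, parallelism: int):
        await config.load_kube_config()
        batch_api = client.BatchV1Api()
        # read the Job, bump its parallelism, and patch it back
        job = await batch_api.read_namespaced_job(name, namespace)
        job.spec.parallelism = parallelism
        await batch_api.patch_namespaced_job(name, namespace, body=job)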