Support for setting scale in crawlconfig (#148)
* backend: scale support:
  - add 'scale' field to crawlconfig
  - support updating 'scale' field in crawlconfig patch
  - add constraint for crawlconfig and crawl scale (currently 1-3)
parent ca626f3c0a
commit ee68a2f64e
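The 1-3 bound is enforced with pydantic's conint field type, as the hunks below show. A minimal sketch of the validation behavior under pydantic v1 (the ScaleExample model name is hypothetical; the field definition and MAX_CRAWL_SCALE mirror the diff):

from typing import Optional

from pydantic import BaseModel, ValidationError, conint

# mirrors the new constant added in the first hunk below
MAX_CRAWL_SCALE = 3


class ScaleExample(BaseModel):
    """Hypothetical model using the same constrained field as CrawlConfigIn."""

    scale: Optional[conint(ge=1, le=MAX_CRAWL_SCALE)] = 1


print(ScaleExample().scale)         # 1 (default)
print(ScaleExample(scale=3).scale)  # 3 (upper bound is allowed)

try:
    ScaleExample(scale=4)           # out of range, rejected at parse time
except ValidationError as err:
    print(err)                      # "ensure this value is less than or equal to 3"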
@@ -16,6 +16,10 @@ from users import User
 from invites import InvitePending, InviteToArchiveRequest, UserRole
 
+# crawl scale for constraint
+MAX_CRAWL_SCALE = 3
+
+
 # ============================================================================
 class UpdateRole(InviteToArchiveRequest):
     """Update existing role for user"""
 
@@ -9,11 +9,11 @@ import asyncio
 from datetime import datetime
 
 import pymongo
-from pydantic import BaseModel, UUID4
+from pydantic import BaseModel, UUID4, conint
 from fastapi import APIRouter, Depends, HTTPException
 
 from users import User
-from archives import Archive
+from archives import Archive, MAX_CRAWL_SCALE
 
 from db import BaseMongoModel
 
@@ -85,7 +85,7 @@ class CrawlConfigIn(BaseModel):
     colls: Optional[List[str]] = []
 
     crawlTimeout: Optional[int] = 0
-    parallel: Optional[int] = 1
+    scale: Optional[conint(ge=1, le=MAX_CRAWL_SCALE)] = 1
 
     oldId: Optional[UUID4]
 
@@ -105,7 +105,7 @@ class CrawlConfig(BaseMongoModel):
     colls: Optional[List[str]] = []
 
     crawlTimeout: Optional[int] = 0
-    parallel: Optional[int] = 1
+    scale: Optional[conint(ge=1, le=MAX_CRAWL_SCALE)] = 1
 
     aid: UUID4
 
@@ -142,11 +142,12 @@ class CrawlConfigsResponse(BaseModel):
 
 
 # ============================================================================
-class UpdateScheduleOrName(BaseModel):
+class UpdateCrawlConfig(BaseModel):
     """ Update crawl config name or crawl schedule """
 
     name: Optional[str]
     schedule: Optional[str]
+    scale: Optional[conint(ge=1, le=MAX_CRAWL_SCALE)]
 
 
 # ============================================================================
@@ -216,9 +217,13 @@ class CrawlConfigOps:
 
         return result, new_name
 
-    async def update_crawl_config(self, cid: uuid.UUID, update: UpdateScheduleOrName):
-        """ Update name and/or schedule for an existing crawl config """
-        if update.schedule is None and update.name is None:
+    async def update_crawl_config(self, cid: uuid.UUID, update: UpdateCrawlConfig):
+        """ Update name, scale and/or schedule for an existing crawl config """
+
+        # set update query
+        query = update.dict(exclude_unset=True, exclude_defaults=True, exclude_none=True)
+
+        if len(query) == 0:
             raise HTTPException(status_code=400, detail="no_update_data")
 
         # update schedule in crawl manager first
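The refactored handler derives the Mongo $set document directly from the request model, so only fields the client actually sent get updated. A small sketch of what the dict() call produces (UpdateCrawlConfig is copied from the hunk above; assumes pydantic v1):

from typing import Optional

from pydantic import BaseModel, conint

MAX_CRAWL_SCALE = 3


class UpdateCrawlConfig(BaseModel):
    name: Optional[str]
    schedule: Optional[str]
    scale: Optional[conint(ge=1, le=MAX_CRAWL_SCALE)]


# only 'scale' was sent, so only 'scale' lands in the update query
update = UpdateCrawlConfig(scale=2)
print(update.dict(exclude_unset=True, exclude_defaults=True, exclude_none=True))
# {'scale': 2}

# nothing sent: empty query, which the handler rejects with 400 no_update_data
print(UpdateCrawlConfig().dict(exclude_unset=True, exclude_defaults=True, exclude_none=True))
# {}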
@@ -233,15 +238,6 @@ class CrawlConfigOps:
                 status_code=404, detail=f"Crawl Config '{cid}' not found"
             )
 
-        # set update query
-        query = {}
-
-        if update.schedule is not None:
-            query["schedule"] = update.schedule
-
-        if update.name is not None:
-            query["name"] = update.name
-
         # update in db
         if not await self.crawl_configs.find_one_and_update(
             {"_id": cid, "inactive": {"$ne": True}}, {"$set": query}
@@ -426,7 +422,7 @@ def init_crawl_config_api(
 
     @router.patch("/{cid}", dependencies=[Depends(archive_crawl_dep)])
     async def update_crawl_config(
-        update: UpdateScheduleOrName,
+        update: UpdateCrawlConfig,
         cid: str,
     ):
         return await ops.update_crawl_config(uuid.UUID(cid), update)
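With the generalized UpdateCrawlConfig model, scale is patched through the same endpoint as name and schedule. A hypothetical client call (the /api/archives/{aid}/crawlconfigs route prefix and bearer-token header are assumptions, not shown in this diff):

import requests  # hypothetical client; any HTTP client works

resp = requests.patch(
    "https://btrix.example.com/api/archives/<aid>/crawlconfigs/<cid>",  # assumed route prefix
    headers={"Authorization": "Bearer <token>"},
    json={"scale": 2},  # any subset of name / schedule / scale may be sent
)
print(resp.status_code, resp.json())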
@@ -434,7 +430,7 @@ def init_crawl_config_api(
     # depcreated: to remove in favor of general patch
     @router.patch("/{cid}/schedule", dependencies=[Depends(archive_crawl_dep)])
     async def update_crawl_schedule(
-        update: UpdateScheduleOrName,
+        update: UpdateCrawlConfig,
         cid: str,
     ):
         return await ops.update_crawl_config(uuid.UUID(cid), update)
@@ -9,12 +9,12 @@ from typing import Optional, List, Dict, Union
 from datetime import datetime
 
 from fastapi import Depends, Request, HTTPException
-from pydantic import BaseModel, UUID4
+from pydantic import BaseModel, UUID4, conint
 import pymongo
 import aioredis
 
 from db import BaseMongoModel
-from archives import Archive
+from archives import Archive, MAX_CRAWL_SCALE
 from storages import get_presigned_url
 
 
@@ -29,7 +29,7 @@ class DeleteCrawlList(BaseModel):
 class CrawlScale(BaseModel):
     """ scale the crawl to N parallel containers """
 
-    scale: int = 1
+    scale: conint(ge=1, le=MAX_CRAWL_SCALE) = 1
 
 
 # ============================================================================
@@ -70,7 +70,7 @@ class Crawl(BaseMongoModel):
 
     state: str
 
-    scale: int = 1
+    scale: conint(ge=1, le=MAX_CRAWL_SCALE) = 1
     completions: Optional[int] = 0
 
     stats: Optional[Dict[str, str]]
@@ -201,7 +201,7 @@ class K8SManager:
             labels,
             annotations,
             crawlconfig.crawlTimeout,
-            crawlconfig.parallel,
+            crawlconfig.scale,
         )
 
         spec = client.V1beta1CronJobSpec(
@@ -468,9 +468,6 @@ class K8SManager:
         if not job or job.metadata.labels["btrix.archive"] != aid:
             return "Invalid Crawled"
 
-        if parallelism < 1 or parallelism > 10:
-            return "Invalid Scale: Must be between 1 and 10"
-
         job.spec.parallelism = parallelism
 
         await self.batch_api.patch_namespaced_job(
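The hardcoded 1-10 check is dropped here because the range is now validated at the API layer by conint before a request ever reaches the Kubernetes manager. For context, a sketch of what the remaining lines do, assuming the async Kubernetes client (kubernetes_asyncio) that the surrounding code appears to use; the job lookup is elided:

from kubernetes_asyncio import client


async def scale_job(batch_api: client.BatchV1Api, job, parallelism: int) -> None:
    # range checking now happens upstream via conint, so just apply the value
    job.spec.parallelism = parallelism
    await batch_api.patch_namespaced_job(
        name=job.metadata.name,
        namespace=job.metadata.namespace,
        body=job,
    )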