Support for setting scale in crawlconfig (#148)

* backend: scale support:
- add 'scale' field to crawlconfig
- support updating 'scale' field in crawlconfig patch
- add constraint for crawlconfig and crawl scale (currently 1-3)
This commit is contained in:
Ilya Kreymer 2022-02-20 11:27:47 -08:00 committed by GitHub
parent ca626f3c0a
commit ee68a2f64e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 24 additions and 27 deletions

View File

@@ -16,6 +16,10 @@ from users import User
from invites import InvitePending, InviteToArchiveRequest, UserRole from invites import InvitePending, InviteToArchiveRequest, UserRole
# crawl scale for constraint
MAX_CRAWL_SCALE = 3
# ============================================================================ # ============================================================================
class UpdateRole(InviteToArchiveRequest): class UpdateRole(InviteToArchiveRequest):
"""Update existing role for user""" """Update existing role for user"""

View File

@@ -9,11 +9,11 @@ import asyncio
from datetime import datetime from datetime import datetime
import pymongo import pymongo
from pydantic import BaseModel, UUID4 from pydantic import BaseModel, UUID4, conint
from fastapi import APIRouter, Depends, HTTPException from fastapi import APIRouter, Depends, HTTPException
from users import User from users import User
from archives import Archive from archives import Archive, MAX_CRAWL_SCALE
from db import BaseMongoModel from db import BaseMongoModel
@@ -85,7 +85,7 @@ class CrawlConfigIn(BaseModel):
colls: Optional[List[str]] = [] colls: Optional[List[str]] = []
crawlTimeout: Optional[int] = 0 crawlTimeout: Optional[int] = 0
parallel: Optional[int] = 1 scale: Optional[conint(ge=1, le=MAX_CRAWL_SCALE)] = 1
oldId: Optional[UUID4] oldId: Optional[UUID4]
@@ -105,7 +105,7 @@ class CrawlConfig(BaseMongoModel):
colls: Optional[List[str]] = [] colls: Optional[List[str]] = []
crawlTimeout: Optional[int] = 0 crawlTimeout: Optional[int] = 0
parallel: Optional[int] = 1 scale: Optional[conint(ge=1, le=MAX_CRAWL_SCALE)] = 1
aid: UUID4 aid: UUID4
@@ -142,11 +142,12 @@ class CrawlConfigsResponse(BaseModel):
# ============================================================================ # ============================================================================
class UpdateScheduleOrName(BaseModel): class UpdateCrawlConfig(BaseModel):
""" Update crawl config name or crawl schedule """ """ Update crawl config name or crawl schedule """
name: Optional[str] name: Optional[str]
schedule: Optional[str] schedule: Optional[str]
scale: Optional[conint(ge=1, le=MAX_CRAWL_SCALE)]
# ============================================================================ # ============================================================================
@@ -216,9 +217,13 @@ class CrawlConfigOps:
return result, new_name return result, new_name
async def update_crawl_config(self, cid: uuid.UUID, update: UpdateScheduleOrName): async def update_crawl_config(self, cid: uuid.UUID, update: UpdateCrawlConfig):
""" Update name and/or schedule for an existing crawl config """ """ Update name, scale and/or schedule for an existing crawl config """
if update.schedule is None and update.name is None:
# set update query
query = update.dict(exclude_unset=True, exclude_defaults=True, exclude_none=True)
if len(query) == 0:
raise HTTPException(status_code=400, detail="no_update_data") raise HTTPException(status_code=400, detail="no_update_data")
# update schedule in crawl manager first # update schedule in crawl manager first
@@ -233,15 +238,6 @@ class CrawlConfigOps:
status_code=404, detail=f"Crawl Config '{cid}' not found" status_code=404, detail=f"Crawl Config '{cid}' not found"
) )
# set update query
query = {}
if update.schedule is not None:
query["schedule"] = update.schedule
if update.name is not None:
query["name"] = update.name
# update in db # update in db
if not await self.crawl_configs.find_one_and_update( if not await self.crawl_configs.find_one_and_update(
{"_id": cid, "inactive": {"$ne": True}}, {"$set": query} {"_id": cid, "inactive": {"$ne": True}}, {"$set": query}
@@ -426,7 +422,7 @@ def init_crawl_config_api(
@router.patch("/{cid}", dependencies=[Depends(archive_crawl_dep)]) @router.patch("/{cid}", dependencies=[Depends(archive_crawl_dep)])
async def update_crawl_config( async def update_crawl_config(
update: UpdateScheduleOrName, update: UpdateCrawlConfig,
cid: str, cid: str,
): ):
return await ops.update_crawl_config(uuid.UUID(cid), update) return await ops.update_crawl_config(uuid.UUID(cid), update)
@@ -434,7 +430,7 @@ def init_crawl_config_api(
# depcreated: to remove in favor of general patch # depcreated: to remove in favor of general patch
@router.patch("/{cid}/schedule", dependencies=[Depends(archive_crawl_dep)]) @router.patch("/{cid}/schedule", dependencies=[Depends(archive_crawl_dep)])
async def update_crawl_schedule( async def update_crawl_schedule(
update: UpdateScheduleOrName, update: UpdateCrawlConfig,
cid: str, cid: str,
): ):
return await ops.update_crawl_config(uuid.UUID(cid), update) return await ops.update_crawl_config(uuid.UUID(cid), update)

View File

@@ -9,12 +9,12 @@ from typing import Optional, List, Dict, Union
from datetime import datetime from datetime import datetime
from fastapi import Depends, Request, HTTPException from fastapi import Depends, Request, HTTPException
from pydantic import BaseModel, UUID4 from pydantic import BaseModel, UUID4, conint
import pymongo import pymongo
import aioredis import aioredis
from db import BaseMongoModel from db import BaseMongoModel
from archives import Archive from archives import Archive, MAX_CRAWL_SCALE
from storages import get_presigned_url from storages import get_presigned_url
@@ -29,7 +29,7 @@ class DeleteCrawlList(BaseModel):
class CrawlScale(BaseModel): class CrawlScale(BaseModel):
""" scale the crawl to N parallel containers """ """ scale the crawl to N parallel containers """
scale: int = 1 scale: conint(ge=1, le=MAX_CRAWL_SCALE) = 1
# ============================================================================ # ============================================================================
@@ -70,7 +70,7 @@ class Crawl(BaseMongoModel):
state: str state: str
scale: int = 1 scale: conint(ge=1, le=MAX_CRAWL_SCALE) = 1
completions: Optional[int] = 0 completions: Optional[int] = 0
stats: Optional[Dict[str, str]] stats: Optional[Dict[str, str]]

View File

@@ -201,7 +201,7 @@ class K8SManager:
labels, labels,
annotations, annotations,
crawlconfig.crawlTimeout, crawlconfig.crawlTimeout,
crawlconfig.parallel, crawlconfig.scale,
) )
spec = client.V1beta1CronJobSpec( spec = client.V1beta1CronJobSpec(
@@ -468,9 +468,6 @@ class K8SManager:
if not job or job.metadata.labels["btrix.archive"] != aid: if not job or job.metadata.labels["btrix.archive"] != aid:
return "Invalid Crawled" return "Invalid Crawled"
if parallelism < 1 or parallelism > 10:
return "Invalid Scale: Must be between 1 and 10"
job.spec.parallelism = parallelism job.spec.parallelism = parallelism
await self.batch_api.patch_namespaced_job( await self.batch_api.patch_namespaced_job(