Support for setting scale in crawlconfig (#148)
* backend: scale support: - add 'scale' field to crawlconfig - support updating 'scale' field in crawlconfig patch - add constraint for crawlconfig and crawl scale (currently 1-3)
This commit is contained in:
parent
ca626f3c0a
commit
ee68a2f64e
@ -16,6 +16,10 @@ from users import User
|
|||||||
from invites import InvitePending, InviteToArchiveRequest, UserRole
|
from invites import InvitePending, InviteToArchiveRequest, UserRole
|
||||||
|
|
||||||
|
|
||||||
|
# crawl scale for constraint
|
||||||
|
MAX_CRAWL_SCALE = 3
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
class UpdateRole(InviteToArchiveRequest):
|
class UpdateRole(InviteToArchiveRequest):
|
||||||
"""Update existing role for user"""
|
"""Update existing role for user"""
|
||||||
|
@ -9,11 +9,11 @@ import asyncio
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
import pymongo
|
import pymongo
|
||||||
from pydantic import BaseModel, UUID4
|
from pydantic import BaseModel, UUID4, conint
|
||||||
from fastapi import APIRouter, Depends, HTTPException
|
from fastapi import APIRouter, Depends, HTTPException
|
||||||
|
|
||||||
from users import User
|
from users import User
|
||||||
from archives import Archive
|
from archives import Archive, MAX_CRAWL_SCALE
|
||||||
|
|
||||||
from db import BaseMongoModel
|
from db import BaseMongoModel
|
||||||
|
|
||||||
@ -85,7 +85,7 @@ class CrawlConfigIn(BaseModel):
|
|||||||
colls: Optional[List[str]] = []
|
colls: Optional[List[str]] = []
|
||||||
|
|
||||||
crawlTimeout: Optional[int] = 0
|
crawlTimeout: Optional[int] = 0
|
||||||
parallel: Optional[int] = 1
|
scale: Optional[conint(ge=1, le=MAX_CRAWL_SCALE)] = 1
|
||||||
|
|
||||||
oldId: Optional[UUID4]
|
oldId: Optional[UUID4]
|
||||||
|
|
||||||
@ -105,7 +105,7 @@ class CrawlConfig(BaseMongoModel):
|
|||||||
colls: Optional[List[str]] = []
|
colls: Optional[List[str]] = []
|
||||||
|
|
||||||
crawlTimeout: Optional[int] = 0
|
crawlTimeout: Optional[int] = 0
|
||||||
parallel: Optional[int] = 1
|
scale: Optional[conint(ge=1, le=MAX_CRAWL_SCALE)] = 1
|
||||||
|
|
||||||
aid: UUID4
|
aid: UUID4
|
||||||
|
|
||||||
@ -142,11 +142,12 @@ class CrawlConfigsResponse(BaseModel):
|
|||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
class UpdateScheduleOrName(BaseModel):
|
class UpdateCrawlConfig(BaseModel):
|
||||||
""" Update crawl config name or crawl schedule """
|
""" Update crawl config name or crawl schedule """
|
||||||
|
|
||||||
name: Optional[str]
|
name: Optional[str]
|
||||||
schedule: Optional[str]
|
schedule: Optional[str]
|
||||||
|
scale: Optional[conint(ge=1, le=MAX_CRAWL_SCALE)]
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
@ -216,9 +217,13 @@ class CrawlConfigOps:
|
|||||||
|
|
||||||
return result, new_name
|
return result, new_name
|
||||||
|
|
||||||
async def update_crawl_config(self, cid: uuid.UUID, update: UpdateScheduleOrName):
|
async def update_crawl_config(self, cid: uuid.UUID, update: UpdateCrawlConfig):
|
||||||
""" Update name and/or schedule for an existing crawl config """
|
""" Update name, scale and/or schedule for an existing crawl config """
|
||||||
if update.schedule is None and update.name is None:
|
|
||||||
|
# set update query
|
||||||
|
query = update.dict(exclude_unset=True, exclude_defaults=True, exclude_none=True)
|
||||||
|
|
||||||
|
if len(query) == 0:
|
||||||
raise HTTPException(status_code=400, detail="no_update_data")
|
raise HTTPException(status_code=400, detail="no_update_data")
|
||||||
|
|
||||||
# update schedule in crawl manager first
|
# update schedule in crawl manager first
|
||||||
@ -233,15 +238,6 @@ class CrawlConfigOps:
|
|||||||
status_code=404, detail=f"Crawl Config '{cid}' not found"
|
status_code=404, detail=f"Crawl Config '{cid}' not found"
|
||||||
)
|
)
|
||||||
|
|
||||||
# set update query
|
|
||||||
query = {}
|
|
||||||
|
|
||||||
if update.schedule is not None:
|
|
||||||
query["schedule"] = update.schedule
|
|
||||||
|
|
||||||
if update.name is not None:
|
|
||||||
query["name"] = update.name
|
|
||||||
|
|
||||||
# update in db
|
# update in db
|
||||||
if not await self.crawl_configs.find_one_and_update(
|
if not await self.crawl_configs.find_one_and_update(
|
||||||
{"_id": cid, "inactive": {"$ne": True}}, {"$set": query}
|
{"_id": cid, "inactive": {"$ne": True}}, {"$set": query}
|
||||||
@ -426,7 +422,7 @@ def init_crawl_config_api(
|
|||||||
|
|
||||||
@router.patch("/{cid}", dependencies=[Depends(archive_crawl_dep)])
|
@router.patch("/{cid}", dependencies=[Depends(archive_crawl_dep)])
|
||||||
async def update_crawl_config(
|
async def update_crawl_config(
|
||||||
update: UpdateScheduleOrName,
|
update: UpdateCrawlConfig,
|
||||||
cid: str,
|
cid: str,
|
||||||
):
|
):
|
||||||
return await ops.update_crawl_config(uuid.UUID(cid), update)
|
return await ops.update_crawl_config(uuid.UUID(cid), update)
|
||||||
@ -434,7 +430,7 @@ def init_crawl_config_api(
|
|||||||
# depcreated: to remove in favor of general patch
|
# depcreated: to remove in favor of general patch
|
||||||
@router.patch("/{cid}/schedule", dependencies=[Depends(archive_crawl_dep)])
|
@router.patch("/{cid}/schedule", dependencies=[Depends(archive_crawl_dep)])
|
||||||
async def update_crawl_schedule(
|
async def update_crawl_schedule(
|
||||||
update: UpdateScheduleOrName,
|
update: UpdateCrawlConfig,
|
||||||
cid: str,
|
cid: str,
|
||||||
):
|
):
|
||||||
return await ops.update_crawl_config(uuid.UUID(cid), update)
|
return await ops.update_crawl_config(uuid.UUID(cid), update)
|
||||||
|
@ -9,12 +9,12 @@ from typing import Optional, List, Dict, Union
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
from fastapi import Depends, Request, HTTPException
|
from fastapi import Depends, Request, HTTPException
|
||||||
from pydantic import BaseModel, UUID4
|
from pydantic import BaseModel, UUID4, conint
|
||||||
import pymongo
|
import pymongo
|
||||||
import aioredis
|
import aioredis
|
||||||
|
|
||||||
from db import BaseMongoModel
|
from db import BaseMongoModel
|
||||||
from archives import Archive
|
from archives import Archive, MAX_CRAWL_SCALE
|
||||||
from storages import get_presigned_url
|
from storages import get_presigned_url
|
||||||
|
|
||||||
|
|
||||||
@ -29,7 +29,7 @@ class DeleteCrawlList(BaseModel):
|
|||||||
class CrawlScale(BaseModel):
|
class CrawlScale(BaseModel):
|
||||||
""" scale the crawl to N parallel containers """
|
""" scale the crawl to N parallel containers """
|
||||||
|
|
||||||
scale: int = 1
|
scale: conint(ge=1, le=MAX_CRAWL_SCALE) = 1
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
@ -70,7 +70,7 @@ class Crawl(BaseMongoModel):
|
|||||||
|
|
||||||
state: str
|
state: str
|
||||||
|
|
||||||
scale: int = 1
|
scale: conint(ge=1, le=MAX_CRAWL_SCALE) = 1
|
||||||
completions: Optional[int] = 0
|
completions: Optional[int] = 0
|
||||||
|
|
||||||
stats: Optional[Dict[str, str]]
|
stats: Optional[Dict[str, str]]
|
||||||
|
@ -201,7 +201,7 @@ class K8SManager:
|
|||||||
labels,
|
labels,
|
||||||
annotations,
|
annotations,
|
||||||
crawlconfig.crawlTimeout,
|
crawlconfig.crawlTimeout,
|
||||||
crawlconfig.parallel,
|
crawlconfig.scale,
|
||||||
)
|
)
|
||||||
|
|
||||||
spec = client.V1beta1CronJobSpec(
|
spec = client.V1beta1CronJobSpec(
|
||||||
@ -468,9 +468,6 @@ class K8SManager:
|
|||||||
if not job or job.metadata.labels["btrix.archive"] != aid:
|
if not job or job.metadata.labels["btrix.archive"] != aid:
|
||||||
return "Invalid Crawled"
|
return "Invalid Crawled"
|
||||||
|
|
||||||
if parallelism < 1 or parallelism > 10:
|
|
||||||
return "Invalid Scale: Must be between 1 and 10"
|
|
||||||
|
|
||||||
job.spec.parallelism = parallelism
|
job.spec.parallelism = parallelism
|
||||||
|
|
||||||
await self.batch_api.patch_namespaced_job(
|
await self.batch_api.patch_namespaced_job(
|
||||||
|
Loading…
Reference in New Issue
Block a user