Add crawling defaults on the Org to allow setting certain crawl workflow fields as defaults (#2031)

- add POST /orgs/<id>/defaults/crawling API to update all defaults (defaults left unset are cleared)
- defaults are returned as a 'crawlingDefaults' object on the Org, if set
- fixes #2016

Co-authored-by: Emma Segal-Grossman <hi@emma.cafe>
parent 0e16d526c0
commit 04c8b50423
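For reference, a minimal sketch of calling the new endpoint from Python; the base URL, org id, and auth header below are placeholders, and the field names come from the CrawlConfigDefaults model in the diff below:

    import requests

    # Placeholder deployment values -- adjust for the actual instance.
    API_PREFIX = "https://app.example.com/api"
    ORG_ID = "<org-id>"
    HEADERS = {"Authorization": "Bearer <access-token>"}

    # Set org-wide crawl workflow defaults. Per the commit message, any default
    # not included in the body is cleared, so send the full desired set each time.
    r = requests.post(
        f"{API_PREFIX}/orgs/{ORG_ID}/defaults/crawling",
        headers=HEADERS,
        json={"pageLoadTimeout": 60, "blockAds": True},
    )
    r.raise_for_status()

    # If set, the defaults come back as a 'crawlingDefaults' object on the org.
    org = requests.get(f"{API_PREFIX}/orgs/{ORG_ID}", headers=HEADERS).json()
    print(org.get("crawlingDefaults"))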
@@ -496,6 +496,30 @@ class UpdateCrawlConfig(BaseModel):
     config: Optional[RawCrawlConfig] = None
 
 
+# ============================================================================
+class CrawlConfigDefaults(BaseModel):
+    """Crawl Config Org Defaults"""
+
+    crawlTimeout: Optional[int] = None
+    maxCrawlSize: Optional[int] = None
+
+    pageLoadTimeout: Optional[int] = None
+    postLoadDelay: Optional[int] = None
+    behaviorTimeout: Optional[int] = None
+    pageExtraDelay: Optional[int] = None
+
+    blockAds: Optional[bool] = None
+
+    profileid: Optional[UUID] = None
+    crawlerChannel: Optional[str] = None
+
+    lang: Optional[str] = None
+
+    userAgent: Optional[str] = None
+
+    exclude: Optional[List[str]] = None
+
+
 # ============================================================================
 class CrawlConfigAddedResponse(BaseModel):
     """Response model for adding crawlconfigs"""
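Since every field is Optional with a None default, a partial payload validates cleanly, and model_dump() still emits the unset fields as None; that is what lets the update handler below overwrite the whole stored object and clear anything omitted. A small self-contained sketch, trimmed to three of the fields above:

    from typing import Optional
    from pydantic import BaseModel

    # Stand-in for the CrawlConfigDefaults model above, reduced to three fields.
    class CrawlConfigDefaults(BaseModel):
        crawlTimeout: Optional[int] = None
        maxCrawlSize: Optional[int] = None
        lang: Optional[str] = None

    defaults = CrawlConfigDefaults(maxCrawlSize=200000, lang="fr")
    print(defaults.model_dump())
    # {'crawlTimeout': None, 'maxCrawlSize': 200000, 'lang': 'fr'}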
@@ -1353,6 +1377,8 @@ class OrgOut(BaseMongoModel):
 
     subscription: Optional[Subscription] = None
 
+    crawlingDefaults: Optional[CrawlConfigDefaults] = None
+
 
 # ============================================================================
 class Organization(BaseMongoModel):
@@ -1404,6 +1430,8 @@ class Organization(BaseMongoModel):
 
     subscription: Optional[Subscription] = None
 
+    crawlingDefaults: Optional[CrawlConfigDefaults] = None
+
     def is_owner(self, user):
         """Check if user is owner"""
         return self._is_auth(user, UserRole.OWNER)
@@ -55,6 +55,7 @@ from .models import (
     PaginatedOrgOutResponse,
     CrawlConfig,
     Crawl,
+    CrawlConfigDefaults,
     UploadedCrawl,
     ConfigRevision,
     Profile,
@@ -586,6 +587,17 @@ class OrgOps:
         )
         return res is not None
 
+    async def update_crawling_defaults(
+        self, org: Organization, defaults: CrawlConfigDefaults
+    ):
+        """Update crawling defaults"""
+        res = await self.orgs.find_one_and_update(
+            {"_id": org.id},
+            {"$set": {"crawlingDefaults": defaults.model_dump()}},
+            return_document=ReturnDocument.AFTER,
+        )
+        return res is not None
+
     async def add_user_by_invite(
         self,
         invite: InvitePending,
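The $set replaces the stored crawlingDefaults subdocument in one shot, which is why a POST with a partial body clears the fields it leaves out. How workflow creation consumes the stored defaults is outside this hunk; the helper below is only a hypothetical illustration of the intended fallback (unset workflow fields filled from org defaults), not code from this change:

    from typing import Any, Dict, Optional

    def apply_crawling_defaults(
        workflow_fields: Dict[str, Any],
        crawling_defaults: Optional[Dict[str, Any]],
    ) -> Dict[str, Any]:
        """Hypothetical: fill unset workflow fields from org crawling defaults."""
        merged = dict(workflow_fields)
        for field, value in (crawling_defaults or {}).items():
            if value is not None and merged.get(field) is None:
                merged[field] = value
        return merged

    # apply_crawling_defaults({"maxCrawlSize": None}, {"maxCrawlSize": 200000, "lang": "fr"})
    # -> {"maxCrawlSize": 200000, "lang": "fr"}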
@@ -1535,6 +1547,16 @@ def init_orgs_api(
 
         return {"updated": True}
 
+    @router.post(
+        "/defaults/crawling", tags=["organizations"], response_model=UpdatedResponse
+    )
+    async def update_crawling_defaults(
+        defaults: CrawlConfigDefaults,
+        org: Organization = Depends(org_owner_dep),
+    ):
+        await ops.update_crawling_defaults(org, defaults)
+        return {"updated": True}
+
     @router.post(
         "/recalculate-storage", tags=["organizations"], response_model=SuccessResponse
     )
@@ -56,6 +56,24 @@ def test_get_org_crawler(crawler_auth_headers, default_org_id):
     assert data.get("users") == {}
 
 
+def test_update_org_crawling_defaults(admin_auth_headers, default_org_id):
+    r = requests.post(
+        f"{API_PREFIX}/orgs/{default_org_id}/defaults/crawling",
+        headers=admin_auth_headers,
+        json={"maxCrawlSize": 200000, "lang": "fr"},
+    )
+
+    assert r.status_code == 200
+    assert r.json()["updated"] == True
+
+    r = requests.get(f"{API_PREFIX}/orgs/{default_org_id}", headers=admin_auth_headers)
+
+    data = r.json()
+    assert data["crawlingDefaults"]
+    assert data["crawlingDefaults"]["maxCrawlSize"] == 200000
+    assert data["crawlingDefaults"]["lang"] == "fr"
+
+
 def test_rename_org(admin_auth_headers, default_org_id):
     UPDATED_NAME = "updated org name"
     UPDATED_SLUG = "updated-org-name"