Add crawling defaults on the Org to allow setting certain crawl workflow fields as defaults (#2031)
- add POST /orgs/<id>/defaults/crawling API to update all defaults (defaults unset are cleared) - defaults returned as 'crawlingDefaults' object on Org, if set - fixes #2016 --------- Co-authored-by: Emma Segal-Grossman <hi@emma.cafe>
This commit is contained in:
		
							parent
							
								
									0e16d526c0
								
							
						
					
					
						commit
						04c8b50423
					
				| @ -496,6 +496,30 @@ class UpdateCrawlConfig(BaseModel): | ||||
|     config: Optional[RawCrawlConfig] = None | ||||
| 
 | ||||
| 
 | ||||
| # ============================================================================ | ||||
class CrawlConfigDefaults(BaseModel):
    """Org-level defaults for crawl workflow fields.

    Every field is optional; None means "no default set" for that field.
    Stored on the Organization as ``crawlingDefaults``.
    """

    # Overall crawl limits
    crawlTimeout: Optional[int] = None
    maxCrawlSize: Optional[int] = None

    # Per-page timing settings — units presumably seconds; confirm against
    # the crawler configuration that consumes these values
    pageLoadTimeout: Optional[int] = None
    postLoadDelay: Optional[int] = None
    behaviorTimeout: Optional[int] = None
    pageExtraDelay: Optional[int] = None

    blockAds: Optional[bool] = None

    # Default browser profile and crawler release channel
    profileid: Optional[UUID] = None
    crawlerChannel: Optional[str] = None

    lang: Optional[str] = None

    userAgent: Optional[str] = None

    # Exclusion patterns applied to crawled URLs
    exclude: Optional[List[str]] = None
| 
 | ||||
| 
 | ||||
| # ============================================================================ | ||||
| class CrawlConfigAddedResponse(BaseModel): | ||||
|     """Response model for adding crawlconfigs""" | ||||
| @ -1353,6 +1377,8 @@ class OrgOut(BaseMongoModel): | ||||
| 
 | ||||
|     subscription: Optional[Subscription] = None | ||||
| 
 | ||||
|     crawlingDefaults: Optional[CrawlConfigDefaults] = None | ||||
| 
 | ||||
| 
 | ||||
| # ============================================================================ | ||||
| class Organization(BaseMongoModel): | ||||
| @ -1404,6 +1430,8 @@ class Organization(BaseMongoModel): | ||||
| 
 | ||||
|     subscription: Optional[Subscription] = None | ||||
| 
 | ||||
|     crawlingDefaults: Optional[CrawlConfigDefaults] = None | ||||
| 
 | ||||
    def is_owner(self, user):
        """Return True if *user* has the OWNER role in this org."""
        return self._is_auth(user, UserRole.OWNER)
|  | ||||
| @ -55,6 +55,7 @@ from .models import ( | ||||
|     PaginatedOrgOutResponse, | ||||
|     CrawlConfig, | ||||
|     Crawl, | ||||
|     CrawlConfigDefaults, | ||||
|     UploadedCrawl, | ||||
|     ConfigRevision, | ||||
|     Profile, | ||||
| @ -586,6 +587,17 @@ class OrgOps: | ||||
|         ) | ||||
|         return res is not None | ||||
| 
 | ||||
    async def update_crawling_defaults(
        self, org: Organization, defaults: CrawlConfigDefaults
    ) -> bool:
        """Replace the org's crawl workflow defaults.

        The entire ``crawlingDefaults`` object is overwritten: fields left
        unset in ``defaults`` are serialized as None by ``model_dump()``,
        so any previously stored value for them is cleared (intentional —
        see commit description).

        :param org: organization to update
        :param defaults: new set of crawl workflow defaults
        :returns: True if the org document was found and updated
        """
        res = await self.orgs.find_one_and_update(
            {"_id": org.id},
            {"$set": {"crawlingDefaults": defaults.model_dump()}},
            return_document=ReturnDocument.AFTER,
        )
        # find_one_and_update returns None when no document matched the filter
        return res is not None
| 
 | ||||
|     async def add_user_by_invite( | ||||
|         self, | ||||
|         invite: InvitePending, | ||||
| @ -1535,6 +1547,16 @@ def init_orgs_api( | ||||
| 
 | ||||
|         return {"updated": True} | ||||
| 
 | ||||
    @router.post(
        "/defaults/crawling", tags=["organizations"], response_model=UpdatedResponse
    )
    async def update_crawling_defaults(
        defaults: CrawlConfigDefaults,
        org: Organization = Depends(org_owner_dep),
    ):
        """Set org-wide crawl workflow defaults (org owners only).

        Replaces the whole defaults object: any field omitted from the
        request body is cleared rather than preserved.
        """
        await ops.update_crawling_defaults(org, defaults)
        return {"updated": True}
| 
 | ||||
|     @router.post( | ||||
|         "/recalculate-storage", tags=["organizations"], response_model=SuccessResponse | ||||
|     ) | ||||
|  | ||||
| @ -56,6 +56,24 @@ def test_get_org_crawler(crawler_auth_headers, default_org_id): | ||||
|     assert data.get("users") == {} | ||||
| 
 | ||||
| 
 | ||||
def test_update_org_crawling_defaults(admin_auth_headers, default_org_id):
    """Set crawling defaults on the org, then verify they round-trip
    through GET /orgs/<id> as the 'crawlingDefaults' object."""
    r = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/defaults/crawling",
        headers=admin_auth_headers,
        json={"maxCrawlSize": 200000, "lang": "fr"},
    )
    assert r.status_code == 200
    # `is True` instead of `== True` (flake8 E712); response model pins a bool
    assert r.json()["updated"] is True

    r = requests.get(f"{API_PREFIX}/orgs/{default_org_id}", headers=admin_auth_headers)
    # Previously unchecked: a failed GET would have produced a confusing
    # KeyError below instead of a clear status assertion
    assert r.status_code == 200

    defaults = r.json()["crawlingDefaults"]
    assert defaults
    assert defaults["maxCrawlSize"] == 200000
    assert defaults["lang"] == "fr"
| 
 | ||||
| 
 | ||||
| def test_rename_org(admin_auth_headers, default_org_id): | ||||
|     UPDATED_NAME = "updated org name" | ||||
|     UPDATED_SLUG = "updated-org-name" | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user