diff --git a/backend/btrixcloud/crawlconfigs.py b/backend/btrixcloud/crawlconfigs.py
index f0b165d4..0d5fe15a 100644
--- a/backend/btrixcloud/crawlconfigs.py
+++ b/backend/btrixcloud/crawlconfigs.py
@@ -219,6 +219,10 @@ class CrawlConfigOps:
         storage_quota_reached = await self.org_ops.storage_quota_reached(org.id)
         exec_mins_quota_reached = await self.org_ops.exec_mins_quota_reached(org.id)
 
+        if org.readOnly:
+            run_now = False
+            print(f"Org {org.id} set to read-only", flush=True)
+
         if storage_quota_reached:
             run_now = False
             print(f"Storage quota exceeded for org {org.id}", flush=True)
@@ -843,6 +847,9 @@ class CrawlConfigOps:
         except:
             await self.readd_configmap(crawlconfig, org)
 
+        if org.readOnly:
+            raise HTTPException(status_code=403, detail="org_set_to_read_only")
+
         if await self.org_ops.storage_quota_reached(org.id):
             raise HTTPException(status_code=403, detail="storage_quota_reached")
 
diff --git a/backend/btrixcloud/crawls.py b/backend/btrixcloud/crawls.py
index 0e850c85..ee963c5a 100644
--- a/backend/btrixcloud/crawls.py
+++ b/backend/btrixcloud/crawls.py
@@ -738,6 +738,10 @@ class CrawlOps(BaseCrawlOps):
         crawl = await self.get_crawl(crawl_id, org)
 
+        # ensure org execution is allowed
+        if org.readOnly:
+            raise HTTPException(status_code=403, detail="org_set_to_read_only")
+
         # can only QA finished crawls
         if not crawl.finished:
             raise HTTPException(status_code=400, detail="crawl_not_finished")
 
diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py
index 87e3ffa4..e0005967 100644
--- a/backend/btrixcloud/models.py
+++ b/backend/btrixcloud/models.py
@@ -971,6 +971,14 @@ class OrgQuotaUpdate(BaseModel):
     update: OrgQuotas
 
 
+# ============================================================================
+class OrgReadOnlyUpdate(BaseModel):
+    """Organization readonly update"""
+
+    readOnly: bool
+    readOnlyReason: Optional[str] = None
+
+
 # ============================================================================
 class OrgWebhookUrls(BaseModel):
     """Organization webhook URLs"""
@@ -1025,6 +1033,9 @@ class OrgOut(BaseMongoModel):
 
     webhookUrls: Optional[OrgWebhookUrls] = OrgWebhookUrls()
 
+    readOnly: Optional[bool]
+    readOnlyReason: Optional[str]
+
 
 # ============================================================================
 class Organization(BaseMongoModel):
@@ -1069,6 +1080,9 @@ class Organization(BaseMongoModel):
 
     origin: Optional[AnyHttpUrl] = None
 
+    readOnly: Optional[bool] = False
+    readOnlyReason: Optional[str] = None
+
     def is_owner(self, user):
         """Check if user is owner"""
         return self._is_auth(user, UserRole.OWNER)
diff --git a/backend/btrixcloud/operator/cronjobs.py b/backend/btrixcloud/operator/cronjobs.py
index 445e86fb..52f0a14b 100644
--- a/backend/btrixcloud/operator/cronjobs.py
+++ b/backend/btrixcloud/operator/cronjobs.py
@@ -1,9 +1,10 @@
 """ Operator handler for crawl CronJobs """
 
 from uuid import UUID
+from typing import Optional
 import yaml
 
-from btrixcloud.utils import to_k8s_date
+from btrixcloud.utils import to_k8s_date, dt_now
 
 from .models import MCBaseRequest, MCDecoratorSyncData, CJS, CMAP
 from .baseoperator import BaseOperator
@@ -38,6 +39,30 @@ class CronJobOperator(BaseOperator):
             ]
         }
 
+    def get_finished_response(
+        self, metadata: dict[str, str], set_status=True, finished: Optional[str] = None
+    ):
+        """get final response to indicate cronjob-created job is finished"""
+
+        if not finished:
+            finished = to_k8s_date(dt_now())
+
+        status = None
+        # set status on decorated job to indicate that it's finished
+        if set_status:
+            status = {
+                "succeeded": 1,
"startTime": metadata.get("creationTimestamp"), + "completionTime": finished, + } + + return { + "attachments": [], + # set on job to match default behavior when job finishes + "annotations": {"finished": finished}, + "status": status, + } + async def sync_cronjob_crawl(self, data: MCDecoratorSyncData): """create crawljobs from a job object spawned by cronjob""" @@ -52,21 +77,14 @@ class CronJobOperator(BaseOperator): crawl_id, is_qa=False ) if finished: - status = None + finished_str = to_k8s_date(finished) + set_status = False # mark job as completed if not data.object["status"].get("succeeded"): print("Cron Job Complete!", finished) - status = { - "succeeded": 1, - "startTime": metadata.get("creationTimestamp"), - "completionTime": to_k8s_date(finished), - } + set_status = True - return { - "attachments": [], - "annotations": {"finished": finished}, - "status": status, - } + return self.get_finished_response(metadata, set_status, finished_str) configmap = data.related[CMAP][f"crawl-config-{cid}"]["data"] @@ -88,16 +106,22 @@ class CronJobOperator(BaseOperator): print( f"error: no crawlconfig {cid}. skipping scheduled job. old cronjob left over?" ) - return {"attachments": []} + return self.get_finished_response(metadata) # db create user = await self.user_ops.get_by_id(UUID(userid)) if not user: print(f"error: missing user for id {userid}") - return {"attachments": []} + return self.get_finished_response(metadata) warc_prefix = self.crawl_config_ops.get_warc_prefix(org, crawlconfig) + if org.readOnly: + print( + f"org {org.id} set to read-only. skipping scheduled crawl for workflow {cid}" + ) + return self.get_finished_response(metadata) + await self.crawl_config_ops.add_new_crawl( crawl_id, crawlconfig, diff --git a/backend/btrixcloud/orgs.py b/backend/btrixcloud/orgs.py index ca4c7aed..9d2100cd 100644 --- a/backend/btrixcloud/orgs.py +++ b/backend/btrixcloud/orgs.py @@ -25,6 +25,7 @@ from .models import ( StorageRef, OrgQuotas, OrgQuotaUpdate, + OrgReadOnlyUpdate, OrgMetrics, OrgWebhookUrls, CreateOrg, @@ -679,6 +680,16 @@ class OrgOps: slug_id_map[org["_id"]] = org["slug"] return slug_id_map + async def update_read_only(self, org: Organization, update: OrgReadOnlyUpdate): + """Set readOnly field for Organization""" + if update.readOnly is False: + # Set reason to empty string if readOnly is false + update.readOnlyReason = "" + + query = update.dict(exclude_unset=True) + + return await self.orgs.find_one_and_update({"_id": org.id}, {"$set": query}) + # ============================================================================ # pylint: disable=too-many-statements @@ -817,6 +828,19 @@ def init_orgs_api(app, mdb, user_manager, invites, user_dep): return {"updated": True} + @router.post("/read-only", tags=["organizations"]) + async def update_read_only( + update: OrgReadOnlyUpdate, + org: Organization = Depends(org_owner_dep), + user: User = Depends(user_dep), + ): + if not user.is_superuser: + raise HTTPException(status_code=403, detail="Not Allowed") + + await ops.update_read_only(org, update) + + return {"updated": True} + @router.post("/event-webhook-urls", tags=["organizations"]) async def update_event_webhook_urls( urls: OrgWebhookUrls, diff --git a/backend/test/test_org.py b/backend/test/test_org.py index fb94987b..6939d520 100644 --- a/backend/test/test_org.py +++ b/backend/test/test_org.py @@ -486,3 +486,58 @@ def test_get_org_slug_lookup_non_superadmin(crawler_auth_headers): r = requests.get(f"{API_PREFIX}/orgs/slug-lookup", headers=crawler_auth_headers) assert r.status_code 
     assert r.json()["detail"] == "Not Allowed"
+
+
+def test_update_read_only(admin_auth_headers, default_org_id):
+    r = requests.get(f"{API_PREFIX}/orgs/{default_org_id}", headers=admin_auth_headers)
+    data = r.json()
+    assert data["readOnly"] in (False, None)
+    assert data["readOnlyReason"] in (None, "")
+
+    r = requests.post(
+        f"{API_PREFIX}/orgs/{default_org_id}/read-only",
+        headers=admin_auth_headers,
+        json={"readOnly": True, "readOnlyReason": "Payment suspended"},
+    )
+    assert r.json()["updated"]
+
+    r = requests.get(f"{API_PREFIX}/orgs/{default_org_id}", headers=admin_auth_headers)
+    data = r.json()
+    assert data["readOnly"] is True
+    assert data["readOnlyReason"] == "Payment suspended"
+
+    # Try to start crawls, should fail
+    crawl_data = {
+        "runNow": True,
+        "name": "Read Only Test Crawl",
+        "description": "Should not run now",
+        "tags": [],
+        "config": {
+            "seeds": [{"url": "https://webrecorder.net/", "depth": 1}],
+            "exclude": "community",
+        },
+    }
+    r = requests.post(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/",
+        headers=admin_auth_headers,
+        json=crawl_data,
+    )
+    data = r.json()
+
+    assert data["added"]
+    assert data["id"]
+    assert data["run_now_job"] is None
+
+    # Reset back to False, future crawls in tests should run fine
+    r = requests.post(
+        f"{API_PREFIX}/orgs/{default_org_id}/read-only",
+        headers=admin_auth_headers,
+        json={"readOnly": False},
+    )
+    assert r.json()["updated"]
+
+    r = requests.get(f"{API_PREFIX}/orgs/{default_org_id}", headers=admin_auth_headers)
+    data = r.json()
+    assert data["readOnly"] is False
+    # Test that reason is unset when readOnly is set to false, even implicitly
+    assert data["readOnlyReason"] == ""
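
---

Usage note: the new POST /orgs/{oid}/read-only endpoint is superuser-only, and the backend clears readOnlyReason whenever readOnly is set back to false. Below is a minimal Python sketch (not part of the patch) of how a client might exercise the endpoint against a deployed backend, mirroring test_update_read_only above; API_PREFIX, the bearer token, and ORG_ID are placeholders, not values from this change.

import requests

API_PREFIX = "https://btrix.example.com/api"  # placeholder deployment URL
HEADERS = {"Authorization": "Bearer <superuser-token>"}  # placeholder token
ORG_ID = "<org-uuid>"  # placeholder org id

# Suspend the org: manual runs now fail with 403 "org_set_to_read_only",
# and the cronjob operator skips scheduled crawls for this org.
r = requests.post(
    f"{API_PREFIX}/orgs/{ORG_ID}/read-only",
    headers=HEADERS,
    json={"readOnly": True, "readOnlyReason": "Payment suspended"},
)
assert r.json()["updated"]

# Re-enable the org; the server resets readOnlyReason to "" even though
# no reason is sent here.
r = requests.post(
    f"{API_PREFIX}/orgs/{ORG_ID}/read-only",
    headers=HEADERS,
    json={"readOnly": False},
)
assert r.json()["updated"]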