Fix for cronjob skipping response (#1976)

If a cronjob is disabled, the operator should quickly return a success
value so that the job can be terminated.
It was previously returning an incorrect response, causing disabled
cronjobs to not be cleaned up. This change adds proper typing so that a correct response is always returned.
This commit is contained in:
Ilya Kreymer 2024-07-29 12:24:18 -07:00 committed by GitHub
parent 551660bb62
commit 96691a33fa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 38 additions and 27 deletions

View File

@ -1,11 +1,11 @@
""" Operator handler for crawl CronJobs """ """ Operator handler for crawl CronJobs """
from uuid import UUID from uuid import UUID
from typing import Optional, Any from typing import Optional
import yaml import yaml
from btrixcloud.utils import to_k8s_date, dt_now from btrixcloud.utils import to_k8s_date, dt_now
from .models import MCDecoratorSyncData, CJS from .models import MCDecoratorSyncData, CJS, MCDecoratorSyncResponse
from .baseoperator import BaseOperator from .baseoperator import BaseOperator
from ..models import CrawlConfig from ..models import CrawlConfig
@ -20,12 +20,14 @@ class CronJobOperator(BaseOperator):
"""init routes for crawl CronJob decorator""" """init routes for crawl CronJob decorator"""
@app.post("/op/cronjob/sync") @app.post("/op/cronjob/sync")
async def mc_sync_cronjob_crawls(data: MCDecoratorSyncData): async def mc_sync_cronjob_crawls(
data: MCDecoratorSyncData,
) -> MCDecoratorSyncResponse:
return await self.sync_cronjob_crawl(data) return await self.sync_cronjob_crawl(data)
def get_finished_response( def get_finished_response(
self, metadata: dict[str, str], set_status=True, finished: Optional[str] = None self, metadata: dict[str, str], set_status=True, finished: Optional[str] = None
): ) -> MCDecoratorSyncResponse:
"""get final response to indicate cronjob created job is finished""" """get final response to indicate cronjob created job is finished"""
if not finished: if not finished:
@ -40,12 +42,12 @@ class CronJobOperator(BaseOperator):
"completionTime": finished, "completionTime": finished,
} }
return { return MCDecoratorSyncResponse(
"attachments": [], attachments=[],
# set on job to match default behavior when job finishes # set on job to match default behavior when job finishes
"annotations": {"finished": finished}, annotations={"finished": finished},
"status": status, status=status,
} )
# pylint: disable=too-many-arguments # pylint: disable=too-many-arguments
async def make_new_crawljob( async def make_new_crawljob(
@ -56,7 +58,7 @@ class CronJobOperator(BaseOperator):
crawl_id: str, crawl_id: str,
metadata: dict[str, str], metadata: dict[str, str],
state: Optional[str], state: Optional[str],
) -> list[dict[str, Any]]: ) -> MCDecoratorSyncResponse:
"""declare new CrawlJob from cid, based on db data""" """declare new CrawlJob from cid, based on db data"""
# cronjob doesn't exist yet # cronjob doesn't exist yet
crawlconfig: CrawlConfig crawlconfig: CrawlConfig
@ -125,9 +127,11 @@ class CronJobOperator(BaseOperator):
profile_filename=profile_filename or "", profile_filename=profile_filename or "",
) )
return list(yaml.safe_load_all(crawljob)) return MCDecoratorSyncResponse(attachments=list(yaml.safe_load_all(crawljob)))
async def sync_cronjob_crawl(self, data: MCDecoratorSyncData): async def sync_cronjob_crawl(
self, data: MCDecoratorSyncData
) -> MCDecoratorSyncResponse:
"""create crawljobs from a job object spawned by cronjob""" """create crawljobs from a job object spawned by cronjob"""
metadata = data.object["metadata"] metadata = data.object["metadata"]
@ -161,7 +165,7 @@ class CronJobOperator(BaseOperator):
crawljob_id = f"crawljob-{crawl_id}" crawljob_id = f"crawljob-{crawl_id}"
if crawljob_id not in crawljobs: if crawljob_id not in crawljobs:
attachments = await self.make_new_crawljob( response = await self.make_new_crawljob(
UUID(cid), UUID(cid),
UUID(oid) if oid else None, UUID(oid) if oid else None,
UUID(userid) if userid else None, UUID(userid) if userid else None,
@ -170,16 +174,14 @@ class CronJobOperator(BaseOperator):
actual_state, actual_state,
) )
else: else:
# just return existing crawljob, after removing annotation # just return existing crawljob, filter metadata, remove status and annotations
# should use OnDelete policy so should just not change crawljob anyway
crawljob = crawljobs[crawljob_id] crawljob = crawljobs[crawljob_id]
try: crawljob["metadata"] = {
del crawljob["metadata"]["annotations"][ "name": crawljob["metadata"]["name"],
"metacontroller.k8s.io/last-applied-configuration" "labels": crawljob["metadata"].get("labels"),
] }
except KeyError: crawljob.pop("status", "")
pass
attachments = [crawljob] response = MCDecoratorSyncResponse(attachments=[crawljob])
return {"attachments": attachments} return response

View File

@ -2,7 +2,7 @@
from collections import defaultdict from collections import defaultdict
from uuid import UUID from uuid import UUID
from typing import Optional, DefaultDict, Literal, Annotated from typing import Optional, DefaultDict, Literal, Annotated, Any
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from kubernetes.utils import parse_quantity from kubernetes.utils import parse_quantity
from btrixcloud.models import StorageRef, TYPE_ALL_CRAWL_STATES from btrixcloud.models import StorageRef, TYPE_ALL_CRAWL_STATES
@ -53,6 +53,15 @@ class MCDecoratorSyncData(BaseModel):
finalizing: bool = False finalizing: bool = False
# ============================================================================
class MCDecoratorSyncResponse(BaseModel):
"""Response model for decoratorcontroller sync api"""
attachments: list[dict[str, Any]]
status: Optional[dict[str, Any]] = None
annotations: Optional[dict[str, str]] = None
# ============================================================================ # ============================================================================
class CrawlSpec(BaseModel): class CrawlSpec(BaseModel):
"""spec from k8s CrawlJob object""" """spec from k8s CrawlJob object"""

View File

@ -5,7 +5,7 @@ metadata:
name: crawljobs-operator name: crawljobs-operator
spec: spec:
generateSelector: false generateSelector: false
resyncPeriodSeconds: {{ .Values.operator_resync_seconds | default 10 }} resyncPeriodSeconds: {{ .Values.operator_resync_seconds | default 30 }}
parentResource: parentResource:
apiVersion: btrix.cloud/v1 apiVersion: btrix.cloud/v1
resource: crawljobs resource: crawljobs
@ -82,7 +82,7 @@ kind: DecoratorController
metadata: metadata:
name: cron-crawljobs-operator name: cron-crawljobs-operator
spec: spec:
resyncPeriodSeconds: 30 resyncPeriodSeconds: 60
resources: resources:
- apiVersion: batch/v1 - apiVersion: batch/v1
resource: jobs resource: jobs
@ -111,7 +111,7 @@ kind: DecoratorController
metadata: metadata:
name: background-job-operator name: background-job-operator
spec: spec:
resyncPeriodSeconds: 30 resyncPeriodSeconds: 60
resources: resources:
- apiVersion: batch/v1 - apiVersion: batch/v1
resource: jobs resource: jobs