The operator class has gotten fairly large, this is a first pass in refactoring operator.py into a submodule instead, with multiple operator instances which handle different types of objects. - The main k8s interface has been split into K8sOpApi which extends K8sApi and is shared across all operators. - Each operator extends BaseOperator which also has an instance of K8sOpApi - The CrawlOperator is still the bulk of the functionality, but will likely be further refactored to support QA jobs --------- Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
		
			
				
	
	
		
			199 lines
		
	
	
		
			5.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			199 lines
		
	
	
		
			5.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| """ Operator Models """
 | |
| 
 | |
| from collections import defaultdict
 | |
| from uuid import UUID
 | |
| from typing import Optional, DefaultDict
 | |
| from pydantic import BaseModel, Field
 | |
| from kubernetes.utils import parse_quantity
 | |
| from btrixcloud.models import StorageRef
 | |
| 
 | |
| 
 | |
| BTRIX_API = "btrix.cloud/v1"
 | |
| 
 | |
| CMAP = "ConfigMap.v1"
 | |
| PVC = "PersistentVolumeClaim.v1"
 | |
| POD = "Pod.v1"
 | |
| CJS = f"CrawlJob.{BTRIX_API}"
 | |
| 
 | |
| 
 | |
| # ============================================================================
 | |
| class MCBaseRequest(BaseModel):
 | |
|     """base metacontroller model, used for customize hook"""
 | |
| 
 | |
|     parent: dict
 | |
|     controller: dict
 | |
| 
 | |
| 
 | |
| # ============================================================================
 | |
| class MCSyncData(MCBaseRequest):
 | |
|     """sync / finalize metacontroller model"""
 | |
| 
 | |
|     children: dict
 | |
|     related: dict
 | |
|     finalizing: bool = False
 | |
| 
 | |
| 
 | |
| # ============================================================================
 | |
| class MCDecoratorSyncData(BaseModel):
 | |
|     """sync for decoratorcontroller model"""
 | |
| 
 | |
|     object: dict
 | |
|     controller: dict
 | |
| 
 | |
|     attachments: dict
 | |
|     related: dict
 | |
|     finalizing: bool = False
 | |
| 
 | |
| 
 | |
| # ============================================================================
 | |
| class CrawlSpec(BaseModel):
 | |
|     """spec from k8s CrawlJob object"""
 | |
| 
 | |
|     id: str
 | |
|     cid: UUID
 | |
|     oid: UUID
 | |
|     scale: int = 1
 | |
|     storage: StorageRef
 | |
|     started: str
 | |
|     crawler_channel: str
 | |
|     stopping: bool = False
 | |
|     scheduled: bool = False
 | |
|     timeout: int = 0
 | |
|     max_crawl_size: int = 0
 | |
| 
 | |
| 
 | |
| # ============================================================================
 | |
| class PodResourcePercentage(BaseModel):
 | |
|     """Resource usage percentage ratios"""
 | |
| 
 | |
|     memory: float = 0
 | |
|     cpu: float = 0
 | |
|     storage: float = 0
 | |
| 
 | |
| 
 | |
| # ============================================================================
 | |
| class PodResources(BaseModel):
 | |
|     """Pod Resources"""
 | |
| 
 | |
|     memory: int = 0
 | |
|     cpu: float = 0
 | |
|     storage: int = 0
 | |
| 
 | |
|     def __init__(self, *a, **kw):
 | |
|         if "memory" in kw:
 | |
|             kw["memory"] = int(parse_quantity(kw["memory"]))
 | |
|         if "cpu" in kw:
 | |
|             kw["cpu"] = float(parse_quantity(kw["cpu"]))
 | |
|         if "storage" in kw:
 | |
|             kw["storage"] = int(parse_quantity(kw["storage"]))
 | |
|         super().__init__(*a, **kw)
 | |
| 
 | |
| 
 | |
| # ============================================================================
 | |
| class PodInfo(BaseModel):
 | |
|     """Aggregate pod status info held in CrawlJob"""
 | |
| 
 | |
|     exitTime: Optional[str] = None
 | |
|     exitCode: Optional[int] = None
 | |
|     isNewExit: Optional[bool] = Field(default=None, exclude=True)
 | |
|     reason: Optional[str] = None
 | |
| 
 | |
|     allocated: PodResources = PodResources()
 | |
|     used: PodResources = PodResources()
 | |
| 
 | |
|     newCpu: Optional[int] = None
 | |
|     newMemory: Optional[int] = None
 | |
| 
 | |
|     def dict(self, *a, **kw):
 | |
|         res = super().dict(*a, **kw)
 | |
|         percent = {
 | |
|             "memory": self.get_percent_memory(),
 | |
|             "cpu": self.get_percent_cpu(),
 | |
|             "storage": self.get_percent_storage(),
 | |
|         }
 | |
|         res["percent"] = percent
 | |
|         return res
 | |
| 
 | |
|     def get_percent_memory(self) -> float:
 | |
|         """compute percent memory used"""
 | |
|         return (
 | |
|             float(self.used.memory) / float(self.allocated.memory)
 | |
|             if self.allocated.memory
 | |
|             else 0
 | |
|         )
 | |
| 
 | |
|     def get_percent_cpu(self) -> float:
 | |
|         """compute percent cpu used"""
 | |
|         return (
 | |
|             float(self.used.cpu) / float(self.allocated.cpu)
 | |
|             if self.allocated.cpu
 | |
|             else 0
 | |
|         )
 | |
| 
 | |
|     def get_percent_storage(self) -> float:
 | |
|         """compute percent storage used"""
 | |
|         return (
 | |
|             float(self.used.storage) / float(self.allocated.storage)
 | |
|             if self.allocated.storage
 | |
|             else 0
 | |
|         )
 | |
| 
 | |
|     def should_restart_pod(self):
 | |
|         """return true if pod should be restarted"""
 | |
|         if self.newMemory and self.newMemory != self.allocated.memory:
 | |
|             return True
 | |
| 
 | |
|         if self.newCpu and self.newCpu != self.allocated.cpu:
 | |
|             return True
 | |
| 
 | |
|         return False
 | |
| 
 | |
| 
 | |
| # ============================================================================
 | |
| # pylint: disable=invalid-name
 | |
| class CrawlStatus(BaseModel):
 | |
|     """status from k8s CrawlJob object"""
 | |
| 
 | |
|     state: str = "starting"
 | |
|     pagesFound: int = 0
 | |
|     pagesDone: int = 0
 | |
|     size: int = 0
 | |
|     # human readable size string
 | |
|     sizeHuman: str = ""
 | |
|     scale: int = 1
 | |
|     filesAdded: int = 0
 | |
|     filesAddedSize: int = 0
 | |
|     finished: Optional[str] = None
 | |
|     stopping: bool = False
 | |
|     stopReason: Optional[str] = None
 | |
|     initRedis: bool = False
 | |
|     crawlerImage: Optional[str] = None
 | |
|     lastActiveTime: str = ""
 | |
|     podStatus: Optional[DefaultDict[str, PodInfo]] = defaultdict(
 | |
|         lambda: PodInfo()  # pylint: disable=unnecessary-lambda
 | |
|     )
 | |
|     # placeholder for pydantic 2.0 -- will require this version
 | |
|     # podStatus: Optional[
 | |
|     #    DefaultDict[str, Annotated[PodInfo, Field(default_factory=PodInfo)]]
 | |
|     # ]
 | |
|     restartTime: Optional[str]
 | |
|     canceled: bool = False
 | |
| 
 | |
|     # updated on pod exits and at regular interval
 | |
|     # Crawl Execution Time -- time all crawler pods have been running
 | |
|     # used to track resource usage and enforce execution minutes limit
 | |
|     crawlExecTime: int = 0
 | |
| 
 | |
|     # Elapsed Exec Time -- time crawl has been running in at least one pod
 | |
|     # used for crawl timeouts
 | |
|     elapsedCrawlTime: int = 0
 | |
| 
 | |
|     # last exec time update
 | |
|     lastUpdatedTime: str = ""
 | |
| 
 | |
|     # any pods exited
 | |
|     anyCrawlPodNewExit: Optional[bool] = Field(default=False, exclude=True)
 | |
| 
 | |
|     # don't include in status, use by metacontroller
 | |
|     resync_after: Optional[int] = Field(default=None, exclude=True)
 |