* rename config values - api -> backend - nginx -> frontend * job pods: - set job_pull_policy from api_pull_policy (same as backend image) - default to Always, but can be overridden for local deployment (same as backend image) typo fix: CRAWL_NAMESPACE -> CRAWLER_NAMESPACE (part of #491) ansible: set default label to :latest instead of :dev for
		
			
				
	
	
		
			97 lines
		
	
	
		
			3.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			97 lines
		
	
	
		
			3.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| """ base k8s job driver """
 | |
| 
 | |
| import os
 | |
| import asyncio
 | |
| import sys
 | |
| import traceback
 | |
| 
 | |
| import yaml
 | |
| 
 | |
| from fastapi.templating import Jinja2Templates
 | |
| 
 | |
| from .utils import create_from_yaml, get_templates_dir
 | |
| from .k8sapi import K8sAPI
 | |
| 
 | |
| 
 | |
| # =============================================================================
 | |
| # pylint: disable=too-many-instance-attributes,bare-except,broad-except
 | |
class K8SJobMixin(K8sAPI):
    """Crawl Job State

    Base driver for a k8s job: reads its namespace and job id from the
    environment, creates k8s objects from Jinja2 templates, and on cleanup
    deletes those objects, deletes its own job and exits the process.

    The api handles used below (api_client, apps_api, core_api, batch_api)
    are not defined in this class -- presumably provided by K8sAPI.
    """

    def __init__(self):
        # falls back to "crawlers" when the env var is unset or empty
        self.namespace = os.environ.get("CRAWLER_NAMESPACE") or "crawlers"
        self.config_file = "/config/config.yaml"

        # NOTE: JOB_ID is required -- if it is unset, .get() returns None and
        # the startswith() call below raises AttributeError
        self.job_id = os.environ.get("JOB_ID")

        # keep the unstripped name: it is the name used to delete our own job
        self.orig_job_id = self.job_id
        if self.job_id.startswith("job-"):
            self.job_id = self.job_id[4:]

        self.templates = Jinja2Templates(directory=get_templates_dir())

        # strong reference to the pending exit task, see delete_job_objects()
        self._exit_task = None

        super().__init__()

    async def init_job_objects(self, template, extra_params=None):
        """init k8s objects from specified template with given extra_params

        Template params are loaded from self.config_file, "id" is set to the
        job id, and extra_params (if any) are applied last, overriding both.

        Errors raised while creating the objects are printed and swallowed;
        errors from reading the config or rendering the template propagate.
        """
        with open(self.config_file, encoding="utf-8") as fh_config:
            params = yaml.safe_load(fh_config)

        params["id"] = self.job_id

        if extra_params:
            params.update(extra_params)

        data = self.templates.env.get_template(template).render(params)

        try:
            await create_from_yaml(self.api_client, data, namespace=self.namespace)
        except Exception:
            # best-effort: report the failure, but don't propagate it
            traceback.print_exc()

    async def delete_job_objects(self, selector):
        """delete crawl stateful sets, services and pvcs

        Objects are looked up in self.namespace by the label selector. After
        they are removed, this job deletes itself and schedules a process
        exit 5 seconds later.
        """
        kwargs = {
            "namespace": self.namespace,
            "label_selector": selector,
        }

        statefulsets = await self.apps_api.list_namespaced_stateful_set(**kwargs)

        for statefulset in statefulsets.items:
            print(f"Deleting service {statefulset.spec.service_name}")
            await self.core_api.delete_namespaced_service(
                name=statefulset.spec.service_name,
                namespace=self.namespace,
                propagation_policy="Foreground",
            )
            print(f"Deleting statefulset {statefulset.metadata.name}")
            await self.apps_api.delete_namespaced_stateful_set(
                name=statefulset.metadata.name,
                namespace=self.namespace,
                propagation_policy="Foreground",
            )

        # until delete policy is supported
        try:
            await self.core_api.delete_collection_namespaced_persistent_volume_claim(
                **kwargs
            )
        except Exception as exc:
            # a failed PVC delete is reported, but does not stop the cleanup
            print("PVC Delete failed", exc, flush=True)

        # delete our own job!
        await self.batch_api.delete_namespaced_job(
            name=self.orig_job_id,
            namespace=self.namespace,
            grace_period_seconds=30,
            propagation_policy="Foreground",
        )

        # The event loop only keeps weak references to tasks, so hold a strong
        # reference here; otherwise the task may be garbage collected while it
        # is sleeping and the process would never exit.
        self._exit_task = asyncio.create_task(self.exit_soon(5))

    async def exit_soon(self, timeout):
        """exit soon

        Print a completion message, wait `timeout` seconds, then exit the
        process with status 0.
        """
        print("k8s objects deleted, job complete, exiting", flush=True)
        await asyncio.sleep(timeout)
        sys.exit(0)
 |