- use python-on-whales to drive the docker CLI directly, creating a docker stack for each crawl or profile browser (see first sketch below)
- configure storages via storages.yaml secret
- add crawl_job, profile_job, splitting into base and k8s/swarm implementations
- split manager into base crawlmanager and k8s/swarm implementations
- swarm: load initial scale from db to avoid modifying fixed configs; in k8s, load from configmap
- swarm: support scheduled jobs via swarm-cronjob service (see second sketch below)
- remove docker dependencies (aiodocker, apscheduler, scheduling)
- swarm: when using local minio, expose via /data/ route in nginx via extra include (in k8s, the include dir is empty and routing is handled via ingress)
- k8s: clean up minio chart: move init containers to minio.yaml
- swarm: stateful set implementation to be consistent with k8s scaling:
  - don't use service replicas
  - create a unique service with '-N' appended and allocate a unique volume for each replica
  - allows crawl containers to be restarted w/o losing data
  - add volume pruning background service, as volumes can be deleted only after the service shuts down fully
- watch: fully simplify routing, route via replica index instead of ip for both k8s and swarm
- rename network btrix-cloud-net -> btrix-net to avoid conflict with compose network
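To illustrate the first point: python-on-whales wraps the docker CLI (rather than the engine's HTTP API), so deploying a per-crawl stack amounts to a `docker stack deploy` call. This is a minimal sketch, not the actual manager code; the run_crawl_stack/delete_crawl_stack helpers and the stack naming are illustrative:

import tempfile

from python_on_whales import docker


def run_crawl_stack(crawl_id: str, rendered_compose: str) -> None:
    """ deploy a rendered compose file as a uniquely-named stack """
    with tempfile.NamedTemporaryFile("w", suffix=".yaml") as fh:
        fh.write(rendered_compose)
        fh.flush()
        # one stack per crawl: equivalent to 'docker stack deploy -c <file> crawl-<id>'
        docker.stack.deploy(f"crawl-{crawl_id}", compose_files=[fh.name])


def delete_crawl_stack(crawl_id: str) -> None:
    """ remove the stack once the crawl has finished """
    docker.stack.remove(f"crawl-{crawl_id}")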
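For the scheduled-jobs point: swarm-cronjob triggers zero-replica services based on deploy labels, so a scheduled crawl can reuse the same stack-deploy path. A hedged sketch of such a service template, kept as a Python string since the real template file isn't shown here; the label keys follow swarm-cronjob's documented scheme, while the service name and the {job_image}/{schedule} placeholders are illustrative:

# illustrative compose fragment for a cron-triggered crawl job
SCHEDULED_JOB_TEMPLATE = """
version: '3.8'

services:
  crawl_job:
    image: {job_image}
    deploy:
      replicas: 0                                # started only by swarm-cronjob
      restart_policy:
        condition: none                          # run once per trigger
      labels:
        - "swarm.cronjob.enable=true"
        - "swarm.cronjob.schedule={schedule}"    # cron expression
        - "swarm.cronjob.skip-running=true"      # don't overlap with a live run
"""

Rendered with the schedule from the crawl config, this would be deployed the same way as the on-demand stack above.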
| """ entry point for job which manages a browser (eg. for profile creation) """
 | |
| 
 | |
| import os
 | |
| import signal
 | |
| import asyncio
 | |
| 
 | |
| from abc import ABC, abstractmethod
 | |
| 
 | |
| 
 | |
| # =============================================================================
 | |
| class ProfileJob(ABC):
 | |
|     """ Browser run job """
 | |
| 
 | |
|     job_id = None
 | |
| 
 | |
|     def __init__(self):
 | |
|         super().__init__()
 | |
| 
 | |
|         self.loop = asyncio.get_event_loop()
 | |
| 
 | |
|         params = {
 | |
|             "storage_name": os.environ.get("STORAGE_NAME"),
 | |
|             "storage_path": os.environ.get("STORE_PATH") or "",
 | |
|             "url": os.environ.get("START_URL"),
 | |
|             "profile_filename": os.environ.get("PROFILE_PATH") or "",
 | |
|         }
 | |
| 
 | |
|         self.idle_timeout = int(os.environ["IDLE_TIMEOUT"])
 | |
| 
 | |
|         self.loop.add_signal_handler(signal.SIGUSR1, self.ping_handler)
 | |
|         self.loop.add_signal_handler(signal.SIGALRM, self.timeout_handler)
 | |
|         self.loop.add_signal_handler(signal.SIGTERM, self.exit_handler)
 | |
| 
 | |
|         self.loop.create_task(self.async_init("profilebrowser.yaml", params))
 | |
| 
 | |
|     async def async_init(self, template, params):
 | |
|         """ async init, overridable by subclass """
 | |
|         await self.init_job_objects(template, params)
 | |
| 
 | |
|     @abstractmethod
 | |
|     async def init_job_objects(self, filename, params):
 | |
|         """ base for creating objects """
 | |
| 
 | |
|     @abstractmethod
 | |
|     async def delete_job_objects(self, job_id):
 | |
|         """ base for deleting objects """
 | |
| 
 | |
|     def ping_handler(self, *_args):
 | |
|         """ handle custom signal as ping, extend shutdown timer """
 | |
| 
 | |
|         print(f"signal received, extending timer {self.idle_timeout} secs", flush=True)
 | |
| 
 | |
|         signal.setitimer(signal.ITIMER_REAL, self.idle_timeout, 0)
 | |
| 
 | |
|     def timeout_handler(self):
 | |
|         """ handle SIGTERM  """
 | |
|         print("sigterm: shutting down browser...", flush=True)
 | |
|         self._do_exit()
 | |
| 
 | |
|     def exit_handler(self):
 | |
|         """ handle SIGALRM """
 | |
|         print("sigalrm: timer expired ending idle browser...", flush=True)
 | |
|         self._do_exit()
 | |
| 
 | |
|     def _do_exit(self):
 | |
|         self.loop.create_task(self.delete_job_objects(f"browser={self.job_id}"))
 |
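The two abstract methods above are what the k8s and swarm backends fill in. A sketch of what the swarm side could look like, assuming the ProfileJob base class above; render-via-string.Template is a stand-in for however the real code fills in the compose template, and the stack naming is illustrative:

import tempfile
from string import Template

from python_on_whales import docker


class SwarmProfileJob(ProfileJob):
    """ profile browser job run as a docker swarm stack """

    async def init_job_objects(self, filename, params):
        # stand-in renderer; the real code may use a proper template engine
        with open(filename, encoding="utf-8") as fh:
            rendered = Template(fh.read()).safe_substitute(params)

        with tempfile.NamedTemporaryFile("w", suffix=".yaml") as fh:
            fh.write(rendered)
            fh.flush()
            # one stack per browser job, named after the job id
            docker.stack.deploy(f"job-{self.job_id}", compose_files=[fh.name])

    async def delete_job_objects(self, job_id):
        # job_id arrives as a label-style filter (e.g. "browser=<id>");
        # on swarm the whole per-job stack is simply removed
        docker.stack.remove(f"job-{self.job_id}")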