browsertrix/backend/btrixcloud/profile_job.py
Ilya Kreymer 30bda8c75d
VNC-Based Profile Browser (#433)
* profile browser vnc support + fixes:
- switch profile browser rendering to use VNC
- frontend: add @novnc/novnc as dependency, create separate bundle novnc.js to load into vnc browser (to avoid loading from each container)
- frontend: update proxy paths to proxy websocket, index page to crawler
- frontend: allow browser profiles in all browsers, remove browser compatibility check
- frontend: update webpack dev config, apply prettier
- frontend: node version fix
- backend: get vncpassword, build new URL for proxying to crawler iframe
- backend: fix profile / crawl job pull policy from 'Always' -> 'Never', should use existing image for job
- backend: fix kill signal to use bash -c to work with latest backend image
- backend/chart: add 'profile_browser_timeout_seconds' to chart values to control how long the profile browser remains running when idle (defaults to 60)
- backend: remove utils.py, now using secrets.token_hex() for random suffix
Co-authored-by: sua yoo <sua@suayoo.com>
2023-01-10 14:42:42 -08:00

72 lines
2.3 KiB
Python

""" entry point for job which manages a browser (eg. for profile creation) """
import os
import signal
import asyncio
import secrets
from abc import ABC, abstractmethod
# =============================================================================
class ProfileJob(ABC):
    """Browser run job.

    Base class for a job that manages one browser (e.g. for profile
    creation). Constructing an instance reads the job configuration from
    environment variables, schedules creation of the job objects on the
    event loop and arms a one-shot idle timer. Subclasses provide the
    actual object creation and deletion through ``init_job_objects`` and
    ``delete_job_objects``.

    Signals handled on the event loop:

    * ``SIGUSR1`` -- ping; re-arms the idle timer.
    * ``SIGALRM`` -- idle timer expired; schedules deletion of the job objects.
    * ``SIGTERM`` -- schedules deletion of the job objects.
    """

    # Used to build the ``browser=<job_id>`` string handed to
    # ``delete_job_objects`` on exit. Defaults to None; presumably assigned
    # by the concrete subclass -- TODO confirm.
    job_id = None

    def __init__(self):
        """Read configuration, install signal handlers and start the idle timer.

        Environment variables read: ``STORAGE_NAME``, ``STORE_PATH``,
        ``START_URL``, ``PROFILE_PATH`` and ``IDLE_TIMEOUT``.

        Raises:
            KeyError: if ``IDLE_TIMEOUT`` is not set.
            ValueError: if ``IDLE_TIMEOUT`` is not an integer.
        """
        super().__init__()

        # NOTE(review): newer Python versions deprecate get_event_loop()
        # implicitly creating a loop when none is set -- confirm that the
        # entry point installs a loop before constructing the job.
        self.loop = asyncio.get_event_loop()

        # The event loop only keeps weak references to tasks, so a task whose
        # result is discarded can be garbage-collected before it finishes.
        # Hold strong references here until each task completes.
        self._tasks = set()

        params = {
            "storage_name": os.environ.get("STORAGE_NAME"),
            "storage_path": os.environ.get("STORE_PATH") or "",
            "url": os.environ.get("START_URL"),
            "profile_filename": os.environ.get("PROFILE_PATH") or "",
            # fresh random password (16 random bytes, hex-encoded) per job
            "vnc_password": secrets.token_hex(16),
        }

        self.idle_timeout = int(os.environ["IDLE_TIMEOUT"])

        self.loop.add_signal_handler(signal.SIGUSR1, self.ping_handler)
        self.loop.add_signal_handler(signal.SIGALRM, self.timeout_handler)
        self.loop.add_signal_handler(signal.SIGTERM, self.exit_handler)

        self._create_task(self.async_init("profilebrowser.yaml", params))

        print(f"running browser for at least {self.idle_timeout} secs", flush=True)
        # One-shot timer (interval 0): SIGALRM is delivered once after
        # idle_timeout seconds unless the timer is re-armed by a ping.
        signal.setitimer(signal.ITIMER_REAL, self.idle_timeout, 0)

    def _create_task(self, coro):
        """Schedule ``coro`` on the loop and keep the task referenced until done.

        Returns the created task.
        """
        task = self.loop.create_task(coro)
        self._tasks.add(task)
        # Drop the reference once the task has finished.
        task.add_done_callback(self._tasks.discard)
        return task

    async def async_init(self, template, params):
        """async init, overridable by subclass"""
        await self.init_job_objects(template, params)

    @abstractmethod
    async def init_job_objects(self, filename, params):
        """base for creating objects

        Called with the template filename and the params dict built in
        ``__init__``.
        """

    @abstractmethod
    async def delete_job_objects(self, job_id):
        """base for deleting objects

        Called on exit with the string ``browser=<job_id>``.
        """

    def ping_handler(self, *_args):
        """handle custom signal as ping, extend shutdown timer"""
        print(f"signal received, extending timer {self.idle_timeout} secs", flush=True)

        # Re-arming replaces the pending timer, restarting the countdown.
        signal.setitimer(signal.ITIMER_REAL, self.idle_timeout, 0)

    def exit_handler(self, *_args):
        """handle SIGTERM"""
        print("sigterm: shutting down browser...", flush=True)
        self._do_exit()

    def timeout_handler(self, *_args):
        """handle SIGALRM"""
        print("sigalrm: timer expired ending idle browser...", flush=True)
        self._do_exit()

    def _do_exit(self):
        """Schedule deletion of this job's objects on the event loop."""
        self._create_task(self.delete_job_objects(f"browser={self.job_id}"))