- regression fix: ensure correct signals are sent to stop crawl (SIGUSR1 + SIGTERM) - crawl stop: if crawl is still running after 60 seconds, allow signal to be resent - regression fix: ensure crawling with profile is working in k8s
This commit is contained in:
		
							parent
							
								
									1216f6cb66
								
							
						
					
					
						commit
						2531a03e41
					
				@ -6,6 +6,7 @@ import signal
 | 
			
		||||
import os
 | 
			
		||||
import json
 | 
			
		||||
import uuid
 | 
			
		||||
import time
 | 
			
		||||
 | 
			
		||||
from datetime import datetime
 | 
			
		||||
from abc import ABC, abstractmethod
 | 
			
		||||
@ -18,6 +19,10 @@ from .db import init_db
 | 
			
		||||
from .crawls import Crawl, CrawlFile, CrawlCompleteIn, dt_now
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Seconds before allowing another shutdown attempt
 | 
			
		||||
SHUTDOWN_ATTEMPT_WAIT = 60
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# =============================================================================
 | 
			
		||||
# pylint: disable=too-many-instance-attributes,bare-except
 | 
			
		||||
class CrawlJob(ABC):
 | 
			
		||||
@ -30,8 +35,6 @@ class CrawlJob(ABC):
 | 
			
		||||
    def __init__(self):
 | 
			
		||||
        super().__init__()
 | 
			
		||||
 | 
			
		||||
        self.shutdown_pending = False
 | 
			
		||||
 | 
			
		||||
        _, mdb = init_db()
 | 
			
		||||
        self.archives = mdb["archives"]
 | 
			
		||||
        self.crawls = mdb["crawls"]
 | 
			
		||||
@ -59,7 +62,7 @@ class CrawlJob(ABC):
 | 
			
		||||
 | 
			
		||||
        self._cached_params = {}
 | 
			
		||||
        self._files_added = False
 | 
			
		||||
        self._graceful_shutdown_pending = False
 | 
			
		||||
        self._graceful_shutdown_pending = 0
 | 
			
		||||
        self._delete_pending = False
 | 
			
		||||
 | 
			
		||||
        params = {
 | 
			
		||||
@ -299,16 +302,21 @@ class CrawlJob(ABC):
 | 
			
		||||
 | 
			
		||||
    async def graceful_shutdown(self):
 | 
			
		||||
        """ attempt to graceful stop the crawl, all data should be uploaded """
 | 
			
		||||
        if self._graceful_shutdown_pending:
 | 
			
		||||
        if (
 | 
			
		||||
            self._graceful_shutdown_pending
 | 
			
		||||
            and (time.time() - self._graceful_shutdown_pending) < SHUTDOWN_ATTEMPT_WAIT
 | 
			
		||||
        ):
 | 
			
		||||
            print("Already trying to stop crawl gracefully", flush=True)
 | 
			
		||||
            return {"success": False, "error": "already_stopping"}
 | 
			
		||||
 | 
			
		||||
        print("Stopping crawl", flush=True)
 | 
			
		||||
 | 
			
		||||
        if not await self._send_shutdown_signal():
 | 
			
		||||
        if not await self._send_shutdown_signal("SIGUSR1"):
 | 
			
		||||
            return {"success": False, "error": "unreachable"}
 | 
			
		||||
 | 
			
		||||
        self._graceful_shutdown_pending = True
 | 
			
		||||
        await self._send_shutdown_signal("SIGTERM")
 | 
			
		||||
 | 
			
		||||
        self._graceful_shutdown_pending = time.time()
 | 
			
		||||
 | 
			
		||||
        await self.update_crawl(state="stopping")
 | 
			
		||||
 | 
			
		||||
@ -403,7 +411,7 @@ class CrawlJob(ABC):
 | 
			
		||||
        """ set number of replicas """
 | 
			
		||||
 | 
			
		||||
    @abstractmethod
 | 
			
		||||
    async def _send_shutdown_signal(self):
 | 
			
		||||
    async def _send_shutdown_signal(self, signame):
 | 
			
		||||
        """ gracefully shutdown crawl """
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
 | 
			
		||||
@ -55,17 +55,14 @@ class K8SCrawlJob(K8SJobMixin, CrawlJob):
 | 
			
		||||
        except:
 | 
			
		||||
            return None
 | 
			
		||||
 | 
			
		||||
    async def _send_shutdown_signal(self):
 | 
			
		||||
    async def _send_shutdown_signal(self, signame):
 | 
			
		||||
        pods = await self.core_api.list_namespaced_pod(
 | 
			
		||||
            namespace=self.namespace,
 | 
			
		||||
            label_selector=f"crawl={self.job_id},role=crawler",
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        return await send_signal_to_pods(
 | 
			
		||||
            self.core_api_ws,
 | 
			
		||||
            self.namespace,
 | 
			
		||||
            pods.items,
 | 
			
		||||
            "SIGINT",
 | 
			
		||||
            self.core_api_ws, self.namespace, pods.items, signame
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    # pylint: disable=line-too-long
 | 
			
		||||
 | 
			
		||||
@ -259,9 +259,9 @@ spec:
 | 
			
		||||
            - /tmp/crawl-config.json
 | 
			
		||||
            - --redisStoreUrl
 | 
			
		||||
            - {{ redis_url }}
 | 
			
		||||
          {%- if profile_filename %}
 | 
			
		||||
          {%- if env.PROFILE_FILENAME %}
 | 
			
		||||
            - --profile
 | 
			
		||||
            - "@profiles/{{ profile_filename }}"
 | 
			
		||||
            - "@profiles/{{ env.PROFILE_FILENAME }}"
 | 
			
		||||
          {%- endif %}
 | 
			
		||||
 | 
			
		||||
          volumeMounts:
 | 
			
		||||
 | 
			
		||||
@ -34,6 +34,8 @@ async def send_signal_to_pods(core_api_ws, namespace, pods, signame, func=None):
 | 
			
		||||
            if func and not func(pod.metadata):
 | 
			
		||||
                continue
 | 
			
		||||
 | 
			
		||||
            print(f"Sending {signame} to {pod.metadata.name}", flush=True)
 | 
			
		||||
 | 
			
		||||
            await core_api_ws.connect_get_namespaced_pod_exec(
 | 
			
		||||
                pod.metadata.name,
 | 
			
		||||
                namespace=namespace,
 | 
			
		||||
 | 
			
		||||
@ -54,15 +54,15 @@ class SwarmCrawlJob(SwarmJobMixin, CrawlJob):
 | 
			
		||||
            None, runner.get_service, f"crawl-{self.job_id}-0_crawler"
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    async def _send_shutdown_signal(self):
 | 
			
		||||
    async def _send_shutdown_signal(self, signame):
 | 
			
		||||
        loop = asyncio.get_running_loop()
 | 
			
		||||
        count = 0
 | 
			
		||||
 | 
			
		||||
        for num in range(0, self.scale):
 | 
			
		||||
            name = f"crawl-{self.job_id}-{num}_crawler"
 | 
			
		||||
            print(f"Sending SIGABRT to {name}", flush=True)
 | 
			
		||||
            print(f"Sending {signame} to {name}", flush=True)
 | 
			
		||||
            count += await loop.run_in_executor(
 | 
			
		||||
                None, runner.ping_containers, name, "SIGABRT"
 | 
			
		||||
                None, runner.ping_containers, name, signame
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
        # for now, assume success if at least 1 container is signaled
 | 
			
		||||
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user