backend: crawls api: better fix for graceful stop
- k8s: don't use redis, set to 'stopping' if status.active is not set, toggled immediately on delete_job
- docker: set custom redis key to indicate 'stopping' state (container still running)
- api: remove crawl is_running endpoint, redundant with general get crawl api
Commit: be86505347
Parent: 542680daf7
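The per-backend state decision described above reduces to roughly the following sketch. The helper names here are hypothetical and only summarize the docker-manager and k8s-manager changes in the diff below:

    # docker: a redis flag, set when a graceful stop is requested, marks the
    # still-running container as 'stopping'
    async def docker_crawl_state(redis, crawl_id):
        stopping = await redis.get(f"{crawl_id}:stop")
        return "stopping" if stopping else "running"

    # k8s: no redis flag needed; a job whose status.active count is unset
    # is treated as winding down
    def k8s_crawl_state(job):
        return "running" if job.status.active else "stopping"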
@@ -184,7 +184,7 @@ class CrawlOps:
         """Add finished crawl to db, increment archive usage.
         If crawl file provided, update and add file"""
         if crawl_file:
-            await self.get_redis_stats([crawl], False)
+            await self.get_redis_stats([crawl])

         crawl_update = {
             "$set": crawl.to_dict(exclude={"files", "completions"}),
@@ -280,7 +280,7 @@ class CrawlOps:
             aid=archive.id_str
         )

-        await self.get_redis_stats(running_crawls, True)
+        await self.get_redis_stats(running_crawls)

        finished_crawls = await self.list_finished_crawls(
            aid=archive.id, exclude_files=True
@@ -300,7 +300,7 @@ class CrawlOps:
         """ Get data for single crawl """
         crawl = await self.crawl_manager.get_running_crawl(crawlid, archive.id_str)
         if crawl:
-            await self.get_redis_stats([crawl], True)
+            await self.get_redis_stats([crawl])

         else:
             res = await self.crawls.find_one({"_id": crawlid, "aid": archive.id})
@@ -343,33 +343,25 @@ class CrawlOps:
                 file_.filename = storage_prefix + file_.filename

     # pylint: disable=too-many-arguments
-    async def get_redis_stats(self, crawl_list, set_stopping=False):
+    async def get_redis_stats(self, crawl_list):
         """ Add additional live crawl stats from redis """
         results = None

         def pairwise(iterable):
             val = iter(iterable)
-            return zip(val, val, val)
+            return zip(val, val)

         async with self.redis.pipeline(transaction=True) as pipe:
             for crawl in crawl_list:
                 key = crawl.id
                 pipe.llen(f"{key}:d")
                 pipe.scard(f"{key}:s")
-                pipe.get(f"{key}:stop")

             results = await pipe.execute()

-        for crawl, (done, total, stopping) in zip(crawl_list, pairwise(results)):
-            if set_stopping and stopping:
-                crawl.state = "stopping"
-
+        for crawl, (done, total) in zip(crawl_list, pairwise(results)):
             crawl.stats = {"done": done, "found": total}

-    async def mark_stopping(self, crawl_id):
-        """ Mark crawl as in process of stopping in redis """
-        await self.redis.setex(f"{crawl_id}:stop", 600, 1)
-
     async def delete_crawls(self, aid: uuid.UUID, delete_list: DeleteCrawlList):
         """ Delete a list of crawls by id for given archive """
         res = await self.crawls.delete_many(
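The pipeline above now issues two commands per crawl (llen and scard), so the flat results list is regrouped two at a time. A standalone illustration of that pairwise regrouping, with made-up numbers in place of real redis replies:

    # the flat pipeline output [done1, found1, done2, found2, ...]
    # becomes one (done, found) tuple per crawl
    def pairwise(iterable):
        val = iter(iterable)
        return zip(val, val)

    results = [3, 10, 5, 12]  # made-up LLEN/SCARD replies for two crawls
    print(list(pairwise(results)))  # [(3, 10), (5, 12)]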
@@ -437,8 +429,6 @@ def init_crawls_api(
        if not stopping:
            raise HTTPException(status_code=404, detail=f"Crawl not found: {crawl_id}")

-        await ops.mark_stopping(crawl_id)
-
        return {"stopping_gracefully": True}

    @app.post("/archives/{aid}/crawls/delete", tags=["crawls"])
@@ -463,15 +453,15 @@ def init_crawls_api(
    async def get_crawl(crawl_id, archive: Archive = Depends(archive_crawl_dep)):
        return await ops.get_crawl(crawl_id, archive)

-    @app.get(
-        "/archives/{aid}/crawls/{crawl_id}/running",
-        tags=["crawls"],
-    )
-    async def get_running(crawl_id, archive: Archive = Depends(archive_crawl_dep)):
-        if not crawl_manager.is_running(crawl_id, archive.id_str):
-            raise HTTPException(status_code=404, detail="No Such Crawl")
-
-        return {"running": True}
+    # @app.get(
+    #     "/archives/{aid}/crawls/{crawl_id}/running",
+    #     tags=["crawls"],
+    # )
+    # async def get_running(crawl_id, archive: Archive = Depends(archive_crawl_dep)):
+    #     if not crawl_manager.is_running(crawl_id, archive.id_str):
+    #         raise HTTPException(status_code=404, detail="No Such Crawl")
+    #
+    #     return {"running": True}

    @app.post(
        "/archives/{aid}/crawls/{crawl_id}/scale",
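With the dedicated /running endpoint commented out, a client checks liveness through the general get-crawl endpoint instead. A hedged client-side sketch: the path comes from the diff, while the auth handling and exact response shape (a "state" field such as "running" or "stopping") are assumptions:

    import requests

    def is_crawl_running(api_base, aid, crawl_id, headers=None):
        # hypothetical helper; 404 means no such crawl for this archive
        resp = requests.get(f"{api_base}/archives/{aid}/crawls/{crawl_id}", headers=headers)
        if resp.status_code == 404:
            return False
        return resp.json().get("state") in ("running", "stopping")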
@@ -15,6 +15,7 @@ from tempfile import NamedTemporaryFile

 import aiodocker
 import aioprocessing
+import aioredis

 from scheduler import run_scheduler

@@ -62,6 +63,13 @@ class DockerManager:
        self.loop.create_task(self.run_event_loop())
        self.loop.create_task(self.init_trigger_queue())
        self.loop.create_task(self.cleanup_loop())
+        self.loop.create_task(self.init_redis(self.redis_url))

+    async def init_redis(self, redis_url):
+        """ init redis async """
+        self.redis = await aioredis.from_url(
+            redis_url, encoding="utf-8", decode_responses=True
+        )
+
    # pylint: disable=no-member
    async def init_trigger_queue(self):
@@ -257,6 +265,7 @@ class DockerManager:
                result = self._make_crawl_for_container(container, "canceled", True)
            else:
                result = True
+                await self._mark_is_stopping(crawl_id)

            await container.kill(signal="SIGTERM")
        except aiodocker.exceptions.DockerError as exc:
@@ -358,16 +367,16 @@ class DockerManager:
            if aid and container["Config"]["Labels"]["btrix.archive"] != aid:
                return None

-            return self._make_crawl_for_container(container, "running", False, CrawlOut)
+            stopping = await self._get_is_stopping(crawl_id)
+
+            return self._make_crawl_for_container(
+                container, "stopping" if stopping else "running", False, CrawlOut
+            )
        # pylint: disable=broad-except
        except Exception as exc:
            print(exc, flush=True)
            return None

-    async def is_running(self, crawl_id, aid):
-        """ Return true is crawl with given id is running """
-        return await self.get_running_crawl(crawl_id, aid) is not None
-
    async def scale_crawl(self):  # job_name, aid, parallelism=1):
        """ Scale running crawl, currently only supported in k8s"""
        return "Not Supported"
@@ -510,6 +519,14 @@ class DockerManager:
        )
        return results

+    async def _mark_is_stopping(self, crawl_id):
+        """ mark crawl as stopping in redis """
+        await self.redis.setex(f"{crawl_id}:stop", 600, 1)
+
+    async def _get_is_stopping(self, crawl_id):
+        """ check redis if crawl is marked for stopping """
+        return await self.redis.get(f"{crawl_id}:stop")
+
    async def _is_scheduled_crawl_for_config_running(self, cid):
        results = await self._list_running_containers(
            [f"btrix.crawlconfig={cid}", "btrix.run.manual=0"]
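The redis flag above is written with a 600-second TTL, so it cleans itself up even if the container never exits. A minimal sketch of that lifecycle, assuming aioredis 2.x and a Redis instance reachable at redis://localhost:6379; the connection arguments mirror the init_redis() change above:

    import asyncio
    import aioredis

    async def main():
        redis = await aioredis.from_url(
            "redis://localhost:6379", encoding="utf-8", decode_responses=True
        )
        crawl_id = "example-crawl"  # hypothetical id

        await redis.setex(f"{crawl_id}:stop", 600, 1)  # mark as stopping, expires in 600s
        print(await redis.get(f"{crawl_id}:stop"))     # "1" while the flag is set
        print(await redis.ttl(f"{crawl_id}:stop"))     # seconds remaining until expiry

    asyncio.run(main())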
@@ -281,9 +281,10 @@ class K8SManager:
        )

        return [
-            self._make_crawl_for_job(job, "running", False, CrawlOut)
+            self._make_crawl_for_job(
+                job, "running" if job.status.active else "stopping", False, CrawlOut
+            )
            for job in jobs.items
-            if job.status.active
        ]

    async def init_crawl_screencast(self, crawl_id, aid):
@@ -374,8 +375,9 @@ class K8SManager:
            if not job or job.metadata.labels["btrix.archive"] != aid:
                return None

-            if job.status.active:
-                return self._make_crawl_for_job(job, "running", False, CrawlOut)
+            return self._make_crawl_for_job(
+                job, "running" if job.status.active else "stopping", False, CrawlOut
+            )

        # pylint: disable=broad-except
        except Exception:
@@ -383,21 +385,6 @@ class K8SManager:

        return None

-    async def is_running(self, job_name, aid):
-        """ Return true if the specified crawl (by job_name) is running """
-        try:
-            job = await self.batch_api.read_namespaced_job(
-                name=job_name, namespace=self.namespace
-            )
-
-            if not job or job.metadata.labels["btrix.archive"] != aid:
-                return False
-
-            return True
-        # pylint: disable=broad-except
-        except Exception:
-            return False
-
    async def stop_crawl(self, job_name, aid, graceful=True):
        """Attempt to stop crawl, either gracefully by issuing a SIGTERM which
        will attempt to finish current pages