crawljob: detect crawl failure when all crawlers set their status to 'failed'

This commit is contained in:
Ilya Kreymer 2022-06-07 21:48:58 -07:00
parent e3f268a2e8
commit 21b1a87534

View File

@ -108,25 +108,44 @@ class CrawlUpdater:
""" check if crawl is done if all crawl workers have set their done state """ """ check if crawl is done if all crawl workers have set their done state """
results = await self.redis.hvals(f"{self.crawl_id}:status") results = await self.redis.hvals(f"{self.crawl_id}:status")
# check if done # check if done / failed
done = 0 done = 0
failed = 0
for res in results: for res in results:
if res == "done": if res == "done":
done += 1 done += 1
else: elif res == "failed":
return failed += 1
# check if done # check if all crawlers are done
if done >= self.scale: if done >= self.scale:
print("crawl done!", flush=True)
await self.finish_crawl() await self.finish_crawl()
await self.job.delete_crawl() await self.job.delete_crawl()
# check if all crawlers failed
elif failed >= self.scale:
print("crawl failed!", flush=True)
await self.fail_crawl()
await self.job.delete_crawl()
async def update_scale(self, new_scale):
    """Adjust the scale (number of crawler workers) of a running crawl.

    Records the new scale locally so done/failed counting uses it, then
    persists the change via update_crawl.
    """
    self.scale = new_scale
    # persist the new scale so the stored crawl record matches the live state
    await self.update_crawl(scale=self.scale)
async def fail_crawl(self):
    """Mark this crawl as failed and record its finish time.

    Does nothing if the crawl already reached a terminal state
    (self.finished is set), so repeated calls are safe.
    """
    if not self.finished:
        self.finished = dt_now()
        await self.update_crawl(state="failed", finished=self.finished)
async def finish_crawl(self): async def finish_crawl(self):
""" finish crawl """ """ finish crawl """
if self.finished: if self.finished: