crawljob: detect crawl failure when all crawlers set their status to 'failed'

This commit is contained in:
Ilya Kreymer 2022-06-07 21:48:58 -07:00
parent e3f268a2e8
commit 21b1a87534

View File

@ -108,25 +108,44 @@ class CrawlUpdater:
""" check if crawl is done if all crawl workers have set their done state """
results = await self.redis.hvals(f"{self.crawl_id}:status")
# check if done
# check if done / failed
done = 0
failed = 0
for res in results:
if res == "done":
done += 1
else:
return
elif res == "failed":
failed += 1
# check if done
# check if all crawlers are done
if done >= self.scale:
print("crawl done!", flush=True)
await self.finish_crawl()
await self.job.delete_crawl()
# check if all crawlers failed
elif failed >= self.scale:
print("crawl failed!", flush=True)
await self.fail_crawl()
await self.job.delete_crawl()
async def update_scale(self, new_scale):
""" set scale dynamically of running crawl """
self.scale = new_scale
await self.update_crawl(scale=new_scale)
async def fail_crawl(self):
""" mark crawl as failed """
if self.finished:
return
self.finished = dt_now()
await self.update_crawl(state="failed", finished=self.finished)
async def finish_crawl(self):
""" finish crawl """
if self.finished: