crawljob: detect crawl failure when all crawlers set their status to 'failed'
This commit is contained in:
parent
e3f268a2e8
commit
21b1a87534
@ -108,25 +108,44 @@ class CrawlUpdater:
|
|||||||
""" check if crawl is done if all crawl workers have set their done state """
|
""" check if crawl is done if all crawl workers have set their done state """
|
||||||
results = await self.redis.hvals(f"{self.crawl_id}:status")
|
results = await self.redis.hvals(f"{self.crawl_id}:status")
|
||||||
|
|
||||||
# check if done
|
# check if done / failed
|
||||||
done = 0
|
done = 0
|
||||||
|
failed = 0
|
||||||
for res in results:
|
for res in results:
|
||||||
if res == "done":
|
if res == "done":
|
||||||
done += 1
|
done += 1
|
||||||
else:
|
elif res == "failed":
|
||||||
return
|
failed += 1
|
||||||
|
|
||||||
# check if done
|
# check if all crawlers are done
|
||||||
if done >= self.scale:
|
if done >= self.scale:
|
||||||
|
print("crawl done!", flush=True)
|
||||||
await self.finish_crawl()
|
await self.finish_crawl()
|
||||||
|
|
||||||
await self.job.delete_crawl()
|
await self.job.delete_crawl()
|
||||||
|
|
||||||
|
# check if all crawlers failed
|
||||||
|
elif failed >= self.scale:
|
||||||
|
print("crawl failed!", flush=True)
|
||||||
|
|
||||||
|
await self.fail_crawl()
|
||||||
|
|
||||||
|
await self.job.delete_crawl()
|
||||||
|
|
||||||
async def update_scale(self, new_scale):
|
async def update_scale(self, new_scale):
|
||||||
""" set scale dynamically of running crawl """
|
""" set scale dynamically of running crawl """
|
||||||
self.scale = new_scale
|
self.scale = new_scale
|
||||||
await self.update_crawl(scale=new_scale)
|
await self.update_crawl(scale=new_scale)
|
||||||
|
|
||||||
|
async def fail_crawl(self):
|
||||||
|
""" mark crawl as failed """
|
||||||
|
if self.finished:
|
||||||
|
return
|
||||||
|
|
||||||
|
self.finished = dt_now()
|
||||||
|
|
||||||
|
await self.update_crawl(state="failed", finished=self.finished)
|
||||||
|
|
||||||
async def finish_crawl(self):
|
async def finish_crawl(self):
|
||||||
""" finish crawl """
|
""" finish crawl """
|
||||||
if self.finished:
|
if self.finished:
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user