crawljob: detect crawl failure when all crawlers set their status to 'failed'
This commit is contained in:
		
							parent
							
								
									e3f268a2e8
								
							
						
					
					
						commit
						21b1a87534
					
				@ -108,25 +108,44 @@ class CrawlUpdater:
 | 
				
			|||||||
        """ check if crawl is done if all crawl workers have set their done state """
 | 
					        """ check if crawl is done if all crawl workers have set their done state """
 | 
				
			||||||
        results = await self.redis.hvals(f"{self.crawl_id}:status")
 | 
					        results = await self.redis.hvals(f"{self.crawl_id}:status")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # check if done
 | 
					        # check if done / failed
 | 
				
			||||||
        done = 0
 | 
					        done = 0
 | 
				
			||||||
 | 
					        failed = 0
 | 
				
			||||||
        for res in results:
 | 
					        for res in results:
 | 
				
			||||||
            if res == "done":
 | 
					            if res == "done":
 | 
				
			||||||
                done += 1
 | 
					                done += 1
 | 
				
			||||||
            else:
 | 
					            elif res == "failed":
 | 
				
			||||||
                return
 | 
					                failed += 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # check if done
 | 
					        # check if all crawlers are done
 | 
				
			||||||
        if done >= self.scale:
 | 
					        if done >= self.scale:
 | 
				
			||||||
 | 
					            print("crawl done!", flush=True)
 | 
				
			||||||
            await self.finish_crawl()
 | 
					            await self.finish_crawl()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            await self.job.delete_crawl()
 | 
					            await self.job.delete_crawl()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # check if all crawlers failed
 | 
				
			||||||
 | 
					        elif failed >= self.scale:
 | 
				
			||||||
 | 
					            print("crawl failed!", flush=True)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            await self.fail_crawl()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            await self.job.delete_crawl()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    async def update_scale(self, new_scale):
 | 
					    async def update_scale(self, new_scale):
 | 
				
			||||||
        """ set scale dynamically of running crawl """
 | 
					        """ set scale dynamically of running crawl """
 | 
				
			||||||
        self.scale = new_scale
 | 
					        self.scale = new_scale
 | 
				
			||||||
        await self.update_crawl(scale=new_scale)
 | 
					        await self.update_crawl(scale=new_scale)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    async def fail_crawl(self):
 | 
				
			||||||
 | 
					        """ mark crawl as failed """
 | 
				
			||||||
 | 
					        if self.finished:
 | 
				
			||||||
 | 
					            return
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        self.finished = dt_now()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        await self.update_crawl(state="failed", finished=self.finished)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    async def finish_crawl(self):
 | 
					    async def finish_crawl(self):
 | 
				
			||||||
        """ finish crawl """
 | 
					        """ finish crawl """
 | 
				
			||||||
        if self.finished:
 | 
					        if self.finished:
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
		Reference in New Issue
	
	Block a user