crawljob: detect crawl failure when all crawlers set their status to 'failed'
This commit is contained in:
		
							parent
							
								
									e3f268a2e8
								
							
						
					
					
						commit
						21b1a87534
					
				@ -108,25 +108,44 @@ class CrawlUpdater:
 | 
			
		||||
        """ check if crawl is done if all crawl workers have set their done state """
 | 
			
		||||
        results = await self.redis.hvals(f"{self.crawl_id}:status")
 | 
			
		||||
 | 
			
		||||
        # check if done
 | 
			
		||||
        # check if done / failed
 | 
			
		||||
        done = 0
 | 
			
		||||
        failed = 0
 | 
			
		||||
        for res in results:
 | 
			
		||||
            if res == "done":
 | 
			
		||||
                done += 1
 | 
			
		||||
            else:
 | 
			
		||||
                return
 | 
			
		||||
            elif res == "failed":
 | 
			
		||||
                failed += 1
 | 
			
		||||
 | 
			
		||||
        # check if done
 | 
			
		||||
        # check if all crawlers are done
 | 
			
		||||
        if done >= self.scale:
 | 
			
		||||
            print("crawl done!", flush=True)
 | 
			
		||||
            await self.finish_crawl()
 | 
			
		||||
 | 
			
		||||
            await self.job.delete_crawl()
 | 
			
		||||
 | 
			
		||||
        # check if all crawlers failed
 | 
			
		||||
        elif failed >= self.scale:
 | 
			
		||||
            print("crawl failed!", flush=True)
 | 
			
		||||
 | 
			
		||||
            await self.fail_crawl()
 | 
			
		||||
 | 
			
		||||
            await self.job.delete_crawl()
 | 
			
		||||
 | 
			
		||||
    async def update_scale(self, new_scale):
 | 
			
		||||
        """ set scale dynamically of running crawl """
 | 
			
		||||
        self.scale = new_scale
 | 
			
		||||
        await self.update_crawl(scale=new_scale)
 | 
			
		||||
 | 
			
		||||
    async def fail_crawl(self):
 | 
			
		||||
        """ mark crawl as failed """
 | 
			
		||||
        if self.finished:
 | 
			
		||||
            return
 | 
			
		||||
 | 
			
		||||
        self.finished = dt_now()
 | 
			
		||||
 | 
			
		||||
        await self.update_crawl(state="failed", finished=self.finished)
 | 
			
		||||
 | 
			
		||||
    async def finish_crawl(self):
 | 
			
		||||
        """ finish crawl """
 | 
			
		||||
        if self.finished:
 | 
			
		||||
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user