Ensure error and behavior logs are written to database in order (#2540)
Fixes #2539
This commit is contained in:
parent
55bedcb0b7
commit
785fd85105
@ -878,7 +878,7 @@ class CrawlOperator(BaseOperator):
|
|||||||
if crawler_running:
|
if crawler_running:
|
||||||
status.lastActiveTime = date_to_str(dt_now())
|
status.lastActiveTime = date_to_str(dt_now())
|
||||||
|
|
||||||
file_done = await redis.lpop(self.done_key)
|
file_done = await redis.rpop(self.done_key)
|
||||||
while file_done:
|
while file_done:
|
||||||
msg = json.loads(file_done)
|
msg = json.loads(file_done)
|
||||||
# add completed file
|
# add completed file
|
||||||
@ -887,9 +887,9 @@ class CrawlOperator(BaseOperator):
|
|||||||
await redis.incr("filesAdded")
|
await redis.incr("filesAdded")
|
||||||
|
|
||||||
# get next file done
|
# get next file done
|
||||||
file_done = await redis.lpop(self.done_key)
|
file_done = await redis.rpop(self.done_key)
|
||||||
|
|
||||||
page_crawled = await redis.lpop(f"{crawl.id}:{self.pages_key}")
|
page_crawled = await redis.rpop(f"{crawl.id}:{self.pages_key}")
|
||||||
qa_run_id = crawl.id if crawl.is_qa else None
|
qa_run_id = crawl.id if crawl.is_qa else None
|
||||||
|
|
||||||
while page_crawled:
|
while page_crawled:
|
||||||
@ -897,21 +897,21 @@ class CrawlOperator(BaseOperator):
|
|||||||
await self.page_ops.add_page_to_db(
|
await self.page_ops.add_page_to_db(
|
||||||
page_dict, crawl.db_crawl_id, qa_run_id, crawl.oid
|
page_dict, crawl.db_crawl_id, qa_run_id, crawl.oid
|
||||||
)
|
)
|
||||||
page_crawled = await redis.lpop(f"{crawl.id}:{self.pages_key}")
|
page_crawled = await redis.rpop(f"{crawl.id}:{self.pages_key}")
|
||||||
|
|
||||||
crawl_error = await redis.lpop(f"{crawl.id}:{self.errors_key}")
|
crawl_error = await redis.rpop(f"{crawl.id}:{self.errors_key}")
|
||||||
while crawl_error:
|
while crawl_error:
|
||||||
await self.crawl_ops.add_crawl_error(
|
await self.crawl_ops.add_crawl_error(
|
||||||
crawl.db_crawl_id, crawl.is_qa, crawl_error
|
crawl.db_crawl_id, crawl.is_qa, crawl_error
|
||||||
)
|
)
|
||||||
crawl_error = await redis.lpop(f"{crawl.id}:{self.errors_key}")
|
crawl_error = await redis.rpop(f"{crawl.id}:{self.errors_key}")
|
||||||
|
|
||||||
behavior_log = await redis.lpop(f"{crawl.id}:{self.behavior_logs_key}")
|
behavior_log = await redis.rpop(f"{crawl.id}:{self.behavior_logs_key}")
|
||||||
while behavior_log:
|
while behavior_log:
|
||||||
await self.crawl_ops.add_crawl_behavior_log(
|
await self.crawl_ops.add_crawl_behavior_log(
|
||||||
crawl.db_crawl_id, behavior_log
|
crawl.db_crawl_id, behavior_log
|
||||||
)
|
)
|
||||||
behavior_log = await redis.lpop(f"{crawl.id}:{self.behavior_logs_key}")
|
behavior_log = await redis.rpop(f"{crawl.id}:{self.behavior_logs_key}")
|
||||||
|
|
||||||
# ensure filesAdded and filesAddedSize always set
|
# ensure filesAdded and filesAddedSize always set
|
||||||
status.filesAdded = int(await redis.get("filesAdded") or 0)
|
status.filesAdded = int(await redis.get("filesAdded") or 0)
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user