From 1a63c31b71e72ef4a25a4cf331db4f70e064057e Mon Sep 17 00:00:00 2001
From: Tessa Walsh
Date: Wed, 26 Apr 2023 17:36:48 -0400
Subject: [PATCH] backend: errors endpoint: Parse JSON-l errors before returning (#799)

---
 backend/btrixcloud/crawls.py | 13 ++++++++-----
 backend/btrixcloud/utils.py  | 18 ++++++++++++++++++
 2 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/backend/btrixcloud/crawls.py b/backend/btrixcloud/crawls.py
index 2c0a5913..ec82475b 100644
--- a/backend/btrixcloud/crawls.py
+++ b/backend/btrixcloud/crawls.py
@@ -24,7 +24,7 @@ from .orgs import Organization, MAX_CRAWL_SCALE
 from .pagination import DEFAULT_PAGE_SIZE, paginated_format
 from .storages import get_presigned_url, delete_crawl_file_object, get_wacz_logs
 from .users import User
-from .utils import dt_now, ts_now, get_redis_crawl_stats
+from .utils import dt_now, ts_now, get_redis_crawl_stats, parse_jsonl_error_messages
 
 
 CRAWL_STATES = (
@@ -784,12 +784,14 @@ class CrawlOps:
         try:
             redis = await self.get_redis(crawl_id)
             errors = await redis.lrange(f"{crawl_id}:e", skip, page_size)
-            total = len(errors)
         except exceptions.ConnectionError:
             # pylint: disable=raise-missing-from
             raise HTTPException(status_code=503, detail="redis_connection_error")
 
-        return errors, total
+        parsed_errors = parse_jsonl_error_messages(errors)
+        total = len(parsed_errors)
+
+        return parsed_errors, total
 
     async def get_redis(self, crawl_id):
         """get redis url for crawl id"""
@@ -1240,8 +1242,9 @@ def init_crawls_api(app, mdb, users, crawl_manager, crawl_config_ops, orgs, user
             skip = (page - 1) * pageSize
             upper_bound = skip + pageSize - 1
             errors = crawl.errors[skip:upper_bound]
-            total = len(errors)
-            return paginated_format(errors, total, page, pageSize)
+            parsed_errors = parse_jsonl_error_messages(errors)
+            total = len(parsed_errors)
+            return paginated_format(parsed_errors, total, page, pageSize)
 
         errors, total = await ops.get_errors_from_redis(crawl_id, pageSize, page)
         return paginated_format(errors, total, page, pageSize)
diff --git a/backend/btrixcloud/utils.py b/backend/btrixcloud/utils.py
index c82f48c5..10cff21e 100644
--- a/backend/btrixcloud/utils.py
+++ b/backend/btrixcloud/utils.py
@@ -2,6 +2,7 @@
 
 import os
 import asyncio
+import json
 import sys
 import signal
 import atexit
@@ -83,3 +84,20 @@ def register_exit_handler():
         sys.exit(1)
 
     loop.add_signal_handler(signal.SIGTERM, exit_handler)
+
+
+def parse_jsonl_error_messages(errors):
+    """parse json-l error strings from redis/db into json"""
+    parsed_errors = []
+    for error_line in errors:
+        if not error_line:
+            continue
+        try:
+            result = json.loads(error_line)
+            parsed_errors.append(result)
+        except json.JSONDecodeError as err:
+            print(
+                f"Error decoding json-l error line: {error_line}. Error: {err}",
+                flush=True,
+            )
+    return parsed_errors