backend: errors endpoint: Parse JSON-l errors before returning (#799)

This commit is contained in:
Tessa Walsh 2023-04-26 17:36:48 -04:00 committed by GitHub
parent e6e46b522a
commit 1a63c31b71
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 26 additions and 5 deletions

View File

@ -24,7 +24,7 @@ from .orgs import Organization, MAX_CRAWL_SCALE
from .pagination import DEFAULT_PAGE_SIZE, paginated_format
from .storages import get_presigned_url, delete_crawl_file_object, get_wacz_logs
from .users import User
from .utils import dt_now, ts_now, get_redis_crawl_stats
from .utils import dt_now, ts_now, get_redis_crawl_stats, parse_jsonl_error_messages
CRAWL_STATES = (
@ -784,12 +784,14 @@ class CrawlOps:
try:
redis = await self.get_redis(crawl_id)
errors = await redis.lrange(f"{crawl_id}:e", skip, page_size)
total = len(errors)
except exceptions.ConnectionError:
# pylint: disable=raise-missing-from
raise HTTPException(status_code=503, detail="redis_connection_error")
return errors, total
parsed_errors = parse_jsonl_error_messages(errors)
total = len(parsed_errors)
return parsed_errors, total
async def get_redis(self, crawl_id):
"""get redis url for crawl id"""
@ -1240,8 +1242,9 @@ def init_crawls_api(app, mdb, users, crawl_manager, crawl_config_ops, orgs, user
skip = (page - 1) * pageSize
upper_bound = skip + pageSize - 1
errors = crawl.errors[skip:upper_bound]
total = len(errors)
return paginated_format(errors, total, page, pageSize)
parsed_errors = parse_jsonl_error_messages(errors)
total = len(parsed_errors)
return paginated_format(parsed_errors, total, page, pageSize)
errors, total = await ops.get_errors_from_redis(crawl_id, pageSize, page)
return paginated_format(errors, total, page, pageSize)

View File

@ -2,6 +2,7 @@
import os
import asyncio
import json
import sys
import signal
import atexit
@ -83,3 +84,20 @@ def register_exit_handler():
sys.exit(1)
loop.add_signal_handler(signal.SIGTERM, exit_handler)
def parse_jsonl_error_messages(errors):
    """Parse JSON-L error lines from redis/db into decoded JSON values.

    Every non-empty entry of ``errors`` is decoded with ``json.loads``.
    Empty entries are skipped silently; entries that cannot be decoded are
    reported on stdout and skipped, so a single bad line does not discard
    the rest of the batch.

    :param errors: iterable of JSON-encoded lines (``str`` or ``bytes``);
        ``None`` is treated like an empty iterable
    :returns: list of the decoded values, in input order
    """
    parsed_errors = []
    # A missing error list is handled the same way as an empty one.
    for error_line in errors or ():
        if not error_line:
            continue
        try:
            result = json.loads(error_line)
        # ValueError is the common parent of json.JSONDecodeError (malformed
        # JSON) and UnicodeDecodeError (byte strings that are not valid
        # UTF-8), so both kinds of bad line are logged instead of raised.
        except ValueError as err:
            print(
                f"Error decoding json-l error line: {error_line}. Error: {err}",
                flush=True,
            )
        else:
            parsed_errors.append(result)
    return parsed_errors