backend: errors endpoint: Parse JSON-l errors before returning (#799)
This commit is contained in:
parent
e6e46b522a
commit
1a63c31b71
@ -24,7 +24,7 @@ from .orgs import Organization, MAX_CRAWL_SCALE
|
|||||||
from .pagination import DEFAULT_PAGE_SIZE, paginated_format
|
from .pagination import DEFAULT_PAGE_SIZE, paginated_format
|
||||||
from .storages import get_presigned_url, delete_crawl_file_object, get_wacz_logs
|
from .storages import get_presigned_url, delete_crawl_file_object, get_wacz_logs
|
||||||
from .users import User
|
from .users import User
|
||||||
from .utils import dt_now, ts_now, get_redis_crawl_stats
|
from .utils import dt_now, ts_now, get_redis_crawl_stats, parse_jsonl_error_messages
|
||||||
|
|
||||||
|
|
||||||
CRAWL_STATES = (
|
CRAWL_STATES = (
|
||||||
@ -784,12 +784,14 @@ class CrawlOps:
|
|||||||
try:
|
try:
|
||||||
redis = await self.get_redis(crawl_id)
|
redis = await self.get_redis(crawl_id)
|
||||||
errors = await redis.lrange(f"{crawl_id}:e", skip, page_size)
|
errors = await redis.lrange(f"{crawl_id}:e", skip, page_size)
|
||||||
total = len(errors)
|
|
||||||
except exceptions.ConnectionError:
|
except exceptions.ConnectionError:
|
||||||
# pylint: disable=raise-missing-from
|
# pylint: disable=raise-missing-from
|
||||||
raise HTTPException(status_code=503, detail="redis_connection_error")
|
raise HTTPException(status_code=503, detail="redis_connection_error")
|
||||||
|
|
||||||
return errors, total
|
parsed_errors = parse_jsonl_error_messages(errors)
|
||||||
|
total = len(parsed_errors)
|
||||||
|
|
||||||
|
return parsed_errors, total
|
||||||
|
|
||||||
async def get_redis(self, crawl_id):
|
async def get_redis(self, crawl_id):
|
||||||
"""get redis url for crawl id"""
|
"""get redis url for crawl id"""
|
||||||
@ -1240,8 +1242,9 @@ def init_crawls_api(app, mdb, users, crawl_manager, crawl_config_ops, orgs, user
|
|||||||
skip = (page - 1) * pageSize
|
skip = (page - 1) * pageSize
|
||||||
upper_bound = skip + pageSize - 1
|
upper_bound = skip + pageSize - 1
|
||||||
errors = crawl.errors[skip:upper_bound]
|
errors = crawl.errors[skip:upper_bound]
|
||||||
total = len(errors)
|
parsed_errors = parse_jsonl_error_messages(errors)
|
||||||
return paginated_format(errors, total, page, pageSize)
|
total = len(parsed_errors)
|
||||||
|
return paginated_format(parsed_errors, total, page, pageSize)
|
||||||
|
|
||||||
errors, total = await ops.get_errors_from_redis(crawl_id, pageSize, page)
|
errors, total = await ops.get_errors_from_redis(crawl_id, pageSize, page)
|
||||||
return paginated_format(errors, total, page, pageSize)
|
return paginated_format(errors, total, page, pageSize)
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import json
|
||||||
import sys
|
import sys
|
||||||
import signal
|
import signal
|
||||||
import atexit
|
import atexit
|
||||||
@ -83,3 +84,20 @@ def register_exit_handler():
|
|||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
loop.add_signal_handler(signal.SIGTERM, exit_handler)
|
loop.add_signal_handler(signal.SIGTERM, exit_handler)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_jsonl_error_messages(errors):
    """Parse JSON-L error lines (from redis or the db) into Python objects.

    Each entry of ``errors`` is expected to be a single JSON document
    encoded as ``str``, ``bytes`` or ``bytearray``.

    Falsy entries (``None``, empty strings/bytes) are skipped silently.
    Entries that cannot be parsed are logged to stdout and skipped, so a
    single corrupt line never prevents the remaining errors from being
    returned.

    :param errors: iterable of JSON-L error lines
    :returns: list of decoded JSON values, in input order
    """
    parsed_errors = []
    for error_line in errors:
        if not error_line:
            continue
        try:
            parsed_errors.append(json.loads(error_line))
        # ValueError covers json.JSONDecodeError (malformed JSON) as well as
        # UnicodeDecodeError (undecodable bytes); TypeError is raised by
        # json.loads for entries that are not str/bytes/bytearray.
        except (ValueError, TypeError) as err:
            print(
                f"Error decoding json-l error line: {error_line}. Error: {err}",
                flush=True,
            )
    return parsed_errors
|
||||||
|
Loading…
Reference in New Issue
Block a user