browsertrix/backend/test_nightly/test_crawl_logs.py
Tessa Walsh fb80a04f18 Add crawl /log API endpoint
If a crawl is completed, the endpoint streams the logs from the log
files in all of the created WACZ files, sorted by timestamp.

The API endpoint supports filtering by log_level and context whether
the crawl is still running or not.

This is not yet proper streaming because the entire log file is read
into memory before being streamed to the client. We will want to
switch to proper streaming eventually, but are currently blocked by
an aiobotocore bug - see:

https://github.com/aio-libs/aiobotocore/issues/991?#issuecomment-1490737762
2023-04-11 11:51:17 -04:00

89 lines
2.2 KiB
Python

import json
import requests
import time
import pytest
from .conftest import API_PREFIX
# Upper bound on how many streamed log lines each test case validates before
# it stops reading the response.
LINES_TO_TEST = 10
@pytest.mark.parametrize(
    "log_level, context",
    [
        # No filtering
        (None, None),
        # Filter log level
        ("info", None),
        ("info,debug", None),
        # Filter context
        (None, "general"),
        (None, "general,worker"),
        # Filter both
        ("info,debug", "general,worker"),
    ],
)
def test_stream_crawl_logs_wacz(
    admin_auth_headers,
    default_org_id,
    large_crawl_id,
    large_crawl_finished,
    log_level,
    context,
):
    """Test that streaming logs after crawl concludes from WACZs works.

    Requests the crawl's ``/logs`` endpoint, optionally filtered by
    ``logLevel`` and/or ``context`` (comma-separated values), and validates
    up to ``LINES_TO_TEST`` JSON log lines from the streamed response:

    * every line has a truthy ``logLevel`` and ``context``; when a filter
      was supplied, the value must be one of the requested ones,
    * ``details`` is either truthy or an empty dict,
    * ``timestamp`` is truthy and never decreases from one line to the next.

    ``large_crawl_finished`` is requested only as a fixture dependency; its
    value is not read here (presumably it ensures the crawl has completed --
    confirm against conftest).
    """
    api_url = f"{API_PREFIX}/orgs/{default_org_id}/crawls/{large_crawl_id}/logs"

    # Let requests assemble the query string instead of concatenating it by
    # hand; filters that were not supplied are simply omitted.
    query_params = {}
    if log_level:
        query_params["logLevel"] = log_level
    if context:
        query_params["context"] = context

    log_levels = log_level.split(",") if log_level else []
    contexts = context.split(",") if context else []

    with requests.get(
        api_url, headers=admin_auth_headers, params=query_params, stream=True
    ) as r:
        assert r.status_code == 200

        last_timestamp = None
        lines_checked = 0

        # Deliberately do not touch ``r.content`` here: on a streamed
        # response that reads and caches the whole body, which defeats
        # streaming, and polling the cached value can never observe new data
        # (an empty body would spin forever). ``iter_lines`` already blocks
        # until data arrives.
        for raw_line in r.iter_lines():
            # Skip empty keep-alive lines, which are not valid JSON.
            if not raw_line:
                continue

            log_line_dict = json.loads(raw_line.decode("utf-8"))

            assert log_line_dict["logLevel"]
            if log_level:
                assert log_line_dict["logLevel"] in log_levels

            assert log_line_dict["context"]
            if context:
                assert log_line_dict["context"] in contexts

            assert log_line_dict["details"] or log_line_dict["details"] == {}

            timestamp = log_line_dict["timestamp"]
            assert timestamp
            # Lines must arrive sorted by timestamp (non-decreasing).
            if last_timestamp:
                assert timestamp >= last_timestamp
            last_timestamp = timestamp

            lines_checked += 1
            if lines_checked >= LINES_TO_TEST:
                # Enough lines validated; leaving the ``with`` block closes
                # the connection without draining the rest of the stream.
                break

        # Fail loudly rather than hang or pass vacuously when the endpoint
        # streams nothing.
        assert lines_checked > 0, "log stream returned no log lines"