If a crawl is completed, the endpoint streams the logs from the log files in all of the created WACZ files, sorted by timestamp. The API endpoint supports filtering by log_level and context, regardless of whether the crawl is still running. This is not yet proper streaming, because the entire log file is read into memory before being streamed to the client. We will want to switch to proper streaming eventually, but are currently blocked by an aiobotocore bug; see: https://github.com/aio-libs/aiobotocore/issues/991?#issuecomment-1490737762
89 lines
2.2 KiB
Python
89 lines
2.2 KiB
Python
import json
|
|
import requests
|
|
import time
|
|
|
|
import pytest
|
|
|
|
from .conftest import API_PREFIX
|
|
|
|
|
|
# Maximum number of streamed log lines validated per test case; the test
# stops reading the response once this many lines have been checked.
LINES_TO_TEST = 10
|
|
|
|
|
|
@pytest.mark.parametrize(
    "log_level, context",
    [
        # No filtering
        (None, None),
        # Filter log level
        ("info", None),
        ("info,debug", None),
        # Filter context
        (None, "general"),
        (None, "general,worker"),
        # Filter both
        ("info,debug", "general,worker"),
    ],
)
def test_stream_crawl_logs_wacz(
    admin_auth_headers,
    default_org_id,
    large_crawl_id,
    large_crawl_finished,
    log_level,
    context,
):
    """Test that streaming logs after crawl concludes from WACZs works.

    Requests the crawl's ``/logs`` endpoint, optionally filtered through the
    ``logLevel`` and ``context`` query parameters, and validates up to
    ``LINES_TO_TEST`` streamed lines.  Each line must be a JSON object with
    truthy ``logLevel``, ``context`` and ``timestamp`` values and a
    ``details`` value that is either truthy or an empty dict.  When a filter
    is given, the corresponding field must be one of the comma-separated
    values requested.  Timestamps must be non-decreasing from line to line.

    Args:
        admin_auth_headers: Request headers sent with the API call.
        default_org_id: Org id interpolated into the request URL.
        large_crawl_id: Crawl id interpolated into the request URL.
        large_crawl_finished: Not read by the test body; requested only as a
            fixture dependency (presumably it waits for the crawl to finish
            -- verify against conftest).
        log_level: Comma-separated log levels to filter on, or ``None``.
        context: Comma-separated contexts to filter on, or ``None``.
    """
    api_url = f"{API_PREFIX}/orgs/{default_org_id}/crawls/{large_crawl_id}/logs"

    # requests omits parameters whose value is None, so a single dict covers
    # every combination of filters without hand-building the query string.
    query = {"logLevel": log_level, "context": context}

    log_levels = log_level.split(",") if log_level else []
    contexts = context.split(",") if context else []

    with requests.get(
        api_url, params=query, headers=admin_auth_headers, stream=True
    ) as r:
        assert r.status_code == 200

        last_timestamp = None
        lines_checked = 0

        # Iterate the stream directly.  Reading ``r.content`` first would
        # buffer the whole body and cache it, so polling it for new data can
        # never succeed and would loop forever on an empty response.
        for line in r.iter_lines():
            log_line_dict = json.loads(line.decode("utf-8"))

            assert log_line_dict["logLevel"]
            if log_level:
                assert log_line_dict["logLevel"] in log_levels

            assert log_line_dict["context"]
            if context:
                assert log_line_dict["context"] in contexts
            assert log_line_dict["details"] or log_line_dict["details"] == {}

            timestamp = log_line_dict["timestamp"]
            assert timestamp
            if last_timestamp is not None:
                assert timestamp >= last_timestamp
            last_timestamp = timestamp

            lines_checked += 1
            if lines_checked >= LINES_TO_TEST:
                # Enough lines validated; leaving the ``with`` block closes
                # the connection without draining the rest of the stream.
                break

        # Fail fast instead of hanging or passing vacuously on an empty log.
        assert lines_checked > 0, "log stream returned no lines"