# browsertrix/backend/test_nightly/test_crawl_logs.py

import json
import requests
import time

import pytest

from .conftest import API_PREFIX


# Maximum number of streamed log lines a test case validates before it
# stops reading the response.
LINES_TO_TEST = 10


@pytest.mark.parametrize(
    "log_level, context",
    [
        # No filtering
        (None, None),
        # Filter log level
        ("info", None),
        ("info,debug", None),
        # Filter context
        (None, "general"),
        (None, "general,worker"),
        # Filter both
        ("info,debug", "general,worker"),
    ],
)
def test_stream_crawl_logs_wacz(
    admin_auth_headers,
    default_org_id,
    large_crawl_id,
    large_crawl_finished,
    log_level,
    context,
):
    """Test that streaming logs after crawl concludes from WACZs works.

    Streams the crawl's ``/logs`` endpoint, optionally filtered by
    ``logLevel`` and/or ``context`` (each a comma-separated list), and
    validates up to ``LINES_TO_TEST`` JSON log lines: every line must carry
    a log level, a context, a ``details`` value, and a timestamp that does
    not go backwards; when a filter is given, the line must match it.

    ``large_crawl_finished`` is not referenced in the body; it is requested
    only as a fixture (presumably so the crawl has completed before the
    logs are fetched -- confirm against conftest).
    """
    api_url = f"{API_PREFIX}/orgs/{default_org_id}/crawls/{large_crawl_id}/logs"

    # Append only the filters that were actually supplied, keeping the
    # same parameter order (logLevel, then context) and raw values.
    filters = {"logLevel": log_level, "context": context}
    query = "&".join(f"{key}={value}" for key, value in filters.items() if value)
    if query:
        api_url = f"{api_url}?{query}"

    allowed_levels = log_level.split(",") if log_level else []
    allowed_contexts = context.split(",") if context else []

    lines_checked = 0
    last_timestamp = None

    with requests.get(api_url, headers=admin_auth_headers, stream=True) as r:
        assert r.status_code == 200

        # iter_lines() blocks until data arrives, so no polling is needed.
        # Reading r.content here would download and cache the whole body,
        # so re-checking it in a loop could never observe new data and an
        # empty body would spin forever.
        for raw_line in r.iter_lines():
            if lines_checked >= LINES_TO_TEST:
                # Leaving the ``with`` block closes the connection.
                break

            # Skip empty keep-alive lines; they are not JSON log records.
            if not raw_line:
                continue

            log_line_dict = json.loads(raw_line.decode("utf-8"))

            assert log_line_dict["logLevel"]
            if log_level:
                assert log_line_dict["logLevel"] in allowed_levels

            assert log_line_dict["context"]
            if context:
                assert log_line_dict["context"] in allowed_contexts
            assert log_line_dict["details"] or log_line_dict["details"] == {}

            timestamp = log_line_dict["timestamp"]
            assert timestamp
            if last_timestamp:
                assert timestamp >= last_timestamp
            last_timestamp = timestamp

            lines_checked += 1

    # An empty stream must fail the test rather than pass vacuously.
    assert lines_checked > 0, "log stream returned no log lines"