Print crawl log to operator log (mostly for testing) (#1148)

* log only if the 'log_failed_crawl_lines' value is set to the number of last
lines to log from the failed container

---------
Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
Authored by Ilya Kreymer on 2023-09-06 17:53:02 -07:00 (committed via GitHub)
parent 38f596fd81
commit 68bc053ba0
6 changed files with 39 additions and 2 deletions

@@ -205,3 +205,12 @@ class K8sAPI:
             self.api_client.set_default_header("Content-Type", content_type)
         else:
             del self.api_client.default_headers["Content-Type"]
+
+    async def print_pod_logs(self, pod_names, container, lines=100):
+        """print pod logs"""
+        for pod in pod_names:
+            resp = await self.core_api.read_namespaced_pod_log(
+                pod, self.namespace, container=container, tail_lines=lines
+            )
+            print(f"============== LOGS FOR POD: {pod} ==============")
+            print(resp)
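
A minimal standalone sketch of the same log-tailing call, assuming kubernetes_asyncio is installed and a kubeconfig is reachable; the namespace, container name, and pod names below are placeholders, not values from this commit:

import asyncio

from kubernetes_asyncio import client, config


async def tail_crawler_pods(pod_names, namespace="crawlers", lines=100):
    """Print the last `lines` lines of the crawler container in each pod."""
    # assumes a local kubeconfig; inside a cluster, load_incluster_config() applies
    await config.load_kube_config()
    async with client.ApiClient() as api_client:
        core_api = client.CoreV1Api(api_client)
        for pod in pod_names:
            resp = await core_api.read_namespaced_pod_log(
                pod, namespace, container="crawler", tail_lines=lines
            )
            print(f"============== LOGS FOR POD: {pod} ==============")
            print(resp)


# e.g. asyncio.run(tail_crawler_pods(["crawl-example-0"], lines=200))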

@@ -136,6 +136,8 @@ class BtrixOperator(K8sAPI):
         self.fast_retry_secs = int(os.environ.get("FAST_RETRY_SECS") or 0)
 
+        self.log_failed_crawl_lines = int(os.environ.get("LOG_FAILED_CRAWL_LINES") or 0)
+
         with open(self.config_file, encoding="utf-8") as fh_config:
             self.shared_params = yaml.safe_load(fh_config)
@@ -580,8 +582,10 @@ class BtrixOperator(K8sAPI):
             status.filesAdded = int(await redis.get("filesAdded") or 0)
             status.filesAddedSize = int(await redis.get("filesAddedSize") or 0)
 
+            pod_names = list(pods.keys())
+
             # update stats and get status
-            return await self.update_crawl_state(redis, crawl, status)
+            return await self.update_crawl_state(redis, crawl, status, pod_names)
 
         # pylint: disable=broad-except
         except Exception as exc:
@@ -674,7 +678,7 @@ class BtrixOperator(K8sAPI):
         return False
 
-    async def update_crawl_state(self, redis, crawl, status):
+    async def update_crawl_state(self, redis, crawl, status, pod_names):
         """update crawl state and check if crawl is now done"""
         results = await redis.hvals(f"{crawl.id}:status")
         stats = await get_redis_crawl_stats(redis, crawl.id)
@@ -716,16 +720,31 @@ class BtrixOperator(K8sAPI):
         # check if all crawlers failed
         elif status_count.get("failed", 0) >= crawl.scale:
+            prev_state = None
+
             # if stopping, and no pages finished, mark as canceled
             if status.stopping and not status.pagesDone:
                 state = "canceled"
             else:
                 state = "failed"
+                prev_state = status.state
 
             status = await self.mark_finished(
                 redis, crawl.id, crawl.cid, status, state=state
             )
 
+            if (
+                self.log_failed_crawl_lines
+                and state == "failed"
+                and prev_state != "failed"
+            ):
+                print("crawl failed: ", pod_names, stats)
+                asyncio.create_task(
+                    self.print_pod_logs(
+                        pod_names, "crawler", self.log_failed_crawl_lines
+                    )
+                )
+
         # check for other statuses
         else:
             new_status = None
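
The guard above only fires on a fresh transition into the failed state and schedules the log fetch with asyncio.create_task, so the operator's sync handler is not blocked while pod logs are read. A condensed sketch of that pattern, with hypothetical operator, pod_names, and state arguments standing in for the real objects:

import asyncio


def maybe_log_failed_crawl(operator, pod_names, state, prev_state):
    """Schedule crawler log printing only on a fresh transition into 'failed'."""
    if not operator.log_failed_crawl_lines:
        return  # feature disabled: LOG_FAILED_CRAWL_LINES unset or 0
    if state != "failed" or prev_state == "failed":
        return  # canceled, or already failed on an earlier sync: print nothing
    # fire-and-forget: do not await, so the reconcile call returns promptly
    asyncio.create_task(
        operator.print_pod_logs(pod_names, "crawler", operator.log_failed_crawl_lines)
    )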

@@ -144,6 +144,7 @@ def test_stop_crawl_partial(
         time.sleep(2)
         data = get_crawl(default_org_id, crawler_auth_headers, crawl_id)
         done = data.get("stats") and data.get("stats").get("done") > 0
+        print("crawl stats", data)
 
     r = requests.post(
         f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawl_id}/stop",

@@ -43,6 +43,7 @@ data:
   FAST_RETRY_SECS: "{{ .Values.operator_fast_resync_secs | default 3 }}"
+  LOG_FAILED_CRAWL_LINES: "{{ .Values.log_failed_crawl_lines | default 0 }}"
 
 ---
 apiVersion: v1

@@ -34,3 +34,5 @@ max_pages_per_crawl: 4
 registration_enabled: "0"
 
+# log failed crawl pods to operator backend
+log_failed_crawl_lines: 200

@@ -111,6 +111,11 @@ job_memory: "70Mi"
 profile_browser_idle_seconds: 60
 
+# if set, print last 'log_failed_crawl_lines' of each failed
+# crawl pod to backend operator stdout
+# mostly intended for debugging / testing
+# log_failed_crawl_lines: 200
+
 # Nginx Image
 # =========================================