Print crawl log to operator log (mostly for testing) (#1148)
* Logs are printed only if 'log_failed_crawl_lines' is set; its value is the number of trailing log lines to print from each failed container. --------- Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
This commit is contained in:
parent
38f596fd81
commit
68bc053ba0
@ -205,3 +205,12 @@ class K8sAPI:
|
||||
self.api_client.set_default_header("Content-Type", content_type)
|
||||
else:
|
||||
del self.api_client.default_headers["Content-Type"]
|
||||
|
||||
async def print_pod_logs(self, pod_names, container, lines=100):
    """Print the tail of one container's log for each of the given pods.

    For every pod name, the last ``lines`` lines of ``container``'s log are
    read via ``self.core_api.read_namespaced_pod_log`` in ``self.namespace``
    and written to stdout under a per-pod banner.

    Args:
        pod_names: iterable of pod names to read logs from.
        container: name of the container within each pod.
        lines: number of trailing log lines to request per pod.

    A failure to read one pod's log is reported and skipped so that the
    remaining pods are still printed. This method is scheduled with
    ``asyncio.create_task`` and not awaited, so an exception escaping from
    here would abort the loop without being observed by the caller.
    """
    for pod in pod_names:
        try:
            resp = await self.core_api.read_namespaced_pod_log(
                pod, self.namespace, container=container, tail_lines=lines
            )
        # pylint: disable=broad-except
        except Exception as exc:
            # best-effort debugging aid: keep going with the other pods
            print(f"Unable to read logs for pod {pod}: {exc}")
            continue

        print(f"============== LOGS FOR POD: {pod} ==============")
        print(resp)
|
||||
|
@ -136,6 +136,8 @@ class BtrixOperator(K8sAPI):
|
||||
|
||||
self.fast_retry_secs = int(os.environ.get("FAST_RETRY_SECS") or 0)
|
||||
|
||||
self.log_failed_crawl_lines = int(os.environ.get("LOG_FAILED_CRAWL_LINES") or 0)
|
||||
|
||||
with open(self.config_file, encoding="utf-8") as fh_config:
|
||||
self.shared_params = yaml.safe_load(fh_config)
|
||||
|
||||
@ -580,8 +582,10 @@ class BtrixOperator(K8sAPI):
|
||||
status.filesAdded = int(await redis.get("filesAdded") or 0)
|
||||
status.filesAddedSize = int(await redis.get("filesAddedSize") or 0)
|
||||
|
||||
pod_names = list(pods.keys())
|
||||
|
||||
# update stats and get status
|
||||
return await self.update_crawl_state(redis, crawl, status)
|
||||
return await self.update_crawl_state(redis, crawl, status, pod_names)
|
||||
|
||||
# pylint: disable=broad-except
|
||||
except Exception as exc:
|
||||
@ -674,7 +678,7 @@ class BtrixOperator(K8sAPI):
|
||||
|
||||
return False
|
||||
|
||||
async def update_crawl_state(self, redis, crawl, status):
|
||||
async def update_crawl_state(self, redis, crawl, status, pod_names):
|
||||
"""update crawl state and check if crawl is now done"""
|
||||
results = await redis.hvals(f"{crawl.id}:status")
|
||||
stats = await get_redis_crawl_stats(redis, crawl.id)
|
||||
@ -716,16 +720,31 @@ class BtrixOperator(K8sAPI):
|
||||
|
||||
# check if all crawlers failed
|
||||
elif status_count.get("failed", 0) >= crawl.scale:
|
||||
prev_state = None
|
||||
|
||||
# if stopping, and no pages finished, mark as canceled
|
||||
if status.stopping and not status.pagesDone:
|
||||
state = "canceled"
|
||||
else:
|
||||
state = "failed"
|
||||
prev_state = status.state
|
||||
|
||||
status = await self.mark_finished(
|
||||
redis, crawl.id, crawl.cid, status, state=state
|
||||
)
|
||||
|
||||
if (
|
||||
self.log_failed_crawl_lines
|
||||
and state == "failed"
|
||||
and prev_state != "failed"
|
||||
):
|
||||
print("crawl failed: ", pod_names, stats)
|
||||
asyncio.create_task(
|
||||
self.print_pod_logs(
|
||||
pod_names, "crawler", self.log_failed_crawl_lines
|
||||
)
|
||||
)
|
||||
|
||||
# check for other statuses
|
||||
else:
|
||||
new_status = None
|
||||
|
@ -144,6 +144,7 @@ def test_stop_crawl_partial(
|
||||
time.sleep(2)
|
||||
data = get_crawl(default_org_id, crawler_auth_headers, crawl_id)
|
||||
done = data.get("stats") and data.get("stats").get("done") > 0
|
||||
print("crawl stats", data)
|
||||
|
||||
r = requests.post(
|
||||
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawl_id}/stop",
|
||||
|
@ -43,6 +43,7 @@ data:
|
||||
|
||||
FAST_RETRY_SECS: "{{ .Values.operator_fast_resync_secs | default 3 }}"
|
||||
|
||||
LOG_FAILED_CRAWL_LINES: "{{ .Values.log_failed_crawl_lines | default 0 }}"
|
||||
|
||||
---
|
||||
apiVersion: v1
|
||||
|
@ -34,3 +34,5 @@ max_pages_per_crawl: 4
|
||||
|
||||
registration_enabled: "0"
|
||||
|
||||
# log failed crawl pods to operator backend
|
||||
log_failed_crawl_lines: 200
|
||||
|
@ -111,6 +111,11 @@ job_memory: "70Mi"
|
||||
|
||||
profile_browser_idle_seconds: 60
|
||||
|
||||
# if set, print the last 'log_failed_crawl_lines' lines of each failed
|
||||
# crawl pod to backend operator stdout
|
||||
# mostly intended for debugging / testing
|
||||
# log_failed_crawl_lines: 200
|
||||
|
||||
|
||||
# Nginx Image
|
||||
# =========================================
|
||||
|
Loading…
Reference in New Issue
Block a user