Print crawl log to operator log (mostly for testing) (#1148)
* log only if the 'log_failed_crawl_lines' value is set to the number of last lines to log from the failed container

---------

Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>

parent 38f596fd81
commit 68bc053ba0
@@ -205,3 +205,12 @@ class K8sAPI:
             self.api_client.set_default_header("Content-Type", content_type)
         else:
             del self.api_client.default_headers["Content-Type"]
+
+    async def print_pod_logs(self, pod_names, container, lines=100):
+        """print pod logs"""
+        for pod in pod_names:
+            resp = await self.core_api.read_namespaced_pod_log(
+                pod, self.namespace, container=container, tail_lines=lines
+            )
+            print(f"============== LOGS FOR POD: {pod} ==============")
+            print(resp)
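For reference, `read_namespaced_pod_log` is the standard CoreV1 call for fetching a pod's log, and the awaited call suggests the async Kubernetes client. A minimal standalone sketch of the same pattern, assuming `kubernetes_asyncio` and placeholder pod/namespace names (the real method reuses the operator's already-configured `core_api` and `namespace`):

import asyncio

from kubernetes_asyncio import client, config


async def tail_pod_log(pod_name: str, namespace: str, lines: int = 100):
    # out-of-cluster config for the sketch; in-cluster code would call
    # config.load_incluster_config() instead
    await config.load_kube_config()
    async with client.ApiClient() as api:
        core_api = client.CoreV1Api(api)
        # read_namespaced_pod_log returns the log text as a single string
        resp = await core_api.read_namespaced_pod_log(
            pod_name, namespace, container="crawler", tail_lines=lines
        )
        print(f"============== LOGS FOR POD: {pod_name} ==============")
        print(resp)


if __name__ == "__main__":
    asyncio.run(tail_pod_log("example-crawl-pod", "example-namespace"))

`tail_lines` bounds the response size, which matters here since a failed crawler pod can accumulate a large log.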
@@ -136,6 +136,8 @@ class BtrixOperator(K8sAPI):
 
         self.fast_retry_secs = int(os.environ.get("FAST_RETRY_SECS") or 0)
 
+        self.log_failed_crawl_lines = int(os.environ.get("LOG_FAILED_CRAWL_LINES") or 0)
+
         with open(self.config_file, encoding="utf-8") as fh_config:
             self.shared_params = yaml.safe_load(fh_config)
 
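One detail worth noting in the `or 0` idiom above: `os.environ.get` returns `None` when the variable is unset but an empty string when it is set to nothing, and `int("")` raises `ValueError`. The `or 0` covers both cases. A quick standalone check (the environment manipulation is illustrative only):

import os

# unset -> .get returns None -> `or 0` yields 0
os.environ.pop("LOG_FAILED_CRAWL_LINES", None)
print(int(os.environ.get("LOG_FAILED_CRAWL_LINES") or 0))  # 0

# set but empty -> .get returns "" -> int("") would raise; `or 0` avoids it
os.environ["LOG_FAILED_CRAWL_LINES"] = ""
print(int(os.environ.get("LOG_FAILED_CRAWL_LINES") or 0))  # 0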
@@ -580,8 +582,10 @@ class BtrixOperator(K8sAPI):
             status.filesAdded = int(await redis.get("filesAdded") or 0)
             status.filesAddedSize = int(await redis.get("filesAddedSize") or 0)
 
+            pod_names = list(pods.keys())
+
             # update stats and get status
-            return await self.update_crawl_state(redis, crawl, status)
+            return await self.update_crawl_state(redis, crawl, status, pod_names)
 
         # pylint: disable=broad-except
         except Exception as exc:
@@ -674,7 +678,7 @@ class BtrixOperator(K8sAPI):
 
         return False
 
-    async def update_crawl_state(self, redis, crawl, status):
+    async def update_crawl_state(self, redis, crawl, status, pod_names):
         """update crawl state and check if crawl is now done"""
         results = await redis.hvals(f"{crawl.id}:status")
         stats = await get_redis_crawl_stats(redis, crawl.id)
@@ -716,16 +720,31 @@ class BtrixOperator(K8sAPI):
 
         # check if all crawlers failed
         elif status_count.get("failed", 0) >= crawl.scale:
+            prev_state = None
+
             # if stopping, and no pages finished, mark as canceled
             if status.stopping and not status.pagesDone:
                 state = "canceled"
             else:
                 state = "failed"
+                prev_state = status.state
 
             status = await self.mark_finished(
                 redis, crawl.id, crawl.cid, status, state=state
             )
 
+            if (
+                self.log_failed_crawl_lines
+                and state == "failed"
+                and prev_state != "failed"
+            ):
+                print("crawl failed: ", pod_names, stats)
+                asyncio.create_task(
+                    self.print_pod_logs(
+                        pod_names, "crawler", self.log_failed_crawl_lines
+                    )
+                )
+
         # check for other statuses
         else:
             new_status = None
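The `prev_state` check means logs are dumped only on the first transition into "failed", not on every subsequent reconcile, and the dump is scheduled with `asyncio.create_task` rather than awaited, so the state-update path returns without waiting on the Kubernetes log API. A hedged standalone sketch of that fire-and-forget pattern, with a stub standing in for the real `print_pod_logs`:

import asyncio


async def print_pod_logs(pod_names, container, lines):
    # stub standing in for K8sAPI.print_pod_logs
    for pod in pod_names:
        print(f"would fetch last {lines} {container!r} lines from {pod}")


async def reconcile(pod_names, log_failed_crawl_lines=200):
    state, prev_state = "failed", "running"  # illustrative first failure
    if log_failed_crawl_lines and state == "failed" and prev_state != "failed":
        # not awaited: the dump runs in the background on the event loop
        asyncio.create_task(
            print_pod_logs(pod_names, "crawler", log_failed_crawl_lines)
        )
    await asyncio.sleep(0)  # yield once so the demo task gets to run


asyncio.run(reconcile(["crawl-pod-0", "crawl-pod-1"]))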
@@ -144,6 +144,7 @@ def test_stop_crawl_partial(
         time.sleep(2)
         data = get_crawl(default_org_id, crawler_auth_headers, crawl_id)
         done = data.get("stats") and data.get("stats").get("done") > 0
+        print("crawl stats", data)
 
     r = requests.post(
         f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawl_id}/stop",
@@ -43,6 +43,7 @@ data:
 
   FAST_RETRY_SECS: "{{ .Values.operator_fast_resync_secs | default 3 }}"
 
+  LOG_FAILED_CRAWL_LINES: "{{ .Values.log_failed_crawl_lines | default 0 }}"
 
 ---
 apiVersion: v1
@@ -34,3 +34,5 @@ max_pages_per_crawl: 4
 
 registration_enabled: "0"
 
+# log failed crawl pods to operator backend
+log_failed_crawl_lines: 200
@@ -111,6 +111,11 @@ job_memory: "70Mi"
 
 profile_browser_idle_seconds: 60
 
+# if set, print last 'log_failed_crawl_lines' of each failed
+# crawl pod to backend operator stdout
+# mostly intended for debugging / testing
+# log_failed_crawl_lines: 200
+
 
 # Nginx Image
 # =========================================
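End to end, the setting flows from `log_failed_crawl_lines` in values.yaml through the configmap as `LOG_FAILED_CRAWL_LINES` (defaulting to "0") to the operator at startup, which treats 0 as disabled. A small sketch of that operator-side interpretation, where the env assignments simulate the two renderings seen in this diff (the chart default and the test override):

import os

for rendered in ("0", "200"):
    os.environ["LOG_FAILED_CRAWL_LINES"] = rendered
    log_failed_crawl_lines = int(os.environ.get("LOG_FAILED_CRAWL_LINES") or 0)
    # a value of 0 leaves the feature off; any positive count enables it
    print(f"LOG_FAILED_CRAWL_LINES={rendered!r} -> enabled: {bool(log_failed_crawl_lines)}")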