type fixes on util functions (#2009)

Adds type annotations to the util.py functions and makes the resulting changes at their call sites.
Ilya Kreymer 2024-08-12 10:54:45 -07:00 committed by GitHub
parent 12f994b864
commit d9f49afcc5
6 changed files with 31 additions and 27 deletions


@@ -263,7 +263,7 @@ class BaseCrawlOps:
     async def update_crawl_state(self, crawl_id: str, state: str):
         """called only when job container is being stopped/canceled"""
-        data = {"state": state}
+        data: dict[str, Any] = {"state": state}
         # if cancelation, set the finish time here
         if state == "canceled":
             data["finished"] = dt_now()
@@ -462,7 +462,11 @@ class BaseCrawlOps:
             presigned_url = file_.presignedUrl
             now = dt_now()
-            if update_presigned_url or not presigned_url or now >= file_.expireAt:
+            if (
+                update_presigned_url
+                or not presigned_url
+                or (file_.expireAt and now >= file_.expireAt)
+            ):
                 exp = now + delta
                 presigned_url = await self.storage_ops.get_presigned_url(
                     org, file_, self.presign_duration_seconds
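
The new clause guards against `file_.expireAt` being unset: comparing `datetime >= None` raises a `TypeError` at runtime and fails type checking once the field is optional. A minimal sketch of the guard pattern, assuming a hypothetical model with an `Optional[datetime]` field:

    from dataclasses import dataclass
    from datetime import datetime
    from typing import Optional

    @dataclass
    class FileRef:  # hypothetical stand-in for the file model
        expireAt: Optional[datetime] = None

    def is_expired(file_: FileRef, now: datetime) -> bool:
        # check for None before comparing; None >= datetime raises TypeError
        return file_.expireAt is not None and now >= file_.expireAt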


@@ -342,7 +342,7 @@ class CrawlOps(BaseCrawlOps):
         crawl_id: str,
         crawlconfig: CrawlConfig,
         userid: UUID,
-        started: str,
+        started: datetime,
         manual: bool,
         username: str = "",
     ) -> None:
@@ -582,7 +582,7 @@ class CrawlOps(BaseCrawlOps):
         crawl_id: str,
         is_qa: bool,
         exec_time: int,
-        last_updated_time: str,
+        last_updated_time: datetime,
     ) -> bool:
         """increment exec time"""
         # update both crawl-shared qa exec seconds and per-qa run exec seconds


@@ -43,7 +43,11 @@ class BgJobOperator(BaseOperator):
         finalized = True

-        finished = from_k8s_date(completion_time) if completion_time else dt_now()
+        finished = None
+        if completion_time:
+            finished = from_k8s_date(completion_time)
+        if not finished:
+            finished = dt_now()

         try:
             await self.background_job_ops.job_finished(
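
The one-line conditional is unrolled because `from_k8s_date` can itself return `None`, so the ternary alone could still leave `finished` unset; the unrolled form guarantees a `datetime`. A plausible shape for the helper (the real implementation lives in btrixcloud.utils and is not shown in this diff):

    from datetime import datetime
    from typing import Optional

    def from_k8s_date(date_str: Optional[str]) -> Optional[datetime]:
        """parse a k8s timestamp such as '2024-08-12T10:54:45Z'; None if empty"""
        if not date_str:
            return None
        return datetime.fromisoformat(date_str.rstrip("Z"))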


@@ -713,7 +713,7 @@ class CrawlOperator(BaseOperator):
         if status.finished:
             ttl = spec.get("ttlSecondsAfterFinished", DEFAULT_TTL)
             finished = from_k8s_date(status.finished)
-            if (dt_now() - finished).total_seconds() > ttl >= 0:
+            if finished and (dt_now() - finished).total_seconds() > ttl >= 0:
                 print("CrawlJob expired, deleting: " + crawl.id)
                 finalized = True
         else:
@@ -789,11 +789,9 @@ class CrawlOperator(BaseOperator):
             # but not right away in case crawler pod is just restarting.
             # avoids keeping redis pods around while no crawler pods are up
             # (eg. due to resource constraints)
-            if status.lastActiveTime and (
-                (
-                    dt_now() - from_k8s_date(status.lastActiveTime)
-                ).total_seconds()
-                > REDIS_TTL
+            last_active_time = from_k8s_date(status.lastActiveTime)
+            if last_active_time and (
+                (dt_now() - last_active_time).total_seconds() > REDIS_TTL
             ):
                 print(
                     f"Pausing redis, no running crawler pods for >{REDIS_TTL} secs"
@@ -1233,10 +1231,9 @@ class CrawlOperator(BaseOperator):
         # check timeout if timeout time exceeds elapsed time
         if crawl.timeout:
             elapsed = status.elapsedCrawlTime
-            if status.lastUpdatedTime:
-                elapsed += (
-                    dt_now() - from_k8s_date(status.lastUpdatedTime)
-                ).total_seconds()
+            last_updated_time = from_k8s_date(status.lastUpdatedTime)
+            if last_updated_time:
+                elapsed += int((dt_now() - last_updated_time).total_seconds())
             if elapsed > crawl.timeout:
                 return "time-limit"


@@ -1,9 +1,6 @@
 """ Operator handler for ProfileJobs """

-from btrixcloud.utils import (
-    from_k8s_date,
-    dt_now,
-)
+from btrixcloud.utils import from_k8s_date, dt_now

 from btrixcloud.models import StorageRef
@@ -29,7 +26,7 @@ class ProfileOperator(BaseOperator):
         expire_time = from_k8s_date(spec.get("expireTime"))
         browserid = spec.get("id")

-        if dt_now() >= expire_time:
+        if expire_time and dt_now() >= expire_time:
             self.run_task(self.k8s.delete_profile_browser(browserid))
             return {"status": {}, "children": []}


@@ -38,7 +38,7 @@ class JSONSerializer(json.JSONEncoder):
         return super().default(o)


-def get_templates_dir():
+def get_templates_dir() -> str:
     """return directory containing templates for loading"""
     return os.path.join(os.path.dirname(__file__), "templates")
@@ -53,17 +53,17 @@ def to_k8s_date(dt_val: datetime) -> str:
     return dt_val.isoformat("T") + "Z"


-def dt_now():
+def dt_now() -> datetime:
     """get current ts"""
     return datetime.now(timezone.utc).replace(microsecond=0, tzinfo=None)


-def ts_now():
+def ts_now() -> str:
     """get current ts"""
     return str(dt_now())


-def run_once_lock(name):
+def run_once_lock(name) -> bool:
     """run once lock via temp directory
     - if dir doesn't exist, return true
     - if exists, return false"""
@@ -83,7 +83,7 @@ def run_once_lock(name):
     return True


-def register_exit_handler():
+def register_exit_handler() -> None:
     """register exit handler to exit on SIGTERM"""
     loop = asyncio.get_running_loop()
@@ -95,7 +95,7 @@ def register_exit_handler():
     loop.add_signal_handler(signal.SIGTERM, exit_handler)


-def parse_jsonl_error_messages(errors):
+def parse_jsonl_error_messages(errors: list[str]) -> list[dict]:
     """parse json-l error strings from redis/db into json"""
     parsed_errors = []
     for error_line in errors:
@@ -153,7 +153,9 @@ def validate_slug(slug: str) -> None:
     raise HTTPException(status_code=400, detail="invalid_slug")


-def stream_dict_list_as_csv(data: List[Dict[str, Union[str, int]]], filename: str):
+def stream_dict_list_as_csv(
+    data: List[Dict[str, Union[str, int]]], filename: str
+) -> StreamingResponse:
     """Stream list of dictionaries as CSV with attachment filename header"""
     if not data:
         raise HTTPException(status_code=404, detail="crawls_not_found")
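
With return types on `dt_now` and friends in place, a checker such as mypy can flag the call-site bugs fixed above, e.g. an unguarded comparison against a possibly-`None` datetime. A minimal repro under those assumed signatures, not code from this repo:

    from datetime import datetime
    from typing import Optional

    def dt_now() -> datetime: ...
    def from_k8s_date(s: Optional[str]) -> Optional[datetime]: ...

    expire_time = from_k8s_date(None)
    if dt_now() >= expire_time:  # mypy: unsupported operand types ("datetime" and "None")
        pass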