type fixes on util functions (#2009)

Some additional typing for util.py functions, and the resulting changes at call sites.
Ilya Kreymer 2024-08-12 10:54:45 -07:00 committed by GitHub
parent 12f994b864
commit d9f49afcc5
6 changed files with 31 additions and 27 deletions

@@ -263,7 +263,7 @@ class BaseCrawlOps:
     async def update_crawl_state(self, crawl_id: str, state: str):
         """called only when job container is being stopped/canceled"""
-        data = {"state": state}
+        data: dict[str, Any] = {"state": state}
         # if cancelation, set the finish time here
         if state == "canceled":
             data["finished"] = dt_now()
@@ -462,7 +462,11 @@ class BaseCrawlOps:
         presigned_url = file_.presignedUrl
         now = dt_now()
-        if update_presigned_url or not presigned_url or now >= file_.expireAt:
+        if (
+            update_presigned_url
+            or not presigned_url
+            or (file_.expireAt and now >= file_.expireAt)
+        ):
             exp = now + delta
             presigned_url = await self.storage_ops.get_presigned_url(
                 org, file_, self.presign_duration_seconds
             )
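
The new file_.expireAt guard matters because expireAt is evidently optional: comparing a datetime against None raises at runtime as well as failing type checking. A small standalone illustration of the pattern:

from datetime import datetime
from typing import Optional

expire_at: Optional[datetime] = None  # e.g. a file that was never presigned
now = datetime.now()

# now >= expire_at  ->  TypeError: '>=' not supported between 'datetime' and 'NoneType'
if expire_at and now >= expire_at:  # short-circuit keeps the comparison safe
    print("presigned URL expired, refresh it")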

@@ -342,7 +342,7 @@ class CrawlOps(BaseCrawlOps):
         crawl_id: str,
         crawlconfig: CrawlConfig,
         userid: UUID,
-        started: str,
+        started: datetime,
         manual: bool,
         username: str = "",
     ) -> None:
@@ -582,7 +582,7 @@ class CrawlOps(BaseCrawlOps):
         crawl_id: str,
         is_qa: bool,
         exec_time: int,
-        last_updated_time: str,
+        last_updated_time: datetime,
     ) -> bool:
         """increment exec time"""
         # update both crawl-shared qa exec seconds and per-qa run exec seconds
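
Typing started and last_updated_time as datetime pushes string formatting out of the call path: callers hand over dt_now() or from_k8s_date() results directly, and serialization happens at the storage layer. A hypothetical call site under that assumption (names illustrative only):

from datetime import datetime, timezone

def dt_now() -> datetime:
    return datetime.now(timezone.utc).replace(microsecond=0, tzinfo=None)

# before: a str-typed signature invited premature formatting
# await crawl_ops.add_new_crawl(..., started=str(dt_now()), ...)

# after: pass the datetime itself; downstream arithmetic such as
# (dt_now() - started).total_seconds() works without re-parsing
started: datetime = dt_now()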

@@ -43,7 +43,11 @@ class BgJobOperator(BaseOperator):
         finalized = True

-        finished = from_k8s_date(completion_time) if completion_time else dt_now()
+        finished = None
+        if completion_time:
+            finished = from_k8s_date(completion_time)
+        if not finished:
+            finished = dt_now()

         try:
             await self.background_job_ops.job_finished(
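
The one-liner had to go because from_k8s_date now returns an optional: even with a truthy completion_time, parsing may yield None, and the two-step fallback guarantees finished is a real datetime. from_k8s_date is presumably along these lines (a sketch, not the repo's exact code):

from datetime import datetime, timezone
from typing import Optional

def from_k8s_date(string: str) -> Optional[datetime]:
    """parse a k8s RFC3339 timestamp like '2024-08-12T17:54:45Z', else None"""
    return datetime.fromisoformat(string[:-1]) if string else None

def dt_now() -> datetime:
    return datetime.now(timezone.utc).replace(microsecond=0, tzinfo=None)

completion_time = ""  # k8s may omit completionTime entirely

finished: Optional[datetime] = None
if completion_time:
    finished = from_k8s_date(completion_time)
if not finished:  # covers both a missing value and a failed parse
    finished = dt_now()  # statically datetime from here on, not Optional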

@@ -713,7 +713,7 @@ class CrawlOperator(BaseOperator):
         if status.finished:
             ttl = spec.get("ttlSecondsAfterFinished", DEFAULT_TTL)
             finished = from_k8s_date(status.finished)
-            if (dt_now() - finished).total_seconds() > ttl >= 0:
+            if finished and (dt_now() - finished).total_seconds() > ttl >= 0:
                 print("CrawlJob expired, deleting: " + crawl.id)
                 finalized = True
         else:
@@ -789,11 +789,9 @@ class CrawlOperator(BaseOperator):
             # but not right away in case crawler pod is just restarting.
             # avoids keeping redis pods around while no crawler pods are up
             # (eg. due to resource constraints)
-            if status.lastActiveTime and (
-                (
-                    dt_now() - from_k8s_date(status.lastActiveTime)
-                ).total_seconds()
-                > REDIS_TTL
+            last_active_time = from_k8s_date(status.lastActiveTime)
+            if last_active_time and (
+                (dt_now() - last_active_time).total_seconds() > REDIS_TTL
             ):
                 print(
                     f"Pausing redis, no running crawler pods for >{REDIS_TTL} secs"
@@ -1233,10 +1231,9 @@ class CrawlOperator(BaseOperator):
         # check timeout if timeout time exceeds elapsed time
         if crawl.timeout:
             elapsed = status.elapsedCrawlTime
-            if status.lastUpdatedTime:
-                elapsed += (
-                    dt_now() - from_k8s_date(status.lastUpdatedTime)
-                ).total_seconds()
+            last_updated_time = from_k8s_date(status.lastUpdatedTime)
+            if last_updated_time:
+                elapsed += int((dt_now() - last_updated_time).total_seconds())
             if elapsed > crawl.timeout:
                 return "time-limit"

@@ -1,9 +1,6 @@
 """ Operator handler for ProfileJobs """

-from btrixcloud.utils import (
-    from_k8s_date,
-    dt_now,
-)
+from btrixcloud.utils import from_k8s_date, dt_now

 from btrixcloud.models import StorageRef
@@ -29,7 +26,7 @@ class ProfileOperator(BaseOperator):
         expire_time = from_k8s_date(spec.get("expireTime"))
         browserid = spec.get("id")

-        if dt_now() >= expire_time:
+        if expire_time and dt_now() >= expire_time:
             self.run_task(self.k8s.delete_profile_browser(browserid))
             return {"status": {}, "children": []}

@@ -38,7 +38,7 @@ class JSONSerializer(json.JSONEncoder):
         return super().default(o)


-def get_templates_dir():
+def get_templates_dir() -> str:
     """return directory containing templates for loading"""
     return os.path.join(os.path.dirname(__file__), "templates")
@@ -53,17 +53,17 @@ def to_k8s_date(dt_val: datetime) -> str:
     return dt_val.isoformat("T") + "Z"


-def dt_now():
+def dt_now() -> datetime:
     """get current ts"""
     return datetime.now(timezone.utc).replace(microsecond=0, tzinfo=None)


-def ts_now():
+def ts_now() -> str:
     """get current ts"""
     return str(dt_now())


-def run_once_lock(name):
+def run_once_lock(name) -> bool:
     """run once lock via temp directory
     - if dir doesn't exist, return true
     - if exists, return false"""
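
Worth noting: dt_now() strips both microseconds and tzinfo, so the codebase compares naive UTC datetimes throughout; mixing in an aware value would blow up. A quick demonstration of that convention:

from datetime import datetime, timezone

def dt_now() -> datetime:
    """naive UTC, whole seconds"""
    return datetime.now(timezone.utc).replace(microsecond=0, tzinfo=None)

naive = dt_now()
aware = datetime.now(timezone.utc)

print(naive.tzinfo)  # None: safe to compare with other naive values
# naive >= aware  ->  TypeError: can't compare offset-naive and offset-aware datetimes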
@@ -83,7 +83,7 @@ def run_once_lock(name):
     return True


-def register_exit_handler():
+def register_exit_handler() -> None:
     """register exit handler to exit on SIGTERM"""
     loop = asyncio.get_running_loop()
@@ -95,7 +95,7 @@ def register_exit_handler():
     loop.add_signal_handler(signal.SIGTERM, exit_handler)


-def parse_jsonl_error_messages(errors):
+def parse_jsonl_error_messages(errors: list[str]) -> list[dict]:
     """parse json-l error strings from redis/db into json"""
     parsed_errors = []
     for error_line in errors:
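
The annotation pins down the contract: raw JSON-L strings in, dicts out. A hedged sketch of a body consistent with that signature (the repo's actual error handling may differ):

import json

def parse_jsonl_error_messages(errors: list[str]) -> list[dict]:
    """parse json-l error strings from redis/db into json"""
    parsed_errors = []
    for error_line in errors:
        if not error_line:
            continue
        try:
            parsed_errors.append(json.loads(error_line))
        except json.JSONDecodeError:
            # keep unparseable lines visible instead of dropping them
            parsed_errors.append({"message": error_line})
    return parsed_errors

print(parse_jsonl_error_messages(['{"message": "crawl failed"}', "plain text"]))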
@@ -153,7 +153,9 @@ def validate_slug(slug: str) -> None:
     raise HTTPException(status_code=400, detail="invalid_slug")


-def stream_dict_list_as_csv(data: List[Dict[str, Union[str, int]]], filename: str):
+def stream_dict_list_as_csv(
+    data: List[Dict[str, Union[str, int]]], filename: str
+) -> StreamingResponse:
     """Stream list of dictionaries as CSV with attachment filename header"""
     if not data:
         raise HTTPException(status_code=404, detail="crawls_not_found")
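
For reference, such a helper can be built on FastAPI's StreamingResponse roughly as follows: a minimal self-contained version matching the new signature, not necessarily the repo's implementation.

import csv
import io
from typing import Dict, List, Union

from fastapi import HTTPException
from fastapi.responses import StreamingResponse

def stream_dict_list_as_csv(
    data: List[Dict[str, Union[str, int]]], filename: str
) -> StreamingResponse:
    """Stream list of dictionaries as CSV with attachment filename header"""
    if not data:
        raise HTTPException(status_code=404, detail="crawls_not_found")

    buffer = io.StringIO()
    writer = csv.DictWriter(buffer, fieldnames=list(data[0].keys()))
    writer.writeheader()
    writer.writerows(data)
    buffer.seek(0)  # StringIO iterates line by line, which StreamingResponse accepts

    return StreamingResponse(
        buffer,
        media_type="text/csv",
        headers={"Content-Disposition": f'attachment; filename="{filename}"'},
    )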