type fixes on util functions (#2009)
Some additional typing for util.py functions and resultant changes
This commit is contained in:
parent
12f994b864
commit
d9f49afcc5
@ -263,7 +263,7 @@ class BaseCrawlOps:
|
||||
async def update_crawl_state(self, crawl_id: str, state: str):
|
||||
"""called only when job container is being stopped/canceled"""
|
||||
|
||||
data = {"state": state}
|
||||
data: dict[str, Any] = {"state": state}
|
||||
# if cancelation, set the finish time here
|
||||
if state == "canceled":
|
||||
data["finished"] = dt_now()
|
||||
@ -462,7 +462,11 @@ class BaseCrawlOps:
|
||||
presigned_url = file_.presignedUrl
|
||||
now = dt_now()
|
||||
|
||||
if update_presigned_url or not presigned_url or now >= file_.expireAt:
|
||||
if (
|
||||
update_presigned_url
|
||||
or not presigned_url
|
||||
or (file_.expireAt and now >= file_.expireAt)
|
||||
):
|
||||
exp = now + delta
|
||||
presigned_url = await self.storage_ops.get_presigned_url(
|
||||
org, file_, self.presign_duration_seconds
|
||||
|
@ -342,7 +342,7 @@ class CrawlOps(BaseCrawlOps):
|
||||
crawl_id: str,
|
||||
crawlconfig: CrawlConfig,
|
||||
userid: UUID,
|
||||
started: str,
|
||||
started: datetime,
|
||||
manual: bool,
|
||||
username: str = "",
|
||||
) -> None:
|
||||
@ -582,7 +582,7 @@ class CrawlOps(BaseCrawlOps):
|
||||
crawl_id: str,
|
||||
is_qa: bool,
|
||||
exec_time: int,
|
||||
last_updated_time: str,
|
||||
last_updated_time: datetime,
|
||||
) -> bool:
|
||||
"""increment exec time"""
|
||||
# update both crawl-shared qa exec seconds and per-qa run exec seconds
|
||||
|
@ -43,7 +43,11 @@ class BgJobOperator(BaseOperator):
|
||||
|
||||
finalized = True
|
||||
|
||||
finished = from_k8s_date(completion_time) if completion_time else dt_now()
|
||||
finished = None
|
||||
if completion_time:
|
||||
finished = from_k8s_date(completion_time)
|
||||
if not finished:
|
||||
finished = dt_now()
|
||||
|
||||
try:
|
||||
await self.background_job_ops.job_finished(
|
||||
|
@ -713,7 +713,7 @@ class CrawlOperator(BaseOperator):
|
||||
if status.finished:
|
||||
ttl = spec.get("ttlSecondsAfterFinished", DEFAULT_TTL)
|
||||
finished = from_k8s_date(status.finished)
|
||||
if (dt_now() - finished).total_seconds() > ttl >= 0:
|
||||
if finished and (dt_now() - finished).total_seconds() > ttl >= 0:
|
||||
print("CrawlJob expired, deleting: " + crawl.id)
|
||||
finalized = True
|
||||
else:
|
||||
@ -789,11 +789,9 @@ class CrawlOperator(BaseOperator):
|
||||
# but not right away in case crawler pod is just restarting.
|
||||
# avoids keeping redis pods around while no crawler pods are up
|
||||
# (eg. due to resource constraints)
|
||||
if status.lastActiveTime and (
|
||||
(
|
||||
dt_now() - from_k8s_date(status.lastActiveTime)
|
||||
).total_seconds()
|
||||
> REDIS_TTL
|
||||
last_active_time = from_k8s_date(status.lastActiveTime)
|
||||
if last_active_time and (
|
||||
(dt_now() - last_active_time).total_seconds() > REDIS_TTL
|
||||
):
|
||||
print(
|
||||
f"Pausing redis, no running crawler pods for >{REDIS_TTL} secs"
|
||||
@ -1233,10 +1231,9 @@ class CrawlOperator(BaseOperator):
|
||||
# check timeout if timeout time exceeds elapsed time
|
||||
if crawl.timeout:
|
||||
elapsed = status.elapsedCrawlTime
|
||||
if status.lastUpdatedTime:
|
||||
elapsed += (
|
||||
dt_now() - from_k8s_date(status.lastUpdatedTime)
|
||||
).total_seconds()
|
||||
last_updated_time = from_k8s_date(status.lastUpdatedTime)
|
||||
if last_updated_time:
|
||||
elapsed += int((dt_now() - last_updated_time).total_seconds())
|
||||
|
||||
if elapsed > crawl.timeout:
|
||||
return "time-limit"
|
||||
|
@ -1,9 +1,6 @@
|
||||
""" Operator handler for ProfileJobs """
|
||||
|
||||
from btrixcloud.utils import (
|
||||
from_k8s_date,
|
||||
dt_now,
|
||||
)
|
||||
from btrixcloud.utils import from_k8s_date, dt_now
|
||||
|
||||
from btrixcloud.models import StorageRef
|
||||
|
||||
@ -29,7 +26,7 @@ class ProfileOperator(BaseOperator):
|
||||
expire_time = from_k8s_date(spec.get("expireTime"))
|
||||
browserid = spec.get("id")
|
||||
|
||||
if dt_now() >= expire_time:
|
||||
if expire_time and dt_now() >= expire_time:
|
||||
self.run_task(self.k8s.delete_profile_browser(browserid))
|
||||
return {"status": {}, "children": []}
|
||||
|
||||
|
@ -38,7 +38,7 @@ class JSONSerializer(json.JSONEncoder):
|
||||
return super().default(o)
|
||||
|
||||
|
||||
def get_templates_dir():
|
||||
def get_templates_dir() -> str:
|
||||
"""return directory containing templates for loading"""
|
||||
return os.path.join(os.path.dirname(__file__), "templates")
|
||||
|
||||
@ -53,17 +53,17 @@ def to_k8s_date(dt_val: datetime) -> str:
|
||||
return dt_val.isoformat("T") + "Z"
|
||||
|
||||
|
||||
def dt_now():
|
||||
def dt_now() -> datetime:
|
||||
"""get current ts"""
|
||||
return datetime.now(timezone.utc).replace(microsecond=0, tzinfo=None)
|
||||
|
||||
|
||||
def ts_now():
|
||||
def ts_now() -> str:
|
||||
"""get current ts"""
|
||||
return str(dt_now())
|
||||
|
||||
|
||||
def run_once_lock(name):
|
||||
def run_once_lock(name) -> bool:
|
||||
"""run once lock via temp directory
|
||||
- if dir doesn't exist, return true
|
||||
- if exists, return false"""
|
||||
@ -83,7 +83,7 @@ def run_once_lock(name):
|
||||
return True
|
||||
|
||||
|
||||
def register_exit_handler():
|
||||
def register_exit_handler() -> None:
|
||||
"""register exit handler to exit on SIGTERM"""
|
||||
loop = asyncio.get_running_loop()
|
||||
|
||||
@ -95,7 +95,7 @@ def register_exit_handler():
|
||||
loop.add_signal_handler(signal.SIGTERM, exit_handler)
|
||||
|
||||
|
||||
def parse_jsonl_error_messages(errors):
|
||||
def parse_jsonl_error_messages(errors: list[str]) -> list[dict]:
|
||||
"""parse json-l error strings from redis/db into json"""
|
||||
parsed_errors = []
|
||||
for error_line in errors:
|
||||
@ -153,7 +153,9 @@ def validate_slug(slug: str) -> None:
|
||||
raise HTTPException(status_code=400, detail="invalid_slug")
|
||||
|
||||
|
||||
def stream_dict_list_as_csv(data: List[Dict[str, Union[str, int]]], filename: str):
|
||||
def stream_dict_list_as_csv(
|
||||
data: List[Dict[str, Union[str, int]]], filename: str
|
||||
) -> StreamingResponse:
|
||||
"""Stream list of dictionaries as CSV with attachment filename header"""
|
||||
if not data:
|
||||
raise HTTPException(status_code=404, detail="crawls_not_found")
|
||||
|
Loading…
Reference in New Issue
Block a user