type fixes on util functions (#2009)
Adds type annotations to several utils.py functions and makes the resulting changes in callers.
This commit is contained in:
parent 12f994b864
commit d9f49afcc5
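Most of the caller changes below add a None guard before using values parsed from Kubernetes timestamps, which suggests the underlying utils.py change annotates from_k8s_date as returning Optional[datetime]. A minimal sketch of that presumed signature (an assumption; the hunk containing the actual change is not shown on this page):

    from datetime import datetime
    from typing import Optional

    def from_k8s_date(string: str) -> Optional[datetime]:
        """convert a k8s date string to datetime, or None if the string is empty"""
        # assumption: k8s timestamps look like "2024-01-01T00:00:00Z"
        return datetime.fromisoformat(string[:-1]) if string else None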
@@ -263,7 +263,7 @@ class BaseCrawlOps:
     async def update_crawl_state(self, crawl_id: str, state: str):
         """called only when job container is being stopped/canceled"""

-        data = {"state": state}
+        data: dict[str, Any] = {"state": state}
         # if cancelation, set the finish time here
         if state == "canceled":
             data["finished"] = dt_now()
@@ -462,7 +462,11 @@ class BaseCrawlOps:
             presigned_url = file_.presignedUrl
             now = dt_now()

-            if update_presigned_url or not presigned_url or now >= file_.expireAt:
+            if (
+                update_presigned_url
+                or not presigned_url
+                or (file_.expireAt and now >= file_.expireAt)
+            ):
                 exp = now + delta
                 presigned_url = await self.storage_ops.get_presigned_url(
                     org, file_, self.presign_duration_seconds

@@ -342,7 +342,7 @@ class CrawlOps(BaseCrawlOps):
         crawl_id: str,
         crawlconfig: CrawlConfig,
         userid: UUID,
-        started: str,
+        started: datetime,
         manual: bool,
         username: str = "",
     ) -> None:
@@ -582,7 +582,7 @@ class CrawlOps(BaseCrawlOps):
         crawl_id: str,
         is_qa: bool,
         exec_time: int,
-        last_updated_time: str,
+        last_updated_time: datetime,
     ) -> bool:
         """increment exec time"""
         # update both crawl-shared qa exec seconds and per-qa run exec seconds

@@ -43,7 +43,11 @@ class BgJobOperator(BaseOperator):

         finalized = True

-        finished = from_k8s_date(completion_time) if completion_time else dt_now()
+        finished = None
+        if completion_time:
+            finished = from_k8s_date(completion_time)
+        if not finished:
+            finished = dt_now()

         try:
             await self.background_job_ops.job_finished(

@@ -713,7 +713,7 @@ class CrawlOperator(BaseOperator):
             if status.finished:
                 ttl = spec.get("ttlSecondsAfterFinished", DEFAULT_TTL)
                 finished = from_k8s_date(status.finished)
-                if (dt_now() - finished).total_seconds() > ttl >= 0:
+                if finished and (dt_now() - finished).total_seconds() > ttl >= 0:
                     print("CrawlJob expired, deleting: " + crawl.id)
                     finalized = True
             else:
@@ -789,11 +789,9 @@ class CrawlOperator(BaseOperator):
                     # but not right away in case crawler pod is just restarting.
                     # avoids keeping redis pods around while no crawler pods are up
                     # (eg. due to resource constraints)
-                    if status.lastActiveTime and (
-                        (
-                            dt_now() - from_k8s_date(status.lastActiveTime)
-                        ).total_seconds()
-                        > REDIS_TTL
+                    last_active_time = from_k8s_date(status.lastActiveTime)
+                    if last_active_time and (
+                        (dt_now() - last_active_time).total_seconds() > REDIS_TTL
                     ):
                         print(
                             f"Pausing redis, no running crawler pods for >{REDIS_TTL} secs"
@@ -1233,10 +1231,9 @@ class CrawlOperator(BaseOperator):
         # check timeout if timeout time exceeds elapsed time
         if crawl.timeout:
             elapsed = status.elapsedCrawlTime
-            if status.lastUpdatedTime:
-                elapsed += (
-                    dt_now() - from_k8s_date(status.lastUpdatedTime)
-                ).total_seconds()
+            last_updated_time = from_k8s_date(status.lastUpdatedTime)
+            if last_updated_time:
+                elapsed += int((dt_now() - last_updated_time).total_seconds())

             if elapsed > crawl.timeout:
                 return "time-limit"

@@ -1,9 +1,6 @@
 """ Operator handler for ProfileJobs """

-from btrixcloud.utils import (
-    from_k8s_date,
-    dt_now,
-)
+from btrixcloud.utils import from_k8s_date, dt_now

 from btrixcloud.models import StorageRef

@@ -29,7 +26,7 @@ class ProfileOperator(BaseOperator):
         expire_time = from_k8s_date(spec.get("expireTime"))
         browserid = spec.get("id")

-        if dt_now() >= expire_time:
+        if expire_time and dt_now() >= expire_time:
             self.run_task(self.k8s.delete_profile_browser(browserid))
             return {"status": {}, "children": []}

@@ -38,7 +38,7 @@ class JSONSerializer(json.JSONEncoder):
         return super().default(o)


-def get_templates_dir():
+def get_templates_dir() -> str:
     """return directory containing templates for loading"""
     return os.path.join(os.path.dirname(__file__), "templates")

@@ -53,17 +53,17 @@ def to_k8s_date(dt_val: datetime) -> str:
     return dt_val.isoformat("T") + "Z"


-def dt_now():
+def dt_now() -> datetime:
     """get current ts"""
     return datetime.now(timezone.utc).replace(microsecond=0, tzinfo=None)


-def ts_now():
+def ts_now() -> str:
     """get current ts"""
     return str(dt_now())


-def run_once_lock(name):
+def run_once_lock(name) -> bool:
     """run once lock via temp directory
     - if dir doesn't exist, return true
     - if exists, return false"""
@@ -83,7 +83,7 @@ def run_once_lock(name):
     return True


-def register_exit_handler():
+def register_exit_handler() -> None:
     """register exit handler to exit on SIGTERM"""
     loop = asyncio.get_running_loop()

@@ -95,7 +95,7 @@ def register_exit_handler():
     loop.add_signal_handler(signal.SIGTERM, exit_handler)


-def parse_jsonl_error_messages(errors):
+def parse_jsonl_error_messages(errors: list[str]) -> list[dict]:
     """parse json-l error strings from redis/db into json"""
     parsed_errors = []
     for error_line in errors:
@@ -153,7 +153,9 @@ def validate_slug(slug: str) -> None:
         raise HTTPException(status_code=400, detail="invalid_slug")


-def stream_dict_list_as_csv(data: List[Dict[str, Union[str, int]]], filename: str):
+def stream_dict_list_as_csv(
+    data: List[Dict[str, Union[str, int]]], filename: str
+) -> StreamingResponse:
     """Stream list of dictionaries as CSV with attachment filename header"""
     if not data:
         raise HTTPException(status_code=404, detail="crawls_not_found")
