type fixes on util functions (#2009)
Adds some additional type annotations to util.py functions, plus the resulting changes to their callers.
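The thread running through the callers below is that `from_k8s_date` now presumably returns `Optional[datetime]` rather than a bare `datetime`, so every call site gains a None-guard before doing date arithmetic or comparisons. A minimal sketch of the assumed shape (the function itself is not part of this diff):

    from datetime import datetime
    from typing import Optional

    def from_k8s_date(date_str: Optional[str]) -> Optional[datetime]:
        """convert a k8s date string like "2024-01-01T00:00:00Z" to datetime, or None if unset"""
        return datetime.fromisoformat(date_str.rstrip("Z")) if date_str else None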
parent 12f994b864
commit d9f49afcc5
@@ -263,7 +263,7 @@ class BaseCrawlOps:
     async def update_crawl_state(self, crawl_id: str, state: str):
         """called only when job container is being stopped/canceled"""
 
-        data = {"state": state}
+        data: dict[str, Any] = {"state": state}
         # if cancelation, set the finish time here
         if state == "canceled":
             data["finished"] = dt_now()
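Context on this hunk: without the annotation, mypy infers `dict[str, str]` from the `{"state": state}` literal, so the later `data["finished"] = dt_now()` assignment (a `datetime` value) fails type checking. Roughly:

    from datetime import datetime
    from typing import Any

    data = {"state": "canceled"}         # mypy infers dict[str, str]
    # data["finished"] = datetime.now()  # error: datetime not assignable to str

    typed: dict[str, Any] = {"state": "canceled"}
    typed["finished"] = datetime.now()   # OK: value type widened to Any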
@@ -462,7 +462,11 @@ class BaseCrawlOps:
             presigned_url = file_.presignedUrl
             now = dt_now()
 
-            if update_presigned_url or not presigned_url or now >= file_.expireAt:
+            if (
+                update_presigned_url
+                or not presigned_url
+                or (file_.expireAt and now >= file_.expireAt)
+            ):
                 exp = now + delta
                 presigned_url = await self.storage_ops.get_presigned_url(
                     org, file_, self.presign_duration_seconds
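Assuming `file_.expireAt` is typed `Optional[datetime]`, the added guard matters both to the checker and at runtime, since comparing a `datetime` against None raises:

    from datetime import datetime
    from typing import Optional

    expire_at: Optional[datetime] = None
    now = datetime.now()
    # now >= expire_at                              # TypeError: '>=' not supported
    expired = bool(expire_at and now >= expire_at)  # None short-circuits to False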
@@ -342,7 +342,7 @@ class CrawlOps(BaseCrawlOps):
         crawl_id: str,
         crawlconfig: CrawlConfig,
         userid: UUID,
-        started: str,
+        started: datetime,
         manual: bool,
         username: str = "",
     ) -> None:
@@ -582,7 +582,7 @@ class CrawlOps(BaseCrawlOps):
         crawl_id: str,
         is_qa: bool,
         exec_time: int,
-        last_updated_time: str,
+        last_updated_time: datetime,
     ) -> bool:
         """increment exec time"""
         # update both crawl-shared qa exec seconds and per-qa run exec seconds
@@ -43,7 +43,11 @@ class BgJobOperator(BaseOperator):
 
         finalized = True
 
-        finished = from_k8s_date(completion_time) if completion_time else dt_now()
+        finished = None
+        if completion_time:
+            finished = from_k8s_date(completion_time)
+        if not finished:
+            finished = dt_now()
 
         try:
             await self.background_job_ops.job_finished(
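The one-liner above was unrolled because, with `from_k8s_date` returning an Optional, the ternary alone could still leave `finished` as None when parsing fails. An equivalent compact form (a sketch, using the names from this hunk):

    finished = (from_k8s_date(completion_time) if completion_time else None) or dt_now()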
@@ -713,7 +713,7 @@ class CrawlOperator(BaseOperator):
             if status.finished:
                 ttl = spec.get("ttlSecondsAfterFinished", DEFAULT_TTL)
                 finished = from_k8s_date(status.finished)
-                if (dt_now() - finished).total_seconds() > ttl >= 0:
+                if finished and (dt_now() - finished).total_seconds() > ttl >= 0:
                     print("CrawlJob expired, deleting: " + crawl.id)
                     finalized = True
             else:
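Note the chained comparison in that guard: `... > ttl >= 0` requires both that the job has outlived its TTL and that the TTL is non-negative, so a negative `ttlSecondsAfterFinished` presumably disables expiry. For example:

    age = 120.0
    print(age > 60 >= 0)   # True: older than a non-negative TTL
    print(age > -1 >= 0)   # False: negative TTL never triggers expiry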
@@ -789,11 +789,9 @@ class CrawlOperator(BaseOperator):
                     # but not right away in case crawler pod is just restarting.
                     # avoids keeping redis pods around while no crawler pods are up
                     # (eg. due to resource constraints)
-                    if status.lastActiveTime and (
-                        (
-                            dt_now() - from_k8s_date(status.lastActiveTime)
-                        ).total_seconds()
-                        > REDIS_TTL
-                    ):
+                    last_active_time = from_k8s_date(status.lastActiveTime)
+                    if last_active_time and (
+                        (dt_now() - last_active_time).total_seconds() > REDIS_TTL
+                    ):
                         print(
                             f"Pausing redis, no running crawler pods for >{REDIS_TTL} secs"
@@ -1233,10 +1231,9 @@ class CrawlOperator(BaseOperator):
         # check timeout if timeout time exceeds elapsed time
         if crawl.timeout:
             elapsed = status.elapsedCrawlTime
-            if status.lastUpdatedTime:
-                elapsed += (
-                    dt_now() - from_k8s_date(status.lastUpdatedTime)
-                ).total_seconds()
+            last_updated_time = from_k8s_date(status.lastUpdatedTime)
+            if last_updated_time:
+                elapsed += int((dt_now() - last_updated_time).total_seconds())
 
             if elapsed > crawl.timeout:
                 return "time-limit"
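The `int(...)` cast keeps `elapsed` an `int`, assuming `status.elapsedCrawlTime` is declared as one; `total_seconds()` returns a `float`, which would otherwise widen the type:

    from datetime import timedelta

    elapsed: int = 100                     # e.g. status.elapsedCrawlTime
    delta = timedelta(seconds=42.7)
    # elapsed += delta.total_seconds()     # mypy: float assigned to int
    elapsed += int(delta.total_seconds())  # OK: truncate to whole seconds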
@@ -1,9 +1,6 @@
 """ Operator handler for ProfileJobs """
 
-from btrixcloud.utils import (
-    from_k8s_date,
-    dt_now,
-)
+from btrixcloud.utils import from_k8s_date, dt_now
 
 from btrixcloud.models import StorageRef
 
@@ -29,7 +26,7 @@ class ProfileOperator(BaseOperator):
         expire_time = from_k8s_date(spec.get("expireTime"))
         browserid = spec.get("id")
 
-        if dt_now() >= expire_time:
+        if expire_time and dt_now() >= expire_time:
             self.run_task(self.k8s.delete_profile_browser(browserid))
             return {"status": {}, "children": []}
 
@@ -38,7 +38,7 @@ class JSONSerializer(json.JSONEncoder):
         return super().default(o)
 
 
-def get_templates_dir():
+def get_templates_dir() -> str:
     """return directory containing templates for loading"""
     return os.path.join(os.path.dirname(__file__), "templates")
 
@@ -53,17 +53,17 @@ def to_k8s_date(dt_val: datetime) -> str:
     return dt_val.isoformat("T") + "Z"
 
 
-def dt_now():
+def dt_now() -> datetime:
     """get current ts"""
     return datetime.now(timezone.utc).replace(microsecond=0, tzinfo=None)
 
 
-def ts_now():
+def ts_now() -> str:
     """get current ts"""
     return str(dt_now())
 
 
-def run_once_lock(name):
+def run_once_lock(name) -> bool:
     """run once lock via temp directory
     - if dir doesn't exist, return true
     - if exists, return false"""
@@ -83,7 +83,7 @@ def run_once_lock(name):
     return True
 
 
-def register_exit_handler():
+def register_exit_handler() -> None:
     """register exit handler to exit on SIGTERM"""
     loop = asyncio.get_running_loop()
 
@@ -95,7 +95,7 @@ def register_exit_handler():
     loop.add_signal_handler(signal.SIGTERM, exit_handler)
 
 
-def parse_jsonl_error_messages(errors):
+def parse_jsonl_error_messages(errors: list[str]) -> list[dict]:
     """parse json-l error strings from redis/db into json"""
     parsed_errors = []
     for error_line in errors:
@@ -153,7 +153,9 @@ def validate_slug(slug: str) -> None:
         raise HTTPException(status_code=400, detail="invalid_slug")
 
 
-def stream_dict_list_as_csv(data: List[Dict[str, Union[str, int]]], filename: str):
+def stream_dict_list_as_csv(
+    data: List[Dict[str, Union[str, int]]], filename: str
+) -> StreamingResponse:
     """Stream list of dictionaries as CSV with attachment filename header"""
     if not data:
         raise HTTPException(status_code=404, detail="crawls_not_found")
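With the utils annotated, a checker can now tell the datetime-returning and string-returning helpers apart at call sites. Illustrative only (not part of the diff):

    from datetime import datetime
    from btrixcloud.utils import dt_now, ts_now

    started: datetime = dt_now()  # OK: dt_now() -> datetime
    stamp: str = ts_now()         # OK: ts_now() -> str
    # started = ts_now()          # mypy: str not assignable to datetime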