Additional typing cleanup (#1938)

Miscellaneous typing fixes, including in the profile and time-handling functions.

Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
parent 4db3053a9f
commit 335700e683
@@ -406,6 +406,7 @@ class BackgroundJobOps:
         try:
             if job.object_type == "profile":
                 profile = await self.profile_ops.get_profile(UUID(job.object_id), org)
+                assert profile.resource
                 return BaseFile(**profile.resource.dict())

             item_res = await self.base_crawl_ops.get_base_crawl(job.object_id, org)
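The added assert is the standard way to narrow an Optional attribute for mypy before it is dereferenced. A minimal sketch of the pattern, with stand-in Profile and Resource classes (only the assert line comes from the diff):

from typing import Optional


class Resource:
    def dict(self) -> dict:
        return {"filename": "profile.tar.gz"}


class Profile:
    resource: Optional[Resource] = None


def resource_dict(profile: Profile) -> dict:
    # profile.resource is Optional[Resource] here; mypy rejects .dict() on it
    assert profile.resource
    # after the assert, mypy narrows the type to Resource
    return profile.resource.dict()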
@@ -783,11 +783,13 @@ class BaseCrawlOps:
         for cid in cids:
             if not cid:
                 continue
-            config = await self.crawl_configs.get_crawl_config(cid, org.id)
-            if not config:
-                continue
-            first_seed = config.config.seeds[0]
-            first_seeds.add(first_seed.url)
+            try:
+                config = await self.crawl_configs.get_crawl_config(cid, org.id)
+                first_seed = config.config.seeds[0]
+                first_seeds.add(first_seed.url)
+            # pylint: disable=bare-except
+            except:
+                pass

         return {
             "names": names,
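The loop now tolerates missing configs with try/except rather than a None check, consistent with get_crawl_config raising on a miss elsewhere in this commit. A hedged sketch of the same control flow with illustrative stand-ins:

configs = {"c1": {"seeds": ["https://example.com/"]}}
first_seeds: set[str] = set()


def get_config(cid: str) -> dict:
    # stand-in for get_crawl_config: raises when the config is absent
    return configs[cid]  # KeyError if missing


for cid in ["c1", "", "missing"]:
    if not cid:
        continue
    try:
        first_seeds.add(get_config(cid)["seeds"][0])
    # pylint: disable=bare-except
    except:
        pass

print(first_seeds)  # {'https://example.com/'}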
@@ -115,7 +115,7 @@ class CrawlConfigOps:
         self.crawler_images_map = {}
         channels = []
         with open(os.environ["CRAWLER_CHANNELS_JSON"], encoding="utf-8") as fh:
-            crawler_list: list[dict] = json.loads(fh.read())
+            crawler_list = json.loads(fh.read())
             for channel_data in crawler_list:
                 channel = CrawlerChannel(**channel_data)
                 channels.append(channel)
@@ -297,8 +297,6 @@ class CrawlConfigOps:
         """Update name, scale, schedule, and/or tags for an existing crawl config"""

         orig_crawl_config = await self.get_crawl_config(cid, org.id)
-        if not orig_crawl_config:
-            raise HTTPException(status_code=400, detail="config_not_found")

         # indicates if any k8s crawl config settings changed
         changed = False
@@ -437,7 +435,7 @@ class CrawlConfigOps:
         schedule: Optional[bool] = None,
         sort_by: str = "lastRun",
         sort_direction: int = -1,
-    ):
+    ) -> tuple[list[CrawlConfigOut], int]:
         """Get all crawl configs for an organization is a member of"""
         # pylint: disable=too-many-locals,too-many-branches
         # Zero-index page for query
@@ -535,7 +533,7 @@ class CrawlConfigOps:

     async def get_crawl_config_info_for_profile(
         self, profileid: UUID, org: Organization
-    ):
+    ) -> list[CrawlConfigProfileOut]:
         """Return all crawl configs that are associated with a given profileid"""
         query = {"profileid": profileid, "inactive": {"$ne": True}}
         if org:
@@ -633,10 +631,6 @@ class CrawlConfigOps:
         crawlconfig = await self.get_crawl_config(
             cid, org.id, active_only=False, config_cls=CrawlConfigOut
         )
-        if not crawlconfig:
-            raise HTTPException(
-                status_code=404, detail=f"Crawl Config '{cid}' not found"
-            )

         if not crawlconfig.inactive:
             self._add_curr_crawl_stats(
@@ -1136,11 +1130,6 @@ def init_crawl_config_api(
     async def make_inactive(cid: UUID, org: Organization = Depends(org_crawl_dep)):
         crawlconfig = await ops.get_crawl_config(cid, org.id)

-        if not crawlconfig:
-            raise HTTPException(
-                status_code=404, detail=f"Crawl Config '{cid}' not found"
-            )
-
         return await ops.do_make_inactive(crawlconfig)

     org_ops.router.include_router(router)
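The deleted call-site checks work because get_crawl_config itself raises when nothing matches, so its return type is no longer Optional and callers receive a guaranteed object. A sketch of that centralized pattern, assuming FastAPI's HTTPException as in the diff (the in-memory store is illustrative):

from fastapi import HTTPException

_configs: dict[str, dict] = {}


def get_crawl_config(cid: str) -> dict:
    """Raise instead of returning None so the annotation stays non-Optional."""
    config = _configs.get(cid)
    if not config:
        raise HTTPException(status_code=404, detail="crawl_config_not_found")
    return config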
@@ -33,6 +33,8 @@ class EmailSender:

     log_sent_emails: bool

+    default_origin: str
+
     def __init__(self):
         self.sender = os.environ.get("EMAIL_SENDER") or "Browsertrix admin"
         self.password = os.environ.get("EMAIL_PASSWORD") or ""
@@ -44,7 +46,7 @@ class EmailSender:

         self.log_sent_emails = is_bool(os.environ.get("LOG_SENT_EMAILS"))

-        self.default_origin = os.environ.get("APP_ORIGIN")
+        self.default_origin = os.environ.get("APP_ORIGIN", "")

         self.templates = Jinja2Templates(
             directory=os.path.join(os.path.dirname(__file__), "email-templates")
@@ -99,7 +101,7 @@ class EmailSender:
             server.send_message(msg)
             # server.sendmail(self.sender, receiver, message)

-    def get_origin(self, headers):
+    def get_origin(self, headers) -> str:
         """Return origin of the received request"""
         if not headers:
             return self.default_origin
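Passing a default of "" to os.environ.get() is what lets default_origin be declared as a plain str: .get() with a default returns str, while .get() without one returns Optional[str]. A minimal sketch, assuming only the env-var name from the diff:

import os


class EmailSender:
    # declared up front so mypy checks every assignment to it
    default_origin: str

    def __init__(self) -> None:
        # os.environ.get("APP_ORIGIN") alone would be Optional[str]
        self.default_origin = os.environ.get("APP_ORIGIN", "")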
@@ -2,7 +2,7 @@

 import asyncio
 import os
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Any
 from kubernetes.utils import parse_quantity

 import yaml
@@ -44,7 +44,7 @@ class K8sOpAPI(K8sAPI):
         self.compute_crawler_resources()
         self.compute_profile_resources()

-    def compute_crawler_resources(self):
+    def compute_crawler_resources(self) -> None:
         """compute memory / cpu resources for crawlers"""
         p = self.shared_params
         num_workers = max(int(p["crawler_browser_instances"]), 1)
@@ -105,7 +105,7 @@ class K8sOpAPI(K8sAPI):
         p["qa_memory"] = qa_memory
         p["qa_workers"] = qa_num_workers

-    def compute_profile_resources(self):
+    def compute_profile_resources(self) -> None:
         """compute memory /cpu resources for a single profile browser"""
         p = self.shared_params
         # if no profile specific options provided, default to crawler base for one browser
@@ -122,7 +122,7 @@ class K8sOpAPI(K8sAPI):
         print(f"cpu = {profile_cpu}")
         print(f"memory = {profile_memory}")

-    async def async_init(self):
+    async def async_init(self) -> None:
         """perform any async init here"""
         self.has_pod_metrics = await self.is_pod_metrics_available()
         print("Pod Metrics Available:", self.has_pod_metrics)
@@ -172,16 +172,16 @@ class BaseOperator:
         # see: https://stackoverflow.com/a/74059981
         self.bg_tasks = set()

-    def init_routes(self, app):
+    def init_routes(self, app) -> None:
         """init routes for this operator"""

-    def run_task(self, func):
+    def run_task(self, func) -> None:
         """add bg tasks to set to avoid premature garbage collection"""
         task = asyncio.create_task(func)
         self.bg_tasks.add(task)
         task.add_done_callback(self.bg_tasks.discard)

-    def load_from_yaml(self, filename, params):
+    def load_from_yaml(self, filename, params) -> list[Any]:
         """load and parse k8s template from yaml file"""
         return list(
             yaml.safe_load_all(
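The -> None annotations make the procedure signatures explicit, and load_from_yaml is annotated list[Any] because yaml.safe_load_all can yield documents of any shape. A standalone sketch of the loader (the multi-document string is illustrative):

from typing import Any

import yaml


def load_from_yaml(text: str) -> list[Any]:
    """Parse a multi-document YAML string into a list of Python objects."""
    return list(yaml.safe_load_all(text))


docs = load_from_yaml("a: 1\n---\nb: 2\n")
print(docs)  # [{'a': 1}, {'b': 2}]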
@@ -1449,6 +1449,9 @@ class CrawlOperator(BaseOperator):
         """Increment Crawl Stats"""

         started = from_k8s_date(crawl.started)
+        if not started:
+            print("Missing crawl start time, unable to increment crawl stats")
+            return

         duration = int((finished - started).total_seconds())

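The new guard is needed because from_k8s_date returns Optional[datetime] (see the utils hunk at the end), and subtracting a possibly-None value from a datetime does not type-check. The guard in isolation, as a hedged sketch:

from datetime import datetime
from typing import Optional


def from_k8s_date(string: str) -> Optional[datetime]:
    """convert k8s date string to datetime"""
    return datetime.fromisoformat(string[:-1]) if string else None


def duration_seconds(started_str: str, finished: datetime) -> Optional[int]:
    started = from_k8s_date(started_str)
    if not started:
        # without this early return, (finished - started) fails type checking
        return None
    return int((finished - started).total_seconds())


print(duration_seconds("2024-05-01T12:00:00Z", datetime(2024, 5, 1, 12, 5)))  # 300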
@@ -102,6 +102,7 @@ class PageOps:
         if not status and page_dict.get("loadState"):
             status = 200

+        ts = page_dict.get("ts")
         p = Page(
             id=page_id,
             oid=oid,
@@ -111,9 +112,7 @@ class PageOps:
             loadState=page_dict.get("loadState"),
             status=status,
             mime=page_dict.get("mime", "text/html"),
-            ts=(
-                from_k8s_date(page_dict.get("ts")) if page_dict.get("ts") else dt_now()
-            ),
+            ts=(from_k8s_date(ts) if ts else dt_now()),
         )
         p.compute_page_type()
         return p
@@ -403,7 +402,7 @@ class PageOps:

         remaining_notes = []
         for note in page_notes:
-            if not note.get("id") in delete.delete_list:
+            if note.get("id") not in delete.delete_list:
                 remaining_notes.append(note)

         modified = dt_now()
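Hoisting page_dict.get("ts") into a local means the value is fetched once and mypy can narrow that single binding in the conditional expression; two separate .get() calls would not narrow. A sketch of the narrowing:

from datetime import datetime, timezone


def dt_now() -> datetime:
    return datetime.now(timezone.utc)


page_dict: dict[str, str] = {"ts": "2024-05-01T12:00:00Z"}

ts = page_dict.get("ts")  # Optional[str], bound once
# in the true branch, ts is narrowed to str
parsed = datetime.fromisoformat(ts[:-1]) if ts else dt_now()
print(parsed)  # 2024-05-01 12:00:00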
@@ -1,12 +1,13 @@
 """ Profile Management """

-from typing import Optional, TYPE_CHECKING, Any, cast, Dict, List
+from typing import Optional, TYPE_CHECKING, Any, cast, Dict, List, Tuple
 from uuid import UUID, uuid4
 import os

 from urllib.parse import urlencode

 from fastapi import APIRouter, Depends, Request, HTTPException
+from starlette.requests import Headers
 import aiohttp

 from .pagination import DEFAULT_PAGE_SIZE, paginated_format
@@ -30,6 +31,7 @@ from .models import (
     SuccessResponseStorageQuota,
     ProfilePingResponse,
     ProfileBrowserGetUrlResponse,
+    CrawlConfigProfileOut,
 )
 from .utils import dt_now

@@ -58,6 +60,9 @@ class ProfileOps:
     crawlconfigs: CrawlConfigOps
     background_job_ops: BackgroundJobOps

+    browser_fqdn_suffix: str
+    router: APIRouter
+
     def __init__(self, mdb, orgs, crawl_manager, storage_ops, background_job_ops):
         self.profiles = mdb["profiles"]
         self.orgs = orgs
@@ -66,7 +71,7 @@ class ProfileOps:
         self.crawl_manager = crawl_manager
         self.storage_ops = storage_ops

-        self.browser_fqdn_suffix = os.environ.get("CRAWLER_FQDN_SUFFIX")
+        self.browser_fqdn_suffix = os.environ.get("CRAWLER_FQDN_SUFFIX", "")

         self.router = APIRouter(
             prefix="/profiles",
@@ -82,16 +87,16 @@ class ProfileOps:

     async def create_new_browser(
         self, org: Organization, user: User, profile_launch: ProfileLaunchBrowserIn
-    ):
+    ) -> BrowserId:
         """Create new profile"""
-        prev_profile = ""
+        prev_profile_path = ""
         prev_profile_id = ""
         if profile_launch.profileId:
-            prev_profile = await self.get_profile_storage_path(
+            prev_profile_path = await self.get_profile_storage_path(
                 profile_launch.profileId, org
             )

-            if not prev_profile:
+            if not prev_profile_path:
                 raise HTTPException(status_code=400, detail="invalid_base_profile")

             prev_profile_id = str(profile_launch.profileId)
@@ -109,7 +114,7 @@ class ProfileOps:
             storage=org.storage,
             crawler_image=crawler_image,
             baseprofile=prev_profile_id,
-            profile_filename=prev_profile,
+            profile_filename=prev_profile_path,
         )

         if not browserid:
@@ -117,7 +122,9 @@ class ProfileOps:

         return BrowserId(browserid=browserid)

-    async def get_profile_browser_url(self, browserid, oid, headers):
+    async def get_profile_browser_url(
+        self, browserid: str, oid: str, headers: Headers
+    ) -> dict[str, str | int]:
         """get profile browser url"""
         json = await self._send_browser_req(browserid, "/vncpass")

@@ -130,7 +137,7 @@ class ProfileOps:
         host = headers.get("Host") or "localhost"
         # ws_scheme = "wss" if scheme == "https" else "ws"

-        auth_bearer = headers.get("Authorization").split(" ")[1]
+        auth_bearer = headers.get("Authorization", "").split(" ")[1]

         params = {
             "path": f"browser/{browserid}/ws?oid={oid}&auth_bearer={auth_bearer}",
@@ -144,7 +151,7 @@ class ProfileOps:
         params["url"] = url
         return params

-    async def ping_profile_browser(self, browserid):
+    async def ping_profile_browser(self, browserid: str) -> dict[str, Any]:
         """ping profile browser to keep it running"""
         await self.crawl_manager.ping_profile_browser(browserid)

@@ -152,7 +159,9 @@ class ProfileOps:

         return {"success": True, "origins": json.get("origins") or []}

-    async def navigate_profile_browser(self, browserid, urlin: UrlIn):
+    async def navigate_profile_browser(
+        self, browserid: str, urlin: UrlIn
+    ) -> dict[str, bool]:
         """ping profile browser to keep it running"""
         await self._send_browser_req(browserid, "/navigate", "POST", json=urlin.dict())

@@ -255,7 +264,7 @@ class ProfileOps:

     async def update_profile_metadata(
         self, profileid: UUID, update: ProfileUpdate, user: User
-    ):
+    ) -> dict[str, bool]:
         """Update name and description metadata only on existing profile"""
         query = {
             "name": update.name,
@@ -282,7 +291,7 @@ class ProfileOps:
         page: int = 1,
         sort_by: str = "modified",
         sort_direction: int = -1,
-    ):
+    ) -> Tuple[list[Profile], int]:
         """list all profiles"""
         # pylint: disable=too-many-locals,duplicate-code

@@ -334,7 +343,9 @@ class ProfileOps:
         profiles = [Profile.from_dict(res) for res in items]
         return profiles, total

-    async def get_profile(self, profileid: UUID, org: Optional[Organization] = None):
+    async def get_profile(
+        self, profileid: UUID, org: Optional[Organization] = None
+    ) -> Profile:
         """get profile by id and org"""
         query: dict[str, object] = {"_id": profileid}
         if org:
@@ -346,7 +357,9 @@ class ProfileOps:

         return Profile.from_dict(res)

-    async def get_profile_with_configs(self, profileid: UUID, org: Organization):
+    async def get_profile_with_configs(
+        self, profileid: UUID, org: Organization
+    ) -> ProfileWithCrawlConfigs:
         """get profile for api output, with crawlconfigs"""

         profile = await self.get_profile(profileid, org)
@@ -357,27 +370,33 @@ class ProfileOps:

     async def get_profile_storage_path(
         self, profileid: UUID, org: Optional[Organization] = None
-    ):
+    ) -> str:
         """return profile path filename (relative path) for given profile id and org"""
         try:
             profile = await self.get_profile(profileid, org)
-            return profile.resource.filename
+            return profile.resource.filename if profile.resource else ""
         # pylint: disable=bare-except
         except:
-            return None
+            pass
+
+        return ""

     async def get_profile_name(
         self, profileid: UUID, org: Optional[Organization] = None
-    ):
+    ) -> str:
         """return profile for given profile id and org"""
         try:
             profile = await self.get_profile(profileid, org)
             return profile.name
         # pylint: disable=bare-except
         except:
-            return None
+            pass
+
+        return ""

-    async def get_crawl_configs_for_profile(self, profileid: UUID, org: Organization):
+    async def get_crawl_configs_for_profile(
+        self, profileid: UUID, org: Organization
+    ) -> list[CrawlConfigProfileOut]:
         """Get list of crawl configs with basic info for that use a particular profile"""

         crawlconfig_info = await self.crawlconfigs.get_crawl_config_info_for_profile(
@@ -386,7 +405,9 @@ class ProfileOps:

         return crawlconfig_info

-    async def delete_profile(self, profileid: UUID, org: Organization):
+    async def delete_profile(
+        self, profileid: UUID, org: Organization
+    ) -> dict[str, Any]:
         """delete profile, if not used in active crawlconfig"""
         profile = await self.get_profile_with_configs(profileid, org)

@@ -403,27 +424,32 @@ class ProfileOps:
             await self.orgs.inc_org_bytes_stored(
                 org.id, -profile.resource.size, "profile"
             )
+            await self.background_job_ops.create_delete_replica_jobs(
+                org, profile.resource, str(profile.id), "profile"
+            )

         res = await self.profiles.delete_one(query)
         if not res or res.deleted_count != 1:
             raise HTTPException(status_code=404, detail="profile_not_found")

-        await self.background_job_ops.create_delete_replica_jobs(
-            org, profile.resource, profile.id, "profile"
-        )
-
         quota_reached = await self.orgs.storage_quota_reached(org.id)

         return {"success": True, "storageQuotaReached": quota_reached}

-    async def delete_profile_browser(self, browserid):
+    async def delete_profile_browser(self, browserid: str) -> dict[str, bool]:
         """delete profile browser immediately"""
         if not await self.crawl_manager.delete_profile_browser(browserid):
             raise HTTPException(status_code=404, detail="browser_not_found")

         return {"success": True}

-    async def _send_browser_req(self, browserid, path, method="GET", json=None):
+    async def _send_browser_req(
+        self,
+        browserid: str,
+        path: str,
+        method: str = "GET",
+        json: Optional[dict[str, Any]] = None,
+    ) -> dict[str, Any]:
         """make request to browser api to get state"""
         try:
             async with aiohttp.ClientSession() as session:
@@ -438,7 +464,7 @@ class ProfileOps:
             # pylint: disable=raise-missing-from
             raise HTTPException(status_code=200, detail="waiting_for_browser")

-        return json
+        return json or {}

     async def add_profile_file_replica(
         self, profileid: UUID, filename: str, ref: StorageRef
@@ -453,7 +479,12 @@
 # ============================================================================
 # pylint: disable=redefined-builtin,invalid-name,too-many-locals,too-many-arguments
 def init_profiles_api(
-    mdb, org_ops, crawl_manager, storage_ops, background_job_ops, user_dep
+    mdb,
+    org_ops: OrgOps,
+    crawl_manager: CrawlManager,
+    storage_ops: StorageOps,
+    background_job_ops: BackgroundJobOps,
+    user_dep,
 ):
     """init profile ops system"""
     ops = ProfileOps(mdb, org_ops, crawl_manager, storage_ops, background_job_ops)
@@ -584,6 +615,7 @@ def init_profiles_api(
     async def delete_profile_browser(browserid: str = Depends(browser_dep)):
         return await ops.delete_profile_browser(browserid)

-    org_ops.router.include_router(router)
+    if org_ops.router:
+        org_ops.router.include_router(router)

     return ops
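Two patterns recur through the profiles changes: lookups now raise HTTPException instead of returning None (so get_profile is annotated -> Profile), and best-effort helpers fall back to "" instead of None (so they are annotated -> str). A condensed sketch of both, with an illustrative in-memory store:

from fastapi import HTTPException

_profiles: dict[str, dict] = {"p1": {"name": "Login profile"}}


def get_profile(profileid: str) -> dict:
    """Raise rather than return None, keeping the return type non-Optional."""
    res = _profiles.get(profileid)
    if not res:
        raise HTTPException(status_code=404, detail="profile_not_found")
    return res


def get_profile_name(profileid: str) -> str:
    """Swallow lookup failures and fall back to "" instead of None."""
    try:
        return get_profile(profileid).get("name", "")
    # pylint: disable=bare-except
    except:
        pass
    return ""


print(get_profile_name("p1"))       # Login profile
print(repr(get_profile_name("x")))  # ''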
@@ -37,6 +37,7 @@ from mypy_boto3_s3.type_defs import CompletedPartTypeDef
 from types_aiobotocore_s3 import S3Client as AIOS3Client

 from .models import (
+    BaseFile,
     CrawlFile,
     CrawlFileOut,
     Organization,
@@ -504,7 +505,7 @@ class StorageOps:
         return presigned_url

     async def delete_crawl_file_object(
-        self, org: Organization, crawlfile: CrawlFile
+        self, org: Organization, crawlfile: BaseFile
     ) -> bool:
         """delete crawl file from storage."""
         return await self._delete_file(org, crawlfile.filename, crawlfile.storage)
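Widening the crawlfile parameter from CrawlFile to BaseFile lets one delete path serve any file subtype; the BackgroundJobOps hunk above passes a profile resource through the same BaseFile shape. A sketch of the substitution, with hypothetical subclasses:

class BaseFile:
    filename: str = "file.bin"


class CrawlFile(BaseFile):
    pass


class ProfileFile(BaseFile):  # hypothetical sibling subtype
    pass


def delete_file_object(f: BaseFile) -> str:
    # any BaseFile subclass type-checks here
    return f.filename


delete_file_object(CrawlFile())
delete_file_object(ProfileFile())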
@@ -40,12 +40,12 @@ def get_templates_dir():
     return os.path.join(os.path.dirname(__file__), "templates")


-def from_k8s_date(string):
+def from_k8s_date(string: str) -> Optional[datetime]:
     """convert k8s date string to datetime"""
     return datetime.fromisoformat(string[:-1]) if string else None


-def to_k8s_date(dt_val):
+def to_k8s_date(dt_val: datetime) -> str:
     """convert datetime to string for k8s"""
     return dt_val.isoformat("T") + "Z"

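With both helpers annotated, a round trip type-checks once the Optional is narrowed; a usage sketch using the functions as defined above:

from datetime import datetime
from typing import Optional


def from_k8s_date(string: str) -> Optional[datetime]:
    return datetime.fromisoformat(string[:-1]) if string else None


def to_k8s_date(dt_val: datetime) -> str:
    return dt_val.isoformat("T") + "Z"


dt = from_k8s_date("2024-05-01T12:00:00Z")
if dt:  # narrow Optional[datetime] to datetime before converting back
    assert to_k8s_date(dt) == "2024-05-01T12:00:00Z"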