Additional typing cleanup (#1938)
Misc typing fixes, including in profiles and time functions

---------

Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
This commit is contained in:
parent 4db3053a9f
commit 335700e683
```diff
@@ -406,6 +406,7 @@ class BackgroundJobOps:
         try:
             if job.object_type == "profile":
                 profile = await self.profile_ops.get_profile(UUID(job.object_id), org)
+                assert profile.resource
                 return BaseFile(**profile.resource.dict())

             item_res = await self.base_crawl_ops.get_base_crawl(job.object_id, org)
```
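Note: the added `assert profile.resource` is there for mypy as much as for runtime safety: `resource` is declared `Optional` on the model, and the assert narrows it to non-`None` before `.dict()` is called. A minimal sketch of the pattern, with illustrative stand-in classes rather than the project's actual models:

```python
from typing import Optional


class Resource:
    """stand-in for a profile's stored file"""

    def dict(self) -> dict:
        return {"filename": "profile.tar.gz"}


class Profile:
    resource: Optional[Resource] = None


def to_base_file(profile: Profile) -> dict:
    # the assert guards at runtime and narrows Optional[Resource]
    # to Resource for the type checker on the next line
    assert profile.resource
    return profile.resource.dict()
```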
```diff
@@ -783,11 +783,13 @@ class BaseCrawlOps:
         for cid in cids:
             if not cid:
                 continue
-            config = await self.crawl_configs.get_crawl_config(cid, org.id)
-            if not config:
-                continue
-            first_seed = config.config.seeds[0]
-            first_seeds.add(first_seed.url)
+            try:
+                config = await self.crawl_configs.get_crawl_config(cid, org.id)
+                first_seed = config.config.seeds[0]
+                first_seeds.add(first_seed.url)
+            # pylint: disable=bare-except
+            except:
+                pass

         return {
             "names": names,
```
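Note: the removed `if not config` check and the new `try`/`except` follow from a behavior change made elsewhere in this commit: `get_crawl_config` raises on a missing config instead of returning `None`, so its result no longer carries an `Optional` type. A hedged sketch of the getter-side pattern, with a toy in-memory store in place of the real collection:

```python
from fastapi import HTTPException

# toy store standing in for the real crawl config collection
CONFIGS: dict[str, dict] = {"abc": {"name": "weekly-crawl"}}


def get_crawl_config(cid: str) -> dict:
    """Return the config or raise, so the return type is not Optional."""
    res = CONFIGS.get(cid)
    if not res:
        raise HTTPException(status_code=404, detail="crawl_config_not_found")
    return res
```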
```diff
@@ -115,7 +115,7 @@ class CrawlConfigOps:
         self.crawler_images_map = {}
         channels = []
         with open(os.environ["CRAWLER_CHANNELS_JSON"], encoding="utf-8") as fh:
-            crawler_list: list[dict] = json.loads(fh.read())
+            crawler_list = json.loads(fh.read())
             for channel_data in crawler_list:
                 channel = CrawlerChannel(**channel_data)
                 channels.append(channel)
```
```diff
@@ -297,8 +297,6 @@ class CrawlConfigOps:
         """Update name, scale, schedule, and/or tags for an existing crawl config"""

         orig_crawl_config = await self.get_crawl_config(cid, org.id)
-        if not orig_crawl_config:
-            raise HTTPException(status_code=400, detail="config_not_found")

         # indicates if any k8s crawl config settings changed
         changed = False
```
```diff
@@ -437,7 +435,7 @@ class CrawlConfigOps:
         schedule: Optional[bool] = None,
         sort_by: str = "lastRun",
         sort_direction: int = -1,
-    ):
+    ) -> tuple[list[CrawlConfigOut], int]:
         """Get all crawl configs for an organization is a member of"""
         # pylint: disable=too-many-locals,too-many-branches
         # Zero-index page for query
```
```diff
@@ -535,7 +533,7 @@ class CrawlConfigOps:

     async def get_crawl_config_info_for_profile(
         self, profileid: UUID, org: Organization
-    ):
+    ) -> list[CrawlConfigProfileOut]:
         """Return all crawl configs that are associated with a given profileid"""
         query = {"profileid": profileid, "inactive": {"$ne": True}}
         if org:
```
```diff
@@ -633,10 +631,6 @@ class CrawlConfigOps:
         crawlconfig = await self.get_crawl_config(
             cid, org.id, active_only=False, config_cls=CrawlConfigOut
         )
-        if not crawlconfig:
-            raise HTTPException(
-                status_code=404, detail=f"Crawl Config '{cid}' not found"
-            )

         if not crawlconfig.inactive:
             self._add_curr_crawl_stats(
```
```diff
@@ -1136,11 +1130,6 @@ def init_crawl_config_api(
     async def make_inactive(cid: UUID, org: Organization = Depends(org_crawl_dep)):
         crawlconfig = await ops.get_crawl_config(cid, org.id)

-        if not crawlconfig:
-            raise HTTPException(
-                status_code=404, detail=f"Crawl Config '{cid}' not found"
-            )
-
         return await ops.do_make_inactive(crawlconfig)

     org_ops.router.include_router(router)
```
```diff
@@ -33,6 +33,8 @@ class EmailSender:

     log_sent_emails: bool

+    default_origin: str
+
     def __init__(self):
         self.sender = os.environ.get("EMAIL_SENDER") or "Browsertrix admin"
         self.password = os.environ.get("EMAIL_PASSWORD") or ""
```
```diff
@@ -44,7 +46,7 @@ class EmailSender:

         self.log_sent_emails = is_bool(os.environ.get("LOG_SENT_EMAILS"))

-        self.default_origin = os.environ.get("APP_ORIGIN")
+        self.default_origin = os.environ.get("APP_ORIGIN", "")

         self.templates = Jinja2Templates(
             directory=os.path.join(os.path.dirname(__file__), "email-templates")
```
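Note: `os.environ.get(key)` is typed `Optional[str]`, so assigning it to the newly declared `default_origin: str` attribute would fail type checking. Passing an explicit default collapses the type to plain `str`:

```python
import os

# Optional[str] without the default; plain str with it
default_origin: str = os.environ.get("APP_ORIGIN", "")
```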
```diff
@@ -99,7 +101,7 @@ class EmailSender:
             server.send_message(msg)
             # server.sendmail(self.sender, receiver, message)

-    def get_origin(self, headers):
+    def get_origin(self, headers) -> str:
         """Return origin of the received request"""
         if not headers:
             return self.default_origin
```
```diff
@@ -2,7 +2,7 @@

 import asyncio
 import os
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Any
 from kubernetes.utils import parse_quantity

 import yaml
```
```diff
@@ -44,7 +44,7 @@ class K8sOpAPI(K8sAPI):
         self.compute_crawler_resources()
         self.compute_profile_resources()

-    def compute_crawler_resources(self):
+    def compute_crawler_resources(self) -> None:
         """compute memory / cpu resources for crawlers"""
         p = self.shared_params
         num_workers = max(int(p["crawler_browser_instances"]), 1)
```
```diff
@@ -105,7 +105,7 @@ class K8sOpAPI(K8sAPI):
         p["qa_memory"] = qa_memory
         p["qa_workers"] = qa_num_workers

-    def compute_profile_resources(self):
+    def compute_profile_resources(self) -> None:
         """compute memory /cpu resources for a single profile browser"""
         p = self.shared_params
         # if no profile specific options provided, default to crawler base for one browser
```
```diff
@@ -122,7 +122,7 @@ class K8sOpAPI(K8sAPI):
         print(f"cpu = {profile_cpu}")
         print(f"memory = {profile_memory}")

-    async def async_init(self):
+    async def async_init(self) -> None:
         """perform any async init here"""
         self.has_pod_metrics = await self.is_pod_metrics_available()
         print("Pod Metrics Available:", self.has_pod_metrics)
```
```diff
@@ -172,16 +172,16 @@ class BaseOperator:
         # see: https://stackoverflow.com/a/74059981
         self.bg_tasks = set()

-    def init_routes(self, app):
+    def init_routes(self, app) -> None:
         """init routes for this operator"""

-    def run_task(self, func):
+    def run_task(self, func) -> None:
         """add bg tasks to set to avoid premature garbage collection"""
         task = asyncio.create_task(func)
         self.bg_tasks.add(task)
         task.add_done_callback(self.bg_tasks.discard)

-    def load_from_yaml(self, filename, params):
+    def load_from_yaml(self, filename, params) -> list[Any]:
         """load and parse k8s template from yaml file"""
         return list(
             yaml.safe_load_all(
```
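Note: `yaml.safe_load_all` yields documents of unknown shape, so `list[Any]` is the honest annotation for `load_from_yaml`. A small usage sketch:

```python
from typing import Any

import yaml


def load_all(text: str) -> list[Any]:
    # each YAML document may be a dict, list, scalar, ...
    return list(yaml.safe_load_all(text))


assert load_all("a: 1\n---\nb: 2\n") == [{"a": 1}, {"b": 2}]
```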
```diff
@@ -1449,6 +1449,9 @@ class CrawlOperator(BaseOperator):
         """Increment Crawl Stats"""

         started = from_k8s_date(crawl.started)
+        if not started:
+            print("Missing crawl start time, unable to increment crawl stats")
+            return

         duration = int((finished - started).total_seconds())

```
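Note: `from_k8s_date` is now annotated `-> Optional[datetime]` (see the utils hunk at the end), so subtracting its result without a guard is a type error; the early return handles the `None` case before the arithmetic. A self-contained sketch of the interplay:

```python
from datetime import datetime
from typing import Optional


def from_k8s_date(string: str) -> Optional[datetime]:
    """convert k8s date string to datetime (None for empty input)"""
    return datetime.fromisoformat(string[:-1]) if string else None


finished = datetime(2024, 1, 1, 12, 30)
started = from_k8s_date("2024-01-01T12:00:00Z")
if not started:
    raise SystemExit("Missing crawl start time, unable to increment crawl stats")
# after the guard, mypy treats `started` as datetime, so this checks
assert int((finished - started).total_seconds()) == 1800
```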
```diff
@@ -102,6 +102,7 @@ class PageOps:
         if not status and page_dict.get("loadState"):
             status = 200

+        ts = page_dict.get("ts")
         p = Page(
             id=page_id,
             oid=oid,
```
```diff
@@ -111,9 +112,7 @@ class PageOps:
             loadState=page_dict.get("loadState"),
             status=status,
             mime=page_dict.get("mime", "text/html"),
-            ts=(
-                from_k8s_date(page_dict.get("ts")) if page_dict.get("ts") else dt_now()
-            ),
+            ts=(from_k8s_date(ts) if ts else dt_now()),
         )
         p.compute_page_type()
         return p
```
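Note: hoisting `page_dict.get("ts")` into the local `ts` avoids calling `.get()` twice and, more importantly for mypy, gives the `if ts` test a single binding it can narrow. The same shape in isolation:

```python
from datetime import datetime, timezone

page_dict: dict[str, str] = {"ts": "2024-01-01T00:00:00Z"}

ts = page_dict.get("ts")  # Optional[str], narrowed by the conditional below
when = datetime.fromisoformat(ts[:-1]) if ts else datetime.now(timezone.utc)
assert when.year == 2024
```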
```diff
@@ -403,7 +402,7 @@ class PageOps:

         remaining_notes = []
         for note in page_notes:
-            if not note.get("id") in delete.delete_list:
+            if note.get("id") not in delete.delete_list:
                 remaining_notes.append(note)

         modified = dt_now()
```
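Note: `not x in xs` already parses as `not (x in xs)`, so this is purely a readability fix; `not in` is the idiomatic spelling that linters prefer:

```python
x, xs = 1, [2, 3]
assert (not x in xs) == (x not in xs)  # identical semantics
```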
```diff
@@ -1,12 +1,13 @@
 """ Profile Management """

-from typing import Optional, TYPE_CHECKING, Any, cast, Dict, List
+from typing import Optional, TYPE_CHECKING, Any, cast, Dict, List, Tuple
 from uuid import UUID, uuid4
 import os

 from urllib.parse import urlencode

 from fastapi import APIRouter, Depends, Request, HTTPException
+from starlette.requests import Headers
 import aiohttp

 from .pagination import DEFAULT_PAGE_SIZE, paginated_format
```
```diff
@@ -30,6 +31,7 @@ from .models import (
     SuccessResponseStorageQuota,
     ProfilePingResponse,
     ProfileBrowserGetUrlResponse,
+    CrawlConfigProfileOut,
 )
 from .utils import dt_now

```
```diff
@@ -58,6 +60,9 @@ class ProfileOps:
     crawlconfigs: CrawlConfigOps
     background_job_ops: BackgroundJobOps

+    browser_fqdn_suffix: str
+    router: APIRouter
+
     def __init__(self, mdb, orgs, crawl_manager, storage_ops, background_job_ops):
         self.profiles = mdb["profiles"]
         self.orgs = orgs
```
```diff
@@ -66,7 +71,7 @@ class ProfileOps:
         self.crawl_manager = crawl_manager
         self.storage_ops = storage_ops

-        self.browser_fqdn_suffix = os.environ.get("CRAWLER_FQDN_SUFFIX")
+        self.browser_fqdn_suffix = os.environ.get("CRAWLER_FQDN_SUFFIX", "")

         self.router = APIRouter(
             prefix="/profiles",
```
```diff
@@ -82,16 +87,16 @@ class ProfileOps:

     async def create_new_browser(
         self, org: Organization, user: User, profile_launch: ProfileLaunchBrowserIn
-    ):
+    ) -> BrowserId:
         """Create new profile"""
-        prev_profile = ""
+        prev_profile_path = ""
         prev_profile_id = ""
         if profile_launch.profileId:
-            prev_profile = await self.get_profile_storage_path(
+            prev_profile_path = await self.get_profile_storage_path(
                 profile_launch.profileId, org
             )

-            if not prev_profile:
+            if not prev_profile_path:
                 raise HTTPException(status_code=400, detail="invalid_base_profile")

             prev_profile_id = str(profile_launch.profileId)
```
```diff
@@ -109,7 +114,7 @@ class ProfileOps:
             storage=org.storage,
             crawler_image=crawler_image,
             baseprofile=prev_profile_id,
-            profile_filename=prev_profile,
+            profile_filename=prev_profile_path,
         )

         if not browserid:
```
```diff
@@ -117,7 +122,9 @@ class ProfileOps:

         return BrowserId(browserid=browserid)

-    async def get_profile_browser_url(self, browserid, oid, headers):
+    async def get_profile_browser_url(
+        self, browserid: str, oid: str, headers: Headers
+    ) -> dict[str, str | int]:
         """get profile browser url"""
         json = await self._send_browser_req(browserid, "/vncpass")

```
```diff
@@ -130,7 +137,7 @@ class ProfileOps:
         host = headers.get("Host") or "localhost"
         # ws_scheme = "wss" if scheme == "https" else "ws"

-        auth_bearer = headers.get("Authorization").split(" ")[1]
+        auth_bearer = headers.get("Authorization", "").split(" ")[1]

         params = {
             "path": f"browser/{browserid}/ws?oid={oid}&auth_bearer={auth_bearer}",
```
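Note: `Headers.get` likewise returns `Optional[str]`, so the empty-string default keeps the `.split()` call well-typed; it assumes the `Authorization` header, when present, has the `Bearer <token>` shape. A sketch using starlette's `Headers` (the class itself lives in `starlette.datastructures`):

```python
from starlette.datastructures import Headers

headers = Headers({"Authorization": "Bearer abc123"})
# the "" default turns Optional[str] into str for the type checker
auth_bearer = headers.get("Authorization", "").split(" ")[1]
assert auth_bearer == "abc123"
```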
```diff
@@ -144,7 +151,7 @@ class ProfileOps:
             params["url"] = url
         return params

-    async def ping_profile_browser(self, browserid):
+    async def ping_profile_browser(self, browserid: str) -> dict[str, Any]:
         """ping profile browser to keep it running"""
         await self.crawl_manager.ping_profile_browser(browserid)

```
```diff
@@ -152,7 +159,9 @@ class ProfileOps:

         return {"success": True, "origins": json.get("origins") or []}

-    async def navigate_profile_browser(self, browserid, urlin: UrlIn):
+    async def navigate_profile_browser(
+        self, browserid: str, urlin: UrlIn
+    ) -> dict[str, bool]:
         """ping profile browser to keep it running"""
         await self._send_browser_req(browserid, "/navigate", "POST", json=urlin.dict())

```
```diff
@@ -255,7 +264,7 @@ class ProfileOps:

     async def update_profile_metadata(
         self, profileid: UUID, update: ProfileUpdate, user: User
-    ):
+    ) -> dict[str, bool]:
         """Update name and description metadata only on existing profile"""
         query = {
             "name": update.name,
```
```diff
@@ -282,7 +291,7 @@ class ProfileOps:
         page: int = 1,
         sort_by: str = "modified",
         sort_direction: int = -1,
-    ):
+    ) -> Tuple[list[Profile], int]:
         """list all profiles"""
         # pylint: disable=too-many-locals,duplicate-code

```
```diff
@@ -334,7 +343,9 @@ class ProfileOps:
         profiles = [Profile.from_dict(res) for res in items]
         return profiles, total

-    async def get_profile(self, profileid: UUID, org: Optional[Organization] = None):
+    async def get_profile(
+        self, profileid: UUID, org: Optional[Organization] = None
+    ) -> Profile:
         """get profile by id and org"""
         query: dict[str, object] = {"_id": profileid}
         if org:
```
```diff
@@ -346,7 +357,9 @@ class ProfileOps:

         return Profile.from_dict(res)

-    async def get_profile_with_configs(self, profileid: UUID, org: Organization):
+    async def get_profile_with_configs(
+        self, profileid: UUID, org: Organization
+    ) -> ProfileWithCrawlConfigs:
         """get profile for api output, with crawlconfigs"""

         profile = await self.get_profile(profileid, org)
```
```diff
@@ -357,27 +370,33 @@ class ProfileOps:

     async def get_profile_storage_path(
         self, profileid: UUID, org: Optional[Organization] = None
-    ):
+    ) -> str:
         """return profile path filename (relative path) for given profile id and org"""
         try:
             profile = await self.get_profile(profileid, org)
-            return profile.resource.filename
+            return profile.resource.filename if profile.resource else ""
         # pylint: disable=bare-except
         except:
-            return None
+            pass
+
+        return ""

     async def get_profile_name(
         self, profileid: UUID, org: Optional[Organization] = None
-    ):
+    ) -> str:
         """return profile for given profile id and org"""
         try:
             profile = await self.get_profile(profileid, org)
             return profile.name
         # pylint: disable=bare-except
         except:
-            return None
+            pass
+
+        return ""

-    async def get_crawl_configs_for_profile(self, profileid: UUID, org: Organization):
+    async def get_crawl_configs_for_profile(
+        self, profileid: UUID, org: Organization
+    ) -> list[CrawlConfigProfileOut]:
         """Get list of crawl configs with basic info for that use a particular profile"""

         crawlconfig_info = await self.crawlconfigs.get_crawl_config_info_for_profile(
```
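Note: returning `""` instead of `None` on the failure paths lets these helpers drop `Optional` from their signatures; callers already treat a falsy result as "not found". The pattern, reduced to its core:

```python
def get_name(data: dict) -> str:
    """return the name, with "" as the not-found sentinel (never None)"""
    try:
        return data["name"]
    # pylint: disable=bare-except
    except:
        pass
    return ""


assert get_name({"name": "profile-1"}) == "profile-1"
assert get_name({}) == ""
```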
```diff
@@ -386,7 +405,9 @@ class ProfileOps:

         return crawlconfig_info

-    async def delete_profile(self, profileid: UUID, org: Organization):
+    async def delete_profile(
+        self, profileid: UUID, org: Organization
+    ) -> dict[str, Any]:
         """delete profile, if not used in active crawlconfig"""
         profile = await self.get_profile_with_configs(profileid, org)

```
```diff
@@ -403,27 +424,32 @@ class ProfileOps:
             await self.orgs.inc_org_bytes_stored(
                 org.id, -profile.resource.size, "profile"
             )
+            await self.background_job_ops.create_delete_replica_jobs(
+                org, profile.resource, str(profile.id), "profile"
+            )

         res = await self.profiles.delete_one(query)
         if not res or res.deleted_count != 1:
             raise HTTPException(status_code=404, detail="profile_not_found")

-        await self.background_job_ops.create_delete_replica_jobs(
-            org, profile.resource, profile.id, "profile"
-        )
-
         quota_reached = await self.orgs.storage_quota_reached(org.id)

         return {"success": True, "storageQuotaReached": quota_reached}

-    async def delete_profile_browser(self, browserid):
+    async def delete_profile_browser(self, browserid: str) -> dict[str, bool]:
         """delete profile browser immediately"""
         if not await self.crawl_manager.delete_profile_browser(browserid):
             raise HTTPException(status_code=404, detail="browser_not_found")

         return {"success": True}

-    async def _send_browser_req(self, browserid, path, method="GET", json=None):
+    async def _send_browser_req(
+        self,
+        browserid: str,
+        path: str,
+        method: str = "GET",
+        json: Optional[dict[str, Any]] = None,
+    ) -> dict[str, Any]:
         """make request to browser api to get state"""
         try:
             async with aiohttp.ClientSession() as session:
```
```diff
@@ -438,7 +464,7 @@ class ProfileOps:
             # pylint: disable=raise-missing-from
             raise HTTPException(status_code=200, detail="waiting_for_browser")

-        return json
+        return json or {}

     async def add_profile_file_replica(
         self, profileid: UUID, filename: str, ref: StorageRef
```
```diff
@@ -453,7 +479,12 @@ class ProfileOps:
 # ============================================================================
 # pylint: disable=redefined-builtin,invalid-name,too-many-locals,too-many-arguments
 def init_profiles_api(
-    mdb, org_ops, crawl_manager, storage_ops, background_job_ops, user_dep
+    mdb,
+    org_ops: OrgOps,
+    crawl_manager: CrawlManager,
+    storage_ops: StorageOps,
+    background_job_ops: BackgroundJobOps,
+    user_dep,
 ):
     """init profile ops system"""
     ops = ProfileOps(mdb, org_ops, crawl_manager, storage_ops, background_job_ops)
```
```diff
@@ -584,6 +615,7 @@ def init_profiles_api(
     async def delete_profile_browser(browserid: str = Depends(browser_dep)):
         return await ops.delete_profile_browser(browserid)

-    org_ops.router.include_router(router)
+    if org_ops.router:
+        org_ops.router.include_router(router)

     return ops
```
```diff
@@ -37,6 +37,7 @@ from mypy_boto3_s3.type_defs import CompletedPartTypeDef
 from types_aiobotocore_s3 import S3Client as AIOS3Client

 from .models import (
+    BaseFile,
     CrawlFile,
     CrawlFileOut,
     Organization,
```
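Note: adding `BaseFile` to the imports supports the signature change below: `delete_crawl_file_object` now accepts the base class, so both crawl files and profile files (see the `BaseFile(**profile.resource.dict())` hunk above) flow through the same deletion path. The substitution in miniature, with illustrative classes:

```python
class BaseFile:
    filename = "data.tar.gz"


class CrawlFile(BaseFile):
    pass


def delete_file_object(file: BaseFile) -> str:
    # accepting the base class admits any subclass instance
    return file.filename


assert delete_file_object(CrawlFile()) == "data.tar.gz"
```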
```diff
@@ -504,7 +505,7 @@ class StorageOps:
         return presigned_url

     async def delete_crawl_file_object(
-        self, org: Organization, crawlfile: CrawlFile
+        self, org: Organization, crawlfile: BaseFile
     ) -> bool:
         """delete crawl file from storage."""
         return await self._delete_file(org, crawlfile.filename, crawlfile.storage)
```
```diff
@@ -40,12 +40,12 @@ def get_templates_dir():
     return os.path.join(os.path.dirname(__file__), "templates")


-def from_k8s_date(string):
+def from_k8s_date(string: str) -> Optional[datetime]:
     """convert k8s date string to datetime"""
     return datetime.fromisoformat(string[:-1]) if string else None


-def to_k8s_date(dt_val):
+def to_k8s_date(dt_val: datetime) -> str:
     """convert datetime to string for k8s"""
     return dt_val.isoformat("T") + "Z"

```
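Note: with the annotations in place the two helpers round-trip cleanly; a quick check, assuming both functions above are in scope:

```python
from datetime import datetime

dt = datetime(2024, 1, 1, 12, 0, 0)
assert to_k8s_date(dt) == "2024-01-01T12:00:00Z"
assert from_k8s_date(to_k8s_date(dt)) == dt
```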