Misc backend fixes (#133)
* misc backend fixes:
  - fix uuid typing: roles list, user invites
  - crawlconfig: fix created date setting, fix userName lookup
  - docker: fix timezone for scheduler, fix running check
  - remove debug prints
  - fix get crawl stuck in 'stopping': check finished list first, then run list (in case the k8s job has not been deleted yet)
Parent: d7f58c964c
Commit: 2b2e6fedfa
@@ -96,7 +96,7 @@ class Archive(BaseMongoModel):
         user_list = await user_manager.get_user_names_by_ids(keys)

         for archive_user in user_list:
-            id_ = archive_user["id"]
+            id_ = str(archive_user["id"])
             role = result["users"].get(id_)
             if not role:
                 continue
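Why the str() cast: the archive's users role map is stored in MongoDB keyed by the string form of each user id, so indexing it with a raw uuid.UUID silently returns nothing. A minimal standalone sketch (not project code) of the mismatch:

    import uuid

    user_id = uuid.uuid4()
    roles = {str(user_id): 40}  # role map as stored in Mongo: keys are strings

    print(roles.get(user_id))       # None - a UUID key never matches a str key
    print(roles.get(str(user_id)))  # 40 - matches after casting, as in the fix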
@@ -158,10 +158,10 @@ class ArchiveOps:
         return [Archive.from_dict(res) for res in results]

     async def get_archive_for_user_by_id(
-        self, aid: str, user: User, role: UserRole = UserRole.VIEWER
+        self, aid: uuid.UUID, user: User, role: UserRole = UserRole.VIEWER
     ):
         """Get an archive for user by unique id"""
-        query = {f"users.{user.id}": {"$gte": role.value}, "_id": uuid.UUID(aid)}
+        query = {f"users.{user.id}": {"$gte": role.value}, "_id": aid}
         res = await self.archives.find_one(query)
         return Archive.from_dict(res)
@@ -221,7 +221,7 @@ def init_archives_api(app, mdb, user_manager, invites, user_dep: User):
     ops = ArchiveOps(mdb, invites)

     async def archive_dep(aid: str, user: User = Depends(user_dep)):
-        archive = await ops.get_archive_for_user_by_id(aid, user)
+        archive = await ops.get_archive_for_user_by_id(uuid.UUID(aid), user)
         if not archive:
            raise HTTPException(status_code=404, detail=f"Archive '{aid}' not found")
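These two hunks move the uuid.UUID(aid) conversion to the API boundary: the FastAPI dependency parses the path string once, and get_archive_for_user_by_id works with an already-typed id, matching the UUID _id stored in Mongo. A hedged sketch of the same boundary-conversion pattern (app, route, and dependency names here are illustrative, not the project's):

    import uuid

    from fastapi import Depends, FastAPI, HTTPException

    app = FastAPI()

    async def archive_id_dep(aid: str) -> uuid.UUID:
        # Parse the path parameter once at the boundary; everything behind
        # the dependency receives a typed uuid.UUID.
        try:
            return uuid.UUID(aid)
        except ValueError:
            raise HTTPException(status_code=400, detail=f"Invalid archive id: {aid}")

    @app.get("/archives/{aid}")
    async def get_archive(aid: uuid.UUID = Depends(archive_id_dep)):
        return {"id": str(aid)}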
@@ -143,8 +143,9 @@ class UpdateSchedule(BaseModel):
 class CrawlOps:
     """Crawl Config Operations"""

-    def __init__(self, mdb, archive_ops, crawl_manager):
+    def __init__(self, mdb, user_manager, archive_ops, crawl_manager):
         self.crawl_configs = mdb["crawl_configs"]
+        self.user_manager = user_manager
         self.archive_ops = archive_ops
         self.crawl_manager = crawl_manager
@@ -168,6 +169,7 @@ class CrawlOps:
         data["aid"] = archive.id
         data["userid"] = user.id
         data["_id"] = uuid.uuid4()
+        data["created"] = datetime.utcnow().replace(microsecond=0, tzinfo=None)

         if config.colls:
             data["colls"] = await self.coll_ops.find_collections(
@@ -176,8 +178,6 @@ class CrawlOps:

         result = await self.crawl_configs.insert_one(data)

-        data["created"] = datetime.utcnow().replace(microsecond=0, tzinfo=None)
-
         crawlconfig = CrawlConfig.from_dict(data)

         new_name = await self.crawl_manager.add_crawl_config(
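The two hunks above are the created-date fix: the timestamp is now set before insert_one rather than after, since the driver persists the document as it exists at call time, and a field added afterwards lives only in the local dict. A tiny self-contained sketch of the ordering bug, with a plain dict standing in for the Mongo collection:

    from datetime import datetime

    stored = {}

    def insert_one(doc):
        # Stand-in for the driver's insert_one: snapshot the doc at call time.
        stored.update(dict(doc))

    data = {"_id": 1}
    data["created"] = datetime.utcnow().replace(microsecond=0)  # set BEFORE insert
    insert_one(data)

    assert "created" in stored  # setting created after insert_one would fail this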
@@ -190,7 +190,7 @@ class CrawlOps:
         """ Update schedule for existing crawl config"""

         if not await self.crawl_configs.find_one_and_update(
-            {"_id": cid}, {"$set": {"schedule": update.schedule}}
+            {"_id": uuid.UUID(cid)}, {"$set": {"schedule": update.schedule}}
         ):
             return False
@@ -248,6 +248,11 @@ class CrawlOps:
         if len(crawls) == 1:
             out.currCrawlId = crawls[0].id

+        user = await self.user_manager.get(crawlconfig.userid)
+        # pylint: disable=invalid-name
+        if user:
+            out.userName = user.name
+
         return out

     async def get_crawl_config(self, cid: uuid.UUID, archive: Archive):
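This is the userName lookup fix from the commit message: the output model now resolves the stored userid to a display name via the injected user_manager, and tolerates users that no longer exist rather than raising. A hedged sketch with an in-memory stand-in for the user manager (all stub names are illustrative):

    import asyncio

    class StubUserManager:
        """Maps user ids to user objects, like user_manager.get() above."""
        def __init__(self, users):
            self.users = users

        async def get(self, userid):
            return self.users.get(userid)

    class StubUser:
        name = "alice"

    async def main():
        manager = StubUserManager({"u1": StubUser()})
        out = {"userid": "u1", "userName": None}
        user = await manager.get(out["userid"])
        if user:  # user may have been deleted; leave userName unset then
            out["userName"] = user.name
        print(out)  # {'userid': 'u1', 'userName': 'alice'}

    asyncio.run(main())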
@@ -270,9 +275,9 @@ class CrawlOps:

 # ============================================================================
 # pylint: disable=redefined-builtin,invalid-name,too-many-locals
-def init_crawl_config_api(mdb, user_dep, archive_ops, crawl_manager):
+def init_crawl_config_api(mdb, user_dep, user_manager, archive_ops, crawl_manager):
     """Init /crawlconfigs api routes"""
-    ops = CrawlOps(mdb, archive_ops, crawl_manager)
+    ops = CrawlOps(mdb, user_manager, archive_ops, crawl_manager)

     router = ops.router
@@ -342,7 +347,7 @@ def init_crawl_config_api(mdb, user_dep, archive_ops, crawl_manager):

         crawl_id = None
         try:
-            crawl_id = await crawl_manager.run_crawl_config(cid, str(user.id))
+            crawl_id = await crawl_manager.run_crawl_config(cid, userid=str(user.id))
         except Exception as e:
             # pylint: disable=raise-missing-from
             raise HTTPException(status_code=500, detail=f"Error starting crawl: {e}")
@@ -313,17 +313,15 @@ class CrawlOps:

     async def get_crawl(self, crawlid: str, archive: Archive):
         """ Get data for single crawl """

+        res = await self.crawls.find_one({"_id": crawlid, "aid": archive.id})
+
+        if not res:
             crawl = await self.crawl_manager.get_running_crawl(crawlid, archive.id_str)
             if crawl:
                 await self.get_redis_stats([crawl])

         else:
-            res = await self.crawls.find_one({"_id": crawlid, "aid": archive.id})
-            if not res:
-                raise HTTPException(
-                    status_code=404, detail=f"Crawl not found: {crawlid}"
-                )
-
             files = [CrawlFile(**data) for data in res["files"]]

             del res["files"]
@@ -331,6 +329,11 @@ class CrawlOps:
             res["resources"] = await self._resolve_signed_urls(files, archive)
             crawl = CrawlOut.from_dict(res)

+        if not crawl:
+            raise HTTPException(
+                status_code=404, detail=f"Crawl not found: {crawlid}"
+            )
+
         return await self._resolve_crawl_refs(crawl, archive)

     async def _resolve_crawl_refs(
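Together these two hunks implement the 'stuck in stopping' fix: get_crawl now consults the finished crawls stored in Mongo first and only falls back to the run list, so a Kubernetes job that has not been deleted yet can no longer shadow an already-finished crawl. A minimal sketch of the lookup order (stand-in dicts, not the project's API):

    def get_crawl(crawl_id, finished, running):
        res = finished.get(crawl_id)     # 1. finished crawls, persisted in Mongo
        if not res:
            res = running.get(crawl_id)  # 2. fall back to live/running crawls
        if not res:
            raise KeyError(f"Crawl not found: {crawl_id}")
        return res

    finished = {"c1": {"state": "complete"}}
    running = {"c1": {"state": "stopping"}}  # stale k8s job not yet deleted

    print(get_crawl("c1", finished, running))  # {'state': 'complete'}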
@@ -359,6 +359,9 @@ class DockerManager:
         try:
             container = await self.client.containers.get(crawl_id)

+            if container["State"]["Status"] != "running":
+                return None
+
             if aid and container["Config"]["Labels"]["btrix.archive"] != aid:
                 return None
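The Docker running-check fix: containers.get() also returns containers that have exited, so the manager now inspects State.Status before treating one as a running crawl. A sketch of the guard with a stub record shaped like Docker's inspect output (plain dict here, illustrative only):

    def running_container_or_none(container, aid=None):
        if container["State"]["Status"] != "running":
            return None  # exited/created containers no longer count as running
        if aid and container["Config"]["Labels"]["btrix.archive"] != aid:
            return None  # container belongs to a different archive
        return container

    exited = {"State": {"Status": "exited"},
              "Config": {"Labels": {"btrix.archive": "a1"}}}
    print(running_container_or_none(exited, "a1"))  # None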
@@ -66,6 +66,7 @@ def main():
     crawl_config_ops = init_crawl_config_api(
         mdb,
         current_active_user,
+        user_manager,
         archive_ops,
         crawl_manager,
     )
@@ -6,6 +6,7 @@ from apscheduler.schedulers.background import BackgroundScheduler
 from apscheduler.triggers.cron import CronTrigger
 from apscheduler.jobstores.mongodb import MongoDBJobStore
 from pymongo import MongoClient
+from pytz import utc

 from db import DATABASE_URL
@@ -28,7 +29,7 @@ def run_scheduler(event_q, trigger_q):

     print("Initializing Scheduler...", flush=True)

-    scheduler = BackgroundScheduler()
+    scheduler = BackgroundScheduler(timezone=utc)

     mongoclient = MongoClient(DATABASE_URL)
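The scheduler timezone fix: without an explicit timezone, APScheduler derives one from the host, which inside a minimal container can disagree with the cron schedules users entered; pinning timezone=utc makes them deterministic. A short sketch using the stock APScheduler/pytz APIs:

    from apscheduler.schedulers.background import BackgroundScheduler
    from apscheduler.triggers.cron import CronTrigger
    from pytz import utc

    # Interpret all cron fields in UTC, regardless of the container's TZ.
    scheduler = BackgroundScheduler(timezone=utc)
    scheduler.add_job(lambda: print("tick"),
                      CronTrigger.from_crontab("0 * * * *"))  # hourly, on UTC
    scheduler.start()  # runs in a background thread until shutdown()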
@@ -107,5 +107,4 @@ async def get_presigned_url(archive, crawlfile, crawl_manager, duration=3600):
         "get_object", Params={"Bucket": bucket, "Key": key}, ExpiresIn=duration
     )

-    print("presigned_url", presigned_url)
     return presigned_url