Misc backend fixes (#133)
* misc backend fixes:
  - fix uuid typing: roles list, user invites
  - crawlconfig: fix created date setting, fix userName lookup
  - docker: fix timezone for scheduler, fix running check
  - remove prints
  - fix get crawl stuck in 'stopping': check finished list first, then run list (in case k8s job has not been deleted)
parent d7f58c964c
commit 2b2e6fedfa
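The recurring pattern in the archive and crawl-config hunks below is converting FastAPI path parameters from str to uuid.UUID once at the boundary, so the ops layer and Mongo queries only ever see real UUID objects. A minimal sketch of that pattern, not code from this commit (the app, FAKE_DB and lookup names are illustrative):

# Sketch only: FAKE_DB stands in for a Mongo collection keyed by UUID _id values.
import uuid
from typing import Optional

from fastapi import FastAPI, HTTPException

app = FastAPI()

FAKE_DB = {uuid.UUID("00000000-0000-0000-0000-000000000001"): {"name": "example"}}


def lookup(aid: uuid.UUID) -> Optional[dict]:
    # The lookup layer only ever receives real UUID objects.
    return FAKE_DB.get(aid)


@app.get("/archives/{aid}")
async def get_archive(aid: str):
    # Path params arrive as strings; convert once at the boundary.
    try:
        aid_uuid = uuid.UUID(aid)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=f"Invalid archive id: {aid}") from exc

    archive = lookup(aid_uuid)
    if not archive:
        raise HTTPException(status_code=404, detail=f"Archive '{aid}' not found")
    return archive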
@@ -96,7 +96,7 @@ class Archive(BaseMongoModel):
         user_list = await user_manager.get_user_names_by_ids(keys)

         for archive_user in user_list:
-            id_ = archive_user["id"]
+            id_ = str(archive_user["id"])
             role = result["users"].get(id_)
             if not role:
                 continue
@@ -158,10 +158,10 @@ class ArchiveOps:
         return [Archive.from_dict(res) for res in results]

     async def get_archive_for_user_by_id(
-        self, aid: str, user: User, role: UserRole = UserRole.VIEWER
+        self, aid: uuid.UUID, user: User, role: UserRole = UserRole.VIEWER
     ):
         """Get an archive for user by unique id"""
-        query = {f"users.{user.id}": {"$gte": role.value}, "_id": uuid.UUID(aid)}
+        query = {f"users.{user.id}": {"$gte": role.value}, "_id": aid}
         res = await self.archives.find_one(query)
         return Archive.from_dict(res)

@@ -221,7 +221,7 @@ def init_archives_api(app, mdb, user_manager, invites, user_dep: User):
     ops = ArchiveOps(mdb, invites)

     async def archive_dep(aid: str, user: User = Depends(user_dep)):
-        archive = await ops.get_archive_for_user_by_id(aid, user)
+        archive = await ops.get_archive_for_user_by_id(uuid.UUID(aid), user)
         if not archive:
             raise HTTPException(status_code=404, detail=f"Archive '{aid}' not found")

@@ -143,8 +143,9 @@ class UpdateSchedule(BaseModel):
 class CrawlOps:
     """Crawl Config Operations"""

-    def __init__(self, mdb, archive_ops, crawl_manager):
+    def __init__(self, mdb, user_manager, archive_ops, crawl_manager):
         self.crawl_configs = mdb["crawl_configs"]
+        self.user_manager = user_manager
         self.archive_ops = archive_ops
         self.crawl_manager = crawl_manager

@@ -168,6 +169,7 @@ class CrawlOps:
         data["aid"] = archive.id
         data["userid"] = user.id
         data["_id"] = uuid.uuid4()
+        data["created"] = datetime.utcnow().replace(microsecond=0, tzinfo=None)

         if config.colls:
             data["colls"] = await self.coll_ops.find_collections(
@@ -176,8 +178,6 @@ class CrawlOps:

         result = await self.crawl_configs.insert_one(data)

-        data["created"] = datetime.utcnow().replace(microsecond=0, tzinfo=None)
-
         crawlconfig = CrawlConfig.from_dict(data)

         new_name = await self.crawl_manager.add_crawl_config(
@@ -190,7 +190,7 @@ class CrawlOps:
         """ Update schedule for existing crawl config"""

         if not await self.crawl_configs.find_one_and_update(
-            {"_id": cid}, {"$set": {"schedule": update.schedule}}
+            {"_id": uuid.UUID(cid)}, {"$set": {"schedule": update.schedule}}
         ):
             return False

@@ -248,6 +248,11 @@ class CrawlOps:
         if len(crawls) == 1:
             out.currCrawlId = crawls[0].id

+        user = await self.user_manager.get(crawlconfig.userid)
+        # pylint: disable=invalid-name
+        if user:
+            out.userName = user.name
+
         return out

     async def get_crawl_config(self, cid: uuid.UUID, archive: Archive):
@@ -270,9 +275,9 @@ class CrawlOps:

 # ============================================================================
 # pylint: disable=redefined-builtin,invalid-name,too-many-locals
-def init_crawl_config_api(mdb, user_dep, archive_ops, crawl_manager):
+def init_crawl_config_api(mdb, user_dep, user_manager, archive_ops, crawl_manager):
     """Init /crawlconfigs api routes"""
-    ops = CrawlOps(mdb, archive_ops, crawl_manager)
+    ops = CrawlOps(mdb, user_manager, archive_ops, crawl_manager)

     router = ops.router

@@ -342,7 +347,7 @@ def init_crawl_config_api(mdb, user_dep, archive_ops, crawl_manager):

         crawl_id = None
         try:
-            crawl_id = await crawl_manager.run_crawl_config(cid, str(user.id))
+            crawl_id = await crawl_manager.run_crawl_config(cid, userid=str(user.id))
         except Exception as e:
             # pylint: disable=raise-missing-from
             raise HTTPException(status_code=500, detail=f"Error starting crawl: {e}")
@@ -313,17 +313,15 @@ class CrawlOps:

     async def get_crawl(self, crawlid: str, archive: Archive):
         """ Get data for single crawl """
-        crawl = await self.crawl_manager.get_running_crawl(crawlid, archive.id_str)
-        if crawl:
-            await self.get_redis_stats([crawl])

+        res = await self.crawls.find_one({"_id": crawlid, "aid": archive.id})
+
+        if not res:
+            crawl = await self.crawl_manager.get_running_crawl(crawlid, archive.id_str)
+            if crawl:
+                await self.get_redis_stats([crawl])
+
         else:
-            res = await self.crawls.find_one({"_id": crawlid, "aid": archive.id})
-            if not res:
-                raise HTTPException(
-                    status_code=404, detail=f"Crawl not found: {crawlid}"
-                )
-
             files = [CrawlFile(**data) for data in res["files"]]

             del res["files"]
@@ -331,6 +329,11 @@ class CrawlOps:
             res["resources"] = await self._resolve_signed_urls(files, archive)
             crawl = CrawlOut.from_dict(res)

+        if not crawl:
+            raise HTTPException(
+                status_code=404, detail=f"Crawl not found: {crawlid}"
+            )
+
         return await self._resolve_crawl_refs(crawl, archive)

     async def _resolve_crawl_refs(
@@ -359,6 +359,9 @@ class DockerManager:
         try:
             container = await self.client.containers.get(crawl_id)

+            if container["State"]["Status"] != "running":
+                return None
+
             if aid and container["Config"]["Labels"]["btrix.archive"] != aid:
                 return None

@@ -66,6 +66,7 @@ def main():
     crawl_config_ops = init_crawl_config_api(
         mdb,
         current_active_user,
+        user_manager,
         archive_ops,
         crawl_manager,
     )
@@ -6,6 +6,7 @@ from apscheduler.schedulers.background import BackgroundScheduler
 from apscheduler.triggers.cron import CronTrigger
 from apscheduler.jobstores.mongodb import MongoDBJobStore
 from pymongo import MongoClient
+from pytz import utc

 from db import DATABASE_URL

@@ -28,7 +29,7 @@ def run_scheduler(event_q, trigger_q):

     print("Initializing Scheduler...", flush=True)

-    scheduler = BackgroundScheduler()
+    scheduler = BackgroundScheduler(timezone=utc)

     mongoclient = MongoClient(DATABASE_URL)

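The scheduler hunk above passes an explicit UTC timezone so cron schedules do not depend on whatever local timezone the container happens to report. A minimal usage sketch under that assumption (the print_tick job and the cron expression are illustrative only, not part of this commit):

from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.cron import CronTrigger
from pytz import utc


def print_tick():
    print("tick", flush=True)


# Without an explicit timezone, APScheduler falls back to the host's local
# timezone, which inside a container may not match what the schedule assumes.
scheduler = BackgroundScheduler(timezone=utc)
scheduler.add_job(print_tick, CronTrigger.from_crontab("0 * * * *", timezone=utc))
scheduler.start()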
@@ -107,5 +107,4 @@ async def get_presigned_url(archive, crawlfile, crawl_manager, duration=3600):
             "get_object", Params={"Bucket": bucket, "Key": key}, ExpiresIn=duration
         )

-    print("presigned_url", presigned_url)
     return presigned_url