Misc backend fixes (#133)

* misc backend fixes:
- fix uuid typing: roles list, user invites
- crawlconfig: fix created date setting, fix userName lookup
- docker: fix timezone for scheduler, fix running-state check
- remove debug prints
- fix get crawl stuck in 'stopping': check the finished list first, then the running list, in case the k8s job has not yet been deleted (see the sketch after this list)
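
The 'stopping' fix reorders get_crawl so the finished-crawls collection is checked before the live crawl list. Below is a minimal sketch of the resulting flow, assembled from the added lines of the crawl-ops diff further down; the surrounding class, imports, and helpers (crawls, crawl_manager, get_redis_stats, _resolve_signed_urls, _resolve_crawl_refs) are assumed from that file and simplified here.

    # Sketch only: the reordered lookup from this commit's get_crawl() change.
    # The finished-crawls collection is read first; the running-crawl list is
    # only a fallback, so a finished crawl whose k8s job has not been deleted
    # yet is no longer reported as 'stopping'.
    async def get_crawl(self, crawlid: str, archive: Archive):
        """ Get data for single crawl """
        res = await self.crawls.find_one({"_id": crawlid, "aid": archive.id})

        if not res:
            # not in the finished list: fall back to the running-crawl list
            crawl = await self.crawl_manager.get_running_crawl(crawlid, archive.id_str)
            if crawl:
                await self.get_redis_stats([crawl])
        else:
            files = [CrawlFile(**data) for data in res["files"]]
            del res["files"]
            res["resources"] = await self._resolve_signed_urls(files, archive)
            crawl = CrawlOut.from_dict(res)

        if not crawl:
            raise HTTPException(status_code=404, detail=f"Crawl not found: {crawlid}")

        return await self._resolve_crawl_refs(crawl, archive)
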
Ilya Kreymer 2022-01-31 19:41:04 -08:00 committed by GitHub
parent d7f58c964c
commit 2b2e6fedfa
7 changed files with 34 additions and 22 deletions

View File

@@ -96,7 +96,7 @@ class Archive(BaseMongoModel):
         user_list = await user_manager.get_user_names_by_ids(keys)
 
         for archive_user in user_list:
-            id_ = archive_user["id"]
+            id_ = str(archive_user["id"])
             role = result["users"].get(id_)
             if not role:
                 continue
@@ -158,10 +158,10 @@ class ArchiveOps:
         return [Archive.from_dict(res) for res in results]
 
     async def get_archive_for_user_by_id(
-        self, aid: str, user: User, role: UserRole = UserRole.VIEWER
+        self, aid: uuid.UUID, user: User, role: UserRole = UserRole.VIEWER
     ):
         """Get an archive for user by unique id"""
-        query = {f"users.{user.id}": {"$gte": role.value}, "_id": uuid.UUID(aid)}
+        query = {f"users.{user.id}": {"$gte": role.value}, "_id": aid}
         res = await self.archives.find_one(query)
         return Archive.from_dict(res)
@@ -221,7 +221,7 @@ def init_archives_api(app, mdb, user_manager, invites, user_dep: User):
     ops = ArchiveOps(mdb, invites)
 
     async def archive_dep(aid: str, user: User = Depends(user_dep)):
-        archive = await ops.get_archive_for_user_by_id(aid, user)
+        archive = await ops.get_archive_for_user_by_id(uuid.UUID(aid), user)
         if not archive:
             raise HTTPException(status_code=404, detail=f"Archive '{aid}' not found")

View File

@@ -143,8 +143,9 @@ class UpdateSchedule(BaseModel):
 class CrawlOps:
     """Crawl Config Operations"""
 
-    def __init__(self, mdb, archive_ops, crawl_manager):
+    def __init__(self, mdb, user_manager, archive_ops, crawl_manager):
         self.crawl_configs = mdb["crawl_configs"]
+        self.user_manager = user_manager
         self.archive_ops = archive_ops
         self.crawl_manager = crawl_manager
@@ -168,6 +169,7 @@ class CrawlOps:
         data["aid"] = archive.id
         data["userid"] = user.id
         data["_id"] = uuid.uuid4()
+        data["created"] = datetime.utcnow().replace(microsecond=0, tzinfo=None)
 
         if config.colls:
             data["colls"] = await self.coll_ops.find_collections(
@@ -176,8 +178,6 @@
         result = await self.crawl_configs.insert_one(data)
 
-        data["created"] = datetime.utcnow().replace(microsecond=0, tzinfo=None)
-
         crawlconfig = CrawlConfig.from_dict(data)
 
         new_name = await self.crawl_manager.add_crawl_config(
@@ -190,7 +190,7 @@
         """ Update schedule for existing crawl config"""
 
         if not await self.crawl_configs.find_one_and_update(
-            {"_id": cid}, {"$set": {"schedule": update.schedule}}
+            {"_id": uuid.UUID(cid)}, {"$set": {"schedule": update.schedule}}
         ):
             return False
@@ -248,6 +248,11 @@ class CrawlOps:
         if len(crawls) == 1:
             out.currCrawlId = crawls[0].id
 
+        user = await self.user_manager.get(crawlconfig.userid)
+        # pylint: disable=invalid-name
+        if user:
+            out.userName = user.name
+
         return out
 
     async def get_crawl_config(self, cid: uuid.UUID, archive: Archive):
@@ -270,9 +275,9 @@
 # ============================================================================
 # pylint: disable=redefined-builtin,invalid-name,too-many-locals
-def init_crawl_config_api(mdb, user_dep, archive_ops, crawl_manager):
+def init_crawl_config_api(mdb, user_dep, user_manager, archive_ops, crawl_manager):
     """Init /crawlconfigs api routes"""
 
-    ops = CrawlOps(mdb, archive_ops, crawl_manager)
+    ops = CrawlOps(mdb, user_manager, archive_ops, crawl_manager)
 
     router = ops.router
@@ -342,7 +347,7 @@ def init_crawl_config_api(mdb, user_dep, archive_ops, crawl_manager):
         crawl_id = None
         try:
-            crawl_id = await crawl_manager.run_crawl_config(cid, str(user.id))
+            crawl_id = await crawl_manager.run_crawl_config(cid, userid=str(user.id))
         except Exception as e:
             # pylint: disable=raise-missing-from
             raise HTTPException(status_code=500, detail=f"Error starting crawl: {e}")

View File

@@ -313,17 +313,15 @@ class CrawlOps:
     async def get_crawl(self, crawlid: str, archive: Archive):
         """ Get data for single crawl """
-        crawl = await self.crawl_manager.get_running_crawl(crawlid, archive.id_str)
-        if crawl:
-            await self.get_redis_stats([crawl])
+        res = await self.crawls.find_one({"_id": crawlid, "aid": archive.id})
+
+        if not res:
+            crawl = await self.crawl_manager.get_running_crawl(crawlid, archive.id_str)
+            if crawl:
+                await self.get_redis_stats([crawl])
 
         else:
-            res = await self.crawls.find_one({"_id": crawlid, "aid": archive.id})
-            if not res:
-                raise HTTPException(
-                    status_code=404, detail=f"Crawl not found: {crawlid}"
-                )
-
             files = [CrawlFile(**data) for data in res["files"]]
 
             del res["files"]
@@ -331,6 +329,11 @@ class CrawlOps:
             res["resources"] = await self._resolve_signed_urls(files, archive)
 
             crawl = CrawlOut.from_dict(res)
 
+        if not crawl:
+            raise HTTPException(
+                status_code=404, detail=f"Crawl not found: {crawlid}"
+            )
+
         return await self._resolve_crawl_refs(crawl, archive)
 
     async def _resolve_crawl_refs(

View File

@@ -359,6 +359,9 @@ class DockerManager:
         try:
             container = await self.client.containers.get(crawl_id)
 
+            if container["State"]["Status"] != "running":
+                return None
+
             if aid and container["Config"]["Labels"]["btrix.archive"] != aid:
                 return None

View File

@@ -66,6 +66,7 @@ def main():
     crawl_config_ops = init_crawl_config_api(
         mdb,
         current_active_user,
+        user_manager,
         archive_ops,
         crawl_manager,
     )

View File

@@ -6,6 +6,7 @@ from apscheduler.schedulers.background import BackgroundScheduler
 from apscheduler.triggers.cron import CronTrigger
 from apscheduler.jobstores.mongodb import MongoDBJobStore
 from pymongo import MongoClient
+from pytz import utc
 
 from db import DATABASE_URL
@@ -28,7 +29,7 @@ def run_scheduler(event_q, trigger_q):
     print("Initializing Scheduler...", flush=True)
 
-    scheduler = BackgroundScheduler()
+    scheduler = BackgroundScheduler(timezone=utc)
 
     mongoclient = MongoClient(DATABASE_URL)

View File

@@ -107,5 +107,4 @@ async def get_presigned_url(archive, crawlfile, crawl_manager, duration=3600):
             "get_object", Params={"Bucket": bucket, "Key": key}, ExpiresIn=duration
         )
-        print("presigned_url", presigned_url)
 
     return presigned_url