browsertrix/backend/btrixcloud/db.py
Ilya Kreymer 5b6aa3bc95
Affinity + Tolerations + Cleanup Crawl Job (#256)
* k8s: add tolerations for 'nodeType=crawling:NoSchedule' to allow scheduling crawling on designated nodes for crawler and profiles jobs and statefulsets
* add affinity for 'nodeType=crawling' on crawling and profile browser statefulsets
* refactor crawljob: combine crawl_updater logic into base crawl_job
* increment new 'crawlAttemptCount' counter crawlconfig when crawl is started, not necessarily finished, to avoid deleting configs that had attempted but not finished crawls.
* better external mongodb support: use MONGO_DB_URL to set custom url directly, otherwise build from username, password and mongo host
2022-06-10 19:21:37 -07:00

73 lines
1.8 KiB
Python

"""
Browsertrix API Mongo DB initialization
"""
import os
from typing import Optional
from urllib.parse import quote_plus

import motor.motor_asyncio
from pydantic import BaseModel, UUID4
# ============================================================================
def resolve_db_url():
    """Return the mongo db connection url.

    If the MONGO_DB_URL environment variable is set to a non-empty value it
    is returned unchanged. Otherwise the url is built from
    MONGO_INITDB_ROOT_USERNAME, MONGO_INITDB_ROOT_PASSWORD and MONGO_HOST,
    always using port 27017.

    Raises:
        KeyError: if MONGO_DB_URL is unset/empty and any of the three
            fallback variables is missing.
    """
    db_url = os.environ.get("MONGO_DB_URL")
    if db_url:
        return db_url

    # Credentials are percent-escaped so that reserved characters such as
    # '@', ':', '/' or '%' in the username or password cannot corrupt the
    # userinfo part of the URI.
    mongo_user = quote_plus(os.environ["MONGO_INITDB_ROOT_USERNAME"])
    mongo_pass = quote_plus(os.environ["MONGO_INITDB_ROOT_PASSWORD"])
    mongo_host = os.environ["MONGO_HOST"]

    return f"mongodb://{mongo_user}:{mongo_pass}@{mongo_host}:27017"
# ============================================================================
def init_db():
    """Initialize the mongodb connector.

    Creates an AsyncIOMotorClient for the url from resolve_db_url(), with
    uuidRepresentation set to "standard", and returns a
    ``(client, database)`` tuple where the database is "browsertrixcloud".
    """
    mongo_client = motor.motor_asyncio.AsyncIOMotorClient(
        resolve_db_url(), uuidRepresentation="standard"
    )
    return mongo_client, mongo_client["browsertrixcloud"]
# ============================================================================
class BaseMongoModel(BaseModel):
    """Base pydantic model that is also a mongo doc.

    The document key lives under ``_id`` in mongo dicts and under ``id`` on
    the model; ``from_dict`` and ``to_dict`` translate between the two.
    """

    # Model-side name for the mongo ``_id`` key.
    id: Optional[UUID4]

    @property
    def id_str(self):
        """Return id as str"""
        return str(self.id)

    @classmethod
    def from_dict(cls, data):
        """Convert a dict from mongo to an instance of this model.

        Returns None when ``data`` is empty or None. Otherwise the ``_id``
        key is renamed to ``id`` and the result is passed to the model
        constructor. The caller's dict is left unmodified.

        Raises:
            KeyError: if ``data`` is non-empty but has no ``_id`` key.
        """
        if not data:
            return None

        # Rename on a shallow copy so the caller's dict keeps its "_id" key.
        doc = dict(data)
        doc["id"] = doc.pop("_id")
        return cls(**doc)

    def serialize(self, **opts):
        """Convert this model to a dict, leaving out unset, default and
        None-valued fields. Extra ``opts`` are forwarded to ``dict()``."""
        return self.dict(
            exclude_unset=True, exclude_defaults=True, exclude_none=True, **opts
        )

    def to_dict(self, **opts):
        """Convert this model to a dict for mongo.

        The ``id`` entry is moved to ``_id``; if the dict has no ``id``
        entry (e.g. excluded via ``opts``), ``_id`` is set to "".
        """
        res = self.dict(**opts)
        res["_id"] = res.pop("id", "")
        return res