* k8s: add tolerations for 'nodeType=crawling:NoSchedule' to allow scheduling crawling on designated nodes for crawler and profiles jobs and statefulsets * add affinity for 'nodeType=crawling' on crawling and profile browser statefulsets * refactor crawljob: combine crawl_updater logic into base crawl_job * increment new 'crawlAttemptCount' counter crawlconfig when crawl is started, not necessarily finished, to avoid deleting configs that had attempted but not finished crawls. * better external mongodb support: use MONGO_DB_URL to set custom url directly, otherwise build from username, password and mongo host
73 lines
1.8 KiB
Python
73 lines
1.8 KiB
Python
"""
|
|
Browsertrix API Mongo DB initialization
|
|
"""
|
|
|
|
import os
|
|
from typing import Optional
|
|
|
|
import motor.motor_asyncio
|
|
|
|
from pydantic import BaseModel, UUID4
|
|
|
|
|
|
# ============================================================================
|
|
def resolve_db_url():
|
|
"""get the mongo db url, either from MONGO_DB_URL or
|
|
from separate username, password and host settings"""
|
|
db_url = os.environ.get("MONGO_DB_URL")
|
|
if db_url:
|
|
return db_url
|
|
|
|
mongo_user = os.environ["MONGO_INITDB_ROOT_USERNAME"]
|
|
mongo_pass = os.environ["MONGO_INITDB_ROOT_PASSWORD"]
|
|
mongo_host = os.environ["MONGO_HOST"]
|
|
|
|
return f"mongodb://{mongo_user}:{mongo_pass}@{mongo_host}:27017"
|
|
|
|
|
|
# ============================================================================
|
|
def init_db():
|
|
"""initializde the mongodb connector"""
|
|
|
|
db_url = resolve_db_url()
|
|
|
|
client = motor.motor_asyncio.AsyncIOMotorClient(
|
|
db_url, uuidRepresentation="standard"
|
|
)
|
|
|
|
mdb = client["browsertrixcloud"]
|
|
|
|
return client, mdb
|
|
|
|
|
|
# ============================================================================
|
|
class BaseMongoModel(BaseModel):
|
|
"""Base pydantic model that is also a mongo doc"""
|
|
|
|
id: Optional[UUID4]
|
|
|
|
@property
|
|
def id_str(self):
|
|
""" Return id as str """
|
|
return str(self.id)
|
|
|
|
@classmethod
|
|
def from_dict(cls, data):
|
|
"""convert dict from mongo to an Archive"""
|
|
if not data:
|
|
return None
|
|
data["id"] = data.pop("_id")
|
|
return cls(**data)
|
|
|
|
def serialize(self, **opts):
|
|
"""convert Archive to dict"""
|
|
return self.dict(
|
|
exclude_unset=True, exclude_defaults=True, exclude_none=True, **opts
|
|
)
|
|
|
|
def to_dict(self, **opts):
|
|
"""convert to dict for mongo"""
|
|
res = self.dict(**opts)
|
|
res["_id"] = res.pop("id", "")
|
|
return res
|