diff --git a/backend/btrixcloud/db.py b/backend/btrixcloud/db.py index cee9132c..22df8475 100644 --- a/backend/btrixcloud/db.py +++ b/backend/btrixcloud/db.py @@ -8,20 +8,35 @@ import urllib import asyncio from uuid import UUID, uuid4 -from typing import Optional, Union, TypeVar, Type +from typing import Optional, Union, TypeVar, Type, TYPE_CHECKING -import motor.motor_asyncio +from motor.motor_asyncio import AsyncIOMotorClient, AsyncIOMotorDatabase from pydantic import BaseModel from pymongo.errors import InvalidName from .migrations import BaseMigration +if TYPE_CHECKING: + from .users import UserManager + from .orgs import OrgOps + from .crawlconfigs import CrawlConfigOps + from .crawls import CrawlOps + from .colls import CollectionOps + from .invites import InviteOps + from .storages import StorageOps + from .pages import PageOps + from .background_jobs import BackgroundJobOps +else: + UserManager = OrgOps = CrawlConfigOps = CrawlOps = CollectionOps = InviteOps = ( + StorageOps + ) = PageOps = BackgroundJobOps = object + CURR_DB_VERSION = "0043" # ============================================================================ -def resolve_db_url(): +def resolve_db_url() -> str: """get the mongo db url, either from MONGO_DB_URL or from separate username, password and host settings""" db_url = os.environ.get("MONGO_DB_URL") @@ -36,12 +51,12 @@ def resolve_db_url(): # ============================================================================ -def init_db(): +def init_db() -> tuple[AsyncIOMotorClient, AsyncIOMotorDatabase]: """initialize the mongodb connector""" db_url = resolve_db_url() - client = motor.motor_asyncio.AsyncIOMotorClient( + client = AsyncIOMotorClient( db_url, tz_aware=True, uuidRepresentation="standard", @@ -55,7 +70,7 @@ def init_db(): # ============================================================================ -async def ping_db(mdb): +async def ping_db(mdb) -> None: """run in loop until db is up, set db_inited['inited'] property to true""" print("Waiting DB", flush=True) while True: @@ -73,18 +88,17 @@ async def ping_db(mdb): # ============================================================================ async def update_and_prepare_db( # pylint: disable=R0913 - mdb, - user_manager, - org_ops, - crawl_ops, - crawl_config_ops, - coll_ops, - invite_ops, - storage_ops, - page_ops, - background_job_ops, - db_inited, -): + mdb: AsyncIOMotorDatabase, + user_manager: UserManager, + org_ops: OrgOps, + crawl_ops: CrawlOps, + crawl_config_ops: CrawlConfigOps, + coll_ops: CollectionOps, + invite_ops: InviteOps, + storage_ops: StorageOps, + page_ops: PageOps, + background_job_ops: BackgroundJobOps, +) -> None: """Prepare database for application. - Run database migrations @@ -113,7 +127,6 @@ async def update_and_prepare_db( await user_manager.create_super_user() await org_ops.create_default_org() await org_ops.check_all_org_default_storages(storage_ops) - db_inited["inited"] = True print("Database updated and ready", flush=True) diff --git a/backend/btrixcloud/main.py b/backend/btrixcloud/main.py index a3e58b38..71dd62da 100644 --- a/backend/btrixcloud/main.py +++ b/backend/btrixcloud/main.py @@ -16,7 +16,7 @@ from fastapi.openapi.utils import get_openapi from fastapi.openapi.docs import get_swagger_ui_html, get_redoc_html from pydantic import BaseModel -from .db import init_db, await_db_and_migrations, update_and_prepare_db +from .db import init_db, await_db_and_migrations from .emailsender import EmailSender from .invites import init_invites @@ -38,7 +38,7 @@ from .pages import init_pages_api from .subs import init_subs_api from .crawlmanager import CrawlManager -from .utils import run_once_lock, register_exit_handler, is_bool +from .utils import register_exit_handler, is_bool from .version import __version__ API_PREFIX = "/api" @@ -274,25 +274,8 @@ def main() -> None: coll_ops.set_page_ops(page_ops) - # run only in first worker - if run_once_lock("btrix-init-db"): - asyncio.create_task( - update_and_prepare_db( - mdb, - user_manager, - org_ops, - crawls, - crawl_config_ops, - coll_ops, - invites, - storage_ops, - page_ops, - background_job_ops, - db_inited, - ) - ) - else: - asyncio.create_task(await_db_and_migrations(mdb, db_inited)) + # await db init, migrations should have already completed in init containers + asyncio.create_task(await_db_and_migrations(mdb, db_inited)) app.include_router(org_ops.router) diff --git a/backend/btrixcloud/main_bg.py b/backend/btrixcloud/main_bg.py index ff80fb4b..ab11752b 100644 --- a/backend/btrixcloud/main_bg.py +++ b/backend/btrixcloud/main_bg.py @@ -30,7 +30,9 @@ async def main(): ) return 1 - (org_ops, _, _, _, _, page_ops, coll_ops, _, _, _, _, user_manager) = init_ops() + (org_ops, _, _, _, _, page_ops, coll_ops, _, _, _, _, user_manager, _, _, _) = ( + init_ops() + ) # Run job (generic) if job_type == BgJobType.OPTIMIZE_PAGES: diff --git a/backend/btrixcloud/main_migrations.py b/backend/btrixcloud/main_migrations.py new file mode 100644 index 00000000..42d2669a --- /dev/null +++ b/backend/btrixcloud/main_migrations.py @@ -0,0 +1,61 @@ +"""entrypoint module for init_container, handles db migration""" + +import os +import sys +import asyncio + +from .ops import init_ops +from .db import update_and_prepare_db + + +# ============================================================================ +# pylint: disable=too-many-function-args, duplicate-code +async def main() -> int: + """init migrations""" + + # pylint: disable=import-outside-toplevel + if not os.environ.get("KUBERNETES_SERVICE_HOST"): + print( + "Sorry, the Browsertrix Backend must be run inside a Kubernetes environment.\ + Kubernetes not detected (KUBERNETES_SERVICE_HOST is not set), Exiting" + ) + return 1 + + ( + org_ops, + crawl_config_ops, + _, + crawl_ops, + _, + page_ops, + coll_ops, + _, + storage_ops, + background_job_ops, + _, + user_manager, + invite_ops, + _, + mdb, + ) = init_ops() + + await update_and_prepare_db( + mdb, + user_manager, + org_ops, + crawl_ops, + crawl_config_ops, + coll_ops, + invite_ops, + storage_ops, + page_ops, + background_job_ops, + ) + + return 0 + + +# # ============================================================================ +if __name__ == "__main__": + return_code = asyncio.run(main()) + sys.exit(return_code) diff --git a/backend/btrixcloud/main_op.py b/backend/btrixcloud/main_op.py index e5e35bff..c1d79c16 100644 --- a/backend/btrixcloud/main_op.py +++ b/backend/btrixcloud/main_op.py @@ -39,6 +39,9 @@ def main(): background_job_ops, event_webhook_ops, _, + _, + _, + _, ) = init_ops() return init_operator_api( diff --git a/backend/btrixcloud/ops.py b/backend/btrixcloud/ops.py index 4b64f69d..83f18504 100644 --- a/backend/btrixcloud/ops.py +++ b/backend/btrixcloud/ops.py @@ -1,6 +1,7 @@ """shared helper to initialize ops classes""" from typing import Tuple +from motor.motor_asyncio import AsyncIOMotorClient, AsyncIOMotorDatabase from .crawlmanager import CrawlManager from .db import init_db @@ -35,6 +36,9 @@ def init_ops() -> Tuple[ BackgroundJobOps, EventWebhookOps, UserManager, + InviteOps, + AsyncIOMotorClient, + AsyncIOMotorDatabase, ]: """Initialize and return ops classes""" email = EmailSender() @@ -122,4 +126,7 @@ def init_ops() -> Tuple[ background_job_ops, event_webhook_ops, user_manager, + invite_ops, + dbclient, + mdb, ) diff --git a/backend/btrixcloud/utils.py b/backend/btrixcloud/utils.py index 31f98090..8e4b44fd 100644 --- a/backend/btrixcloud/utils.py +++ b/backend/btrixcloud/utils.py @@ -1,7 +1,6 @@ """k8s utils""" import asyncio -import atexit import csv import io import json @@ -58,26 +57,6 @@ def dt_now() -> datetime: return datetime.now(timezone.utc).replace(microsecond=0) -def run_once_lock(name) -> bool: - """run once lock via temp directory - - if dir doesn't exist, return true - - if exists, return false""" - lock_dir = "/tmp/." + name - try: - os.mkdir(lock_dir) - # pylint: disable=bare-except - except: - return False - - # just in case, delete dir on exit - def del_dir(): - print("release lock: " + lock_dir, flush=True) - os.rmdir(lock_dir) - - atexit.register(del_dir) - return True - - def register_exit_handler() -> None: """register exit handler to exit on SIGTERM""" loop = asyncio.get_running_loop() diff --git a/chart/templates/backend.yaml b/chart/templates/backend.yaml index 3ce6dec5..0164daad 100644 --- a/chart/templates/backend.yaml +++ b/chart/templates/backend.yaml @@ -56,6 +56,48 @@ spec: configMap: name: email-templates + initContainers: + - name: migrations + image: {{ .Values.backend_image }} + imagePullPolicy: {{ .Values.backend_pull_policy }} + command: ["python3", "-m", "btrixcloud.main_migrations"] + + envFrom: + - configMapRef: + name: backend-env-config + - secretRef: + name: backend-auth + - secretRef: + name: mongo-auth + + env: + - name: MOTOR_MAX_WORKERS + value: "{{ .Values.backend_mongodb_workers | default 1 }}" + + volumeMounts: + - name: config-volume + mountPath: /config + + - name: ops-configs + mountPath: /ops-configs/ + + - name: ops-proxy-configs + mountPath: /ops-proxy-configs/ + + - name: app-templates + mountPath: /app/btrixcloud/templates/ + + - name: email-templates + mountPath: /app/btrixcloud/email-templates/ + + resources: + limits: + memory: {{ .Values.backend_memory }} + + requests: + cpu: {{ .Values.backend_cpu }} + memory: {{ .Values.backend_memory }} + containers: - name: api image: {{ .Values.backend_image }}