move db migrations to initContainers: (#2449)
- should avoid gunicorn worker timeouts for long-running migrations, also fixes #2439
- add main_migrations as entrypoint to just run db migrations, using existing init_ops() call
- first run 'migrations' container with same resources as 'app' and 'op'
- additional typing for initializing db
- clean up unused code related to running only once, waiting for db to be ready
- fixes #2447
This commit is contained in:
parent
702c9ab3b7
commit
e13c3bfb48
@ -8,20 +8,35 @@ import urllib
|
||||
import asyncio
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
from typing import Optional, Union, TypeVar, Type
|
||||
from typing import Optional, Union, TypeVar, Type, TYPE_CHECKING
|
||||
|
||||
import motor.motor_asyncio
|
||||
from motor.motor_asyncio import AsyncIOMotorClient, AsyncIOMotorDatabase
|
||||
from pydantic import BaseModel
|
||||
from pymongo.errors import InvalidName
|
||||
|
||||
from .migrations import BaseMigration
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .users import UserManager
|
||||
from .orgs import OrgOps
|
||||
from .crawlconfigs import CrawlConfigOps
|
||||
from .crawls import CrawlOps
|
||||
from .colls import CollectionOps
|
||||
from .invites import InviteOps
|
||||
from .storages import StorageOps
|
||||
from .pages import PageOps
|
||||
from .background_jobs import BackgroundJobOps
|
||||
else:
|
||||
UserManager = OrgOps = CrawlConfigOps = CrawlOps = CollectionOps = InviteOps = (
|
||||
StorageOps
|
||||
) = PageOps = BackgroundJobOps = object
|
||||
|
||||
|
||||
CURR_DB_VERSION = "0043"
|
||||
|
||||
|
||||
# ============================================================================
|
||||
def resolve_db_url():
|
||||
def resolve_db_url() -> str:
|
||||
"""get the mongo db url, either from MONGO_DB_URL or
|
||||
from separate username, password and host settings"""
|
||||
db_url = os.environ.get("MONGO_DB_URL")
|
||||
@ -36,12 +51,12 @@ def resolve_db_url():
|
||||
|
||||
|
||||
# ============================================================================
|
||||
def init_db():
|
||||
def init_db() -> tuple[AsyncIOMotorClient, AsyncIOMotorDatabase]:
|
||||
"""initialize the mongodb connector"""
|
||||
|
||||
db_url = resolve_db_url()
|
||||
|
||||
client = motor.motor_asyncio.AsyncIOMotorClient(
|
||||
client = AsyncIOMotorClient(
|
||||
db_url,
|
||||
tz_aware=True,
|
||||
uuidRepresentation="standard",
|
||||
@ -55,7 +70,7 @@ def init_db():
|
||||
|
||||
|
||||
# ============================================================================
|
||||
async def ping_db(mdb):
|
||||
async def ping_db(mdb) -> None:
|
||||
"""run in loop until db is up, set db_inited['inited'] property to true"""
|
||||
print("Waiting DB", flush=True)
|
||||
while True:
|
||||
@ -73,18 +88,17 @@ async def ping_db(mdb):
|
||||
# ============================================================================
|
||||
async def update_and_prepare_db(
|
||||
# pylint: disable=R0913
|
||||
mdb,
|
||||
user_manager,
|
||||
org_ops,
|
||||
crawl_ops,
|
||||
crawl_config_ops,
|
||||
coll_ops,
|
||||
invite_ops,
|
||||
storage_ops,
|
||||
page_ops,
|
||||
background_job_ops,
|
||||
db_inited,
|
||||
):
|
||||
mdb: AsyncIOMotorDatabase,
|
||||
user_manager: UserManager,
|
||||
org_ops: OrgOps,
|
||||
crawl_ops: CrawlOps,
|
||||
crawl_config_ops: CrawlConfigOps,
|
||||
coll_ops: CollectionOps,
|
||||
invite_ops: InviteOps,
|
||||
storage_ops: StorageOps,
|
||||
page_ops: PageOps,
|
||||
background_job_ops: BackgroundJobOps,
|
||||
) -> None:
|
||||
"""Prepare database for application.
|
||||
|
||||
- Run database migrations
|
||||
@ -113,7 +127,6 @@ async def update_and_prepare_db(
|
||||
await user_manager.create_super_user()
|
||||
await org_ops.create_default_org()
|
||||
await org_ops.check_all_org_default_storages(storage_ops)
|
||||
db_inited["inited"] = True
|
||||
print("Database updated and ready", flush=True)
|
||||
|
||||
|
||||
|
@ -16,7 +16,7 @@ from fastapi.openapi.utils import get_openapi
|
||||
from fastapi.openapi.docs import get_swagger_ui_html, get_redoc_html
|
||||
from pydantic import BaseModel
|
||||
|
||||
from .db import init_db, await_db_and_migrations, update_and_prepare_db
|
||||
from .db import init_db, await_db_and_migrations
|
||||
|
||||
from .emailsender import EmailSender
|
||||
from .invites import init_invites
|
||||
@ -38,7 +38,7 @@ from .pages import init_pages_api
|
||||
from .subs import init_subs_api
|
||||
|
||||
from .crawlmanager import CrawlManager
|
||||
from .utils import run_once_lock, register_exit_handler, is_bool
|
||||
from .utils import register_exit_handler, is_bool
|
||||
from .version import __version__
|
||||
|
||||
API_PREFIX = "/api"
|
||||
@ -274,25 +274,8 @@ def main() -> None:
|
||||
|
||||
coll_ops.set_page_ops(page_ops)
|
||||
|
||||
# run only in first worker
|
||||
if run_once_lock("btrix-init-db"):
|
||||
asyncio.create_task(
|
||||
update_and_prepare_db(
|
||||
mdb,
|
||||
user_manager,
|
||||
org_ops,
|
||||
crawls,
|
||||
crawl_config_ops,
|
||||
coll_ops,
|
||||
invites,
|
||||
storage_ops,
|
||||
page_ops,
|
||||
background_job_ops,
|
||||
db_inited,
|
||||
)
|
||||
)
|
||||
else:
|
||||
asyncio.create_task(await_db_and_migrations(mdb, db_inited))
|
||||
# await db init, migrations should have already completed in init containers
|
||||
asyncio.create_task(await_db_and_migrations(mdb, db_inited))
|
||||
|
||||
app.include_router(org_ops.router)
|
||||
|
||||
|
@ -30,7 +30,9 @@ async def main():
|
||||
)
|
||||
return 1
|
||||
|
||||
(org_ops, _, _, _, _, page_ops, coll_ops, _, _, _, _, user_manager) = init_ops()
|
||||
(org_ops, _, _, _, _, page_ops, coll_ops, _, _, _, _, user_manager, _, _, _) = (
|
||||
init_ops()
|
||||
)
|
||||
|
||||
# Run job (generic)
|
||||
if job_type == BgJobType.OPTIMIZE_PAGES:
|
||||
|
61
backend/btrixcloud/main_migrations.py
Normal file
61
backend/btrixcloud/main_migrations.py
Normal file
@ -0,0 +1,61 @@
|
||||
"""entrypoint module for init_container, handles db migration"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import asyncio
|
||||
|
||||
from .ops import init_ops
|
||||
from .db import update_and_prepare_db
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# pylint: disable=too-many-function-args, duplicate-code
|
||||
async def main() -> int:
    """Run the database migrations and initial data setup, then return.

    Entrypoint coroutine for the migrations init container: it builds the
    ops objects via ``init_ops()`` and awaits ``update_and_prepare_db()``
    with the subset of them that the db preparation step needs.

    Returns:
        1 if ``KUBERNETES_SERVICE_HOST`` is unset or empty (an error message
        is printed and nothing is initialized), otherwise 0 once
        ``update_and_prepare_db()`` has completed.
    """
    if not os.environ.get("KUBERNETES_SERVICE_HOST"):
        # Adjacent string literals instead of a backslash continuation inside
        # the literal: the continuation either dropped the space between the
        # two sentences or embedded the source indentation in the message.
        print(
            "Sorry, the Browsertrix Backend must be run inside a Kubernetes "
            "environment. Kubernetes not detected "
            "(KUBERNETES_SERVICE_HOST is not set), Exiting"
        )
        return 1

    # init_ops() returns a fixed-order tuple; only the entries required by
    # update_and_prepare_db() are bound to names, the rest are discarded.
    (
        org_ops,
        crawl_config_ops,
        _,
        crawl_ops,
        _,
        page_ops,
        coll_ops,
        _,
        storage_ops,
        background_job_ops,
        _,
        user_manager,
        invite_ops,
        _,
        mdb,
    ) = init_ops()

    await update_and_prepare_db(
        mdb,
        user_manager,
        org_ops,
        crawl_ops,
        crawl_config_ops,
        coll_ops,
        invite_ops,
        storage_ops,
        page_ops,
        background_job_ops,
    )

    return 0
|
||||
|
||||
|
||||
# ============================================================================
|
||||
if __name__ == "__main__":
    # Run the async entrypoint to completion and use its return value
    # as the process exit status.
    sys.exit(asyncio.run(main()))
|
@ -39,6 +39,9 @@ def main():
|
||||
background_job_ops,
|
||||
event_webhook_ops,
|
||||
_,
|
||||
_,
|
||||
_,
|
||||
_,
|
||||
) = init_ops()
|
||||
|
||||
return init_operator_api(
|
||||
|
@ -1,6 +1,7 @@
|
||||
"""shared helper to initialize ops classes"""
|
||||
|
||||
from typing import Tuple
|
||||
from motor.motor_asyncio import AsyncIOMotorClient, AsyncIOMotorDatabase
|
||||
|
||||
from .crawlmanager import CrawlManager
|
||||
from .db import init_db
|
||||
@ -35,6 +36,9 @@ def init_ops() -> Tuple[
|
||||
BackgroundJobOps,
|
||||
EventWebhookOps,
|
||||
UserManager,
|
||||
InviteOps,
|
||||
AsyncIOMotorClient,
|
||||
AsyncIOMotorDatabase,
|
||||
]:
|
||||
"""Initialize and return ops classes"""
|
||||
email = EmailSender()
|
||||
@ -122,4 +126,7 @@ def init_ops() -> Tuple[
|
||||
background_job_ops,
|
||||
event_webhook_ops,
|
||||
user_manager,
|
||||
invite_ops,
|
||||
dbclient,
|
||||
mdb,
|
||||
)
|
||||
|
@ -1,7 +1,6 @@
|
||||
"""k8s utils"""
|
||||
|
||||
import asyncio
|
||||
import atexit
|
||||
import csv
|
||||
import io
|
||||
import json
|
||||
@ -58,26 +57,6 @@ def dt_now() -> datetime:
|
||||
return datetime.now(timezone.utc).replace(microsecond=0)
|
||||
|
||||
|
||||
def run_once_lock(name: str) -> bool:
    """Try to take a run-once lock backed by a directory under /tmp.

    The lock is the directory ``/tmp/.<name>``; creating it is the acquire
    step.

    Args:
        name: lock name, appended to ``/tmp/.`` to form the directory path.

    Returns:
        True if the directory was created by this call (lock acquired),
        False if ``os.mkdir`` failed with an ``OSError`` (for example the
        directory already exists).
    """
    lock_dir = "/tmp/." + name
    try:
        os.mkdir(lock_dir)
    except OSError:
        # Only filesystem failures mean "could not take the lock"; anything
        # else (KeyboardInterrupt, invalid argument values) must propagate
        # instead of being reported as a held lock.
        return False

    # just in case, delete dir on exit
    def del_dir():
        print("release lock: " + lock_dir, flush=True)
        os.rmdir(lock_dir)

    atexit.register(del_dir)
    return True
|
||||
|
||||
|
||||
def register_exit_handler() -> None:
|
||||
"""register exit handler to exit on SIGTERM"""
|
||||
loop = asyncio.get_running_loop()
|
||||
|
@ -56,6 +56,48 @@ spec:
|
||||
configMap:
|
||||
name: email-templates
|
||||
|
||||
initContainers:
|
||||
- name: migrations
|
||||
image: {{ .Values.backend_image }}
|
||||
imagePullPolicy: {{ .Values.backend_pull_policy }}
|
||||
command: ["python3", "-m", "btrixcloud.main_migrations"]
|
||||
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: backend-env-config
|
||||
- secretRef:
|
||||
name: backend-auth
|
||||
- secretRef:
|
||||
name: mongo-auth
|
||||
|
||||
env:
|
||||
- name: MOTOR_MAX_WORKERS
|
||||
value: "{{ .Values.backend_mongodb_workers | default 1 }}"
|
||||
|
||||
volumeMounts:
|
||||
- name: config-volume
|
||||
mountPath: /config
|
||||
|
||||
- name: ops-configs
|
||||
mountPath: /ops-configs/
|
||||
|
||||
- name: ops-proxy-configs
|
||||
mountPath: /ops-proxy-configs/
|
||||
|
||||
- name: app-templates
|
||||
mountPath: /app/btrixcloud/templates/
|
||||
|
||||
- name: email-templates
|
||||
mountPath: /app/btrixcloud/email-templates/
|
||||
|
||||
resources:
|
||||
limits:
|
||||
memory: {{ .Values.backend_memory }}
|
||||
|
||||
requests:
|
||||
cpu: {{ .Values.backend_cpu }}
|
||||
memory: {{ .Values.backend_memory }}
|
||||
|
||||
containers:
|
||||
- name: api
|
||||
image: {{ .Values.backend_image }}
|
||||
|
Loading…
Reference in New Issue
Block a user