move db migrations to initContainers: (#2449)
- should avoid gunicorn worker timeouts for long running migrations, also fixes #2439
- add main_migrations as entrypoint to just run db migrations, using existing init_ops() call
- first run 'migrations' container with same resources as 'app' and 'op'
- additional typing for initializing db
- cleanup unused code related to running only once, waiting for db to be ready
- fixes #2447
This commit is contained in:
parent
702c9ab3b7
commit
e13c3bfb48
@ -8,20 +8,35 @@ import urllib
|
|||||||
import asyncio
|
import asyncio
|
||||||
from uuid import UUID, uuid4
|
from uuid import UUID, uuid4
|
||||||
|
|
||||||
from typing import Optional, Union, TypeVar, Type
|
from typing import Optional, Union, TypeVar, Type, TYPE_CHECKING
|
||||||
|
|
||||||
import motor.motor_asyncio
|
from motor.motor_asyncio import AsyncIOMotorClient, AsyncIOMotorDatabase
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from pymongo.errors import InvalidName
|
from pymongo.errors import InvalidName
|
||||||
|
|
||||||
from .migrations import BaseMigration
|
from .migrations import BaseMigration
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from .users import UserManager
|
||||||
|
from .orgs import OrgOps
|
||||||
|
from .crawlconfigs import CrawlConfigOps
|
||||||
|
from .crawls import CrawlOps
|
||||||
|
from .colls import CollectionOps
|
||||||
|
from .invites import InviteOps
|
||||||
|
from .storages import StorageOps
|
||||||
|
from .pages import PageOps
|
||||||
|
from .background_jobs import BackgroundJobOps
|
||||||
|
else:
|
||||||
|
UserManager = OrgOps = CrawlConfigOps = CrawlOps = CollectionOps = InviteOps = (
|
||||||
|
StorageOps
|
||||||
|
) = PageOps = BackgroundJobOps = object
|
||||||
|
|
||||||
|
|
||||||
CURR_DB_VERSION = "0043"
|
CURR_DB_VERSION = "0043"
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
def resolve_db_url():
|
def resolve_db_url() -> str:
|
||||||
"""get the mongo db url, either from MONGO_DB_URL or
|
"""get the mongo db url, either from MONGO_DB_URL or
|
||||||
from separate username, password and host settings"""
|
from separate username, password and host settings"""
|
||||||
db_url = os.environ.get("MONGO_DB_URL")
|
db_url = os.environ.get("MONGO_DB_URL")
|
||||||
@ -36,12 +51,12 @@ def resolve_db_url():
|
|||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
def init_db():
|
def init_db() -> tuple[AsyncIOMotorClient, AsyncIOMotorDatabase]:
|
||||||
"""initialize the mongodb connector"""
|
"""initialize the mongodb connector"""
|
||||||
|
|
||||||
db_url = resolve_db_url()
|
db_url = resolve_db_url()
|
||||||
|
|
||||||
client = motor.motor_asyncio.AsyncIOMotorClient(
|
client = AsyncIOMotorClient(
|
||||||
db_url,
|
db_url,
|
||||||
tz_aware=True,
|
tz_aware=True,
|
||||||
uuidRepresentation="standard",
|
uuidRepresentation="standard",
|
||||||
@ -55,7 +70,7 @@ def init_db():
|
|||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
async def ping_db(mdb):
|
async def ping_db(mdb) -> None:
|
||||||
"""run in loop until db is up, set db_inited['inited'] property to true"""
|
"""run in loop until db is up, set db_inited['inited'] property to true"""
|
||||||
print("Waiting DB", flush=True)
|
print("Waiting DB", flush=True)
|
||||||
while True:
|
while True:
|
||||||
@ -73,18 +88,17 @@ async def ping_db(mdb):
|
|||||||
# ============================================================================
|
# ============================================================================
|
||||||
async def update_and_prepare_db(
|
async def update_and_prepare_db(
|
||||||
# pylint: disable=R0913
|
# pylint: disable=R0913
|
||||||
mdb,
|
mdb: AsyncIOMotorDatabase,
|
||||||
user_manager,
|
user_manager: UserManager,
|
||||||
org_ops,
|
org_ops: OrgOps,
|
||||||
crawl_ops,
|
crawl_ops: CrawlOps,
|
||||||
crawl_config_ops,
|
crawl_config_ops: CrawlConfigOps,
|
||||||
coll_ops,
|
coll_ops: CollectionOps,
|
||||||
invite_ops,
|
invite_ops: InviteOps,
|
||||||
storage_ops,
|
storage_ops: StorageOps,
|
||||||
page_ops,
|
page_ops: PageOps,
|
||||||
background_job_ops,
|
background_job_ops: BackgroundJobOps,
|
||||||
db_inited,
|
) -> None:
|
||||||
):
|
|
||||||
"""Prepare database for application.
|
"""Prepare database for application.
|
||||||
|
|
||||||
- Run database migrations
|
- Run database migrations
|
||||||
@ -113,7 +127,6 @@ async def update_and_prepare_db(
|
|||||||
await user_manager.create_super_user()
|
await user_manager.create_super_user()
|
||||||
await org_ops.create_default_org()
|
await org_ops.create_default_org()
|
||||||
await org_ops.check_all_org_default_storages(storage_ops)
|
await org_ops.check_all_org_default_storages(storage_ops)
|
||||||
db_inited["inited"] = True
|
|
||||||
print("Database updated and ready", flush=True)
|
print("Database updated and ready", flush=True)
|
||||||
|
|
||||||
|
|
||||||
|
@ -16,7 +16,7 @@ from fastapi.openapi.utils import get_openapi
|
|||||||
from fastapi.openapi.docs import get_swagger_ui_html, get_redoc_html
|
from fastapi.openapi.docs import get_swagger_ui_html, get_redoc_html
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from .db import init_db, await_db_and_migrations, update_and_prepare_db
|
from .db import init_db, await_db_and_migrations
|
||||||
|
|
||||||
from .emailsender import EmailSender
|
from .emailsender import EmailSender
|
||||||
from .invites import init_invites
|
from .invites import init_invites
|
||||||
@ -38,7 +38,7 @@ from .pages import init_pages_api
|
|||||||
from .subs import init_subs_api
|
from .subs import init_subs_api
|
||||||
|
|
||||||
from .crawlmanager import CrawlManager
|
from .crawlmanager import CrawlManager
|
||||||
from .utils import run_once_lock, register_exit_handler, is_bool
|
from .utils import register_exit_handler, is_bool
|
||||||
from .version import __version__
|
from .version import __version__
|
||||||
|
|
||||||
API_PREFIX = "/api"
|
API_PREFIX = "/api"
|
||||||
@ -274,25 +274,8 @@ def main() -> None:
|
|||||||
|
|
||||||
coll_ops.set_page_ops(page_ops)
|
coll_ops.set_page_ops(page_ops)
|
||||||
|
|
||||||
# run only in first worker
|
# await db init, migrations should have already completed in init containers
|
||||||
if run_once_lock("btrix-init-db"):
|
asyncio.create_task(await_db_and_migrations(mdb, db_inited))
|
||||||
asyncio.create_task(
|
|
||||||
update_and_prepare_db(
|
|
||||||
mdb,
|
|
||||||
user_manager,
|
|
||||||
org_ops,
|
|
||||||
crawls,
|
|
||||||
crawl_config_ops,
|
|
||||||
coll_ops,
|
|
||||||
invites,
|
|
||||||
storage_ops,
|
|
||||||
page_ops,
|
|
||||||
background_job_ops,
|
|
||||||
db_inited,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
asyncio.create_task(await_db_and_migrations(mdb, db_inited))
|
|
||||||
|
|
||||||
app.include_router(org_ops.router)
|
app.include_router(org_ops.router)
|
||||||
|
|
||||||
|
@ -30,7 +30,9 @@ async def main():
|
|||||||
)
|
)
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
(org_ops, _, _, _, _, page_ops, coll_ops, _, _, _, _, user_manager) = init_ops()
|
(org_ops, _, _, _, _, page_ops, coll_ops, _, _, _, _, user_manager, _, _, _) = (
|
||||||
|
init_ops()
|
||||||
|
)
|
||||||
|
|
||||||
# Run job (generic)
|
# Run job (generic)
|
||||||
if job_type == BgJobType.OPTIMIZE_PAGES:
|
if job_type == BgJobType.OPTIMIZE_PAGES:
|
||||||
|
61
backend/btrixcloud/main_migrations.py
Normal file
61
backend/btrixcloud/main_migrations.py
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
"""entrypoint module for init_container, handles db migration"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import asyncio
|
||||||
|
|
||||||
|
from .ops import init_ops
|
||||||
|
from .db import update_and_prepare_db
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# pylint: disable=too-many-function-args, duplicate-code
|
||||||
|
async def main() -> int:
    """Run database migrations and initial setup, then return an exit code.

    Entrypoint for the init container: builds the ops objects via
    init_ops() and awaits update_and_prepare_db() with them.

    Returns:
        1 if KUBERNETES_SERVICE_HOST is not set (nothing is initialized),
        0 after update_and_prepare_db() has completed.
    """
    if not os.environ.get("KUBERNETES_SERVICE_HOST"):
        # Adjacent literals are used instead of a backslash continuation
        # inside the string, which glued the two sentences together without
        # a proper single space between them.
        print(
            "Sorry, the Browsertrix Backend must be run inside a Kubernetes "
            "environment. Kubernetes not detected "
            "(KUBERNETES_SERVICE_HOST is not set), Exiting"
        )
        return 1

    # init_ops() returns a fixed-order tuple; entries that are not needed
    # for preparing the database are discarded with `_`.
    (
        org_ops,
        crawl_config_ops,
        _,
        crawl_ops,
        _,
        page_ops,
        coll_ops,
        _,
        storage_ops,
        background_job_ops,
        _,
        user_manager,
        invite_ops,
        _,
        mdb,
    ) = init_ops()

    await update_and_prepare_db(
        mdb,
        user_manager,
        org_ops,
        crawl_ops,
        crawl_config_ops,
        coll_ops,
        invite_ops,
        storage_ops,
        page_ops,
        background_job_ops,
    )

    return 0
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
if __name__ == "__main__":
    # use main()'s integer result as the process exit status
    sys.exit(asyncio.run(main()))
|
@ -39,6 +39,9 @@ def main():
|
|||||||
background_job_ops,
|
background_job_ops,
|
||||||
event_webhook_ops,
|
event_webhook_ops,
|
||||||
_,
|
_,
|
||||||
|
_,
|
||||||
|
_,
|
||||||
|
_,
|
||||||
) = init_ops()
|
) = init_ops()
|
||||||
|
|
||||||
return init_operator_api(
|
return init_operator_api(
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
"""shared helper to initialize ops classes"""
|
"""shared helper to initialize ops classes"""
|
||||||
|
|
||||||
from typing import Tuple
|
from typing import Tuple
|
||||||
|
from motor.motor_asyncio import AsyncIOMotorClient, AsyncIOMotorDatabase
|
||||||
|
|
||||||
from .crawlmanager import CrawlManager
|
from .crawlmanager import CrawlManager
|
||||||
from .db import init_db
|
from .db import init_db
|
||||||
@ -35,6 +36,9 @@ def init_ops() -> Tuple[
|
|||||||
BackgroundJobOps,
|
BackgroundJobOps,
|
||||||
EventWebhookOps,
|
EventWebhookOps,
|
||||||
UserManager,
|
UserManager,
|
||||||
|
InviteOps,
|
||||||
|
AsyncIOMotorClient,
|
||||||
|
AsyncIOMotorDatabase,
|
||||||
]:
|
]:
|
||||||
"""Initialize and return ops classes"""
|
"""Initialize and return ops classes"""
|
||||||
email = EmailSender()
|
email = EmailSender()
|
||||||
@ -122,4 +126,7 @@ def init_ops() -> Tuple[
|
|||||||
background_job_ops,
|
background_job_ops,
|
||||||
event_webhook_ops,
|
event_webhook_ops,
|
||||||
user_manager,
|
user_manager,
|
||||||
|
invite_ops,
|
||||||
|
dbclient,
|
||||||
|
mdb,
|
||||||
)
|
)
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
"""k8s utils"""
|
"""k8s utils"""
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import atexit
|
|
||||||
import csv
|
import csv
|
||||||
import io
|
import io
|
||||||
import json
|
import json
|
||||||
@ -58,26 +57,6 @@ def dt_now() -> datetime:
|
|||||||
return datetime.now(timezone.utc).replace(microsecond=0)
|
return datetime.now(timezone.utc).replace(microsecond=0)
|
||||||
|
|
||||||
|
|
||||||
def run_once_lock(name) -> bool:
    """Acquire a run-once lock backed by a directory under /tmp.

    Tries to create the directory '/tmp/.<name>':

    - if the directory was created, return True
    - if it could not be created (e.g. it already exists), return False

    On success, an atexit handler is registered that removes the
    directory again when the interpreter exits.
    """
    lock_dir = "/tmp/." + name
    try:
        os.mkdir(lock_dir)
    except OSError:
        # FileExistsError (lock already taken) and other filesystem errors
        # both mean this caller does not hold the lock. Unlike a bare
        # except, this does not swallow KeyboardInterrupt / SystemExit.
        return False

    # just in case, delete dir on exit
    def del_dir():
        print("release lock: " + lock_dir, flush=True)
        os.rmdir(lock_dir)

    atexit.register(del_dir)
    return True
|
|
||||||
|
|
||||||
|
|
||||||
def register_exit_handler() -> None:
|
def register_exit_handler() -> None:
|
||||||
"""register exit handler to exit on SIGTERM"""
|
"""register exit handler to exit on SIGTERM"""
|
||||||
loop = asyncio.get_running_loop()
|
loop = asyncio.get_running_loop()
|
||||||
|
@ -56,6 +56,48 @@ spec:
|
|||||||
configMap:
|
configMap:
|
||||||
name: email-templates
|
name: email-templates
|
||||||
|
|
||||||
|
initContainers:
|
||||||
|
- name: migrations
|
||||||
|
image: {{ .Values.backend_image }}
|
||||||
|
imagePullPolicy: {{ .Values.backend_pull_policy }}
|
||||||
|
command: ["python3", "-m", "btrixcloud.main_migrations"]
|
||||||
|
|
||||||
|
envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: backend-env-config
|
||||||
|
- secretRef:
|
||||||
|
name: backend-auth
|
||||||
|
- secretRef:
|
||||||
|
name: mongo-auth
|
||||||
|
|
||||||
|
env:
|
||||||
|
- name: MOTOR_MAX_WORKERS
|
||||||
|
value: "{{ .Values.backend_mongodb_workers | default 1 }}"
|
||||||
|
|
||||||
|
volumeMounts:
|
||||||
|
- name: config-volume
|
||||||
|
mountPath: /config
|
||||||
|
|
||||||
|
- name: ops-configs
|
||||||
|
mountPath: /ops-configs/
|
||||||
|
|
||||||
|
- name: ops-proxy-configs
|
||||||
|
mountPath: /ops-proxy-configs/
|
||||||
|
|
||||||
|
- name: app-templates
|
||||||
|
mountPath: /app/btrixcloud/templates/
|
||||||
|
|
||||||
|
- name: email-templates
|
||||||
|
mountPath: /app/btrixcloud/email-templates/
|
||||||
|
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: {{ .Values.backend_memory }}
|
||||||
|
|
||||||
|
requests:
|
||||||
|
cpu: {{ .Values.backend_cpu }}
|
||||||
|
memory: {{ .Values.backend_memory }}
|
||||||
|
|
||||||
containers:
|
containers:
|
||||||
- name: api
|
- name: api
|
||||||
image: {{ .Values.backend_image }}
|
image: {{ .Values.backend_image }}
|
||||||
|
Loading…
Reference in New Issue
Block a user