startup fixes: (#793)

- don't run migrations on first init, just set to CURR_DB_VERSION
- implement 'run once lock' with mkdir/rmdir
- move register_exit_handler() to utils
- remove old run once handler
This commit is contained in:
parent
60ba9e366f
commit
7aefe09581
@ -10,7 +10,10 @@ import motor.motor_asyncio
|
|||||||
from pydantic import BaseModel, UUID4
|
from pydantic import BaseModel, UUID4
|
||||||
from pymongo.errors import InvalidName
|
from pymongo.errors import InvalidName
|
||||||
|
|
||||||
from .worker import by_one_worker
|
from .migrations import BaseMigration
|
||||||
|
|
||||||
|
|
||||||
|
CURR_DB_VERSION = "0005"
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
@ -47,7 +50,6 @@ def init_db():
|
|||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
@by_one_worker("/app/btrixcloud/worker-pid.file")
|
|
||||||
async def update_and_prepare_db(
|
async def update_and_prepare_db(
|
||||||
# pylint: disable=R0913
|
# pylint: disable=R0913
|
||||||
mdb,
|
mdb,
|
||||||
@ -64,9 +66,9 @@ async def update_and_prepare_db(
|
|||||||
- Create/update superuser
|
- Create/update superuser
|
||||||
- Create/update default org
|
- Create/update default org
|
||||||
|
|
||||||
Run all tasks in order in a single worker.
|
|
||||||
"""
|
"""
|
||||||
if await run_db_migrations(mdb):
|
print("Database setup started", flush=True)
|
||||||
|
if await run_db_migrations(mdb, user_manager):
|
||||||
await drop_indexes(mdb)
|
await drop_indexes(mdb)
|
||||||
await create_indexes(org_ops, crawl_config_ops, coll_ops, invite_ops)
|
await create_indexes(org_ops, crawl_config_ops, coll_ops, invite_ops)
|
||||||
await user_manager.create_super_user()
|
await user_manager.create_super_user()
|
||||||
@ -75,8 +77,19 @@ async def update_and_prepare_db(
|
|||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
async def run_db_migrations(mdb):
|
async def run_db_migrations(mdb, user_manager):
|
||||||
"""Run database migrations."""
|
"""Run database migrations."""
|
||||||
|
|
||||||
|
# if first run, just set version and exit
|
||||||
|
if not await user_manager.get_superuser():
|
||||||
|
base_migration = BaseMigration(mdb, CURR_DB_VERSION)
|
||||||
|
await base_migration.set_db_version()
|
||||||
|
print(
|
||||||
|
"New DB, no migration needed, set version to: " + CURR_DB_VERSION,
|
||||||
|
flush=True,
|
||||||
|
)
|
||||||
|
return False
|
||||||
|
|
||||||
migrations_run = False
|
migrations_run = False
|
||||||
migrations_path = "/app/btrixcloud/migrations"
|
migrations_path = "/app/btrixcloud/migrations"
|
||||||
module_files = [
|
module_files = [
|
||||||
|
@ -3,9 +3,8 @@ main file for browsertrix-api system
|
|||||||
supports docker and kubernetes based deployments of multiple browsertrix-crawlers
|
supports docker and kubernetes based deployments of multiple browsertrix-crawlers
|
||||||
"""
|
"""
|
||||||
import os
|
import os
|
||||||
import signal
|
|
||||||
import sys
|
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import sys
|
||||||
|
|
||||||
from fastapi import FastAPI
|
from fastapi import FastAPI
|
||||||
from fastapi.routing import APIRouter
|
from fastapi.routing import APIRouter
|
||||||
@ -26,8 +25,8 @@ from .colls import init_collections_api
|
|||||||
from .crawls import init_crawls_api
|
from .crawls import init_crawls_api
|
||||||
|
|
||||||
from .crawlmanager import CrawlManager
|
from .crawlmanager import CrawlManager
|
||||||
|
from .utils import run_once_lock, register_exit_handler
|
||||||
|
|
||||||
# pylint: disable=duplicate-code
|
|
||||||
|
|
||||||
API_PREFIX = "/api"
|
API_PREFIX = "/api"
|
||||||
app_root = FastAPI(
|
app_root = FastAPI(
|
||||||
@ -111,11 +110,13 @@ def main():
|
|||||||
|
|
||||||
crawl_config_ops.set_coll_ops(coll_ops)
|
crawl_config_ops.set_coll_ops(coll_ops)
|
||||||
|
|
||||||
asyncio.create_task(
|
# run only in first worker
|
||||||
update_and_prepare_db(
|
if run_once_lock("btrix-init-db"):
|
||||||
mdb, user_manager, org_ops, crawl_config_ops, coll_ops, invites
|
asyncio.create_task(
|
||||||
|
update_and_prepare_db(
|
||||||
|
mdb, user_manager, org_ops, crawl_config_ops, coll_ops, invites
|
||||||
|
)
|
||||||
)
|
)
|
||||||
)
|
|
||||||
|
|
||||||
app.include_router(org_ops.router)
|
app.include_router(org_ops.router)
|
||||||
|
|
||||||
@ -140,13 +141,5 @@ def main():
|
|||||||
@app_root.on_event("startup")
|
@app_root.on_event("startup")
|
||||||
async def startup():
|
async def startup():
|
||||||
"""init on startup"""
|
"""init on startup"""
|
||||||
loop = asyncio.get_running_loop()
|
register_exit_handler()
|
||||||
loop.add_signal_handler(signal.SIGTERM, exit_handler)
|
|
||||||
|
|
||||||
main()
|
main()
|
||||||
|
|
||||||
|
|
||||||
def exit_handler():
|
|
||||||
"""sigterm handler"""
|
|
||||||
print("SIGTERM received, exiting")
|
|
||||||
sys.exit(1)
|
|
||||||
|
@ -1,21 +1,16 @@
|
|||||||
""" entrypoint module for operator """
|
""" entrypoint module for operator """
|
||||||
|
|
||||||
import signal
|
|
||||||
import sys
|
|
||||||
import asyncio
|
|
||||||
|
|
||||||
from fastapi import FastAPI
|
from fastapi import FastAPI
|
||||||
|
|
||||||
from .operator import init_operator_webhook
|
from .operator import init_operator_webhook
|
||||||
|
|
||||||
|
from .utils import register_exit_handler
|
||||||
|
|
||||||
API_PREFIX = "/api"
|
app_root = FastAPI()
|
||||||
app_root = FastAPI(
|
|
||||||
docs_url=API_PREFIX + "/docs",
|
|
||||||
redoc_url=API_PREFIX + "/redoc",
|
|
||||||
openapi_url=API_PREFIX + "/openapi.json",
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
def main():
|
def main():
|
||||||
"""main init"""
|
"""main init"""
|
||||||
init_operator_webhook(app_root)
|
init_operator_webhook(app_root)
|
||||||
@ -25,13 +20,5 @@ def main():
|
|||||||
@app_root.on_event("startup")
|
@app_root.on_event("startup")
|
||||||
async def startup():
|
async def startup():
|
||||||
"""init on startup"""
|
"""init on startup"""
|
||||||
loop = asyncio.get_running_loop()
|
register_exit_handler()
|
||||||
loop.add_signal_handler(signal.SIGTERM, exit_handler)
|
|
||||||
|
|
||||||
main()
|
main()
|
||||||
|
|
||||||
|
|
||||||
def exit_handler():
|
|
||||||
"""sigterm handler"""
|
|
||||||
print("SIGTERM received, exiting")
|
|
||||||
sys.exit(1)
|
|
||||||
|
@ -8,6 +8,7 @@ from .k8sapi import K8sAPI
|
|||||||
from .db import init_db
|
from .db import init_db
|
||||||
from .crawlconfigs import get_crawl_config, inc_crawl_count
|
from .crawlconfigs import get_crawl_config, inc_crawl_count
|
||||||
from .crawls import add_new_crawl
|
from .crawls import add_new_crawl
|
||||||
|
from .utils import register_exit_handler
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
@ -58,4 +59,5 @@ def main():
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
register_exit_handler()
|
||||||
main()
|
main()
|
||||||
|
@ -156,6 +156,10 @@ class UserManager(BaseUserManager[UserCreate, UserDB]):
|
|||||||
)
|
)
|
||||||
return await cursor.to_list(length=1000)
|
return await cursor.to_list(length=1000)
|
||||||
|
|
||||||
|
async def get_superuser(self):
    """Look up the current superuser, if any.

    Runs a single ``find_one`` on the user collection for a document whose
    ``is_superuser`` field is True and returns that result unchanged.
    Callers treat a falsy result as "no superuser yet" (presumably
    ``find_one`` yields None in that case -- confirm against the driver).
    """
    superuser_filter = {"is_superuser": True}
    return await self.user_db.collection.find_one(superuser_filter)
|
||||||
|
|
||||||
async def create_super_user(self):
|
async def create_super_user(self):
|
||||||
"""Initialize a super user from env vars"""
|
"""Initialize a super user from env vars"""
|
||||||
email = os.environ.get("SUPERUSER_EMAIL")
|
email = os.environ.get("SUPERUSER_EMAIL")
|
||||||
@ -167,9 +171,7 @@ class UserManager(BaseUserManager[UserCreate, UserDB]):
|
|||||||
if not password:
|
if not password:
|
||||||
password = passlib.pwd.genword()
|
password = passlib.pwd.genword()
|
||||||
|
|
||||||
curr_superuser_res = await self.user_db.collection.find_one(
|
curr_superuser_res = await self.get_superuser()
|
||||||
{"is_superuser": True}
|
|
||||||
)
|
|
||||||
if curr_superuser_res:
|
if curr_superuser_res:
|
||||||
user = UserDB(**curr_superuser_res)
|
user = UserDB(**curr_superuser_res)
|
||||||
update = {"password": password}
|
update = {"password": password}
|
||||||
|
@ -1,6 +1,11 @@
|
|||||||
""" k8s utils """
|
""" k8s utils """
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import asyncio
|
||||||
|
import sys
|
||||||
|
import signal
|
||||||
|
import atexit
|
||||||
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
from redis import asyncio as exceptions
|
from redis import asyncio as exceptions
|
||||||
@ -46,3 +51,35 @@ async def get_redis_crawl_stats(redis, crawl_id):
|
|||||||
|
|
||||||
stats = {"found": pages_found, "done": pages_done, "size": archive_size}
|
stats = {"found": pages_found, "done": pages_done, "size": archive_size}
|
||||||
return stats
|
return stats
|
||||||
|
|
||||||
|
|
||||||
|
def run_once_lock(name):
    """Try to take a "run once" lock backed by a temp directory.

    The lock is the directory ``/tmp/.<name>``. ``os.mkdir`` raises
    ``FileExistsError`` when that directory is already present, so only the
    caller that actually creates it is told to proceed.

    Args:
        name: lock name; appended to ``/tmp/.`` to form the directory path.

    Returns:
        True if this call created the lock directory, False if it already
        exists or could not be created.
    """
    lock_dir = "/tmp/." + name
    try:
        os.mkdir(lock_dir)
    except FileExistsError:
        # someone else already holds the lock
        return False
    except OSError as err:
        # Any other filesystem failure (missing or read-only /tmp, ...) still
        # means "lock not acquired", but report it instead of hiding it.
        # Unlike a bare except, this lets KeyboardInterrupt/SystemExit through.
        print(f"unable to create lock {lock_dir}: {err}", flush=True)
        return False

    # just in case, delete dir on exit
    def del_dir():
        print("release lock: " + lock_dir, flush=True)
        try:
            os.rmdir(lock_dir)
        except OSError as err:
            # directory already removed or not removable: nothing more to do
            # at interpreter shutdown, so only report it
            print(f"unable to release lock {lock_dir}: {err}", flush=True)

    atexit.register(del_dir)
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def register_exit_handler():
    """Make the process exit when it receives SIGTERM.

    Attaches a callback for ``signal.SIGTERM`` to the currently running
    asyncio event loop, so this must be called while a loop is running.
    The callback prints a short message and calls ``sys.exit(1)``.
    """
    running_loop = asyncio.get_running_loop()

    def _on_sigterm():
        """Announce the signal and exit with status 1."""
        print("SIGTERM received, exiting")
        sys.exit(1)

    running_loop.add_signal_handler(signal.SIGTERM, _on_sigterm)
|
||||||
|
@ -1,47 +0,0 @@
|
|||||||
"""
|
|
||||||
Unique Worker exposed as decorator by_one_worker
|
|
||||||
"""
|
|
||||||
|
|
||||||
from pathlib import Path
|
|
||||||
import os
|
|
||||||
from functools import cached_property
|
|
||||||
|
|
||||||
|
|
||||||
class UniqueWorker:
|
|
||||||
"""Class to run async tasks in single worker only."""
|
|
||||||
|
|
||||||
def __init__(self, path):
|
|
||||||
self.path = Path(path)
|
|
||||||
self.pid = str(os.getpid())
|
|
||||||
self.set_id()
|
|
||||||
|
|
||||||
def set_id(self):
|
|
||||||
"""Create path to pid file and write to pid."""
|
|
||||||
if not self.path.exists():
|
|
||||||
self.path.parents[0].mkdir(parents=True, exist_ok=True)
|
|
||||||
|
|
||||||
with open(self.path, "w", encoding="utf-8") as pid_file:
|
|
||||||
pid_file.write(self.pid)
|
|
||||||
|
|
||||||
@cached_property
|
|
||||||
def is_assigned(self):
|
|
||||||
"""Check if worker has been assigned to unique worker."""
|
|
||||||
with open(self.path, "r", encoding="utf-8") as pid_file:
|
|
||||||
assigned_worker = pid_file.read()
|
|
||||||
|
|
||||||
return assigned_worker == self.pid
|
|
||||||
|
|
||||||
|
|
||||||
def by_one_worker(worker_pid_path):
|
|
||||||
"""Decorator which runs function in unique worker."""
|
|
||||||
unique_worker = UniqueWorker(worker_pid_path)
|
|
||||||
|
|
||||||
def deco(pid_path):
|
|
||||||
def wrapped(*args, **kwargs):
|
|
||||||
if not unique_worker.is_assigned:
|
|
||||||
return ""
|
|
||||||
return pid_path(*args, **kwargs)
|
|
||||||
|
|
||||||
return wrapped
|
|
||||||
|
|
||||||
return deco
|
|
Loading…
Reference in New Issue
Block a user