browsertrix/backend/main.py
Ilya Kreymer bcbc40059e
Refactor backend data model to support UUID (fixes #118) (#119)
* uuid fix (fixes #118):
- update all mongo models to use the UUID type as the main '_id' (users continue to use 'id' as defined by fastapi-users)
- update all foreign doc references to use UUID instead of string
- api handlers convert str -> uuid as needed (see sketch below the file listing)
api fix:
- fix single crawl api, add CrawlOut response model
- fix collections api
- fix standalone-docker apis
- for manual jobs, set the user to the current user, overriding the setting from the crawlconfig

* additional fixes:
- rename username -> userName to indicate it is not the login 'username'
- rename user -> userid, archive -> aid for crawlconfig + crawls
- ensure invites correctly convert str -> uuid as needed
- filter out unset values from the browsertrix-crawler config (see sketch below)

* convert remaining user -> userid variables
ensure the archive id is passed to crawl_manager as a str (via archive.id_str)

* remove bulk crawlconfig delete
* add support for a `stopping` state when gracefully stopping a crawl (see sketch below)
* for the get crawl endpoint, check stopped crawls first, then running ones
2022-01-29 19:00:11 -08:00

103 lines · 2.4 KiB · Python

"""
main file for browsertrix-api system
supports docker and kubernetes based deployments of multiple browsertrix-crawlers
"""
import os
from fastapi import FastAPI
from db import init_db
from emailsender import EmailSender
from invites import init_invites
from users import init_users_api, init_user_manager, JWT_TOKEN_LIFETIME
from archives import init_archives_api
from storages import init_storages_api
from crawlconfigs import init_crawl_config_api
from colls import init_collections_api
from crawls import init_crawls_api
app = FastAPI()
# ============================================================================
# pylint: disable=too-many-locals
def main():
""" init browsertrix cloud api """
email = EmailSender()
crawl_manager = None
mdb = init_db()
settings = {
"registrationEnabled": os.environ.get("REGISTRATION_ENABLED") == "1",
"jwtTokenLifetime": JWT_TOKEN_LIFETIME,
}
invites = init_invites(mdb, email)
user_manager = init_user_manager(mdb, email, invites)
fastapi_users = init_users_api(app, user_manager)
current_active_user = fastapi_users.current_user(active=True)
archive_ops = init_archives_api(
app, mdb, user_manager, invites, current_active_user
)
user_manager.set_archive_ops(archive_ops)
# pylint: disable=import-outside-toplevel
if os.environ.get("KUBERNETES_SERVICE_HOST"):
from k8sman import K8SManager
crawl_manager = K8SManager()
else:
from dockerman import DockerManager
crawl_manager = DockerManager(archive_ops)
init_storages_api(archive_ops, crawl_manager, current_active_user)
crawl_config_ops = init_crawl_config_api(
mdb,
current_active_user,
archive_ops,
crawl_manager,
)
crawls = init_crawls_api(
app,
mdb,
os.environ.get("REDIS_URL"),
user_manager,
crawl_manager,
crawl_config_ops,
archive_ops,
)
coll_ops = init_collections_api(mdb, crawls, archive_ops, crawl_manager)
crawl_config_ops.set_coll_ops(coll_ops)
app.include_router(archive_ops.router)
@app.get("/settings")
async def get_settings():
return settings
@app.get("/healthz")
async def healthz():
return {}
# ============================================================================
@app.on_event("startup")
async def startup():
"""init on startup"""
main()
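
To make the commit's central change concrete: below is a minimal sketch of mongo documents keyed by a UUID '_id', with an api handler converting an incoming path string to UUID before querying. The model, field, and helper names here are illustrative assumptions; the actual models live in crawlconfigs.py and related modules, which are not shown on this page.

# Sketch only: hypothetical model mirroring the UUID keying and the
# user -> userid / archive -> aid renames described in the commit message.
from uuid import UUID, uuid4

from pydantic import BaseModel, Field


class CrawlConfig(BaseModel):
    """document keyed by a UUID '_id' rather than a string"""

    id: UUID = Field(default_factory=uuid4, alias="_id")
    aid: UUID  # owning archive id (renamed from 'archive')
    userid: UUID  # creating user id (renamed from 'user')

    @property
    def id_str(self):
        """string form for components (eg. crawl_manager) that need str ids"""
        return str(self.id)


async def get_crawl_config(cid: str, collection):
    """api handler helper: convert the str path param to UUID before lookup"""
    res = await collection.find_one({"_id": UUID(cid)})
    return CrawlConfig(**res) if res else None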
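The "filter out unset values from the browsertrix-crawler config" item plausibly maps to pydantic's exclude_unset serialization; a sketch under that assumption (field names are made up):

from typing import List, Optional

from pydantic import BaseModel


class RawCrawlConfig(BaseModel):
    seeds: List[str]
    depth: Optional[int] = None
    scopeType: Optional[str] = None


config = RawCrawlConfig(seeds=["https://example.com/"])

# only explicitly-set fields are serialized; defaults that were never
# set do not leak into the config handed to browsertrix-crawler
print(config.dict(exclude_unset=True))  # {'seeds': ['https://example.com/']}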
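For the last two commit items, a hedged sketch (function and collection names are assumptions, not the actual Browsertrix API) of marking a gracefully-stopping crawl as 'stopping', and of the get-crawl endpoint consulting stopped crawls persisted in mongo before asking the crawl manager about still-running ones:

# Hedged sketch of the graceful-stop flow; names are assumptions.
async def stop_crawl_graceful(crawl_id, crawls_coll, crawl_manager):
    """request a graceful stop, then mark the crawl as 'stopping'"""
    await crawl_manager.stop_crawl(crawl_id, graceful=True)
    await crawls_coll.find_one_and_update(
        {"_id": crawl_id}, {"$set": {"state": "stopping"}}
    )


async def get_crawl(crawl_id, aid, crawls_coll, crawl_manager):
    """check stopped (persisted) crawls first, then running ones"""
    res = await crawls_coll.find_one({"_id": crawl_id, "aid": aid})
    if res:
        return res

    return await crawl_manager.get_running_crawl(crawl_id, str(aid))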
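One note on the file itself: app is created at import time while all wiring happens in the startup event hook, so the module is served by pointing an ASGI server at it, e.g. `uvicorn main:app` (an assumption about deployment, not stated in the file). Deferring main() to startup means the database connection and the docker/k8s crawl manager are only created when the server actually starts, not whenever the module is merely imported.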