browsertrix/backend/main.py
Ilya Kreymer 2f63c7dcf8
Profiles: Backend API + Nginx Devtools Proxy Support (#212)
* add profile creation, list endpoints at /archives/<aid>/profiles
* add profile browser creation, get, ping, commit, delete endpoints at /archives/<aid>/profiles/browser
* support creation of profile browser using browsertrix-crawler 'create-login-profile' in docker and k8s
* ensure profile browser expires after set time, k8s job or docker container automatically deleted on exit
* profile browser creation returns temporary browser id, or `{"detail": "waiting_for_browser"}` while waiting for browser container init
* nginx frontend: proxy /loadbrowser/ to port 9223 in browsertrix-crawler, connecting directly to chrome devtools
* profile api auth: use redis for auth
- store browserid->archiveid and browserid->browser ip mapping in redis
- browser apis: ensure profile browser is associated with specified archive
- browser ws: pass archiveid and browserid to ws query args, browserid is part of archive, and browserid corresponds to specified ip
* store profiles in /profiles/ directory in default storage, include profileid in profile tar.gz filename

* support profile in crawlconfig:
- add profileid to CrawlConfig, and profileName to CrawlConfigOut
- support resolving profile path via profileid, setting '--profile @{path/to/profile.tar.gz}' for crawler (assuming same storage for profile as output for now) in both docker and k8s setups
- docker: support out_filename, a custom wacz output filename (previously missing functionality)
2022-04-13 19:36:06 -07:00

116 lines
2.7 KiB
Python

"""
main file for browsertrix-api system
supports docker and kubernetes based deployments of multiple browsertrix-crawlers
"""
import os
from fastapi import FastAPI
from db import init_db
from emailsender import EmailSender
from invites import init_invites
from users import init_users_api, init_user_manager, JWT_TOKEN_LIFETIME
from archives import init_archives_api
from profiles import init_profiles_api
from storages import init_storages_api
from crawlconfigs import init_crawl_config_api
from colls import init_collections_api
from crawls import init_crawls_api
# Shared FastAPI application instance; routers and routes are attached in main()
app = FastAPI()
# ============================================================================
# pylint: disable=too-many-locals
def main():
    """Wire together all browsertrix cloud API components on the shared app.

    Initializes the database, user/invite management, archives, storages,
    profiles, crawl configs, crawls and collections, picking a k8s or docker
    crawl manager based on the runtime environment, and registers the
    top-level /settings and /healthz routes.
    """
    email_sender = EmailSender()
    dbclient, mdb = init_db()

    # Values served verbatim by the /settings endpoint registered below.
    settings = {
        "registrationEnabled": os.environ.get("REGISTRATION_ENABLED") == "1",
        "jwtTokenLifetime": JWT_TOKEN_LIFETIME,
    }

    invites = init_invites(mdb, email_sender)
    user_manager = init_user_manager(mdb, email_sender, invites)
    fastapi_users = init_users_api(app, user_manager)
    current_active_user = fastapi_users.current_user(active=True)

    archive_ops = init_archives_api(
        app, mdb, user_manager, invites, current_active_user
    )
    user_manager.set_archive_ops(archive_ops)

    # Pick the crawl manager backend: KUBERNETES_SERVICE_HOST is set when
    # running inside a k8s cluster; otherwise fall back to docker.
    # pylint: disable=import-outside-toplevel
    if os.environ.get("KUBERNETES_SERVICE_HOST"):
        from k8sman import K8SManager

        crawl_manager = K8SManager()
    else:
        from dockerman import DockerManager

        crawl_manager = DockerManager(archive_ops)

    redis_url = os.environ.get("REDIS_URL")

    init_storages_api(archive_ops, crawl_manager, current_active_user)

    profiles = init_profiles_api(
        mdb, redis_url, crawl_manager, archive_ops, current_active_user
    )

    crawl_config_ops = init_crawl_config_api(
        dbclient,
        mdb,
        current_active_user,
        user_manager,
        archive_ops,
        crawl_manager,
        profiles,
    )

    crawls = init_crawls_api(
        app,
        mdb,
        redis_url,
        user_manager,
        crawl_manager,
        crawl_config_ops,
        archive_ops,
        current_active_user,
    )

    # Collections depend on crawls; crawl configs gain collection support
    # after both exist.
    coll_ops = init_collections_api(mdb, crawls, archive_ops, crawl_manager)
    crawl_config_ops.set_coll_ops(coll_ops)

    app.include_router(archive_ops.router)

    @app.get("/settings")
    async def get_settings():
        return settings

    @app.get("/healthz")
    async def healthz():
        return {}
# ============================================================================
@app.on_event("startup")
async def startup():
    """Run one-time API wiring when the ASGI server starts."""
    main()