diff --git a/backend/archives.py b/backend/archives.py deleted file mode 100644 index ee31ae0b..00000000 --- a/backend/archives.py +++ /dev/null @@ -1,55 +0,0 @@ -from typing import List, Optional, TypeVar -from pydantic import BaseModel, UUID4, validator -from fastapi import APIRouter, Depends -from users import User -import uuid -from bson.objectid import ObjectId - -class Archive(BaseModel): - #id: Optional[UUID4] - title: Optional[str] - user: Optional[UUID4] - - -class S3Archive(Archive): - endpoint_url: Optional[str] - is_public: Optional[bool] - - #@validator("id", pre=True, always=True) - #def default_id(cls, v): - # return v or uuid.uuid4() - - -def init_archives_api(app, db, user_dep: User): - archives_coll = db["archives"] - - router = APIRouter( - prefix="/archives", - tags=["archives"], - responses={404: {"description": "Not found"}}, - ) - - @router.get("/") - async def get_archives(user: User=Depends(user_dep)): - cursor = archives_coll.find({}) - results = await cursor.to_list(length=1000) - return {"archives": [{"id": str(res["_id"]), "title": res["title"], "endpoint_url": res["endpoint_url"]} for res in results]} - - @router.get("/{id}") - async def get_archives(id: str, user: User=Depends(user_dep)): - res = await archives_coll.find_one(ObjectId(id)) - print(res) - if not res: - return {} - - return {"id": id, "title": res["title"], "endpoint_url": res["endpoint_url"]} - - @router.post("/") - async def add_archive(archive: S3Archive, user: User = Depends(user_dep)): - archive.user = user.id - print(archive.user) - res = await archives_coll.insert_one(archive.dict()) - return {"added": str(res.inserted_id)} - - app.include_router(router) - diff --git a/backend/crawls.py b/backend/crawls.py index acc234a6..c3020768 100644 --- a/backend/crawls.py +++ b/backend/crawls.py @@ -1,17 +1,94 @@ -from typing import List, Optional, TypeVar -from pydantic import BaseModel, UUID4, validator +""" +Crawl Config API handling +""" + +from typing import List + +from pydantic import BaseModel from fastapi import APIRouter, Depends -from users import User -import uuid from bson.objectid import ObjectId -class SimpleCrawl(BaseModel): - url: str +from users import User + + +# ============================================================================ +class CrawlConfig(BaseModel): + """ Base Crawl Config""" scopeType: str + seeds: List[str] +# ============================================================================ +class CrawlConfigOut(CrawlConfig): + """ Crawl Config Response with id""" + id: str +# ============================================================================ +class CrawlOps: + """ Crawl Config Operations""" + def __init__(self, mdb): + self.crawl_configs = mdb["crawl_configs"] + + async def add_crawl_config(self, config: CrawlConfig, user: User): + """ Add new crawl config""" + data = config.dict() + data["user"] = user.id + return await self.crawl_configs.insert_one(data) + + async def update_crawl_config(self, config: CrawlConfig, user: User): + """ Update crawl config""" + data = config.dict() + data["user"] = user.id + return await self.crawl_configs.replace_one(data) + + async def delete_crawl_config(self, _id: str): + """ Delete config""" + return await self.crawl_configs.delete_one(ObjectId(_id)) + + async def get_crawl_configs(self, user: User): + """ Get all configs for user""" + cursor = self.crawl_configs.find({"user": user.id}) + results = await cursor.to_list(length=1000) + return [CrawlConfigOut(id=str(data["_id"]), **data) for data in results] + + async def get_crawl_config(self, _id: str, user: User): + """ Get config by id""" + data = await self.crawl_configs.find_one({"_id": ObjectId(_id), "user": user.id}) + return CrawlConfigOut(id=str(data["_id"]), **data) +# ============================================================================ +def init_crawl_config_api(app, mdb, user_dep: User): + """ Init /crawlconfigs api routes""" + ops = CrawlOps(mdb) + router = APIRouter( + prefix="/crawlconfigs", + tags=["crawlconfigs"], + responses={404: {"description": "Not found"}}, + ) + + @router.get("/") + async def get_crawl_configs(user: User = Depends(user_dep)): + results = await ops.get_crawl_configs(user) + print(results) + return {"crawl_configs": results} + + @router.get("/{id}") + async def get_crawl_config(_id: str, user: User = Depends(user_dep)): + res = await ops.get_crawl_config(_id, user) + print(res) + if not res: + return {} + + return res + + @router.post("/") + async def add_crawl_config(config: CrawlConfig, user: User = Depends(user_dep)): + res = await ops.add_crawl_config(config, user) + return {"added": str(res.inserted_id)} + + app.include_router(router) + + return ops diff --git a/backend/db.py b/backend/db.py index 1821dd1b..4c19d984 100644 --- a/backend/db.py +++ b/backend/db.py @@ -1,3 +1,7 @@ +""" +Browsertrix API Mongo DB initialization +""" + import os import motor.motor_asyncio @@ -8,10 +12,11 @@ DATABASE_URL = ( def init_db(): + """ initializde the mongodb connector """ client = motor.motor_asyncio.AsyncIOMotorClient( DATABASE_URL, uuidRepresentation="standard" ) - db = client["browsertrixcloud"] + mdb = client["browsertrixcloud"] - return db + return mdb diff --git a/backend/dockerdriver.py b/backend/dockerdriver.py index 4ad24938..b3fb88a1 100644 --- a/backend/dockerdriver.py +++ b/backend/dockerdriver.py @@ -1,9 +1,12 @@ import aiodocker + class DockerDriver(BaseDriver): def __init__(self): self.docker = aiodocker.Docker() - self.crawl_image = os.environ.get("CRAWLER_IMAGE", "webrecorder/browsertrix-crawler") + self.crawl_image = os.environ.get( + "CRAWLER_IMAGE", "webrecorder/browsertrix-crawler" + ) def start_crawl(self): container = await self.docker.containers.create(config=config) diff --git a/backend/main.py b/backend/main.py index 5af484d4..cc3da8a2 100644 --- a/backend/main.py +++ b/backend/main.py @@ -1,31 +1,84 @@ -from fastapi import FastAPI, Depends +""" +main file for browsertrix-api system +supports docker and kubernetes based deployments of multiple browsertrix-crawlers +""" -import logging +#import logging import os -import sys -import json +#import sys +#import json +#import asyncio -from users import init_users_api, User +from fastapi import FastAPI, Request +#from fastapi.responses import HTMLResponse +from fastapi.staticfiles import StaticFiles +from fastapi.templating import Jinja2Templates + + + +from users import init_users_api, UserDB from db import init_db -from archives import init_archives_api +from storages import init_storages_api +from crawls import init_crawl_config_api -db = init_db() +# ============================================================================ +class BrowsertrixAPI: + """ + Main class for BrowsertrixAPI + """ + # pylint: disable=too-many-instance-attributes + def __init__(self): + self.default_storage = os.environ.get( + "DEFAULT_STORAGE", "http://localhost:8010/store-bucket/" + ) -app = FastAPI() + self.app = FastAPI() -fastapi_users = init_users_api(app, db) + #self.app.mount("/static", StaticFiles(directory="static"), name="static") -current_active_user = fastapi_users.current_user(active=True) + #self.templates = Jinja2Templates(directory="templates") + self.mdb = init_db() -init_archives_api(app, db, current_active_user) + self.fastapi_users = init_users_api( + self.app, + self.mdb, + self.on_after_register, + self.on_after_forgot_password, + self.on_after_verification_request, + ) + + current_active_user = self.fastapi_users.current_user(active=True) + + self.storage_ops = init_storages_api(self.app, self.mdb, current_active_user) + + self.crawl_config_ops = init_crawl_config_api( + self.app, self.mdb, current_active_user + ) + self.crawler_image = os.environ.get("CRAWLER_IMAGE") + + # self.crawl_manager = CrawlManager() + + # @app.get("/") + # async def root(): + # return {"message": "Hello World"} + + # pylint: disable=no-self-use, unused-argument + async def on_after_register(self, user: UserDB, request): + """ callback after registeration""" + await self.storage_ops.create_storage_for_user(self.default_storage, user) + print(f"User {user.id} has registered.") + + # pylint: disable=no-self-use, unused-argument + def on_after_forgot_password(self, user: UserDB, token: str, request: Request): + """ callback after password forgot""" + print(f"User {user.id} has forgot their password. Reset token: {token}") + + # pylint: disable=no-self-use, unused-argument + def on_after_verification_request(self, user: UserDB, token: str, request: Request): + """ callback after verification request""" + print(f"Verification requested for user {user.id}. Verification token: {token}") -@app.get("/") -async def root(): - return {"message": "Hello World"} - - -@app.get("/protected-route") -def protected_route(user: User = Depends(current_active_user)): - return f"Hello, {user.email}" +# ============================================================================ +app = BrowsertrixAPI().app diff --git a/backend/requirements.txt b/backend/requirements.txt index eeac9d4e..203f967d 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -1,3 +1,4 @@ uvicorn fastapi-users[mongodb]==6.0.0 loguru +aiofiles diff --git a/backend/storages.py b/backend/storages.py new file mode 100644 index 00000000..514eb10a --- /dev/null +++ b/backend/storages.py @@ -0,0 +1,99 @@ +""" +Storage API handling +""" + +import os +from typing import Optional + +from pydantic import BaseModel, UUID4 +from fastapi import APIRouter, Depends +from bson.objectid import ObjectId + +from users import User + +# ============================================================================ +class Storage(BaseModel): + """Storage Base Model""" + title: str + user: UUID4 + + +# ============================================================================ +class S3Storage(Storage): + """ S3 Storage Model""" + endpoint_url: str + is_public: Optional[bool] + + +# ============================================================================ +class StorageOps: + """ Storage API operations""" + def __init__(self, db): + self.storages_coll = db["storages"] + + async def add_storage(self, storage: S3Storage): + """ Add new storage""" + return await self.storages_coll.insert_one(storage.dict()) + + async def create_storage_for_user(self, endpoint_prefix: str, user: User): + """ Create default storage for new user""" + endpoint_url = os.path.join(endpoint_prefix, str(user.id)) + "/" + storage = S3Storage( + endpoint_url=endpoint_url, is_public=False, user=user.id, title="default" + ) + print(f"Created Default Endpoint at ${endpoint_url}") + await self.add_storage(storage) + + async def get_storages(self, user: User): + """ Get all storages for user""" + cursor = self.storages_coll.find({"user": user.id}) + return await cursor.to_list(length=1000) + + async def get_storage(self, uid: str, user: User): + """ Get a storage for user""" + return await self.storages_coll.find_one({"_id": ObjectId(uid), "user": user.id}) + + +# ============================================================================ +def init_storages_api(app, mdb, user_dep: User): + """ Init storage api router for /storages""" + ops = StorageOps(mdb) + + router = APIRouter( + prefix="/storages", + tags=["storages"], + responses={404: {"description": "Not found"}}, + ) + + @router.get("/") + async def get_storages(user: User = Depends(user_dep)): + results = await ops.get_storages(user) + return { + "storages": [ + { + "id": str(res["_id"]), + "title": res["title"], + "endpoint_url": res["endpoint_url"], + } + for res in results + ] + } + + @router.get("/{id}") + async def get_storage(uid: str, user: User = Depends(user_dep)): + res = await ops.get_storage(uid, user) + print(res) + if not res: + return {} + + return {"id": uid, "title": res["title"], "endpoint_url": res["endpoint_url"]} + + @router.post("/") + async def add_storage(storage: S3Storage, user: User = Depends(user_dep)): + storage.user = user.id + res = await ops.add_storage(storage) + return {"added": str(res.inserted_id)} + + app.include_router(router) + + return ops diff --git a/backend/users.py b/backend/users.py index 85e07a7a..a84d8533 100644 --- a/backend/users.py +++ b/backend/users.py @@ -1,6 +1,9 @@ +""" +FastAPI user handling (via fastapi-users) +""" + import os import uuid -from fastapi import Request from fastapi_users import FastAPIUsers, models from fastapi_users.authentication import JWTAuthentication from fastapi_users.db import MongoDBUserDatabase @@ -8,36 +11,43 @@ from fastapi_users.db import MongoDBUserDatabase PASSWORD_SECRET = os.environ.get("PASSWORD_SECRET", uuid.uuid4().hex) +# ============================================================================ class User(models.BaseUser): - pass - + """ + Base User Model + """ +# ============================================================================ class UserCreate(models.BaseUserCreate): - pass - + """ + User Creation Model + """ +# ============================================================================ class UserUpdate(User, models.BaseUserUpdate): - pass - + """ + User Update Model + """ +# ============================================================================ class UserDB(User, models.BaseUserDB): - pass + """ + User in DB Model + """ +# ============================================================================ +def init_users_api( + app, + mdb, + on_after_register=None, + on_after_forgot_password=None, + after_verification_request=None, +): + """ + Load users table and init /users routes + """ -def on_after_register(user: UserDB, request: Request): - print(f"User {user.id} has registered.") - - -def on_after_forgot_password(user: UserDB, token: str, request: Request): - print(f"User {user.id} has forgot their password. Reset token: {token}") - - -def after_verification_request(user: UserDB, token: str, request: Request): - print(f"Verification requested for user {user.id}. Verification token: {token}") - - -def init_users_api(app, db): - user_collection = db["users"] + user_collection = mdb["users"] user_db = MongoDBUserDatabase(UserDB, user_collection) diff --git a/docker-compose.yaml b/docker-compose.yml similarity index 92% rename from docker-compose.yaml rename to docker-compose.yml index 4db9d4f0..afdc9cb9 100644 --- a/docker-compose.yaml +++ b/docker-compose.yml @@ -3,7 +3,7 @@ version: '3.5' services: backend: build: ./backend - image: btrixcloud/backend + image: webrecorder/browsertrix-api ports: - 8000:8000