rename archives -> storages

add crawlconfig apis
run lint pass, prep for k8s / docker crawl manager support
This commit is contained in:
Ilya Kreymer 2021-06-29 20:30:33 -07:00
parent b08a188fea
commit c3143df0a2
9 changed files with 299 additions and 106 deletions

View File

@ -1,55 +0,0 @@
from typing import List, Optional, TypeVar
from pydantic import BaseModel, UUID4, validator
from fastapi import APIRouter, Depends
from users import User
import uuid
from bson.objectid import ObjectId
class Archive(BaseModel):
#id: Optional[UUID4]
title: Optional[str]
user: Optional[UUID4]
class S3Archive(Archive):
endpoint_url: Optional[str]
is_public: Optional[bool]
#@validator("id", pre=True, always=True)
#def default_id(cls, v):
# return v or uuid.uuid4()
def init_archives_api(app, db, user_dep: User):
archives_coll = db["archives"]
router = APIRouter(
prefix="/archives",
tags=["archives"],
responses={404: {"description": "Not found"}},
)
@router.get("/")
async def get_archives(user: User=Depends(user_dep)):
cursor = archives_coll.find({})
results = await cursor.to_list(length=1000)
return {"archives": [{"id": str(res["_id"]), "title": res["title"], "endpoint_url": res["endpoint_url"]} for res in results]}
@router.get("/{id}")
async def get_archives(id: str, user: User=Depends(user_dep)):
res = await archives_coll.find_one(ObjectId(id))
print(res)
if not res:
return {}
return {"id": id, "title": res["title"], "endpoint_url": res["endpoint_url"]}
@router.post("/")
async def add_archive(archive: S3Archive, user: User = Depends(user_dep)):
archive.user = user.id
print(archive.user)
res = await archives_coll.insert_one(archive.dict())
return {"added": str(res.inserted_id)}
app.include_router(router)

View File

@ -1,17 +1,94 @@
from typing import List, Optional, TypeVar
from pydantic import BaseModel, UUID4, validator
"""
Crawl Config API handling
"""
from typing import List
from pydantic import BaseModel
from fastapi import APIRouter, Depends
from users import User
import uuid
from bson.objectid import ObjectId
class SimpleCrawl(BaseModel):
url: str
from users import User
# ============================================================================
class CrawlConfig(BaseModel):
""" Base Crawl Config"""
scopeType: str
seeds: List[str]
# ============================================================================
class CrawlConfigOut(CrawlConfig):
""" Crawl Config Response with id"""
id: str
# ============================================================================
class CrawlOps:
""" Crawl Config Operations"""
def __init__(self, mdb):
self.crawl_configs = mdb["crawl_configs"]
async def add_crawl_config(self, config: CrawlConfig, user: User):
""" Add new crawl config"""
data = config.dict()
data["user"] = user.id
return await self.crawl_configs.insert_one(data)
async def update_crawl_config(self, config: CrawlConfig, user: User):
""" Update crawl config"""
data = config.dict()
data["user"] = user.id
return await self.crawl_configs.replace_one(data)
async def delete_crawl_config(self, _id: str):
""" Delete config"""
return await self.crawl_configs.delete_one(ObjectId(_id))
async def get_crawl_configs(self, user: User):
""" Get all configs for user"""
cursor = self.crawl_configs.find({"user": user.id})
results = await cursor.to_list(length=1000)
return [CrawlConfigOut(id=str(data["_id"]), **data) for data in results]
async def get_crawl_config(self, _id: str, user: User):
""" Get config by id"""
data = await self.crawl_configs.find_one({"_id": ObjectId(_id), "user": user.id})
return CrawlConfigOut(id=str(data["_id"]), **data)
# ============================================================================
def init_crawl_config_api(app, mdb, user_dep: User):
""" Init /crawlconfigs api routes"""
ops = CrawlOps(mdb)
router = APIRouter(
prefix="/crawlconfigs",
tags=["crawlconfigs"],
responses={404: {"description": "Not found"}},
)
@router.get("/")
async def get_crawl_configs(user: User = Depends(user_dep)):
results = await ops.get_crawl_configs(user)
print(results)
return {"crawl_configs": results}
@router.get("/{id}")
async def get_crawl_config(_id: str, user: User = Depends(user_dep)):
res = await ops.get_crawl_config(_id, user)
print(res)
if not res:
return {}
return res
@router.post("/")
async def add_crawl_config(config: CrawlConfig, user: User = Depends(user_dep)):
res = await ops.add_crawl_config(config, user)
return {"added": str(res.inserted_id)}
app.include_router(router)
return ops

View File

@ -1,3 +1,7 @@
"""
Browsertrix API Mongo DB initialization
"""
import os
import motor.motor_asyncio
@ -8,10 +12,11 @@ DATABASE_URL = (
def init_db():
""" initializde the mongodb connector """
client = motor.motor_asyncio.AsyncIOMotorClient(
DATABASE_URL, uuidRepresentation="standard"
)
db = client["browsertrixcloud"]
mdb = client["browsertrixcloud"]
return db
return mdb

View File

@ -1,9 +1,12 @@
import aiodocker
class DockerDriver(BaseDriver):
def __init__(self):
self.docker = aiodocker.Docker()
self.crawl_image = os.environ.get("CRAWLER_IMAGE", "webrecorder/browsertrix-crawler")
self.crawl_image = os.environ.get(
"CRAWLER_IMAGE", "webrecorder/browsertrix-crawler"
)
def start_crawl(self):
container = await self.docker.containers.create(config=config)

View File

@ -1,31 +1,84 @@
from fastapi import FastAPI, Depends
"""
main file for browsertrix-api system
supports docker and kubernetes based deployments of multiple browsertrix-crawlers
"""
import logging
#import logging
import os
import sys
import json
#import sys
#import json
#import asyncio
from users import init_users_api, User
from fastapi import FastAPI, Request
#from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from users import init_users_api, UserDB
from db import init_db
from archives import init_archives_api
from storages import init_storages_api
from crawls import init_crawl_config_api
db = init_db()
# ============================================================================
class BrowsertrixAPI:
"""
Main class for BrowsertrixAPI
"""
# pylint: disable=too-many-instance-attributes
def __init__(self):
self.default_storage = os.environ.get(
"DEFAULT_STORAGE", "http://localhost:8010/store-bucket/"
)
app = FastAPI()
self.app = FastAPI()
fastapi_users = init_users_api(app, db)
#self.app.mount("/static", StaticFiles(directory="static"), name="static")
current_active_user = fastapi_users.current_user(active=True)
#self.templates = Jinja2Templates(directory="templates")
self.mdb = init_db()
init_archives_api(app, db, current_active_user)
self.fastapi_users = init_users_api(
self.app,
self.mdb,
self.on_after_register,
self.on_after_forgot_password,
self.on_after_verification_request,
)
current_active_user = self.fastapi_users.current_user(active=True)
self.storage_ops = init_storages_api(self.app, self.mdb, current_active_user)
self.crawl_config_ops = init_crawl_config_api(
self.app, self.mdb, current_active_user
)
self.crawler_image = os.environ.get("CRAWLER_IMAGE")
# self.crawl_manager = CrawlManager()
# @app.get("/")
# async def root():
# return {"message": "Hello World"}
# pylint: disable=no-self-use, unused-argument
async def on_after_register(self, user: UserDB, request):
""" callback after registeration"""
await self.storage_ops.create_storage_for_user(self.default_storage, user)
print(f"User {user.id} has registered.")
# pylint: disable=no-self-use, unused-argument
def on_after_forgot_password(self, user: UserDB, token: str, request: Request):
""" callback after password forgot"""
print(f"User {user.id} has forgot their password. Reset token: {token}")
# pylint: disable=no-self-use, unused-argument
def on_after_verification_request(self, user: UserDB, token: str, request: Request):
""" callback after verification request"""
print(f"Verification requested for user {user.id}. Verification token: {token}")
@app.get("/")
async def root():
return {"message": "Hello World"}
@app.get("/protected-route")
def protected_route(user: User = Depends(current_active_user)):
return f"Hello, {user.email}"
# ============================================================================
app = BrowsertrixAPI().app

View File

@ -1,3 +1,4 @@
uvicorn
fastapi-users[mongodb]==6.0.0
loguru
aiofiles

99
backend/storages.py Normal file
View File

@ -0,0 +1,99 @@
"""
Storage API handling
"""
import os
from typing import Optional
from pydantic import BaseModel, UUID4
from fastapi import APIRouter, Depends
from bson.objectid import ObjectId
from users import User
# ============================================================================
class Storage(BaseModel):
"""Storage Base Model"""
title: str
user: UUID4
# ============================================================================
class S3Storage(Storage):
""" S3 Storage Model"""
endpoint_url: str
is_public: Optional[bool]
# ============================================================================
class StorageOps:
""" Storage API operations"""
def __init__(self, db):
self.storages_coll = db["storages"]
async def add_storage(self, storage: S3Storage):
""" Add new storage"""
return await self.storages_coll.insert_one(storage.dict())
async def create_storage_for_user(self, endpoint_prefix: str, user: User):
""" Create default storage for new user"""
endpoint_url = os.path.join(endpoint_prefix, str(user.id)) + "/"
storage = S3Storage(
endpoint_url=endpoint_url, is_public=False, user=user.id, title="default"
)
print(f"Created Default Endpoint at ${endpoint_url}")
await self.add_storage(storage)
async def get_storages(self, user: User):
""" Get all storages for user"""
cursor = self.storages_coll.find({"user": user.id})
return await cursor.to_list(length=1000)
async def get_storage(self, uid: str, user: User):
""" Get a storage for user"""
return await self.storages_coll.find_one({"_id": ObjectId(uid), "user": user.id})
# ============================================================================
def init_storages_api(app, mdb, user_dep: User):
""" Init storage api router for /storages"""
ops = StorageOps(mdb)
router = APIRouter(
prefix="/storages",
tags=["storages"],
responses={404: {"description": "Not found"}},
)
@router.get("/")
async def get_storages(user: User = Depends(user_dep)):
results = await ops.get_storages(user)
return {
"storages": [
{
"id": str(res["_id"]),
"title": res["title"],
"endpoint_url": res["endpoint_url"],
}
for res in results
]
}
@router.get("/{id}")
async def get_storage(uid: str, user: User = Depends(user_dep)):
res = await ops.get_storage(uid, user)
print(res)
if not res:
return {}
return {"id": uid, "title": res["title"], "endpoint_url": res["endpoint_url"]}
@router.post("/")
async def add_storage(storage: S3Storage, user: User = Depends(user_dep)):
storage.user = user.id
res = await ops.add_storage(storage)
return {"added": str(res.inserted_id)}
app.include_router(router)
return ops

View File

@ -1,6 +1,9 @@
"""
FastAPI user handling (via fastapi-users)
"""
import os
import uuid
from fastapi import Request
from fastapi_users import FastAPIUsers, models
from fastapi_users.authentication import JWTAuthentication
from fastapi_users.db import MongoDBUserDatabase
@ -8,36 +11,43 @@ from fastapi_users.db import MongoDBUserDatabase
PASSWORD_SECRET = os.environ.get("PASSWORD_SECRET", uuid.uuid4().hex)
# ============================================================================
class User(models.BaseUser):
pass
"""
Base User Model
"""
# ============================================================================
class UserCreate(models.BaseUserCreate):
pass
"""
User Creation Model
"""
# ============================================================================
class UserUpdate(User, models.BaseUserUpdate):
pass
"""
User Update Model
"""
# ============================================================================
class UserDB(User, models.BaseUserDB):
pass
"""
User in DB Model
"""
# ============================================================================
def init_users_api(
app,
mdb,
on_after_register=None,
on_after_forgot_password=None,
after_verification_request=None,
):
"""
Load users table and init /users routes
"""
def on_after_register(user: UserDB, request: Request):
print(f"User {user.id} has registered.")
def on_after_forgot_password(user: UserDB, token: str, request: Request):
print(f"User {user.id} has forgot their password. Reset token: {token}")
def after_verification_request(user: UserDB, token: str, request: Request):
print(f"Verification requested for user {user.id}. Verification token: {token}")
def init_users_api(app, db):
user_collection = db["users"]
user_collection = mdb["users"]
user_db = MongoDBUserDatabase(UserDB, user_collection)

View File

@ -3,7 +3,7 @@ version: '3.5'
services:
backend:
build: ./backend
image: btrixcloud/backend
image: webrecorder/browsertrix-api
ports:
- 8000:8000