rename archives -> storages
add crawlconfig apis run lint pass, prep for k8s / docker crawl manager support
This commit is contained in:
parent
b08a188fea
commit
c3143df0a2
@ -1,55 +0,0 @@
|
||||
from typing import List, Optional, TypeVar
|
||||
from pydantic import BaseModel, UUID4, validator
|
||||
from fastapi import APIRouter, Depends
|
||||
from users import User
|
||||
import uuid
|
||||
from bson.objectid import ObjectId
|
||||
|
||||
class Archive(BaseModel):
|
||||
#id: Optional[UUID4]
|
||||
title: Optional[str]
|
||||
user: Optional[UUID4]
|
||||
|
||||
|
||||
class S3Archive(Archive):
|
||||
endpoint_url: Optional[str]
|
||||
is_public: Optional[bool]
|
||||
|
||||
#@validator("id", pre=True, always=True)
|
||||
#def default_id(cls, v):
|
||||
# return v or uuid.uuid4()
|
||||
|
||||
|
||||
def init_archives_api(app, db, user_dep: User):
|
||||
archives_coll = db["archives"]
|
||||
|
||||
router = APIRouter(
|
||||
prefix="/archives",
|
||||
tags=["archives"],
|
||||
responses={404: {"description": "Not found"}},
|
||||
)
|
||||
|
||||
@router.get("/")
|
||||
async def get_archives(user: User=Depends(user_dep)):
|
||||
cursor = archives_coll.find({})
|
||||
results = await cursor.to_list(length=1000)
|
||||
return {"archives": [{"id": str(res["_id"]), "title": res["title"], "endpoint_url": res["endpoint_url"]} for res in results]}
|
||||
|
||||
@router.get("/{id}")
|
||||
async def get_archives(id: str, user: User=Depends(user_dep)):
|
||||
res = await archives_coll.find_one(ObjectId(id))
|
||||
print(res)
|
||||
if not res:
|
||||
return {}
|
||||
|
||||
return {"id": id, "title": res["title"], "endpoint_url": res["endpoint_url"]}
|
||||
|
||||
@router.post("/")
|
||||
async def add_archive(archive: S3Archive, user: User = Depends(user_dep)):
|
||||
archive.user = user.id
|
||||
print(archive.user)
|
||||
res = await archives_coll.insert_one(archive.dict())
|
||||
return {"added": str(res.inserted_id)}
|
||||
|
||||
app.include_router(router)
|
||||
|
||||
@ -1,17 +1,94 @@
|
||||
from typing import List, Optional, TypeVar
|
||||
from pydantic import BaseModel, UUID4, validator
|
||||
"""
|
||||
Crawl Config API handling
|
||||
"""
|
||||
|
||||
from typing import List
|
||||
|
||||
from pydantic import BaseModel
|
||||
from fastapi import APIRouter, Depends
|
||||
from users import User
|
||||
import uuid
|
||||
from bson.objectid import ObjectId
|
||||
|
||||
class SimpleCrawl(BaseModel):
|
||||
url: str
|
||||
from users import User
|
||||
|
||||
|
||||
# ============================================================================
|
||||
class CrawlConfig(BaseModel):
|
||||
""" Base Crawl Config"""
|
||||
scopeType: str
|
||||
seeds: List[str]
|
||||
|
||||
|
||||
# ============================================================================
|
||||
class CrawlConfigOut(CrawlConfig):
|
||||
""" Crawl Config Response with id"""
|
||||
id: str
|
||||
|
||||
|
||||
# ============================================================================
|
||||
class CrawlOps:
|
||||
""" Crawl Config Operations"""
|
||||
def __init__(self, mdb):
|
||||
self.crawl_configs = mdb["crawl_configs"]
|
||||
|
||||
async def add_crawl_config(self, config: CrawlConfig, user: User):
|
||||
""" Add new crawl config"""
|
||||
data = config.dict()
|
||||
data["user"] = user.id
|
||||
return await self.crawl_configs.insert_one(data)
|
||||
|
||||
async def update_crawl_config(self, config: CrawlConfig, user: User):
|
||||
""" Update crawl config"""
|
||||
data = config.dict()
|
||||
data["user"] = user.id
|
||||
return await self.crawl_configs.replace_one(data)
|
||||
|
||||
async def delete_crawl_config(self, _id: str):
|
||||
""" Delete config"""
|
||||
return await self.crawl_configs.delete_one(ObjectId(_id))
|
||||
|
||||
async def get_crawl_configs(self, user: User):
|
||||
""" Get all configs for user"""
|
||||
cursor = self.crawl_configs.find({"user": user.id})
|
||||
results = await cursor.to_list(length=1000)
|
||||
return [CrawlConfigOut(id=str(data["_id"]), **data) for data in results]
|
||||
|
||||
async def get_crawl_config(self, _id: str, user: User):
|
||||
""" Get config by id"""
|
||||
data = await self.crawl_configs.find_one({"_id": ObjectId(_id), "user": user.id})
|
||||
return CrawlConfigOut(id=str(data["_id"]), **data)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
def init_crawl_config_api(app, mdb, user_dep: User):
|
||||
""" Init /crawlconfigs api routes"""
|
||||
ops = CrawlOps(mdb)
|
||||
|
||||
router = APIRouter(
|
||||
prefix="/crawlconfigs",
|
||||
tags=["crawlconfigs"],
|
||||
responses={404: {"description": "Not found"}},
|
||||
)
|
||||
|
||||
@router.get("/")
|
||||
async def get_crawl_configs(user: User = Depends(user_dep)):
|
||||
results = await ops.get_crawl_configs(user)
|
||||
print(results)
|
||||
return {"crawl_configs": results}
|
||||
|
||||
@router.get("/{id}")
|
||||
async def get_crawl_config(_id: str, user: User = Depends(user_dep)):
|
||||
res = await ops.get_crawl_config(_id, user)
|
||||
print(res)
|
||||
if not res:
|
||||
return {}
|
||||
|
||||
return res
|
||||
|
||||
@router.post("/")
|
||||
async def add_crawl_config(config: CrawlConfig, user: User = Depends(user_dep)):
|
||||
res = await ops.add_crawl_config(config, user)
|
||||
return {"added": str(res.inserted_id)}
|
||||
|
||||
app.include_router(router)
|
||||
|
||||
return ops
|
||||
|
||||
@ -1,3 +1,7 @@
|
||||
"""
|
||||
Browsertrix API Mongo DB initialization
|
||||
"""
|
||||
|
||||
import os
|
||||
import motor.motor_asyncio
|
||||
|
||||
@ -8,10 +12,11 @@ DATABASE_URL = (
|
||||
|
||||
|
||||
def init_db():
|
||||
""" initializde the mongodb connector """
|
||||
client = motor.motor_asyncio.AsyncIOMotorClient(
|
||||
DATABASE_URL, uuidRepresentation="standard"
|
||||
)
|
||||
|
||||
db = client["browsertrixcloud"]
|
||||
mdb = client["browsertrixcloud"]
|
||||
|
||||
return db
|
||||
return mdb
|
||||
|
||||
@ -1,9 +1,12 @@
|
||||
import aiodocker
|
||||
|
||||
|
||||
class DockerDriver(BaseDriver):
|
||||
def __init__(self):
|
||||
self.docker = aiodocker.Docker()
|
||||
self.crawl_image = os.environ.get("CRAWLER_IMAGE", "webrecorder/browsertrix-crawler")
|
||||
self.crawl_image = os.environ.get(
|
||||
"CRAWLER_IMAGE", "webrecorder/browsertrix-crawler"
|
||||
)
|
||||
|
||||
def start_crawl(self):
|
||||
container = await self.docker.containers.create(config=config)
|
||||
|
||||
@ -1,31 +1,84 @@
|
||||
from fastapi import FastAPI, Depends
|
||||
"""
|
||||
main file for browsertrix-api system
|
||||
supports docker and kubernetes based deployments of multiple browsertrix-crawlers
|
||||
"""
|
||||
|
||||
import logging
|
||||
#import logging
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
#import sys
|
||||
#import json
|
||||
#import asyncio
|
||||
|
||||
from users import init_users_api, User
|
||||
from fastapi import FastAPI, Request
|
||||
#from fastapi.responses import HTMLResponse
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from fastapi.templating import Jinja2Templates
|
||||
|
||||
|
||||
|
||||
from users import init_users_api, UserDB
|
||||
from db import init_db
|
||||
from archives import init_archives_api
|
||||
from storages import init_storages_api
|
||||
from crawls import init_crawl_config_api
|
||||
|
||||
|
||||
db = init_db()
|
||||
# ============================================================================
|
||||
class BrowsertrixAPI:
|
||||
"""
|
||||
Main class for BrowsertrixAPI
|
||||
"""
|
||||
# pylint: disable=too-many-instance-attributes
|
||||
def __init__(self):
|
||||
self.default_storage = os.environ.get(
|
||||
"DEFAULT_STORAGE", "http://localhost:8010/store-bucket/"
|
||||
)
|
||||
|
||||
app = FastAPI()
|
||||
self.app = FastAPI()
|
||||
|
||||
fastapi_users = init_users_api(app, db)
|
||||
#self.app.mount("/static", StaticFiles(directory="static"), name="static")
|
||||
|
||||
current_active_user = fastapi_users.current_user(active=True)
|
||||
#self.templates = Jinja2Templates(directory="templates")
|
||||
self.mdb = init_db()
|
||||
|
||||
init_archives_api(app, db, current_active_user)
|
||||
self.fastapi_users = init_users_api(
|
||||
self.app,
|
||||
self.mdb,
|
||||
self.on_after_register,
|
||||
self.on_after_forgot_password,
|
||||
self.on_after_verification_request,
|
||||
)
|
||||
|
||||
current_active_user = self.fastapi_users.current_user(active=True)
|
||||
|
||||
self.storage_ops = init_storages_api(self.app, self.mdb, current_active_user)
|
||||
|
||||
self.crawl_config_ops = init_crawl_config_api(
|
||||
self.app, self.mdb, current_active_user
|
||||
)
|
||||
self.crawler_image = os.environ.get("CRAWLER_IMAGE")
|
||||
|
||||
# self.crawl_manager = CrawlManager()
|
||||
|
||||
# @app.get("/")
|
||||
# async def root():
|
||||
# return {"message": "Hello World"}
|
||||
|
||||
# pylint: disable=no-self-use, unused-argument
|
||||
async def on_after_register(self, user: UserDB, request):
|
||||
""" callback after registeration"""
|
||||
await self.storage_ops.create_storage_for_user(self.default_storage, user)
|
||||
print(f"User {user.id} has registered.")
|
||||
|
||||
# pylint: disable=no-self-use, unused-argument
|
||||
def on_after_forgot_password(self, user: UserDB, token: str, request: Request):
|
||||
""" callback after password forgot"""
|
||||
print(f"User {user.id} has forgot their password. Reset token: {token}")
|
||||
|
||||
# pylint: disable=no-self-use, unused-argument
|
||||
def on_after_verification_request(self, user: UserDB, token: str, request: Request):
|
||||
""" callback after verification request"""
|
||||
print(f"Verification requested for user {user.id}. Verification token: {token}")
|
||||
|
||||
|
||||
@app.get("/")
|
||||
async def root():
|
||||
return {"message": "Hello World"}
|
||||
|
||||
|
||||
@app.get("/protected-route")
|
||||
def protected_route(user: User = Depends(current_active_user)):
|
||||
return f"Hello, {user.email}"
|
||||
# ============================================================================
|
||||
app = BrowsertrixAPI().app
|
||||
|
||||
@ -1,3 +1,4 @@
|
||||
uvicorn
|
||||
fastapi-users[mongodb]==6.0.0
|
||||
loguru
|
||||
aiofiles
|
||||
|
||||
99
backend/storages.py
Normal file
99
backend/storages.py
Normal file
@ -0,0 +1,99 @@
|
||||
"""
|
||||
Storage API handling
|
||||
"""
|
||||
|
||||
import os
|
||||
from typing import Optional
|
||||
|
||||
from pydantic import BaseModel, UUID4
|
||||
from fastapi import APIRouter, Depends
|
||||
from bson.objectid import ObjectId
|
||||
|
||||
from users import User
|
||||
|
||||
# ============================================================================
|
||||
class Storage(BaseModel):
|
||||
"""Storage Base Model"""
|
||||
title: str
|
||||
user: UUID4
|
||||
|
||||
|
||||
# ============================================================================
|
||||
class S3Storage(Storage):
|
||||
""" S3 Storage Model"""
|
||||
endpoint_url: str
|
||||
is_public: Optional[bool]
|
||||
|
||||
|
||||
# ============================================================================
|
||||
class StorageOps:
|
||||
""" Storage API operations"""
|
||||
def __init__(self, db):
|
||||
self.storages_coll = db["storages"]
|
||||
|
||||
async def add_storage(self, storage: S3Storage):
|
||||
""" Add new storage"""
|
||||
return await self.storages_coll.insert_one(storage.dict())
|
||||
|
||||
async def create_storage_for_user(self, endpoint_prefix: str, user: User):
|
||||
""" Create default storage for new user"""
|
||||
endpoint_url = os.path.join(endpoint_prefix, str(user.id)) + "/"
|
||||
storage = S3Storage(
|
||||
endpoint_url=endpoint_url, is_public=False, user=user.id, title="default"
|
||||
)
|
||||
print(f"Created Default Endpoint at ${endpoint_url}")
|
||||
await self.add_storage(storage)
|
||||
|
||||
async def get_storages(self, user: User):
|
||||
""" Get all storages for user"""
|
||||
cursor = self.storages_coll.find({"user": user.id})
|
||||
return await cursor.to_list(length=1000)
|
||||
|
||||
async def get_storage(self, uid: str, user: User):
|
||||
""" Get a storage for user"""
|
||||
return await self.storages_coll.find_one({"_id": ObjectId(uid), "user": user.id})
|
||||
|
||||
|
||||
# ============================================================================
|
||||
def init_storages_api(app, mdb, user_dep: User):
|
||||
""" Init storage api router for /storages"""
|
||||
ops = StorageOps(mdb)
|
||||
|
||||
router = APIRouter(
|
||||
prefix="/storages",
|
||||
tags=["storages"],
|
||||
responses={404: {"description": "Not found"}},
|
||||
)
|
||||
|
||||
@router.get("/")
|
||||
async def get_storages(user: User = Depends(user_dep)):
|
||||
results = await ops.get_storages(user)
|
||||
return {
|
||||
"storages": [
|
||||
{
|
||||
"id": str(res["_id"]),
|
||||
"title": res["title"],
|
||||
"endpoint_url": res["endpoint_url"],
|
||||
}
|
||||
for res in results
|
||||
]
|
||||
}
|
||||
|
||||
@router.get("/{id}")
|
||||
async def get_storage(uid: str, user: User = Depends(user_dep)):
|
||||
res = await ops.get_storage(uid, user)
|
||||
print(res)
|
||||
if not res:
|
||||
return {}
|
||||
|
||||
return {"id": uid, "title": res["title"], "endpoint_url": res["endpoint_url"]}
|
||||
|
||||
@router.post("/")
|
||||
async def add_storage(storage: S3Storage, user: User = Depends(user_dep)):
|
||||
storage.user = user.id
|
||||
res = await ops.add_storage(storage)
|
||||
return {"added": str(res.inserted_id)}
|
||||
|
||||
app.include_router(router)
|
||||
|
||||
return ops
|
||||
@ -1,6 +1,9 @@
|
||||
"""
|
||||
FastAPI user handling (via fastapi-users)
|
||||
"""
|
||||
|
||||
import os
|
||||
import uuid
|
||||
from fastapi import Request
|
||||
from fastapi_users import FastAPIUsers, models
|
||||
from fastapi_users.authentication import JWTAuthentication
|
||||
from fastapi_users.db import MongoDBUserDatabase
|
||||
@ -8,36 +11,43 @@ from fastapi_users.db import MongoDBUserDatabase
|
||||
PASSWORD_SECRET = os.environ.get("PASSWORD_SECRET", uuid.uuid4().hex)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
class User(models.BaseUser):
|
||||
pass
|
||||
|
||||
"""
|
||||
Base User Model
|
||||
"""
|
||||
|
||||
# ============================================================================
|
||||
class UserCreate(models.BaseUserCreate):
|
||||
pass
|
||||
|
||||
"""
|
||||
User Creation Model
|
||||
"""
|
||||
|
||||
# ============================================================================
|
||||
class UserUpdate(User, models.BaseUserUpdate):
|
||||
pass
|
||||
|
||||
"""
|
||||
User Update Model
|
||||
"""
|
||||
|
||||
# ============================================================================
|
||||
class UserDB(User, models.BaseUserDB):
|
||||
pass
|
||||
"""
|
||||
User in DB Model
|
||||
"""
|
||||
|
||||
# ============================================================================
|
||||
def init_users_api(
|
||||
app,
|
||||
mdb,
|
||||
on_after_register=None,
|
||||
on_after_forgot_password=None,
|
||||
after_verification_request=None,
|
||||
):
|
||||
"""
|
||||
Load users table and init /users routes
|
||||
"""
|
||||
|
||||
def on_after_register(user: UserDB, request: Request):
|
||||
print(f"User {user.id} has registered.")
|
||||
|
||||
|
||||
def on_after_forgot_password(user: UserDB, token: str, request: Request):
|
||||
print(f"User {user.id} has forgot their password. Reset token: {token}")
|
||||
|
||||
|
||||
def after_verification_request(user: UserDB, token: str, request: Request):
|
||||
print(f"Verification requested for user {user.id}. Verification token: {token}")
|
||||
|
||||
|
||||
def init_users_api(app, db):
|
||||
user_collection = db["users"]
|
||||
user_collection = mdb["users"]
|
||||
|
||||
user_db = MongoDBUserDatabase(UserDB, user_collection)
|
||||
|
||||
|
||||
@ -3,7 +3,7 @@ version: '3.5'
|
||||
services:
|
||||
backend:
|
||||
build: ./backend
|
||||
image: btrixcloud/backend
|
||||
image: webrecorder/browsertrix-api
|
||||
ports:
|
||||
- 8000:8000
|
||||
|
||||
Loading…
Reference in New Issue
Block a user