browsertrix/backend/storages.py
Ilya Kreymer 19879fe349 Storage + Data Model Refactor (fixes #3):
- Add default vs custom (s3) storage (see the model sketch after this list)
- K8S: All storages correspond to secrets
- K8S: Default storages initialized via helm
- K8S: Custom storage results in a custom secret (per archive)
- K8S: Don't add a secret per crawl config
- API for changing storage per archive
- Docker: default storage hard-coded from env vars (only one for now)
- Validate custom storage via aiobotocore before confirming
- Data Model: remove usage from users
- Data Model: support adding multiple files per crawl for parallel crawls
- Data Model: track completions for parallel crawls
- Data Model: initial support for tags per crawl; add collection as 'coll' tag
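
The default vs. custom split above is represented by the DefaultStorage and S3Storage models imported from archives in the code below. Their actual definitions live in archives.py, not in this file; the following is a minimal sketch only, with the Literal discriminator values inferred from how storage.type, storage.name, and the S3 credential fields are used here:

from typing import Literal

from pydantic import BaseModel


class DefaultStorage(BaseModel):
    """ Reference to a pre-configured default storage, looked up by name """

    type: Literal["default"] = "default"
    name: str


class S3Storage(BaseModel):
    """ Custom S3-compatible storage with per-archive credentials """

    type: Literal["s3"] = "s3"
    endpoint_url: str
    access_key: str
    secret_key: str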

README fixes
2021-10-09 18:58:40 -07:00


"""
Storage API
"""
from typing import Union
from urllib.parse import urlsplit
from fastapi import Depends, HTTPException
from aiobotocore.session import get_session
from archives import Archive, DefaultStorage, S3Storage
from users import User
# ============================================================================
def init_storages_api(archive_ops, crawl_manager, user_dep):
    """ API for updating storage for an archive """
    router = archive_ops.router
    archive_owner_dep = archive_ops.archive_owner_dep

    # pylint: disable=bare-except, raise-missing-from
    @router.patch("/storage", tags=["archives"])
    async def update_storage(
        storage: Union[S3Storage, DefaultStorage],
        archive: Archive = Depends(archive_owner_dep),
        user: User = Depends(user_dep),
    ):
        if storage.type == "default":
            try:
                await crawl_manager.check_storage(storage.name, is_default=True)
            except:
                raise HTTPException(
                    status_code=400, detail=f"Invalid default storage {storage.name}"
                )

        else:
            try:
                await verify_storage_upload(storage, ".btrix-upload-verify")
            except:
                raise HTTPException(
                    status_code=400,
                    detail="Could not verify custom storage. Check that the credentials are valid.",
                )

        await archive_ops.update_storage(archive, storage)

        await crawl_manager.update_archive_storage(
            archive.id, str(user.id), archive.storage
        )

        return {"updated": True}

# ============================================================================
async def verify_storage_upload(storage, filename):
    """ Test credentials and storage endpoint by uploading an empty test file """

    if not storage.endpoint_url.endswith("/"):
        storage.endpoint_url += "/"

    session = get_session()

    # The configured endpoint_url carries the bucket (and optional key prefix)
    # in its path, e.g. https://s3.example.com/my-bucket/archives/ yields
    # bucket "my-bucket" and key prefix "archives/"
    parts = urlsplit(storage.endpoint_url)
    bucket, key = parts.path[1:].split("/", 1)
    key += filename

    endpoint_url = parts.scheme + "://" + parts.netloc

    async with session.create_client(
        "s3",
        region_name="",
        endpoint_url=endpoint_url,
        aws_access_key_id=storage.access_key,
        aws_secret_access_key=storage.secret_key,
    ) as client:
        # An empty object is enough to prove the credentials permit writes
        data = b""

        resp = await client.put_object(Bucket=bucket, Key=key, Body=data)

        assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200
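
For reference, a client call to the update_storage endpoint might look like the following sketch. The mount point of archive_ops.router (assumed here to be /archives/{archive_id}), the "s3" type value, and all hosts, tokens, and credentials are placeholders, not values confirmed by this file:

import requests

resp = requests.patch(
    "https://btrix.example.com/archives/<archive-id>/storage",
    headers={"Authorization": "Bearer <access-token>"},
    json={
        "type": "s3",
        # bucket (and optional key prefix) ride along in the URL path,
        # since verify_storage_upload() splits them out of endpoint_url
        "endpoint_url": "https://s3.example.com/my-bucket/archives/",
        "access_key": "<access-key>",
        "secret_key": "<secret-key>",
    },
)
resp.raise_for_status()
assert resp.json() == {"updated": True}

On success, the handler persists the storage on the archive and propagates it to the crawl manager (per the commit message, updating the per-archive secret on K8S) before returning {"updated": true}.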