- Add default vs custom (s3) storage
- K8S: All storages correspond to secrets
- K8S: Default storages inited via helm
- K8S: Custom storage results in custom secret (per archive)
- K8S: Don't add secret per crawl config
- API for changing storage per archive
- Docker: default storage just hard-coded from env vars (only one for now)
- Validate custom storage via aiobotocore before confirming
- Data Model: remove usage from users
- Data Model: support adding multiple files per crawl for parallel crawls
- Data Model: track completions for parallel crawls
- Data Model: initial support for tags per crawl, add collection as 'coll' tag
- README fixes
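The update_storage endpoint in the file below accepts either a default storage (pre-provisioned via helm on K8S, or env vars on Docker) or a custom S3 storage. As a minimal sketch only, here is what the DefaultStorage and S3Storage models imported from archives might look like; the field names are inferred from how the handler uses them, and the real definitions in archives.py may differ:

# Hypothetical sketch only -- the real models live in archives.py.
# Field names are inferred from their usage in the storage API below.
from typing import Literal

from pydantic import BaseModel


class DefaultStorage(BaseModel):
    """Shared storage pre-configured by the operator (helm secret on K8S)."""

    type: Literal["default"] = "default"
    name: str  # which pre-configured default storage to use


class S3Storage(BaseModel):
    """Custom per-archive S3 storage, saved as its own secret on K8S."""

    type: Literal["s3"] = "s3"
    endpoint_url: str  # e.g. "https://s3.example.com/bucket/prefix/"
    access_key: str
    secret_key: str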
Python · 79 lines · 2.5 KiB
"""
|
|
Storage API
|
|
"""
|
|
from typing import Union
|
|
from urllib.parse import urlsplit
|
|
|
|
from fastapi import Depends, HTTPException
|
|
from aiobotocore.session import get_session
|
|
|
|
from archives import Archive, DefaultStorage, S3Storage
|
|
from users import User
|
|
|
|
|
|
# ============================================================================
|
|
def init_storages_api(archive_ops, crawl_manager, user_dep):
|
|
""" API for updating storage for an archive """
|
|
|
|
router = archive_ops.router
|
|
archive_owner_dep = archive_ops.archive_owner_dep
|
|
|
|
# pylint: disable=bare-except, raise-missing-from
|
|
@router.patch("/storage", tags=["archives"])
|
|
async def update_storage(
|
|
storage: Union[S3Storage, DefaultStorage],
|
|
archive: Archive = Depends(archive_owner_dep),
|
|
user: User = Depends(user_dep),
|
|
):
|
|
if storage.type == "default":
|
|
try:
|
|
await crawl_manager.check_storage(storage.name, is_default=True)
|
|
except:
|
|
raise HTTPException(
|
|
status_code=400, detail=f"Invalid default storage {storage.name}"
|
|
)
|
|
|
|
else:
|
|
try:
|
|
await verify_storage_upload(storage, ".btrix-upload-verify")
|
|
except:
|
|
raise HTTPException(
|
|
status_code=400,
|
|
detail="Could not verify custom storage. Check credentials are valid?",
|
|
)
|
|
|
|
await archive_ops.update_storage(archive, storage)
|
|
|
|
await crawl_manager.update_archive_storage(
|
|
archive.id, str(user.id), archive.storage
|
|
)
|
|
|
|
return {"updated": True}
|
|
|
|
|
|
# ============================================================================
|
|
async def verify_storage_upload(storage, filename):
|
|
""" Test credentials and storage endpoint by uploading an empty test file """
|
|
if not storage.endpoint_url.endswith("/"):
|
|
storage.endpoint_url += "/"
|
|
|
|
session = get_session()
|
|
|
|
parts = urlsplit(storage.endpoint_url)
|
|
|
|
bucket, key = parts.path[1:].split("/", 1)
|
|
key += filename
|
|
|
|
endpoint_url = parts.scheme + "://" + parts.netloc
|
|
|
|
async with session.create_client(
|
|
"s3",
|
|
region_name="",
|
|
endpoint_url=endpoint_url,
|
|
aws_access_key_id=storage.access_key,
|
|
aws_secret_access_key=storage.secret_key,
|
|
) as client:
|
|
data = b""
|
|
resp = await client.put_object(Bucket=bucket, Key=key, Body=data)
|
|
assert resp["ResponseMetadata"]["HTTPStatusCode"] == 200
|
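A hedged usage example for the endpoint above. The /api/archives/&lt;archive-id&gt;/storage path and the bearer-token auth are assumptions about how archive_ops.router is mounted and secured in the larger app; only the request body shape follows from the handler itself:

# Hypothetical client call -- URL prefix, host, and auth scheme are assumptions.
import httpx

resp = httpx.patch(
    "https://app.example.com/api/archives/<archive-id>/storage",
    headers={"Authorization": "Bearer <access-token>"},
    json={
        "type": "s3",
        "endpoint_url": "https://s3.example.com/my-bucket/crawls/",
        "access_key": "EXAMPLE_ACCESS_KEY",
        "secret_key": "EXAMPLE_SECRET_KEY",
    },
)
resp.raise_for_status()
print(resp.json())  # expected: {"updated": True}

On success the storage is validated (via a test upload through aiobotocore for custom storage, or a crawl_manager check for a named default), saved on the archive, and propagated to the crawl manager as the per-archive secret.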