support committing to s3 storage
move mongo into separate optional deployment, along with minio
support for configuring storages
support for deleting crawls, associated config and secrets
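Before the diff: a rough sketch of how the new chart toggles introduced in this commit could be combined in a values override to point at an external S3 bucket and an external MongoDB instead of the optional in-cluster pods. The hostname, bucket, and credentials below are placeholders, not values from this commit; the real defaults are in the values hunks at the end of the diff.

# hypothetical values override (keys come from this commit's chart changes)
mongo_local: false                     # skip the optional local-mongo deployment
mongo_host: "mongo.example.internal"   # injected into the env configmap as MONGO_HOST

minio_local: false                     # skip the optional local minio pod
storage:
  access_key: "EXAMPLE-ACCESS-KEY"     # becomes STORE_ACCESS_KEY in the storage-auth secret
  secret_key: "EXAMPLE-SECRET-KEY"     # becomes STORE_SECRET_KEY
  endpoint: "https://s3.example.com/example-bucket/"   # becomes STORE_ENDPOINT_URL
  force_path_style: "true"

With both *_local flags off, only the env configmap and storage-auth secret carry the connection details, and the API pod talks to the external services directly.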
parent a111bacfb5
commit f77eaccf41
@@ -43,6 +43,8 @@ class BaseCrawlConfig(BaseModel):
 
     seeds: List[Union[str, Seed]]
 
+    collection: Optional[str] = "my-web-archive"
+
     scopeType: Optional[ScopeType] = ScopeType.PREFIX
     scope: Union[str, List[str], None] = ""
     exclude: Union[str, List[str], None] = ""
@@ -66,6 +68,7 @@ class CrawlConfig(BaseCrawlConfig):
     """Schedulable config"""
 
     schedule: Optional[str] = ""
+    storageName: Optional[str] = "default"
 
 
 # ============================================================================
@@ -85,8 +88,9 @@ def to_crawl_config(data, uid=None):
 class CrawlOps:
     """Crawl Config Operations"""
 
-    def __init__(self, mdb, crawl_manager):
+    def __init__(self, mdb, storage_ops, crawl_manager):
         self.crawl_configs = mdb["crawl_configs"]
+        self.storage_ops = storage_ops
         self.crawl_manager = crawl_manager
         self.default_crawl_params = [
             "--collection",
@@ -102,10 +106,18 @@ class CrawlOps:
         data = config.dict()
         data["user"] = user.id
 
+        storage = await self.storage_ops.get_storage_by_name(config.storageName, user)
+
+        if not storage:
+            raise HTTPException(
+                status_code=400,
+                detail=f"Invalid Config: Storage '{config.storageName}' not found",
+            )
+
         result = await self.crawl_configs.insert_one(data)
         out = to_crawl_config(data, result.inserted_id)
         await self.crawl_manager.add_crawl_config(
-            out.dict(), str(user.id), self.default_crawl_params
+            out.dict(), str(user.id), storage, self.default_crawl_params
         )
         return result
 
@@ -118,9 +130,7 @@ class CrawlOps:
     async def delete_crawl_config(self, _id: str, user: User):
         """Delete config"""
         await self.crawl_manager.delete_crawl_configs(f"btrix.crawlconfig={_id}")
-        return self.crawl_configs.delete_one(
-            {"_id": ObjectId(_id), "user": user.id}
-        )
+        return self.crawl_configs.delete_one({"_id": ObjectId(_id), "user": user.id})
 
     async def delete_crawl_configs(self, user: User):
         """Delete all crawl configs for user"""
@@ -143,9 +153,9 @@ class CrawlOps:
 
 # ============================================================================
 # pylint: disable=redefined-builtin,invalid-name
-def init_crawl_config_api(app, mdb, user_dep: User, crawl_manager):
+def init_crawl_config_api(app, mdb, user_dep: User, storage_ops, crawl_manager):
     """Init /crawlconfigs api routes"""
-    ops = CrawlOps(mdb, crawl_manager)
+    ops = CrawlOps(mdb, storage_ops, crawl_manager)
 
     router = APIRouter(
         prefix="/crawlconfigs",
@@ -170,7 +180,7 @@ def init_crawl_config_api(app, mdb, user_dep: User, crawl_manager):
         if not result or not result.deleted_count:
             raise HTTPException(status_code=404, detail="Crawl Config Not Found")
 
-        return {"deleted": result.deleted_count}
+        return {"deleted": 1}
 
     @router.get("/{id}")
     async def get_crawl_config(id: str, user: User = Depends(user_dep)):

@@ -1,12 +0,0 @@
-import aiodocker
-
-
-class DockerDriver(BaseDriver):
-    def __init__(self):
-        self.docker = aiodocker.Docker()
-        self.crawl_image = os.environ.get(
-            "CRAWLER_IMAGE", "webrecorder/browsertrix-crawler"
-        )
-
-    def start_crawl(self):
-        container = await self.docker.containers.create(config=config)

@@ -12,13 +12,6 @@ from jinja2 import Environment, FileSystemLoader
 
 
 # ============================================================================
-if os.environ.get("IN_CLUSTER"):
-    print("Cluster Init")
-    config.load_incluster_config()
-else:
-    config.load_kube_config()
-
-
 DEFAULT_NAMESPACE = os.environ.get("CRAWLER_NAMESPACE") or "crawlers"
 
 DEFAULT_NO_SCHEDULE = "* * 31 2 *"
@@ -26,10 +19,12 @@ DEFAULT_NO_SCHEDULE = "* * 31 2 *"
 
 # ============================================================================
 class K8SManager:
-    # pylint: disable=too-few-public-methods
+    # pylint: disable=too-many-instance-attributes,too-many-locals
     """K8SManager, manager creation of k8s resources from crawl api requests"""
 
     def __init__(self, namespace=DEFAULT_NAMESPACE):
+        config.load_incluster_config()
+
         self.core_api = client.CoreV1Api()
         self.batch_api = client.BatchV1Api()
         self.batch_beta_api = client.BatchV1beta1Api()
@@ -45,7 +40,11 @@ class K8SManager:
         self.crawler_image_pull_policy = "IfNotPresent"
 
     async def add_crawl_config(
-        self, crawlconfig: dict, userid: str, extra_crawl_params: list = None
+        self,
+        crawlconfig: dict,
+        userid: str,
+        storage: dict,
+        extra_crawl_params: list = None,
     ):
         """add new crawl as cron job, store crawl config in configmap"""
         uid = str(crawlconfig["id"])
@@ -54,6 +53,7 @@ class K8SManager:
 
         extra_crawl_params = extra_crawl_params or []
 
+        # Create Config Map
         config_map = client.V1ConfigMap(
             metadata={
                 "name": f"crawl-config-{uid}",
@@ -67,6 +67,30 @@ class K8SManager:
             namespace=self.namespace, body=config_map
         )
 
+        # Create Secret
+        endpoint_with_coll_url = os.path.join(
+            storage["endpoint_url"], crawlconfig["collection"] + "/"
+        )
+
+        crawl_secret = client.V1Secret(
+            metadata={
+                "name": f"crawl-secret-{uid}",
+                "namespace": self.namespace,
+                "labels": labels,
+            },
+            string_data={
+                "STORE_USER": userid,
+                "STORE_ENDPOINT_URL": endpoint_with_coll_url,
+                "STORE_ACCESS_KEY": storage["access_key"],
+                "STORE_SECRET_KEY": storage["secret_key"],
+            },
+        )
+
+        api_response = await self.core_api.create_namespaced_secret(
+            namespace=self.namespace, body=crawl_secret
+        )
+
+        # Create Cron Job
         run_now = False
         schedule = crawlconfig.get("schedule")
         suspend = False
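For reference, the V1Secret built above would serialize to roughly the following manifest, assuming the chart's default values (ADMIN/PASSW0RD credentials, the local minio endpoint, and the default "my-web-archive" collection); <uid>, <userid>, and the label value are placeholders filled in from the crawl config at runtime.

apiVersion: v1
kind: Secret
metadata:
  name: crawl-secret-<uid>
  namespace: crawlers
  labels:
    btrix.crawlconfig: <uid>
stringData:
  STORE_USER: <userid>
  STORE_ENDPOINT_URL: http://local-minio.default:9000/test-bucket/my-web-archive/
  STORE_ACCESS_KEY: ADMIN
  STORE_SECRET_KEY: PASSW0RD

The crawler container picks these values up through the envFrom secretRef added further down, and the same btrix.crawlconfig label selector lets delete_crawl_configs remove the secret together with the configmap and cron job.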
@@ -100,21 +124,33 @@ class K8SManager:
             namespace=self.namespace, body=cron_job
         )
 
-        # print(api_response)
+        # Run Job Now
         if run_now:
             await self.create_run_now_job(api_response, labels)
 
         return api_response
 
     async def delete_crawl_configs(self, label):
-        """Delete Crawl Cron Job and all dependent resources"""
+        """Delete Crawl Cron Job and all dependent resources, including configmap and secrets"""
 
         await self.batch_beta_api.delete_collection_namespaced_cron_job(
             namespace=self.namespace,
             label_selector=label,
             propagation_policy="Foreground",
         )
 
+        await self.core_api.delete_collection_namespaced_secret(
+            namespace=self.namespace,
+            label_selector=label,
+            propagation_policy="Foreground",
+        )
+
+        await self.core_api.delete_collection_namespaced_config_map(
+            namespace=self.namespace,
+            label_selector=label,
+            propagation_policy="Foreground",
+        )
+
     async def create_run_now_job(self, cron_job, labels):
         """Create new job from cron job to run instantly"""
         annotations = {}
@@ -174,6 +210,9 @@ class K8SManager:
                                 "readOnly": True,
                             }
                         ],
+                        "envFrom": [
+                            {"secretRef": {"name": f"crawl-secret-{uid}"}}
+                        ],
                     }
                 ],
                 "volumes": [

@@ -8,14 +8,13 @@ import os
 from fastapi import FastAPI, Request
 
 # from fastapi.responses import HTMLResponse
-from fastapi.staticfiles import StaticFiles
-
+# from fastapi.staticfiles import StaticFiles
 
 from users import init_users_api, UserDB
 from db import init_db
 from storages import init_storages_api
 from crawls import init_crawl_config_api
 
 from k8sman import K8SManager
 
 
@@ -27,18 +26,19 @@ class BrowsertrixAPI:
 
     # pylint: disable=too-many-instance-attributes
     def __init__(self):
-        self.default_storage = os.environ.get(
-            "DEFAULT_STORAGE", "http://localhost:8010/store-bucket/"
+        self.default_storage_endpoint_url = os.environ.get(
+            "STORE_ENDPOINT_URL", "http://localhost:8010/store-bucket/"
         )
+        self.default_storage_access_key = os.environ.get("STORE_ACCESS_KEY")
+        self.default_storage_secret_key = os.environ.get("STORE_SECRET_KEY")
 
         self.app = FastAPI()
 
-        if os.environ.get("K8S"):
+        if os.environ.get("KUBERNETES_SERVICE_HOST"):
             self.crawl_manager = K8SManager()
         else:
-            self.crawl_manager = None
-
-        # self.app.mount("/static", StaticFiles(directory="static"), name="static")
+            #to implement
+            raise Exception("Currently, only running in Kubernetes is supported")
 
         self.mdb = init_db()
 
@@ -55,7 +55,11 @@ class BrowsertrixAPI:
         self.storage_ops = init_storages_api(self.app, self.mdb, current_active_user)
 
         self.crawl_config_ops = init_crawl_config_api(
-            self.app, self.mdb, current_active_user, self.crawl_manager
+            self.app,
+            self.mdb,
+            current_active_user,
+            self.storage_ops,
+            self.crawl_manager,
         )
 
         # @app.get("/")
@@ -65,7 +69,14 @@ class BrowsertrixAPI:
     # pylint: disable=no-self-use, unused-argument
     async def on_after_register(self, user: UserDB, request):
         """callback after registeration"""
-        await self.storage_ops.create_storage_for_user(self.default_storage, user)
+
+        await self.storage_ops.create_storage_for_user(
+            endpoint_url=self.default_storage_endpoint_url,
+            access_key=self.default_storage_access_key,
+            secret_key=self.default_storage_secret_key,
+            user=user,
+        )
+
         print(f"User {user.id} has registered.")
 
     # pylint: disable=no-self-use, unused-argument

@@ -2,7 +2,6 @@
 Storage API handling
 """
 
-import os
 from typing import Optional
 
 from pydantic import BaseModel, UUID4
@@ -15,7 +14,7 @@ from users import User
 class Storage(BaseModel):
     """Storage Base Model"""
 
-    title: str
+    name: str
     user: UUID4
 
 
@@ -24,6 +23,8 @@ class S3Storage(Storage):
     """S3 Storage Model"""
 
     endpoint_url: str
+    access_key: str
+    secret_key: str
     is_public: Optional[bool]
 
 
@@ -38,11 +39,17 @@ class StorageOps:
         """Add new storage"""
         return await self.storages_coll.insert_one(storage.dict())
 
-    async def create_storage_for_user(self, endpoint_prefix: str, user: User):
+    async def create_storage_for_user(
+        self, endpoint_url: str, access_key: str, secret_key: str, user: User
+    ):
         """Create default storage for new user"""
-        endpoint_url = os.path.join(endpoint_prefix, str(user.id)) + "/"
+
         storage = S3Storage(
-            endpoint_url=endpoint_url, is_public=False, user=user.id, title="default"
+            endpoint_url=endpoint_url,
+            access_key=access_key,
+            secret_key=secret_key,
+            user=user.id,
+            name="default",
         )
         print(f"Created Default Endpoint at ${endpoint_url}")
         await self.add_storage(storage)
@@ -52,12 +59,16 @@ class StorageOps:
         cursor = self.storages_coll.find({"user": user.id})
         return await cursor.to_list(length=1000)
 
-    async def get_storage(self, uid: str, user: User):
-        """Get a storage for user"""
+    async def get_storage_by_id(self, uid: str, user: User):
+        """Get a storage for user by unique id"""
         return await self.storages_coll.find_one(
             {"_id": ObjectId(uid), "user": user.id}
         )
 
+    async def get_storage_by_name(self, name: str, user: User):
+        """Get a storage for user by name"""
+        return await self.storages_coll.find_one({"name": name, "user": user.id})
+
 
 # ============================================================================
 def init_storages_api(app, mdb, user_dep: User):
@@ -77,7 +88,7 @@ def init_storages_api(app, mdb, user_dep: User):
             "storages": [
                 {
                     "id": str(res["_id"]),
-                    "title": res["title"],
+                    "name": res["name"],
                     "endpoint_url": res["endpoint_url"],
                 }
                 for res in results
@@ -86,12 +97,12 @@ def init_storages_api(app, mdb, user_dep: User):
 
     @router.get("/{id}")
     async def get_storage(uid: str, user: User = Depends(user_dep)):
-        res = await ops.get_storage(uid, user)
+        res = await ops.get_storage_by_id(uid, user)
         print(res)
         if not res:
             return {}
 
-        return {"id": uid, "title": res["title"], "endpoint_url": res["endpoint_url"]}
+        return {"id": uid, "name": res["name"], "endpoint_url": res["endpoint_url"]}
 
     @router.post("/")
     async def add_storage(storage: S3Storage, user: User = Depends(user_dep)):

@@ -17,24 +17,28 @@ class User(models.BaseUser):
     Base User Model
     """
 
+
 # ============================================================================
 class UserCreate(models.BaseUserCreate):
     """
     User Creation Model
     """
 
+
 # ============================================================================
 class UserUpdate(User, models.BaseUserUpdate):
     """
     User Update Model
     """
 
+
 # ============================================================================
 class UserDB(User, models.BaseUserDB):
     """
     User in DB Model
     """
 
+
 # ============================================================================
 def init_users_api(
     app,

@@ -1,49 +0,0 @@
----
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: {{ .Values.name }}-env-config
-  namespace: {{ .Release.Namespace }}
-
-data:
-  MONGO_HOST: localhost
-
-  CRAWLER_NAMESPACE: {{ .Values.crawler_namespace }}
-  CRAWLER_IMAGE: {{ .Values.crawler_image }}
-
-  IN_CLUSTER: "1"
-  K8S: "1"
-
-
----
-apiVersion: v1
-kind: Secret
-metadata:
-  name: mongo-auth
-  namespace: {{ .Release.Namespace }}
-
-type: Opaque
-stringData:
-  MONGO_INITDB_ROOT_USERNAME: {{ .Values.mongo_auth.username | quote }}
-  MONGO_INITDB_ROOT_PASSWORD: {{ .Values.mongo_auth.password | quote }}
-
-
----
-apiVersion: v1
-kind: Secret
-metadata:
-  name: storage-auth
-  namespace: {{ .Release.Namespace }}
-
-type: Opaque
-stringData:
-  PASSWORD_SECRET: "{{ .Values.api_password_secret }}"
-
-  MINIO_ROOT_USER: "{{ .Values.storage.access_key }}"
-  MINIO_ROOT_PASSWORD: "{{ .Values.storage.secret_key }}"
-
-  AWS_ACCESS_KEY_ID: "{{ .Values.storage.access_key }}"
-  AWS_SECRET_ACCESS_KEY: "{{ .Values.storage.secret_key }}"
-  AWS_ENDPOINT: "{{ .Values.storage.endpoint }}"
-  #S3_FORCE_PATH_STYLE: "{{ .Values.storage.force_path_style | quote }}"
-

@@ -1,3 +1,40 @@
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ .Values.name }}-env-config
+  namespace: {{ .Release.Namespace }}
+
+data:
+  MONGO_HOST: {{ .Values.mongo_host }}
+
+  CRAWLER_NAMESPACE: {{ .Values.crawler_namespace }}
+  CRAWLER_IMAGE: {{ .Values.crawler_image }}
+
+
+---
+apiVersion: v1
+kind: Secret
+metadata:
+  name: storage-auth
+  namespace: {{ .Release.Namespace }}
+
+type: Opaque
+stringData:
+  PASSWORD_SECRET: "{{ .Values.api_password_secret }}"
+
+  {{- if .Values.minio_local }}
+  MINIO_ROOT_USER: "{{ .Values.storage.access_key }}"
+  MINIO_ROOT_PASSWORD: "{{ .Values.storage.secret_key }}"
+  {{- end }}
+
+  STORE_ACCESS_KEY: "{{ .Values.storage.access_key }}"
+  STORE_SECRET_KEY: "{{ .Values.storage.secret_key }}"
+  STORE_ENDPOINT_URL: "{{ .Values.storage.endpoint }}"
+  #S3_FORCE_PATH_STYLE: "{{ .Values.storage.force_path_style | quote }}"
+
+
+
 ---
 apiVersion: apps/v1
 kind: Deployment
@@ -14,19 +51,7 @@ spec:
       labels:
         app: {{ .Values.name }}
 
-      annotations:
-        checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}
-
     spec:
-      volumes:
-        - name: data-db
-          hostPath:
-            path: /browsertrix-mongo-data
-            type: DirectoryOrCreate
-
-        - name: data-storage
-          emptyDir: {}
-
       containers:
         - name: api
           image: {{ .Values.api_image }}
@@ -47,17 +72,6 @@ spec:
             requests:
               cpu: {{ .Values.api_requests_cpu }}
 
-        - name: mongo
-          image: {{ .Values.mongo_image }}
-          imagePullPolicy: {{ .Values.mongo_pull_policy }}
-          envFrom:
-            - secretRef:
-                name: mongo-auth
-
-          volumeMounts:
-            - name: data-db
-              mountPath: /data/db
-
 
 ---
 
@@ -1,4 +1,4 @@
-{{- if .Values.use_minio }}
+{{- if .Values.minio_local }}
 
 ---
 apiVersion: apps/v1
@@ -17,13 +17,12 @@ spec:
       labels:
         app: local-minio
 
-      annotations:
-        checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}
-
     spec:
       volumes:
         - name: data-storage
-          emptyDir: {}
+          hostPath:
+            path: /browsertrix-minio-data
+            type: DirectoryOrCreate
 
       containers:
         - name: minio
@@ -39,7 +38,6 @@ spec:
               mountPath: /data
 
 ---
-
 apiVersion: v1
 kind: Service
 
chart/templates/mongo.yaml (new file)
@@ -0,0 +1,74 @@
+{{- if .Values.mongo_local }}
+
+---
+apiVersion: v1
+kind: Secret
+metadata:
+  name: mongo-auth
+  namespace: {{ .Release.Namespace }}
+
+type: Opaque
+stringData:
+  MONGO_INITDB_ROOT_USERNAME: {{ .Values.mongo_auth.username | quote }}
+  MONGO_INITDB_ROOT_PASSWORD: {{ .Values.mongo_auth.password | quote }}
+
+
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: local-mongo
+  namespace: {{ .Release.Namespace }}
+spec:
+  selector:
+    matchLabels:
+      app: local-mongo
+  replicas: {{ .Values.api_num_replicas }}
+  template:
+    metadata:
+      labels:
+        app: local-mongo
+
+    spec:
+      volumes:
+        - name: data-db
+          hostPath:
+            path: /browsertrix-mongo-data
+            type: DirectoryOrCreate
+
+      containers:
+        - name: mongo
+          image: {{ .Values.mongo_image }}
+          imagePullPolicy: {{ .Values.mongo_pull_policy }}
+          envFrom:
+            - secretRef:
+                name: mongo-auth
+
+          volumeMounts:
+            - name: data-db
+              mountPath: /data/db
+
+---
+apiVersion: v1
+kind: Service
+
+metadata:
+  namespace: {{ .Release.Namespace }}
+  name: local-mongo
+  labels:
+    app: local-mongo
+
+spec:
+  type: ClusterIP
+  selector:
+    app: local-mongo
+
+  ports:
+    - protocol: TCP
+      port: 27017
+      targetPort: 27017
+      name: minio
+
+{{- end }}
+
+
@@ -16,6 +16,10 @@ api_requests_cpu: "25m"
 
 # MongoDB Image
 # =========================================
+mongo_local: true
+
+mongo_host: "local-mongo"
+
 mongo_image: "mongo"
 mongo_pull_policy: "IfNotPresent"
 
@@ -41,15 +45,15 @@ crawler_namespace: "crawlers"
 
 storage:
   access_key: "ADMIN"
-  secret_key: "PASSW"
+  secret_key: "PASSW0RD"
   # api_endpoint can be "" if using AWS S3, otherwise, set to your provider's S3 endpoint
-  endpoint: "http://local-minio.default:9000"
+  endpoint: "http://local-minio.default:9000/test-bucket/"
   # if your provider requires path-style URLs for S3 objects, set force_path_style to "true" (any truthy string)
   # https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html
   force_path_style: "true"
   # the target bucket's name and desired storage subpath, formatted as an s3:// URL for convenience
   # (the protocol is ignored; the bucket == the netloc; the subpath == the rest)
-  storage_prefix: "s3://browsertrix/archives/"
+  storage_prefix: "s3://test/bucket/"
   # acl settings for uploaded files, if any.
   # for example, to enable uploaded files to be public, set to:
   # acl: "public-read"
@@ -60,7 +64,7 @@ storage:
 # Local Minio Pod (optional)
 # =========================================
 # set to true to use a local minio image
-use_minio: True
+minio_local: True
 
 minio_image: minio/minio
 minio_pull_policy: "IfNotPresent"