Misc backend fixes for cloud deployment (#26)
* misc backend fixes:
  - fix running without local minio
  - ensure crawler image pull policy is configurable, loaded via chart value
  - use digitalocean repo for main backend image (for now)
  - add bucket_name to config only if using default bucket
* enable all behaviors, support 'access_endpoint_url' for default storages
* debugging: add 'no_delete_jobs' setting for k8s and docker to disable deletion of completed jobs
This commit is contained in:
parent
58eba70c68
commit
3d4d7049a2
@ -66,7 +66,7 @@ class RawCrawlConfig(BaseModel):
|
||||
combineWARC: Optional[bool] = False
|
||||
|
||||
logging: Optional[str] = ""
|
||||
behaviors: Optional[str] = "autoscroll"
|
||||
behaviors: Optional[str] = "autoscroll,autoplay,autofetch,siteSpecific"
|
||||
|
||||
|
||||
# ============================================================================
|
||||
|
@ -44,6 +44,8 @@ class DockerManager:
|
||||
self.extra_crawl_params = extra_crawl_params or []
|
||||
self._event_q = None
|
||||
|
||||
self.no_delete_on_fail = os.environ.get("NO_DELETE_ON_FAIL", "")
|
||||
|
||||
self.storages = {
|
||||
"default": S3Storage(
|
||||
name="default",
|
||||
@ -111,7 +113,8 @@ class DockerManager:
|
||||
|
||||
for container in results:
|
||||
print(f"Cleaning Up Orphan Container {container['Id']}", flush=True)
|
||||
await container.delete()
|
||||
if not self.no_delete_on_fail:
|
||||
await container.delete()
|
||||
|
||||
results = await self.client.containers.list(
|
||||
filters=json.dumps(
|
||||
@ -482,8 +485,10 @@ class DockerManager:
|
||||
if actor["Attributes"]["exitCode"] != 0:
|
||||
crawl = self._make_crawl_for_container(container, "failed", True)
|
||||
await self.crawl_ops.store_crawl(crawl)
|
||||
|
||||
await container.delete()
|
||||
if not self.no_delete_on_fail:
|
||||
await container.delete()
|
||||
else:
|
||||
await container.delete()
|
||||
|
||||
# pylint: disable=no-self-use,too-many-arguments
|
||||
def _make_crawl_for_container(self, container, state, finish_now=False):
|
||||
|
@ -37,11 +37,13 @@ class K8SManager:
|
||||
self.namespace = namespace
|
||||
self._default_storage_endpoints = {}
|
||||
|
||||
self.crawler_image = os.environ.get("CRAWLER_IMAGE")
|
||||
self.crawler_image_pull_policy = "IfNotPresent"
|
||||
self.crawler_image = os.environ["CRAWLER_IMAGE"]
|
||||
self.crawler_image_pull_policy = os.environ["CRAWLER_PULL_POLICY"]
|
||||
|
||||
self.crawl_retries = int(os.environ.get("CRAWL_RETRIES", "3"))
|
||||
|
||||
self.no_delete_jobs = os.environ.get("NO_DELETE_JOBS", "0") != "0"
|
||||
|
||||
self.loop = asyncio.get_running_loop()
|
||||
self.loop.create_task(self.run_event_loop())
|
||||
|
||||
@ -319,7 +321,7 @@ class K8SManager:
|
||||
return None, None
|
||||
|
||||
manual = job.metadata.annotations.get("btrix.run.manual") == "1"
|
||||
if manual:
|
||||
if manual and not self.no_delete_jobs:
|
||||
self.loop.create_task(self._delete_job(job.metadata.name))
|
||||
|
||||
crawl = self._make_crawl_for_job(
|
||||
@ -457,7 +459,7 @@ class K8SManager:
|
||||
failure = await self.crawl_ops.store_crawl(crawl)
|
||||
|
||||
# keep failed jobs around, for now
|
||||
if not failure:
|
||||
if not failure and not self.no_delete_jobs:
|
||||
await self._delete_job(job_name)
|
||||
|
||||
# ========================================================================
|
||||
@ -643,7 +645,7 @@ class K8SManager:
|
||||
{
|
||||
"name": "crawler",
|
||||
"image": self.crawler_image,
|
||||
"imagePullPolicy": "Never",
|
||||
"imagePullPolicy": self.crawler_image_pull_policy,
|
||||
"command": [
|
||||
"crawl",
|
||||
"--config",
|
||||
|
@ -10,6 +10,7 @@ data:
|
||||
|
||||
CRAWLER_NAMESPACE: {{ .Values.crawler_namespace }}
|
||||
CRAWLER_IMAGE: {{ .Values.crawler_image }}
|
||||
CRAWLER_PULL_POLICY: {{ .Values.crawler_pull_policy }}
|
||||
|
||||
CRAWL_TIMEOUT: "{{ .Values.crawl_timeout }}"
|
||||
CRAWL_RETRIES: "{{ .Values.crawl_retries }}"
|
||||
@ -18,6 +19,8 @@ data:
|
||||
|
||||
REDIS_CRAWLS_DONE_KEY: "crawls-done"
|
||||
|
||||
NO_DELETE_JOBS: "{{ .Values.no_delete_jobs | default '0' }}"
|
||||
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
|
@ -29,8 +29,8 @@ spec:
|
||||
rules:
|
||||
- host: {{ .Values.ingress.host }}
|
||||
http:
|
||||
{{- if .Values.minio_local }}
|
||||
paths:
|
||||
{{- if .Values.minio_local }}
|
||||
- path: /data/(.*)
|
||||
pathType: Prefix
|
||||
backend:
|
||||
@ -51,7 +51,7 @@ spec:
|
||||
{{ if .Values.ingress.tls }}
|
||||
---
|
||||
|
||||
apiVersion: cert-manager.io/v1alpha2
|
||||
apiVersion: cert-manager.io/v1
|
||||
kind: ClusterIssuer
|
||||
metadata:
|
||||
name: cert-main
|
||||
|
@ -31,8 +31,8 @@ spec:
|
||||
- name: nginx-resolver
|
||||
emptyDir: {}
|
||||
|
||||
{{- if .Values.minio_local }}
|
||||
initContainers:
|
||||
{{- if .Values.minio_local }}
|
||||
- name: init-bucket
|
||||
image: {{ .Values.minio_mc_image }}
|
||||
imagePullPolicy: {{ .Values.minio_pull_policy }}
|
||||
@ -44,7 +44,7 @@ spec:
|
||||
key: MC_HOST
|
||||
|
||||
command: ['/bin/sh']
|
||||
args: ['-c', 'mc mb local/test-bucket; mc policy set public local/test-bucket' ]
|
||||
args: ['-c', 'mc mb --ignore-existing local/{{ .Values.minio_local_bucket_name }}' ]
|
||||
{{- end }}
|
||||
|
||||
- name: init-nginx
|
||||
|
@ -31,9 +31,19 @@ type: Opaque
|
||||
stringData:
|
||||
STORE_ACCESS_KEY: "{{ $storage.access_key }}"
|
||||
STORE_SECRET_KEY: "{{ $storage.secret_key }}"
|
||||
STORE_ENDPOINT_URL: "{{ $storage.endpoint_url }}{{ $storage.bucket_name }}/"
|
||||
{{- if and $.Values.ingress.host $.Values.minio_local }}
|
||||
|
||||
{{- if $storage.bucket_name }}
|
||||
STORE_ENDPOINT_URL: "{{ $storage.endpoint_url }}{{ $storage.bucket_name }}"
|
||||
{{- else }}
|
||||
STORE_ENDPOINT_URL: "{{ $storage.endpoint_url }}"
|
||||
{{- end }}
|
||||
|
||||
{{- if $storage.access_endpoint_url }}
|
||||
STORE_ACCESS_ENDPOINT_URL: "{{ $storage.access_endpoint_url }}"
|
||||
{{- else if and $.Values.ingress.host $.Values.minio_local }}
|
||||
STORE_ACCESS_ENDPOINT_URL: {{ $.Values.ingress.scheme | default "https" }}://{{ $.Values.ingress.host }}/data/{{ $storage.bucket_name }}/
|
||||
{{- else }}
|
||||
STORE_ACCESS_ENDPOINT_URL: "{{ $storage.endpoint_url }}"
|
||||
{{- end }}
|
||||
|
||||
{{- end }}
|
||||
|
@ -57,7 +57,7 @@ redis_url: "redis://local-redis.default:6379/1"
|
||||
# =========================================
|
||||
|
||||
crawler_image: "webrecorder/browsertrix-crawler:latest"
|
||||
crawler_pull_policy: "Never"
|
||||
crawler_pull_policy: "IfNotPresent"
|
||||
|
||||
crawler_namespace: "crawlers"
|
||||
|
||||
@ -68,19 +68,6 @@ crawl_retries: 1
|
||||
crawler_args: "--timeout 90 --logging stats,behaviors,debug --generateWACZ --screencastPort 9037"
|
||||
|
||||
|
||||
|
||||
# Storage
|
||||
# =========================================
|
||||
|
||||
storages:
|
||||
- name: "default"
|
||||
access_key: "ADMIN"
|
||||
secret_key: "PASSW0RD"
|
||||
bucket_name: "test-bucket"
|
||||
|
||||
endpoint_url: "http://local-minio.default:9000/"
|
||||
|
||||
|
||||
# Local Minio Pod (optional)
|
||||
# =========================================
|
||||
# set to true to use a local minio image
|
||||
@ -93,6 +80,21 @@ minio_image: minio/minio
|
||||
minio_mc_image: minio/mc
|
||||
minio_pull_policy: "IfNotPresent"
|
||||
|
||||
minio_local_bucket_name: &local_bucket_name "test-bucket"
|
||||
|
||||
|
||||
# Storage
|
||||
# =========================================
|
||||
# should include the local minio bucket, if enabled, and any other available buckets for default storage
|
||||
|
||||
storages:
|
||||
- name: "default"
|
||||
access_key: "ADMIN"
|
||||
secret_key: "PASSW0RD"
|
||||
bucket_name: *local_bucket_name
|
||||
|
||||
endpoint_url: "http://local-minio.default:9000/"
|
||||
|
||||
|
||||
# Deployment options
|
||||
# =========================================
|
||||
|
@ -3,7 +3,7 @@ version: '3.5'
|
||||
services:
|
||||
backend:
|
||||
build: ./backend
|
||||
image: webrecorder/browsertrix-api
|
||||
image: registry.digitalocean.com/btrix/webrecorder/browsertrix-api
|
||||
ports:
|
||||
- 8000:8000
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user