diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py index 27405b45..9f8c264b 100644 --- a/backend/btrixcloud/models.py +++ b/backend/btrixcloud/models.py @@ -1666,6 +1666,7 @@ class S3StorageIn(BaseModel): endpoint_url: str bucket: str access_endpoint_url: Optional[str] = None + access_addressing_style: Literal["virtual", "path"] = "virtual" region: str = "" @@ -1680,6 +1681,7 @@ class S3Storage(BaseModel): access_key: str secret_key: str access_endpoint_url: str + access_addressing_style: Literal["virtual", "path"] = "virtual" region: str = "" diff --git a/backend/btrixcloud/storages.py b/backend/btrixcloud/storages.py index 1e585217..c944e8e3 100644 --- a/backend/btrixcloud/storages.py +++ b/backend/btrixcloud/storages.py @@ -70,7 +70,7 @@ CHUNK_SIZE = 1024 * 256 # ============================================================================ -# pylint: disable=broad-except,raise-missing-from +# pylint: disable=broad-except,raise-missing-from,too-many-instance-attributes class StorageOps: """All storage handling, download/upload operations""" @@ -104,6 +104,8 @@ class StorageOps: default_namespace = os.environ.get("DEFAULT_NAMESPACE", "default") self.frontend_origin = f"{frontend_origin}.{default_namespace}" + self.local_minio_access_path = os.environ.get("LOCAL_MINIO_ACCESS_PATH") + with open(os.environ["STORAGES_JSON"], encoding="utf-8") as fh: storage_list = json.loads(fh.read()) @@ -158,6 +160,10 @@ class StorageOps: access_endpoint_url = storage.get("access_endpoint_url") or endpoint_url + addressing_style = storage.get("access_addressing_style", "virtual") + if access_endpoint_url == self.local_minio_access_path: + addressing_style = "path" + return S3Storage( access_key=storage["access_key"], secret_key=storage["secret_key"], @@ -165,6 +171,7 @@ class StorageOps: endpoint_url=endpoint_url, endpoint_no_bucket_url=endpoint_no_bucket_url, access_endpoint_url=access_endpoint_url, + access_addressing_style=addressing_style, ) async def add_custom_storage( @@ -189,6 +196,7 @@ class StorageOps: endpoint_url=endpoint_url, endpoint_no_bucket_url=endpoint_no_bucket_url, access_endpoint_url=storagein.access_endpoint_url or storagein.endpoint_url, + access_addressing_style=storagein.access_addressing_style, ) try: @@ -291,9 +299,12 @@ class StorageOps: session = aiobotocore.session.get_session() - config = None + s3 = None + if for_presign and storage.access_endpoint_url != storage.endpoint_url: - config = AioConfig(s3={"addressing_style": "virtual"}) + s3 = {"addressing_style": storage.access_addressing_style} + + config = AioConfig(signature_version="s3v4", s3=s3) async with session.create_client( "s3", @@ -498,9 +509,12 @@ class StorageOps: s3storage.access_endpoint_url and s3storage.access_endpoint_url != s3storage.endpoint_url ): + virtual = s3storage.access_addressing_style == "virtual" parts = urlsplit(s3storage.endpoint_url) host_endpoint_url = ( f"{parts.scheme}://{bucket}.{parts.netloc}/{orig_key}" + if virtual + else f"{parts.scheme}://{parts.netloc}/{bucket}/{orig_key}" ) presigned_url = presigned_url.replace( host_endpoint_url, s3storage.access_endpoint_url diff --git a/chart/templates/configmap.yaml b/chart/templates/configmap.yaml index 185684a0..125d57ee 100644 --- a/chart/templates/configmap.yaml +++ b/chart/templates/configmap.yaml @@ -14,7 +14,7 @@ data: FRONTEND_ORIGIN: {{ .Values.frontend_alias | default "http://browsertrix-cloud-frontend" }} - CRAWLER_FQDN_SUFFIX: ".{{ .Values.crawler_namespace }}.svc.cluster.local" + CRAWLER_FQDN_SUFFIX: ".{{ .Values.crawler_namespace }}{{ .Values.fqdn_suffix }}" DEFAULT_ORG: "{{ .Values.default_org }}" @@ -53,6 +53,8 @@ data: IS_LOCAL_MINIO: "{{ .Values.minio_local }}" + LOCAL_MINIO_ACCESS_PATH: "{{ .Values.minio_access_path }}" + STORAGES_JSON: "/ops-configs/storages.json" CRAWLER_CHANNELS_JSON: "/ops-configs/crawler_channels.json" diff --git a/chart/templates/frontend.yaml b/chart/templates/frontend.yaml index d90ddd7a..8d060f6a 100644 --- a/chart/templates/frontend.yaml +++ b/chart/templates/frontend.yaml @@ -41,7 +41,7 @@ spec: value: {{ .Values.name }}-backend - name: CRAWLER_FQDN_SUFFIX - value: ".{{ .Values.crawler_namespace }}.svc.cluster.local" + value: ".{{ .Values.crawler_namespace }}{{ .Values.fqdn_suffix }}" - name: NGINX_ENTRYPOINT_WORKER_PROCESSES_AUTOTUNE value: "1" @@ -60,7 +60,10 @@ spec: - name: LOCAL_BUCKET value: "{{ .Values.minio_local_bucket_name }}" - {{- end }} + + - name: LOCAL_ACCESS_PATH + value: "{{ .Values.minio_access_path }}" + {{- end }} {{- if .Values.inject_extra }} - name: INJECT_EXTRA diff --git a/chart/templates/minio.yaml b/chart/templates/minio.yaml index 912da090..8bd5498f 100644 --- a/chart/templates/minio.yaml +++ b/chart/templates/minio.yaml @@ -136,6 +136,23 @@ spec: {{- end }} name: minio +--- +apiVersion: v1 +kind: Service + +metadata: + namespace: {{ .Values.crawler_namespace }} + name: local-minio + labels: + app: local-minio + +spec: + type: ExternalName + externalName: "local-minio.{{ .Release.Namespace }}{{ .Values.fqdn_suffix }}" + ports: + - port: 9000 + + {{- if .Values.minio_local_console_port }} --- apiVersion: v1 diff --git a/chart/values.yaml b/chart/values.yaml index b7f2f204..62c5c559 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -398,6 +398,9 @@ minio_pull_policy: "IfNotPresent" minio_local_bucket_name: &local_bucket_name "btrix-data" +# path for serving from local minio bucket +minio_access_path: &minio_access_path "/data/" + minio_cpu: "10m" minio_memory: "1024Mi" @@ -413,8 +416,8 @@ storages: secret_key: "PASSW0RD" bucket_name: *local_bucket_name - endpoint_url: "http://local-minio.default:9000/" - access_endpoint_url: "/data/" + endpoint_url: "http://local-minio:9000/" + access_endpoint_url: *minio_access_path # optional: duration in minutes for WACZ download links to be valid @@ -495,6 +498,9 @@ signer_memory: "50Mi" # Other Settings # ========================================= +# default FQDN suffix, shouldn't need to change +fqdn_suffix: .svc.cluster.local + # Optional: configure load balancing annotations # service: # annotations: diff --git a/frontend/00-browsertrix-nginx-init.sh b/frontend/00-browsertrix-nginx-init.sh index a833051d..eb1a5313 100755 --- a/frontend/00-browsertrix-nginx-init.sh +++ b/frontend/00-browsertrix-nginx-init.sh @@ -7,7 +7,9 @@ if [ -z "$LOCAL_MINIO_HOST" ]; then echo "no local minio, clearing out minio route" echo "" >/etc/nginx/includes/minio.conf else - echo "local minio: replacing \$LOCAL_MINIO_HOST with \"$LOCAL_MINIO_HOST\", \$LOCAL_BUCKET with \"$LOCAL_BUCKET\"" + LOCAL_ACCESS_PATH=$(printf '%s\n' "$LOCAL_ACCESS_PATH" | sed -e 's/[\/&]/\\&/g') + echo "local minio: replacing \$LOCAL_MINIO_HOST with \"$LOCAL_MINIO_HOST\", \$LOCAL_BUCKET with \"$LOCAL_BUCKET\", \$LOCAL_ACCESS_PATH with \"$LOCAL_ACCESS_PATH\"" + sed -i "s/\$LOCAL_ACCESS_PATH/$LOCAL_ACCESS_PATH/g" /etc/nginx/includes/minio.conf sed -i "s/\$LOCAL_MINIO_HOST/$LOCAL_MINIO_HOST/g" /etc/nginx/includes/minio.conf sed -i "s/\$LOCAL_BUCKET/$LOCAL_BUCKET/g" /etc/nginx/includes/minio.conf fi diff --git a/frontend/minio.conf b/frontend/minio.conf index 0c9b3a83..14256bba 100644 --- a/frontend/minio.conf +++ b/frontend/minio.conf @@ -1,4 +1,4 @@ -location /data/ { +location $LOCAL_ACCESS_PATH { proxy_pass http://$LOCAL_MINIO_HOST/$LOCAL_BUCKET/; proxy_redirect off; proxy_buffering off;