storage: use s3v4 signature for presigning urls (#2611)
Use V4 ('s3v4') signature version for all presigning URLs to support backblaze, fixes #2472 - add 'access_addressing_style' to be able to choose virtual/path addressing for access endpoint (default to 'virtual' as before) - fix minio presigning with v4 by using 'path' addressing style for minio - if path matches '/data/' for internal minio bucket, then always use 'path' - also make minio access path '/data/' configurable. Also simplify running in any namespace with default settings: - don't hardcode 'local-minio.default' - in crawlers namespace, add a 'local-minio' externalName service which maps to the main namespace service.
This commit is contained in:
parent
4b1e416eb6
commit
f1fd11c031
@ -1666,6 +1666,7 @@ class S3StorageIn(BaseModel):
|
||||
endpoint_url: str
|
||||
bucket: str
|
||||
access_endpoint_url: Optional[str] = None
|
||||
access_addressing_style: Literal["virtual", "path"] = "virtual"
|
||||
region: str = ""
|
||||
|
||||
|
||||
@ -1680,6 +1681,7 @@ class S3Storage(BaseModel):
|
||||
access_key: str
|
||||
secret_key: str
|
||||
access_endpoint_url: str
|
||||
access_addressing_style: Literal["virtual", "path"] = "virtual"
|
||||
region: str = ""
|
||||
|
||||
|
||||
|
@ -70,7 +70,7 @@ CHUNK_SIZE = 1024 * 256
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# pylint: disable=broad-except,raise-missing-from
|
||||
# pylint: disable=broad-except,raise-missing-from,too-many-instance-attributes
|
||||
class StorageOps:
|
||||
"""All storage handling, download/upload operations"""
|
||||
|
||||
@ -104,6 +104,8 @@ class StorageOps:
|
||||
default_namespace = os.environ.get("DEFAULT_NAMESPACE", "default")
|
||||
self.frontend_origin = f"{frontend_origin}.{default_namespace}"
|
||||
|
||||
self.local_minio_access_path = os.environ.get("LOCAL_MINIO_ACCESS_PATH")
|
||||
|
||||
with open(os.environ["STORAGES_JSON"], encoding="utf-8") as fh:
|
||||
storage_list = json.loads(fh.read())
|
||||
|
||||
@ -158,6 +160,10 @@ class StorageOps:
|
||||
|
||||
access_endpoint_url = storage.get("access_endpoint_url") or endpoint_url
|
||||
|
||||
addressing_style = storage.get("access_addressing_style", "virtual")
|
||||
if access_endpoint_url == self.local_minio_access_path:
|
||||
addressing_style = "path"
|
||||
|
||||
return S3Storage(
|
||||
access_key=storage["access_key"],
|
||||
secret_key=storage["secret_key"],
|
||||
@ -165,6 +171,7 @@ class StorageOps:
|
||||
endpoint_url=endpoint_url,
|
||||
endpoint_no_bucket_url=endpoint_no_bucket_url,
|
||||
access_endpoint_url=access_endpoint_url,
|
||||
access_addressing_style=addressing_style,
|
||||
)
|
||||
|
||||
async def add_custom_storage(
|
||||
@ -189,6 +196,7 @@ class StorageOps:
|
||||
endpoint_url=endpoint_url,
|
||||
endpoint_no_bucket_url=endpoint_no_bucket_url,
|
||||
access_endpoint_url=storagein.access_endpoint_url or storagein.endpoint_url,
|
||||
access_addressing_style=storagein.access_addressing_style,
|
||||
)
|
||||
|
||||
try:
|
||||
@ -291,9 +299,12 @@ class StorageOps:
|
||||
|
||||
session = aiobotocore.session.get_session()
|
||||
|
||||
config = None
|
||||
s3 = None
|
||||
|
||||
if for_presign and storage.access_endpoint_url != storage.endpoint_url:
|
||||
config = AioConfig(s3={"addressing_style": "virtual"})
|
||||
s3 = {"addressing_style": storage.access_addressing_style}
|
||||
|
||||
config = AioConfig(signature_version="s3v4", s3=s3)
|
||||
|
||||
async with session.create_client(
|
||||
"s3",
|
||||
@ -498,9 +509,12 @@ class StorageOps:
|
||||
s3storage.access_endpoint_url
|
||||
and s3storage.access_endpoint_url != s3storage.endpoint_url
|
||||
):
|
||||
virtual = s3storage.access_addressing_style == "virtual"
|
||||
parts = urlsplit(s3storage.endpoint_url)
|
||||
host_endpoint_url = (
|
||||
f"{parts.scheme}://{bucket}.{parts.netloc}/{orig_key}"
|
||||
if virtual
|
||||
else f"{parts.scheme}://{parts.netloc}/{bucket}/{orig_key}"
|
||||
)
|
||||
presigned_url = presigned_url.replace(
|
||||
host_endpoint_url, s3storage.access_endpoint_url
|
||||
|
@ -14,7 +14,7 @@ data:
|
||||
|
||||
FRONTEND_ORIGIN: {{ .Values.frontend_alias | default "http://browsertrix-cloud-frontend" }}
|
||||
|
||||
CRAWLER_FQDN_SUFFIX: ".{{ .Values.crawler_namespace }}.svc.cluster.local"
|
||||
CRAWLER_FQDN_SUFFIX: ".{{ .Values.crawler_namespace }}{{ .Values.fqdn_suffix }}"
|
||||
|
||||
DEFAULT_ORG: "{{ .Values.default_org }}"
|
||||
|
||||
@ -53,6 +53,8 @@ data:
|
||||
|
||||
IS_LOCAL_MINIO: "{{ .Values.minio_local }}"
|
||||
|
||||
LOCAL_MINIO_ACCESS_PATH: "{{ .Values.minio_access_path }}"
|
||||
|
||||
STORAGES_JSON: "/ops-configs/storages.json"
|
||||
|
||||
CRAWLER_CHANNELS_JSON: "/ops-configs/crawler_channels.json"
|
||||
|
@ -41,7 +41,7 @@ spec:
|
||||
value: {{ .Values.name }}-backend
|
||||
|
||||
- name: CRAWLER_FQDN_SUFFIX
|
||||
value: ".{{ .Values.crawler_namespace }}.svc.cluster.local"
|
||||
value: ".{{ .Values.crawler_namespace }}{{ .Values.fqdn_suffix }}"
|
||||
|
||||
- name: NGINX_ENTRYPOINT_WORKER_PROCESSES_AUTOTUNE
|
||||
value: "1"
|
||||
@ -60,7 +60,10 @@ spec:
|
||||
|
||||
- name: LOCAL_BUCKET
|
||||
value: "{{ .Values.minio_local_bucket_name }}"
|
||||
{{- end }}
|
||||
|
||||
- name: LOCAL_ACCESS_PATH
|
||||
value: "{{ .Values.minio_access_path }}"
|
||||
{{- end }}
|
||||
|
||||
{{- if .Values.inject_extra }}
|
||||
- name: INJECT_EXTRA
|
||||
|
@ -136,6 +136,23 @@ spec:
|
||||
{{- end }}
|
||||
name: minio
|
||||
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
|
||||
metadata:
|
||||
namespace: {{ .Values.crawler_namespace }}
|
||||
name: local-minio
|
||||
labels:
|
||||
app: local-minio
|
||||
|
||||
spec:
|
||||
type: ExternalName
|
||||
externalName: "local-minio.{{ .Release.Namespace }}{{ .Values.fqdn_suffix }}"
|
||||
ports:
|
||||
- port: 9000
|
||||
|
||||
|
||||
{{- if .Values.minio_local_console_port }}
|
||||
---
|
||||
apiVersion: v1
|
||||
|
@ -398,6 +398,9 @@ minio_pull_policy: "IfNotPresent"
|
||||
|
||||
minio_local_bucket_name: &local_bucket_name "btrix-data"
|
||||
|
||||
# path for serving from local minio bucket
|
||||
minio_access_path: &minio_access_path "/data/"
|
||||
|
||||
minio_cpu: "10m"
|
||||
minio_memory: "1024Mi"
|
||||
|
||||
@ -413,8 +416,8 @@ storages:
|
||||
secret_key: "PASSW0RD"
|
||||
bucket_name: *local_bucket_name
|
||||
|
||||
endpoint_url: "http://local-minio.default:9000/"
|
||||
access_endpoint_url: "/data/"
|
||||
endpoint_url: "http://local-minio:9000/"
|
||||
access_endpoint_url: *minio_access_path
|
||||
|
||||
|
||||
# optional: duration in minutes for WACZ download links to be valid
|
||||
@ -495,6 +498,9 @@ signer_memory: "50Mi"
|
||||
# Other Settings
|
||||
# =========================================
|
||||
|
||||
# default FQDN suffix, shouldn't need to change
|
||||
fqdn_suffix: .svc.cluster.local
|
||||
|
||||
# Optional: configure load balancing annotations
|
||||
# service:
|
||||
# annotations:
|
||||
|
@ -7,7 +7,9 @@ if [ -z "$LOCAL_MINIO_HOST" ]; then
|
||||
echo "no local minio, clearing out minio route"
|
||||
echo "" >/etc/nginx/includes/minio.conf
|
||||
else
|
||||
echo "local minio: replacing \$LOCAL_MINIO_HOST with \"$LOCAL_MINIO_HOST\", \$LOCAL_BUCKET with \"$LOCAL_BUCKET\""
|
||||
LOCAL_ACCESS_PATH=$(printf '%s\n' "$LOCAL_ACCESS_PATH" | sed -e 's/[\/&]/\\&/g')
|
||||
echo "local minio: replacing \$LOCAL_MINIO_HOST with \"$LOCAL_MINIO_HOST\", \$LOCAL_BUCKET with \"$LOCAL_BUCKET\", \$LOCAL_ACCESS_PATH with \"$LOCAL_ACCESS_PATH\""
|
||||
sed -i "s/\$LOCAL_ACCESS_PATH/$LOCAL_ACCESS_PATH/g" /etc/nginx/includes/minio.conf
|
||||
sed -i "s/\$LOCAL_MINIO_HOST/$LOCAL_MINIO_HOST/g" /etc/nginx/includes/minio.conf
|
||||
sed -i "s/\$LOCAL_BUCKET/$LOCAL_BUCKET/g" /etc/nginx/includes/minio.conf
|
||||
fi
|
||||
|
@ -1,4 +1,4 @@
|
||||
location /data/ {
|
||||
location $LOCAL_ACCESS_PATH {
|
||||
proxy_pass http://$LOCAL_MINIO_HOST/$LOCAL_BUCKET/;
|
||||
proxy_redirect off;
|
||||
proxy_buffering off;
|
||||
|
Loading…
Reference in New Issue
Block a user