storage: use s3v4 signature for presigning urls (#2611)
Use V4 ('s3v4') signature version for all presigning URLs to support backblaze, fixes #2472 - add 'access_addressing_style' to be able to choose virtual/path addressing for access endpoint (default to 'virtual' as before) - fix minio presigning with v4 by using 'path' addressing style for minio - if path matches '/data/' for internal minio bucket, then always use 'path' - also make minio access path '/data/' configurable. Also simplify running in any namespace with default settings: - don't hardcode 'local-minio.default' - in crawlers namespace, add a 'local-minio' externalName service which maps to the main namespace service.
This commit is contained in:
parent
4b1e416eb6
commit
f1fd11c031
@ -1666,6 +1666,7 @@ class S3StorageIn(BaseModel):
|
|||||||
endpoint_url: str
|
endpoint_url: str
|
||||||
bucket: str
|
bucket: str
|
||||||
access_endpoint_url: Optional[str] = None
|
access_endpoint_url: Optional[str] = None
|
||||||
|
access_addressing_style: Literal["virtual", "path"] = "virtual"
|
||||||
region: str = ""
|
region: str = ""
|
||||||
|
|
||||||
|
|
||||||
@ -1680,6 +1681,7 @@ class S3Storage(BaseModel):
|
|||||||
access_key: str
|
access_key: str
|
||||||
secret_key: str
|
secret_key: str
|
||||||
access_endpoint_url: str
|
access_endpoint_url: str
|
||||||
|
access_addressing_style: Literal["virtual", "path"] = "virtual"
|
||||||
region: str = ""
|
region: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
@ -70,7 +70,7 @@ CHUNK_SIZE = 1024 * 256
|
|||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# pylint: disable=broad-except,raise-missing-from
|
# pylint: disable=broad-except,raise-missing-from,too-many-instance-attributes
|
||||||
class StorageOps:
|
class StorageOps:
|
||||||
"""All storage handling, download/upload operations"""
|
"""All storage handling, download/upload operations"""
|
||||||
|
|
||||||
@ -104,6 +104,8 @@ class StorageOps:
|
|||||||
default_namespace = os.environ.get("DEFAULT_NAMESPACE", "default")
|
default_namespace = os.environ.get("DEFAULT_NAMESPACE", "default")
|
||||||
self.frontend_origin = f"{frontend_origin}.{default_namespace}"
|
self.frontend_origin = f"{frontend_origin}.{default_namespace}"
|
||||||
|
|
||||||
|
self.local_minio_access_path = os.environ.get("LOCAL_MINIO_ACCESS_PATH")
|
||||||
|
|
||||||
with open(os.environ["STORAGES_JSON"], encoding="utf-8") as fh:
|
with open(os.environ["STORAGES_JSON"], encoding="utf-8") as fh:
|
||||||
storage_list = json.loads(fh.read())
|
storage_list = json.loads(fh.read())
|
||||||
|
|
||||||
@ -158,6 +160,10 @@ class StorageOps:
|
|||||||
|
|
||||||
access_endpoint_url = storage.get("access_endpoint_url") or endpoint_url
|
access_endpoint_url = storage.get("access_endpoint_url") or endpoint_url
|
||||||
|
|
||||||
|
addressing_style = storage.get("access_addressing_style", "virtual")
|
||||||
|
if access_endpoint_url == self.local_minio_access_path:
|
||||||
|
addressing_style = "path"
|
||||||
|
|
||||||
return S3Storage(
|
return S3Storage(
|
||||||
access_key=storage["access_key"],
|
access_key=storage["access_key"],
|
||||||
secret_key=storage["secret_key"],
|
secret_key=storage["secret_key"],
|
||||||
@ -165,6 +171,7 @@ class StorageOps:
|
|||||||
endpoint_url=endpoint_url,
|
endpoint_url=endpoint_url,
|
||||||
endpoint_no_bucket_url=endpoint_no_bucket_url,
|
endpoint_no_bucket_url=endpoint_no_bucket_url,
|
||||||
access_endpoint_url=access_endpoint_url,
|
access_endpoint_url=access_endpoint_url,
|
||||||
|
access_addressing_style=addressing_style,
|
||||||
)
|
)
|
||||||
|
|
||||||
async def add_custom_storage(
|
async def add_custom_storage(
|
||||||
@ -189,6 +196,7 @@ class StorageOps:
|
|||||||
endpoint_url=endpoint_url,
|
endpoint_url=endpoint_url,
|
||||||
endpoint_no_bucket_url=endpoint_no_bucket_url,
|
endpoint_no_bucket_url=endpoint_no_bucket_url,
|
||||||
access_endpoint_url=storagein.access_endpoint_url or storagein.endpoint_url,
|
access_endpoint_url=storagein.access_endpoint_url or storagein.endpoint_url,
|
||||||
|
access_addressing_style=storagein.access_addressing_style,
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -291,9 +299,12 @@ class StorageOps:
|
|||||||
|
|
||||||
session = aiobotocore.session.get_session()
|
session = aiobotocore.session.get_session()
|
||||||
|
|
||||||
config = None
|
s3 = None
|
||||||
|
|
||||||
if for_presign and storage.access_endpoint_url != storage.endpoint_url:
|
if for_presign and storage.access_endpoint_url != storage.endpoint_url:
|
||||||
config = AioConfig(s3={"addressing_style": "virtual"})
|
s3 = {"addressing_style": storage.access_addressing_style}
|
||||||
|
|
||||||
|
config = AioConfig(signature_version="s3v4", s3=s3)
|
||||||
|
|
||||||
async with session.create_client(
|
async with session.create_client(
|
||||||
"s3",
|
"s3",
|
||||||
@ -498,9 +509,12 @@ class StorageOps:
|
|||||||
s3storage.access_endpoint_url
|
s3storage.access_endpoint_url
|
||||||
and s3storage.access_endpoint_url != s3storage.endpoint_url
|
and s3storage.access_endpoint_url != s3storage.endpoint_url
|
||||||
):
|
):
|
||||||
|
virtual = s3storage.access_addressing_style == "virtual"
|
||||||
parts = urlsplit(s3storage.endpoint_url)
|
parts = urlsplit(s3storage.endpoint_url)
|
||||||
host_endpoint_url = (
|
host_endpoint_url = (
|
||||||
f"{parts.scheme}://{bucket}.{parts.netloc}/{orig_key}"
|
f"{parts.scheme}://{bucket}.{parts.netloc}/{orig_key}"
|
||||||
|
if virtual
|
||||||
|
else f"{parts.scheme}://{parts.netloc}/{bucket}/{orig_key}"
|
||||||
)
|
)
|
||||||
presigned_url = presigned_url.replace(
|
presigned_url = presigned_url.replace(
|
||||||
host_endpoint_url, s3storage.access_endpoint_url
|
host_endpoint_url, s3storage.access_endpoint_url
|
||||||
|
@ -14,7 +14,7 @@ data:
|
|||||||
|
|
||||||
FRONTEND_ORIGIN: {{ .Values.frontend_alias | default "http://browsertrix-cloud-frontend" }}
|
FRONTEND_ORIGIN: {{ .Values.frontend_alias | default "http://browsertrix-cloud-frontend" }}
|
||||||
|
|
||||||
CRAWLER_FQDN_SUFFIX: ".{{ .Values.crawler_namespace }}.svc.cluster.local"
|
CRAWLER_FQDN_SUFFIX: ".{{ .Values.crawler_namespace }}{{ .Values.fqdn_suffix }}"
|
||||||
|
|
||||||
DEFAULT_ORG: "{{ .Values.default_org }}"
|
DEFAULT_ORG: "{{ .Values.default_org }}"
|
||||||
|
|
||||||
@ -53,6 +53,8 @@ data:
|
|||||||
|
|
||||||
IS_LOCAL_MINIO: "{{ .Values.minio_local }}"
|
IS_LOCAL_MINIO: "{{ .Values.minio_local }}"
|
||||||
|
|
||||||
|
LOCAL_MINIO_ACCESS_PATH: "{{ .Values.minio_access_path }}"
|
||||||
|
|
||||||
STORAGES_JSON: "/ops-configs/storages.json"
|
STORAGES_JSON: "/ops-configs/storages.json"
|
||||||
|
|
||||||
CRAWLER_CHANNELS_JSON: "/ops-configs/crawler_channels.json"
|
CRAWLER_CHANNELS_JSON: "/ops-configs/crawler_channels.json"
|
||||||
|
@ -41,7 +41,7 @@ spec:
|
|||||||
value: {{ .Values.name }}-backend
|
value: {{ .Values.name }}-backend
|
||||||
|
|
||||||
- name: CRAWLER_FQDN_SUFFIX
|
- name: CRAWLER_FQDN_SUFFIX
|
||||||
value: ".{{ .Values.crawler_namespace }}.svc.cluster.local"
|
value: ".{{ .Values.crawler_namespace }}{{ .Values.fqdn_suffix }}"
|
||||||
|
|
||||||
- name: NGINX_ENTRYPOINT_WORKER_PROCESSES_AUTOTUNE
|
- name: NGINX_ENTRYPOINT_WORKER_PROCESSES_AUTOTUNE
|
||||||
value: "1"
|
value: "1"
|
||||||
@ -60,7 +60,10 @@ spec:
|
|||||||
|
|
||||||
- name: LOCAL_BUCKET
|
- name: LOCAL_BUCKET
|
||||||
value: "{{ .Values.minio_local_bucket_name }}"
|
value: "{{ .Values.minio_local_bucket_name }}"
|
||||||
{{- end }}
|
|
||||||
|
- name: LOCAL_ACCESS_PATH
|
||||||
|
value: "{{ .Values.minio_access_path }}"
|
||||||
|
{{- end }}
|
||||||
|
|
||||||
{{- if .Values.inject_extra }}
|
{{- if .Values.inject_extra }}
|
||||||
- name: INJECT_EXTRA
|
- name: INJECT_EXTRA
|
||||||
|
@ -136,6 +136,23 @@ spec:
|
|||||||
{{- end }}
|
{{- end }}
|
||||||
name: minio
|
name: minio
|
||||||
|
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
|
||||||
|
metadata:
|
||||||
|
namespace: {{ .Values.crawler_namespace }}
|
||||||
|
name: local-minio
|
||||||
|
labels:
|
||||||
|
app: local-minio
|
||||||
|
|
||||||
|
spec:
|
||||||
|
type: ExternalName
|
||||||
|
externalName: "local-minio.{{ .Release.Namespace }}{{ .Values.fqdn_suffix }}"
|
||||||
|
ports:
|
||||||
|
- port: 9000
|
||||||
|
|
||||||
|
|
||||||
{{- if .Values.minio_local_console_port }}
|
{{- if .Values.minio_local_console_port }}
|
||||||
---
|
---
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
|
@ -398,6 +398,9 @@ minio_pull_policy: "IfNotPresent"
|
|||||||
|
|
||||||
minio_local_bucket_name: &local_bucket_name "btrix-data"
|
minio_local_bucket_name: &local_bucket_name "btrix-data"
|
||||||
|
|
||||||
|
# path for serving from local minio bucket
|
||||||
|
minio_access_path: &minio_access_path "/data/"
|
||||||
|
|
||||||
minio_cpu: "10m"
|
minio_cpu: "10m"
|
||||||
minio_memory: "1024Mi"
|
minio_memory: "1024Mi"
|
||||||
|
|
||||||
@ -413,8 +416,8 @@ storages:
|
|||||||
secret_key: "PASSW0RD"
|
secret_key: "PASSW0RD"
|
||||||
bucket_name: *local_bucket_name
|
bucket_name: *local_bucket_name
|
||||||
|
|
||||||
endpoint_url: "http://local-minio.default:9000/"
|
endpoint_url: "http://local-minio:9000/"
|
||||||
access_endpoint_url: "/data/"
|
access_endpoint_url: *minio_access_path
|
||||||
|
|
||||||
|
|
||||||
# optional: duration in minutes for WACZ download links to be valid
|
# optional: duration in minutes for WACZ download links to be valid
|
||||||
@ -495,6 +498,9 @@ signer_memory: "50Mi"
|
|||||||
# Other Settings
|
# Other Settings
|
||||||
# =========================================
|
# =========================================
|
||||||
|
|
||||||
|
# default FQDN suffix, shouldn't need to change
|
||||||
|
fqdn_suffix: .svc.cluster.local
|
||||||
|
|
||||||
# Optional: configure load balancing annotations
|
# Optional: configure load balancing annotations
|
||||||
# service:
|
# service:
|
||||||
# annotations:
|
# annotations:
|
||||||
|
@ -7,7 +7,9 @@ if [ -z "$LOCAL_MINIO_HOST" ]; then
|
|||||||
echo "no local minio, clearing out minio route"
|
echo "no local minio, clearing out minio route"
|
||||||
echo "" >/etc/nginx/includes/minio.conf
|
echo "" >/etc/nginx/includes/minio.conf
|
||||||
else
|
else
|
||||||
echo "local minio: replacing \$LOCAL_MINIO_HOST with \"$LOCAL_MINIO_HOST\", \$LOCAL_BUCKET with \"$LOCAL_BUCKET\""
|
LOCAL_ACCESS_PATH=$(printf '%s\n' "$LOCAL_ACCESS_PATH" | sed -e 's/[\/&]/\\&/g')
|
||||||
|
echo "local minio: replacing \$LOCAL_MINIO_HOST with \"$LOCAL_MINIO_HOST\", \$LOCAL_BUCKET with \"$LOCAL_BUCKET\", \$LOCAL_ACCESS_PATH with \"$LOCAL_ACCESS_PATH\""
|
||||||
|
sed -i "s/\$LOCAL_ACCESS_PATH/$LOCAL_ACCESS_PATH/g" /etc/nginx/includes/minio.conf
|
||||||
sed -i "s/\$LOCAL_MINIO_HOST/$LOCAL_MINIO_HOST/g" /etc/nginx/includes/minio.conf
|
sed -i "s/\$LOCAL_MINIO_HOST/$LOCAL_MINIO_HOST/g" /etc/nginx/includes/minio.conf
|
||||||
sed -i "s/\$LOCAL_BUCKET/$LOCAL_BUCKET/g" /etc/nginx/includes/minio.conf
|
sed -i "s/\$LOCAL_BUCKET/$LOCAL_BUCKET/g" /etc/nginx/includes/minio.conf
|
||||||
fi
|
fi
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
location /data/ {
|
location $LOCAL_ACCESS_PATH {
|
||||||
proxy_pass http://$LOCAL_MINIO_HOST/$LOCAL_BUCKET/;
|
proxy_pass http://$LOCAL_MINIO_HOST/$LOCAL_BUCKET/;
|
||||||
proxy_redirect off;
|
proxy_redirect off;
|
||||||
proxy_buffering off;
|
proxy_buffering off;
|
||||||
|
Loading…
Reference in New Issue
Block a user