storage: use s3v4 signature for presigning urls (#2611)
Use V4 ('s3v4') signature version for all presigning URLs to support
Backblaze, fixes #2472
- add 'access_addressing_style' to be able to choose virtual/path
addressing for access endpoint (default to 'virtual' as before)
- fix minio presigning with v4 by using 'path' addressing style for
minio
- if the access endpoint matches the local minio access path ('/data/' by
default), then always use the 'path' addressing style
- also make minio access path '/data/' configurable
also simplify running in any namespace with default settings:
- don't hardcode 'local-minio.default'
- in crawlers namespace, add a 'local-minio' externalName service which
maps to the main namespace service.
			
			
This commit is contained in:
		
							parent
							
								
									4b1e416eb6
								
							
						
					
					
						commit
						f1fd11c031
					
				| @ -1666,6 +1666,7 @@ class S3StorageIn(BaseModel): | ||||
|     endpoint_url: str | ||||
|     bucket: str | ||||
|     access_endpoint_url: Optional[str] = None | ||||
|     access_addressing_style: Literal["virtual", "path"] = "virtual" | ||||
|     region: str = "" | ||||
| 
 | ||||
| 
 | ||||
| @ -1680,6 +1681,7 @@ class S3Storage(BaseModel): | ||||
|     access_key: str | ||||
|     secret_key: str | ||||
|     access_endpoint_url: str | ||||
|     access_addressing_style: Literal["virtual", "path"] = "virtual" | ||||
|     region: str = "" | ||||
| 
 | ||||
| 
 | ||||
|  | ||||
| @ -70,7 +70,7 @@ CHUNK_SIZE = 1024 * 256 | ||||
| 
 | ||||
| 
 | ||||
| # ============================================================================ | ||||
| # pylint: disable=broad-except,raise-missing-from | ||||
| # pylint: disable=broad-except,raise-missing-from,too-many-instance-attributes | ||||
| class StorageOps: | ||||
|     """All storage handling, download/upload operations""" | ||||
| 
 | ||||
| @ -104,6 +104,8 @@ class StorageOps: | ||||
|         default_namespace = os.environ.get("DEFAULT_NAMESPACE", "default") | ||||
|         self.frontend_origin = f"{frontend_origin}.{default_namespace}" | ||||
| 
 | ||||
|         self.local_minio_access_path = os.environ.get("LOCAL_MINIO_ACCESS_PATH") | ||||
| 
 | ||||
|         with open(os.environ["STORAGES_JSON"], encoding="utf-8") as fh: | ||||
|             storage_list = json.loads(fh.read()) | ||||
| 
 | ||||
| @ -158,6 +160,10 @@ class StorageOps: | ||||
| 
 | ||||
|         access_endpoint_url = storage.get("access_endpoint_url") or endpoint_url | ||||
| 
 | ||||
|         addressing_style = storage.get("access_addressing_style", "virtual") | ||||
|         if access_endpoint_url == self.local_minio_access_path: | ||||
|             addressing_style = "path" | ||||
| 
 | ||||
|         return S3Storage( | ||||
|             access_key=storage["access_key"], | ||||
|             secret_key=storage["secret_key"], | ||||
| @ -165,6 +171,7 @@ class StorageOps: | ||||
|             endpoint_url=endpoint_url, | ||||
|             endpoint_no_bucket_url=endpoint_no_bucket_url, | ||||
|             access_endpoint_url=access_endpoint_url, | ||||
|             access_addressing_style=addressing_style, | ||||
|         ) | ||||
| 
 | ||||
|     async def add_custom_storage( | ||||
| @ -189,6 +196,7 @@ class StorageOps: | ||||
|             endpoint_url=endpoint_url, | ||||
|             endpoint_no_bucket_url=endpoint_no_bucket_url, | ||||
|             access_endpoint_url=storagein.access_endpoint_url or storagein.endpoint_url, | ||||
|             access_addressing_style=storagein.access_addressing_style, | ||||
|         ) | ||||
| 
 | ||||
|         try: | ||||
| @ -291,9 +299,12 @@ class StorageOps: | ||||
| 
 | ||||
|         session = aiobotocore.session.get_session() | ||||
| 
 | ||||
|         config = None | ||||
|         s3 = None | ||||
| 
 | ||||
|         if for_presign and storage.access_endpoint_url != storage.endpoint_url: | ||||
|             config = AioConfig(s3={"addressing_style": "virtual"}) | ||||
|             s3 = {"addressing_style": storage.access_addressing_style} | ||||
| 
 | ||||
|         config = AioConfig(signature_version="s3v4", s3=s3) | ||||
| 
 | ||||
|         async with session.create_client( | ||||
|             "s3", | ||||
| @ -498,9 +509,12 @@ class StorageOps: | ||||
|                 s3storage.access_endpoint_url | ||||
|                 and s3storage.access_endpoint_url != s3storage.endpoint_url | ||||
|             ): | ||||
|                 virtual = s3storage.access_addressing_style == "virtual" | ||||
|                 parts = urlsplit(s3storage.endpoint_url) | ||||
|                 host_endpoint_url = ( | ||||
|                     f"{parts.scheme}://{bucket}.{parts.netloc}/{orig_key}" | ||||
|                     if virtual | ||||
|                     else f"{parts.scheme}://{parts.netloc}/{bucket}/{orig_key}" | ||||
|                 ) | ||||
|                 presigned_url = presigned_url.replace( | ||||
|                     host_endpoint_url, s3storage.access_endpoint_url | ||||
|  | ||||
| @ -14,7 +14,7 @@ data: | ||||
| 
 | ||||
|   FRONTEND_ORIGIN: {{ .Values.frontend_alias | default "http://browsertrix-cloud-frontend" }} | ||||
| 
 | ||||
|   CRAWLER_FQDN_SUFFIX: ".{{ .Values.crawler_namespace }}.svc.cluster.local" | ||||
|   CRAWLER_FQDN_SUFFIX: ".{{ .Values.crawler_namespace }}{{ .Values.fqdn_suffix }}" | ||||
| 
 | ||||
|   DEFAULT_ORG: "{{ .Values.default_org }}" | ||||
| 
 | ||||
| @ -53,6 +53,8 @@ data: | ||||
| 
 | ||||
|   IS_LOCAL_MINIO: "{{ .Values.minio_local }}" | ||||
| 
 | ||||
|   LOCAL_MINIO_ACCESS_PATH: "{{ .Values.minio_access_path }}" | ||||
| 
 | ||||
|   STORAGES_JSON: "/ops-configs/storages.json" | ||||
| 
 | ||||
|   CRAWLER_CHANNELS_JSON: "/ops-configs/crawler_channels.json" | ||||
|  | ||||
| @ -41,7 +41,7 @@ spec: | ||||
|               value: {{ .Values.name }}-backend | ||||
| 
 | ||||
|             - name: CRAWLER_FQDN_SUFFIX | ||||
|               value: ".{{ .Values.crawler_namespace }}.svc.cluster.local" | ||||
|               value: ".{{ .Values.crawler_namespace }}{{ .Values.fqdn_suffix }}" | ||||
| 
 | ||||
|             - name: NGINX_ENTRYPOINT_WORKER_PROCESSES_AUTOTUNE | ||||
|               value: "1" | ||||
| @ -60,7 +60,10 @@ spec: | ||||
| 
 | ||||
|             - name: LOCAL_BUCKET | ||||
|               value: "{{ .Values.minio_local_bucket_name }}" | ||||
|             {{- end }} | ||||
| 
 | ||||
|             - name: LOCAL_ACCESS_PATH | ||||
|               value: "{{ .Values.minio_access_path }}" | ||||
|              {{- end }} | ||||
| 
 | ||||
|             {{- if .Values.inject_extra }} | ||||
|             - name: INJECT_EXTRA | ||||
|  | ||||
| @ -136,6 +136,23 @@ spec: | ||||
|       {{- end }} | ||||
|       name: minio | ||||
| 
 | ||||
| --- | ||||
| apiVersion: v1 | ||||
| kind: Service | ||||
| 
 | ||||
| metadata: | ||||
|   namespace: {{ .Values.crawler_namespace }} | ||||
|   name: local-minio | ||||
|   labels: | ||||
|     app: local-minio | ||||
| 
 | ||||
| spec: | ||||
|   type: ExternalName | ||||
|   externalName: "local-minio.{{ .Release.Namespace }}{{ .Values.fqdn_suffix }}" | ||||
|   ports: | ||||
|     - port: 9000 | ||||
| 
 | ||||
| 
 | ||||
| {{- if .Values.minio_local_console_port }} | ||||
| --- | ||||
| apiVersion: v1 | ||||
|  | ||||
| @ -398,6 +398,9 @@ minio_pull_policy: "IfNotPresent" | ||||
| 
 | ||||
| minio_local_bucket_name: &local_bucket_name "btrix-data" | ||||
| 
 | ||||
| # path for serving from local minio bucket | ||||
| minio_access_path: &minio_access_path "/data/" | ||||
| 
 | ||||
| minio_cpu: "10m" | ||||
| minio_memory: "1024Mi" | ||||
| 
 | ||||
| @ -413,8 +416,8 @@ storages: | ||||
|     secret_key: "PASSW0RD" | ||||
|     bucket_name: *local_bucket_name | ||||
| 
 | ||||
|     endpoint_url: "http://local-minio.default:9000/" | ||||
|     access_endpoint_url: "/data/" | ||||
|     endpoint_url: "http://local-minio:9000/" | ||||
|     access_endpoint_url: *minio_access_path | ||||
| 
 | ||||
| 
 | ||||
| # optional: duration in minutes for WACZ download links to be valid | ||||
| @ -495,6 +498,9 @@ signer_memory: "50Mi" | ||||
| # Other Settings | ||||
| # ========================================= | ||||
| 
 | ||||
| # default FQDN suffix, shouldn't need to change | ||||
| fqdn_suffix: .svc.cluster.local | ||||
| 
 | ||||
| # Optional: configure load balancing annotations | ||||
| # service: | ||||
| #   annotations: | ||||
|  | ||||
| @ -7,7 +7,9 @@ if [ -z "$LOCAL_MINIO_HOST" ]; then | ||||
|   echo "no local minio, clearing out minio route" | ||||
|   echo "" >/etc/nginx/includes/minio.conf | ||||
| else | ||||
|   echo "local minio: replacing \$LOCAL_MINIO_HOST with \"$LOCAL_MINIO_HOST\", \$LOCAL_BUCKET with \"$LOCAL_BUCKET\"" | ||||
|   LOCAL_ACCESS_PATH=$(printf '%s\n' "$LOCAL_ACCESS_PATH" | sed -e 's/[\/&]/\\&/g') | ||||
|   echo "local minio: replacing \$LOCAL_MINIO_HOST with \"$LOCAL_MINIO_HOST\", \$LOCAL_BUCKET with \"$LOCAL_BUCKET\", \$LOCAL_ACCESS_PATH with \"$LOCAL_ACCESS_PATH\"" | ||||
|   sed -i "s/\$LOCAL_ACCESS_PATH/$LOCAL_ACCESS_PATH/g" /etc/nginx/includes/minio.conf | ||||
|   sed -i "s/\$LOCAL_MINIO_HOST/$LOCAL_MINIO_HOST/g" /etc/nginx/includes/minio.conf | ||||
|   sed -i "s/\$LOCAL_BUCKET/$LOCAL_BUCKET/g" /etc/nginx/includes/minio.conf | ||||
| fi | ||||
|  | ||||
| @ -1,4 +1,4 @@ | ||||
| location /data/ { | ||||
| location $LOCAL_ACCESS_PATH { | ||||
|   proxy_pass http://$LOCAL_MINIO_HOST/$LOCAL_BUCKET/; | ||||
|   proxy_redirect off; | ||||
|   proxy_buffering off; | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user