frontend: configure running with a fixed 'replay.json', auth headers passed via separate config (#899)

wabac.js will reload replay.json with a new token when it receives a 403 (support for this will be in the next version of wabac.js)
presign URLs: make the presign timeout configurable (in minutes); defaults to 60 minutes
dockerfile: fix configuring RWP_BASE_URL
This commit is contained in:
Ilya Kreymer 2023-06-08 11:26:26 -07:00 committed by GitHub
parent d286555396
commit 4428184aea
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 16 additions and 11 deletions

View File

@ -220,7 +220,9 @@ class CrawlOps:
self.crawl_configs.set_crawl_ops(self) self.crawl_configs.set_crawl_ops(self)
self.presign_duration = int(os.environ.get("PRESIGN_DURATION_SECONDS", 3600)) self.presign_duration_seconds = (
int(os.environ.get("PRESIGN_DURATION_MINUTES", 60)) * 60
)
async def init_index(self): async def init_index(self):
"""init index for crawls db collection""" """init index for crawls db collection"""
@ -463,7 +465,7 @@ class CrawlOps:
print("no files") print("no files")
return return
delta = timedelta(seconds=self.presign_duration) delta = timedelta(seconds=self.presign_duration_seconds)
updates = [] updates = []
out_files = [] out_files = []
@ -475,7 +477,7 @@ class CrawlOps:
if not presigned_url or now >= file_.expireAt: if not presigned_url or now >= file_.expireAt:
exp = now + delta exp = now + delta
presigned_url = await get_presigned_url( presigned_url = await get_presigned_url(
org, file_, self.crawl_manager, self.presign_duration org, file_, self.crawl_manager, self.presign_duration_seconds
) )
updates.append( updates.append(
( (

View File

@ -62,6 +62,8 @@ data:
RERUN_LAST_MIGRATION: "{{ .Values.rerun_last_migration }}" RERUN_LAST_MIGRATION: "{{ .Values.rerun_last_migration }}"
PRESIGN_DURATION_MINUTES: "{{ .Values.storage_presign_duration_minutes | default 60 }}"
--- ---
apiVersion: v1 apiVersion: v1

View File

@ -1,7 +1,4 @@
# syntax=docker/dockerfile:1.4 # syntax=docker/dockerfile:1.4
# central place to configure the production replayweb.page loading prefix
ARG RWP_BASE_URL=https://cdn.jsdelivr.net/npm/replaywebpage/
FROM docker.io/library/node:16 as build_deps FROM docker.io/library/node:16 as build_deps
WORKDIR /app WORKDIR /app
@ -30,10 +27,12 @@ COPY --link src ./src/
ARG GIT_COMMIT_HASH ARG GIT_COMMIT_HASH
ARG GIT_BRANCH_NAME ARG GIT_BRANCH_NAME
ARG VERSION ARG VERSION
ARG RWP_BASE_URL=https://cdn.jsdelivr.net/npm/replaywebpage/
ENV GIT_COMMIT_HASH=${GIT_COMMIT_HASH} \ ENV GIT_COMMIT_HASH=${GIT_COMMIT_HASH} \
GIT_BRANCH_NAME=${GIT_BRANCH_NAME} \ GIT_BRANCH_NAME=${GIT_BRANCH_NAME} \
VERSION=${VERSION} VERSION=${VERSION} \
RWP_BASE_URL=${RWP_BASE_URL}
# Prevent Docker caching node_modules # Prevent Docker caching node_modules
RUN yarn build && \ RUN yarn build && \

View File

@ -571,11 +571,12 @@ export class CrawlDetail extends LiteElement {
} }
private renderReplay() { private renderReplay() {
const bearer = this.authState?.headers?.Authorization?.split(" ", 2)[1]; //const replaySource = `/api/orgs/${this.crawl?.oid}/crawls/${this.crawlId}/replay.json?auth_bearer=${bearer}`;
const replaySource = `/api/orgs/${this.crawl?.oid}/crawls/${this.crawlId}/replay.json`;
// for now, just use the first file until multi-wacz support is fully implemented const headers = this.authState?.headers;
const replaySource = `/api/orgs/${this.crawl?.oid}/crawls/${this.crawlId}/replay.json?auth_bearer=${bearer}`;
//const replaySource = this.crawl?.resources?.[0]?.path; const config = JSON.stringify({headers});
const canReplay = replaySource && this.hasFiles; const canReplay = replaySource && this.hasFiles;
@ -587,6 +588,7 @@ export class CrawlDetail extends LiteElement {
<replay-web-page <replay-web-page
source="${replaySource}" source="${replaySource}"
coll="${ifDefined(this.crawl?.id)}" coll="${ifDefined(this.crawl?.id)}"
config="${config}"
replayBase="/replay/" replayBase="/replay/"
noSandbox="true" noSandbox="true"
noCache="true" noCache="true"