VNC-Based Profile Browser (#433)

* profile browser vnc support + fixes:
- switch profile browser rendering to use VNC
- frontend: add @novnc/novnc as dependency, create separate bundle novnc.js to load into vnc browser (to avoid loading from each container)
- frontend: update proxy paths to proxy websocket, index page to crawler
- frontend: allow browser profiles in all browsers, remove browser compatibility check
- frontend: update webpack dev config, apply prettier
- frontend: node version fix
- backend: get vncpassword, build new URL for proxying to crawler iframe
- backend: change profile / crawl job image pull policy from 'Always' to 'Never', so the job uses the existing image
- backend: fix kill signal to use bash -c to work with latest backend image
- backend/chart: add 'profile_browser_idle_seconds' to chart values to control how long the profile browser keeps running while idle (defaults to 60 seconds)
- backend: remove utils.py, now using secrets.token_hex() for random suffix
Co-authored-by: sua yoo <sua@suayoo.com>
This commit is contained in:
Ilya Kreymer 2023-01-10 14:42:42 -08:00 committed by GitHub
parent 303df2869c
commit 30bda8c75d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
23 changed files with 138 additions and 124 deletions

View File

@ -3,12 +3,12 @@
import os
import asyncio
import datetime
import secrets
from abc import ABC, abstractmethod
from fastapi.templating import Jinja2Templates
from .utils import random_suffix
from .db import resolve_db_url
@ -52,7 +52,7 @@ class BaseCrawlManager(ABC):
await self.check_storage(storage_name)
browserid = f"prf-{random_suffix()}"
browserid = f"prf-{secrets.token_hex(5)}"
params = {
"id": browserid,
@ -63,6 +63,7 @@ class BaseCrawlManager(ABC):
"storage_path": storage_path or "",
"baseprofile": baseprofile or "",
"profile_path": profile_path,
"idle_timeout": os.environ.get("IDLE_TIMEOUT", "60"),
"url": url,
"env": os.environ,
}

View File

@ -41,7 +41,7 @@ spec:
containers:
- name: crawl-job
image: {{ job_image }}
imagePullPolicy: Always
imagePullPolicy: Never
command: ["uvicorn", "btrixcloud.k8s.crawl_job:app", "--host", "0.0.0.0", "--access-log", "--log-level", "info"]
volumeMounts:

View File

@ -45,7 +45,7 @@ spec:
containers:
- name: crawl-job
image: {{ job_image }}
imagePullPolicy: Always
imagePullPolicy: Never
command: ["python", "-m", "btrixcloud.k8s.profile_job"]
volumeMounts:
@ -65,7 +65,7 @@ spec:
value: {{ storage_name }}
- name: IDLE_TIMEOUT
value: "60"
value: "{{ idle_timeout }}"
- name: START_URL
value: {{ url }}

View File

@ -75,6 +75,9 @@ spec:
- name: STORE_PATH
value: {{ storage_path }}
- name: VNC_PASS
value: {{ vnc_password }}
---
apiVersion: v1
kind: Service

View File

@ -26,7 +26,7 @@ async def create_from_yaml(k8s_client, doc, namespace):
async def send_signal_to_pods(core_api_ws, namespace, pods, signame, func=None):
"""send signal to all pods"""
command = ["kill", "-s", signame, "1"]
command = ["bash", "-c", f"kill -s {signame} 1"]
signaled = False
try:
@ -36,13 +36,17 @@ async def send_signal_to_pods(core_api_ws, namespace, pods, signame, func=None):
print(f"Sending {signame} to {pod.metadata.name}", flush=True)
await core_api_ws.connect_get_namespaced_pod_exec(
res = await core_api_ws.connect_get_namespaced_pod_exec(
pod.metadata.name,
namespace=namespace,
command=command,
stdout=True,
)
signaled = True
if res:
print("Result", res, flush=True)
else:
signaled = True
# pylint: disable=broad-except
except Exception as exc:

View File

@ -3,6 +3,7 @@
import os
import signal
import asyncio
import secrets
from abc import ABC, abstractmethod
@ -23,6 +24,7 @@ class ProfileJob(ABC):
"storage_path": os.environ.get("STORE_PATH") or "",
"url": os.environ.get("START_URL"),
"profile_filename": os.environ.get("PROFILE_PATH") or "",
"vnc_password": secrets.token_hex(16),
}
self.idle_timeout = int(os.environ["IDLE_TIMEOUT"])

View File

@ -146,28 +146,29 @@ class ProfileOps:
async def get_profile_browser_url(self, browserid, aid, headers):
"""get profile browser url"""
json = await self._send_browser_req(browserid, "/target")
json = await self._send_browser_req(browserid, "/vncpass")
target_id = json.get("targetId")
password = json.get("password")
if not target_id:
if not password:
raise HTTPException(status_code=400, detail="browser_not_available")
scheme = headers.get("X-Forwarded-Proto") or "http"
host = headers.get("Host") or "localhost"
ws_scheme = "wss" if scheme == "https" else "ws"
prefix = f"{host}/loadbrowser/{browserid}/devtools"
# ws_scheme = "wss" if scheme == "https" else "ws"
auth_bearer = headers.get("Authorization").split(" ")[1]
params = {"panel": "resources"}
params[
ws_scheme
] = f"{prefix}/page/{target_id}?aid={aid}&auth_bearer={auth_bearer}"
params = {
"path": f"browser/{browserid}/ws?aid={aid}&auth_bearer={auth_bearer}",
"password": password,
"aid": aid,
"auth_bearer": auth_bearer,
"scale": 0.75,
}
# pylint: disable=line-too-long
return {"url": f"{scheme}://{prefix}/inspector.html?{urlencode(params)}"}
url = f"{scheme}://{host}/browser/{browserid}/?{urlencode(params)}"
return {"url": url}
async def ping_profile_browser(self, browserid):
"""ping profile browser to keep it running"""

View File

@ -1,9 +0,0 @@
""" shared utils """
import base64
import os
def random_suffix():
"""generate suffix for unique container"""
return base64.b32encode(os.urandom(5)).lower().decode("utf-8")

View File

@ -1,2 +1,2 @@
""" current version """
__version__ = "1.2.0-beta.0"
__version__ = "1.2.0-beta.1"

View File

@ -47,6 +47,8 @@ data:
WEB_CONCURRENCY: "{{ .Values.backend_workers | default 4 }}"
IDLE_TIMEOUT: "{{ .Values.profile_browser_idle_seconds | default 60 }}"
---
apiVersion: v1

View File

@ -48,6 +48,8 @@ api_limits_memory: "512Mi"
job_cpu: "3m"
job_memory: "70Mi"
profile_browser_idle_seconds: 60
# Nginx Image
# =========================================

View File

@ -62,7 +62,13 @@ server {
proxy_set_header Content-Length "";
}
location ~* /loadbrowser/([^/]+)/(.*)/page/(.*)$ {
# Redirect requests for noVNC's core/rfb.js (under any /browser/<id>/ prefix)
# to the bundled build of novnc served at /js/novnc.js.
# The dot is escaped so that only a literal "rfb.js" matches; an unescaped "."
# in a regex location matches any character.
location ~* ^/browser/([^/]+)/core/rfb\.js$ {
  # Emit a relative Location header instead of an absolute URL
  absolute_redirect off;
  return 308 /js/novnc.js;
}
location ~* ^/browser/([^/]+)/ws$ {
set $browserid $1;
set $auth_bearer $arg_auth_bearer;
set $archive $arg_aid;
@ -70,14 +76,29 @@ server {
auth_request /access_check_profiles;
proxy_pass http://browser-$browserid-0.browser-$browserid$fqdn_suffix:9222/$2/page/$3;
proxy_pass http://browser-$browserid-0.browser-$browserid$fqdn_suffix:6080/websockify;
proxy_set_header Host "localhost";
proxy_send_timeout 10m;
proxy_read_timeout 10m;
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection $http_connection;
}
# Profile browser index page: /browser/<browserid>/ is proxied to the /vnc/
# path on port 9223 of the host derived from the browser id.
location ~* ^/browser/([^/]+)/$ {
# Browser id captured from the request path
set $browserid $1;
# Values read from the query string (?auth_bearer=...&aid=...); together with
# $browserid they are used by the /access_check_profiles subrequest
set $auth_bearer $arg_auth_bearer;
set $archive $arg_aid;
set $fqdn_suffix "${CRAWLER_FQDN_SUFFIX}";
# Authorize the request via the /access_check_profiles subrequest before proxying
auth_request /access_check_profiles;
proxy_pass http://browser-$browserid-0.browser-$browserid$fqdn_suffix:9223/vnc/;
proxy_set_header Host "localhost";
}
location = /access_check_profiles {
internal;
proxy_pass http://${BACKEND_HOST}:8000/api/archives/$archive/profiles/browser/$browserid/access?auth_bearer=$auth_bearer;
@ -85,18 +106,6 @@ server {
proxy_set_header Content-Length "";
}
location ~* /loadbrowser/([^/]+)/(.*) {
set $browserid $1;
set $fqdn_suffix "${CRAWLER_FQDN_SUFFIX}";
proxy_pass http://browser-$browserid-0.browser-$browserid$fqdn_suffix:9222/$2;
proxy_set_header Host "localhost";
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection $http_connection;
}
include ./includes/*.conf;
}

View File

@ -1,6 +1,6 @@
{
"name": "browsertrix-frontend",
"version": "1.2.0-beta.0",
"version": "1.2.0-beta.1",
"main": "index.ts",
"license": "AGPL-3.0-or-later",
"dependencies": {
@ -8,6 +8,7 @@
"@formatjs/intl-displaynames": "^5.2.5",
"@formatjs/intl-getcanonicallocales": "^1.8.0",
"@lit/localize": "^0.11.4",
"@novnc/novnc": "^1.4.0-beta",
"@shoelace-style/shoelace": "2.0.0-beta.85",
"@xstate/fsm": "^1.6.2",
"axios": "^0.22.0",

View File

@ -30,9 +30,6 @@ export class ProfileBrowser extends LiteElement {
// is hidden on the backend
static SIDE_BAR_WIDTH = 288;
/** Profile creation only works in Chromium-based browsers */
static isBrowserCompatible = Boolean((window as any).chrome);
@property({ type: Object })
authState!: AuthState;
@ -112,18 +109,6 @@ export class ProfileBrowser extends LiteElement {
}
private renderBrowser() {
if (!ProfileBrowser.isBrowserCompatible) {
return html`
<div style="padding-right: ${ProfileBrowser.SIDE_BAR_WIDTH}px;">
<btrix-alert variant="warning" class="text-sm">
${msg(
"Browser profile creation is only supported in Chromium-based browsers (such as Chrome) at this time. Please re-open this page in a compatible browser to proceed."
)}
</btrix-alert>
</div>
`;
}
if (this.hasFetchError) {
return html`
<div style="padding-right: ${ProfileBrowser.SIDE_BAR_WIDTH}px;">

View File

@ -239,7 +239,6 @@ export class BrowserProfilesDetail extends LiteElement {
<sl-button
variant="primary"
outline
?disabled=${!ProfileBrowser.isBrowserCompatible}
@click=${this.startBrowserPreview}
><sl-icon
slot="prefix"

View File

@ -34,9 +34,6 @@ export class BrowserProfilesList extends LiteElement {
@state()
private isSubmitting = false;
/** Profile creation only works in Chromium-based browsers */
private isBrowserCompatible = Boolean((window as any).chrome);
firstUpdated() {
if (this.showCreateDialog) {
this.isCreateFormVisible = true;
@ -66,17 +63,6 @@ export class BrowserProfilesList extends LiteElement {
@sl-show=${() => (this.isCreateFormVisible = true)}
@sl-after-hide=${() => (this.isCreateFormVisible = false)}
>
${this.isBrowserCompatible
? ""
: html`
<div class="mb-4">
<btrix-alert variant="warning" class="text-sm">
${msg(
"Browser profile creation is only supported in Chromium-based browsers (such as Chrome) at this time. Please re-open this page in a compatible browser to proceed."
)}
</btrix-alert>
</div>
`}
${this.isCreateFormVisible ? this.renderCreateForm() : ""}
</sl-dialog> `;
}
@ -213,7 +199,6 @@ export class BrowserProfilesList extends LiteElement {
name="urlPrefix"
value="https://"
hoist
?disabled=${!this.isBrowserCompatible}
@sl-hide=${this.stopProp}
@sl-after-hide=${this.stopProp}
>
@ -226,7 +211,6 @@ export class BrowserProfilesList extends LiteElement {
placeholder=${msg("example.com")}
autocomplete="off"
aria-labelledby="startingUrlLabel"
?disabled=${!this.isBrowserCompatible}
required
>
</sl-input>
@ -238,7 +222,7 @@ export class BrowserProfilesList extends LiteElement {
<sl-button
variant="primary"
type="submit"
?disabled=${!this.isBrowserCompatible || this.isSubmitting}
?disabled=${this.isSubmitting}
?loading=${this.isSubmitting}
>
${msg("Start Profile Creator")}

View File

@ -64,7 +64,7 @@ const version = (() => {
return packageJSON.version;
})();
module.exports = {
const main = {
entry: "./src/index.ts",
output: {
path: path.resolve(__dirname, "dist"),
@ -156,3 +156,19 @@ module.exports = {
}),
],
};
// Secondary webpack target: bundles the noVNC RFB client on its own and emits
// it as an ES module at js/novnc.js.
const vnc = {
  entry: "./node_modules/@novnc/novnc/core/rfb.js",
  output: {
    filename: "js/novnc.js",
    // Fix node >v16 compatibility issues
    // https://stackoverflow.com/a/73465262
    hashFunction: "xxhash64",
    library: { type: "module" },
  },
  // Module output (library type "module") needs this experiment enabled
  experiments: { outputModule: true },
};
module.exports = [main, vnc];

View File

@ -1,7 +1,7 @@
const path = require("path");
const { merge } = require("webpack-merge");
const common = require("./webpack.config.js");
const [main, vnc] = require("./webpack.config.js");
// for testing: for prod, the Dockerfile should have the official prod version used
const RWP_BASE_URL = process.env.RWP_BASE_URL || "https://replayweb.page/";
@ -19,47 +19,53 @@ const shoelaceAssetsSrcPath = path.resolve(
);
const shoelaceAssetsPublicPath = "shoelace/assets";
module.exports = merge(common, {
devServer: {
watchFiles: ["src/*.js"],
open: true,
compress: true,
hot: true,
static: [
{
directory: shoelaceAssetsSrcPath,
publicPath: "/" + shoelaceAssetsPublicPath,
},
{
directory: path.join(__dirname),
//publicPath: "/",
watch: true,
},
],
historyApiFallback: true,
proxy: {
"/api": {
target: devBackendUrl.href,
headers: {
Host: devBackendUrl.host,
module.exports = [
merge(main, {
devServer: {
watchFiles: ["src/*.js"],
open: true,
compress: true,
hot: true,
static: [
{
directory: shoelaceAssetsSrcPath,
publicPath: "/" + shoelaceAssetsPublicPath,
},
{
directory: path.join(__dirname),
//publicPath: "/",
watch: true,
},
],
historyApiFallback: true,
proxy: {
"/api": {
target: devBackendUrl.href,
headers: {
Host: devBackendUrl.host,
},
ws: true,
},
ws: true,
},
"/data": {
target: devBackendUrl.href,
headers: {
Host: devBackendUrl.host,
"/data": {
target: devBackendUrl.href,
headers: {
Host: devBackendUrl.host,
},
},
},
// Serve replay service worker file
onBeforeSetupMiddleware: (server) => {
server.app.get("/replay/sw.js", (req, res) => {
res.set("Content-Type", "application/javascript");
res.send(`importScripts("${RWP_BASE_URL}sw.js")`);
});
},
port: 9870,
},
// Serve replay service worker file
onBeforeSetupMiddleware: (server) => {
server.app.get("/replay/sw.js", (req, res) => {
res.set("Content-Type", "application/javascript");
res.send(`importScripts("${RWP_BASE_URL}sw.js")`);
});
},
port: 9870,
}),
{
...vnc,
mode: "production",
},
});
];

View File

@ -1,8 +1,8 @@
const { merge } = require("webpack-merge");
const common = require("./webpack.config.js");
const [main, vnc] = require("./webpack.config.js");
module.exports = merge(common, {
module.exports = [merge(main, {
mode: "production",
devtool: "source-map",
@ -14,4 +14,7 @@ module.exports = merge(common, {
chunks: "all",
},
},
});
}), {
...vnc,
mode: "production"
}];

View File

@ -206,6 +206,11 @@
"@nodelib/fs.scandir" "2.1.5"
fastq "^1.6.0"
"@novnc/novnc@^1.4.0-beta":
version "1.4.0-beta"
resolved "https://registry.yarnpkg.com/@novnc/novnc/-/novnc-1.4.0-beta.tgz#a9aedc3f0274863dcfd0d382c43615e912f7c006"
integrity sha512-iLwlvPucpqZ14yZHIrW6bxeC1Aynd5hNhbe9iSEYTOPtOicpVkbwj5Mpkmyw9rSqYoqwLKerV7OJ8afUg1Yq0g==
"@open-wc/chai-dom-equals@^0.12.36":
version "0.12.36"
resolved "https://registry.yarnpkg.com/@open-wc/chai-dom-equals/-/chai-dom-equals-0.12.36.tgz#ed0eb56b9e98c4d7f7280facce6215654aae9f4c"

View File

@ -1,5 +1,5 @@
#!/bin/bash
CURR=$(dirname "${BASH_SOURCE[0]}")
docker build -t ${REGISTRY}webrecorder/browsertrix-backend $CURR/../backend/
docker build -t ${REGISTRY}webrecorder/browsertrix-backend:latest $CURR/../backend/

View File

@ -1,4 +1,4 @@
#!/bin/bash
CURR=$(dirname "${BASH_SOURCE[0]}")
docker build --build-arg GIT_COMMIT_HASH="$(git rev-parse --short HEAD)" --build-arg GIT_BRANCH_NAME="$(git rev-parse --abbrev-ref HEAD)" --build-arg --load -t ${REGISTRY}webrecorder/browsertrix-frontend $CURR/../frontend/
docker build --build-arg GIT_COMMIT_HASH="$(git rev-parse --short HEAD)" --build-arg GIT_BRANCH_NAME="$(git rev-parse --abbrev-ref HEAD)" --build-arg --load -t ${REGISTRY}webrecorder/browsertrix-frontend:latest $CURR/../frontend/

View File

@ -1 +1 @@
1.2.0-beta.0
1.2.0-beta.1