Local Deployment Work: Support running locally + test cluster on CI (#396)

* k8s local deployment work:
- make it easier to deploy w/o ingress by setting 'local_service_port' (suggested port 30870)
- if using local minio, ensure file endpoints set to /data/ and /data/ proxies correctly to local bucket
- if not using minio, ensure file endpoints point to correct access / endpoint url.
- setup should work with docker desktop, minikube, microk8s and k3s!
- nginx chart: bump nginx memory limit to 20Mi
- nginx image: 00-default-override-resolver-config -> 00-browsertrix-nginx-init for clarity
- nginx image: use default nginx.conf, pin to nginx 1.23.2
- mongo: re-add readiness probe, bump connect wait timeout (needed for ci)
- config: set superadmin username to 'admin'
- config schema: set 'name' as required 
- add sample chart values overrides:
- chart values: local-config.yaml for running locally with 'local_service_port'
- chart values: add microk8s-hosted.yaml for configuring a hosted microk8s setup
- chart values: add microk8s-ci.yaml for ci tests
- ci: remove docker swarm tests
- ci: add microk8s integration tests: launching cluster, logging in, running a crawl of example.com, downloading/checking WACZ
- bump to 1.1.0-beta.2
This commit is contained in:
Ilya Kreymer 2022-12-02 19:58:34 -08:00 committed by GitHub
parent 5d18ffa938
commit 82ffc0dfbc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
24 changed files with 408 additions and 180 deletions

View File

@ -1,66 +0,0 @@
# NOTE: this workflow file is DELETED by this commit — the Docker Swarm
# integration tests are replaced by the microk8s cluster tests.
name: Browsertrix Cloud Integration Test (for Swarm)
on: [push, pull_request]
jobs:
btrix-swarm-test:
runs-on: ubuntu-20.04
# local registry so freshly-built images can be pushed/pulled by swarm
services:
registry:
image: registry:2
ports:
- 5000:5000
steps:
-
name: Checkout
uses: actions/checkout@v3
-
name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
with:
# host networking so buildx can reach the localhost:5000 registry service
driver-opts: network=host
-
name: Copy Configs
run: ./scripts/init-configs.sh
-
name: Build Backend
uses: docker/build-push-action@v3
with:
context: backend
push: true
tags: localhost:5000/webrecorder/browsertrix-backend:latest
# GitHub Actions cache keyed per image for faster rebuilds
cache-from: type=gha,scope=backend
cache-to: type=gha,scope=backend,mode=max
-
name: Build Frontend
uses: docker/build-push-action@v3
with:
context: frontend
push: true
tags: localhost:5000/webrecorder/browsertrix-frontend:latest
cache-from: type=gha,scope=frontend
cache-to: type=gha,scope=frontend,mode=max
-
name: Run Bootstrap Script
run: ./test/setup.sh
-
name: Install Python
uses: actions/setup-python@v3
with:
python-version: '3.9'
-
name: Install Python Libs
run: pip install pytest requests
-
name: Backend Tests
run: py.test -vv ./backend/test/*.py

74
.github/workflows/microk8s-ci.yaml vendored Normal file
View File

@ -0,0 +1,74 @@
# CI: spin up a local microk8s cluster, build backend/frontend images into the
# cluster's built-in registry (localhost:32000), deploy the helm chart with the
# microk8s-ci.yaml overrides, then run the backend integration tests
# (login, crawl of example.com, WACZ download/verify).
name: Cluster Run (MicroK8S local)
on: [push, pull_request]
jobs:
btrix-microk8s-test:
runs-on: ubuntu-latest
steps:
-
uses: balchua/microk8s-actions@v0.3.0
with:
channel: '1.25/stable'
addons: '["dns", "helm3", "hostpath-storage", "registry", "metrics-server"]'
-
name: Checkout
uses: actions/checkout@v3
-
name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
with:
# host networking so buildx can push to the localhost:32000 registry addon
driver-opts: network=host
-
# the microk8s 'registry' addon serves on NodePort 32000; retry until it is up
name: Wait for registry to be available
run: curl --retry 20 --retry-delay 10 --retry-connrefused http://localhost:32000/v2/
-
name: Build Backend
uses: docker/build-push-action@v3
with:
context: backend
push: true
tags: localhost:32000/webrecorder/browsertrix-backend:latest
cache-from: type=gha,scope=backend
cache-to: type=gha,scope=backend,mode=max
-
name: Build Frontend
uses: docker/build-push-action@v3
with:
context: frontend
push: true
tags: localhost:32000/webrecorder/browsertrix-frontend:latest
cache-from: type=gha,scope=frontend
cache-to: type=gha,scope=frontend,mode=max
-
name: Start Cluster with Helm
run: |
sudo microk8s helm3 upgrade --install -f ./chart/values.yaml -f ./chart/examples/microk8s-ci.yaml btrix ./chart/
-
name: Install Python
uses: actions/setup-python@v3
with:
python-version: '3.9'
-
name: Install Python Libs
run: pip install pytest requests
-
name: Wait for all pods to be ready
run: sudo microk8s kubectl wait --for=condition=ready pod --all --timeout=240s
-
name: Run Tests
run: py.test -vv ./backend/test/*.py
-
# surface backend logs only when a previous step failed
name: Print Backend Logs
if: ${{ failure() }}
run: sudo microk8s kubectl logs svc/browsertrix-cloud-backend

View File

@ -2,7 +2,7 @@ ARG PODMAN_VERSION=4
FROM docker.io/mgoltzsche/podman:${PODMAN_VERSION}-remote as podmanremote FROM docker.io/mgoltzsche/podman:${PODMAN_VERSION}-remote as podmanremote
FROM python:3.9 FROM docker.io/library/python:3.9
WORKDIR /app WORKDIR /app

View File

@ -86,7 +86,7 @@ class CrawlConfigIn(BaseModel):
config: RawCrawlConfig config: RawCrawlConfig
name: Optional[str] name: str
profileid: Optional[UUID4] profileid: Optional[UUID4]
@ -106,7 +106,7 @@ class CrawlConfig(BaseMongoModel):
config: RawCrawlConfig config: RawCrawlConfig
name: Optional[str] name: str
created: Optional[datetime] created: Optional[datetime]

View File

@ -32,7 +32,10 @@ def init_db():
db_url = resolve_db_url() db_url = resolve_db_url()
client = motor.motor_asyncio.AsyncIOMotorClient( client = motor.motor_asyncio.AsyncIOMotorClient(
db_url, uuidRepresentation="standard" db_url,
uuidRepresentation="standard",
connectTimeoutMS=120000,
serverSelectionTimeoutMS=120000,
) )
mdb = client["browsertrixcloud"] mdb = client["browsertrixcloud"]

View File

@ -166,10 +166,11 @@ class UserManager(BaseUserManager[UserCreate, UserDB]):
try: try:
res = await self.create( res = await self.create(
UserCreate( UserCreate(
name="admin",
email=email, email=email,
password=password, password=password,
is_superuser=True, is_superuser=True,
newArchive=False, newArchive=True,
is_verified=True, is_verified=True,
) )
) )

View File

@ -1,2 +1,2 @@
""" current version """ """ current version """
__version__ = "1.1.0-beta.1" __version__ = "1.1.0-beta.2"

View File

@ -1,6 +1,6 @@
import requests import requests
api_prefix = "http://127.0.0.1:9871/api" api_prefix = "http://127.0.0.1:30870/api"
def test_login_invalid(): def test_login_invalid():
@ -18,14 +18,14 @@ def test_login_invalid():
def test_login(): def test_login():
username = "admin@example.com" username = "admin@example.com"
password = "PASSW0RD0" password = "PASSW0RD!"
r = requests.post( r = requests.post(
f"{api_prefix}/auth/jwt/login", f"{api_prefix}/auth/jwt/login",
data={"username": username, "password": password, "grant_type": "password"}, data={"username": username, "password": password, "grant_type": "password"},
) )
data = r.json() data = r.json()
assert r.status_code == 200 assert r.status_code == 200, data["detail"]
assert data["token_type"] == "bearer" assert data["token_type"] == "bearer"
assert data["access_token"] assert data["access_token"]
access_token = data["access_token"] access_token = data["access_token"]

View File

@ -0,0 +1,129 @@
import requests
import hashlib
import time
import io
import zipfile
# base URL of the locally-deployed cluster, reachable on the host via the
# chart's 'local_service_port' NodePort (30870)
host_prefix = "http://127.0.0.1:30870"
api_prefix = f"{host_prefix}/api"
# shared state handed from one ordered test to the next (pytest runs these
# top-to-bottom; each test fills in values the later tests rely on)
access_token = None
headers = None  # Authorization header dict, set by test_login
archive_id = None  # id of the superuser's default archive
crawl_id = None  # id of the crawl job started by test_create_new_config
wacz_path = None  # download path of the finished crawl's WACZ
wacz_size = None  # expected size reported by the crawl API
wacz_hash = None  # expected sha-256 digest reported by the crawl API
wacz_content = None  # raw WACZ bytes fetched in test_download_wacz
def test_login():
    """Log in as the configured superuser and stash the bearer token
    (and ready-made Authorization header) for all subsequent tests."""
    credentials = {
        "username": "admin@example.com",
        "password": "PASSW0RD!",
        "grant_type": "password",
    }
    resp = requests.post(f"{api_prefix}/auth/jwt/login", data=credentials)
    assert resp.status_code == 200

    payload = resp.json()
    assert payload["token_type"] == "bearer"

    global access_token
    access_token = payload["access_token"]

    global headers
    headers = {"Authorization": f"Bearer {access_token}"}
def test_list_archives():
    """The superuser should own exactly one auto-created archive;
    remember its id for the crawl-config tests."""
    resp = requests.get(f"{api_prefix}/archives", headers=headers)
    archives = resp.json()["archives"]
    assert len(archives) == 1

    first = archives[0]
    assert first["id"]

    global archive_id
    archive_id = first["id"]

    assert first["name"] == "admin's Archive"
def test_create_new_config():
    """Create a crawl config seeded with example.com and launch it
    immediately (runNow); remember the resulting crawl job id."""
    new_config = {
        "runNow": True,
        "name": "Test Crawl",
        "config": {"seeds": ["https://example.com/"]},
    }
    resp = requests.post(
        f"{api_prefix}/archives/{archive_id}/crawlconfigs/",
        headers=headers,
        json=new_config,
    )
    assert resp.status_code == 200

    result = resp.json()
    assert result["added"]
    assert result["run_now_job"]

    global crawl_id
    crawl_id = result["run_now_job"]
def test_wait_for_complete():
    """Poll the crawl endpoint every 5s until the crawl completes;
    any state outside starting/running/complete aborts the test.
    On completion, record the single WACZ resource's path/size/hash."""
    print("")
    print("---- Running Crawl ----")

    while True:
        resp = requests.get(
            f"{api_prefix}/archives/{archive_id}/crawls/{crawl_id}.json",
            headers=headers,
        )
        data = resp.json()
        # fail fast on unexpected states (eg. 'failed', 'canceled')
        assert data["state"] in ("starting", "running", "complete"), data["state"]
        if data["state"] == "complete":
            break
        time.sleep(5)

    # a finished crawl must expose exactly one downloadable WACZ
    resources = data["resources"]
    assert len(resources) == 1
    assert resources[0]["path"]

    global wacz_path
    global wacz_size
    global wacz_hash
    wacz_path = resources[0]["path"]
    wacz_size = resources[0]["size"]
    wacz_hash = resources[0]["hash"]
def test_download_wacz():
    """Download the WACZ through the frontend proxy and verify its size
    and sha-256 digest against the values the crawl API reported."""
    resp = requests.get(host_prefix + wacz_path)
    assert resp.status_code == 200
    assert len(resp.content) == wacz_size

    digest = hashlib.sha256()
    digest.update(resp.content)
    actual = digest.hexdigest()
    assert actual == wacz_hash, (actual, wacz_hash)

    global wacz_content
    wacz_content = resp.content
def test_verify_wacz():
    """Open the downloaded WACZ as a zip and confirm the crawled
    example.com page appears in its page index."""
    archive = zipfile.ZipFile(io.BytesIO(wacz_content))
    assert "pages/pages.jsonl" in archive.namelist()

    with archive.open("pages/pages.jsonl") as fh:
        pages = fh.read().decode("utf-8")
    assert '"https://example.com/"' in pages

View File

@ -0,0 +1,40 @@
# Local Config
# ------------
# This config is designed for local (non-hosted) use, with either Docker Desktop, minikube or local microk8s.
# With this setup, the cluster should be accessed locally via 'localhost'
# run service on localhost on port
local_service_port: 30870
# overrides to use existing images in local Docker, otherwise will pull from latest
# api_pull_policy: "Never"
# nginx_pull_policy: "Never"
# crawler_pull_policy: "Never"
# redis_pull_policy: "Never"
# microk8s: if developing locally, can override these to use images from local microk8s repository (on localhost:32000)
# api_image: "localhost:32000/webrecorder/browsertrix-backend:latest"
# nginx_image: "localhost:32000/webrecorder/browsertrix-frontend:latest"
# optionally, override default mongodb auth, used for all data storage:
#mongo_auth:
#username: root
#password: PASSWORD!
# optionally, set these to custom values
#superuser:
# set this to enable a superuser admin
# email: admin@example.com
# optional: if not set, automatically generated
# change or remove this
# password: PASSWORD!

View File

@ -0,0 +1,24 @@
# microk8s overrides for ci
# -------------------------
# use local images
api_image: "localhost:32000/webrecorder/browsertrix-backend:latest"
nginx_image: "localhost:32000/webrecorder/browsertrix-frontend:latest"
# don't pull; use existing images
api_pull_policy: "IfNotPresent"
nginx_pull_policy: "IfNotPresent"
superuser:
# set this to enable a superuser admin
email: admin@example.com
# optional: if not set, automatically generated
# change or remove this
password: PASSW0RD!
local_service_port: 30870

View File

@ -0,0 +1,65 @@
# Hosted Config (eg. for microk8s)
# --------------------------------
#
# This config can be used to run Browsertrix Cloud hosted on a remote server.
# This requires setting the hostname and signing hostname.
#
#
# The `ingress.host` and `ingress.cert_email` and optional `signer.host` must be set below.
#
# microk8s
# --------
#
# One way to deploy this is with microk8s, which will require the following addons:
#
# microk8s enable dns ingress cert-manager hostpath-storage registry helm3
#
# run with:
#
# microk8s helm3 upgrade --install -f ./chart/values.yaml -f ./chart/examples/microk8s-hosted.yaml btrix ./chart/
#
#
# If developing locally, can override these to use images from local microk8s repository (on localhost:32000)
#
# api_image: "localhost:32000/webrecorder/browsertrix-backend:latest"
# nginx_image: "localhost:32000/webrecorder/browsertrix-frontend:latest"
# crawler_image: "localhost:32000/webrecorder/browsertrix-crawler:latest"
ingress:
# required: set host to use tls
# host: "..."
# required: set email to use tls
#cert_email: "..."
scheme: "https"
tls: true
ingress_class: "public"
# optionally, set host below to enable WACZ signing
signer:
enabled: true
# host: ...
# optionally, override default mongodb auth, used for all data storage:
#mongo_auth:
#username: root
#password: PASSWORD!
# optionally, set these to custom values
#superuser:
# set this to enable a superuser admin
# email: admin@example.com
# optional: if not set, automatically generated
# change or remove this
# password: PASSWORD!

View File

@ -44,8 +44,13 @@ spec:
- name: CRAWLER_SVC_SUFFIX - name: CRAWLER_SVC_SUFFIX
value: ".crawl-$crawl" value: ".crawl-$crawl"
- name: NO_MINIO_ROUTE {{- if .Values.minio_local }}
value: "1" - name: LOCAL_MINIO_HOST
value: "{{ .Values.minio_host }}"
- name: LOCAL_BUCKET
value: "{{ .Values.minio_local_bucket_name }}"
{{- end }}
resources: resources:
limits: limits:
@ -87,7 +92,7 @@ spec:
app: {{ .Values.name }} app: {{ .Values.name }}
role: frontend role: frontend
{{- if .Values.nginx_service_use_node_port }} {{- if .Values.local_service_port }}
type: NodePort type: NodePort
{{- end }} {{- end }}
@ -95,4 +100,7 @@ spec:
- protocol: TCP - protocol: TCP
port: 80 port: 80
name: frontend name: frontend
{{- if .Values.local_service_port }}
nodePort: {{ .Values.local_service_port }}
{{- end }}

View File

@ -76,34 +76,6 @@ spec:
{{ end }} {{ end }}
{{- if .Values.minio_local }}
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: ingress-minio
namespace: {{ .Release.Namespace }}
annotations:
kubernetes.io/ingress.class: {{ .Values.ingress_class | default "nginx" }}
nginx.ingress.kubernetes.io/rewrite-target: /$1
nginx.ingress.kubernetes.io/upstream-vhost: "{{ .Values.minio_host }}"
spec:
rules:
- host: {{ .Values.ingress.host }}
http:
paths:
- path: /data/(.*)
pathType: Prefix
backend:
service:
name: local-minio
port:
number: 9000
{{- end }}
{{ if .Values.ingress.tls }} {{ if .Values.ingress.tls }}
--- ---

View File

@ -101,13 +101,16 @@ spec:
cpu: {{ .Values.mongo_requests_cpu }} cpu: {{ .Values.mongo_requests_cpu }}
memory: {{ .Values.mongo_requests_memory }} memory: {{ .Values.mongo_requests_memory }}
# disable for now, issue pending: https://github.com/bitnami/charts/issues/10264 # reenable for now with mongo 5.0.x
#readinessProbe: readinessProbe:
# exec: timeoutSeconds: 10
# command: successThreshold: 1
# - mongo failureThreshold: 2
# - --eval exec:
# - db.adminCommand('ping') command:
- mongo
- --eval
- db.adminCommand('ping')
--- ---
apiVersion: v1 apiVersion: v1

View File

@ -47,15 +47,16 @@ stringData:
STORE_ENDPOINT_URL: "{{ $storage.endpoint_url }}" STORE_ENDPOINT_URL: "{{ $storage.endpoint_url }}"
{{- end }} {{- end }}
{{- if $storage.access_endpoint_url }} {{- if $.Values.minio_local }}
STORE_ACCESS_ENDPOINT_URL: "{{ $storage.access_endpoint_url }}/" #STORE_ACCESS_ENDPOINT_URL: "/data/{{ $storage.bucket_name }}/"
STORE_USE_ACCESS_FOR_PRESIGN: "1" STORE_ACCESS_ENDPOINT_URL: "/data/"
{{- else if and $.Values.ingress.host $.Values.minio_local }}
STORE_ACCESS_ENDPOINT_URL: {{ $.Values.ingress.scheme | default "https" }}://{{ $.Values.ingress.host }}/data/{{ $storage.bucket_name }}/
STORE_USE_ACCESS_FOR_PRESIGN: "0" STORE_USE_ACCESS_FOR_PRESIGN: "0"
{{- else if $storage.access_endpoint_url }}
STORE_ACCESS_ENDPOINT_URL: "{{ $storage.access_endpoint_url }}"
STORE_USE_ACCESS_FOR_PRESIGN: "1"
{{- else }} {{- else }}
STORE_ACCESS_ENDPOINT_URL: "{{ $storage.endpoint_url }}" STORE_ACCESS_ENDPOINT_URL: "{{ $storage.endpoint_url }}"
STORE_USE_ACCESS_FOR_PRESIGN: "0" STORE_USE_ACCESS_FOR_PRESIGN: "1"
{{- end }} {{- end }}
STORE_REGION: "{{ $storage.region }}" STORE_REGION: "{{ $storage.region }}"

View File

@ -26,7 +26,8 @@ superuser:
email: admin@example.com email: admin@example.com
# optional: if not set, automatically generated # optional: if not set, automatically generated
password: # change or remove this
password: PASSW0RD!
# API Image # API Image
@ -57,11 +58,12 @@ nginx_requests_cpu: "3m"
nginx_limits_cpu: "10m" nginx_limits_cpu: "10m"
nginx_requests_memory: "12Mi" nginx_requests_memory: "12Mi"
nginx_limits_memory: "12Mi" nginx_limits_memory: "20Mi"
# if true, will use node port to make the service directly available # if set, maps nginx to a fixed port on host machine
# for testing / local deployments only # must be between 30000 - 32767
nginx_service_use_node_port: false # use for deployments on localhost when not using ingress
# local_service_port: 30870
# MongoDB Image # MongoDB Image
@ -82,8 +84,8 @@ mongo_limits_memory: "512Mi"
mongo_auth: mongo_auth:
# specify either username + password (for local mongo) # specify either username + password (for local mongo)
# username: root username: root
# password: example password: PASSWORD!
# or full URL (for remote mongo server) # or full URL (for remote mongo server)
# db_url: mongodb+srv://... # db_url: mongodb+srv://...
@ -192,7 +194,7 @@ email:
# Ingress (Optional) # Ingress (Optional)
# Optional: if 'host' is set, a publicly accessible Ingress controller is created with an SSL cert (using letsencrypt) # Optional: if 'host' is set, a publicly accessible Ingress controller is created with an SSL cert (using letsencrypt)
ingress: ingress:
host: "btrix.cloud" #host: ""
cert_email: "test@example.com" cert_email: "test@example.com"
scheme: "http" scheme: "http"
tls: false tls: false

View File

@ -0,0 +1,18 @@
#!/bin/bash
# nginx docker-entrypoint.d init script: wires up (or disables) the /data/
# minio proxy route and generates a runtime resolver config so nginx can
# resolve in-cluster service names.
# remove old config
rm /etc/nginx/conf.d/default.conf
# LOCAL_MINIO_HOST / LOCAL_BUCKET are injected by the chart's frontend
# deployment when 'minio_local' is enabled (see deployment template)
if [ -z "$LOCAL_MINIO_HOST" ]; then
echo "no local minio, clearing out minio route"
echo "" > /etc/nginx/includes/minio.conf
else
# substitute the placeholders in minio.conf with the real host/bucket
echo "local minio: replacing \$LOCAL_MINIO_HOST with \"$LOCAL_MINIO_HOST\", \$LOCAL_BUCKET with \"$LOCAL_BUCKET\""
sed -i "s/\$LOCAL_MINIO_HOST/$LOCAL_MINIO_HOST/g" /etc/nginx/includes/minio.conf
sed -i "s/\$LOCAL_BUCKET/$LOCAL_BUCKET/g" /etc/nginx/includes/minio.conf
fi
# write 'resolver <nameservers> valid=10s ipv6=off;' from /etc/resolv.conf,
# then print it for the container log
mkdir -p /etc/nginx/resolvers/
echo resolver $(awk 'BEGIN{ORS=" "} $1=="nameserver" {print $2}' /etc/resolv.conf) valid=10s ipv6=off";" > /etc/nginx/resolvers/resolvers.conf
cat /etc/nginx/resolvers/resolvers.conf

View File

@ -1,14 +0,0 @@
#!/bin/bash
# NOTE: this script is DELETED by this commit, renamed/replaced by
# 00-browsertrix-nginx-init.sh. The old version could only blank out the
# minio route via NO_MINIO_ROUTE; it had no host/bucket substitution.
# remove old config
rm /etc/nginx/conf.d/default.conf
if [ "$NO_MINIO_ROUTE" == "1" ]; then
echo "clearning out minio route"
echo "" > /etc/nginx/includes/minio.conf
fi
# write 'resolver <nameservers> valid=10s ipv6=off;' from /etc/resolv.conf
mkdir -p /etc/nginx/resolvers/
echo resolver $(awk 'BEGIN{ORS=" "} $1=="nameserver" {print $2}' /etc/resolv.conf) valid=10s ipv6=off";" > /etc/nginx/resolvers/resolvers.conf
cat /etc/nginx/resolvers/resolvers.conf

View File

@ -1,7 +1,7 @@
# central place to configure the production replayweb.page loading prefix # central place to configure the production replayweb.page loading prefix
ARG RWP_BASE_URL=https://cdn.jsdelivr.net/npm/replaywebpage/ ARG RWP_BASE_URL=https://cdn.jsdelivr.net/npm/replaywebpage/
FROM node:16 as build FROM docker.io/library/node:16 as build
ARG GIT_COMMIT_HASH ARG GIT_COMMIT_HASH
ARG GIT_BRANCH_NAME ARG GIT_BRANCH_NAME
@ -29,16 +29,16 @@ COPY src ./src/
RUN yarn build RUN yarn build
FROM nginx FROM docker.io/library/nginx:1.23.2
ARG RWP_BASE_URL ARG RWP_BASE_URL
ENV RWP_BASE_URL=${RWP_BASE_URL} ENV RWP_BASE_URL=${RWP_BASE_URL}
COPY --from=build /app/dist /usr/share/nginx/html COPY --from=build /app/dist /usr/share/nginx/html
COPY ./nginx.conf /etc/nginx/nginx.conf #COPY ./nginx.conf /etc/nginx/nginx.conf
COPY ./frontend.conf.template /etc/nginx/templates/ COPY ./frontend.conf.template /etc/nginx/templates/
COPY ./minio.conf /etc/nginx/includes/ COPY ./minio.conf /etc/nginx/includes/
ADD ./00-default-override-resolver-config.sh ./docker-entrypoint.d/ ADD ./00-browsertrix-nginx-init.sh ./docker-entrypoint.d/

View File

@ -1,6 +1,5 @@
location /data/ { location /data/ {
proxy_pass http://minio:9000/btrix-data/; proxy_pass http://$LOCAL_MINIO_HOST/$LOCAL_BUCKET/;
proxy_set_header Host "minio:9000";
proxy_redirect off; proxy_redirect off;
proxy_buffering off; proxy_buffering off;

View File

@ -1,31 +0,0 @@
# NOTE: this custom nginx.conf is DELETED by this commit — the frontend image
# now pins nginx 1.23.2 and relies on the stock nginx.conf shipped with it
# (the COPY of this file in the Dockerfile is commented out).
user nginx;
worker_processes 2;
error_log /var/log/nginx/error.log notice;
pid /var/run/nginx.pid;
events {
worker_connections 1024;
}
http {
include /etc/nginx/mime.types;
default_type application/octet-stream;
log_format main '$remote_addr - $remote_user [$time_local] "$request" '
'$status $body_bytes_sent "$http_referer" '
'"$http_user_agent" "$http_x_forwarded_for"';
access_log /var/log/nginx/access.log main;
sendfile on;
#tcp_nopush on;
keepalive_timeout 65;
#gzip on;
# pull in per-site server blocks
include /etc/nginx/conf.d/*.conf;
}

View File

@ -1,6 +1,6 @@
{ {
"name": "browsertrix-frontend", "name": "browsertrix-frontend",
"version": "1.1.0-beta.1", "version": "1.1.0-beta.2",
"main": "index.ts", "main": "index.ts",
"license": "AGPL-3.0-or-later", "license": "AGPL-3.0-or-later",
"dependencies": { "dependencies": {

View File

@ -1 +1 @@
1.1.0-beta.1 1.1.0-beta.2