diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml deleted file mode 100644 index 1a9be074..00000000 --- a/.github/workflows/ci.yaml +++ /dev/null @@ -1,66 +0,0 @@ -name: Browsertrix Cloud Integration Test (for Swarm) - -on: [push, pull_request] - -jobs: - btrix-swarm-test: - runs-on: ubuntu-20.04 - services: - registry: - image: registry:2 - ports: - - 5000:5000 - - steps: - - - name: Checkout - uses: actions/checkout@v3 - - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 - with: - driver-opts: network=host - - - - name: Copy Configs - run: ./scripts/init-configs.sh - - - - name: Build Backend - uses: docker/build-push-action@v3 - with: - context: backend - push: true - tags: localhost:5000/webrecorder/browsertrix-backend:latest - cache-from: type=gha,scope=backend - cache-to: type=gha,scope=backend,mode=max - - - - name: Build Frontend - uses: docker/build-push-action@v3 - with: - context: frontend - push: true - tags: localhost:5000/webrecorder/browsertrix-frontend:latest - cache-from: type=gha,scope=frontend - cache-to: type=gha,scope=frontend,mode=max - - - - name: Run Bootstrap Script - run: ./test/setup.sh - - - - name: Install Python - uses: actions/setup-python@v3 - with: - python-version: '3.9' - - - - name: Install Python Libs - run: pip install pytest requests - - - - name: Backend Tests - run: py.test -vv ./backend/test/*.py - diff --git a/.github/workflows/microk8s-ci.yaml b/.github/workflows/microk8s-ci.yaml new file mode 100644 index 00000000..d1aa7dc1 --- /dev/null +++ b/.github/workflows/microk8s-ci.yaml @@ -0,0 +1,74 @@ +name: Cluster Run (MicroK8S local) + +on: [push, pull_request] + +jobs: + btrix-microk8s-test: + runs-on: ubuntu-latest + steps: + - + uses: balchua/microk8s-actions@v0.3.0 + with: + channel: '1.25/stable' + addons: '["dns", "helm3", "hostpath-storage", "registry", "metrics-server"]' + - + name: Checkout + uses: actions/checkout@v3 + + - + name: Set up Docker Buildx + uses: 
docker/setup-buildx-action@v2 + with: + driver-opts: network=host + + - + name: Wait for registry to be available + run: curl --retry 20 --retry-delay 10 --retry-connrefused http://localhost:32000/v2/ + + - + name: Build Backend + uses: docker/build-push-action@v3 + with: + context: backend + push: true + tags: localhost:32000/webrecorder/browsertrix-backend:latest + cache-from: type=gha,scope=backend + cache-to: type=gha,scope=backend,mode=max + + - + name: Build Frontend + uses: docker/build-push-action@v3 + with: + context: frontend + push: true + tags: localhost:32000/webrecorder/browsertrix-frontend:latest + cache-from: type=gha,scope=frontend + cache-to: type=gha,scope=frontend,mode=max + + - + name: Start Cluster with Helm + run: | + sudo microk8s helm3 upgrade --install -f ./chart/values.yaml -f ./chart/examples/microk8s-ci.yaml btrix ./chart/ + + - + name: Install Python + uses: actions/setup-python@v3 + with: + python-version: '3.9' + + - + name: Install Python Libs + run: pip install pytest requests + + - + name: Wait for all pods to be ready + run: sudo microk8s kubectl wait --for=condition=ready pod --all --timeout=240s + + - + name: Run Tests + run: py.test -vv ./backend/test/*.py + + - + name: Print Backend Logs + if: ${{ failure() }} + run: sudo microk8s kubectl logs svc/browsertrix-cloud-backend diff --git a/backend/Dockerfile b/backend/Dockerfile index 8602c62e..0c52816f 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -2,7 +2,7 @@ ARG PODMAN_VERSION=4 FROM docker.io/mgoltzsche/podman:${PODMAN_VERSION}-remote as podmanremote -FROM python:3.9 +FROM docker.io/library/python:3.9 WORKDIR /app diff --git a/backend/btrixcloud/crawlconfigs.py b/backend/btrixcloud/crawlconfigs.py index 25240734..4c3dc160 100644 --- a/backend/btrixcloud/crawlconfigs.py +++ b/backend/btrixcloud/crawlconfigs.py @@ -86,7 +86,7 @@ class CrawlConfigIn(BaseModel): config: RawCrawlConfig - name: Optional[str] + name: str profileid: Optional[UUID4] @@ -106,7 +106,7 @@ 
class CrawlConfig(BaseMongoModel): config: RawCrawlConfig - name: Optional[str] + name: str created: Optional[datetime] diff --git a/backend/btrixcloud/db.py b/backend/btrixcloud/db.py index a0347aa6..605ec2d1 100644 --- a/backend/btrixcloud/db.py +++ b/backend/btrixcloud/db.py @@ -32,7 +32,10 @@ def init_db(): db_url = resolve_db_url() client = motor.motor_asyncio.AsyncIOMotorClient( - db_url, uuidRepresentation="standard" + db_url, + uuidRepresentation="standard", + connectTimeoutMS=120000, + serverSelectionTimeoutMS=120000, ) mdb = client["browsertrixcloud"] diff --git a/backend/btrixcloud/users.py b/backend/btrixcloud/users.py index 4a105a0d..1c2f524c 100644 --- a/backend/btrixcloud/users.py +++ b/backend/btrixcloud/users.py @@ -166,10 +166,11 @@ class UserManager(BaseUserManager[UserCreate, UserDB]): try: res = await self.create( UserCreate( + name="admin", email=email, password=password, is_superuser=True, - newArchive=False, + newArchive=True, is_verified=True, ) ) diff --git a/backend/btrixcloud/version.py b/backend/btrixcloud/version.py index 357ffc16..bd4a1057 100644 --- a/backend/btrixcloud/version.py +++ b/backend/btrixcloud/version.py @@ -1,2 +1,2 @@ """ current version """ -__version__ = "1.1.0-beta.1" +__version__ = "1.1.0-beta.2" diff --git a/backend/test/test_login.py b/backend/test/test_login.py index 18188d2c..ba461fce 100644 --- a/backend/test/test_login.py +++ b/backend/test/test_login.py @@ -1,6 +1,6 @@ import requests -api_prefix = "http://127.0.0.1:9871/api" +api_prefix = "http://127.0.0.1:30870/api" def test_login_invalid(): @@ -18,14 +18,14 @@ def test_login_invalid(): def test_login(): username = "admin@example.com" - password = "PASSW0RD0" + password = "PASSW0RD!" 
r = requests.post( f"{api_prefix}/auth/jwt/login", data={"username": username, "password": password, "grant_type": "password"}, ) data = r.json() - assert r.status_code == 200 + assert r.status_code == 200, data["detail"] assert data["token_type"] == "bearer" assert data["access_token"] access_token = data["access_token"] diff --git a/backend/test/test_run_crawl.py b/backend/test/test_run_crawl.py new file mode 100644 index 00000000..2d4dd9e7 --- /dev/null +++ b/backend/test/test_run_crawl.py @@ -0,0 +1,129 @@ +import requests +import hashlib +import time +import io +import zipfile + +host_prefix = "http://127.0.0.1:30870" +api_prefix = f"{host_prefix}/api" + + +access_token = None +headers = None +archive_id = None + +crawl_id = None + +wacz_path = None +wacz_size = None +wacz_hash = None + +wacz_content = None + + +def test_login(): + username = "admin@example.com" + password = "PASSW0RD!" + r = requests.post( + f"{api_prefix}/auth/jwt/login", + data={"username": username, "password": password, "grant_type": "password"}, + ) + assert r.status_code == 200 + data = r.json() + + assert data["token_type"] == "bearer" + + global access_token + access_token = data["access_token"] + + global headers + headers = {"Authorization": f"Bearer {access_token}"} + + +def test_list_archives(): + r = requests.get(f"{api_prefix}/archives", headers=headers) + data = r.json() + + assert len(data["archives"]) == 1 + assert data["archives"][0]["id"] + + global archive_id + archive_id = data["archives"][0]["id"] + + assert data["archives"][0]["name"] == "admin's Archive" + + +def test_create_new_config(): + crawl_data = { + "runNow": True, + "name": "Test Crawl", + "config": {"seeds": ["https://example.com/"]}, + } + r = requests.post( + f"{api_prefix}/archives/{archive_id}/crawlconfigs/", + headers=headers, + json=crawl_data, + ) + + assert r.status_code == 200 + + data = r.json() + assert data["added"] + assert data["run_now_job"] + + global crawl_id + crawl_id = data["run_now_job"] 
+ + +def test_wait_for_complete(): + print("") + print("---- Running Crawl ----") + + while True: + r = requests.get( + f"{api_prefix}/archives/{archive_id}/crawls/{crawl_id}.json", + headers=headers, + ) + data = r.json() + assert ( + data["state"] == "starting" + or data["state"] == "running" + or data["state"] == "complete" + ), data["state"] + if data["state"] == "complete": + break + + time.sleep(5) + + assert len(data["resources"]) == 1 + assert data["resources"][0]["path"] + + global wacz_path + global wacz_size + global wacz_hash + wacz_path = data["resources"][0]["path"] + wacz_size = data["resources"][0]["size"] + wacz_hash = data["resources"][0]["hash"] + + +def test_download_wacz(): + r = requests.get(host_prefix + wacz_path) + assert r.status_code == 200 + assert len(r.content) == wacz_size + + h = hashlib.sha256() + h.update(r.content) + assert h.hexdigest() == wacz_hash, (h.hexdigest(), wacz_hash) + + global wacz_content + wacz_content = r.content + + +def test_verify_wacz(): + b = io.BytesIO(wacz_content) + z = zipfile.ZipFile(b) + + assert "pages/pages.jsonl" in z.namelist() + + pages = z.open("pages/pages.jsonl").read().decode("utf-8") + assert '"https://example.com/"' in pages diff --git a/chart/examples/local-config.yaml b/chart/examples/local-config.yaml new file mode 100644 index 00000000..f3928948 --- /dev/null +++ b/chart/examples/local-config.yaml @@ -0,0 +1,40 @@ +# Local Config +# ------------ + +# This config is designed for local (non-hosted) use, with either Docker Desktop, minikube or local microk8s. 
+# With this setup, the cluster should be accessed locally via 'localhost' + +# run service on localhost on port +local_service_port: 30870 + + +# overrides to use existing images in local Docker, otherwise will pull from latest +# api_pull_policy: "Never" +# nginx_pull_policy: "Never" +# crawler_pull_policy: "Never" +# redis_pull_policy: "Never" + + +# microk8s: if developing locally, can override these to use images from local microk8s repository (on localhost:32000) +# api_image: "localhost:32000/webrecorder/browsertrix-backend:latest" +# nginx_image: "localhost:32000/webrecorder/browsertrix-frontend:latest" + + +# optionally, override default mongodb auth, used for all data storage: + +#mongo_auth: + #username: root + #password: PASSWORD! + + +# optionally, set these to custom values + +#superuser: + # set this to enable a superuser admin + # email: admin@example.com + + # optional: if not set, automatically generated + # change or remove this + # password: PASSWORD! + + diff --git a/chart/examples/microk8s-ci.yaml b/chart/examples/microk8s-ci.yaml new file mode 100644 index 00000000..705f7ef5 --- /dev/null +++ b/chart/examples/microk8s-ci.yaml @@ -0,0 +1,24 @@ +# microk8s overrides for ci +# ------------------------- + + +# use local images +api_image: "localhost:32000/webrecorder/browsertrix-backend:latest" +nginx_image: "localhost:32000/webrecorder/browsertrix-frontend:latest" + +# don't pull, use existing images +api_pull_policy: "IfNotPresent" +nginx_pull_policy: "IfNotPresent" + + +superuser: + # set this to enable a superuser admin + email: admin@example.com + + # optional: if not set, automatically generated + # change or remove this + password: PASSW0RD! + + +local_service_port: 30870 + diff --git a/chart/examples/microk8s-hosted.yaml b/chart/examples/microk8s-hosted.yaml new file mode 100644 index 00000000..3d60a309 --- /dev/null +++ b/chart/examples/microk8s-hosted.yaml @@ -0,0 +1,65 @@ +# Hosted Config (eg.
for microk8s) +# -------------------------------- +# +# This config can be used to run Browsertrix Cloud hosted on a remote server. +# This requires setting the hostname and signing hostname. +# +# +# The `ingress.host` and `ingress.cert_email` and optional `signer.host` must be set below. +# +# microk8s +# -------- +# +# One way to deploy this is with microk8s, which will require the following addons: +# +# microk8s enable dns ingress cert-manager hostpath-storage registry helm3 +# +# run with: +# +# microk8s helm3 upgrade --install -f ./chart/values.yaml -f ./chart/examples/microk8s-hosted.yaml btrix ./chart/ +# +# +# If developing locally, can override these to use images from local microk8s repository (on localhost:32000) +# +# api_image: "localhost:32000/webrecorder/browsertrix-backend:latest" +# nginx_image: "localhost:32000/webrecorder/browsertrix-frontend:latest" +# crawler_image: "localhost:32000/webrecorder/browsertrix-crawler:latest" + + +ingress: + # required: set host to use tls + # host: "..." + + # required: set email to use tls + #cert_email: "..." + + scheme: "https" + tls: true + +ingress_class: "public" + + +# optionally, set host below to enable WACZ signing +signer: + enabled: true + + # host: ... + + +# optionally, override default mongodb auth, used for all data storage: + +#mongo_auth: + #username: root + #password: PASSWORD! + + +# optionally, set these to custom values + +#superuser: + # set this to enable a superuser admin + # email: admin@example.com + + # optional: if not set, automatically generated + # change or remove this + # password: PASSWORD!
+ diff --git a/chart/templates/frontend.yaml b/chart/templates/frontend.yaml index 6396c240..de6ac999 100644 --- a/chart/templates/frontend.yaml +++ b/chart/templates/frontend.yaml @@ -44,8 +44,13 @@ spec: - name: CRAWLER_SVC_SUFFIX value: ".crawl-$crawl" - - name: NO_MINIO_ROUTE - value: "1" + {{- if .Values.minio_local }} + - name: LOCAL_MINIO_HOST + value: "{{ .Values.minio_host }}" + + - name: LOCAL_BUCKET + value: "{{ .Values.minio_local_bucket_name }}" + {{- end }} resources: limits: @@ -87,7 +92,7 @@ spec: app: {{ .Values.name }} role: frontend - {{- if .Values.nginx_service_use_node_port }} + {{- if .Values.local_service_port }} type: NodePort {{- end }} @@ -95,4 +100,7 @@ spec: - protocol: TCP port: 80 name: frontend + {{- if .Values.local_service_port }} + nodePort: {{ .Values.local_service_port }} + {{- end }} diff --git a/chart/templates/ingress.yaml b/chart/templates/ingress.yaml index a233cd20..91f7c19d 100644 --- a/chart/templates/ingress.yaml +++ b/chart/templates/ingress.yaml @@ -76,34 +76,6 @@ spec: {{ end }} -{{- if .Values.minio_local }} ---- -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: ingress-minio - namespace: {{ .Release.Namespace }} - annotations: - kubernetes.io/ingress.class: {{ .Values.ingress_class | default "nginx" }} - nginx.ingress.kubernetes.io/rewrite-target: /$1 - nginx.ingress.kubernetes.io/upstream-vhost: "{{ .Values.minio_host }}" - -spec: - rules: - - host: {{ .Values.ingress.host }} - http: - paths: - - path: /data/(.*) - pathType: Prefix - backend: - service: - name: local-minio - port: - number: 9000 - - -{{- end }} - {{ if .Values.ingress.tls }} --- diff --git a/chart/templates/mongo.yaml b/chart/templates/mongo.yaml index f2425f00..4977a636 100644 --- a/chart/templates/mongo.yaml +++ b/chart/templates/mongo.yaml @@ -101,13 +101,16 @@ spec: cpu: {{ .Values.mongo_requests_cpu }} memory: {{ .Values.mongo_requests_memory }} - # disable for now, issue pending: 
https://github.com/bitnami/charts/issues/10264 - #readinessProbe: - # exec: - # command: - # - mongo - # - --eval - # - db.adminCommand('ping') + # reenable for now with mongo 5.0.x + readinessProbe: + timeoutSeconds: 10 + successThreshold: 1 + failureThreshold: 2 + exec: + command: + - mongo + - --eval + - db.adminCommand('ping') --- apiVersion: v1 diff --git a/chart/templates/secrets.yaml b/chart/templates/secrets.yaml index ff1d9d3d..cc24b9d6 100644 --- a/chart/templates/secrets.yaml +++ b/chart/templates/secrets.yaml @@ -47,15 +47,16 @@ stringData: STORE_ENDPOINT_URL: "{{ $storage.endpoint_url }}" {{- end }} - {{- if $storage.access_endpoint_url }} - STORE_ACCESS_ENDPOINT_URL: "{{ $storage.access_endpoint_url }}/" - STORE_USE_ACCESS_FOR_PRESIGN: "1" - {{- else if and $.Values.ingress.host $.Values.minio_local }} - STORE_ACCESS_ENDPOINT_URL: {{ $.Values.ingress.scheme | default "https" }}://{{ $.Values.ingress.host }}/data/{{ $storage.bucket_name }}/ + {{- if $.Values.minio_local }} + #STORE_ACCESS_ENDPOINT_URL: "/data/{{ $storage.bucket_name }}/" + STORE_ACCESS_ENDPOINT_URL: "/data/" STORE_USE_ACCESS_FOR_PRESIGN: "0" + {{- else if $storage.access_endpoint_url }} + STORE_ACCESS_ENDPOINT_URL: "{{ $storage.access_endpoint_url }}" + STORE_USE_ACCESS_FOR_PRESIGN: "1" {{- else }} STORE_ACCESS_ENDPOINT_URL: "{{ $storage.endpoint_url }}" - STORE_USE_ACCESS_FOR_PRESIGN: "0" + STORE_USE_ACCESS_FOR_PRESIGN: "1" {{- end }} STORE_REGION: "{{ $storage.region }}" diff --git a/chart/values.yaml b/chart/values.yaml index fa517e3a..72bbe3eb 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -26,7 +26,8 @@ superuser: email: admin@example.com # optional: if not set, automatically generated - password: + # change or remove this + password: PASSW0RD! 
# API Image @@ -57,11 +58,12 @@ nginx_requests_cpu: "3m" nginx_limits_cpu: "10m" nginx_requests_memory: "12Mi" -nginx_limits_memory: "12Mi" +nginx_limits_memory: "20Mi" -# if true, will use node port to make the service directly available -# for testing / local deployments only -nginx_service_use_node_port: false +# if set, maps nginx to a fixed port on host machine +# must be between 30000 - 32767 +# use for deployments on localhost when not using ingress +# local_service_port: 30870 # MongoDB Image @@ -82,8 +84,8 @@ mongo_limits_memory: "512Mi" mongo_auth: # specify either username + password (for local mongo) - # username: root - # password: example + username: root + password: PASSWORD! # or full URL (for remote mongo server) # db_url: mongodb+srv://... @@ -192,7 +194,7 @@ email: # Ingress (Optional) # Optional: if 'host' is set, a publicly accessible Ingress controller is created with an SSL cert (using letsencrypt) ingress: - host: "btrix.cloud" + #host: "" cert_email: "test@example.com" scheme: "http" tls: false diff --git a/frontend/00-browsertrix-nginx-init.sh b/frontend/00-browsertrix-nginx-init.sh new file mode 100755 index 00000000..27bf7eb1 --- /dev/null +++ b/frontend/00-browsertrix-nginx-init.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +# remove old config +rm /etc/nginx/conf.d/default.conf + +if [ -z "$LOCAL_MINIO_HOST" ]; then + echo "no local minio, clearing out minio route" + echo "" > /etc/nginx/includes/minio.conf +else + echo "local minio: replacing \$LOCAL_MINIO_HOST with \"$LOCAL_MINIO_HOST\", \$LOCAL_BUCKET with \"$LOCAL_BUCKET\"" + sed -i "s/\$LOCAL_MINIO_HOST/$LOCAL_MINIO_HOST/g" /etc/nginx/includes/minio.conf + sed -i "s/\$LOCAL_BUCKET/$LOCAL_BUCKET/g" /etc/nginx/includes/minio.conf +fi + +mkdir -p /etc/nginx/resolvers/ +echo resolver $(awk 'BEGIN{ORS=" "} $1=="nameserver" {print $2}' /etc/resolv.conf) valid=10s ipv6=off";" > /etc/nginx/resolvers/resolvers.conf + +cat /etc/nginx/resolvers/resolvers.conf diff --git 
a/frontend/00-default-override-resolver-config.sh b/frontend/00-default-override-resolver-config.sh deleted file mode 100755 index 43f30ff6..00000000 --- a/frontend/00-default-override-resolver-config.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash - -# remove old config -rm /etc/nginx/conf.d/default.conf - -if [ "$NO_MINIO_ROUTE" == "1" ]; then - echo "clearning out minio route" - echo "" > /etc/nginx/includes/minio.conf -fi - -mkdir -p /etc/nginx/resolvers/ -echo resolver $(awk 'BEGIN{ORS=" "} $1=="nameserver" {print $2}' /etc/resolv.conf) valid=10s ipv6=off";" > /etc/nginx/resolvers/resolvers.conf - -cat /etc/nginx/resolvers/resolvers.conf diff --git a/frontend/Dockerfile b/frontend/Dockerfile index 70d7d405..4b1588a5 100644 --- a/frontend/Dockerfile +++ b/frontend/Dockerfile @@ -1,7 +1,7 @@ # central place to configure the production replayweb.page loading prefix ARG RWP_BASE_URL=https://cdn.jsdelivr.net/npm/replaywebpage/ -FROM node:16 as build +FROM docker.io/library/node:16 as build ARG GIT_COMMIT_HASH ARG GIT_BRANCH_NAME @@ -29,16 +29,16 @@ COPY src ./src/ RUN yarn build -FROM nginx +FROM docker.io/library/nginx:1.23.2 ARG RWP_BASE_URL ENV RWP_BASE_URL=${RWP_BASE_URL} COPY --from=build /app/dist /usr/share/nginx/html -COPY ./nginx.conf /etc/nginx/nginx.conf +#COPY ./nginx.conf /etc/nginx/nginx.conf COPY ./frontend.conf.template /etc/nginx/templates/ COPY ./minio.conf /etc/nginx/includes/ -ADD ./00-default-override-resolver-config.sh ./docker-entrypoint.d/ +ADD ./00-browsertrix-nginx-init.sh ./docker-entrypoint.d/ diff --git a/frontend/minio.conf b/frontend/minio.conf index b691875f..0c9b3a83 100644 --- a/frontend/minio.conf +++ b/frontend/minio.conf @@ -1,6 +1,5 @@ location /data/ { - proxy_pass http://minio:9000/btrix-data/; - proxy_set_header Host "minio:9000"; + proxy_pass http://$LOCAL_MINIO_HOST/$LOCAL_BUCKET/; proxy_redirect off; proxy_buffering off; diff --git a/frontend/nginx.conf b/frontend/nginx.conf deleted file mode 100644 index 
56d5670c..00000000 --- a/frontend/nginx.conf +++ /dev/null @@ -1,31 +0,0 @@ -user nginx; -worker_processes 2; - -error_log /var/log/nginx/error.log notice; -pid /var/run/nginx.pid; - - -events { - worker_connections 1024; -} - - -http { - include /etc/nginx/mime.types; - default_type application/octet-stream; - - log_format main '$remote_addr - $remote_user [$time_local] "$request" ' - '$status $body_bytes_sent "$http_referer" ' - '"$http_user_agent" "$http_x_forwarded_for"'; - - access_log /var/log/nginx/access.log main; - - sendfile on; - #tcp_nopush on; - - keepalive_timeout 65; - - #gzip on; - - include /etc/nginx/conf.d/*.conf; -} diff --git a/frontend/package.json b/frontend/package.json index 51f4576c..0c54c1c6 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -1,6 +1,6 @@ { "name": "browsertrix-frontend", - "version": "1.1.0-beta.1", + "version": "1.1.0-beta.2", "main": "index.ts", "license": "AGPL-3.0-or-later", "dependencies": { diff --git a/version.txt b/version.txt index c9fb0fef..7e330a34 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -1.1.0-beta.1 +1.1.0-beta.2