Digital ocean setup (#314)

- Ansible playbook for deploying on DigitalOcean, configuring space, k8s cluster, mongodb, domain / subdomain, signing subdomain, container registry, and cors
- Generates helm chart values in ./deploys/ directory for future use with helm directly
- Initial support for deletion of created resources as well.
- add documentation on how to use playbook
default helm values: update to latest authsign, set default timeout to 120 seconds
This commit is contained in:
Francis Kayiwa 2022-11-15 16:44:24 -05:00 committed by GitHub
parent 1ef9f7df6d
commit 6833c9d676
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 372 additions and 3 deletions

2
.gitignore vendored
View File

@ -4,3 +4,5 @@
**/config.yaml
**/signing.yaml
.DS_Store
# digital ocean custom values
private.yml

View File

@ -11,6 +11,8 @@ ansible-lint = "==6.4.0"
yamllint = "==1.27.1"
ansible-core = "==2.13"
docker = "*"
boto3 = "*"
jmespath = "*"
[dev-packages]

48
ansible/README.md Normal file
View File

@ -0,0 +1,48 @@
### Playbooks to install browsertrix
#### DigitalOcean
To install browsertrix on [DigitalOcean](playbooks/do_setup.yml) you will need to do the following:
* Install [ansible](https://www.ansible.com)
* Set up a DigitalOcean API token and save it in your environment as `DO_API_TOKEN`
* Set up a Spaces ACCESS and SECRET KEY and save them in your environment as `DO_AWS_ACCESS_KEY` and `DO_AWS_SECRET_KEY`
* make a copy of [group_vars/do/private.yml.example](group_vars/do/private.yml.example) to [group_vars/do/private.yml](group_vars/do/private.yml)
##### Digital Ocean Variables
See Known Issues below.
The first run of the playbook will place variables under your tmp directory in files named in the following format: `YYYY-MM-DD@:HH:MMd_ocean*`. The contents of these files will need to be added to [group_vars/do/private.yml](group_vars/do/private.yml) or else passed as `-e` extra values as shown below:
```yaml
-e btrix_db_url=<contents of /tmp/YYYY-MM-DD@:HH:MMd_ocean_btrix_db_url.txt>
-e lb_uuid=<contents of /tmp/YYYY-MM-DD@:HH:MMd_ocean_lb_uuid.txt>
-e loadbalancer_ip=<contents of /tmp/YYYY-MM-DD@:HH:MMd_ocean_loadbalancer_ip.txt>
-e domain_name=<your registered domain>
```
In addition, change the project name (it will default to demo otherwise) and your preferred DigitalOcean region (it will default to sfo3).
##### Example Playbooks
The playbook will install the Kubernetes [package manager](https://helm.sh/) and the [DigitalOcean Controller](https://docs.digitalocean.com/reference/doctl/); both are useful in managing your installation.
* Run the playbook two times.
```zsh
ansible-playbook -v playbooks/do_setup.yml
ansible-playbook -v playbooks/do_setup.yml -t helm_upgrade -e btrix_db_url= -e lb_uuid= -e loadbalancer_ip=
```
Every subsequent time one needs to run helm updates the `-t helm_upgrade` can be passed to the playbook like so:
```zsh
ansible-playbook -v playbooks/do_setup.yml -t helm_upgrade
```
Known Issues:
The `doctl` tool is the only one that allows us to create a mongodb password. We continue to investigate why this cannot use ansible's [set_fact](https://docs.ansible.com/ansible/latest/collections/ansible/builtin/set_fact_module.html) in the playbook.
The Kubernetes task creates a loadbalancer which will not be ready by the time the playbook completes the first time. So a second or sometimes 3rd run will be needed.

2
ansible/deploys/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
# Ignore everything written into this directory (the playbook renders
# per-project Helm values files here).
*
# Keep this file itself tracked so the directory exists in a fresh checkout.
!.gitignore

View File

@ -0,0 +1,70 @@
{# Helm values override for a DigitalOcean deployment.
   Rendered by the do_setup playbook into ../deploys/<project_name>-values.yaml
   and passed to helm after the chart's default values.yaml.
   Every templated scalar is double-quoted so that an empty expansion renders
   as an empty string instead of YAML null, and so that values beginning with
   a YAML indicator or containing ': ' / ' #' cannot break parsing. #}
# Resources / Node Types
main_node_type: main
crawler_node_type: crawling
redis_node_type: crawling

crawler_requests_cpu: "1200m"
crawler_limits_cpu: "2200m"

crawler_requests_memory: "1200Mi"
crawler_limits_memory: "3200Mi"

# Registry
# Image overrides are only emitted when the DigitalOcean registry is in use;
# otherwise the chart defaults apply.
{% if use_do_registry %}
api_image: "{{ registry_endpoint }}/webrecorder/browsertrix-backend:{{ image_tag }}"
nginx_image: "{{ registry_endpoint }}/webrecorder/browsertrix-frontend:{{ image_tag }}"
crawler_image: "{{ registry_endpoint }}/webrecorder/browsertrix-crawler:{{ image_tag }}"
{% endif %}

# DB
mongo_local: false

mongo_auth:
  db_url: "{{ db_url }}"

# Storage
minio_local: false

storages:
  - name: "default"
    access_key: "{{ lookup('env', 'DO_AWS_ACCESS_KEY')}}"
    secret_key: "{{ lookup('env', 'DO_AWS_SECRET_KEY')}}"
    endpoint_url: "{{ bucket_endpoint_url }}"

# Domain
ingress:
  host: "{{ full_domain }}"
  cert_email: "{{ cert_email }}"
  scheme: "https"
  tls: true

# Signing
# The signer section is only emitted when signing is enabled, because
# full_signing_domain and signing_authtoken are only set in that case.
{% if enable_signing %}
signer:
  enabled: true
  host: "{{ full_signing_domain }}"
  cert_email: "{{ cert_email }}"
  image_pull_policy: "IfNotPresent"
  auth_token: "{{ signing_authtoken }}"
{% endif %}

# User Auth
superuser:
  email: "{{ superuser_email }}"
  password: "{{ superuser_password }}"

# The chart expects the string "1" or "0" here, not a boolean.
registration_enabled: "{{ '1' if registration_enabled else '0' }}"

# Admin Send Email Options
email:
  smtp_port: "{{ smtp_port }}"
  smtp_host: "{{ smtp_host }}"
  sender_email: "{{ sender_email }}"
  password: "{{ sender_password }}"

View File

@ -0,0 +1,39 @@
---
# Default variables for the DigitalOcean deployment playbook.
# Loaded by playbooks/do_setup.yml through vars_files.

# --- Project / cluster sizing -------------------------------------------
project_name: 'default'

main_node_size: 's-1vcpu-2gb'
crawl_node_size: 'c-4'
droplet_region: 'sfo3'

# Each entry is one doctl node-pool definition (semicolon-separated settings);
# the playbook joins the entries with commas for `--node-pool`.
node_pools:
  - 'name=main-app;size={{ main_node_size }};label=nodeType=main;count=2'
  - 'name=crawling;size={{ crawl_node_size }};label=nodeType=crawling;taint=nodeType=crawling:NoSchedule;auto-scale=true;min-nodes=1;max-nodes=3;count=1'

# --- Resource names (all derived from the project name) ------------------
db_name: '{{ project_name }}'
k8s_name: '{{ project_name }}'

bucket_name: '{{ project_name }}'
bucket_path: 'crawls'

# --- DNS -----------------------------------------------------------------
domain: 'browsertrix.cloud'
subdomain: '{{ project_name }}'

# --- Images --------------------------------------------------------------
use_do_registry: true
image_tag: 'dev'

# --- Signing -------------------------------------------------------------
enable_signing: true
signing_host: 'signing'

# --- Superuser / registration --------------------------------------------
superuser_email: 'dev@webrecorder.net'
superuser_password: 'PassW0rd!'

registration_enabled: true

cert_email: '{{ superuser_email }}'

# --- Outgoing email (empty by default) -----------------------------------
smtp_port: ''
smtp_host: ''
sender_email: ''
sender_password: ''

View File

@ -0,0 +1,206 @@
---
# Deploys Browsertrix Cloud on DigitalOcean from the local machine.
# In order, the play: installs helm and doctl, creates (if missing) a managed
# MongoDB and a Kubernetes cluster, creates a Space and sets its CORS rules,
# registers DNS A records for the app (and signing) domains against the
# cluster load balancer, wires the DO container registry into the cluster,
# renders a Helm values file into ../deploys/ and runs `helm upgrade --install`.
#
# Requires DO_API_TOKEN, DO_AWS_ACCESS_KEY and DO_AWS_SECRET_KEY in the
# environment (read via env lookups below).
- name: deploy browsertrix cloud on digital ocean
  hosts: localhost
  connection: local
  gather_facts: false
  vars_files:
    - ../group_vars/do/main.yml

  tasks:
    # ===========================================
    # Init
    - name: d_ocean | init | install doctl and helm
      ansible.builtin.package:
        name: "{{ item }}"
        state: present
      loop:
        - helm
        - doctl

    # An empty subdomain means the app is served from the bare domain.
    - name: d_ocean | init | set full domain
      ansible.builtin.set_fact:
        full_domain: "{{ subdomain + '.' + domain if subdomain else domain }}"

    # MongoDB
    # ===========================================
    # Listing never fails the play; its stdout is only searched for db_name.
    - name: d_ocean | db | test for existing mongodb
      ansible.builtin.command: doctl db list
      changed_when: false
      failed_when: false
      register: db_check

    - name: d_ocean | db | create mongodb database
      ansible.builtin.command: doctl databases create {{ db_name }} --region {{ droplet_region }} --engine mongodb --version 5 --output json
      async: 1800
      poll: 60
      register: db_create_result
      when: db_check.stdout.find(db_name) == -1

    # The create task is skipped when the database already exists, in which
    # case db_create_result has no stdout to parse. Guard on that so a re-run
    # does not crash here; db_url must then be supplied externally
    # (e.g. `-e db_url=...`) for the values template below.
    - name: d_ocean | db | set db url
      ansible.builtin.set_fact:
        db_url: "{{ db_create_result.stdout | from_json | json_query('[0].private_connection.uri') }}"
      when: db_create_result is not skipped

    # The expression must be wrapped in Jinja delimiters, otherwise the fact
    # is set to the literal expression text instead of the database id.
    - name: d_ocean | db | set db id
      ansible.builtin.set_fact:
        db_uuid: "{{ db_create_result.stdout | from_json | json_query('[0].id') }}"
      when: db_create_result is not skipped

    # Storage (Space)
    # ===========================================
    - name: d_ocean | space | create new
      community.digitalocean.digital_ocean_spaces:
        name: "{{ bucket_name }}"
        state: present
        oauth_token: "{{ lookup('env', 'DO_API_TOKEN') }}"
        aws_access_key_id: "{{ lookup('env', 'DO_AWS_ACCESS_KEY') }}"
        aws_secret_access_key: "{{ lookup('env', 'DO_AWS_SECRET_KEY') }}"
        region: "{{ droplet_region }}"
      register: db_space_result
      # NOTE(review): errors are ignored here, but the next task reads
      # db_space_result.data.space and will fail if creation did not return it.
      ignore_errors: true

    - name: d_ocean | space | set endpoint urls
      ansible.builtin.set_fact:
        endpoint_url: "{{ db_space_result.data.space.endpoint_url }}/"
        bucket_endpoint_url: "{{ db_space_result.data.space.endpoint_url }}/{{ db_space_result.data.space.name }}/{{ bucket_path }}/"

    # Allow the deployed frontend origin to read objects from the bucket.
    - name: d_ocean | space | set bucket cors
      community.aws.aws_s3_cors:
        name: "{{ bucket_name }}"
        aws_access_key: "{{ lookup('env', 'DO_AWS_ACCESS_KEY') }}"
        aws_secret_key: "{{ lookup('env', 'DO_AWS_SECRET_KEY') }}"
        endpoint_url: "{{ endpoint_url }}"
        region: "{{ droplet_region }}"
        state: present
        rules:
          - allowed_origins:
              - "https://{{ full_domain }}"
            allowed_methods:
              - GET
              - HEAD
            allowed_headers:
              - "*"
            expose_headers:
              - Content-Range
              - Content-Encoding
              - Content-Length

    # K8S
    # ===========================================
    - name: d_ocean | k8s | test for existing k8s cluster
      ansible.builtin.command: doctl k8s cluster list
      changed_when: false
      failed_when: false
      register: cluster_check

    - name: d_ocean | k8s | create a kubernetes cluster in {{ droplet_region }}
      # skip_ansible_lint
      ansible.builtin.command: >-
        doctl kubernetes cluster create {{ k8s_name }} --1-clicks ingress-nginx,cert-manager --node-pool
        "{{ node_pools|join(',') }}"
        --region={{ droplet_region }}
      async: 1800
      poll: 60
      changed_when: false
      when: cluster_check.stdout.find(k8s_name) == -1

    - name: d_ocean | k8s | Get information about our cluster
      community.digitalocean.digital_ocean_kubernetes_info:
        oauth_token: "{{ lookup('ansible.builtin.env', 'DO_API_TOKEN') }}"
        name: "{{ k8s_name }}"
        return_kubeconfig: true
      register: my_cluster

    - name: d_ocean | k8s | print information about an existing DigitalOcean Kubernetes cluster
      ansible.builtin.debug:
        msg: Cluster name is {{ my_cluster.data.name }}, ID is {{ my_cluster.data.id }}
      failed_when: not my_cluster

    - name: d_ocean | k8s | save kubectl config to kube_dir
      ansible.builtin.command: doctl kubernetes cluster kubeconfig save {{ my_cluster.data.id }}
      changed_when: false

    # Poll (up to 100 x 5s) until the cluster reports a load balancer.
    # This is a read-only query, so it never counts as a change.
    - name: d_ocean | k8s | get loadbalancer info from doctl
      ansible.builtin.command: doctl k8s cluster list-associated-resources {{ my_cluster.data.id }} --format LoadBalancers --output json
      register: lb_id_result
      retries: 100
      delay: 5
      changed_when: false
      until: lb_id_result.stdout | from_json | json_query('load_balancers') | length > 0

    - name: d_ocean | k8s | parse lb id
      ansible.builtin.set_fact:
        lb_id: "{{ lb_id_result.stdout | from_json | json_query('load_balancers[0].id') }}"

    # DNS
    # ===========================================
    # Poll until the output has a second line (first line is the column
    # header). Read-only query, so never reported as a change.
    - name: d_ocean | dns | grab loadbalancer ip using doctl
      ansible.builtin.command: doctl compute load-balancer get --format IP "{{ lb_id }}"
      register: loadbalancer_ip_result
      retries: 100
      delay: 5
      changed_when: false
      until: loadbalancer_ip_result.stdout_lines | length > 1

    - name: d_ocean | dns | parse ip
      ansible.builtin.set_fact:
        lb_ip: "{{ loadbalancer_ip_result.stdout_lines[1] }}"

    # '@' is the record name for the bare domain when no subdomain is set.
    - name: d_ocean | dns | register the dns for browsertrix cloud
      ansible.builtin.command: >-
        doctl compute domain records create --record-type A --record-name "{{ subdomain if subdomain else '@' }}" --record-data "{{ lb_ip }}" "{{ domain }}"
      changed_when: dns_result.rc == 0
      register: dns_result

    # Signing + DNS
    # ===========================================
    # NOTE(review): the token is derived from a random integer below 99999999,
    # so it has a small search space and changes on every run — consider a
    # stronger, persisted secret.
    - name: d_ocean | signing | set signing domain + authtoken
      ansible.builtin.set_fact:
        full_signing_domain: "{{ signing_host }}.{{ full_domain }}"
        signing_subdomain: "{{ signing_host + '.' + subdomain if subdomain else signing_host }}"
        signing_authtoken: "{{ 99999999 | random | to_uuid }}"
      when: enable_signing

    - name: d_ocean | signing | register the dns for signing subdomain
      ansible.builtin.command: >-
        doctl compute domain records create --record-type A --record-name "{{ signing_subdomain }}" --record-data "{{ lb_ip }}" "{{ domain }}"
      register: signing_dns_result
      changed_when: signing_dns_result.rc == 0
      when: enable_signing

    # Registry
    # ===========================================
    # Output is expected to be a header line followed by the endpoint; fewer
    # than two lines is treated as failure.
    - name: d_ocean | registry | get endpoint, if using registry
      ansible.builtin.command: doctl registry get --format Endpoint
      register: do_registry_result
      when: use_do_registry
      changed_when: false
      failed_when: do_registry_result.stdout_lines | length < 2

    - name: d_ocean | registry | store registry endpoint
      ansible.builtin.set_fact:
        registry_endpoint: "{{ do_registry_result.stdout_lines[1] }}"
      when: use_do_registry

    # `set -o pipefail` is not supported by every /bin/sh, so run this
    # pipeline under bash explicitly.
    - name: d_ocean | registry | add to new k8s cluster
      ansible.builtin.shell: set -o pipefail && doctl registry kubernetes-manifest | kubectl apply -f -
      args:
        executable: /bin/bash
      register: registry_manifest_result
      changed_when: registry_manifest_result.rc == 0
      when: use_do_registry

    # Helm Output + Deploy
    # ===========================================
    - name: d_ocean | helm | output values yaml
      ansible.builtin.template:
        src: ../group_vars/do/do-values.template.yaml
        dest: ../deploys/{{ project_name }}-values.yaml
        mode: u+rw

    # Tagged so later upgrades can run this task alone with `-t helm_upgrade`,
    # reusing the values file rendered by a previous full run.
    - name: d_ocean | helm | deploy btrix
      ansible.builtin.command: helm upgrade --install -f ../../chart/values.yaml -f ../deploys/{{ project_name }}-values.yaml btrix ../../chart/
      register: helm_result
      changed_when: helm_result.rc == 0
      tags: helm_upgrade

View File

@ -12,7 +12,7 @@ volume_storage_class:
# if set, set the node selector 'nodeType' to this crawling pods
# crawler_node_type:
registration_enabled: 0
registration_enabled: "0"
jwt_token_lifetime_minutes: 1440
# number of workers for backend api
@ -118,7 +118,7 @@ crawler_namespace: "crawlers"
crawl_retries: 1000
# browsertrix-crawler args:
crawler_args: "--timeout 90 --logging stats,behaviors,debug --generateWACZ --text --workers 4 --collection thecrawl --screencastPort 9037 --sizeLimit 100000000000 --timeLimit 18000 --healthCheckPort 6065 --waitOnDone"
crawler_args: "--timeout 120 --logging stats,behaviors,debug --generateWACZ --text --workers 4 --collection thecrawl --screencastPort 9037 --sizeLimit 100000000000 --timeLimit 18000 --healthCheckPort 6065 --waitOnDone --behaviorTimeout 300"
crawler_requests_cpu: "800m"
crawler_limits_cpu: "1200m"
@ -199,9 +199,9 @@ ingress:
# optionally enable signer
signer:
enabled: false
image: webrecorder/authsign:0.5.0
# host: <set to signer domain>
# cert_email: "test@example.com"
# image: webrecorder/authsign:0.4.0
# image_pull_policy: "IfNotPresent"
# auth_token: <set to custom value>