browsertrix/frontend/frontend.conf.template
Ilya Kreymer ad9bca2e92
Operator refactor to control pods + pvcs directly instead of statefulsets (#1149)
- Ability for a pod to reach the Completed state, unlike with a StatefulSet - e.g. if 3 pods are running and the first one finishes, a StatefulSet keeps all 3 running until all 3 are done. With this setup, the first finished pod can remain in the Completed state.
- Fixed shutdown order - crawler pods now correctly shut down before redis pods, by switching to background deletion.
- Pod priority decreases with scale: the 1st instance of a new crawl can preempt the 2nd or 3rd instance of another crawl
- Create priority classes up to 'max_crawl_scale', configured in values.yaml (see the PriorityClass sketch after this list)
- Improved scale change reconciliation: if increasing scale, scale up immediately. If decreasing scale,
gracefully stop the scaled-down instances via the redis 'stopone' key and wait until they exit in the Completed state
before adjusting status.scale / removing the scaled-down pods. Ensures unaccepted interrupts don't cause scaled-down data to be deleted (see the reconciliation sketch after this list).
- Redis pod remains inactive until a crawler pod first becomes active, and is shut down again after no crawl pods have been active for 60 seconds
- Configurable Redis storage with 'redis_storage' value, set to 3Gi by default
- CrawlJob deletion starts as soon as post-finish crawl operations are run
- Post-crawl operations get their own redis instance, since the one used during the crawl is being cleaned up in the finalizer
- Finalizer ignores requests with an incorrect state (returns 400 if reported as not finished while the crawl is finished)
- Current resource usage added to status
- Profile browser: also manage its single pod directly without a statefulset, for consistency.
- Restart pods via restartTime value: if spec.restartTime != status.restartTime, clear out the pods and update status.restartTime (using OnDelete policy to avoid recreate loops in edge cases; see the restart sketch after this list).
- Update to latest metacontroller (v4.11.0)
- Add --restartOnError flag for crawler (for browsertrix-crawler 0.11.0)
- Failed crawl logging: add 'fail_crawl()' to be used for failing a crawl, which prints logs for the default container (if enabled) as well as pod status
- tests: check other finished states to avoid getting stuck in an infinite loop if the crawl fails
- tests: disable disk utilization check, which adds unpredictability to crawl testing!
fixes #1147 
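
To illustrate the priority scheme, here is a minimal Python sketch that emits one PriorityClass per crawl instance index, up to max_crawl_scale; the class names and priority values are illustrative assumptions, not the chart's actual output.

# Sketch only: one PriorityClass per crawl instance index, so the 1st
# instance of any crawl outranks the 2nd, which outranks the 3rd, etc.
# Names and values are assumptions for illustration.
import yaml

def make_priority_classes(max_crawl_scale: int, base_value: int = -100):
    classes = []
    for i in range(max_crawl_scale):
        classes.append({
            "apiVersion": "scheduling.k8s.io/v1",
            "kind": "PriorityClass",
            "metadata": {"name": f"crawl-instance-priority-{i}"},
            # lower instance index -> higher (less negative) priority value
            "value": base_value - i,
            "globalDefault": False,
        })
    return classes

if __name__ == "__main__":
    print(yaml.dump_all(make_priority_classes(3)))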
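
The scale-change reconciliation can be pictured roughly as below. This is a simplified sketch, not the operator's actual code: the pod naming, the PodInfo type, and the exact layout of the 'stopone' key are assumptions, and `redis` stands for any async client with an sadd method.

# Rough sketch of the scale-change logic: scale up immediately, scale down by
# asking trailing instances to stop via a redis 'stopone' key and only
# shrinking status.scale once they have exited in the Completed state.
from dataclasses import dataclass

@dataclass
class PodInfo:
    name: str
    phase: str  # "Pending", "Running", "Succeeded", ...

async def reconcile_scale(desired, current, crawl_id, pods, redis):
    """Return the scale to record in status after this reconcile pass."""
    if desired >= current:
        # scaling up: the extra pods can be created right away
        return desired

    # scaling down: request a graceful stop for instances past `desired`
    still_running = False
    for i in range(desired, current):
        name = f"crawl-{crawl_id}-{i}"
        pod = pods.get(name)
        if pod is not None and pod.phase != "Succeeded":
            await redis.sadd("stopone", name)  # ask this instance to finish up
            still_running = True

    # keep status.scale (and the pods) until every scaled-down instance
    # has reached Completed, so an unaccepted interrupt never deletes data
    return current if still_running else desired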
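
And a similarly hedged sketch of the restartTime handling; `delete_pod` and the spec/status objects are placeholders rather than real operator APIs.

# Sketch: if spec.restartTime differs from the last value recorded in status,
# delete the crawl pods and record the new value; the pods are then recreated
# on the next reconcile pass (OnDelete-style), avoiding recreate loops.
async def check_restart(spec, status, pod_names, delete_pod):
    if spec.restartTime and spec.restartTime != status.restartTime:
        for name in pod_names:
            await delete_pod(name)
        status.restartTime = spec.restartTime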

---------
Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
2023-09-11 10:38:04 -07:00


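# NOTE: the ${...} placeholders below (RWP_BASE_URL, BACKEND_HOST, CRAWLER_FQDN_SUFFIX)
# are environment variables filled in when this template is rendered at container
# startup (e.g. via envsubst); they are not nginx variables.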
include ./resolvers/resolvers.conf;

server {
    listen 8880;

    # health check for k8s
    location /healthz {
        return 200;
    }
}

server {
    listen 80 default_server;
    server_name _;
    proxy_buffering off;
    proxy_buffers 16 64k;
    proxy_buffer_size 64k;
    root /usr/share/nginx/html;
    index index.html index.htm;
    error_page 500 501 502 503 504 /50x.html;
    client_max_body_size 0;
    merge_slashes off;

    location = /50x.html {
        root /usr/share/nginx/html;
    }

    # fallback to index for any page
    error_page 404 /index.html;

    location / {
        root /usr/share/nginx/html;
        index index.html index.htm;
    }

    # serve replay service worker, RWP_BASE_URL set in Dockerfile
    location /replay/sw.js {
        add_header Content-Type application/javascript;
        return 200 'importScripts("${RWP_BASE_URL}sw.js");';
    }

    location /replay/ui.js {
        add_header Content-Type application/javascript;
        return 307 ${RWP_BASE_URL}ui.js;
    }

    # used by docker only: k8s deployment handles /api directly via ingress
    location /api/ {
        proxy_pass http://${BACKEND_HOST}:8000;
        proxy_set_header Host $http_host;
        proxy_set_header X-Forwarded-Proto $scheme;
    }

    location ~* /watch/([^/]+)/([^/]+)/([^/]+)/ws {
        set $org $1;
        set $crawl $2;
        set $num $3;
        set $auth_bearer $arg_auth_bearer;
        set $svc_suffix ".crawler";
        set $fqdn_suffix "${CRAWLER_FQDN_SUFFIX}";

        auth_request /access_check;

        proxy_pass http://crawl-$crawl-$num$svc_suffix$fqdn_suffix:9037/ws;
        proxy_set_header Host "localhost";
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection $http_connection;
    }

    location = /access_check {
        internal;
        proxy_pass http://${BACKEND_HOST}:8000/api/orgs/$org/crawls/$crawl/access?auth_bearer=$auth_bearer;
        proxy_pass_request_body off;
        proxy_set_header Content-Length "";
    }

    # redirect to bundled build of novnc
    location ~* ^/browser/([^/]+)/core/rfb.js$ {
        absolute_redirect off;
        return 308 /js/novnc.js;
    }

    location ~* ^/browser/([^/]+)/ws$ {
        set $browserid $1;
        set $auth_bearer $arg_auth_bearer;
        set $org $arg_oid;
        set $fqdn_suffix "${CRAWLER_FQDN_SUFFIX}";

        auth_request /access_check_profiles;

        proxy_pass http://browser-$browserid.browser$fqdn_suffix:6080/websockify;
        proxy_set_header Host "localhost";
        proxy_send_timeout 10m;
        proxy_read_timeout 10m;
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection $http_connection;
    }

    location ~* ^/browser/([^/]+)/$ {
        set $browserid $1;
        set $auth_bearer $arg_auth_bearer;
        set $org $arg_oid;
        set $fqdn_suffix "${CRAWLER_FQDN_SUFFIX}";

        auth_request /access_check_profiles;

        proxy_pass http://browser-$browserid.browser$fqdn_suffix:9223/vnc/;
        proxy_set_header Host "localhost";
    }

    location = /access_check_profiles {
        internal;
        proxy_pass http://${BACKEND_HOST}:8000/api/orgs/$org/profiles/browser/$browserid/access?auth_bearer=$auth_bearer;
        proxy_pass_request_body off;
        proxy_set_header Content-Length "";
    }

    include ./includes/*.conf;
}