- Ability for a pod to be Completed, unlike in a StatefulSet - e.g. if 3 pods are running and the first one finishes, all 3 must keep running until all 3 are done. With this setup, the first finished pod can remain in the Completed state.
- Fixed shutdown order: crawler pods now correctly shut down before redis pods, by switching to background deletion.
- Pod priority decreases with scale: the 1st instance of a new crawl can preempt the 2nd or 3rd instance of another crawl.
- Create priority classes up to 'max_crawl_scale', configured in values.yaml (see the sketch after these notes).
- Improved scale change reconciliation: if increasing scale, scale up immediately. If decreasing scale, gracefully stop the scaled-down instances via the redis 'stopone' key and wait until they exit in the Completed state before adjusting status.scale / removing the scaled-down pods. Ensures unaccepted interrupts don't cause scaled-down data to be deleted.
- Redis pod remains inactive until the crawler is first active, or after no crawl pods have been active for 60 seconds.
- Configurable Redis storage with the 'redis_storage' value, set to 3Gi by default.
- CrawlJob deletion starts as soon as post-finish crawl operations are run.
- Post-crawl operations get their own redis instance, since the one used during the response is being cleaned up in the finalizer.
- Finalizer ignores requests with an incorrect state (returns 400 if the crawl is reported as not finished while it is actually finished).
- Current resource usage added to status.
- Profile browser: also manage a single pod directly without a StatefulSet, for consistency.
- Restart pods via the restartTime value: if spec.restartTime != status.restartTime, clear out pods and update status.restartTime (using OnDelete policy to avoid recreate loops in edge cases).
- Update to latest metacontroller (v4.11.0).
- Add --restartOnError flag for the crawler (for browsertrix-crawler 0.11.0).
- Failed crawl logging: add 'fail_crawl()' to be used for failing a crawl, which prints logs for the default container (if enabled) as well as pod status.
- tests: check other finished states to avoid getting stuck in an infinite loop if a crawl fails.
- tests: disable disk utilization check, which adds unpredictability to crawl testing! fixes #1147

---------

Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
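For reference, a minimal sketch of the two configurable values mentioned above as they might appear in values.yaml. The key names come from these notes; their exact nesting and the max_crawl_scale default shown here are assumptions, not taken from the chart.

```yaml
# Sketch only: key names from the notes above; placement and the
# max_crawl_scale default are assumptions, not confirmed against the chart.
max_crawl_scale: 3     # priority classes are created up to this scale (assumed default)
redis_storage: "3Gi"   # Redis storage size, 3Gi by default per the notes above
```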
include ./resolvers/resolvers.conf;

server {
    listen 8880;

    # health check for k8s
    location /healthz {
        return 200;
    }
}


server {
    listen 80 default_server;
    server_name _;
    proxy_buffering off;
    proxy_buffers 16 64k;
    proxy_buffer_size 64k;
    root /usr/share/nginx/html;
    index index.html index.htm;

    error_page 500 501 502 503 504 /50x.html;

    client_max_body_size 0;

    merge_slashes off;
    location = /50x.html {
        root /usr/share/nginx/html;
    }

    # fallback to index for any page
    error_page 404 /index.html;

    location / {
        root /usr/share/nginx/html;
        index index.html index.htm;
    }

    # serve replay service worker, RWP_BASE_URL set in Dockerfile
    location /replay/sw.js {
        add_header Content-Type application/javascript;
        return 200 'importScripts("${RWP_BASE_URL}sw.js");';
    }
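
    # redirect to ui.js at RWP_BASE_URL (set in Dockerfile)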
    location /replay/ui.js {
        add_header Content-Type application/javascript;
        return 307 ${RWP_BASE_URL}ui.js;
    }

    # used by docker only: k8s deployment handles /api directly via ingress
    location /api/ {
        proxy_pass http://${BACKEND_HOST}:8000;
        proxy_set_header Host $http_host;
        proxy_set_header X-Forwarded-Proto $scheme;
    }
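
    # live crawl watch: proxy the websocket to the matching crawler pod,
    # after access is verified via the /access_check subrequest below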
    location ~* /watch/([^/]+)/([^/]+)/([^/]+)/ws {
        set $org $1;
        set $crawl $2;
        set $num $3;
        set $auth_bearer $arg_auth_bearer;
        set $svc_suffix ".crawler";
        set $fqdn_suffix "${CRAWLER_FQDN_SUFFIX}";

        auth_request /access_check;

        proxy_pass http://crawl-$crawl-$num$svc_suffix$fqdn_suffix:9037/ws;
        proxy_set_header Host "localhost";

        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection $http_connection;
    }
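
    # internal-only auth subrequest: the backend confirms the requester
    # can access this crawl before the watch websocket is proxied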
    location = /access_check {
        internal;
        proxy_pass http://${BACKEND_HOST}:8000/api/orgs/$org/crawls/$crawl/access?auth_bearer=$auth_bearer;
        proxy_pass_request_body off;
        proxy_set_header Content-Length "";
    }

    # redirect to bundled build of novnc
    location ~* ^/browser/([^/]+)/core/rfb.js$ {
        absolute_redirect off;
        return 308 /js/novnc.js;
    }
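
    # profile browser: proxy the VNC websocket (websockify) to the browser pod,
    # after access is verified via the /access_check_profiles subrequest below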
    location ~* ^/browser/([^/]+)/ws$ {
        set $browserid $1;
        set $auth_bearer $arg_auth_bearer;
        set $org $arg_oid;
        set $fqdn_suffix "${CRAWLER_FQDN_SUFFIX}";

        auth_request /access_check_profiles;

        proxy_pass http://browser-$browserid.browser$fqdn_suffix:6080/websockify;
        proxy_set_header Host "localhost";

        proxy_send_timeout 10m;
        proxy_read_timeout 10m;

        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection $http_connection;
    }
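
    # profile browser: proxy the VNC viewer page from the browser pod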
    location ~* ^/browser/([^/]+)/$ {
        set $browserid $1;
        set $auth_bearer $arg_auth_bearer;
        set $org $arg_oid;
        set $fqdn_suffix "${CRAWLER_FQDN_SUFFIX}";

        auth_request /access_check_profiles;

        proxy_pass http://browser-$browserid.browser$fqdn_suffix:9223/vnc/;
        proxy_set_header Host "localhost";
    }
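
    # internal-only auth subrequest: the backend confirms the requester
    # can access this profile browser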
    location = /access_check_profiles {
        internal;
        proxy_pass http://${BACKEND_HOST}:8000/api/orgs/$org/profiles/browser/$browserid/access?auth_bearer=$auth_bearer;
        proxy_pass_request_body off;
        proxy_set_header Content-Length "";
    }

    include ./includes/*.conf;
}