* basecrawl refactor: make the crawls db more generic, supporting different types of 'base crawls': crawls, uploads, manual archives
  - move shared functionality to basecrawl.py
  - create a base BaseCrawl object, which contains start / finish time, metadata, and a files array
  - create BaseCrawlOps, a base class for CrawlOps, which supports base crawl deletion, querying, and collection add/remove

* uploads api: (part of #929)
  - new UploadedCrawl object which extends BaseCrawl and adds name and description
  - support multipart form data upload to /uploads/formdata
  - support streaming upload of a single file via /uploads/stream, using botocore multipart upload to upload to the s3-endpoint in parts (see the sketch below)
  - require 'filename' param to set the upload filename for streaming uploads (otherwise use form data names)
  - sanitize the filename, place uploads in /uploads/<uuid>/<sanitized-filename>-<random>.wacz
  - uploads have internal id 'upload-<uuid>'
  - create the UploadedCrawl object with CrawlFiles pointing to the newly uploaded files, set state to 'complete'
  - handle upload failures by aborting the multipart upload
  - ensure uploads are added within the org bucket path
  - return id / added when adding a new UploadedCrawl
  - support listing, deleting, and patching /uploads
  - support upload details via /replay.json to support replay
  - add support for 'replaceId=<id>', which removes all previous files in the upload after the new upload succeeds; if replaceId doesn't exist, create a new upload (stream endpoint only so far; see the usage example below)
  - support patching upload metadata: notes, tags, and name (UpdateUpload extends UpdateCrawl and adds 'name')

* base crawls api:
  - add /all-crawls list and delete endpoints for all crawl types (without resources)
  - support all-crawls/<id>/replay.json with resources
  - use the ListCrawlOut model for the /all-crawls list endpoint
  - extend BaseCrawlOut from ListCrawlOut, adding type
  - use 'type: crawl' for crawls and 'type: upload' for uploads
  - migration: ensure all previous crawl objects missing a type are set to 'type: crawl'
  - indexes: add db indices on the 'type' field, and compound indices of 'type' with oid, cid, finished, and state (sketched below)

* tests: add tests for multipart and streaming upload, listing uploads, and deleting uploads
  - add sample WACZ files for upload testing: 'example.wacz' and 'example-2.wacz'

* collections: support adding and removing both crawls and uploads via base crawl
  - include collection_ids in the /all-crawls list
  - collections replay.json can include both crawls and uploads

bump version to 1.6.0-beta.2

---------

Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
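For reference, a minimal sketch of the streaming upload flow, assuming a boto3/botocore S3 client; the function names, sanitization rule, and chunk size here are illustrative assumptions, not the actual browsertrix-cloud implementation:

    import re
    import secrets
    import uuid

    from botocore.exceptions import ClientError

    def sanitize_filename(name):
        # hypothetical rule: keep only word chars, dots, and dashes
        return re.sub(r"[^\w.-]", "", name) or "upload"

    def stream_upload_to_s3(client, bucket, org_prefix, filename, stream,
                            chunk_size=10 * 1024 * 1024):
        """Upload a single file stream to s3 in parts via multipart upload."""
        # keep uploads within the org bucket path:
        # <org-prefix>/uploads/<uuid>/<sanitized-filename>-<random>.wacz
        name = sanitize_filename(filename.removesuffix(".wacz"))
        key = (f"{org_prefix}/uploads/{uuid.uuid4()}/"
               f"{name}-{secrets.token_hex(4)}.wacz")

        mpu = client.create_multipart_upload(Bucket=bucket, Key=key)
        parts = []
        try:
            part_number = 1
            while chunk := stream.read(chunk_size):
                resp = client.upload_part(
                    Bucket=bucket, Key=key, PartNumber=part_number,
                    UploadId=mpu["UploadId"], Body=chunk,
                )
                parts.append({"PartNumber": part_number, "ETag": resp["ETag"]})
                part_number += 1

            client.complete_multipart_upload(
                Bucket=bucket, Key=key, UploadId=mpu["UploadId"],
                MultipartUpload={"Parts": parts},
            )
            return key
        except ClientError:
            # on failure, abort so partial parts don't linger in the bucket
            client.abort_multipart_upload(
                Bucket=bucket, Key=key, UploadId=mpu["UploadId"]
            )
            raise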
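The migration and index changes can be pictured with a short motor/pymongo sketch; the collection handle and exact index set are assumptions based on the notes above:

    # hypothetical migration sketch, given an async 'crawls' collection
    async def migrate_crawl_type(crawls):
        # backfill: {"type": None} matches docs where the field is
        # missing or null, i.e. all pre-existing crawl objects
        await crawls.update_many({"type": None}, {"$set": {"type": "crawl"}})

        # single-field index on 'type', plus compound indices pairing it
        # with the fields list queries filter and sort on
        await crawls.create_index([("type", 1)])
        await crawls.create_index([("type", 1), ("oid", 1)])
        await crawls.create_index([("type", 1), ("cid", 1)])
        await crawls.create_index([("type", 1), ("finished", -1)])
        await crawls.create_index([("type", 1), ("state", 1)])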
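And a hypothetical client-side use of the stream endpoint with replaceId; the HTTP method, org-scoped mount point, and auth header are assumptions here, only the 'filename' and 'replaceId' params and the id / added response come from the notes above:

    import requests

    with open("example.wacz", "rb") as fh:
        resp = requests.put(
            "https://app.example.com/api/orgs/<oid>/uploads/stream",
            params={"filename": "example.wacz",
                    "replaceId": "<existing-upload-id>"},
            data=fh,  # requests streams file-like bodies in chunks
            headers={"Authorization": "Bearer <token>"},
        )
    resp.raise_for_status()
    print(resp.json())  # expected shape: {"id": "upload-<uuid>", "added": True}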
include ./resolvers/resolvers.conf;

server {
    listen 8880;

    # health check for k8s
    location /healthz {
      return 200;
    }
}


server {
    listen 80 default_server;
    server_name _;
    proxy_buffering off;
    proxy_buffers 16 64k;
    proxy_buffer_size 64k;
    root /usr/share/nginx/html;
    index index.html index.htm;

    error_page 500 501 502 503 504 /50x.html;

    client_max_body_size 0;

    merge_slashes off;
    location = /50x.html {
        root /usr/share/nginx/html;
    }

    # fallback to index for any page
    error_page 404 /index.html;

    location / {
      root   /usr/share/nginx/html;
      index  index.html index.htm;
    }

    # serve replay service worker, RWP_BASE_URL set in Dockerfile
    location /replay/sw.js {
      add_header Content-Type application/javascript;
      return 200 'importScripts("${RWP_BASE_URL}sw.js");';
    }

    # used by docker only: k8s deployment handles /api directly via ingress
    location /api/ {
      proxy_pass http://${BACKEND_HOST}:8000;
      proxy_set_header Host $http_host;
      proxy_set_header X-Forwarded-Proto $scheme;
    }

    # websocket for watching a running crawl, authorized via subrequest
    location ~* /watch/([^/]+)/([^/]+)/([^/]+)/ws {
      set $org $1;
      set $crawl $2;
      set $num $3;
      set $auth_bearer $arg_auth_bearer;
      set $svc_suffix "${CRAWLER_SVC_SUFFIX}";
      set $fqdn_suffix "${CRAWLER_FQDN_SUFFIX}";

      auth_request  /access_check;

      proxy_pass http://crawl-$crawl-$num$svc_suffix$fqdn_suffix:9037/ws;
      proxy_set_header Host "localhost";

      proxy_http_version 1.1;
      proxy_set_header Upgrade $http_upgrade;
      proxy_set_header Connection $http_connection;
    }

    # internal auth subrequest: backend verifies access to the crawl
    location = /access_check {
      internal;
      proxy_pass http://${BACKEND_HOST}:8000/api/orgs/$org/crawls/$crawl/access?auth_bearer=$auth_bearer;
      proxy_pass_request_body off;
      proxy_set_header Content-Length "";
    }

    # redirect to bundled build of novnc
    location ~* ^/browser/([^/]+)/core/rfb.js$ {
      absolute_redirect off;
      return 308 /js/novnc.js;
    }

    # websockify connection to a running interactive browser
    location ~* ^/browser/([^/]+)/ws$ {
      set $browserid $1;
      set $auth_bearer $arg_auth_bearer;
      set $org $arg_oid;
      set $fqdn_suffix "${CRAWLER_FQDN_SUFFIX}";

      auth_request  /access_check_profiles;

      proxy_pass http://browser-$browserid-0.browser-$browserid$fqdn_suffix:6080/websockify;
      proxy_set_header Host "localhost";

      proxy_send_timeout 10m;
      proxy_read_timeout 10m;

      proxy_http_version 1.1;
      proxy_set_header Upgrade $http_upgrade;
      proxy_set_header Connection $http_connection;
    }

    # vnc page for a running interactive browser
    location ~* ^/browser/([^/]+)/$ {
      set $browserid $1;
      set $auth_bearer $arg_auth_bearer;
      set $org $arg_oid;
      set $fqdn_suffix "${CRAWLER_FQDN_SUFFIX}";

      auth_request  /access_check_profiles;

      proxy_pass http://browser-$browserid-0.browser-$browserid$fqdn_suffix:9223/vnc/;
      proxy_set_header Host "localhost";
    }

    # internal auth subrequest: backend verifies access to the browser profile
    location = /access_check_profiles {
      internal;
      proxy_pass http://${BACKEND_HOST}:8000/api/orgs/$org/profiles/browser/$browserid/access?auth_bearer=$auth_bearer;
      proxy_pass_request_body off;
      proxy_set_header Content-Length "";
    }

    include ./includes/*.conf;
}