Supports running QA Runs via the QA API! Builds on top of the `issue-1498-crawl-qa-backend-support` branch, fixes #1498. Also requires the latest Browsertrix Crawler 1.1.0+ (from the webrecorder/browsertrix-crawler#469 branch). Notable changes: - QARun objects contain info about QA runs, which are crawls performed on data loaded from existing crawls. - Various crawl db operations can be performed on either the crawl or the `qa.` object, and core crawl fields have been moved to CoreCrawlable. - While running, `QARun` data is stored in a single `qa` object, while finished QA runs are added to the `qaFinished` dictionary on the Crawl. The QA list API returns data from the finished list, sorted by most recent first. - Includes additional type fixes / type safety, especially around BaseCrawl / Crawl / UploadedCrawl functionality, also creating specific get_upload(), get_basecrawl(), get_crawl() getters for internal use and get_crawl_out() for the API. - Support filtering and sorting pages via `qaFilterBy` (screenshotMatch, textMatch) along with `gt`, `lt`, `gte`, `lte` params to return pages based on QA results. --------- Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
		
			
				
	
	
		
			104 lines
		
	
	
		
			2.8 KiB
		
	
	
	
		
			YAML
		
	
	
	
	
	
			
		
		
	
	
			104 lines
		
	
	
		
			2.8 KiB
		
	
	
	
		
			YAML
		
	
	
	
	
	
# Kubernetes Job template (rendered with Jinja) that runs a single rclone
# container as a background job against replica storage.
#
# Template variables read below: id, job_type, oid, primary_secret_name,
# primary_endpoint, primary_file_path, replica_secret_name, replica_endpoint,
# replica_file_path. BgJobType must also be available in the render context.
apiVersion: batch/v1
kind: Job
metadata:
  name: "{{ id }}"
  labels:
    role: "background-job"
    # Quoted so the rendered label values always parse as YAML strings,
    # whatever the template variables expand to.
    job_type: "{{ job_type }}"
    btrix.org: "{{ oid }}"

spec:
  # 0 makes the finished Job eligible for deletion immediately.
  ttlSecondsAfterFinished: 0
  backoffLimit: 3
  template:
    spec:
      restartPolicy: Never
      priorityClassName: bg-job
      # NOTE(review): in the batch/v1 API, podFailurePolicy is a field of the
      # Job spec, not of the pod template spec; at this nesting level it may be
      # ignored. Moving it would change retry behaviour (fail on the first
      # non-zero rclone exit instead of retrying), so confirm the intent first.
      podFailurePolicy:
        rules:
        - action: FailJob
          onExitCodes:
            containerName: rclone
            operator: NotIn
            values: [0]
      containers:
      - name: rclone
        # NOTE(review): unpinned tag; consider pinning a specific version.
        image: rclone/rclone:latest
        env:

        # The "primary" remote is only configured when creating a replica,
        # since that is the only command below that reads from it.
{% if job_type == BgJobType.CREATE_REPLICA %}
        - name: RCLONE_CONFIG_PRIMARY_TYPE
          value: "s3"

        - name: RCLONE_CONFIG_PRIMARY_ACCESS_KEY_ID
          valueFrom:
            secretKeyRef:
              name: "{{ primary_secret_name }}"
              key: STORE_ACCESS_KEY

        - name: RCLONE_CONFIG_PRIMARY_SECRET_ACCESS_KEY
          valueFrom:
            secretKeyRef:
              name: "{{ primary_secret_name }}"
              key: STORE_SECRET_KEY

        - name: RCLONE_CONFIG_PRIMARY_REGION
          valueFrom:
            secretKeyRef:
              name: "{{ primary_secret_name }}"
              key: STORE_REGION

        - name: RCLONE_CONFIG_PRIMARY_PROVIDER
          valueFrom:
            secretKeyRef:
              name: "{{ primary_secret_name }}"
              key: STORE_S3_PROVIDER

        - name: RCLONE_CONFIG_PRIMARY_ENDPOINT
          value: "{{ primary_endpoint }}"

{% endif %}

        # The "replica" remote is configured for every job type.
        - name: RCLONE_CONFIG_REPLICA_TYPE
          value: "s3"

        - name: RCLONE_CONFIG_REPLICA_ACCESS_KEY_ID
          valueFrom:
            secretKeyRef:
              name: "{{ replica_secret_name }}"
              key: STORE_ACCESS_KEY

        - name: RCLONE_CONFIG_REPLICA_SECRET_ACCESS_KEY
          valueFrom:
            secretKeyRef:
              name: "{{ replica_secret_name }}"
              key: STORE_SECRET_KEY

        - name: RCLONE_CONFIG_REPLICA_REGION
          valueFrom:
            secretKeyRef:
              name: "{{ replica_secret_name }}"
              key: STORE_REGION

        - name: RCLONE_CONFIG_REPLICA_PROVIDER
          valueFrom:
            secretKeyRef:
              name: "{{ replica_secret_name }}"
              key: STORE_S3_PROVIDER

        - name: RCLONE_CONFIG_REPLICA_ENDPOINT
          value: "{{ replica_endpoint }}"

        # Exactly one command is rendered, chosen by job type; no command is
        # set for any other job type.
{% if job_type == BgJobType.CREATE_REPLICA %}
        command: ["rclone", "-vv", "copyto", "--checksum", "primary:{{ primary_file_path }}", "replica:{{ replica_file_path }}"]
{% elif job_type == BgJobType.DELETE_REPLICA %}
        command: ["rclone", "-vv", "delete", "replica:{{ replica_file_path }}"]
{% endif %}
        resources:
          limits:
            memory: "200Mi"

          requests:
            memory: "200Mi"
            cpu: "50m"