Supports running QA runs via the QA API! Builds on top of the `issue-1498-crawl-qa-backend-support` branch; fixes #1498. Also requires Browsertrix Crawler 1.1.0+ (from the webrecorder/browsertrix-crawler#469 branch).

Notable changes:

- `QARun` objects contain info about QA runs, which are crawls performed on data loaded from existing crawls.
- Various crawl DB operations can be performed on either the crawl or the `qa.` object, and core crawl fields have been moved to `CoreCrawlable`.
- While running, `QARun` data is stored in a single `qa` object; finished QA runs are added to the `qaFinished` dictionary on the crawl. The QA list API returns data from the finished list, sorted by most recent first.
- Includes additional type fixes and type safety, especially around `BaseCrawl` / `Crawl` / `UploadedCrawl` functionality, adding dedicated `get_upload()`, `get_basecrawl()`, and `get_crawl()` getters for internal use and `get_crawl_out()` for the API.
- Supports filtering and sorting pages via `qaFilterBy` (`screenshotMatch`, `textMatch`) along with `gt`, `lt`, `gte`, and `lte` params to return pages based on QA results (see the request sketch below).

---------

Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
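To illustrate the new page filtering, here is a minimal request sketch. The endpoint path, host, and auth handling are assumptions, not taken from this PR; only `qaFilterBy`, the match fields, and the `gt`/`lt`/`gte`/`lte` params come from the description above.

```python
import requests

API = "https://app.example.com/api"  # assumed deployment URL
TOKEN = "..."                        # bearer token from the auth API
OID = "..."                          # organization id
CRAWL_ID = "..."                     # crawl the QA run was performed on
QA_RUN_ID = "..."                    # a finished QA run

# Hypothetical request shape: list pages of a QA run whose screenshot
# match score is at least 0.9, via the qaFilterBy + gte params.
resp = requests.get(
    f"{API}/orgs/{OID}/crawls/{CRAWL_ID}/qa/{QA_RUN_ID}/pages",
    params={"qaFilterBy": "screenshotMatch", "gte": 0.9},
    headers={"Authorization": f"Bearer {TOKEN}"},
)
resp.raise_for_status()
for page in resp.json().get("items", []):
    print(page.get("url"))
```

Swapping `gte` for `lt` would instead return the pages that scored *below* the threshold, which is the more useful query when triaging bad captures.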
Background job template (YAML, 104 lines, 2.8 KiB):
```yaml
apiVersion: batch/v1
kind: Job
metadata:
  name: "{{ id }}"
  labels:
    role: "background-job"
    job_type: {{ job_type }}
    btrix.org: {{ oid }}

spec:
  # delete the Job object as soon as it finishes
  ttlSecondsAfterFinished: 0
  backoffLimit: 3
  # podFailurePolicy is a Job-level field: if rclone exits non-zero,
  # fail the whole Job immediately instead of retrying up to backoffLimit
  podFailurePolicy:
    rules:
      - action: FailJob
        onExitCodes:
          containerName: rclone
          operator: NotIn
          values: [0]
  template:
    spec:
      restartPolicy: Never
      priorityClassName: bg-job
      containers:
        - name: rclone
          image: rclone/rclone:latest
          env:
            # the primary (source) storage is only needed when creating a replica
            {% if job_type == BgJobType.CREATE_REPLICA %}
            - name: RCLONE_CONFIG_PRIMARY_TYPE
              value: "s3"

            - name: RCLONE_CONFIG_PRIMARY_ACCESS_KEY_ID
              valueFrom:
                secretKeyRef:
                  name: "{{ primary_secret_name }}"
                  key: STORE_ACCESS_KEY

            - name: RCLONE_CONFIG_PRIMARY_SECRET_ACCESS_KEY
              valueFrom:
                secretKeyRef:
                  name: "{{ primary_secret_name }}"
                  key: STORE_SECRET_KEY

            - name: RCLONE_CONFIG_PRIMARY_REGION
              valueFrom:
                secretKeyRef:
                  name: "{{ primary_secret_name }}"
                  key: STORE_REGION

            - name: RCLONE_CONFIG_PRIMARY_PROVIDER
              valueFrom:
                secretKeyRef:
                  name: "{{ primary_secret_name }}"
                  key: STORE_S3_PROVIDER

            - name: RCLONE_CONFIG_PRIMARY_ENDPOINT
              value: "{{ primary_endpoint }}"
            {% endif %}

            - name: RCLONE_CONFIG_REPLICA_TYPE
              value: "s3"

            - name: RCLONE_CONFIG_REPLICA_ACCESS_KEY_ID
              valueFrom:
                secretKeyRef:
                  name: "{{ replica_secret_name }}"
                  key: STORE_ACCESS_KEY

            - name: RCLONE_CONFIG_REPLICA_SECRET_ACCESS_KEY
              valueFrom:
                secretKeyRef:
                  name: "{{ replica_secret_name }}"
                  key: STORE_SECRET_KEY

            - name: RCLONE_CONFIG_REPLICA_REGION
              valueFrom:
                secretKeyRef:
                  name: "{{ replica_secret_name }}"
                  key: STORE_REGION

            - name: RCLONE_CONFIG_REPLICA_PROVIDER
              valueFrom:
                secretKeyRef:
                  name: "{{ replica_secret_name }}"
                  key: STORE_S3_PROVIDER

            - name: RCLONE_CONFIG_REPLICA_ENDPOINT
              value: "{{ replica_endpoint }}"

          {% if job_type == BgJobType.CREATE_REPLICA %}
          # copy the file from primary to replica storage, verifying checksums
          command: ["rclone", "-vv", "copyto", "--checksum", "primary:{{ primary_file_path }}", "replica:{{ replica_file_path }}"]
          {% elif job_type == BgJobType.DELETE_REPLICA %}
          # remove the file from replica storage
          command: ["rclone", "-vv", "delete", "replica:{{ replica_file_path }}"]
          {% endif %}

          resources:
            limits:
              memory: "200Mi"

            requests:
              memory: "200Mi"
              cpu: "50m"
```
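The template above is standard Jinja2, so a minimal rendering sketch looks like the following. All variable values, the `BgJobType` enum shape, and the file name are assumptions for illustration; the actual operator code supplies these when launching a background job.

```python
from enum import Enum

from jinja2 import Template


class BgJobType(str, Enum):
    """Assumed enum shape, mirroring the job types used in the template."""

    CREATE_REPLICA = "create-replica"
    DELETE_REPLICA = "delete-replica"


# File name is an assumption -- use wherever this template lives in the chart.
with open("replica_job.yaml", encoding="utf-8") as fh:
    template = Template(fh.read())

# All values below are placeholders for illustration only.
manifest = template.render(
    id="create-replica-abc123",
    job_type=BgJobType.CREATE_REPLICA.value,
    oid="org-uuid",
    BgJobType=BgJobType,  # the template compares job_type against enum members
    primary_secret_name="storage-default",
    primary_endpoint="https://s3.example.com/primary-bucket/",
    primary_file_path="crawls/crawl-1/data.wacz",
    replica_secret_name="storage-replica-0",
    replica_endpoint="https://s3.example.com/replica-bucket/",
    replica_file_path="crawls/crawl-1/data.wacz",
)
print(manifest)  # YAML ready to pass to the Kubernetes API / kubectl apply -f -
```

Because `BgJobType` mixes in `str`, the rendered `job_type` string compares equal to the enum member inside the template's `{% if %}` blocks, which is what selects between the `copyto` and `delete` commands.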