Supports running QA Runs via the QA API! Builds on top of the `issue-1498-crawl-qa-backend-support` branch; fixes #1498.

Also requires the latest Browsertrix Crawler 1.1.0+ (from the webrecorder/browsertrix-crawler#469 branch).

Notable changes:
- QARun objects contain info about QA runs, which are crawls performed on data loaded from existing crawls.
- Various crawl db operations can be performed on either the crawl or `qa.` object, and core crawl fields have been moved to CoreCrawlable.
- While running, `QARun` data is stored in a single `qa` object, while finished QA runs are added to the `qaFinished` dictionary on the Crawl. The QA list API returns data from the finished list, sorted by most recent first.
- Includes additional type fixes / type safety, especially around BaseCrawl / Crawl / UploadedCrawl functionality, also creating specific get_upload(), get_basecrawl(), get_crawl() getters for internal use and get_crawl_out() for the API.
- Supports filtering and sorting pages via `qaFilterBy` (screenshotMatch, textMatch) along with `gt`, `lt`, `gte`, `lte` params to return pages based on QA results.

---------

Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
143 lines
3.2 KiB
YAML
143 lines
3.2 KiB
YAML
---
# CompositeController whose parent resource is the btrix.cloud/v1
# `crawljobs` custom resource. Pods, PersistentVolumeClaims and ConfigMaps
# are declared as its child resources.
apiVersion: metacontroller.k8s.io/v1alpha1
kind: CompositeController
metadata:
  name: crawljobs-operator
spec:
  generateSelector: false
  # Resync interval in seconds; falls back to 10 when the
  # operator_resync_seconds value is unset or empty.
  resyncPeriodSeconds: {{ .Values.operator_resync_seconds | default 10 }}
  parentResource:
    apiVersion: btrix.cloud/v1
    resource: crawljobs
  childResources:
    - apiVersion: v1
      resource: pods
      updateStrategy:
        method: OnDelete

    - apiVersion: v1
      resource: persistentvolumeclaims
      updateStrategy:
        method: InPlace

    - apiVersion: v1
      resource: configmaps
      updateStrategy:
        method: OnDelete

  # Every hook is a webhook served by the "<name>-backend" Service in the
  # release namespace, on the port given by the opPort value.
  # String values are quoted so an all-digit namespace or name is not
  # re-typed as a number after rendering; the port stays a bare integer.
  hooks:
    sync:
      webhook:
        service:
          namespace: "{{ .Release.Namespace }}"
          name: "{{ .Values.name }}-backend"
          port: {{ .Values.opPort }}
        path: /op/crawls/sync

    finalize:
      webhook:
        service:
          namespace: "{{ .Release.Namespace }}"
          name: "{{ .Values.name }}-backend"
          port: {{ .Values.opPort }}
        path: /op/crawls/finalize

    customize:
      webhook:
        service:
          namespace: "{{ .Release.Namespace }}"
          name: "{{ .Values.name }}-backend"
          port: {{ .Values.opPort }}
        path: /op/crawls/customize
---
# CompositeController whose parent resource is the btrix.cloud/v1
# `profilejobs` custom resource, with Pods as its only child resource.
apiVersion: metacontroller.k8s.io/v1alpha1
kind: CompositeController
metadata:
  name: profilejobs-operator
spec:
  generateSelector: false
  resyncPeriodSeconds: 30
  parentResource:
    apiVersion: btrix.cloud/v1
    resource: profilejobs
  childResources:
    - apiVersion: v1
      resource: pods
      updateStrategy:
        method: InPlace

  # The sync hook is a webhook served by the "<name>-backend" Service in
  # the release namespace, on the port given by the opPort value.
  # String values are quoted so an all-digit namespace or name is not
  # re-typed as a number after rendering; the port stays a bare integer.
  hooks:
    sync:
      webhook:
        service:
          namespace: "{{ .Release.Namespace }}"
          name: "{{ .Values.name }}-backend"
          port: {{ .Values.opPort }}
        path: /op/profilebrowsers/sync
---
# DecoratorController that selects batch/v1 Jobs labelled
# `role: scheduled-crawljob` and declares btrix.cloud/v1 `crawljobs`
# objects as their attachments.
apiVersion: metacontroller.k8s.io/v1alpha1
kind: DecoratorController
metadata:
  name: cron-crawljobs-operator
spec:
  resyncPeriodSeconds: 30
  resources:
    - apiVersion: batch/v1
      resource: jobs
      labelSelector:
        matchLabels:
          role: scheduled-crawljob

  attachments:
    - apiVersion: btrix.cloud/v1
      resource: crawljobs
      updateStrategy:
        method: InPlace

  # Both hooks are webhooks served by the "<name>-backend" Service in the
  # release namespace, on the port given by the opPort value.
  # String values are quoted so an all-digit namespace or name is not
  # re-typed as a number after rendering; the port stays a bare integer.
  hooks:
    sync:
      webhook:
        service:
          namespace: "{{ .Release.Namespace }}"
          name: "{{ .Values.name }}-backend"
          port: {{ .Values.opPort }}
        path: /op/cronjob/sync

    customize:
      webhook:
        service:
          namespace: "{{ .Release.Namespace }}"
          name: "{{ .Values.name }}-backend"
          port: {{ .Values.opPort }}
        path: /op/cronjob/customize
---
# DecoratorController that selects batch/v1 Jobs labelled
# `role: background-job`. It declares no attachments, only sync and
# finalize hooks.
apiVersion: metacontroller.k8s.io/v1alpha1
kind: DecoratorController
metadata:
  name: background-job-operator
spec:
  resyncPeriodSeconds: 30
  resources:
    - apiVersion: batch/v1
      resource: jobs
      labelSelector:
        matchLabels:
          role: background-job

  # Both hooks are webhooks served by the "<name>-backend" Service in the
  # release namespace, on the port given by the opPort value.
  # String values are quoted so an all-digit namespace or name is not
  # re-typed as a number after rendering; the port stays a bare integer.
  hooks:
    sync:
      webhook:
        service:
          namespace: "{{ .Release.Namespace }}"
          name: "{{ .Values.name }}-backend"
          port: {{ .Values.opPort }}
        path: /op/backgroundjob/sync

    finalize:
      webhook:
        service:
          namespace: "{{ .Release.Namespace }}"
          name: "{{ .Values.name }}-backend"
          port: {{ .Values.opPort }}
        path: /op/backgroundjob/finalize