Supports running QA Runs via the QA API! Builds on top of the `issue-1498-crawl-qa-backend-support` branch; fixes #1498.

Also requires the latest Browsertrix Crawler 1.1.0+ (from the webrecorder/browsertrix-crawler#469 branch).

Notable changes:
- QARun objects contain info about QA runs, which are crawls performed on data loaded from existing crawls.
- Various crawl db operations can be performed on either the crawl or `qa.` object, and core crawl fields have been moved to CoreCrawlable.
- While running, `QARun` data is stored in a single `qa` object, while finished QA runs are added to the `qaFinished` dictionary on the Crawl. The QA list API returns data from the finished list, sorted by most recent first.
- Includes additional type fixes / type safety, especially around BaseCrawl / Crawl / UploadedCrawl functionality, also creating specific get_upload(), get_basecrawl(), get_crawl() getters for internal use and get_crawl_out() for the API.
- Support filtering and sorting pages via `qaFilterBy` (screenshotMatch, textMatch) along with `gt`, `lt`, `gte`, `lte` params to return pages based on QA results.

---------

Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
		
			
				
	
	
		
			143 lines
		
	
	
		
			3.2 KiB
		
	
	
	
		
			YAML
		
	
	
	
	
	
			
		
		
	
	
			143 lines
		
	
	
		
			3.2 KiB
		
	
	
	
		
			YAML
		
	
	
	
	
	
---
# Metacontroller CompositeController for CrawlJob custom resources.
# The parent is a btrix.cloud/v1 `crawljobs` object; its managed children are
# pods, persistentvolumeclaims and configmaps. All hooks are webhooks served by
# the "<name>-backend" Service in the release namespace.
apiVersion: metacontroller.k8s.io/v1alpha1
kind: CompositeController
metadata:
  name: crawljobs-operator
spec:
  generateSelector: false
  # Resync interval in seconds; falls back to 10 when
  # .Values.operator_resync_seconds is not set.
  resyncPeriodSeconds: {{ .Values.operator_resync_seconds | default 10 }}
  parentResource:
    apiVersion: btrix.cloud/v1
    resource: crawljobs
  childResources:
  - apiVersion: v1
    resource: pods
    updateStrategy:
      method: OnDelete

  - apiVersion: v1
    resource: persistentvolumeclaims
    updateStrategy:
      method: InPlace

  - apiVersion: v1
    resource: configmaps
    updateStrategy:
      method: OnDelete

  hooks:
    # String-valued template output is quoted so that a digit-only or
    # boolean-looking value is still parsed as a YAML string. The port is left
    # unquoted so it renders as a number.
    sync:
      webhook:
        service:
          namespace: "{{ .Release.Namespace }}"
          name: "{{ .Values.name }}-backend"
          port: {{ .Values.opPort }}
        path: /op/crawls/sync

    finalize:
      webhook:
        service:
          namespace: "{{ .Release.Namespace }}"
          name: "{{ .Values.name }}-backend"
          port: {{ .Values.opPort }}
        path: /op/crawls/finalize

    customize:
      webhook:
        service:
          namespace: "{{ .Release.Namespace }}"
          name: "{{ .Values.name }}-backend"
          port: {{ .Values.opPort }}
        path: /op/crawls/customize

---
# Metacontroller CompositeController for ProfileJob custom resources.
# The parent is a btrix.cloud/v1 `profilejobs` object; its only managed child
# type is pods. The single sync hook is a webhook served by the
# "<name>-backend" Service in the release namespace.
apiVersion: metacontroller.k8s.io/v1alpha1
kind: CompositeController
metadata:
  name: profilejobs-operator
spec:
  generateSelector: false
  resyncPeriodSeconds: 30
  parentResource:
    apiVersion: btrix.cloud/v1
    resource: profilejobs
  childResources:
  - apiVersion: v1
    resource: pods
    updateStrategy:
      method: InPlace

  hooks:
    # String-valued template output is quoted so that a digit-only or
    # boolean-looking value is still parsed as a YAML string. The port is left
    # unquoted so it renders as a number.
    sync:
      webhook:
        service:
          namespace: "{{ .Release.Namespace }}"
          name: "{{ .Values.name }}-backend"
          port: {{ .Values.opPort }}
        path: /op/profilebrowsers/sync

---
# Metacontroller DecoratorController for scheduled crawls.
# It selects batch/v1 Jobs labelled `role: scheduled-crawljob` and attaches
# btrix.cloud/v1 `crawljobs` objects to them. Hooks are webhooks served by the
# "<name>-backend" Service in the release namespace.
apiVersion: metacontroller.k8s.io/v1alpha1
kind: DecoratorController
metadata:
  name: cron-crawljobs-operator
spec:
  resyncPeriodSeconds: 30
  resources:
  - apiVersion: batch/v1
    resource: jobs
    labelSelector:
      matchLabels:
        role: scheduled-crawljob

  attachments:
  - apiVersion: btrix.cloud/v1
    resource: crawljobs
    updateStrategy:
      method: InPlace

  hooks:
    # String-valued template output is quoted so that a digit-only or
    # boolean-looking value is still parsed as a YAML string. The port is left
    # unquoted so it renders as a number.
    sync:
      webhook:
        service:
          namespace: "{{ .Release.Namespace }}"
          name: "{{ .Values.name }}-backend"
          port: {{ .Values.opPort }}
        path: /op/cronjob/sync

    customize:
      webhook:
        service:
          namespace: "{{ .Release.Namespace }}"
          name: "{{ .Values.name }}-backend"
          port: {{ .Values.opPort }}
        path: /op/cronjob/customize
---
# Metacontroller DecoratorController for background jobs.
# It selects batch/v1 Jobs labelled `role: background-job`; no attachments are
# declared. Hooks are webhooks served by the "<name>-backend" Service in the
# release namespace.
apiVersion: metacontroller.k8s.io/v1alpha1
kind: DecoratorController
metadata:
  name: background-job-operator
spec:
  resyncPeriodSeconds: 30
  resources:
  - apiVersion: batch/v1
    resource: jobs
    labelSelector:
      matchLabels:
        role: background-job
  hooks:
    # String-valued template output is quoted so that a digit-only or
    # boolean-looking value is still parsed as a YAML string. The port is left
    # unquoted so it renders as a number.
    sync:
      webhook:
        service:
          namespace: "{{ .Release.Namespace }}"
          name: "{{ .Values.name }}-backend"
          port: {{ .Values.opPort }}
        path: /op/backgroundjob/sync
    finalize:
      webhook:
        service:
          namespace: "{{ .Release.Namespace }}"
          name: "{{ .Values.name }}-backend"
          port: {{ .Values.opPort }}
        path: /op/backgroundjob/finalize