Supports running QA Runs via the QA API! Builds on top of the `issue-1498-crawl-qa-backend-support` branch, fixes #1498. Also requires the latest Browsertrix Crawler 1.1.0+ (from webrecorder/browsertrix-crawler#469 branch). Notable changes: - QARun objects contain info about QA runs, which are crawls performed on data loaded from existing crawls. - Various crawl db operations can be performed on either the crawl or `qa.` object, and core crawl fields have been moved to CoreCrawlable. - While running, `QARun` data is stored in a single `qa` object, while finished QA runs are added to the `qaFinished` dictionary on the Crawl. The QA list API returns data from the finished list, sorted by most recent first. - Includes additional type fixes / type safety, especially around BaseCrawl / Crawl / UploadedCrawl functionality, also creating specific get_upload(), get_basecrawl(), get_crawl() getters for internal use and get_crawl_out() for the API. - Support filtering and sorting pages via `qaFilterBy` (screenshotMatch, textMatch) along with `gt`, `lt`, `gte`, `lte` params to return pages based on QA results. --------- Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
52 lines
1.1 KiB
YAML
52 lines
1.1 KiB
YAML
# test overrides
# --------------
# NOTE(review): presumably layered on top of the chart's default values for
# test deployments only -- confirm against the deploy/test scripts.

# use local images built to :latest tag
backend_image: docker.io/webrecorder/browsertrix-backend:latest
frontend_image: docker.io/webrecorder/browsertrix-frontend:latest

# presumably "Never" keeps the cluster from pulling, so the locally built
# images above are the ones that run -- TODO confirm
backend_pull_policy: "Never"
frontend_pull_policy: "Never"

default_crawl_filename_template: "@ts-testing-@hostsuffix.wacz"

operator_resync_seconds: 3

# for testing only
crawler_extra_cpu_per_browser: 300m

crawler_extra_memory_per_browser: 256Mi

# each entry pairs a channel id with the crawler image used for that channel
crawler_channels:
  - id: default
    image: "docker.io/webrecorder/browsertrix-crawler:latest"

  - id: test
    image: "docker.io/webrecorder/browsertrix-crawler:1.1.0-beta.1"

mongo_auth:
  # specify either username + password (for local mongo)
  username: root
  password: PASSWORD@

superuser:
  # set this to enable a superuser admin
  email: admin@example.com

  # optional: if not set, automatically generated
  # change or remove this
  password: PASSW0RD!

# test max pages per crawl global limit
max_pages_per_crawl: 4

# quoted so the value stays the string "0" rather than an integer
registration_enabled: "0"

# log failed crawl pods to operator backend
log_failed_crawl_lines: 200

# disable for tests
disk_utilization_threshold: 0