Backend: - add 'maxCrawlSize' to models and crawljob spec - add 'MAX_CRAWL_SIZE' to configmap - add maxCrawlSize to new crawlconfig + update APIs - operator: gracefully stop crawl if current size (from stats) exceeds maxCrawlSize - tests: add max crawl size tests Frontend: - Add Max Crawl Size text box Limits tab - Users enter max crawl size in GB, convert to bytes - Add BYTES_PER_GB as constant for converting to bytes - docs: Crawl Size Limit to user guide workflow setup section Operator Refactor: - use 'status.stopping' instead of 'crawl.stopping' to indicate crawl is being stopped, as changing later has no effect in operator - add is_crawl_stopping() to return if crawl is being stopped, based on crawl.stopping or size or time limit being reached - crawlerjob status: store byte size under 'size', human readable size under 'sizeHuman' for clarity - size stat always exists so remove unneeded conditional (defaults to 0) - store raw byte size in 'size', human readable size in 'sizeHuman' Charts: - subchart: update crawlerjob crd in btrix-crds to show status.stopping instead of spec.stopping - subchart: show 'sizeHuman' property instead of 'size' - bump subchart version to 0.1.1 --------- Co-authored-by: Ilya Kreymer <ikreymer@gmail.com>
83 lines
1.9 KiB
YAML
83 lines
1.9 KiB
YAML
---
# CustomResourceDefinition for Browsertrix CrawlJob objects.
# Reconstructed from a garbled extraction; fixes `singular: crawlob`
# typo -> `crawljob` so `kubectl get crawljob` resolves correctly.
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  # Must be <plural>.<group>
  name: crawljobs.btrix.cloud
spec:
  scope: Namespaced
  group: btrix.cloud
  names:
    kind: CrawlJob
    plural: crawljobs
    singular: crawljob
    shortNames:
      - cjs

  versions:
    - name: v1
      served: true
      storage: true

      subresources:
        # Enables the /status subresource so the operator can update
        # status independently of spec.
        status: {}
        # Enables `kubectl scale` against .spec.scale.
        scale:
          specReplicasPath: .spec.scale
          statusReplicasPath: .status.scale

      schema:
        openAPIV3Schema:
          type: object
          properties:
            # Spec and status are free-form; validation is handled by
            # the operator, not the API server.
            spec:
              type: object
              x-kubernetes-preserve-unknown-fields: true

            status:
              type: object
              x-kubernetes-preserve-unknown-fields: true

      # Columns shown by `kubectl get crawljobs`.
      additionalPrinterColumns:
        - name: State
          type: string
          jsonPath: .status.state
          description: Crawl State

        - name: Pages Done
          type: integer
          jsonPath: .status.pagesDone
          description: Pages Done

        - name: Pages Found
          type: integer
          jsonPath: .status.pagesFound
          description: Pages Found

        # Human-readable size lives in sizeHuman; raw byte count is
        # stored separately in .status.size.
        - name: Size
          type: string
          jsonPath: .status.sizeHuman
          description: Crawl Size

        - name: Time Started
          type: date
          jsonPath: .metadata.creationTimestamp
          description: "time crawl started"

        - name: Time Finished
          type: date
          jsonPath: .status.finished
          description: "if set, time crawl has finished"

        # Stopping state is tracked on status (not spec) so the
        # operator's graceful-stop decision is authoritative.
        - name: Stopping
          type: boolean
          jsonPath: .status.stopping
          description: "if set, crawl is being stopped"

        - name: Files Added
          type: integer
          jsonPath: .status.filesAdded
          description: "number of WACZ files uploaded so far from this crawl"

        - name: Scale
          type: integer
          jsonPath: .status.scale
          description: Crawl Scale