Backend:
- add 'maxCrawlSize' to models and crawljob spec
- add 'MAX_CRAWL_SIZE' to configmap
- add maxCrawlSize to new crawlconfig + update APIs
- operator: gracefully stop crawl if current size (from stats) exceeds maxCrawlSize
- tests: add max crawl size tests

Frontend:
- Add Max Crawl Size text box to the Limits tab
- Users enter max crawl size in GB, converted to bytes (sketched below)
- Add BYTES_PER_GB as a constant for converting to bytes
- docs: add Crawl Size Limit to the user guide workflow setup section

Operator Refactor:
- use 'status.stopping' instead of 'crawl.stopping' to indicate the crawl is being stopped, as changing the latter has no effect in the operator
- add is_crawl_stopping() to return whether the crawl is being stopped, based on crawl.stopping or the size or time limit being reached (sketched below)
- crawljob status: store raw byte size under 'size' and human-readable size under 'sizeHuman' for clarity (sketched below)
- the size stat always exists (defaults to 0), so remove the unneeded conditional

Charts:
- subchart: update crawljob CRD in btrix-crds to show status.stopping instead of spec.stopping
- subchart: show the 'sizeHuman' property instead of 'size'
- bump subchart version to 0.1.1

---------

Co-authored-by: Ilya Kreymer <ikreymer@gmail.com>
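A minimal sketch of the stop check described above, assuming illustrative names (the CrawlSpec/CrawlStatus containers and the expire_time field are placeholders; maxCrawlSize, status.stopping, the size stat, and is_crawl_stopping() come from the change itself):

from dataclasses import dataclass
from typing import Optional
import time


@dataclass
class CrawlSpec:
    """Illustrative subset of the crawljob spec."""
    stopping: bool = False               # user requested a graceful stop
    max_crawl_size: int = 0              # maxCrawlSize in bytes; 0 means no size limit
    expire_time: Optional[float] = None  # absolute time limit (epoch seconds); None means no time limit


@dataclass
class CrawlStatus:
    """Illustrative subset of the crawljob status."""
    size: int = 0           # raw byte size reported by the crawler stats (defaults to 0)
    stopping: bool = False  # mirrored here so the CRD 'Stopping' column can display it


def is_crawl_stopping(crawl: CrawlSpec, status: CrawlStatus, now: Optional[float] = None) -> bool:
    """Return True if the crawl should be gracefully stopped."""
    now = time.time() if now is None else now

    # explicit stop requested via crawl.stopping
    if crawl.stopping:
        return True

    # size limit reached: current size from stats exceeds maxCrawlSize
    if crawl.max_crawl_size and status.size > crawl.max_crawl_size:
        return True

    # time limit reached
    if crawl.expire_time is not None and now >= crawl.expire_time:
        return True

    return False

On each reconcile the operator can then record the result in status.stopping, which is why the CRD's Stopping printer column shown further down reads .status.stopping rather than .spec.stopping.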
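The frontend conversion above is a single multiplication; the actual code lives in the TypeScript frontend, so the Python restatement below is purely illustrative, and the decimal definition of BYTES_PER_GB (10^9 bytes per GB) is an assumption:

# Illustration only: convert a Max Crawl Size entered in GB to the byte value
# stored in maxCrawlSize. Assumes decimal gigabytes; the real BYTES_PER_GB
# constant is defined in the TypeScript frontend and may differ.
BYTES_PER_GB = 1_000_000_000


def gb_to_bytes(size_gb: float) -> int:
    return int(size_gb * BYTES_PER_GB)


assert gb_to_bytes(2) == 2_000_000_000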
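For the size / sizeHuman split, a sketch of how the raw byte count from the crawler stats could be mirrored into both status fields; the update_size_status() helper and the use of the humanize package are assumptions for illustration:

import humanize


def update_size_status(status: dict, stats: dict) -> None:
    # the size stat always exists, so default to 0 instead of branching on its presence
    size = int(stats.get("size", 0))
    status["size"] = size                             # raw bytes, compared against maxCrawlSize
    status["sizeHuman"] = humanize.naturalsize(size)  # human-readable string for the CRD 'Size' column

The Size printer column in the CRD further down reads .status.sizeHuman for display, while the limit check keeps using the raw .status.size byte count.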
crawljobs CRD from the btrix-crds subchart (YAML, 83 lines, 1.9 KiB):
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  name: crawljobs.btrix.cloud
spec:
  scope: Namespaced
  group: btrix.cloud
  names:
    kind: CrawlJob
    plural: crawljobs
    singular: crawljob
    shortNames:
      - cjs

  versions:
    - name: v1
      served: true
      storage: true
      subresources:
        status: {}
        scale:
          specReplicasPath: .spec.scale
          statusReplicasPath: .status.scale

      schema:
        openAPIV3Schema:
          type: object
          properties:
            spec:
              type: object
              x-kubernetes-preserve-unknown-fields: true

            status:
              type: object
              x-kubernetes-preserve-unknown-fields: true

      additionalPrinterColumns:
        - name: State
          type: string
          jsonPath: .status.state
          description: Crawl State

        - name: Pages Done
          type: integer
          jsonPath: .status.pagesDone
          description: Pages Done

        - name: Pages Found
          type: integer
          jsonPath: .status.pagesFound
          description: Pages Found

        # updated: display the human-readable size from .status.sizeHuman (raw bytes stay in .status.size)
        - name: Size
          type: string
          jsonPath: .status.sizeHuman
          description: Crawl Size

        - name: Time Started
          type: date
          jsonPath: .metadata.creationTimestamp
          description: "time crawl started"

        - name: Time Finished
          type: date
          jsonPath: .status.finished
          description: "if set, time crawl has finished"

        # updated: stopping state is now tracked in .status.stopping rather than .spec.stopping
        - name: Stopping
          type: boolean
          jsonPath: .status.stopping
          description: "if set, crawl is being stopped"

        - name: Files Added
          type: integer
          jsonPath: .status.filesAdded
          description: "number of WACZ files uploaded so far from this crawl"

        - name: Scale
          type: integer
          jsonPath: .status.scale
          description: Crawl Scale