- Refactors storage to support replicas and custom storages on the Org.
- There is a default primary and replica storage, while an Org can also have its own primary and replica storages.
- A StorageRef object is used to store references to default and custom storage.
- CrawlFile has been updated to contain a StorageRef instead of a def_storage_name; the StorageRef references either a default storage (in StorageOps) or a custom storage (in Organization).
- There is also a 'replicas' field (Optional[List[StorageRef]]) which contains replicas, if any.
- CrawlFileOut contains a numReplicas field for how many replicas exist for a given file.
- Migration: migration 0020 added to migrate existing Orgs, CrawlFile and ProfileFile objects to the new storage system (CrawlFile and ProfileFile now extend BaseFile).

Part of #1262

---------

Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
		
			
				
	
	
		
			139 lines
		
	
	
		
			4.5 KiB
		
	
	
	
		
			YAML
		
	
	
	
	
	
			
		
		
	
	
			139 lines
		
	
	
		
			4.5 KiB
		
	
	
	
		
			YAML
		
	
	
	
	
	
---
# ConfigMap of environment-style settings rendered from chart values.
# Presumably loaded as environment variables by the application -- confirm
# against the deployment template that references this ConfigMap.
#
# ConfigMap data values must be strings, so every templated value below is
# rendered inside double quotes.
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ .Values.name }}-env-config
  namespace: {{ .Release.Namespace }}

data:
  # Scheme is "https" when ingress.tls is truthy, otherwise "http";
  # the host falls back to localhost:9870 when ingress.host is unset.
  APP_ORIGIN: "{{ .Values.ingress.tls | ternary "https" "http" }}://{{ .Values.ingress.host | default "localhost:9870" }}"

  CRAWLER_NAMESPACE: "{{ .Values.crawler_namespace }}"

  DEFAULT_NAMESPACE: "{{ .Release.Namespace }}"

  CRAWLER_FQDN_SUFFIX: ".{{ .Values.crawler_namespace }}.svc.cluster.local"

  DEFAULT_ORG: "{{ .Values.default_org }}"

  INVITE_EXPIRE_SECONDS: "{{ .Values.invite_expire_seconds }}"

  REGISTRATION_ENABLED: "{{ .Values.registration_enabled | default 0 }}"

  ALLOW_DUPE_INVITES: "{{ .Values.allow_dupe_invites | default 0 }}"

  JWT_TOKEN_LIFETIME_MINUTES: "{{ .Values.jwt_token_lifetime_minutes | default 60 }}"

  DEFAULT_BEHAVIOR_TIME_SECONDS: "{{ .Values.default_behavior_time_seconds }}"

  DEFAULT_PAGE_LOAD_TIME_SECONDS: "{{ .Values.default_page_load_time_seconds }}"

  DEFAULT_CRAWL_FILENAME_TEMPLATE: "{{ .Values.default_crawl_filename_template }}"

  MAX_PAGES_PER_CRAWL: "{{ .Values.max_pages_per_crawl | default 0 }}"

  # Sourced from the profile_browser_idle_seconds chart value.
  IDLE_TIMEOUT: "{{ .Values.profile_browser_idle_seconds | default 60 }}"

  RERUN_FROM_MIGRATION: "{{ .Values.rerun_from_migration }}"

  PRESIGN_DURATION_MINUTES: "{{ .Values.storage_presign_duration_minutes }}"

  # Sourced from the operator_fast_resync_secs chart value.
  FAST_RETRY_SECS: "{{ .Values.operator_fast_resync_secs | default 3 }}"

  MAX_CRAWL_SCALE: "{{ .Values.max_crawl_scale | default 3 }}"

  LOG_FAILED_CRAWL_LINES: "{{ .Values.log_failed_crawl_lines | default 0 }}"

  IS_LOCAL_MINIO: "{{ .Values.minio_local }}"

---
# ConfigMap in the crawler namespace holding the crawler argument string.
apiVersion: v1
kind: ConfigMap
metadata:
  name: shared-crawler-config
  namespace: {{ .Values.crawler_namespace }}

data:
  # Folded scalar with strip chomping (>-): the lines below are joined with
  # single spaces into one string with no trailing newline. Keep every line at
  # the same indentation -- more-indented lines are not folded.
  # crawler_extra_args stays on the final line so that an empty value cannot
  # produce a blank line in the middle of the scalar.
  CRAWL_ARGS: >-
    --workers {{ .Values.crawler_browser_instances | default 1 }}
    --sizeLimit {{ .Values.crawler_session_size_limit_bytes }}
    --timeLimit {{ .Values.crawler_session_time_limit_seconds }}
    --maxPageLimit {{ .Values.max_pages_per_crawl | default 0 }}
    --healthCheckPort {{ .Values.crawler_liveness_port }}
    --diskUtilization {{ .Values.disk_utilization_threshold }}
    --userAgentSuffix {{ .Values.user_agent_suffix | quote }}
    --userAgent {{ .Values.user_agent | quote }}
    --logging {{ .Values.crawler_logging_opts }}
    --text {{ .Values.crawler_extract_full_text }}
    --generateWACZ
    --collection thecrawl
    --screencastPort 9037
    --logErrorsToRedis
    --restartsOnError
    --headless {{ .Values.crawler_extra_args }}

---
# ConfigMap carrying a single embedded YAML file (config.yaml) with shared
# redis and crawler job settings.
# Created in the release namespace; the crawler-namespace variant is disabled.
apiVersion: v1
kind: ConfigMap
metadata:
  name: shared-job-config
  namespace: {{ .Release.Namespace }}

data:
  # Literal block scalar: everything indented below is the text of config.yaml.
  # Keys must be unique within it -- volume_storage_class is defined once.
  config.yaml: |
    namespace: {{ .Values.crawler_namespace }}
    termination_grace_secs: "{{ .Values.grace_period_secs | default 600 }}"

    volume_storage_class: "{{ .Values.volume_storage_class }}"

    # redis
    redis_image: {{ .Values.redis_image }}
    redis_image_pull_policy: {{ .Values.redis_pull_policy }}

    redis_cpu: "{{ .Values.redis_cpu }}"

    redis_memory: "{{ .Values.redis_memory }}"

    redis_storage: "{{ .Values.redis_storage }}"

    # crawler
    crawler_image: {{ .Values.crawler_image }}
    crawler_image_pull_policy: {{ .Values.crawler_pull_policy }}

    crawler_cpu_base: "{{ .Values.crawler_cpu_base }}"
    crawler_memory_base: "{{ .Values.crawler_memory_base }}"

    crawler_extra_cpu_per_browser: "{{ .Values.crawler_extra_cpu_per_browser | default 0 }}"
    crawler_extra_memory_per_browser: "{{ .Values.crawler_extra_memory_per_browser | default 0 }}"

    crawler_browser_instances: "{{ .Values.crawler_browser_instances }}"

    crawler_cpu: "{{ .Values.crawler_cpu }}"
    crawler_memory: "{{ .Values.crawler_memory }}"

    crawler_storage: "{{ .Values.crawler_storage }}"

    crawler_liveness_port: "{{ .Values.crawler_liveness_port | default 0 }}"

    crawler_socks_proxy_host: "{{ .Values.crawler_socks_proxy_host }}"
    crawler_socks_proxy_port: "{{ .Values.crawler_socks_proxy_port }}"

    crawler_node_type: "{{ .Values.crawler_node_type }}"
    redis_node_type: "{{ .Values.redis_node_type }}"

    # "signing-secret" when signer.enabled is truthy; otherwise renders empty
    # (parsed as null by a YAML loader).
    signing_secret: {{ .Values.signer.enabled | ternary "signing-secret" "" }}

---
# ConfigMap in the crawler namespace holding a redis.conf file.
apiVersion: v1
kind: ConfigMap
metadata:
  name: shared-redis-conf
  namespace: {{ .Values.crawler_namespace }}

data:
  # Literal block scalar: the lines below are the exact text of redis.conf,
  # so no trailing whitespace or YAML comments belong inside it.
  redis.conf: |
    appendonly yes
    dir /data

---
# ConfigMap built from chart files: one data entry per file matching *.conf,
# keyed by the file's base name (Helm's AsConfig helper).
apiVersion: v1
kind: ConfigMap
metadata:
  name: nginx-config
  namespace: {{ .Release.Namespace }}

data:
{{ (.Files.Glob "*.conf").AsConfig | indent 2 }}

{{/* Disabled: embedding chart files from the frontend directory as additional
     data entries. This is a template comment on purpose: Helm still evaluates
     actions placed inside YAML '#' comments, and a multi-line result would
     spill past the '#' into the rendered manifest. */}}