* use metacontroller's decoratorcontroller to create CrawlJob from Job
* scheduled job work:
  - use existing job name for scheduled crawljob
  - use suspended job, set startTime, completionTime and succeeded status on job when crawljob is done
  - simplify cronjob template: remove job_image, cron_namespace, using same namespace as crawls, placeholder job image for cronjobs
* move storage quota check to crawljob handler:
  - add 'skipped_quota_reached' as new failed status type
  - check for storage quota before checking if crawljob can be started, fail if not (check before any pods/pvcs created)
* frontend:
  - show all crawls in crawl workflow, no need to filter by status
  - add 'skipped_quota_reached' status, show as 'Skipped (Quota Reached)', render same as failed
* migration: make release namespace available as DEFAULT_NAMESPACE, delete old cronjobs in DEFAULT_NAMESPACE and recreate in crawlers namespace with new template
		
			
				
	
	
		
			136 lines
		
	
	
		
			4.3 KiB
		
	
	
	
		
			YAML
		
	
	
	
	
	
			
		
		
	
	
			136 lines
		
	
	
		
			4.3 KiB
		
	
	
	
		
			YAML
		
	
	
	
	
	
---
# ConfigMap of environment-style settings, named "<.Values.name>-env-config"
# and created in the release namespace.
#
# Every templated value is quoted: ConfigMap `data` values must be strings, and
# an unquoted expansion such as an all-digit namespace would otherwise be parsed
# as a number (or as null when the value is empty).
apiVersion: v1
kind: ConfigMap
metadata:
  name: "{{ .Values.name }}-env-config"
  namespace: "{{ .Release.Namespace }}"

data:
  # Scheme follows .Values.ingress.tls; host falls back to localhost:9870.
  APP_ORIGIN: "{{ .Values.ingress.tls | ternary "https" "http" }}://{{ .Values.ingress.host | default "localhost:9870" }}"

  CRAWLER_NAMESPACE: "{{ .Values.crawler_namespace }}"

  # The namespace this chart release is installed into.
  DEFAULT_NAMESPACE: "{{ .Release.Namespace }}"

  # Cluster-local DNS suffix built from the crawler namespace.
  CRAWLER_FQDN_SUFFIX: ".{{ .Values.crawler_namespace }}.svc.cluster.local"

  DEFAULT_ORG: "{{ .Values.default_org }}"

  INVITE_EXPIRE_SECONDS: "{{ .Values.invite_expire_seconds }}"

  REGISTRATION_ENABLED: "{{ .Values.registration_enabled | default 0 }}"

  ALLOW_DUPE_INVITES: "{{ .Values.allow_dupe_invites | default 0 }}"

  JWT_TOKEN_LIFETIME_MINUTES: "{{ .Values.jwt_token_lifetime_minutes | default 60 }}"

  DEFAULT_BEHAVIOR_TIME_SECONDS: "{{ .Values.default_behavior_time_seconds }}"

  DEFAULT_PAGE_LOAD_TIME_SECONDS: "{{ .Values.default_page_load_time_seconds }}"

  DEFAULT_CRAWL_FILENAME_TEMPLATE: "{{ .Values.default_crawl_filename_template }}"

  MAX_PAGES_PER_CRAWL: "{{ .Values.max_pages_per_crawl | default 0 }}"

  # Sourced from .Values.profile_browser_idle_seconds (default 60).
  IDLE_TIMEOUT: "{{ .Values.profile_browser_idle_seconds | default 60 }}"

  RERUN_LAST_MIGRATION: "{{ .Values.rerun_last_migration }}"

  PRESIGN_DURATION_MINUTES: "{{ .Values.storage_presign_duration_minutes | default 60 }}"

  # Sourced from .Values.operator_fast_resync_secs (default 3).
  FAST_RETRY_SECS: "{{ .Values.operator_fast_resync_secs | default 3 }}"

  MAX_CRAWL_SCALE: "{{ .Values.max_crawl_scale | default 3 }}"

  LOG_FAILED_CRAWL_LINES: "{{ .Values.log_failed_crawl_lines | default 0 }}"

---
# ConfigMap carrying the crawler argument string, created in the crawler
# namespace.
apiVersion: v1
kind: ConfigMap
metadata:
  name: shared-crawler-config
  namespace: "{{ .Values.crawler_namespace }}"

data:
  # Folded scalar (>-): the lines below are joined with single spaces into one
  # string, and the final newline is stripped. Every line starts with a flag so
  # an empty templated value can never leave a blank line behind (a blank line
  # inside a folded scalar would turn into a literal newline). No trailing
  # whitespace is kept after the last flag, since it would become part of the
  # value.
  CRAWL_ARGS: >-
    --workers {{ .Values.crawler_browser_instances | default 1 }}
    --sizeLimit {{ .Values.crawler_session_size_limit_bytes }}
    --timeLimit {{ .Values.crawler_session_time_limit_seconds }}
    --maxPageLimit {{ .Values.max_pages_per_crawl | default 0 }}
    --healthCheckPort {{ .Values.crawler_liveness_port }}
    --diskUtilization {{ .Values.disk_utilization_threshold }}
    --userAgentSuffix {{ .Values.user_agent_suffix | quote }}
    --userAgent {{ .Values.user_agent | quote }}
    --logging {{ .Values.crawler_logging_opts }}
    --text {{ .Values.crawler_extract_full_text }}
    --generateWACZ
    --collection thecrawl
    --screencastPort 9037
    --logErrorsToRedis {{ .Values.crawler_extra_args }}
    --restartsOnError

---
# ConfigMap exposing an embedded config.yaml document with redis and crawler
# settings.
#
# It is created in the release namespace; an earlier revision of this template
# targeted .Values.crawler_namespace instead. (That alternative used to be kept
# as a '#'-commented line holding a template action; Helm evaluates actions
# inside YAML comments, so it is described here in plain words instead.)
apiVersion: v1
kind: ConfigMap
metadata:
  name: shared-job-config
  namespace: "{{ .Release.Namespace }}"

data:
  # Literal scalar (|): everything below is the verbatim content of config.yaml.
  # volume_storage_class is defined exactly once; it used to be repeated in the
  # crawler section with the same value, which made config.yaml carry a
  # duplicate key.
  config.yaml: |
    namespace: "{{ .Values.crawler_namespace }}"
    termination_grace_secs: "{{ .Values.grace_period_secs | default 600 }}"

    volume_storage_class: "{{ .Values.volume_storage_class }}"

    # redis
    redis_image: "{{ .Values.redis_image }}"
    redis_image_pull_policy: "{{ .Values.redis_pull_policy }}"

    redis_cpu: "{{ .Values.redis_cpu }}"

    redis_memory: "{{ .Values.redis_memory }}"

    redis_storage: "{{ .Values.redis_storage }}"

    # crawler
    crawler_image: "{{ .Values.crawler_image }}"
    crawler_image_pull_policy: "{{ .Values.crawler_pull_policy }}"

    crawler_cpu_base: "{{ .Values.crawler_cpu_base }}"
    crawler_memory_base: "{{ .Values.crawler_memory_base }}"

    crawler_extra_cpu_per_browser: "{{ .Values.crawler_extra_cpu_per_browser | default 0 }}"
    crawler_extra_memory_per_browser: "{{ .Values.crawler_extra_memory_per_browser | default 0 }}"

    crawler_browser_instances: "{{ .Values.crawler_browser_instances }}"

    crawler_cpu: "{{ .Values.crawler_cpu }}"
    crawler_memory: "{{ .Values.crawler_memory }}"

    crawler_storage: "{{ .Values.crawler_storage }}"

    crawler_liveness_port: "{{ .Values.crawler_liveness_port | default 0 }}"

    crawler_socks_proxy_host: "{{ .Values.crawler_socks_proxy_host }}"
    crawler_socks_proxy_port: "{{ .Values.crawler_socks_proxy_port }}"

    crawler_node_type: "{{ .Values.crawler_node_type }}"
    redis_node_type: "{{ .Values.redis_node_type }}"

---
# ConfigMap carrying a redis.conf file, created in the crawler namespace.
apiVersion: v1
kind: ConfigMap
metadata:
  name: shared-redis-conf
  namespace: "{{ .Values.crawler_namespace }}"

data:
  # Literal scalar (|): the two directives below are the verbatim file content.
  # No trailing whitespace is kept after them, since inside a block scalar it
  # would become part of redis.conf.
  redis.conf: |
    appendonly yes
    dir /data

---
# ConfigMap built from the chart files matching the glob *.conf, created in the
# release namespace. The entries are produced by .Files.Glob(...).AsConfig and
# indented two spaces so they nest under `data`.
apiVersion: v1
kind: ConfigMap
metadata:
  name: nginx-config
  namespace: "{{ .Release.Namespace }}"

data:
{{ (.Files.Glob "*.conf").AsConfig | indent 2 }}

# Disabled: additionally including the chart files matching the glob
# frontend/*.* in this ConfigMap. This is described in words on purpose: a
# template action placed behind a YAML '#' is still evaluated by Helm, and any
# multi-line output would only have its first line commented out.