- Refactors storage to support replicas + custom storages on the Org. - There is a default primary + replica storage, while an Org can also have primary and replica storages. - StorageRef object is used to store references to default and custom storage. - CrawlFile has been updated to contain a StorageRef instead of a def_storage_name, which references either a default storage (in StorageOps) or custom storage (in Organization) - There is also a 'replicas' Optional[List[StorageRef]] which contains replicas, if any. - CrawlFileOut contain a numReplicas for how many replicas exist for a given file. - Migration: migration 0020 added to migrate existing Orgs, CrawlFile and ProfileFile objects to new storage system (CrawlFile and ProfileFile now extend BaseFile) Part of #1262 --------- Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
77 lines
1.7 KiB
YAML
77 lines
1.7 KiB
YAML
---
|
|
apiVersion: v1
|
|
kind: Pod
|
|
metadata:
|
|
name: browser-{{ id }}
|
|
namespace: {{ namespace }}
|
|
labels:
|
|
browser: {{ id }}
|
|
role: browser
|
|
|
|
spec:
|
|
hostname: browser-{{ id }}
|
|
subdomain: browser
|
|
|
|
{% if priorityClassName %}
|
|
priorityClassName: {{ priorityClassName }}
|
|
{% endif %}
|
|
|
|
restartPolicy: OnFailure
|
|
|
|
affinity:
|
|
nodeAffinity:
|
|
preferredDuringSchedulingIgnoredDuringExecution:
|
|
- weight: 1
|
|
preference:
|
|
matchExpressions:
|
|
- key: nodeType
|
|
operator: In
|
|
values:
|
|
- "{{ crawler_node_type }}"
|
|
|
|
tolerations:
|
|
- key: nodeType
|
|
operator: Equal
|
|
value: crawling
|
|
effect: NoSchedule
|
|
- key: node.kubernetes.io/not-ready
|
|
operator: Exists
|
|
tolerationSeconds: 300
|
|
effect: NoExecute
|
|
- key: node.kubernetes.io/unreachable
|
|
operator: Exists
|
|
effect: NoExecute
|
|
tolerationSeconds: 300
|
|
|
|
containers:
|
|
- name: browser
|
|
image: {{ crawler_image }}
|
|
imagePullPolicy: {{ crawler_image_pull_policy }}
|
|
command:
|
|
- create-login-profile
|
|
- --interactive
|
|
- --filename
|
|
- /tmp/profile.tar.gz
|
|
- --url
|
|
- {{ url }}
|
|
{%- if profile_filename %}
|
|
- --profile
|
|
- "@{{ profile_filename }}"
|
|
{%- endif %}
|
|
|
|
envFrom:
|
|
- secretRef:
|
|
name: {{ storage_secret }}
|
|
|
|
env:
|
|
- name: STORE_PATH
|
|
value: {{ storage_path }}
|
|
|
|
- name: VNC_PASS
|
|
value: {{ vnc_password }}
|
|
|
|
{% if crawler_socks_proxy_host %}
|
|
- name: CHROME_FLAGS
|
|
value: "--proxy-server=socks5://{{ crawler_socks_proxy_host }}:{{ crawler_socks_proxy_port | default('9050') }}"
|
|
{% endif %}
|