clean up crawl config, add separate 'runNow' field

crawler: add cpu/memory limits
minio: auto-create bucket for local minio
This commit is contained in:
Ilya Kreymer 2021-08-19 14:15:21 -07:00
parent eaa87c8b43
commit 627e9a6f14
6 changed files with 60 additions and 18 deletions

View File

@ -73,8 +73,9 @@ class CrawlConfigIn(BaseModel):
"""CrawlConfig input model, submitted via API"""
schedule: Optional[str] = ""
runNow: Optional[bool] = False
storageName: Optional[str] = "default"
#storageName: Optional[str] = "default"
config: RawCrawlConfig
@ -84,8 +85,9 @@ class CrawlConfig(BaseMongoModel):
"""Schedulable config"""
schedule: Optional[str] = ""
runNow: Optional[bool] = False
storageName: Optional[str] = "default"
#storageName: Optional[str] = "default"
archive: Optional[str]

View File

@ -1,3 +1,5 @@
# pylint: skip-file
from archives import Archive
from crawls import CrawlConfig

View File

@ -7,9 +7,6 @@ import json
from kubernetes_asyncio import client, config
# from fastapi.templating import Jinja2Templates
from jinja2 import Environment, FileSystemLoader
# ============================================================================
DEFAULT_NAMESPACE = os.environ.get("CRAWLER_NAMESPACE") or "crawlers"
@ -31,11 +28,6 @@ class K8SManager:
self.namespace = namespace
loader = FileSystemLoader("templates")
self.jinja_env = Environment(
loader=loader, autoescape=False, lstrip_blocks=False, trim_blocks=False
)
self.crawler_image = os.environ.get("CRAWLER_IMAGE")
self.crawler_image_pull_policy = "IfNotPresent"
@ -97,15 +89,17 @@ class K8SManager:
)
# Create Cron Job
run_now = False
schedule = crawlconfig.schedule
suspend = False
if not schedule or schedule == "now":
if schedule == "now":
run_now = True
schedule = crawlconfig.schedule
if not schedule:
schedule = DEFAULT_NO_SCHEDULE
suspend = True
run_now = False
if crawlconfig.runNow:
run_now = True
job_template = self._get_job_template(cid, labels, extra_crawl_params)
spec = client.V1beta1CronJobSpec(
@ -205,6 +199,23 @@ class K8SManager:
if extra_crawl_params:
command += extra_crawl_params
requests_memory = "256M"
limit_memory = "1G"
requests_cpu = "120m"
limit_cpu = "1000m"
resources = {
"limits": {
"cpu": limit_cpu,
"memory": limit_memory,
},
"requests": {
"cpu": requests_cpu,
"memory": requests_memory,
},
}
return {
"spec": {
"template": {
@ -227,6 +238,7 @@ class K8SManager:
"envFrom": [
{"secretRef": {"name": f"crawl-secret-{uid}"}}
],
"resources": resources
}
],
"volumes": [

View File

@ -1,6 +1,5 @@
uvicorn
fastapi-users[mongodb]==6.0.0
loguru
jinja2
aiofiles
kubernetes-asyncio

View File

@ -26,6 +26,8 @@ stringData:
{{- if .Values.minio_local }}
MINIO_ROOT_USER: "{{ .Values.storage.access_key }}"
MINIO_ROOT_PASSWORD: "{{ .Values.storage.secret_key }}"
MC_HOST: "{{ .Values.minio_scheme }}://{{ .Values.storage.access_key }}:{{ .Values.storage.secret_key }}@{{ .Values.minio_host }}"
{{- end }}
STORE_ACCESS_KEY: "{{ .Values.storage.access_key }}"
@ -52,6 +54,23 @@ spec:
app: {{ .Values.name }}
spec:
{{- if .Values.minio_local }}
initContainers:
- name: init-bucket
image: {{ .Values.minio_mc_image }}
imagePullPolicy: {{ .Values.minio_pull_policy }}
env:
- name: MC_HOST_local
valueFrom:
secretKeyRef:
name: storage-auth
key: MC_HOST
command: ['/bin/sh']
args: ['-c', 'mc mb local/test-bucket; mc policy set public local/test-bucket' ]
{{- end }}
containers:
- name: api
image: {{ .Values.api_image }}
@ -67,10 +86,12 @@ spec:
resources:
limits:
cpu: {{ .Values.api_limit_cpu }}
cpu: {{ .Values.api_limits_cpu }}
memory: {{ .Values.api_limits_memory }}
requests:
cpu: {{ .Values.api_requests_cpu }}
memory: {{ .Values.api_requests_memory }}
---

View File

@ -9,9 +9,11 @@ api_password_secret: "c9085f33ecce4347aa1d69339e16c499"
api_num_replicas: 1
api_limit_cpu: "100m"
api_requests_cpu: "25m"
api_limits_cpu: "100m"
api_requests_memory: "100M"
api_limits_memory: "256M"
# MongoDB Image
@ -66,7 +68,11 @@ storage:
# set to true to use a local minio image
minio_local: True
minio_scheme: "http"
minio_host: "local-minio.default:9000"
minio_image: minio/minio
minio_mc_image: minio/mc
minio_pull_policy: "IfNotPresent"