diff --git a/backend/btrixcloud/operator/baseoperator.py b/backend/btrixcloud/operator/baseoperator.py
index f108b1b7..47307224 100644
--- a/backend/btrixcloud/operator/baseoperator.py
+++ b/backend/btrixcloud/operator/baseoperator.py
@@ -47,20 +47,31 @@ class K8sOpAPI(K8sAPI):
     def compute_crawler_resources(self):
         """compute memory / cpu resources for crawlers"""
         p = self.shared_params
-        num = max(int(p["crawler_browser_instances"]) - 1, 0)
+        num_workers = max(int(p["crawler_browser_instances"]), 1)
+        try:
+            qa_num_workers = max(int(p["qa_browser_instances"]), 1)
+        # pylint: disable=bare-except
+        except:
+            # default to 1 for now for best results (to revisit in the future)
+            qa_num_workers = 1
         crawler_cpu: float = 0
         crawler_memory: int = 0
+        qa_cpu: float = 0
+        qa_memory: int = 0
         print("crawler resources")
         if not p.get("crawler_cpu"):
             base = parse_quantity(p["crawler_cpu_base"])
             extra = parse_quantity(p["crawler_extra_cpu_per_browser"])
 
             # cpu is a floating value of cpu cores
-            crawler_cpu = float(base + num * extra)
+            crawler_cpu = float(base + (num_workers - 1) * extra)
+            qa_cpu = float(base + (qa_num_workers - 1) * extra)
 
-            print(f"cpu = {base} + {num} * {extra} = {crawler_cpu}")
+            print(f"cpu = {base} + {num_workers - 1} * {extra} = {crawler_cpu}")
+            print(f"qa_cpu = {base} + {qa_num_workers - 1} * {extra} = {qa_cpu}")
         else:
             crawler_cpu = float(parse_quantity(p["crawler_cpu"]))
+            qa_cpu = crawler_cpu
             print(f"cpu = {crawler_cpu}")
 
         if not p.get("crawler_memory"):
@@ -68,11 +79,14 @@ class K8sOpAPI(K8sAPI):
             extra = parse_quantity(p["crawler_extra_memory_per_browser"])
 
             # memory is always an int
-            crawler_memory = int(base + num * extra)
+            crawler_memory = int(base + (num_workers - 1) * extra)
+            qa_memory = int(base + (qa_num_workers - 1) * extra)
 
-            print(f"memory = {base} + {num} * {extra} = {crawler_memory}")
+            print(f"memory = {base} + {num_workers - 1} * {extra} = {crawler_memory}")
+            print(f"qa_memory = {base} + {qa_num_workers - 1} * {extra} = {qa_memory}")
         else:
             crawler_memory = int(parse_quantity(p["crawler_memory"]))
+            qa_memory = crawler_memory
             print(f"memory = {crawler_memory}")
 
         max_crawler_memory_size = 0
@@ -86,6 +100,10 @@ class K8sOpAPI(K8sAPI):
 
         p["crawler_cpu"] = crawler_cpu
         p["crawler_memory"] = crawler_memory
+        p["crawler_workers"] = num_workers
+        p["qa_cpu"] = qa_cpu
+        p["qa_memory"] = qa_memory
+        p["qa_workers"] = qa_num_workers
 
     def compute_profile_resources(self):
         """compute memory /cpu resources for a single profile browser"""
diff --git a/backend/btrixcloud/operator/crawls.py b/backend/btrixcloud/operator/crawls.py
index 62e75676..3f1a9ef9 100644
--- a/backend/btrixcloud/operator/crawls.py
+++ b/backend/btrixcloud/operator/crawls.py
@@ -335,22 +335,30 @@ class CrawlOperator(BaseOperator):
             name = f"crawl-{params['id']}-{i}"
             has_pod = name in children[POD]
 
+            if params.get("qa_source_crawl_id"):
+                cpu_field = "qa_cpu"
+                mem_field = "qa_memory"
+                worker_field = "qa_workers"
+                pri_class = f"qa-crawl-pri-{i}"
+            else:
+                cpu_field = "crawler_cpu"
+                mem_field = "crawler_memory"
+                worker_field = "crawler_workers"
+                pri_class = f"crawl-pri-{i}"
+
             pod_info = status.podStatus[name]
             params["name"] = name
-            params["cpu"] = pod_info.newCpu or params.get("crawler_cpu")
-            params["memory"] = pod_info.newMemory or params.get("crawler_memory")
+            params["priorityClassName"] = pri_class
+            params["cpu"] = pod_info.newCpu or params.get(cpu_field)
+            params["memory"] = pod_info.newMemory or params.get(mem_field)
             params["memory_limit"] = float(params["memory"]) * MEM_LIMIT_PADDING
+            params["workers"] = params.get(worker_field) or 1
             params["do_restart"] = (
                 pod_info.should_restart_pod() or params.get("force_restart")
             ) and has_pod
             if params.get("do_restart"):
                 print(f"Restart {name}")
 
-            if params.get("qa_source_crawl_id"):
-                params["priorityClassName"] = f"qa-crawl-pri-{i}"
-            else:
-                params["priorityClassName"] = f"crawl-pri-{i}"
-
             return self.load_from_yaml("crawler.yaml", params)
 
     # pylint: disable=too-many-arguments
diff --git a/chart/app-templates/crawler.yaml b/chart/app-templates/crawler.yaml
index 339eaadc..acd6e1b9 100644
--- a/chart/app-templates/crawler.yaml
+++ b/chart/app-templates/crawler.yaml
@@ -61,11 +61,12 @@ spec:
   volumes:
     - name: crawl-config
       configMap:
-        {% if not qa_source_crawl_id %}
         name: crawl-config-{{ cid }}
-        {% else %}
+    {% if qa_source_crawl_id %}
+    - name: qa-config
+      configMap:
         name: qa-replay-{{ qa_source_crawl_id }}
-        {% endif %}
+    {% endif %}
     - name: crawl-data
       persistentVolumeClaim:
         claimName: {{ name }}
@@ -114,31 +115,33 @@ spec:
   containers:
    - name: crawler
      image: {{ crawler_image }}
      imagePullPolicy: {{ crawler_image_pull_policy }}
      command:
-       {% if not qa_source_crawl_id %}
-       - crawl
+       - {{ "crawl" if not qa_source_crawl_id else "qa" }}
        - --config
        - /tmp/crawl-config.json
+       - --workers
+       - "{{ workers }}"
        - --redisStoreUrl
        - {{ redis_url }}
-       {%- if profile_filename %}
+       {% if qa_source_crawl_id %}
+       - --qaSource
+       - /tmp/qa-config.json
+       {% elif profile_filename %}
        - --profile
        - "@{{ profile_filename }}"
-       {%- endif %}
-
-       {% else %}
-       - qa
-       - --qaSource
-       - /tmp/crawl-config.json
-       - --redisStoreUrl
-       - {{ redis_url }}
-       - --writePagesToRedis
-       {% endif %}
+       {% endif %}
      volumeMounts:
        - name: crawl-config
          mountPath: /tmp/crawl-config.json
          subPath: crawl-config.json
          readOnly: True
+       {% if qa_source_crawl_id %}
+       - name: qa-config
+         mountPath: /tmp/qa-config.json
+         subPath: qa-config.json
+         readOnly: True
+       {% endif %}
+
        - name: crawl-data
          mountPath: /crawls
      envFrom:
diff --git a/chart/app-templates/qa_configmap.yaml b/chart/app-templates/qa_configmap.yaml
index 9fd9e405..dbff8439 100644
--- a/chart/app-templates/qa_configmap.yaml
+++ b/chart/app-templates/qa_configmap.yaml
@@ -11,4 +11,4 @@ metadata:
     role: crawler
 
 data:
-  crawl-config.json: {{ qa_source_replay_json | tojson }}
+  qa-config.json: {{ qa_source_replay_json | tojson }}
diff --git a/chart/templates/configmap.yaml b/chart/templates/configmap.yaml
index 6899dc8a..c7c21351 100644
--- a/chart/templates/configmap.yaml
+++ b/chart/templates/configmap.yaml
@@ -67,7 +67,7 @@ metadata:
 
 data:
   CRAWL_ARGS: >-
-    --workers {{ .Values.crawler_browser_instances | default 1 }} --sizeLimit {{ .Values.crawler_session_size_limit_bytes }} --timeLimit {{ .Values.crawler_session_time_limit_seconds }} --maxPageLimit {{ .Values.max_pages_per_crawl | default 0 }} --healthCheckPort {{ .Values.crawler_liveness_port }} --diskUtilization {{ .Values.disk_utilization_threshold }} --logging {{ .Values.crawler_logging_opts }} --text {{ .Values.crawler_extract_full_text }} --generateWACZ --collection thecrawl --screencastPort 9037 --logErrorsToRedis --writePagesToRedis --restartsOnError --headless --screenshot view,thumbnail {{ .Values.crawler_extra_args }}
+    --sizeLimit {{ .Values.crawler_session_size_limit_bytes }} --timeLimit {{ .Values.crawler_session_time_limit_seconds }} --maxPageLimit {{ .Values.max_pages_per_crawl | default 0 }} --healthCheckPort {{ .Values.crawler_liveness_port }} --diskUtilization {{ .Values.disk_utilization_threshold }} --logging {{ .Values.crawler_logging_opts }} --text {{ .Values.crawler_extract_full_text }} --generateWACZ --collection thecrawl --screencastPort 9037 --logErrorsToRedis --writePagesToRedis --restartsOnError --headless --screenshot view,thumbnail {{ .Values.crawler_extra_args }}
 
 ---
 apiVersion: v1
@@ -105,6 +105,7 @@ data:
   crawler_extra_memory_per_browser: "{{ .Values.crawler_extra_memory_per_browser | default 0 }}"
 
   crawler_browser_instances: "{{ .Values.crawler_browser_instances }}"
+  qa_browser_instances: "{{ .Values.qa_browser_instances }}"
 
   crawler_cpu: "{{ .Values.crawler_cpu }}"
   crawler_memory: "{{ .Values.crawler_memory }}"
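
Note for reviewers: below is a minimal standalone sketch of the resource math this patch introduces, for illustration only. The helper name pod_resources and the sample base/extra values are hypothetical (the real values come from chart settings parsed with parse_quantity); in the patch itself the computation lives in K8sOpAPI.compute_crawler_resources, with QA pods reusing the same base-plus-extra formula under their own worker count (defaulting to 1 when qa_browser_instances is unset).

    from decimal import Decimal

    # Illustrative sketch -- not part of the patch. Mirrors the formula in
    # compute_crawler_resources(): the base allocation covers the first
    # browser; each additional browser adds one "extra" increment.
    def pod_resources(
        base_cpu: Decimal, extra_cpu: Decimal,
        base_memory: int, extra_memory: int,
        workers: int,
    ) -> tuple[float, int]:
        workers = max(workers, 1)  # the patch likewise clamps workers to >= 1
        cpu = float(base_cpu + (workers - 1) * extra_cpu)
        memory = int(base_memory + (workers - 1) * extra_memory)
        return cpu, memory

    # Hypothetical values: 0.9 CPU base + 0.6 per extra browser,
    # 1 GiB memory base + 768 MiB per extra browser.
    crawl = pod_resources(Decimal("0.9"), Decimal("0.6"), 1024**3, 768 * 1024**2, workers=4)
    qa = pod_resources(Decimal("0.9"), Decimal("0.6"), 1024**3, 768 * 1024**2, workers=1)
    print(crawl)  # (2.7, 3489660928): 0.9 + 3 * 0.6 CPUs, 1 GiB + 3 * 768 MiB
    print(qa)     # (0.9, 1073741824): base only, since QA defaults to one worker

The crawler pod's memory limit is then padded on top of this request (the MEM_LIMIT_PADDING multiplier in crawls.py), and the computed worker count is passed into crawler.yaml as {{ workers }} rather than baked into the shared CRAWL_ARGS, which is why the --workers flag moves out of chart/templates/configmap.yaml.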