diff --git a/backend/btrixcloud/crawlmanager.py b/backend/btrixcloud/crawlmanager.py index 2b5f194a..1ea58e00 100644 --- a/backend/btrixcloud/crawlmanager.py +++ b/backend/btrixcloud/crawlmanager.py @@ -220,6 +220,12 @@ class CrawlManager(K8sAPI): proxy_id=crawlconfig.proxyId or DEFAULT_PROXY_ID, ) + async def reload_running_crawl_config(self, crawl_id: str): + """force reload of configmap for crawl""" + return await self._patch_job( + crawl_id, {"lastConfigUpdate": date_to_str(dt_now())} + ) + async def create_qa_crawl_job( self, crawlconfig: CrawlConfig, diff --git a/backend/btrixcloud/crawls.py b/backend/btrixcloud/crawls.py index 87ea7e3a..2a89e04f 100644 --- a/backend/btrixcloud/crawls.py +++ b/backend/btrixcloud/crawls.py @@ -544,6 +544,8 @@ class CrawlOps(BaseCrawlOps): regex, cid, org, user, add ) + await self.crawl_manager.reload_running_crawl_config(crawl.id) + await self.crawls.find_one_and_update( {"_id": crawl_id, "type": "crawl", "oid": org.id}, {"$set": {"config": new_config.dict()}}, diff --git a/backend/btrixcloud/operator/crawls.py b/backend/btrixcloud/operator/crawls.py index 91976cad..172af560 100644 --- a/backend/btrixcloud/operator/crawls.py +++ b/backend/btrixcloud/operator/crawls.py @@ -274,12 +274,9 @@ class CrawlOperator(BaseOperator): params["storage_path"] = storage_path params["storage_secret"] = storage_secret - # only resolve if not already set - # not automatically updating image for existing crawls - if not status.crawlerImage: - status.crawlerImage = self.crawl_config_ops.get_channel_crawler_image( - crawl.crawler_channel - ) + status.crawlerImage = self.crawl_config_ops.get_channel_crawler_image( + crawl.crawler_channel + ) params["crawler_image"] = status.crawlerImage @@ -306,7 +303,16 @@ class CrawlOperator(BaseOperator): else: params["force_restart"] = False - children.extend(await self._load_crawl_configmap(crawl, data.children, params)) + config_update_needed = ( + spec.get("lastConfigUpdate", "") != status.lastConfigUpdate + ) + status.lastConfigUpdate = spec.get("lastConfigUpdate", "") + + children.extend( + await self._load_crawl_configmap( + crawl, data.children, params, config_update_needed + ) + ) if crawl.qa_source_crawl_id: params["qa_source_crawl_id"] = crawl.qa_source_crawl_id @@ -364,11 +370,13 @@ class CrawlOperator(BaseOperator): return behaviors - async def _load_crawl_configmap(self, crawl: CrawlSpec, children, params): + async def _load_crawl_configmap( + self, crawl: CrawlSpec, children, params, config_update_needed: bool + ): name = f"crawl-config-{crawl.id}" configmap = children[CMAP].get(name) - if configmap: + if configmap and not config_update_needed: metadata = configmap["metadata"] configmap["metadata"] = { "name": metadata["name"], @@ -390,6 +398,9 @@ class CrawlOperator(BaseOperator): params["config"] = json.dumps(raw_config) + if config_update_needed: + print(f"Updating config for {crawl.id}") + return self.load_from_yaml("crawl_configmap.yaml", params) async def _load_qa_configmap(self, params, children): diff --git a/backend/btrixcloud/operator/models.py b/backend/btrixcloud/operator/models.py index 9f511ee7..3bd449d8 100644 --- a/backend/btrixcloud/operator/models.py +++ b/backend/btrixcloud/operator/models.py @@ -209,6 +209,7 @@ class CrawlStatus(BaseModel): stopReason: Optional[StopReason] = None initRedis: bool = False crawlerImage: Optional[str] = None + lastConfigUpdate: str = "" lastActiveTime: str = "" podStatus: DefaultDict[str, Annotated[PodInfo, Field(default_factory=PodInfo)]] = ( diff --git a/chart/app-templates/crawler.yaml b/chart/app-templates/crawler.yaml index a00d4af3..566ce4f0 100644 --- a/chart/app-templates/crawler.yaml +++ b/chart/app-templates/crawler.yaml @@ -127,7 +127,7 @@ spec: command: - {{ "crawl" if not qa_source_crawl_id else "qa" }} - --config - - /tmp/crawl-config.json + - /tmp/config/crawl-config.json - --workers - "{{ workers }}" - --redisStoreUrl @@ -153,8 +153,7 @@ spec: {% endif %} volumeMounts: - name: crawl-config - mountPath: /tmp/crawl-config.json - subPath: crawl-config.json + mountPath: /tmp/config/ readOnly: True {% if qa_source_crawl_id %}