ensure running crawl configmap is updated when exclusions are added/removed (#2409)

exclusions are already updated dynamically if crawler pod is running,
but when crawler pod is restarted, this ensures new exclusions are also
picked up:
- mount configmap in separate path, avoiding subPath, to allow dynamic
updates of mounted volume
- adds a lastConfigUpdate timestamp to CrawlJob - if lastConfigUpdate in
spec is different from the value stored in status, the configmap is recreated by operator
- operator: also update image from channel to avoid any issues with
updating crawler in channel
- only updates for exclusion add/remove so far, can later be expanded to
other crawler settings (see: #2355 for broader running crawl config
updates)
- fixes #2408
This commit is contained in:
Ilya Kreymer 2025-02-19 11:42:19 -08:00 committed by GitHub
parent 905fe059a4
commit 88a9f3baf7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 31 additions and 12 deletions

View File

@ -220,6 +220,12 @@ class CrawlManager(K8sAPI):
proxy_id=crawlconfig.proxyId or DEFAULT_PROXY_ID,
)
async def reload_running_crawl_config(self, crawl_id: str):
"""Force a reload of the configmap for a crawl.

Patches the job for ``crawl_id`` so that its ``lastConfigUpdate`` field
holds a freshly generated timestamp string.

:param crawl_id: id of the crawl whose job should be patched
:returns: whatever ``self._patch_job`` returns for this patch
"""
# A new timestamp makes the value differ from the previous one; presumably
# the operator reacts to that change by rebuilding the configmap -- confirm
# against the operator's lastConfigUpdate handling.
return await self._patch_job(
crawl_id, {"lastConfigUpdate": date_to_str(dt_now())}
)
async def create_qa_crawl_job(
self,
crawlconfig: CrawlConfig,

View File

@ -544,6 +544,8 @@ class CrawlOps(BaseCrawlOps):
regex, cid, org, user, add
)
await self.crawl_manager.reload_running_crawl_config(crawl.id)
await self.crawls.find_one_and_update(
{"_id": crawl_id, "type": "crawl", "oid": org.id},
{"$set": {"config": new_config.dict()}},

View File

@ -274,12 +274,9 @@ class CrawlOperator(BaseOperator):
params["storage_path"] = storage_path
params["storage_secret"] = storage_secret
# only resolve if not already set
# not automatically updating image for existing crawls
if not status.crawlerImage:
status.crawlerImage = self.crawl_config_ops.get_channel_crawler_image(
crawl.crawler_channel
)
status.crawlerImage = self.crawl_config_ops.get_channel_crawler_image(
crawl.crawler_channel
)
params["crawler_image"] = status.crawlerImage
@ -306,7 +303,16 @@ class CrawlOperator(BaseOperator):
else:
params["force_restart"] = False
children.extend(await self._load_crawl_configmap(crawl, data.children, params))
config_update_needed = (
spec.get("lastConfigUpdate", "") != status.lastConfigUpdate
)
status.lastConfigUpdate = spec.get("lastConfigUpdate", "")
children.extend(
await self._load_crawl_configmap(
crawl, data.children, params, config_update_needed
)
)
if crawl.qa_source_crawl_id:
params["qa_source_crawl_id"] = crawl.qa_source_crawl_id
@ -364,11 +370,13 @@ class CrawlOperator(BaseOperator):
return behaviors
async def _load_crawl_configmap(self, crawl: CrawlSpec, children, params):
async def _load_crawl_configmap(
self, crawl: CrawlSpec, children, params, config_update_needed: bool
):
name = f"crawl-config-{crawl.id}"
configmap = children[CMAP].get(name)
if configmap:
if configmap and not config_update_needed:
metadata = configmap["metadata"]
configmap["metadata"] = {
"name": metadata["name"],
@ -390,6 +398,9 @@ class CrawlOperator(BaseOperator):
params["config"] = json.dumps(raw_config)
if config_update_needed:
print(f"Updating config for {crawl.id}")
return self.load_from_yaml("crawl_configmap.yaml", params)
async def _load_qa_configmap(self, params, children):

View File

@ -209,6 +209,7 @@ class CrawlStatus(BaseModel):
stopReason: Optional[StopReason] = None
initRedis: bool = False
crawlerImage: Optional[str] = None
lastConfigUpdate: str = ""
lastActiveTime: str = ""
podStatus: DefaultDict[str, Annotated[PodInfo, Field(default_factory=PodInfo)]] = (

View File

@ -127,7 +127,7 @@ spec:
command:
- {{ "crawl" if not qa_source_crawl_id else "qa" }}
- --config
- /tmp/crawl-config.json
- /tmp/config/crawl-config.json
- --workers
- "{{ workers }}"
- --redisStoreUrl
@ -153,8 +153,7 @@ spec:
{% endif %}
volumeMounts:
- name: crawl-config
mountPath: /tmp/crawl-config.json
subPath: crawl-config.json
mountPath: /tmp/config/
readOnly: True
{% if qa_source_crawl_id %}