ensure running crawl configmap is updated when exclusions are added/removed (#2409)
Exclusions are already updated dynamically if the crawler pod is running, but when the crawler pod is restarted, this change ensures that new exclusions are also picked up:
- mount the configmap in a separate path, avoiding subPath, to allow dynamic updates of the mounted volume
- add a lastConfigUpdate timestamp to CrawlJob
- if lastConfigUpdate in the spec is different from the current value recorded in the status, the configmap is recreated by the operator
- operator: also update the image from the channel, to avoid any issues with updating the crawler in the channel
- only updates for exclusion add/remove so far; this can later be expanded to other crawler settings (see #2355 for broader running crawl config updates)
- fixes #2408
This commit is contained in:
parent
905fe059a4
commit
88a9f3baf7
@ -220,6 +220,12 @@ class CrawlManager(K8sAPI):
|
||||
proxy_id=crawlconfig.proxyId or DEFAULT_PROXY_ID,
|
||||
)
|
||||
|
||||
async def reload_running_crawl_config(self, crawl_id: str):
|
||||
"""force reload of configmap for crawl"""
|
||||
return await self._patch_job(
|
||||
crawl_id, {"lastConfigUpdate": date_to_str(dt_now())}
|
||||
)
|
||||
|
||||
async def create_qa_crawl_job(
|
||||
self,
|
||||
crawlconfig: CrawlConfig,
|
||||
|
@ -544,6 +544,8 @@ class CrawlOps(BaseCrawlOps):
|
||||
regex, cid, org, user, add
|
||||
)
|
||||
|
||||
await self.crawl_manager.reload_running_crawl_config(crawl.id)
|
||||
|
||||
await self.crawls.find_one_and_update(
|
||||
{"_id": crawl_id, "type": "crawl", "oid": org.id},
|
||||
{"$set": {"config": new_config.dict()}},
|
||||
|
@ -274,12 +274,9 @@ class CrawlOperator(BaseOperator):
|
||||
params["storage_path"] = storage_path
|
||||
params["storage_secret"] = storage_secret
|
||||
|
||||
# only resolve if not already set
|
||||
# not automatically updating image for existing crawls
|
||||
if not status.crawlerImage:
|
||||
status.crawlerImage = self.crawl_config_ops.get_channel_crawler_image(
|
||||
crawl.crawler_channel
|
||||
)
|
||||
status.crawlerImage = self.crawl_config_ops.get_channel_crawler_image(
|
||||
crawl.crawler_channel
|
||||
)
|
||||
|
||||
params["crawler_image"] = status.crawlerImage
|
||||
|
||||
@ -306,7 +303,16 @@ class CrawlOperator(BaseOperator):
|
||||
else:
|
||||
params["force_restart"] = False
|
||||
|
||||
children.extend(await self._load_crawl_configmap(crawl, data.children, params))
|
||||
config_update_needed = (
|
||||
spec.get("lastConfigUpdate", "") != status.lastConfigUpdate
|
||||
)
|
||||
status.lastConfigUpdate = spec.get("lastConfigUpdate", "")
|
||||
|
||||
children.extend(
|
||||
await self._load_crawl_configmap(
|
||||
crawl, data.children, params, config_update_needed
|
||||
)
|
||||
)
|
||||
|
||||
if crawl.qa_source_crawl_id:
|
||||
params["qa_source_crawl_id"] = crawl.qa_source_crawl_id
|
||||
@ -364,11 +370,13 @@ class CrawlOperator(BaseOperator):
|
||||
|
||||
return behaviors
|
||||
|
||||
async def _load_crawl_configmap(self, crawl: CrawlSpec, children, params):
|
||||
async def _load_crawl_configmap(
|
||||
self, crawl: CrawlSpec, children, params, config_update_needed: bool
|
||||
):
|
||||
name = f"crawl-config-{crawl.id}"
|
||||
|
||||
configmap = children[CMAP].get(name)
|
||||
if configmap:
|
||||
if configmap and not config_update_needed:
|
||||
metadata = configmap["metadata"]
|
||||
configmap["metadata"] = {
|
||||
"name": metadata["name"],
|
||||
@ -390,6 +398,9 @@ class CrawlOperator(BaseOperator):
|
||||
|
||||
params["config"] = json.dumps(raw_config)
|
||||
|
||||
if config_update_needed:
|
||||
print(f"Updating config for {crawl.id}")
|
||||
|
||||
return self.load_from_yaml("crawl_configmap.yaml", params)
|
||||
|
||||
async def _load_qa_configmap(self, params, children):
|
||||
|
@ -209,6 +209,7 @@ class CrawlStatus(BaseModel):
|
||||
stopReason: Optional[StopReason] = None
|
||||
initRedis: bool = False
|
||||
crawlerImage: Optional[str] = None
|
||||
lastConfigUpdate: str = ""
|
||||
|
||||
lastActiveTime: str = ""
|
||||
podStatus: DefaultDict[str, Annotated[PodInfo, Field(default_factory=PodInfo)]] = (
|
||||
|
@ -127,7 +127,7 @@ spec:
|
||||
command:
|
||||
- {{ "crawl" if not qa_source_crawl_id else "qa" }}
|
||||
- --config
|
||||
- /tmp/crawl-config.json
|
||||
- /tmp/config/crawl-config.json
|
||||
- --workers
|
||||
- "{{ workers }}"
|
||||
- --redisStoreUrl
|
||||
@ -153,8 +153,7 @@ spec:
|
||||
{% endif %}
|
||||
volumeMounts:
|
||||
- name: crawl-config
|
||||
mountPath: /tmp/crawl-config.json
|
||||
subPath: crawl-config.json
|
||||
mountPath: /tmp/config/
|
||||
readOnly: True
|
||||
|
||||
{% if qa_source_crawl_id %}
|
||||
|
Loading…
Reference in New Issue
Block a user