ensure running crawl configmap is updated when exclusions are added/removed (#2409)
Exclusions are already updated dynamically while the crawler pod is running; this change ensures that new exclusions are also picked up when the crawler pod is restarted:
- mount the configmap in a separate path, avoiding subPath, to allow dynamic updates of the mounted volume
- add a lastConfigUpdate timestamp to CrawlJob
- if lastConfigUpdate in the spec differs from the current value, the configmap is recreated by the operator
- operator: also update the image from the channel, to avoid any issues with updating the crawler in a channel
- only updates for exclusion add/remove so far; can later be expanded to other crawler settings (see #2355 for broader running crawl config updates)
- fixes #2408
This commit is contained in:
parent
905fe059a4
commit
88a9f3baf7
@ -220,6 +220,12 @@ class CrawlManager(K8sAPI):
|
|||||||
proxy_id=crawlconfig.proxyId or DEFAULT_PROXY_ID,
|
proxy_id=crawlconfig.proxyId or DEFAULT_PROXY_ID,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
async def reload_running_crawl_config(self, crawl_id: str):
|
||||||
|
"""force reload of configmap for crawl"""
|
||||||
|
return await self._patch_job(
|
||||||
|
crawl_id, {"lastConfigUpdate": date_to_str(dt_now())}
|
||||||
|
)
|
||||||
|
|
||||||
async def create_qa_crawl_job(
|
async def create_qa_crawl_job(
|
||||||
self,
|
self,
|
||||||
crawlconfig: CrawlConfig,
|
crawlconfig: CrawlConfig,
|
||||||
|
@ -544,6 +544,8 @@ class CrawlOps(BaseCrawlOps):
|
|||||||
regex, cid, org, user, add
|
regex, cid, org, user, add
|
||||||
)
|
)
|
||||||
|
|
||||||
|
await self.crawl_manager.reload_running_crawl_config(crawl.id)
|
||||||
|
|
||||||
await self.crawls.find_one_and_update(
|
await self.crawls.find_one_and_update(
|
||||||
{"_id": crawl_id, "type": "crawl", "oid": org.id},
|
{"_id": crawl_id, "type": "crawl", "oid": org.id},
|
||||||
{"$set": {"config": new_config.dict()}},
|
{"$set": {"config": new_config.dict()}},
|
||||||
|
@ -274,12 +274,9 @@ class CrawlOperator(BaseOperator):
|
|||||||
params["storage_path"] = storage_path
|
params["storage_path"] = storage_path
|
||||||
params["storage_secret"] = storage_secret
|
params["storage_secret"] = storage_secret
|
||||||
|
|
||||||
# only resolve if not already set
|
status.crawlerImage = self.crawl_config_ops.get_channel_crawler_image(
|
||||||
# not automatically updating image for existing crawls
|
crawl.crawler_channel
|
||||||
if not status.crawlerImage:
|
)
|
||||||
status.crawlerImage = self.crawl_config_ops.get_channel_crawler_image(
|
|
||||||
crawl.crawler_channel
|
|
||||||
)
|
|
||||||
|
|
||||||
params["crawler_image"] = status.crawlerImage
|
params["crawler_image"] = status.crawlerImage
|
||||||
|
|
||||||
@ -306,7 +303,16 @@ class CrawlOperator(BaseOperator):
|
|||||||
else:
|
else:
|
||||||
params["force_restart"] = False
|
params["force_restart"] = False
|
||||||
|
|
||||||
children.extend(await self._load_crawl_configmap(crawl, data.children, params))
|
config_update_needed = (
|
||||||
|
spec.get("lastConfigUpdate", "") != status.lastConfigUpdate
|
||||||
|
)
|
||||||
|
status.lastConfigUpdate = spec.get("lastConfigUpdate", "")
|
||||||
|
|
||||||
|
children.extend(
|
||||||
|
await self._load_crawl_configmap(
|
||||||
|
crawl, data.children, params, config_update_needed
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
if crawl.qa_source_crawl_id:
|
if crawl.qa_source_crawl_id:
|
||||||
params["qa_source_crawl_id"] = crawl.qa_source_crawl_id
|
params["qa_source_crawl_id"] = crawl.qa_source_crawl_id
|
||||||
@ -364,11 +370,13 @@ class CrawlOperator(BaseOperator):
|
|||||||
|
|
||||||
return behaviors
|
return behaviors
|
||||||
|
|
||||||
async def _load_crawl_configmap(self, crawl: CrawlSpec, children, params):
|
async def _load_crawl_configmap(
|
||||||
|
self, crawl: CrawlSpec, children, params, config_update_needed: bool
|
||||||
|
):
|
||||||
name = f"crawl-config-{crawl.id}"
|
name = f"crawl-config-{crawl.id}"
|
||||||
|
|
||||||
configmap = children[CMAP].get(name)
|
configmap = children[CMAP].get(name)
|
||||||
if configmap:
|
if configmap and not config_update_needed:
|
||||||
metadata = configmap["metadata"]
|
metadata = configmap["metadata"]
|
||||||
configmap["metadata"] = {
|
configmap["metadata"] = {
|
||||||
"name": metadata["name"],
|
"name": metadata["name"],
|
||||||
@ -390,6 +398,9 @@ class CrawlOperator(BaseOperator):
|
|||||||
|
|
||||||
params["config"] = json.dumps(raw_config)
|
params["config"] = json.dumps(raw_config)
|
||||||
|
|
||||||
|
if config_update_needed:
|
||||||
|
print(f"Updating config for {crawl.id}")
|
||||||
|
|
||||||
return self.load_from_yaml("crawl_configmap.yaml", params)
|
return self.load_from_yaml("crawl_configmap.yaml", params)
|
||||||
|
|
||||||
async def _load_qa_configmap(self, params, children):
|
async def _load_qa_configmap(self, params, children):
|
||||||
|
@ -209,6 +209,7 @@ class CrawlStatus(BaseModel):
|
|||||||
stopReason: Optional[StopReason] = None
|
stopReason: Optional[StopReason] = None
|
||||||
initRedis: bool = False
|
initRedis: bool = False
|
||||||
crawlerImage: Optional[str] = None
|
crawlerImage: Optional[str] = None
|
||||||
|
lastConfigUpdate: str = ""
|
||||||
|
|
||||||
lastActiveTime: str = ""
|
lastActiveTime: str = ""
|
||||||
podStatus: DefaultDict[str, Annotated[PodInfo, Field(default_factory=PodInfo)]] = (
|
podStatus: DefaultDict[str, Annotated[PodInfo, Field(default_factory=PodInfo)]] = (
|
||||||
|
@ -127,7 +127,7 @@ spec:
|
|||||||
command:
|
command:
|
||||||
- {{ "crawl" if not qa_source_crawl_id else "qa" }}
|
- {{ "crawl" if not qa_source_crawl_id else "qa" }}
|
||||||
- --config
|
- --config
|
||||||
- /tmp/crawl-config.json
|
- /tmp/config/crawl-config.json
|
||||||
- --workers
|
- --workers
|
||||||
- "{{ workers }}"
|
- "{{ workers }}"
|
||||||
- --redisStoreUrl
|
- --redisStoreUrl
|
||||||
@ -153,8 +153,7 @@ spec:
|
|||||||
{% endif %}
|
{% endif %}
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
- name: crawl-config
|
- name: crawl-config
|
||||||
mountPath: /tmp/crawl-config.json
|
mountPath: /tmp/config/
|
||||||
subPath: crawl-config.json
|
|
||||||
readOnly: True
|
readOnly: True
|
||||||
|
|
||||||
{% if qa_source_crawl_id %}
|
{% if qa_source_crawl_id %}
|
||||||
|
Loading…
Reference in New Issue
Block a user