Regression fix: add profiles/ prefix to profile filenames (#1365)

Fixes #1364 

Regression fix for issue introduced in storage refactoring (see issue
for more details).

Changes:
1. Add `profiles/` prefix to profile filename passed in to crawler for
profile creation and written into db
2. Remove hardcoded `profiles/` prefix from crawler YAML
3. Add migration to add `profiles/` prefix to profile filenames that
don't already have it, including updating PROFILE_FILENAME in ConfigMaps

This way between the related storage document and the profile filename,
we have the full path to the object in the database rather than relying
on additional prefixes hardcoded into k8s job YAML files.

Note that, as a follow-up, it will be necessary to manually move any
profiles that were written into the `<oid>` "directory" in object
storage into `<oid>/profiles` instead. This should only affect
profiles created very recently in a 1.8.0-beta release.
This commit is contained in:
Tessa Walsh 2023-11-09 20:44:16 -05:00 committed by GitHub
parent a71815a342
commit 82a5d1e4e4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 76 additions and 3 deletions

View File

@ -16,7 +16,7 @@ from pymongo.errors import InvalidName
from .migrations import BaseMigration
CURR_DB_VERSION = "0020"
CURR_DB_VERSION = "0021"
# ============================================================================

View File

@ -0,0 +1,73 @@
"""
Migration 0021 - Profile filenames
"""
from btrixcloud.crawlmanager import CrawlManager
from btrixcloud.migrations import BaseMigration
from btrixcloud.models import CrawlConfig, Profile, UpdateCrawlConfig
MIGRATION_VERSION = "0021"
# pylint: disable=duplicate-code, broad-exception-caught
class Migration(BaseMigration):
    """Migration 0021: ensure stored profile filenames carry `profiles/`."""

    def __init__(self, mdb, migration_version=MIGRATION_VERSION):
        super().__init__(mdb, migration_version)

    async def migrate_up(self):
        """Apply the migration.

        Runs in two passes:

        1. Walk every document in the ``profiles`` collection and, where
           the profile has a resource whose filename does not already
           start with ``profiles/``, rewrite ``resource.filename`` with
           that prefix.
        2. Walk every crawl config that references a profile and push the
           profile's stored filename through
           ``CrawlManager.update_crawl_config`` so the configmaps are
           updated.

        Both passes are best-effort: a failure on one document is printed
        and the loop moves on to the next one.
        """
        profiles_coll = self.mdb["profiles"]
        configs_coll = self.mdb["crawl_configs"]

        # Pass 1: add the prefix to any profile filename that lacks it.
        async for raw_profile in profiles_coll.find({}):
            profile = Profile.from_dict(raw_profile)
            if not profile.resource:
                continue

            filename = profile.resource.filename
            if filename.startswith("profiles/"):
                # Already migrated (or created after the fix); leave as is.
                continue

            try:
                await profiles_coll.find_one_and_update(
                    {"_id": profile.id},
                    {"$set": {"resource.filename": f"profiles/{filename}"}},
                )
            except Exception as err:
                print(
                    f"Error updating filename for profile {profile.name}: {err}",
                    flush=True,
                )

        # Pass 2: update profile filenames in configmaps.
        manager = CrawlManager()
        with_profile = {"profileid": {"$nin": ["", None]}}
        async for raw_config in configs_coll.find(with_profile):
            config = CrawlConfig.from_dict(raw_config)

            # Re-read the profile so the value written in pass 1 is used.
            linked_profile = await profiles_coll.find_one({"_id": config.profileid})
            if not linked_profile:
                continue

            profile = Profile.from_dict(linked_profile)
            if not profile.resource:
                continue

            updated_filename = profile.resource.filename
            print(
                f"Updating Crawl Config {config.id}: profile_filename: {updated_filename}"
            )

            try:
                await manager.update_crawl_config(
                    config, UpdateCrawlConfig(), profile_filename=updated_filename
                )
            # pylint: disable=broad-except
            except Exception as failure:
                print(
                    "Skip crawl config migration due to error, likely missing config",
                    failure,
                )

View File

@ -155,7 +155,7 @@ class ProfileOps:
if not profileid:
profileid = uuid4()
filename_data = {"filename": f"profile-{profileid}.tar.gz"}
filename_data = {"filename": f"profiles/profile-{profileid}.tar.gz"}
json = await self._send_browser_req(
browser_commit.browserid, "/createProfileJS", "POST", json=filename_data

View File

@ -105,7 +105,7 @@ spec:
- {{ redis_url }}
{%- if profile_filename %}
- --profile
- "@profiles/{{ profile_filename }}"
- "@{{ profile_filename }}"
{%- endif %}
volumeMounts: