Updates pydantic to 2.x and Python to 3.12, with additional type fixes: all Optional[] types must have a default value; constrained types are updated; URL types are converted from str; tests are updated. Fixes #1940
		
			
				
	
	
		
			116 lines
		
	
	
		
			4.0 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			116 lines
		
	
	
		
			4.0 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| """
 | |
| Migration 0004 - Ensuring all config.seeds are Seeds not HttpUrls
 | |
| """
 | |
| 
 | |
| from pydantic import HttpUrl
 | |
| 
 | |
| from btrixcloud.models import Crawl, CrawlConfig, ScopeType, Seed
 | |
| from btrixcloud.migrations import BaseMigration
 | |
| 
 | |
| 
 | |
| MIGRATION_VERSION = "0004"  # passed to BaseMigration.__init__ as migration_version
 | |
| 
 | |
| 
 | |
class Migration(BaseMigration):
    """Migration 0004: store every ``config.seeds`` entry as a Seed document.

    The ``crawl_configs`` collection is rewritten first, then the ``crawls``
    collection (which carries its own ``config.seeds``), and finally both
    collections are re-read through their models as a sanity check.
    """

    # pylint: disable=unused-argument
    def __init__(self, mdb, **kwargs):
        """Bind the migration to *mdb* and register its version.

        Extra keyword arguments are accepted for interface compatibility and
        are not used here.
        """
        super().__init__(mdb, migration_version=MIGRATION_VERSION)

    @staticmethod
    def _seed_to_dict(seed):
        """Flatten a Seed model into the plain dict written to the database."""
        return {
            # Stored as a plain string rather than a URL object.
            "url": str(seed.url),
            "scopeType": seed.scopeType,
            "include": seed.include,
            "exclude": seed.exclude,
            "sitemap": seed.sitemap,
            "allowHash": seed.allowHash,
            "depth": seed.depth,
            "extraHops": seed.extraHops,
        }

    @classmethod
    def _convert_seeds(cls, seed_list):
        """Convert one stored seed list.

        Returns a ``(seed_dicts, changed)`` tuple: ``seed_dicts`` is the list
        to store, and ``changed`` is True when at least one entry had to be
        converted (so the document needs rewriting).
        """
        seed_dicts = []
        changed = False

        # A missing or None seed list is treated as empty.
        for seed in seed_list or []:
            if isinstance(seed, dict):
                # Assumed to be a seed already stored in document form; it is
                # kept untouched so that rewriting a mixed list does not drop
                # it. Its shape is validated by the verification pass.
                seed_dicts.append(seed)
            elif isinstance(seed, Seed):
                seed_dicts.append(cls._seed_to_dict(seed))
                changed = True
            elif isinstance(seed, (str, HttpUrl)):
                # Bare URLs become page-scoped seeds.
                new_seed = Seed(url=str(seed), scopeType=ScopeType.PAGE)
                seed_dicts.append(cls._seed_to_dict(new_seed))
                changed = True
            # Entries of any other type are skipped, as before.

        return seed_dicts, changed

    async def _migrate_collection(self, collection):
        """Rewrite ``config.seeds`` for each document in *collection* that needs it."""
        async for doc in collection.find({}):
            seed_list = (doc.get("config") or {}).get("seeds")
            seed_dicts, changed = self._convert_seeds(seed_list)

            # Only touch documents in which something was actually converted.
            if changed:
                await collection.find_one_and_update(
                    {"_id": doc["_id"]},
                    {"$set": {"config.seeds": seed_dicts}},
                )

    @staticmethod
    async def _verify_collection(collection, model):
        """Load every document of *collection* via ``model.from_dict`` and check its seeds.

        Raises AssertionError if a loaded seed is not a Seed or has no url.
        """
        async for doc in collection.find({}):
            record = model.from_dict(doc)
            for seed in record.config.seeds or []:
                assert isinstance(seed, Seed)
                assert seed.url

    async def migrate_up(self):
        """Perform migration up.

        Convert any crawlconfig.config.seed HttpUrl values to Seeds with url value.
        """
        crawl_configs = self.mdb["crawl_configs"]
        crawls = self.mdb["crawls"]

        # Migrate workflows
        await self._migrate_collection(crawl_configs)

        # Migrate seeds copied into crawls
        await self._migrate_collection(crawls)

        # Test migration
        await self._verify_collection(crawl_configs, CrawlConfig)
        await self._verify_collection(crawls, Crawl)
 |