From aaf18e70a02e546e14c5bac50e76aa98de9c7777 Mon Sep 17 00:00:00 2001
From: Tessa Walsh
Date: Mon, 15 Jul 2024 22:46:32 -0400
Subject: [PATCH] Add created date to Organization and fix datetimes across backend (#1921)

Fixes #1916

- Add `created` field to Organization and OrgOut, set on org creation
- Add migration to backfill `created` dates from first workflow `created`
- Replace `datetime.now()` and `datetime.utcnow()` across app with consistent
  timezone-aware `utils.dt_now` helper function, which now uses
  `datetime.now(timezone.utc)`. This is in part to ensure consistency in how
  we handle datetimes, and also to get ahead of timezone naive datetime
  creation methods like `datetime.utcnow()` being deprecated in Python 3.12.
  For more, see:
  https://blog.miguelgrinberg.com/post/it-s-time-for-a-change-datetime-utcnow-is-now-deprecated
---
 backend/btrixcloud/auth.py                    |  5 +-
 backend/btrixcloud/background_jobs.py         |  9 ++--
 backend/btrixcloud/colls.py                   | 10 ++--
 backend/btrixcloud/crawlconfigs.py            |  4 +-
 backend/btrixcloud/crawls.py                  |  2 +-
 backend/btrixcloud/db.py                      |  2 +-
 backend/btrixcloud/invites.py                 |  5 +-
 .../migration_0003_mutable_crawl_configs.py   |  5 +-
 .../migrations/migration_0031_org_created.py  | 49 +++++++++++++++++++
 backend/btrixcloud/models.py                  |  9 +++-
 backend/btrixcloud/orgs.py                    | 11 ++---
 backend/btrixcloud/pages.py                   | 16 +++---
 backend/btrixcloud/profiles.py                |  6 +--
 backend/btrixcloud/subs.py                    |  5 +-
 backend/btrixcloud/utils.py                   |  4 +-
 backend/btrixcloud/webhooks.py                | 17 ++++---
 backend/test/test_org.py                      | 12 ++---
 17 files changed, 112 insertions(+), 59 deletions(-)
 create mode 100644 backend/btrixcloud/migrations/migration_0031_org_created.py
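Every timestamp written by the backend now goes through `utils.dt_now()`. For orientation, a minimal sketch of that helper, mirroring the backend/btrixcloud/utils.py hunk further down (the type annotation is added here for clarity): the time is sourced from a timezone-aware clock, while tzinfo is still stripped to keep the existing naive-UTC storage format.

    from datetime import datetime, timezone

    def dt_now() -> datetime:
        """Current UTC time, truncated to whole seconds."""
        # datetime.utcnow() is deprecated as of Python 3.12, so the time comes
        # from an aware clock; tzinfo is then dropped to match the stored format.
        return datetime.now(timezone.utc).replace(microsecond=0, tzinfo=None)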
diff --git a/backend/btrixcloud/auth.py b/backend/btrixcloud/auth.py
index 04f4408b..29508d52 100644
--- a/backend/btrixcloud/auth.py
+++ b/backend/btrixcloud/auth.py
@@ -3,7 +3,7 @@
 import os
 from uuid import UUID, uuid4
 import asyncio
-from datetime import datetime, timedelta
+from datetime import timedelta
 from typing import Optional, Tuple, List
 from passlib import pwd
 from passlib.context import CryptContext
@@ -22,6 +22,7 @@ from fastapi import (
 from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm
 
 from .models import User
+from .utils import dt_now
 
 
 # ============================================================================
@@ -95,7 +96,7 @@ class OA2BearerOrQuery(OAuth2PasswordBearer):
 def generate_jwt(data: dict, minutes: int) -> str:
     """generate JWT token with expiration time (in minutes)"""
     expires_delta = timedelta(minutes=minutes)
-    expire = datetime.utcnow() + expires_delta
+    expire = dt_now() + expires_delta
     payload = data.copy()
     payload["exp"] = expire
     return jwt.encode(payload, PASSWORD_SECRET, algorithm=ALGORITHM)
diff --git a/backend/btrixcloud/background_jobs.py b/backend/btrixcloud/background_jobs.py
index 59204ed3..44220b6e 100644
--- a/backend/btrixcloud/background_jobs.py
+++ b/backend/btrixcloud/background_jobs.py
@@ -25,6 +25,7 @@ from .models import (
     User,
 )
 from .pagination import DEFAULT_PAGE_SIZE, paginated_format
+from .utils import dt_now
 
 if TYPE_CHECKING:
     from .orgs import OrgOps
@@ -163,14 +164,14 @@ class BackgroundJobOps:
                 replication_job.previousAttempts.append(previous_attempt)
             else:
                 replication_job.previousAttempts = [previous_attempt]
-            replication_job.started = datetime.now()
+            replication_job.started = dt_now()
             replication_job.finished = None
             replication_job.success = None
         else:
             replication_job = CreateReplicaJob(
                 id=job_id,
                 oid=org.id,
-                started=datetime.now(),
+                started=dt_now(),
                 file_path=file.filename,
                 object_type=object_type,
                 object_id=object_id,
@@ -243,14 +244,14 @@ class BackgroundJobOps:
                 delete_replica_job.previousAttempts.append(previous_attempt)
             else:
                 delete_replica_job.previousAttempts = [previous_attempt]
-            delete_replica_job.started = datetime.now()
+            delete_replica_job.started = dt_now()
             delete_replica_job.finished = None
             delete_replica_job.success = None
         else:
             delete_replica_job = DeleteReplicaJob(
                 id=job_id,
                 oid=org.id,
-                started=datetime.now(),
+                started=dt_now(),
                 file_path=file.filename,
                 object_id=object_id,
                 object_type=object_type,
diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py
index 6a23fe50..2b81f5e5 100644
--- a/backend/btrixcloud/colls.py
+++ b/backend/btrixcloud/colls.py
@@ -3,7 +3,6 @@ Collections API
 """
 
 from collections import Counter
-from datetime import datetime
 from uuid import UUID, uuid4
 from typing import Optional, List, TYPE_CHECKING, cast
 
@@ -26,6 +25,7 @@ from .models import (
     PaginatedResponse,
     SUCCESSFUL_STATES,
 )
+from .utils import dt_now
 
 if TYPE_CHECKING:
     from .orgs import OrgOps
@@ -75,7 +75,7 @@ class CollectionOps:
         """Add new collection"""
         crawl_ids = coll_in.crawlIds if coll_in.crawlIds else []
         coll_id = uuid4()
-        modified = datetime.utcnow().replace(microsecond=0, tzinfo=None)
+        modified = dt_now()
 
         coll = Collection(
             id=coll_id,
@@ -111,7 +111,7 @@ class CollectionOps:
         if len(query) == 0:
             raise HTTPException(status_code=400, detail="no_update_data")
 
-        query["modified"] = datetime.utcnow().replace(microsecond=0, tzinfo=None)
+        query["modified"] = dt_now()
 
         try:
             result = await self.collections.find_one_and_update(
@@ -134,7 +134,7 @@ class CollectionOps:
         """Add crawls to collection"""
         await self.crawl_ops.add_to_collection(crawl_ids, coll_id, org)
 
-        modified = datetime.utcnow().replace(microsecond=0, tzinfo=None)
+        modified = dt_now()
         result = await self.collections.find_one_and_update(
             {"_id": coll_id},
             {"$set": {"modified": modified}},
@@ -158,7 +158,7 @@ class CollectionOps:
     ) -> CollOut:
         """Remove crawls from collection"""
         await self.crawl_ops.remove_from_collection(crawl_ids, coll_id)
-        modified = datetime.utcnow().replace(microsecond=0, tzinfo=None)
+        modified = dt_now()
         result = await self.collections.find_one_and_update(
             {"_id": coll_id},
             {"$set": {"modified": modified}},
diff --git a/backend/btrixcloud/crawlconfigs.py b/backend/btrixcloud/crawlconfigs.py
index 2e8a39b8..c552deee 100644
--- a/backend/btrixcloud/crawlconfigs.py
+++ b/backend/btrixcloud/crawlconfigs.py
@@ -192,7 +192,7 @@ class CrawlConfigOps:
         data["modifiedBy"] = user.id
         data["modifiedByName"] = user.name
         data["_id"] = uuid4()
-        data["created"] = datetime.utcnow().replace(microsecond=0, tzinfo=None)
+        data["created"] = dt_now()
         data["modified"] = data["created"]
 
         if config.runNow:
@@ -368,7 +368,7 @@ class CrawlConfigOps:
         query = update.dict(exclude_unset=True)
         query["modifiedBy"] = user.id
         query["modifiedByName"] = user.name
-        query["modified"] = datetime.utcnow().replace(microsecond=0, tzinfo=None)
+        query["modified"] = dt_now()
 
         query["profileid"], _ = await self._lookup_profile(update.profileid, org)
 
diff --git a/backend/btrixcloud/crawls.py b/backend/btrixcloud/crawls.py
index 58f897ae..c0e41fd2 100644
--- a/backend/btrixcloud/crawls.py
+++ b/backend/btrixcloud/crawls.py
@@ -783,7 +783,7 @@ class CrawlOps(BaseCrawlOps):
 
         qa_run = QARun(
             id=qa_run_id,
-            started=datetime.now(),
+            started=dt_now(),
             userid=user.id,
             userName=user.name,
             state="starting",
diff --git a/backend/btrixcloud/db.py b/backend/btrixcloud/db.py
index 7724054f..dae83198 100644
--- a/backend/btrixcloud/db.py
+++ b/backend/btrixcloud/db.py
@@ -17,7 +17,7 @@ from pymongo.errors import InvalidName
 from .migrations import BaseMigration
 
 
-CURR_DB_VERSION = "0030"
+CURR_DB_VERSION = "0031"
 
 
 # ============================================================================
diff --git a/backend/btrixcloud/invites.py b/backend/btrixcloud/invites.py
index d65b715e..27870b37 100644
--- a/backend/btrixcloud/invites.py
+++ b/backend/btrixcloud/invites.py
@@ -1,6 +1,5 @@
 """ Invite system management """
 
-from datetime import datetime
 from typing import Optional, Any
 import os
 import urllib.parse
@@ -23,7+22,7 @@ from .models import (
 )
 from .users import UserManager
 from .emailsender import EmailSender
-from .utils import is_bool
+from .utils import is_bool, dt_now
 
 
 # ============================================================================
@@ -195,7 +194,7 @@ class InviteOps:
         invite_pending = InvitePending(
             id=uuid4(),
             oid=oid,
-            created=datetime.utcnow(),
+            created=dt_now(),
             role=invite.role if hasattr(invite, "role") else None,
             # URL decode email address just in case
             email=urllib.parse.unquote(invite.email),
diff --git a/backend/btrixcloud/migrations/migration_0003_mutable_crawl_configs.py b/backend/btrixcloud/migrations/migration_0003_mutable_crawl_configs.py
index c0427bc5..d4b67db7 100644
--- a/backend/btrixcloud/migrations/migration_0003_mutable_crawl_configs.py
+++ b/backend/btrixcloud/migrations/migration_0003_mutable_crawl_configs.py
@@ -2,10 +2,9 @@
 Migration 0003 - Mutable crawl configs and crawl revision history
 """
 
-from datetime import datetime
-
 from btrixcloud.models import Crawl, CrawlConfig
 from btrixcloud.migrations import BaseMigration, MigrationError
+from btrixcloud.utils import dt_now
 
 
 MIGRATION_VERSION = "0003"
@@ -31,7 +30,7 @@ class Migration(BaseMigration):
         if not await crawl_configs.count_documents({}):
             return
 
-        utc_now_datetime = datetime.utcnow().replace(microsecond=0, tzinfo=None)
+        utc_now_datetime = dt_now()
 
         await crawl_configs.update_many(
             {"createdBy": None}, [{"$set": {"createdBy": "$userid"}}]
diff --git a/backend/btrixcloud/migrations/migration_0031_org_created.py b/backend/btrixcloud/migrations/migration_0031_org_created.py
new file mode 100644
index 00000000..b0c2cfb4
--- /dev/null
+++ b/backend/btrixcloud/migrations/migration_0031_org_created.py
@@ -0,0 +1,49 @@
+"""
+Migration 0031 - Organization created field
+"""
+
+from btrixcloud.migrations import BaseMigration
+
+
+MIGRATION_VERSION = "0031"
+
+
+class Migration(BaseMigration):
+    """Migration class."""
+
+    # pylint: disable=unused-argument
+    def __init__(self, mdb, **kwargs):
+        super().__init__(mdb, migration_version=MIGRATION_VERSION)
+
+    async def migrate_up(self):
+        """Perform migration up.
+
+        Add created field to orgs without one, based on first workflow creation date.
+        """
+        # pylint: disable=duplicate-code, line-too-long
+        orgs_db = self.mdb["organizations"]
+        crawl_configs_db = self.mdb["crawl_configs"]
+
+        cursor = orgs_db.find({"created": None})
+        async for org_dict in cursor:
+            oid = org_dict.get("_id")
+            try:
+                cursor = crawl_configs_db.find({"oid": oid}).sort("created", 1).limit(1)
+                workflows = await cursor.to_list(length=1)
+                workflow_dict = workflows[0]
+                workflow_created = workflow_dict.get("created")
+                await orgs_db.find_one_and_update(
+                    {"_id": oid}, {"$set": {"created": workflow_created}}
+                )
+                print(f"Created date set for org {oid}", flush=True)
+            except IndexError:
+                print(
+                    f"Error setting created date for org {oid}, no workflows exist to set date from",
+                    flush=True,
+                )
+            # pylint: disable=broad-exception-caught
+            except Exception as err:
+                print(
+                    f"Error setting created date for org {oid} from first workflow: {err}",
+                    flush=True,
+                )
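The migration above boils down to a small async routine: for every org missing `created`, look up that org's earliest workflow by `created` and copy the date over. A rough standalone sketch under the same assumptions (motor async collections, as used by the migration; the function and variable names here are illustrative, not part of the patch), with orgs that have no workflows simply skipped, matching the IndexError branch:

    from motor.motor_asyncio import AsyncIOMotorDatabase

    async def backfill_org_created(mdb: AsyncIOMotorDatabase) -> None:
        """Copy each org's earliest workflow `created` date onto the org."""
        orgs = mdb["organizations"]
        crawl_configs = mdb["crawl_configs"]

        async for org in orgs.find({"created": None}):
            oid = org["_id"]
            # oldest workflow for this org, if any
            cursor = crawl_configs.find({"oid": oid}).sort("created", 1).limit(1)
            workflows = await cursor.to_list(length=1)
            if not workflows:
                continue
            await orgs.find_one_and_update(
                {"_id": oid}, {"$set": {"created": workflows[0].get("created")}}
            )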
diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py
index a7e0a520..362bc3d5 100644
--- a/backend/btrixcloud/models.py
+++ b/backend/btrixcloud/models.py
@@ -21,6 +21,7 @@ from pydantic import (
 # from fastapi_users import models as fastapi_users_models
 
 from .db import BaseMongoModel
+from .utils import dt_now
 
 # crawl scale for constraint
 MAX_CRAWL_SCALE = int(os.environ.get("MAX_CRAWL_SCALE", 3))
@@ -130,7 +131,7 @@ class FailedLogin(BaseMongoModel):
     Failed login model
     """
 
-    attempted: datetime = datetime.now()
+    attempted: datetime = dt_now()
     email: str
 
     # Consecutive failed logins, reset to 0 on successful login or after
@@ -1129,6 +1130,8 @@ class OrgOut(BaseMongoModel):
     slug: str
     users: Optional[Dict[str, Any]]
 
+    created: Optional[datetime]
+
     default: bool = False
     bytesStored: int
     bytesStoredCrawls: int
@@ -1175,6 +1178,8 @@ class Organization(BaseMongoModel):
     slug: str
     users: Dict[str, UserRole] = {}
 
+    created: Optional[datetime]
+
     default: bool = False
 
     storage: StorageRef
@@ -1724,7 +1729,7 @@ class PageNote(BaseModel):
 
     id: UUID
     text: str
-    created: datetime = datetime.now()
+    created: datetime = dt_now()
     userid: UUID
     userName: str
 
diff --git a/backend/btrixcloud/orgs.py b/backend/btrixcloud/orgs.py
index 0908e005..de857586 100644
--- a/backend/btrixcloud/orgs.py
+++ b/backend/btrixcloud/orgs.py
@@ -11,7 +11,6 @@ import time
 import urllib.parse
 
 from uuid import UUID, uuid4
-from datetime import datetime
 from tempfile import NamedTemporaryFile
 from typing import Optional, TYPE_CHECKING, Dict, Callable, List, AsyncGenerator, Any
 
@@ -68,6 +67,7 @@ from .models import (
 )
 from .pagination import DEFAULT_PAGE_SIZE, paginated_format
 from .utils import (
+    dt_now,
     slug_from_name,
     validate_slug,
     get_duplicate_key_error_field,
@@ -337,6 +337,7 @@ class OrgOps:
             id=id_,
             name=name,
             slug=slug,
+            created=dt_now(),
             storage=self.default_primary,
             quotas=quotas or OrgQuotas(),
             subscription=subscription,
@@ -480,9 +481,7 @@ class OrgOps:
         quota_updates = []
         for prev_update in org.quotaUpdates or []:
             quota_updates.append(prev_update.dict())
-        quota_updates.append(
-            OrgQuotaUpdate(update=update, modified=datetime.now()).dict()
-        )
+        quota_updates.append(OrgQuotaUpdate(update=update, modified=dt_now()).dict())
 
         await self.orgs.find_one_and_update(
             {"_id": org.id},
@@ -675,7 +674,7 @@ class OrgOps:
         if not org_data:
             return 0
         org = Organization.from_dict(org_data)
-        yymm = datetime.utcnow().strftime("%Y-%m")
+        yymm = dt_now().strftime("%Y-%m")
         try:
             return org.monthlyExecSeconds[yymm]
         except KeyError:
@@ -762,7 +761,7 @@ class OrgOps:
         """
         # pylint: disable=too-many-return-statements, too-many-locals
         key = "crawlExecSeconds" if is_exec_time else "usage"
-        yymm = datetime.utcnow().strftime("%Y-%m")
+        yymm = dt_now().strftime("%Y-%m")
         inc_query = {f"{key}.{yymm}": duration}
         if is_qa:
             qa_key = "qaCrawlExecSeconds" if is_exec_time else "qaUsage"
diff --git a/backend/btrixcloud/pages.py b/backend/btrixcloud/pages.py
index dadb0e22..457db352 100644
--- a/backend/btrixcloud/pages.py
+++ b/backend/btrixcloud/pages.py
@@ -25,7 +25,7 @@ from .models import (
     QARunBucketStats,
 )
 from .pagination import DEFAULT_PAGE_SIZE, paginated_format
-from .utils import from_k8s_date, str_list_to_bools
+from .utils import from_k8s_date, str_list_to_bools, dt_now
 
 if TYPE_CHECKING:
     from .crawls import CrawlOps
@@ -106,9 +106,7 @@ class PageOps:
             status=status,
             mime=page_dict.get("mime", "text/html"),
             ts=(
-                from_k8s_date(page_dict.get("ts"))
-                if page_dict.get("ts")
-                else datetime.now()
+                from_k8s_date(page_dict.get("ts")) if page_dict.get("ts") else dt_now()
             ),
         )
         p.compute_page_type()
@@ -271,7 +269,7 @@ class PageOps:
     ) -> bool:
         """Update page heuristics and mime/type from QA run"""
 
-        # modified = datetime.utcnow().replace(microsecond=0, tzinfo=None)
+        # modified = dt_now()
 
         result = await self.pages.find_one_and_update(
             {"_id": page_id, "oid": oid},
@@ -303,7 +301,7 @@ class PageOps:
         query: Dict[str, Union[Optional[bool], str, datetime, UUID]] = {
             "approved": approved
         }
-        query["modified"] = datetime.utcnow().replace(microsecond=0, tzinfo=None)
+        query["modified"] = dt_now()
 
         if user:
             query["userid"] = user.id
@@ -329,7 +327,7 @@ class PageOps:
         """Add note to page"""
         note = PageNote(id=uuid4(), text=text, userid=user.id, userName=user.name)
 
-        modified = datetime.utcnow().replace(microsecond=0, tzinfo=None)
+        modified = dt_now()
 
         result = await self.pages.find_one_and_update(
             {"_id": page_id, "oid": oid, "crawl_id": crawl_id},
@@ -373,7 +371,7 @@ class PageOps:
             )
             page_notes[matching_index] = new_note.dict()
 
-        modified = datetime.utcnow().replace(microsecond=0, tzinfo=None)
+        modified = dt_now()
 
         result = await self.pages.find_one_and_update(
             {"_id": page_id, "oid": oid, "crawl_id": crawl_id},
@@ -402,7 +400,7 @@ class PageOps:
             if not note.get("id") in delete.delete_list:
                 remaining_notes.append(note)
 
-        modified = datetime.utcnow().replace(microsecond=0, tzinfo=None)
+        modified = dt_now()
 
         result = await self.pages.find_one_and_update(
             {"_id": page_id, "oid": oid, "crawl_id": crawl_id},
diff --git a/backend/btrixcloud/profiles.py b/backend/btrixcloud/profiles.py
index f93d9f2f..fa960584 100644
--- a/backend/btrixcloud/profiles.py
+++ b/backend/btrixcloud/profiles.py
@@ -1,7 +1,6 @@
 """ Profile Management """
 
 from typing import Optional, TYPE_CHECKING, Any, cast, Dict, List
-from datetime import datetime
 from uuid import UUID, uuid4
 import os
 
@@ -25,6 +24,7 @@ from .models import (
     PaginatedResponse,
     StorageRef,
 )
+from .utils import dt_now
 
 if TYPE_CHECKING:
     from .orgs import OrgOps
@@ -162,7 +162,7 @@ class ProfileOps:
         """commit profile and shutdown profile browser"""
         # pylint: disable=too-many-locals
 
-        now = datetime.utcnow().replace(microsecond=0, tzinfo=None)
+        now = dt_now()
 
         if existing_profile:
             profileid = existing_profile.id
@@ -252,7 +252,7 @@ class ProfileOps:
         """Update name and description metadata only on existing profile"""
         query = {
             "name": update.name,
-            "modified": datetime.utcnow().replace(microsecond=0, tzinfo=None),
+            "modified": dt_now(),
             "modifiedBy": user.id,
             "modifiedByName": user.name if user.name else user.email,
         }
diff --git a/backend/btrixcloud/subs.py b/backend/btrixcloud/subs.py
index 6ad5f591..8afad03d 100644
--- a/backend/btrixcloud/subs.py
+++ b/backend/btrixcloud/subs.py
@@ -6,8 +6,6 @@
 from typing import Callable, Union, Any, Optional, Tuple, List
 import os
 from uuid import UUID
-from datetime import datetime
-
 from fastapi import Depends, HTTPException, Request
 import aiohttp
 
@@ -30,6 +28,7 @@ from .models import (
     UserRole,
 )
 from .pagination import DEFAULT_PAGE_SIZE, paginated_format
+from .utils import dt_now
 
 
 # if set, will enable this api
@@ -134,7 +133,7 @@ class SubOps:
         """add a subscription event to the db"""
         data = event.dict(exclude_unset=True)
         data["type"] = type_
-        data["timestamp"] = datetime.utcnow()
+        data["timestamp"] = dt_now()
         data["oid"] = oid
 
         await self.subs.insert_one(data)
diff --git a/backend/btrixcloud/utils.py b/backend/btrixcloud/utils.py
index bf207c22..5b0f60e6 100644
--- a/backend/btrixcloud/utils.py
+++ b/backend/btrixcloud/utils.py
@@ -10,7 +10,7 @@
 import os
 import sys
 import re
-from datetime import datetime
+from datetime import datetime, timezone
 from typing import Optional, Dict, Union, List, Any
 from uuid import UUID
 
@@ -52,7 +52,7 @@ def to_k8s_date(dt_val):
 
 def dt_now():
     """get current ts"""
-    return datetime.utcnow().replace(microsecond=0, tzinfo=None)
+    return datetime.now(timezone.utc).replace(microsecond=0, tzinfo=None)
 
 
 def ts_now():
diff --git a/backend/btrixcloud/webhooks.py b/backend/btrixcloud/webhooks.py
index 5b3ec992..7d32447b 100644
--- a/backend/btrixcloud/webhooks.py
+++ b/backend/btrixcloud/webhooks.py
@@ -1,7 +1,6 @@
 """Webhook management"""
 
 import asyncio
-from datetime import datetime
 from typing import List, Union, Optional, TYPE_CHECKING, cast
 from uuid import UUID, uuid4
 
@@ -24,6 +23,7 @@ from .models import (
     PaginatedResponse,
     Organization,
 )
+from .utils import dt_now
 
 if TYPE_CHECKING:
     from .orgs import OrgOps
@@ -173,7 +173,7 @@ class EventWebhookOps:
                             {
                                 "$set": {
                                     "success": True,
-                                    "lastAttempted": datetime.utcnow(),
+                                    "lastAttempted": dt_now(),
                                 },
                                 "$inc": {"attempts": 1},
                             },
@@ -184,7 +184,10 @@ class EventWebhookOps:
             print(f"Webhook notification failed: {err}", flush=True)
             await self.webhooks.find_one_and_update(
                 {"_id": notification.id},
-                {"$set": {"lastAttempted": datetime.utcnow()}, "$inc": {"attempts": 1}},
+                {
+                    "$set": {"lastAttempted": dt_now()},
+                    "$inc": {"attempts": 1},
+                },
             )
 
     async def _create_item_finished_notification(
@@ -207,7 +210,7 @@ class EventWebhookOps:
             event=event,
             oid=org.id,
             body=body,
-            created=datetime.utcnow(),
+            created=dt_now(),
         )
 
         await self.webhooks.insert_one(notification.to_dict())
@@ -232,7 +235,7 @@ class EventWebhookOps:
             event=event,
             oid=org.id,
             body=body,
-            created=datetime.utcnow(),
+            created=dt_now(),
         )
 
         await self.webhooks.insert_one(notification.to_dict())
@@ -335,7 +338,7 @@ class EventWebhookOps:
                 orgId=str(oid),
                 scheduled=scheduled,
             ),
-            created=datetime.utcnow(),
+            created=dt_now(),
         )
 
         await self.webhooks.insert_one(notification.to_dict())
@@ -363,7 +366,7 @@ class EventWebhookOps:
             event=event,
             oid=org.id,
             body=body,
-            created=datetime.utcnow(),
+            created=dt_now(),
        )
 
         await self.webhooks.insert_one(notification.to_dict())
diff --git a/backend/test/test_org.py b/backend/test/test_org.py
index 9ecf78c3..ed6e088d 100644
--- a/backend/test/test_org.py
+++ b/backend/test/test_org.py
@@ -122,10 +122,11 @@ def test_rename_org_duplicate_name(
 
 def test_create_org(admin_auth_headers):
     NEW_ORG_NAME = "New Org"
+    NEW_ORG_SLUG = "new-org"
     r = requests.post(
         f"{API_PREFIX}/orgs/create",
         headers=admin_auth_headers,
-        json={"name": NEW_ORG_NAME, "slug": "new-org"},
+        json={"name": NEW_ORG_NAME, "slug": NEW_ORG_SLUG},
     )
     assert r.status_code == 200
@@ -137,13 +138,12 @@ def test_create_org(admin_auth_headers):
     new_oid = data["id"]
 
     # Verify that org exists.
-    r = requests.get(f"{API_PREFIX}/orgs", headers=admin_auth_headers)
+    r = requests.get(f"{API_PREFIX}/orgs/{new_oid}", headers=admin_auth_headers)
     assert r.status_code == 200
     data = r.json()
-    org_names = []
-    for org in data["items"]:
-        org_names.append(org["name"])
-    assert NEW_ORG_NAME in org_names
+    assert data["name"] == NEW_ORG_NAME
+    assert data["slug"] == NEW_ORG_SLUG
+    assert data["created"]
 
 
 def test_create_org_duplicate_name(admin_auth_headers, non_default_org_id):
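With the model change and migration in place, the org detail endpoint exercised by test_create_org above now reports when an org was created. A minimal usage sketch mirroring that test (API_PREFIX, admin_auth_headers, and new_oid are stand-ins taken from the test suite, not new API surface):

    import requests

    r = requests.get(f"{API_PREFIX}/orgs/{new_oid}", headers=admin_auth_headers)
    r.raise_for_status()
    org = r.json()

    # `created` is set when the org is created (see the OrgOps hunk above),
    # or backfilled from the first workflow's created date by migration 0031
    assert org["created"]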