Fixes #2406

Converts migration 0042 to launch a background job (parallelized across several pods) to migrate all crawls by optimizing their pages and setting `version: 2` on the crawl when complete. Also optimizes MongoDB queries for better performance.

Migration Improvements:
- Add `isMigrating` and `version` fields to `BaseCrawl`
- Add new background job type to use in migration with accompanying `migration_job.yaml` template that allows for parallelization
- Add new API endpoint to launch this crawl migration job, and ensure that we have list and retry endpoints for superusers that work with background jobs that aren't tied to a specific org
- Rework background job models and methods now that not all background jobs are tied to a single org
- Ensure new crawls and uploads have `version` set to `2`
- Modify crawl and collection replay.json endpoints to only include fields for replay optimization (`initialPages`, `pageQueryUrl`, `preloadResources`) if all relevant crawls/uploads have `version` set to `2`
- Remove `distinct` calls from migration pathways
- Consolidate collection recompute stats

Query Optimizations:
- Remove all uses of $group and $facet
- Optimize /replay.json endpoints to precompute preload_resources, avoid fetching crawl list twice
- Optimize /collections endpoint by not fetching resources
- Rename /urls -> /pageUrlCounts and avoid $group, instead sort with index, either by seed + ts or by url to get top matches.
- Use $gte instead of $regex to get prefix matches on URL
- Use $text instead of $regex to get text search on title
- Remove total from /pages and /pageUrlCounts queries by not using $facet
- frontend: only call /pageUrlCounts when dialog is opened.

---------

Co-authored-by: Ilya Kreymer <ikreymer@gmail.com>
Co-authored-by: Emma Segal-Grossman <hi@emma.cafe>
Co-authored-by: Ilya Kreymer <ikreymer@users.noreply.github.com>
189 lines
5.8 KiB
Python
189 lines
5.8 KiB
Python
"""Basic Email Sending Support"""
|
|
|
|
from datetime import datetime
import os
import smtplib
import ssl
from uuid import UUID
from typing import Optional, Union
from urllib.parse import quote

from email.message import EmailMessage
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from fastapi import HTTPException
from fastapi.templating import Jinja2Templates

from .models import CreateReplicaJob, DeleteReplicaJob, Organization, InvitePending
from .utils import is_bool, get_origin
|
|
|
|
|
|
# pylint: disable=too-few-public-methods, too-many-instance-attributes
|
|
class EmailSender:
    """SMTP Email Sender

    Renders Jinja2 templates from the ``email-templates`` directory located
    next to this module and delivers the result over SMTP. All settings are
    read from environment variables in ``__init__``. When no SMTP host is
    configured, messages are rendered (and optionally logged) but not sent.
    """

    # Username passed to SMTP login (EMAIL_SENDER)
    sender: str
    # SMTP login password; login is skipped when empty (EMAIL_PASSWORD)
    password: str
    # Address used for both the From and Reply-To headers (EMAIL_REPLY_TO)
    reply_to: str
    # SMTP host; sending is skipped entirely when unset (EMAIL_SMTP_HOST)
    smtp_server: Optional[str]
    smtp_port: int
    # Whether to upgrade the connection with STARTTLS before sending
    smtp_use_tls: bool
    # Passed to templates as ``support_email`` (EMAIL_SUPPORT)
    support_email: str
    # Passed to the cancellation template as ``survey_url`` (USER_SURVEY_URL)
    survey_url: str

    templates: Jinja2Templates

    # When set, every rendered template is printed to stdout
    log_sent_emails: bool

    def __init__(self):
        """Load settings from the environment and set up the template loader"""
        self.sender = os.environ.get("EMAIL_SENDER") or "Browsertrix admin"
        self.password = os.environ.get("EMAIL_PASSWORD") or ""
        self.reply_to = os.environ.get("EMAIL_REPLY_TO") or self.sender
        self.support_email = os.environ.get("EMAIL_SUPPORT") or self.reply_to
        self.survey_url = os.environ.get("USER_SURVEY_URL") or ""
        self.smtp_server = os.environ.get("EMAIL_SMTP_HOST")
        # Use `or` rather than a .get() default so that a variable that is
        # set but empty falls back to 587 instead of raising ValueError,
        # matching how the other settings above handle empty values.
        self.smtp_port = int(os.environ.get("EMAIL_SMTP_PORT") or 587)
        self.smtp_use_tls = is_bool(os.environ.get("EMAIL_SMTP_USE_TLS"))

        self.log_sent_emails = is_bool(os.environ.get("LOG_SENT_EMAILS"))

        self.templates = Jinja2Templates(
            directory=os.path.join(os.path.dirname(__file__), "email-templates")
        )

    def _build_message(
        self, receiver: str, subject: str, html: Optional[str], text: str
    ) -> Union[EmailMessage, MIMEMultipart]:
        """Assemble the message object with its body and headers

        A ``multipart/alternative`` message (plain text + HTML) is built when
        ``html`` is non-empty, otherwise a plain-text ``EmailMessage``.
        """
        msg: Union[EmailMessage, MIMEMultipart]

        if html:
            msg = MIMEMultipart("alternative")
            msg.attach(MIMEText(text.strip(), "plain"))
            msg.attach(MIMEText(html.strip(), "html"))
        else:
            msg = EmailMessage()
            msg.set_content(text.strip())

        msg["Subject"] = subject.strip()
        # From is the reply-to address; self.sender is only used as the
        # SMTP login name in _send_encrypted.
        msg["From"] = self.reply_to
        msg["To"] = receiver
        msg["Reply-To"] = msg["From"]
        return msg

    def _send_encrypted(self, receiver: str, name: str, **kwargs) -> None:
        """Send Encrypted SMTP Message using given template name

        The template ``name`` is rendered with ``kwargs`` as its context and
        the output is split on ``~~~`` into either ``subject, html, text``
        (three parts) or ``subject, text`` (two parts).

        The connection is only upgraded with STARTTLS when ``smtp_use_tls``
        is set, and login is only attempted when a password is configured.
        If no SMTP server is configured, a notice is printed and nothing is
        sent.

        Raises:
            HTTPException: 500 ``invalid_email_template`` if the rendered
                template does not split into two or three parts.
        """
        full = self.templates.env.get_template(name).render(kwargs)
        parts = full.split("~~~")

        html: Optional[str]
        if len(parts) == 3:
            subject, html, text = parts
        elif len(parts) == 2:
            subject, text = parts
            html = None
        else:
            raise HTTPException(status_code=500, detail="invalid_email_template")

        if self.log_sent_emails:
            print(full, flush=True)

        if not self.smtp_server:
            print(
                f'Email: created "{name}" msg for "{receiver}", but not sent (no SMTP server set)',
                flush=True,
            )
            return

        msg = self._build_message(receiver, subject, html, text)

        with smtplib.SMTP(self.smtp_server, self.smtp_port) as server:
            if self.smtp_use_tls:
                # The TLS context is only needed (and only built) when the
                # connection is actually upgraded.
                server.ehlo()
                server.starttls(context=ssl.create_default_context())
                server.ehlo()
            if self.password:
                server.login(self.sender, self.password)
            server.send_message(msg)

    def send_user_validation(
        self, receiver_email: str, token: str, headers: Optional[dict] = None
    ):
        """Send email to validate registration email address

        Renders the ``validate`` template with ``origin`` (as returned by
        ``get_origin(headers)``) and ``token``.
        """
        origin = get_origin(headers)

        self._send_encrypted(receiver_email, "validate", origin=origin, token=token)

    # pylint: disable=too-many-arguments
    def send_user_invite(
        self,
        invite: InvitePending,
        token: UUID,
        org_name: str,
        is_new: bool,
        headers: Optional[dict] = None,
    ):
        """Send email to invite new user

        Renders the ``invite`` template. The invite link points at
        ``/join/<token>`` when ``is_new`` is true and at
        ``/invite/accept/<token>`` otherwise, with the invited address
        appended as the ``email`` query parameter. The inviter's email is
        passed to the template as ``sender`` unless the invite comes from a
        superuser, in which case it is left blank.
        """
        origin = get_origin(headers)

        receiver_email = invite.email or ""

        # Percent-encode the address before placing it in the query string:
        # characters such as "+", "&" or "#" are legal in email addresses but
        # would otherwise be misread when the link is parsed ("+" becomes a
        # space). "@" is kept literal so ordinary addresses yield the same
        # URL as before.
        email_param = quote(receiver_email, safe="@")

        link_path = "join" if is_new else "invite/accept"
        invite_url = f"{origin}/{link_path}/{token}?email={email_param}"

        self._send_encrypted(
            receiver_email,
            "invite",
            invite_url=invite_url,
            is_new=is_new,
            sender=invite.inviterEmail if not invite.fromSuperuser else "",
            org_name=org_name,
            support_email=self.support_email,
        )

    def send_user_forgot_password(self, receiver_email, token, headers=None):
        """Send password reset email with token

        Renders the ``password_reset`` template with ``origin`` (as returned
        by ``get_origin(headers)``), ``token`` and the support email.
        """
        origin = get_origin(headers)

        self._send_encrypted(
            receiver_email,
            "password_reset",
            origin=origin,
            token=token,
            support_email=self.support_email,
        )

    def send_background_job_failed(
        self,
        job: Union[CreateReplicaJob, DeleteReplicaJob],
        finished: datetime,
        receiver_email: str,
        org: Optional[Organization] = None,
    ):
        """Send background job failed email to superuser

        Renders the ``failed_bg_job`` template with ``job``, ``org`` (which
        may be ``None``) and ``finished``.
        """
        self._send_encrypted(
            receiver_email, "failed_bg_job", job=job, org=org, finished=finished
        )

    def send_subscription_will_be_canceled(
        self,
        cancel_date: datetime,
        user_name: str,
        receiver_email: str,
        org: Organization,
        headers=None,
    ):
        """Send email indicating subscription is cancelled and all org data will be deleted

        Renders the ``sub_cancel`` template with a link to the org
        (``<origin>/orgs/<slug>/``), the user and org names, the cancellation
        date, the support email and the survey URL.
        """
        origin = get_origin(headers)
        org_url = f"{origin}/orgs/{org.slug}/"

        self._send_encrypted(
            receiver_email,
            "sub_cancel",
            org_url=org_url,
            user_name=user_name,
            org_name=org.name,
            cancel_date=cancel_date,
            support_email=self.support_email,
            survey_url=self.survey_url,
        )
|