browsertrix/backend/btrixcloud/basecrawls.py
Tessa Walsh 14189b7cfb
Add crawl pages and related API endpoints (#1516)
Fixes #1502 

- Adds pages to database as they get added to Redis during crawl
- Adds migration to add pages to database for older crawls from
pages.jsonl and extraPages.jsonl files in WACZ
- Adds GET, list GET, and PATCH update endpoints for pages (see the sketch below)
- Adds POST (add), PATCH, and POST (delete) endpoints for page notes,
each with their own id, timestamp, and user info in addition to text
- Adds page_ops methods for (1) adding resources/URLs to a page, and (2)
adding automated heuristics and supplemental info (mime, type, etc.) to a
page, for use in the crawl QA job
- Modifies `Migration` class to accept kwargs so that we can pass in ops
classes as needed for migrations
- Deletes WACZ files and pages from database for failed crawls during
crawl_finished process
- Deletes crawl pages when a crawl is deleted

Note: Requires crawler version 1.0.0 beta3 or later, with support for
`--writePagesToRedis` to populate pages at crawl completion. Beta 4 is
configured in the test chart; the chart should be upgraded to stable 1.0.0
once it is released.

Connected to https://github.com/webrecorder/browsertrix-crawler/pull/464
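
For illustration only, a minimal sketch of calling the new page endpoints
from Python. The base URL, auth header, and exact path shapes are
assumptions based on the list above, not part of this change:

    import requests  # sketch only; any HTTP client works

    API = "https://app.example.com/api"            # hypothetical base URL
    HEADERS = {"Authorization": "Bearer <token>"}  # hypothetical auth token
    oid, crawl_id, page_id = "<org-id>", "<crawl-id>", "<page-id>"  # placeholders

    # list pages for a crawl (assumed path shape)
    pages = requests.get(f"{API}/orgs/{oid}/crawls/{crawl_id}/pages", headers=HEADERS).json()

    # fetch a single page (assumed path shape)
    page = requests.get(f"{API}/orgs/{oid}/crawls/{crawl_id}/pages/{page_id}", headers=HEADERS).json()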

---------

Co-authored-by: Ilya Kreymer <ikreymer@gmail.com>
2024-02-28 12:11:35 -05:00


""" base crawl type """
import os
from datetime import timedelta
from typing import Optional, List, Union, Dict, Any, Type, TYPE_CHECKING, cast
from uuid import UUID
import urllib.parse
import contextlib
import asyncio
from fastapi import HTTPException, Depends
from .models import (
CrawlFile,
CrawlFileOut,
BaseCrawl,
CrawlOut,
CrawlOutWithResources,
UpdateCrawl,
DeleteCrawlList,
Organization,
PaginatedResponse,
User,
StorageRef,
RUNNING_AND_STARTING_STATES,
SUCCESSFUL_STATES,
)
from .pagination import paginated_format, DEFAULT_PAGE_SIZE
from .utils import dt_now
if TYPE_CHECKING:
from .crawlconfigs import CrawlConfigOps
from .crawlmanager import CrawlManager
from .users import UserManager
from .orgs import OrgOps
from .colls import CollectionOps
from .storages import StorageOps
from .webhooks import EventWebhookOps
from .background_jobs import BackgroundJobOps
from .pages import PageOps
else:
CrawlConfigOps = UserManager = OrgOps = CollectionOps = PageOps = object
CrawlManager = StorageOps = EventWebhookOps = BackgroundJobOps = object
# Presign duration must be less than 604800 seconds (one week),
# so set this one minute short of a week.
PRESIGN_MINUTES_MAX = 10079
PRESIGN_MINUTES_DEFAULT = PRESIGN_MINUTES_MAX
# ============================================================================
# pylint: disable=too-many-instance-attributes
class BaseCrawlOps:
"""operations that apply to all crawls"""
# pylint: disable=duplicate-code, too-many-arguments, too-many-locals
crawl_configs: CrawlConfigOps
crawl_manager: CrawlManager
user_manager: UserManager
orgs: OrgOps
colls: CollectionOps
storage_ops: StorageOps
event_webhook_ops: EventWebhookOps
background_job_ops: BackgroundJobOps
page_ops: PageOps
def __init__(
self,
mdb,
users: UserManager,
orgs: OrgOps,
crawl_manager: CrawlManager,
crawl_configs: CrawlConfigOps,
colls: CollectionOps,
storage_ops: StorageOps,
event_webhook_ops: EventWebhookOps,
background_job_ops: BackgroundJobOps,
):
self.crawls = mdb["crawls"]
self.crawl_manager = crawl_manager
self.crawl_configs = crawl_configs
self.user_manager = users
self.orgs = orgs
self.colls = colls
self.storage_ops = storage_ops
self.event_webhook_ops = event_webhook_ops
self.background_job_ops = background_job_ops
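        # placeholder only; the real PageOps instance is injected later via set_page_ops()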
self.page_ops = cast(PageOps, None)
presign_duration_minutes = int(
os.environ.get("PRESIGN_DURATION_MINUTES") or PRESIGN_MINUTES_DEFAULT
)
self.presign_duration_seconds = (
min(presign_duration_minutes, PRESIGN_MINUTES_MAX) * 60
)
def set_page_ops(self, page_ops):
"""set page ops reference"""
self.page_ops = page_ops
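    # Wiring sketch (comment only, not executed; assumes a PageOps instance
    # constructed elsewhere):
    #
    #     base_crawl_ops = BaseCrawlOps(mdb, users, orgs, crawl_manager,
    #                                   crawl_configs, colls, storage_ops,
    #                                   event_webhook_ops, background_job_ops)
    #     base_crawl_ops.set_page_ops(page_ops)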
async def get_crawl_raw(
self,
crawlid: str,
org: Optional[Organization] = None,
type_: Optional[str] = None,
project: Optional[dict[str, bool]] = None,
):
"""Get data for single crawl"""
query: dict[str, object] = {"_id": crawlid}
if org:
query["oid"] = org.id
if type_:
query["type"] = type_
res = await self.crawls.find_one(query, project)
if not res:
raise HTTPException(status_code=404, detail=f"Crawl not found: {crawlid}")
return res
async def _files_to_resources(self, files, org, crawlid):
if not files:
return []
crawl_files = [CrawlFile(**data) for data in files]
return await self._resolve_signed_urls(crawl_files, org, crawlid)
async def get_crawl(
self,
crawlid: str,
org: Optional[Organization] = None,
type_: Optional[str] = None,
cls_type: Type[Union[CrawlOut, CrawlOutWithResources]] = CrawlOutWithResources,
):
"""Get data for single base crawl"""
res = await self.get_crawl_raw(crawlid, org, type_)
if cls_type == CrawlOutWithResources:
res["resources"] = await self._files_to_resources(
res.get("files"), org, crawlid
)
if res.get("collectionIds"):
res["collections"] = await self.colls.get_collection_names(
res.get("collectionIds")
)
res.pop("files", None)
res.pop("errors", None)
crawl = cls_type.from_dict(res)
if crawl.type == "crawl":
crawl = await self._resolve_crawl_refs(crawl, org)
if crawl.config and crawl.config.seeds:
crawl.config.seeds = None
crawl.storageQuotaReached = await self.orgs.storage_quota_reached(crawl.oid)
crawl.execMinutesQuotaReached = await self.orgs.exec_mins_quota_reached(
crawl.oid
)
return crawl
async def get_resource_resolved_raw_crawl(
self, crawlid: str, org: Organization, type_=None
):
"""return single base crawl with resources resolved"""
res = await self.get_crawl_raw(crawlid=crawlid, type_=type_, org=org)
res["resources"] = await self._files_to_resources(
res.get("files"), org, res["_id"]
)
return res
async def _update_crawl_collections(
self, crawl_id: str, org: Organization, collection_ids: List[UUID]
):
"""Update crawl collections to match updated list."""
crawl = await self.get_crawl(crawl_id, org, cls_type=CrawlOut)
prior_coll_ids = set(crawl.collectionIds)
updated_coll_ids = set(collection_ids)
# Add new collections
added = list(updated_coll_ids.difference(prior_coll_ids))
for coll_id in added:
await self.colls.add_crawls_to_collection(coll_id, [crawl_id], org)
# Remove collections crawl no longer belongs to
removed = list(prior_coll_ids.difference(updated_coll_ids))
for coll_id in removed:
await self.colls.remove_crawls_from_collection(coll_id, [crawl_id], org)
async def update_crawl(
self, crawl_id: str, org: Organization, update: UpdateCrawl, type_=None
):
"""Update existing crawl"""
update_values = update.dict(exclude_unset=True)
if len(update_values) == 0:
raise HTTPException(status_code=400, detail="no_update_data")
# Update collections then unset from update_values
# We handle these separately due to updates required for collection changes
collection_ids = update_values.get("collectionIds")
if collection_ids is not None:
await self._update_crawl_collections(crawl_id, org, collection_ids)
update_values.pop("collectionIds", None)
query = {"_id": crawl_id, "oid": org.id}
if type_:
query["type"] = type_
# update in db
result = await self.crawls.find_one_and_update(
query,
{"$set": update_values},
)
if not result:
raise HTTPException(status_code=404, detail="crawl_not_found")
return {"updated": True}
async def update_crawl_state(self, crawl_id: str, state: str):
"""called only when job container is being stopped/canceled"""
data = {"state": state}
        # if canceled, set the finish time here
if state == "canceled":
data["finished"] = dt_now()
await self.crawls.find_one_and_update(
{
"_id": crawl_id,
"type": "crawl",
"state": {"$in": RUNNING_AND_STARTING_STATES},
},
{"$set": data},
)
async def update_usernames(self, userid: UUID, updated_name: str) -> None:
"""Update username references matching userid"""
await self.crawls.update_many(
{"userid": userid}, {"$set": {"userName": updated_name}}
)
async def add_crawl_file_replica(
self, crawl_id: str, filename: str, ref: StorageRef
) -> dict[str, object]:
"""Add replica StorageRef to existing CrawlFile"""
return await self.crawls.find_one_and_update(
{"_id": crawl_id, "files.filename": filename},
{
"$addToSet": {
"files.$.replicas": {"name": ref.name, "custom": ref.custom}
}
},
)
async def shutdown_crawl(self, crawl_id: str, org: Organization, graceful: bool):
"""stop or cancel specified crawl"""
crawl = await self.get_crawl_raw(crawl_id, org)
if crawl.get("type") != "crawl":
return
result = None
try:
result = await self.crawl_manager.shutdown_crawl(
crawl_id, graceful=graceful
)
if result.get("success"):
if graceful:
await self.crawls.find_one_and_update(
{"_id": crawl_id, "type": "crawl", "oid": org.id},
{"$set": {"stopping": True}},
)
return result
except Exception as exc:
# pylint: disable=raise-missing-from
# if reached here, probably crawl doesn't exist anymore
raise HTTPException(
status_code=404, detail=f"crawl_not_found, (details: {exc})"
)
# if job no longer running, canceling is considered success,
# but graceful stoppage is not possible, so would be a failure
if result.get("error") == "Not Found":
if not graceful:
await self.update_crawl_state(crawl_id, "canceled")
crawl = await self.get_crawl_raw(crawl_id, org)
if not await self.crawl_configs.stats_recompute_last(
crawl["cid"], 0, -1
):
raise HTTPException(
status_code=404,
detail=f"crawl_config_not_found: {crawl['cid']}",
)
return {"success": True}
# return whatever detail may be included in the response
raise HTTPException(status_code=400, detail=result)
async def delete_crawls(
self,
org: Organization,
delete_list: DeleteCrawlList,
type_: str,
user: Optional[User] = None,
):
"""Delete a list of crawls by id for given org"""
cids_to_update: dict[str, dict[str, int]] = {}
size = 0
for crawl_id in delete_list.crawl_ids:
crawl = await self.get_crawl_raw(crawl_id, org)
if crawl.get("type") != type_:
continue
# Ensure user has appropriate permissions for all crawls in list:
# - Crawler users can delete their own crawls
# - Org owners can delete any crawls in org
if user and (crawl.get("userid") != user.id) and not org.is_owner(user):
raise HTTPException(status_code=403, detail="not_allowed")
if type_ == "crawl" and not crawl.get("finished"):
try:
await self.shutdown_crawl(crawl_id, org, graceful=False)
except Exception as exc:
# pylint: disable=raise-missing-from
raise HTTPException(
status_code=400, detail=f"Error Stopping Crawl: {exc}"
)
if type_ == "crawl":
await self.page_ops.delete_crawl_pages(crawl_id, org.id)
crawl_size = await self._delete_crawl_files(crawl, org)
size += crawl_size
cid = crawl.get("cid")
if cid:
if cids_to_update.get(cid):
cids_to_update[cid]["inc"] += 1
cids_to_update[cid]["size"] += crawl_size
else:
cids_to_update[cid] = {}
cids_to_update[cid]["inc"] = 1
cids_to_update[cid]["size"] = crawl_size
if type_ == "crawl":
asyncio.create_task(
self.event_webhook_ops.create_crawl_deleted_notification(
crawl_id, org
)
)
if type_ == "upload":
asyncio.create_task(
self.event_webhook_ops.create_upload_deleted_notification(
crawl_id, org
)
)
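        # delete the crawl documents themselves; filtering on oid and type ensures
        # only this org's items of the requested type are removed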
query = {"_id": {"$in": delete_list.crawl_ids}, "oid": org.id, "type": type_}
res = await self.crawls.delete_many(query)
quota_reached = await self.orgs.inc_org_bytes_stored(org.id, -size, type_)
return res.deleted_count, cids_to_update, quota_reached
async def _delete_crawl_files(self, crawl_raw: Dict[str, Any], org: Organization):
"""Delete files associated with crawl from storage."""
crawl = BaseCrawl.from_dict(crawl_raw)
size = 0
for file_ in crawl.files:
size += file_.size
if not await self.storage_ops.delete_crawl_file_object(org, file_):
raise HTTPException(status_code=400, detail="file_deletion_error")
await self.background_job_ops.create_delete_replica_jobs(
org, file_, crawl.id, crawl.type
)
return size
async def delete_crawl_files(self, crawl_id: str, oid: UUID):
"""Delete crawl files"""
crawl_raw = await self.get_crawl_raw(crawl_id)
org = await self.orgs.get_org_by_id(oid)
return await self._delete_crawl_files(crawl_raw, org)
async def _resolve_crawl_refs(
self,
crawl: Union[CrawlOut, CrawlOutWithResources],
org: Optional[Organization],
add_first_seed: bool = True,
files: Optional[list[dict]] = None,
):
"""Resolve running crawl data"""
# pylint: disable=too-many-branches
config = None
if crawl.cid:
config = await self.crawl_configs.get_crawl_config(
crawl.cid, org.id if org else None, active_only=False
)
if config and config.config.seeds:
if add_first_seed:
first_seed = config.config.seeds[0]
crawl.firstSeed = first_seed.url
crawl.seedCount = len(config.config.seeds)
if hasattr(crawl, "profileid") and crawl.profileid:
crawl.profileName = await self.crawl_configs.profiles.get_profile_name(
crawl.profileid, org
)
if (
files
and crawl.state in SUCCESSFUL_STATES
and isinstance(crawl, CrawlOutWithResources)
):
crawl.resources = await self._files_to_resources(files, org, crawl.id)
return crawl
async def _resolve_signed_urls(
self, files: List[CrawlFile], org: Organization, crawl_id: Optional[str] = None
):
if not files:
print("no files")
return
delta = timedelta(seconds=self.presign_duration_seconds)
out_files = []
for file_ in files:
presigned_url = file_.presignedUrl
now = dt_now()
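            # reuse the cached presigned URL until it expires; otherwise sign a new
            # one and persist it (with its new expiry) on the stored file entry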
if not presigned_url or now >= file_.expireAt:
exp = now + delta
presigned_url = await self.storage_ops.get_presigned_url(
org, file_, self.presign_duration_seconds
)
await self.crawls.find_one_and_update(
{"files.filename": file_.filename},
{
"$set": {
"files.$.presignedUrl": presigned_url,
"files.$.expireAt": exp,
}
},
)
file_.expireAt = exp
expire_at_str = ""
if file_.expireAt:
expire_at_str = file_.expireAt.isoformat()
out_files.append(
CrawlFileOut(
name=file_.filename,
path=presigned_url or "",
hash=file_.hash,
crc32=file_.crc32,
size=file_.size,
crawlId=crawl_id,
numReplicas=len(file_.replicas) if file_.replicas else 0,
expireAt=expire_at_str,
)
)
return out_files
@contextlib.asynccontextmanager
    async def get_redis(self, crawl_id):
        """get a redis client for the given crawl id"""
redis_url = self.crawl_manager.get_redis_url(crawl_id)
redis = await self.crawl_manager.get_redis_client(redis_url)
try:
yield redis
finally:
await redis.close()
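    # Usage sketch (key name is illustrative only):
    #
    #     async with self.get_redis(crawl_id) as redis:
    #         value = await redis.get("some:key")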
async def add_to_collection(
self, crawl_ids: List[str], collection_id: UUID, org: Organization
):
"""Add crawls to collection."""
for crawl_id in crawl_ids:
crawl_raw = await self.get_crawl_raw(crawl_id, org)
crawl_collections = crawl_raw.get("collectionIds")
            if crawl_collections and collection_id in crawl_collections:
raise HTTPException(
status_code=400, detail="crawl_already_in_collection"
)
await self.crawls.find_one_and_update(
{"_id": crawl_id},
{"$push": {"collectionIds": collection_id}},
)
async def remove_from_collection(self, crawl_ids: List[str], collection_id: UUID):
"""Remove crawls from collection."""
for crawl_id in crawl_ids:
await self.crawls.find_one_and_update(
{"_id": crawl_id},
{"$pull": {"collectionIds": collection_id}},
)
async def remove_collection_from_all_crawls(self, collection_id: UUID):
"""Remove collection id from all crawls it's currently in."""
await self.crawls.update_many(
{"collectionIds": collection_id},
{"$pull": {"collectionIds": collection_id}},
)
# pylint: disable=too-many-branches, invalid-name, too-many-statements
async def list_all_base_crawls(
self,
org: Optional[Organization] = None,
userid: Optional[UUID] = None,
name: Optional[str] = None,
description: Optional[str] = None,
collection_id: Optional[UUID] = None,
states: Optional[List[str]] = None,
first_seed: Optional[str] = None,
type_: Optional[str] = None,
cid: Optional[UUID] = None,
cls_type: Type[Union[CrawlOut, CrawlOutWithResources]] = CrawlOut,
page_size: int = DEFAULT_PAGE_SIZE,
page: int = 1,
sort_by: Optional[str] = None,
sort_direction: int = -1,
):
"""List crawls of all types from the db"""
# Zero-index page for query
page = page - 1
skip = page * page_size
oid = org.id if org else None
resources = False
if cls_type == CrawlOutWithResources:
resources = True
query: dict[str, object] = {}
if type_:
query["type"] = type_
if oid:
query["oid"] = oid
if userid:
query["userid"] = userid
if states:
# validated_states = [value for value in state if value in ALL_CRAWL_STATES]
query["state"] = {"$in": states}
if cid:
query["cid"] = cid
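        # aggregation pipeline: filter, derive firstSeed from the first configured
        # seed, then drop fields that are not needed in list output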
aggregate = [
{"$match": query},
{"$set": {"firstSeedObject": {"$arrayElemAt": ["$config.seeds", 0]}}},
{"$set": {"firstSeed": "$firstSeedObject.url"}},
{"$unset": ["firstSeedObject", "errors", "config"]},
]
if not resources:
aggregate.extend([{"$unset": ["files"]}])
if name:
aggregate.extend([{"$match": {"name": name}}])
if first_seed:
aggregate.extend([{"$match": {"firstSeed": first_seed}}])
if description:
aggregate.extend([{"$match": {"description": description}}])
if collection_id:
aggregate.extend([{"$match": {"collectionIds": {"$in": [collection_id]}}}])
if sort_by:
if sort_by not in ("started", "finished", "fileSize"):
raise HTTPException(status_code=400, detail="invalid_sort_by")
if sort_direction not in (1, -1):
raise HTTPException(status_code=400, detail="invalid_sort_direction")
aggregate.extend([{"$sort": {sort_by: sort_direction}}])
aggregate.extend(
[
{
"$facet": {
"items": [
{"$skip": skip},
{"$limit": page_size},
],
"total": [{"$count": "count"}],
}
},
]
)
        # run the pipeline; the $facet stage returns both the page of items and the total count
cursor = self.crawls.aggregate(aggregate)
results = await cursor.to_list(length=1)
result = results[0]
items = result["items"]
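        # the "total" facet is an empty list when nothing matched, hence the fallback to 0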
try:
total = int(result["total"][0]["count"])
except (IndexError, ValueError):
total = 0
crawls = []
for res in items:
crawl = cls_type.from_dict(res)
if resources or crawl.type == "crawl":
# pass files only if we want to include resolved resources
files = res.get("files") if resources else None
crawl = await self._resolve_crawl_refs(crawl, org, files=files)
crawls.append(crawl)
return crawls, total
async def delete_crawls_all_types(
self,
delete_list: DeleteCrawlList,
org: Organization,
user: Optional[User] = None,
    ):
        """Delete crawls and/or uploads by id"""
crawls: list[str] = []
uploads: list[str] = []
for crawl_id in delete_list.crawl_ids:
crawl = await self.get_crawl_raw(crawl_id, org)
type_ = crawl.get("type")
if type_ == "crawl":
crawls.append(crawl_id)
if type_ == "upload":
uploads.append(crawl_id)
crawls_length = len(crawls)
uploads_length = len(uploads)
if crawls_length + uploads_length == 0:
raise HTTPException(status_code=400, detail="nothing_to_delete")
deleted_count = 0
# Value is set in delete calls, but initialize to keep linter happy.
quota_reached = False
if crawls_length:
crawl_delete_list = DeleteCrawlList(crawl_ids=crawls)
deleted, cids_to_update, quota_reached = await self.delete_crawls(
org, crawl_delete_list, "crawl", user
)
deleted_count += deleted
for cid, cid_dict in cids_to_update.items():
cid_size = cid_dict["size"]
cid_inc = cid_dict["inc"]
await self.crawl_configs.stats_recompute_last(cid, -cid_size, -cid_inc)
if uploads_length:
upload_delete_list = DeleteCrawlList(crawl_ids=uploads)
deleted, _, quota_reached = await self.delete_crawls(
org, upload_delete_list, "upload", user
)
deleted_count += deleted
if deleted_count < 1:
raise HTTPException(status_code=404, detail="crawl_not_found")
return {"deleted": True, "storageQuotaReached": quota_reached}
async def get_all_crawl_search_values(
self, org: Organization, type_: Optional[str] = None
):
"""List unique names, first seeds, and descriptions from all captures in org"""
match_query: dict[str, object] = {"oid": org.id}
if type_:
match_query["type"] = type_
names = await self.crawls.distinct("name", match_query)
descriptions = await self.crawls.distinct("description", match_query)
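        # only crawl-type items carry a cid, so skip the cid lookup for uploads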
cids = (
await self.crawls.distinct("cid", match_query)
if not type_ or type_ == "crawl"
else []
)
# Remove empty strings
names = [name for name in names if name]
descriptions = [description for description in descriptions if description]
first_seeds = set()
for cid in cids:
if not cid:
continue
config = await self.crawl_configs.get_crawl_config(cid, org.id)
if not config:
continue
first_seed = config.config.seeds[0]
first_seeds.add(first_seed.url)
return {
"names": names,
"descriptions": descriptions,
"firstSeeds": list(first_seeds),
}
# ============================================================================
def init_base_crawls_api(app, user_dep, *args):
"""base crawls api"""
# pylint: disable=invalid-name, duplicate-code, too-many-arguments, too-many-locals
ops = BaseCrawlOps(*args)
org_viewer_dep = ops.orgs.org_viewer_dep
org_crawl_dep = ops.orgs.org_crawl_dep
@app.get(
"/orgs/{oid}/all-crawls",
tags=["all-crawls"],
response_model=PaginatedResponse,
)
async def list_all_base_crawls(
org: Organization = Depends(org_viewer_dep),
pageSize: int = DEFAULT_PAGE_SIZE,
page: int = 1,
userid: Optional[UUID] = None,
name: Optional[str] = None,
state: Optional[str] = None,
firstSeed: Optional[str] = None,
description: Optional[str] = None,
collectionId: Optional[UUID] = None,
crawlType: Optional[str] = None,
cid: Optional[UUID] = None,
sortBy: Optional[str] = "finished",
sortDirection: int = -1,
):
states = state.split(",") if state else None
if firstSeed:
firstSeed = urllib.parse.unquote(firstSeed)
if name:
name = urllib.parse.unquote(name)
if description:
description = urllib.parse.unquote(description)
if crawlType and crawlType not in ("crawl", "upload"):
raise HTTPException(status_code=400, detail="invalid_crawl_type")
crawls, total = await ops.list_all_base_crawls(
org,
userid=userid,
name=name,
description=description,
collection_id=collectionId,
states=states,
first_seed=firstSeed,
type_=crawlType,
cid=cid,
page_size=pageSize,
page=page,
sort_by=sortBy,
sort_direction=sortDirection,
)
return paginated_format(crawls, total, page, pageSize)
@app.get("/orgs/{oid}/all-crawls/search-values", tags=["all-crawls"])
async def get_all_crawls_search_values(
org: Organization = Depends(org_viewer_dep),
crawlType: Optional[str] = None,
):
if crawlType and crawlType not in ("crawl", "upload"):
raise HTTPException(status_code=400, detail="invalid_crawl_type")
return await ops.get_all_crawl_search_values(org, type_=crawlType)
@app.get(
"/orgs/{oid}/all-crawls/{crawl_id}",
tags=["all-crawls"],
response_model=CrawlOutWithResources,
)
async def get_base_crawl(crawl_id: str, org: Organization = Depends(org_crawl_dep)):
return await ops.get_crawl(crawl_id, org)
@app.get(
"/orgs/all/all-crawls/{crawl_id}/replay.json",
tags=["all-crawls"],
response_model=CrawlOutWithResources,
)
async def get_base_crawl_admin(crawl_id, user: User = Depends(user_dep)):
if not user.is_superuser:
raise HTTPException(status_code=403, detail="Not Allowed")
return await ops.get_crawl(crawl_id, None)
@app.get(
"/orgs/{oid}/all-crawls/{crawl_id}/replay.json",
tags=["all-crawls"],
response_model=CrawlOutWithResources,
)
async def get_crawl(crawl_id, org: Organization = Depends(org_viewer_dep)):
return await ops.get_crawl(crawl_id, org)
@app.patch("/orgs/{oid}/all-crawls/{crawl_id}", tags=["all-crawls"])
async def update_crawl(
update: UpdateCrawl, crawl_id: str, org: Organization = Depends(org_crawl_dep)
):
return await ops.update_crawl(crawl_id, org, update)
@app.post("/orgs/{oid}/all-crawls/delete", tags=["all-crawls"])
async def delete_crawls_all_types(
delete_list: DeleteCrawlList,
user: User = Depends(user_dep),
org: Organization = Depends(org_crawl_dep),
):
return await ops.delete_crawls_all_types(delete_list, org, user)
return ops