* Implement in backend
  - Track bytesStored in org
  - Add migration to pre-calculate bytesStored based on the size of crawlfiles and profilefiles
  - Add methods to increase or decrease org storage when crawl or profile files are added or deleted (see the sketch after this list)
  - Include a storageQuotaReached boolean in API responses that alter storage
  - Don't start new crawls, and fail uploads, if the storage quota is reached
* Implement in frontend
  - Add to orgs-list quotas
  - Update the org's storageQuotaReached based on backend endpoint responses
  - Disable buttons when the storage quota is met
  - Show a toast notification when attempting to run a crawl while the org storage quota is met
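
As a rough illustration of the backend pieces above, here is a minimal sketch of the quota helpers, assuming Motor/pymongo-style collections and an Organization document with `bytesStored` and a `quotas.storageQuota` field (assumed names). `inc_org_bytes_stored` is a hypothetical helper name; `storage_quota_reached` mirrors the helper imported by the scheduled-job entrypoint below.

```python
async def inc_org_bytes_stored(orgs, oid, size):
    """Add size (negative to decrease) to the org's bytesStored, return the quota flag."""
    await orgs.find_one_and_update({"_id": oid}, {"$inc": {"bytesStored": size}})
    return await storage_quota_reached(orgs, oid)


async def storage_quota_reached(orgs, oid):
    """Return True if the org has a storage quota and bytesStored meets or exceeds it."""
    org = await orgs.find_one({"_id": oid})
    if not org:
        return False
    quota = org.get("quotas", {}).get("storageQuota", 0)
    if not quota:
        return False
    return org.get("bytesStored", 0) >= quota
```

An endpoint that adds or deletes crawl or profile files could then call `inc_org_bytes_stored` with the file size (negative on delete) and include the returned flag as `storageQuotaReached` in its response, which is what the frontend reads to disable buttons.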
88 lines · 2.3 KiB · Python
""" entrypoint for cron crawl job"""
|
|
|
|
import asyncio
|
|
import os
|
|
import uuid
|
|
|
|
from .k8sapi import K8sAPI
|
|
from .db import init_db
|
|
from .crawlconfigs import (
|
|
get_crawl_config,
|
|
inc_crawl_count,
|
|
)
|
|
from .crawls import add_new_crawl
|
|
from .orgs import storage_quota_reached
|
|
from .utils import register_exit_handler
|
|
|
|
|
|
# ============================================================================
|
|
class ScheduledJob(K8sAPI):
|
|
"""Schedulued Job APIs for starting CrawlJobs on schedule"""
|
|
|
|
def __init__(self):
|
|
super().__init__()
|
|
self.cid = os.environ["CID"]
|
|
|
|
_, mdb = init_db()
|
|
|
|
self.crawls = mdb["crawls"]
|
|
self.crawlconfigs = mdb["crawl_configs"]
|
|
self.orgs = mdb["organizations"]
|
|
|
|
async def run(self):
|
|
"""run crawl!"""
|
|
register_exit_handler()
|
|
|
|
config_map = await self.core_api.read_namespaced_config_map(
|
|
name=f"crawl-config-{self.cid}", namespace=self.namespace
|
|
)
|
|
data = config_map.data
|
|
|
|
userid = data["USER_ID"]
|
|
scale = int(data.get("INITIAL_SCALE", 0))
|
|
try:
|
|
crawl_timeout = int(data.get("CRAWL_TIMEOUT", 0))
|
|
# pylint: disable=bare-except
|
|
except:
|
|
crawl_timeout = 0
|
|
|
|
oid = data["ORG_ID"]
|
|
|
|
crawlconfig = await get_crawl_config(self.crawlconfigs, uuid.UUID(self.cid))
|
|
|
|
if await storage_quota_reached(self.orgs, uuid.UUID(oid)):
|
|
print(
|
|
f"Scheduled crawl from workflow {self.cid} not started - storage quota reached",
|
|
flush=True,
|
|
)
|
|
return
|
|
|
|
# k8s create
|
|
crawl_id = await self.new_crawl_job(
|
|
self.cid, userid, oid, scale, crawl_timeout, manual=False
|
|
)
|
|
|
|
# db create
|
|
await inc_crawl_count(self.crawlconfigs, crawlconfig.id)
|
|
await add_new_crawl(
|
|
self.crawls,
|
|
self.crawlconfigs,
|
|
crawl_id,
|
|
crawlconfig,
|
|
uuid.UUID(userid),
|
|
manual=False,
|
|
)
|
|
print("Crawl Created: " + crawl_id)
|
|
|
|
|
|
# ============================================================================
|
|
def main():
|
|
"""main entrypoint"""
|
|
job = ScheduledJob()
|
|
loop = asyncio.get_event_loop()
|
|
loop.run_until_complete(job.run())
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|
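
The entrypoint above handles scheduled crawls; for manually started crawls and uploads ("don't start new crawls, and fail uploads"), the same check could sit in the API layer. Below is a hypothetical FastAPI-style sketch: the route path, the `run_now` helper, and the response shape are illustrative assumptions, not the project's actual API, while `storage_quota_reached` is the same helper imported by the job above.

```python
import uuid

from fastapi import APIRouter, HTTPException

from .orgs import storage_quota_reached


def init_run_now_api(router: APIRouter, orgs, run_now):
    """Register a run-now endpoint that refuses to start crawls once over quota."""

    @router.post("/orgs/{oid}/crawlconfigs/{cid}/run")
    async def run_crawl_now(oid: uuid.UUID, cid: uuid.UUID):
        # fail fast instead of starting a crawl whose output can't be stored
        if await storage_quota_reached(orgs, oid):
            raise HTTPException(status_code=403, detail="storage_quota_reached")

        crawl_id = await run_now(cid, oid)
        # the flag lets the frontend disable run buttons and show the toast
        return {"started": crawl_id, "storageQuotaReached": False}

    return run_crawl_now
```

The frontend can then update its cached `storageQuotaReached` state from responses like this one (and from the 403 detail) to disable buttons and show the toast notification described above.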