Consolidate list page endpoints + better QA sorting + optimize pages fix (#2417)
- Consolidate list_pages() and list_replay_query_pages() into a single list_pages().
- To keep backwards compatibility, add <crawl>/pagesSearch, which omits page totals; keep <crawl>/pages, which includes the total (slower).
- QA frontend: add a default 'Crawl Order' sort option so pages appear in crawl order in the QA view, which gives better results.
- Bgjobs: account for parallelism in background jobs; log when the succeeded count does not match parallelism.
- Optimize-pages job: also cover crawls that have no pages stored but report completed pages in their done stats.
- Bgjobs: give custom op jobs more memory.
parent 06f6d9d4f2
commit 8a507f0473
@@ -171,14 +171,15 @@ class BaseCrawlOps:
         res["collections"] = await self.colls.get_collection_names(coll_ids)

         if res.get("version", 1) == 2:
-            res["initialPages"] = await self.page_ops.list_replay_query_pages(
+            res["initialPages"], _ = await self.page_ops.list_pages(
                 crawl_ids=[crawlid], is_seed=True, page_size=25
             )

             oid = res.get("oid")
             if oid:
                 res["pagesQueryUrl"] = (
-                    get_origin(headers) + f"/api/orgs/{oid}/crawls/{crawlid}/pages"
+                    get_origin(headers)
+                    + f"/api/orgs/{oid}/crawls/{crawlid}/pagesSearch"
                 )

         crawl = CrawlOutWithResources.from_dict(res)
@@ -42,7 +42,6 @@ from .models import (
     OrgPublicCollections,
     PublicOrgDetails,
     CollAccessType,
-    PageOut,
     UpdateCollHomeUrl,
     User,
     ImageFile,
@@ -346,8 +345,7 @@ class CollectionOps:
             await self.get_collection_crawl_resources(coll_id)
         )

-        initial_pages: List[PageOut] = await self.page_ops.list_replay_query_pages(
-            coll_id,
+        initial_pages, _ = await self.page_ops.list_pages(
             crawl_ids=crawl_ids,
             page_size=25,
         )
@@ -198,6 +198,7 @@ class CrawlManager(K8sAPI):
             "job_type": job_type,
             "backend_image": os.environ.get("BACKEND_IMAGE", ""),
             "pull_policy": os.environ.get("BACKEND_IMAGE_PULL_POLICY", ""),
+            "larger_resources": True,
             **kwargs,
         }
         if oid:
@@ -38,7 +38,12 @@ class BgJobOperator(BaseOperator):
         job_id: str = labels.get("job_id") or metadata.get("name")

         status = data.object["status"]
-        success = status.get("succeeded") == 1
+        spec = data.object["spec"]
+        success = status.get("succeeded") == spec.get("parallelism")
+        if not success:
+            print(
+                f"Succeeded: {status.get('succeeded')}, Num Pods: {spec.get('parallelism')}"
+            )
         completion_time = status.get("completionTime")

         finalized = True
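A standalone sketch of the success check above: a background Job now counts as successful only when its succeeded pod count equals spec.parallelism (previously hard-coded to 1). Plain dicts stand in for the operator payload; names here are illustrative.

# Illustrative only: dicts mirror the Job spec/status shape used above.
def job_succeeded(job: dict) -> bool:
    status = job.get("status", {})
    spec = job.get("spec", {})
    success = status.get("succeeded") == spec.get("parallelism")
    if not success:
        # mirrors the new logging added in the hunk above
        print(
            f"Succeeded: {status.get('succeeded')}, Num Pods: {spec.get('parallelism')}"
        )
    return success

# A 3-pod job is only done once all 3 pods have succeeded.
assert job_succeeded({"spec": {"parallelism": 3}, "status": {"succeeded": 3}})
assert not job_succeeded({"spec": {"parallelism": 3}, "status": {"succeeded": 1}})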
@@ -3,7 +3,6 @@
 # pylint: disable=too-many-lines

 import asyncio
-import re
 import traceback
 import urllib.parse
 from datetime import datetime
@@ -495,7 +494,10 @@ class PageOps:

     async def list_pages(
         self,
-        crawl_id: str,
+        coll_id: Optional[UUID] = None,
+        crawl_ids: Optional[List[str]] = None,
+        public_or_unlisted_only=False,
+        # pylint: disable=unused-argument
         org: Optional[Organization] = None,
         search: Optional[str] = None,
         url: Optional[str] = None,
@@ -516,6 +518,7 @@ class PageOps:
         page: int = 1,
         sort_by: Optional[str] = None,
         sort_direction: Optional[int] = -1,
+        include_total=False,
     ) -> Tuple[Union[List[PageOut], List[PageOutWithSingleQA]], int]:
         """List all pages in crawl"""
         # pylint: disable=duplicate-code, too-many-locals, too-many-branches, too-many-statements
@@ -523,26 +526,45 @@ class PageOps:
         page = page - 1
         skip = page_size * page

+        # Crawl or Collection Selection
+        if coll_id:
+            if crawl_ids:
+                # both coll_id and crawl_ids, error
+                raise HTTPException(
+                    status_code=400,
+                    detail="only one of crawl_ids or coll_id can be provided",
+                )
+
+            crawl_ids = await self.coll_ops.get_collection_crawl_ids(
+                coll_id, public_or_unlisted_only
+            )
+        elif not crawl_ids:
+            # neither coll_id nor crawl_ids, error
+            raise HTTPException(
+                status_code=400, detail="either crawl_ids or coll_id must be provided"
+            )
+
         query: dict[str, object] = {
-            "crawl_id": crawl_id,
+            "crawl_id": {"$in": crawl_ids},
         }
-        if org:
-            query["oid"] = org.id
+        # if org:
+        #     query["oid"] = org.id

+        # Text Search
         is_text_search = False
         if search:
-            search_regex = re.escape(urllib.parse.unquote(search))
-            query["$or"] = [
-                {"url": {"$regex": search_regex, "$options": "i"}},
-                {"title": {"$regex": search_regex, "$options": "i"}},
-            ]
+            search = urllib.parse.unquote(search)
+            if search.startswith("http:") or search.startswith("https:"):
+                query["url"] = {"$gte": search}
+            else:
+                query["$text"] = {"$search": search}
+                is_text_search = True

-        if url_prefix:
-            url_prefix = urllib.parse.unquote(url_prefix)
-            regex_pattern = f"^{re.escape(url_prefix)}"
-            query["url"] = {"$regex": regex_pattern, "$options": "i"}
-
-        elif url:
+        # Seed Settings
+        if url:
             query["url"] = urllib.parse.unquote(url)
+        elif url_prefix:
+            query["url"] = {"$gte": urllib.parse.unquote(url_prefix)}

         if ts:
             query["ts"] = ts
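The consolidated list_pages() now accepts either crawl_ids or coll_id, never both. A minimal standalone sketch of that selection rule, with ValueError standing in for HTTPException so it runs outside the app:

from typing import Awaitable, Callable, List, Optional
from uuid import UUID

async def resolve_crawl_ids(
    coll_id: Optional[UUID],
    crawl_ids: Optional[List[str]],
    get_collection_crawl_ids: Callable[[UUID], Awaitable[List[str]]],
) -> List[str]:
    # both provided: reject, mirroring the 400 above
    if coll_id and crawl_ids:
        raise ValueError("only one of crawl_ids or coll_id can be provided")
    # a collection expands to its member crawl ids
    if coll_id:
        return await get_collection_crawl_ids(coll_id)
    # neither provided: reject
    if not crawl_ids:
        raise ValueError("either crawl_ids or coll_id must be provided")
    return crawl_ids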
@@ -553,6 +575,7 @@ class PageOps:
         if isinstance(depth, int):
             query["depth"] = depth

+        # QA Settings
         if reviewed:
             query["$or"] = [
                 {"approved": {"$ne": None}},
@@ -591,8 +614,14 @@ class PageOps:

             query[f"qa.{qa_run_id}.{qa_filter_by}"] = range_filter

-        aggregate = [{"$match": query}]
+        aggregate: List[Dict[str, Union[int, object]]] = [{"$match": query}]
+
+        # Extra QA Set
+        if qa_run_id:
+            aggregate.extend([{"$set": {"qa": f"$qa.{qa_run_id}"}}])
+            # aggregate.extend([{"$project": {"qa": f"$qa.{qa_run_id}"}}])

+        # Sorting
         if sort_by:
             # Sorting options to add:
             # - automated heuristics like screenshot_comparison (dict keyed by QA run id)
@@ -625,33 +654,52 @@ class PageOps:

             aggregate.extend([{"$sort": {sort_by: sort_direction}}])

-            if qa_run_id:
-                aggregate.extend([{"$set": {"qa": f"$qa.{qa_run_id}"}}])
-                # aggregate.extend([{"$project": {"qa": f"$qa.{qa_run_id}"}}])
+        # default sort with search
+        elif search or url_prefix:
+            if is_text_search:
+                aggregate.extend(
+                    [
+                        {"$sort": {"score": {"$meta": "textScore"}}},
+                    ]
+                )
+            else:
+                aggregate.extend([{"$sort": {"url": 1}}])
+        else:
+            # default sort: seeds first, then by timestamp
+            aggregate.extend([{"$sort": {"isSeed": -1, "ts": 1}}])

-        aggregate.extend(
-            [
-                {
-                    "$facet": {
-                        "items": [
-                            {"$skip": skip},
-                            {"$limit": page_size},
-                        ],
-                        "total": [{"$count": "count"}],
-                    }
-                }
-            ]
-        )
+        if include_total:
+            aggregate.extend(
+                [
+                    {
+                        "$facet": {
+                            "items": [
+                                {"$skip": skip},
+                                {"$limit": page_size},
+                            ],
+                            "total": [{"$count": "count"}],
+                        }
+                    },
+                ]
+            )

-        # Get total
-        cursor = self.pages.aggregate(aggregate)
-        results = await cursor.to_list(length=1)
-        result = results[0]
-        items = result["items"]
+            cursor = self.pages.aggregate(aggregate)
+            results = await cursor.to_list(length=1)
+            result = results[0]
+            items = result["items"]

-        try:
-            total = int(result["total"][0]["count"])
-        except (IndexError, ValueError):
-            total = 0
+            try:
+                total = int(result["total"][0]["count"])
+            except (IndexError, ValueError, KeyError):
+                total = 0
+        else:
+            if skip:
+                aggregate.extend([{"$skip": skip}])

+            aggregate.extend([{"$limit": page_size}])
+            cursor = self.pages.aggregate(aggregate)
+            items = await cursor.to_list(page_size)
+            total = 0

         if qa_run_id:
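The include_total flag trades the page total for speed. Illustrative pipeline fragments in motor/pymongo aggregation syntax (the crawl id is made up): with a total, a single $facet stage returns the page slice and the count together, which forces MongoDB to materialize the full result set; without it, plain $skip/$limit can stop early.

# Illustrative aggregation shapes; "crawl-1" is a made-up id.
base = [
    {"$match": {"crawl_id": {"$in": ["crawl-1"]}}},
    {"$sort": {"isSeed": -1, "ts": 1}},  # new default: seeds first, crawl order
]

# include_total=True: items and count in one pass (slower on large crawls)
with_total = base + [
    {
        "$facet": {
            "items": [{"$skip": 0}, {"$limit": 25}],
            "total": [{"$count": "count"}],
        }
    },
]

# include_total=False: skip/limit only, no counting work
without_total = base + [{"$skip": 0}, {"$limit": 25}]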
@@ -667,35 +715,18 @@ class PageOps:
     ) -> List[PageUrlCount]:
         """List all page URLs in collection sorted desc by snapshot count
         unless prefix is specified"""
-        # pylint: disable=duplicate-code, too-many-locals, too-many-branches, too-many-statements
-        # Zero-index page for query
-
         crawl_ids = await self.coll_ops.get_collection_crawl_ids(coll_id)

-        match_query: dict[str, object] = {"crawl_id": {"$in": crawl_ids}}
-        sort_query: dict[str, int] = {"isSeed": -1, "ts": 1}
-
-        if url_prefix:
-            url_prefix = urllib.parse.unquote(url_prefix)
-            # regex_pattern = f"^{re.escape(url_prefix)}"
-            # match_query["url"] = {"$regex": regex_pattern, "$options": "i"}
-            match_query["url"] = {"$gte": url_prefix}
-            sort_query = {"url": 1}
-
-        aggregate: List[Dict[str, Union[int, object]]] = [
-            {"$match": match_query},
-            {"$sort": sort_query},
-        ]
-
-        aggregate.append({"$limit": page_size * len(crawl_ids)})
-
-        cursor = self.pages.aggregate(aggregate)
-        results = await cursor.to_list(length=page_size * len(crawl_ids))
+        pages, _ = await self.list_pages(
+            crawl_ids=crawl_ids,
+            url_prefix=url_prefix,
+            page_size=page_size * len(crawl_ids),
+        )

         url_counts: dict[str, PageUrlCount] = {}

-        for result in results:
-            url = result.get("url")
+        for page in pages:
+            url = page.url
             count = url_counts.get(url)
             if not count:
                 # if already at max pages, this would add a new page, so we're done
@@ -705,125 +736,15 @@ class PageOps:
                 url_counts[url] = count
             count.snapshots.append(
                 PageIdTimestamp(
-                    pageId=result.get("_id"),
-                    ts=result.get("ts"),
-                    status=result.get("status", 200),
+                    pageId=page.id,
+                    ts=page.ts,
+                    status=page.status or 200,
                 )
             )
             count.count += 1

         return list(url_counts.values())

-    async def list_replay_query_pages(
-        self,
-        coll_id: Optional[UUID] = None,
-        crawl_ids: Optional[List[str]] = None,
-        org: Optional[Organization] = None,
-        search: Optional[str] = None,
-        url: Optional[str] = None,
-        url_prefix: Optional[str] = None,
-        ts: Optional[datetime] = None,
-        is_seed: Optional[bool] = None,
-        depth: Optional[int] = None,
-        page_size: int = DEFAULT_PAGE_SIZE,
-        page: int = 1,
-        sort_by: Optional[str] = None,
-        sort_direction: Optional[int] = -1,
-        public_or_unlisted_only=False,
-    ) -> List[PageOut]:
-        """Query pages in collection, with filtering sorting. No total returned for optimization"""
-        # pylint: disable=duplicate-code, too-many-locals, too-many-branches, too-many-statements
-        # Zero-index page for query
-        page = page - 1
-        skip = page_size * page
-
-        if crawl_ids is None and coll_id is None:
-            raise HTTPException(
-                status_code=400, detail="either crawl_ids or coll_id must be provided"
-            )
-
-        if coll_id and crawl_ids is None:
-            crawl_ids = await self.coll_ops.get_collection_crawl_ids(
-                coll_id, public_or_unlisted_only
-            )
-
-        query: dict[str, object] = {
-            "crawl_id": {"$in": crawl_ids},
-        }
-        if org:
-            query["oid"] = org.id
-
-        is_text_search = False
-        if search:
-            search = urllib.parse.unquote(search)
-            if search.startswith("http:") or search.startswith("https:"):
-                query["url"] = {"$gte": search}
-            else:
-                query["$text"] = {"$search": search}
-                is_text_search = True
-
-        elif url_prefix:
-            url_prefix = urllib.parse.unquote(url_prefix)
-            regex_pattern = f"^{re.escape(url_prefix)}"
-            query["url"] = {"$regex": regex_pattern, "$options": "i"}
-
-        elif url:
-            query["url"] = urllib.parse.unquote(url)
-
-        if ts:
-            query["ts"] = ts
-
-        if is_seed in (True, False):
-            query["isSeed"] = is_seed
-
-        if isinstance(depth, int):
-            query["depth"] = depth
-
-        aggregate: list[dict[str, object]] = [{"$match": query}]
-
-        if sort_by:
-            # Sorting options to add:
-            # - automated heuristics like screenshot_comparison (dict keyed by QA run id)
-            # - Ensure notes sorting works okay with notes in list
-            sort_fields = (
-                "url",
-                "crawl_id",
-                "ts",
-                "status",
-                "mime",
-                "filename",
-                "depth",
-                "isSeed",
-            )
-            if sort_by not in sort_fields:
-                raise HTTPException(status_code=400, detail="invalid_sort_by")
-            if sort_direction not in (1, -1):
-                raise HTTPException(status_code=400, detail="invalid_sort_direction")
-
-            aggregate.extend([{"$sort": {sort_by: sort_direction}}])
-        elif search:
-            if is_text_search:
-                aggregate.extend(
-                    [
-                        {"$sort": {"score": {"$meta": "textScore"}}},
-                    ]
-                )
-            else:
-                aggregate.extend([{"$sort": {"url": 1}}])
-        else:
-            # default sort: seeds first, then by timestamp
-            aggregate.extend([{"$sort": {"isSeed": -1, "ts": 1}}])
-
-        if skip:
-            aggregate.append({"$skip": skip})
-        aggregate.append({"$limit": page_size})
-
-        cursor = self.pages.aggregate(aggregate)
-
-        results = await cursor.to_list(length=page_size)
-
-        return [PageOut.from_dict(data) for data in results]
-
     async def re_add_crawl_pages(self, crawl_id: str, oid: Optional[UUID] = None):
         """Delete existing pages for crawl and re-add from WACZs."""
@@ -1006,8 +927,7 @@ class PageOps:

     async def set_archived_item_page_counts(self, crawl_id: str):
         """Store archived item page and unique page counts in crawl document"""
-        _, page_count = await self.list_pages(crawl_id)
-
+        page_count = await self.pages.count_documents({"crawl_id": crawl_id})
         unique_page_count = await self.get_unique_page_count([crawl_id])

         await self.crawls.find_one_and_update(
@@ -1031,6 +951,7 @@ class PageOps:
             match_query,
             {"$set": {"isMigrating": True}},
             sort=[("finished", -1)],
+            projection={"_id": 1, "pageCount": 1, "stats": 1, "state": 1},
         )
         if next_crawl is None:
             print("No more finished crawls to migrate")
@@ -1046,6 +967,13 @@ class PageOps:
         if has_page_no_filename:
             print("Re-importing pages to migrate to v2")
             await self.re_add_crawl_pages(crawl_id)
+        elif (
+            next_crawl.get("pageCount") == 0
+            and next_crawl.get("stats", {}).get("done", 0) > 0
+            and next_crawl.get("state") not in ["canceled", "failed"]
+        ):
+            print("Pages likely missing, importing pages to migrate to v2")
+            await self.re_add_crawl_pages(crawl_id)
         else:
             print("Pages already have filename, set to v2")
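A sketch of the new migration guard above, extracted for clarity: pages are re-imported when a crawl stores zero pages yet its stats say pages completed, unless the crawl was canceled or failed. The dict mirrors the projected crawl document fields.

def needs_page_reimport(crawl: dict) -> bool:
    # zero stored pages, but the crawler reported finished pages
    return (
        crawl.get("pageCount") == 0
        and crawl.get("stats", {}).get("done", 0) > 0
        and crawl.get("state") not in ["canceled", "failed"]
    )

assert needs_page_reimport({"pageCount": 0, "stats": {"done": 10}, "state": "complete"})
assert not needs_page_reimport({"pageCount": 0, "stats": {"done": 0}, "state": "complete"})
assert not needs_page_reimport({"pageCount": 0, "stats": {"done": 10}, "state": "failed"})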
@@ -1291,7 +1219,7 @@ def init_pages_api(
             formatted_approved = str_list_to_bools(approved.split(","))

         pages, total = await ops.list_pages(
-            crawl_id=crawl_id,
+            crawl_ids=[crawl_id],
             org=org,
             search=search,
             url=url,
@@ -1306,9 +1234,41 @@ def init_pages_api(
             page=page,
             sort_by=sortBy,
             sort_direction=sortDirection,
+            include_total=True,
         )
         return paginated_format(pages, total, page, pageSize)

+    @app.get(
+        "/orgs/{oid}/crawls/{crawl_id}/pagesSearch",
+        tags=["pages", "crawls"],
+        response_model=PageOutItemsResponse,
+    )
+    async def get_search_pages_list(
+        crawl_id: str,
+        org: Organization = Depends(org_crawl_dep),
+        search: Optional[str] = None,
+        url: Optional[str] = None,
+        ts: Optional[datetime] = None,
+        isSeed: Optional[bool] = None,
+        depth: Optional[int] = None,
+        pageSize: int = DEFAULT_PAGE_SIZE,
+        page: int = 1,
+    ):
+        """Retrieve paginated list of pages"""
+        pages, _ = await ops.list_pages(
+            crawl_ids=[crawl_id],
+            search=search,
+            url=url,
+            ts=ts,
+            is_seed=isSeed,
+            depth=depth,
+            org=org,
+            page_size=pageSize,
+            page=page,
+            include_total=False,
+        )
+        return {"items": pages}
+
     @app.get(
         "/orgs/{oid}/collections/{coll_id}/public/pages",
         tags=["pages", "collections"],
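A hedged usage sketch for the two crawl page listings after this change (host, org, and crawl ids are hypothetical placeholders; the requests library is assumed):

import requests

API = "https://btrix.example.com/api"  # hypothetical deployment
AUTH = {"Authorization": "Bearer <token>"}

# new fast path: no total computed, response is {"items": [...]}
r = requests.get(
    f"{API}/orgs/<oid>/crawls/<crawl_id>/pagesSearch",
    params={"isSeed": True, "pageSize": 25},
    headers=AUTH,
)

# backwards-compatible path: includes "total" (slower on large crawls)
r = requests.get(
    f"{API}/orgs/<oid>/crawls/<crawl_id>/pages",
    params={"pageSize": 25},
    headers=AUTH,
)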
@@ -1320,7 +1280,6 @@ def init_pages_api(
         org: Organization = Depends(org_public),
         search: Optional[str] = None,
         url: Optional[str] = None,
-        urlPrefix: Optional[str] = None,
         ts: Optional[datetime] = None,
         isSeed: Optional[bool] = None,
         depth: Optional[int] = None,
@@ -1330,12 +1289,11 @@ def init_pages_api(
         sortDirection: Optional[int] = -1,
     ):
         """Retrieve paginated list of pages in collection"""
-        pages = await ops.list_replay_query_pages(
+        pages, _ = await ops.list_pages(
             coll_id=coll_id,
             org=org,
             search=search,
             url=url,
-            url_prefix=urlPrefix,
             ts=ts,
             is_seed=isSeed,
             depth=depth,
@@ -1377,7 +1335,6 @@ def init_pages_api(
         org: Organization = Depends(org_viewer_dep),
         search: Optional[str] = None,
         url: Optional[str] = None,
-        urlPrefix: Optional[str] = None,
         ts: Optional[datetime] = None,
         isSeed: Optional[bool] = None,
         depth: Optional[int] = None,
@@ -1387,12 +1344,11 @@ def init_pages_api(
         sortDirection: Optional[int] = -1,
     ):
         """Retrieve paginated list of pages in collection"""
-        pages = await ops.list_replay_query_pages(
+        pages, _ = await ops.list_pages(
             coll_id=coll_id,
             org=org,
             search=search,
             url=url,
-            url_prefix=urlPrefix,
             ts=ts,
             is_seed=isSeed,
             depth=depth,
@@ -1433,7 +1389,7 @@ def init_pages_api(
             formatted_approved = str_list_to_bools(approved.split(","))

         pages, total = await ops.list_pages(
-            crawl_id=crawl_id,
+            crawl_ids=[crawl_id],
             org=org,
             qa_run_id=qa_run_id,
             qa_filter_by=filterQABy,
@@ -1,3 +1,3 @@
 """current version"""

-__version__ = "1.14.0-beta.6"
+__version__ = "1.14.0-beta.7"
@@ -186,7 +186,7 @@ def test_wait_for_complete(admin_auth_headers, default_org_id):

     assert len(data["initialPages"]) == 1
     assert data["pagesQueryUrl"].endswith(
-        f"/orgs/{default_org_id}/crawls/{admin_crawl_id}/pages"
+        f"/orgs/{default_org_id}/crawls/{admin_crawl_id}/pagesSearch"
     )

     # ensure filename matches specified pattern
@@ -5,7 +5,7 @@ type: application
 icon: https://webrecorder.net/assets/icon.png

 # Browsertrix and Chart Version
-version: v1.14.0-beta.6
+version: v1.14.0-beta.7

 dependencies:
   - name: btrix-admin-logging
@@ -65,9 +65,18 @@ spec:
       command: ["python3", "-m", "btrixcloud.main_bg"]

       resources:
+        {% if larger_resources %}
         limits:
-          memory: "500Mi"
+          memory: "1200Mi"

         requests:
-          memory: "250Mi"
+          memory: "500Mi"
           cpu: "200m"
+        {% else %}
+        limits:
+          memory: "200Mi"
+
+        requests:
+          memory: "200Mi"
+          cpu: "50m"
+        {% endif %}
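How the larger_resources flag set in CrawlManager (earlier hunk) selects the bigger memory tier when this template renders, sketched with jinja2 (assumed to match the chart's template syntax; the fragment is simplified):

from jinja2 import Template

fragment = Template(
    """resources:
  {%- if larger_resources %}
  limits: {memory: "1200Mi"}
  requests: {memory: "500Mi", cpu: "200m"}
  {%- else %}
  limits: {memory: "200Mi"}
  requests: {memory: "200Mi", cpu: "50m"}
  {%- endif %}"""
)

print(fragment.render(larger_resources=True))   # big tier for custom op jobs
print(fragment.render(larger_resources=False))  # default small tier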
@@ -103,7 +103,7 @@ replica_deletion_delay_days: 0

 # API Image
 # =========================================
-backend_image: "docker.io/webrecorder/browsertrix-backend:1.14.0-beta.6"
+backend_image: "docker.io/webrecorder/browsertrix-backend:1.14.0-beta.7"
 backend_pull_policy: "Always"

 backend_password_secret: "PASSWORD!"
@@ -161,7 +161,7 @@ backend_avg_memory_threshold: 95

 # Nginx Image
 # =========================================
-frontend_image: "docker.io/webrecorder/browsertrix-frontend:1.14.0-beta.6"
+frontend_image: "docker.io/webrecorder/browsertrix-frontend:1.14.0-beta.7"
 frontend_pull_policy: "Always"

 frontend_cpu: "10m"
@@ -1,6 +1,6 @@
 {
   "name": "browsertrix-frontend",
-  "version": "1.14.0-beta.6",
+  "version": "1.14.0-beta.7",
   "main": "index.ts",
   "license": "AGPL-3.0-or-later",
   "dependencies": {
|
||||
class="label-same-line"
|
||||
label=${msg("Sort by:")}
|
||||
size="small"
|
||||
value=${this.qaRunId ? "approved.-1" : "url.1"}
|
||||
value=${this.qaRunId ? "approved.-1" : ".1"}
|
||||
pill
|
||||
@sl-change=${(e: SlChangeEvent) => {
|
||||
const { value } = e.target as SlSelect;
|
||||
@ -717,6 +717,7 @@ export class ArchivedItemDetailQA extends BtrixElement {
|
||||
});
|
||||
}}
|
||||
>
|
||||
<sl-option value=".1">${msg("Crawl Order")}</sl-option>
|
||||
<sl-option value="title.1">${msg("Title")}</sl-option>
|
||||
<sl-option value="url.1">${msg("URL")}</sl-option>
|
||||
<sl-option value="notes.-1" ?disabled=${!this.qaRunId}
|
||||
|
@@ -1 +1 @@
-1.14.0-beta.6
+1.14.0-beta.7