Add mime field to Page model (#1678)
parent 1b034957ff
commit 87e0873f1a
@@ -1529,6 +1529,7 @@ class Page(BaseMongoModel):
     ts: Optional[datetime] = None
     loadState: Optional[int] = None
     status: Optional[int] = None
+    mime: Optional[str] = None

     # manual review
     userid: Optional[UUID] = None
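The model change mirrors the existing optional fields around it: mime is Optional and defaults to None, so documents written before this change still validate, and serializers that drop None values simply omit it. A minimal standalone sketch of that behavior, assuming Pydantic v1-style models; PageStub is a hypothetical stand-in, not a class from this codebase:

from datetime import datetime
from typing import Optional

from pydantic import BaseModel


class PageStub(BaseModel):
    # hypothetical, trimmed stand-in for the real Page model
    url: str
    ts: Optional[datetime] = None
    status: Optional[int] = None
    mime: Optional[str] = None  # new field; absent input leaves it as None


page = PageStub(url="https://webrecorder.net/", status=200)
print(page.mime)                     # None -- never set
print(page.dict(exclude_none=True))  # {'url': ..., 'status': 200}; mime omitted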
@@ -101,6 +101,7 @@ class PageOps:
             title=page_dict.get("title"),
             loadState=page_dict.get("loadState"),
             status=status,
+            mime=page_dict.get("mime", "text/html"),
             ts=(
                 from_k8s_date(page_dict.get("ts"))
                 if page_dict.get("ts")
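Note the semantics of the "text/html" fallback: dict.get only substitutes the default when the key is missing entirely, not when the crawler supplied an explicit None. A quick illustration:

page_dict = {"url": "https://webrecorder.net/", "status": 200}
assert page_dict.get("mime", "text/html") == "text/html"  # key absent -> default used

page_dict["mime"] = None
assert page_dict.get("mime", "text/html") is None          # key present -> None wins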
@@ -131,13 +132,15 @@ class PageOps:
     ):
         """Add page to database"""
         page = self._get_page_from_dict(page_dict, crawl_id, oid)
+        print(f"PAGE: {page}", flush=True)

+        page_to_insert = page.to_dict(
+            exclude_unset=True, exclude_none=True, exclude_defaults=True
+        )
+        print(f"PAGE TO INSERT: {page_to_insert}")
+
         try:
-            await self.pages.insert_one(
-                page.to_dict(
-                    exclude_unset=True, exclude_none=True, exclude_defaults=True
-                )
-            )
+            await self.pages.insert_one(page_to_insert)
         except pymongo.errors.DuplicateKeyError:
             pass

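The refactor serializes the page once into page_to_insert (with exclude_unset/exclude_none/exclude_defaults keeping the stored document sparse), logs it, and reuses the same dict for the insert; a DuplicateKeyError is swallowed so re-adding an existing page is a no-op. A minimal sketch of that duplicate-tolerant insert, assuming a motor (async MongoDB) collection; the database name, collection name, and connection URI below are placeholders, not the project's real configuration:

import asyncio

import pymongo.errors
from motor.motor_asyncio import AsyncIOMotorClient


async def insert_page(page_doc: dict) -> None:
    # placeholder connection details for illustration only
    client = AsyncIOMotorClient("mongodb://localhost:27017")
    pages = client["example_db"]["pages"]
    try:
        await pages.insert_one(page_doc)
    except pymongo.errors.DuplicateKeyError:
        # a page with this _id already exists; treat the re-add as a no-op
        pass


asyncio.run(insert_page({"_id": "page-1", "url": "https://webrecorder.net/", "mime": "text/html"}))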
@@ -214,6 +214,8 @@ def test_qa_page_data(

     assert page["title"] == "Webrecorder"
     assert page["url"] == "https://webrecorder.net/"
+    assert page["mime"] == "text/html"
+    assert page["status"] == 200
     assert page["qa"]["textMatch"] == 1.0
     assert page["qa"]["screenshotMatch"] == 1.0
     assert page["qa"]["resourceCounts"] == {
@@ -231,6 +233,8 @@ def test_qa_page_data(
     assert page["id"]
     assert page["title"] == "Webrecorder"
     assert page["url"] == "https://webrecorder.net/"
+    assert page["mime"] == "text/html"
+    assert page["status"] == 200
     assert page["qa"]["textMatch"] == 1.0
     assert page["qa"]["screenshotMatch"] == 1.0
     assert page["qa"]["resourceCounts"] == {
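Both QA hunks assert the two fields introduced by this change. A hedged sketch of the request-and-assert pattern they follow, written as a reusable helper; the base URL, path, and headers are placeholders rather than the service's real endpoint layout:

import requests


def check_qa_page_fields(base_url: str, page_path: str, headers: dict) -> None:
    # placeholder endpoint; the real tests hit the application's pages API
    r = requests.get(f"{base_url}{page_path}", headers=headers)
    assert r.status_code == 200
    page = r.json()
    assert page["mime"] == "text/html"  # default applied when the crawler omits mime
    assert page["status"] == 200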
@@ -435,6 +435,7 @@ def test_crawl_pages(crawler_auth_headers, default_org_id, crawler_crawl_id):
         assert page.get("title") or page.get("title") is None
         assert page["loadState"]
         assert page["status"]
+        assert page["mime"]

     # Test GET page endpoint
     global page_id
@@ -453,6 +454,7 @@ def test_crawl_pages(crawler_auth_headers, default_org_id, crawler_crawl_id):
     assert page["ts"]
     assert page.get("title") or page.get("title") is None
     assert page["loadState"]
+    assert page["mime"]

     assert page["notes"] == []
     assert page.get("userid") is None
@@ -550,6 +552,7 @@ def test_crawl_pages(crawler_auth_headers, default_org_id, crawler_crawl_id):
     assert page["ts"]
     assert page.get("title") or page.get("title") is None
     assert page["loadState"]
+    assert page["mime"]

     assert page["notes"] == []
     assert page["userid"]
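The repeated title check in these hunks, assert page.get("title") or page.get("title") is None, accepts a non-empty title or a missing/None title but rejects an empty string. A short illustration of exactly what it allows:

def title_ok(page: dict) -> bool:
    # same condition as the assertion above
    return bool(page.get("title") or page.get("title") is None)


assert title_ok({"title": "Webrecorder"})  # non-empty title passes
assert title_ok({})                        # missing title passes
assert not title_ok({"title": ""})         # empty-string title fails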
@@ -626,6 +629,7 @@ def test_re_add_crawl_pages(crawler_auth_headers, default_org_id, crawler_crawl_
         assert page.get("title") or page.get("title") is None
         assert page["loadState"]
         assert page["status"]
+        assert page["mime"]

     # Ensure only superuser can re-add pages for all crawls in an org
     r = requests.post(
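The tail of this hunk leads into a permission check: only a superuser may re-add pages for all crawls in an org. A hedged sketch of that kind of negative test; the endpoint path and expected status code here are assumptions for illustration, not taken from the real API:

import requests


def assert_re_add_forbidden(base_url: str, non_admin_headers: dict) -> None:
    # hypothetical path; the real route is defined by the application
    r = requests.post(f"{base_url}/orgs/example-org/crawls/all/pages/re-add",
                      headers=non_admin_headers)
    assert r.status_code == 403  # assumed "forbidden" response for non-superusers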