Add mime field to Page model (#1678)
parent 1b034957ff
commit 87e0873f1a
@@ -1529,6 +1529,7 @@ class Page(BaseMongoModel):
     ts: Optional[datetime] = None
     loadState: Optional[int] = None
     status: Optional[int] = None
+    mime: Optional[str] = None
 
     # manual review
     userid: Optional[UUID] = None
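Because `mime` is declared `Optional[str] = None`, pages stored before this change simply lack the field, and serializing with `exclude_none=True` omits it rather than writing a null. A minimal sketch of that behavior, using a hypothetical plain-Pydantic stand-in (`PageSketch`) for the real Page model, which inherits from BaseMongoModel:

    from typing import Optional

    from pydantic import BaseModel

    class PageSketch(BaseModel):
        # hypothetical stand-in for Page, trimmed to the fields relevant here
        url: str
        status: Optional[int] = None
        mime: Optional[str] = None

    page = PageSketch(url="https://webrecorder.net/")
    # exclude_none drops unset optional fields (Pydantic v1 API), mirroring
    # how add_page builds its insert dict in a later hunk
    assert "mime" not in page.dict(exclude_none=True)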
@@ -101,6 +101,7 @@ class PageOps:
             title=page_dict.get("title"),
             loadState=page_dict.get("loadState"),
             status=status,
+            mime=page_dict.get("mime", "text/html"),
             ts=(
                 from_k8s_date(page_dict.get("ts"))
                 if page_dict.get("ts")
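The "text/html" fallback in `page_dict.get("mime", "text/html")` only applies when the crawler's page dict has no mime key at all; a key explicitly set to None passes through unchanged, which is standard dict.get behavior. A quick illustrative check (the values are made up):

    # key present: the crawler-reported value wins
    assert {"mime": "application/pdf"}.get("mime", "text/html") == "application/pdf"
    # key absent: fall back to "text/html"
    assert {}.get("mime", "text/html") == "text/html"
    # caveat: an explicit None is returned as-is, not replaced by the default
    assert {"mime": None}.get("mime", "text/html") is None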
@@ -131,13 +132,15 @@ class PageOps:
     ):
         """Add page to database"""
         page = self._get_page_from_dict(page_dict, crawl_id, oid)
+        print(f"PAGE: {page}", flush=True)
+
+        page_to_insert = page.to_dict(
+            exclude_unset=True, exclude_none=True, exclude_defaults=True
+        )
+        print(f"PAGE TO INSERT: {page_to_insert}")
 
         try:
-            await self.pages.insert_one(
-                page.to_dict(
-                    exclude_unset=True, exclude_none=True, exclude_defaults=True
-                )
-            )
+            await self.pages.insert_one(page_to_insert)
         except pymongo.errors.DuplicateKeyError:
             pass
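Two things happen in this hunk: the insert payload is computed once as page_to_insert (the surrounding print calls read as debug output), and DuplicateKeyError is swallowed so that re-adding an existing page is a no-op. A minimal synchronous sketch of that idempotent-insert pattern, assuming a local MongoDB and made-up database/collection names (the real code goes through an async driver via self.pages):

    import pymongo

    client = pymongo.MongoClient("mongodb://localhost:27017")  # assumed local instance
    pages = client["sketch_db"]["pages"]  # hypothetical database/collection names

    page_to_insert = {"_id": "page-1", "url": "https://webrecorder.net/", "mime": "text/html"}

    try:
        pages.insert_one(page_to_insert)
    except pymongo.errors.DuplicateKeyError:
        # _id already present: treat the re-add as a no-op, as add_page does
        pass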
@@ -214,6 +214,8 @@ def test_qa_page_data(
 
     assert page["title"] == "Webrecorder"
     assert page["url"] == "https://webrecorder.net/"
+    assert page["mime"] == "text/html"
+    assert page["status"] == 200
     assert page["qa"]["textMatch"] == 1.0
     assert page["qa"]["screenshotMatch"] == 1.0
     assert page["qa"]["resourceCounts"] == {
@@ -231,6 +233,8 @@
     assert page["id"]
     assert page["title"] == "Webrecorder"
     assert page["url"] == "https://webrecorder.net/"
+    assert page["mime"] == "text/html"
+    assert page["status"] == 200
     assert page["qa"]["textMatch"] == 1.0
     assert page["qa"]["screenshotMatch"] == 1.0
     assert page["qa"]["resourceCounts"] == {
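Note the difference in strictness between the two test sections: the QA data tests above pin exact values ("text/html", 200) because the fixture crawl is a known HTML page, while the crawl-page tests below only assert that the new field is present and truthy.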
@@ -435,6 +435,7 @@ def test_crawl_pages(crawler_auth_headers, default_org_id, crawler_crawl_id):
         assert page.get("title") or page.get("title") is None
         assert page["loadState"]
         assert page["status"]
+        assert page["mime"]
 
     # Test GET page endpoint
     global page_id
@@ -453,6 +454,7 @@ def test_crawl_pages(crawler_auth_headers, default_org_id, crawler_crawl_id):
     assert page["ts"]
     assert page.get("title") or page.get("title") is None
     assert page["loadState"]
+    assert page["mime"]
 
     assert page["notes"] == []
     assert page.get("userid") is None
@@ -550,6 +552,7 @@ def test_crawl_pages(crawler_auth_headers, default_org_id, crawler_crawl_id):
     assert page["ts"]
     assert page.get("title") or page.get("title") is None
     assert page["loadState"]
+    assert page["mime"]
 
     assert page["notes"] == []
     assert page["userid"]
@@ -626,6 +629,7 @@ def test_re_add_crawl_pages(crawler_auth_headers, default_org_id, crawler_crawl_
        assert page.get("title") or page.get("title") is None
        assert page["loadState"]
        assert page["status"]
+       assert page["mime"]
 
     # Ensure only superuser can re-add pages for all crawls in an org
     r = requests.post(
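For a sense of how such assertions run end to end, here is a condensed sketch of fetching pages over the API and checking the new field. Everything in it is an assumption for illustration rather than taken from this diff: the API prefix, the bearer-token header, the endpoint path, and the "items" key of the paginated response:

    import requests

    API_PREFIX = "http://localhost:8000/api"  # assumed local backend
    HEADERS = {"Authorization": "Bearer <jwt>"}  # assumed auth header shape
    ORG_ID = "<org-uuid>"  # fill in real ids
    CRAWL_ID = "<crawl-id>"

    r = requests.get(
        f"{API_PREFIX}/orgs/{ORG_ID}/crawls/{CRAWL_ID}/pages",
        headers=HEADERS,
    )
    assert r.status_code == 200
    for page in r.json()["items"]:  # "items" key assumed from the paginated API shape
        # after this change, every stored page should carry a mime value
        assert page["mime"]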