Add mime field to Page model (#1678)
parent 1b034957ff
commit 87e0873f1a
@@ -1529,6 +1529,7 @@ class Page(BaseMongoModel):
     ts: Optional[datetime] = None
     loadState: Optional[int] = None
     status: Optional[int] = None
+    mime: Optional[str] = None
 
     # manual review
     userid: Optional[UUID] = None
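The new field is a standard optional Pydantic attribute: it defaults to None, so existing documents without a mime value still validate. A minimal sketch of that behavior, using a hypothetical stand-in model rather than the real Page(BaseMongoModel):

    # PageStub is a stand-in for the real Page model, not the actual class.
    from typing import Optional
    from pydantic import BaseModel

    class PageStub(BaseModel):
        status: Optional[int] = None
        mime: Optional[str] = None  # field added by this commit

    print(PageStub(status=200, mime="text/html").dict())  # {'status': 200, 'mime': 'text/html'}
    print(PageStub().dict(exclude_none=True))             # {} -- unset optionals drop out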
@@ -101,6 +101,7 @@ class PageOps:
             title=page_dict.get("title"),
             loadState=page_dict.get("loadState"),
             status=status,
+            mime=page_dict.get("mime", "text/html"),
             ts=(
                 from_k8s_date(page_dict.get("ts"))
                 if page_dict.get("ts")
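Note the fallback in page_dict.get("mime", "text/html"): a page dict that omits the key is recorded as text/html, but an explicit None passes through unchanged, since dict.get only applies the default when the key is missing:

    page_dict = {"url": "https://webrecorder.net/", "status": 200}
    page_dict.get("mime", "text/html")        # -> 'text/html' (key missing)
    {"mime": None}.get("mime", "text/html")   # -> None (key present, value null)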
@@ -131,13 +132,15 @@ class PageOps:
     ):
         """Add page to database"""
         page = self._get_page_from_dict(page_dict, crawl_id, oid)
+        print(f"PAGE: {page}", flush=True)
+
+        page_to_insert = page.to_dict(
+            exclude_unset=True, exclude_none=True, exclude_defaults=True
+        )
+        print(f"PAGE TO INSERT: {page_to_insert}")
 
         try:
-            await self.pages.insert_one(
-                page.to_dict(
-                    exclude_unset=True, exclude_none=True, exclude_defaults=True
-                )
-            )
+            await self.pages.insert_one(page_to_insert)
         except pymongo.errors.DuplicateKeyError:
             pass
 
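The refactor builds the document once as page_to_insert so it can be printed before insertion. Assuming to_dict here is a thin wrapper over Pydantic's .dict() (the wrapper itself is not shown in this diff), the three exclude flags combine to strip any field that was not explicitly set, is None, or still equals its default; a sketch:

    from typing import Optional
    from pydantic import BaseModel

    class M(BaseModel):
        a: Optional[int] = None
        b: int = 5

    m = M(a=None)
    m.dict()                    # {'a': None, 'b': 5}
    m.dict(exclude_unset=True)  # {'a': None} -- only fields passed at init
    m.dict(exclude_none=True)   # {'b': 5}
    m.dict(exclude_unset=True, exclude_none=True, exclude_defaults=True)  # {}

Swallowing pymongo.errors.DuplicateKeyError keeps page insertion idempotent: re-inserting a page whose unique key already exists is treated as a no-op rather than an error.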
@@ -214,6 +214,8 @@ def test_qa_page_data(
 
     assert page["title"] == "Webrecorder"
     assert page["url"] == "https://webrecorder.net/"
+    assert page["mime"] == "text/html"
+    assert page["status"] == 200
     assert page["qa"]["textMatch"] == 1.0
     assert page["qa"]["screenshotMatch"] == 1.0
     assert page["qa"]["resourceCounts"] == {
@@ -231,6 +233,8 @@ def test_qa_page_data(
     assert page["id"]
     assert page["title"] == "Webrecorder"
     assert page["url"] == "https://webrecorder.net/"
+    assert page["mime"] == "text/html"
+    assert page["status"] == 200
     assert page["qa"]["textMatch"] == 1.0
     assert page["qa"]["screenshotMatch"] == 1.0
     assert page["qa"]["resourceCounts"] == {
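The remaining test additions follow the suite's existing list-then-assert pattern. A hedged sketch of the new checks; the base URL and the pages endpoint path below are placeholders inferred from the fixtures visible in this diff, not confirmed by it:

    import requests

    # Placeholders standing in for the suite's fixtures and config:
    api_prefix = "http://localhost:30870/api"            # assumed
    default_org_id = "..."                               # fixture in real tests
    crawler_crawl_id = "..."                             # fixture in real tests
    crawler_auth_headers = {"Authorization": "Bearer ..."}

    r = requests.get(
        f"{api_prefix}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages",
        headers=crawler_auth_headers,
    )
    for page in r.json()["items"]:
        assert page["mime"]    # new: every crawled page reports a mime type
        assert page["status"]  # HTTP status, asserted alongside it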
@@ -435,6 +435,7 @@ def test_crawl_pages(crawler_auth_headers, default_org_id, crawler_crawl_id):
         assert page.get("title") or page.get("title") is None
         assert page["loadState"]
         assert page["status"]
+        assert page["mime"]
 
     # Test GET page endpoint
     global page_id
@@ -453,6 +454,7 @@ def test_crawl_pages(crawler_auth_headers, default_org_id, crawler_crawl_id):
     assert page["ts"]
     assert page.get("title") or page.get("title") is None
     assert page["loadState"]
+    assert page["mime"]
 
     assert page["notes"] == []
     assert page.get("userid") is None
@@ -550,6 +552,7 @@ def test_crawl_pages(crawler_auth_headers, default_org_id, crawler_crawl_id):
     assert page["ts"]
     assert page.get("title") or page.get("title") is None
     assert page["loadState"]
+    assert page["mime"]
 
     assert page["notes"] == []
     assert page["userid"]
@@ -626,6 +629,7 @@ def test_re_add_crawl_pages(crawler_auth_headers, default_org_id, crawler_crawl_
         assert page.get("title") or page.get("title") is None
         assert page["loadState"]
         assert page["status"]
+        assert page["mime"]
 
     # Ensure only superuser can re-add pages for all crawls in an org
     r = requests.post(