Add basic backend validation for selectLinks (#2510)
Follow-up to #2152 (related to https://github.com/webrecorder/browsertrix/pull/2487). This PR provides very basic validation of the `config.selectLinks` argument on workflow creation and update. Namely, it checks that: (1) `config.selectLinks` is not an empty array, and (2) each entry consists of two non-empty text sequences separated by `->`. At this point we're not validating the actual CSS selector on the backend, though we could add that down the road. Tests have been added accordingly. Co-authored-by: Ilya Kreymer <ikreymer@gmail.com>
This commit is contained in:
		
							parent
							
								
									23f9e08a22
								
							
						
					
					
						commit
						f84f6f55e0
					
				@ -233,6 +233,8 @@ class CrawlConfigOps:
 | 
			
		||||
                exclude = [exclude]
 | 
			
		||||
            validate_regexes(exclude)
 | 
			
		||||
 | 
			
		||||
        self._validate_link_selectors(config_in.config.selectLinks)
 | 
			
		||||
 | 
			
		||||
        if config_in.config.customBehaviors:
 | 
			
		||||
            for url in config_in.config.customBehaviors:
 | 
			
		||||
                self._validate_custom_behavior_url_syntax(url)
 | 
			
		||||
@ -297,6 +299,24 @@ class CrawlConfigOps:
 | 
			
		||||
            execMinutesQuotaReached=exec_mins_quota_reached,
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    def _validate_link_selectors(self, link_selectors: List[str]):
 | 
			
		||||
        """Validate link selectors
 | 
			
		||||
 | 
			
		||||
        Ensure at least one link selector is set and that all the link slectors passed
 | 
			
		||||
        follow expected syntax: selector->attribute/property.
 | 
			
		||||
 | 
			
		||||
        We don't yet check the validity of the CSS selector itself.
 | 
			
		||||
        """
 | 
			
		||||
        if not link_selectors:
 | 
			
		||||
            raise HTTPException(status_code=400, detail="invalid_link_selector")
 | 
			
		||||
 | 
			
		||||
        for link_selector in link_selectors:
 | 
			
		||||
            parts = link_selector.split("->")
 | 
			
		||||
            if not len(parts) == 2:
 | 
			
		||||
                raise HTTPException(status_code=400, detail="invalid_link_selector")
 | 
			
		||||
            if not parts[0] or not parts[1]:
 | 
			
		||||
                raise HTTPException(status_code=400, detail="invalid_link_selector")
 | 
			
		||||
 | 
			
		||||
    def _validate_custom_behavior_url_syntax(self, url: str) -> Tuple[bool, List[str]]:
 | 
			
		||||
        """Validate custom behaviors are valid URLs after removing custom git syntax"""
 | 
			
		||||
        git_prefix = "git+"
 | 
			
		||||
@ -379,6 +399,9 @@ class CrawlConfigOps:
 | 
			
		||||
                exclude = [exclude]
 | 
			
		||||
            validate_regexes(exclude)
 | 
			
		||||
 | 
			
		||||
        if update.config and update.config.selectLinks is not None:
 | 
			
		||||
            self._validate_link_selectors(update.config.selectLinks)
 | 
			
		||||
 | 
			
		||||
        if update.config and update.config.customBehaviors:
 | 
			
		||||
            for url in update.config.customBehaviors:
 | 
			
		||||
                self._validate_custom_behavior_url_syntax(url)
 | 
			
		||||
 | 
			
		||||
@ -172,6 +172,24 @@ def test_update_config_invalid_exclude_regex(
 | 
			
		||||
    assert r.status_code == 400
 | 
			
		||||
    assert r.json()["detail"] == "invalid_regex"
 | 
			
		||||
 | 
			
		||||
def test_update_config_invalid_link_selector(
    crawler_auth_headers, default_org_id, sample_crawl_data
):
    """Updating a workflow with invalid link selectors is rejected with a 400"""
    config_url = f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/{cid}/"

    # First an empty list, then a list whose second entry has no selector part
    for invalid_select_links in ([], ["a[href]->href", "->href"]):
        resp = requests.patch(
            config_url,
            headers=crawler_auth_headers,
            json={"config": {"selectLinks": invalid_select_links}},
        )
        assert resp.status_code == 400
        assert resp.json()["detail"] == "invalid_link_selector"
 | 
			
		||||
 | 
			
		||||
def test_verify_default_select_links(
 | 
			
		||||
    crawler_auth_headers, default_org_id, sample_crawl_data
 | 
			
		||||
@ -545,6 +563,28 @@ def test_add_crawl_config_invalid_exclude_regex(
 | 
			
		||||
    assert r.json()["detail"] == "invalid_regex"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_add_crawl_config_invalid_link_selectors(
    crawler_auth_headers, default_org_id, sample_crawl_data
):
    """Creating a workflow with invalid link selectors is rejected with a 400"""
    create_url = f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/"

    # First an empty list, then a list whose second entry has no selector part
    for invalid_select_links in ([], ["a[href]->href", "->href"]):
        sample_crawl_data["config"]["selectLinks"] = invalid_select_links
        resp = requests.post(
            create_url,
            headers=crawler_auth_headers,
            json=sample_crawl_data,
        )
        assert resp.status_code == 400
        assert resp.json()["detail"] == "invalid_link_selector"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_add_crawl_config_custom_behaviors_invalid_url(
 | 
			
		||||
    crawler_auth_headers, default_org_id, sample_crawl_data
 | 
			
		||||
):
 | 
			
		||||
 | 
			
		||||
@ -77,7 +77,7 @@ const errorFor: Record<ValidationErrorCode, string> = {
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
const inputStyle = [
 | 
			
		||||
  tw`[--sl-input-background-color-hover:transparent] [--sl-input-background-color:transparent] [--sl-input-border-color-hover:transparent] [--sl-input-border-radius-medium:0] [--sl-input-spacing-medium:var(--sl-spacing-small)]`,
 | 
			
		||||
  tw`[--sl-input-border-radius-medium:0] [--sl-input-spacing-medium:var(--sl-spacing-small)] [--sl-input-background-color-hover:transparent] [--sl-input-background-color:transparent] [--sl-input-border-color-hover:transparent]`,
 | 
			
		||||
  tw`data-[valid]:[--sl-input-border-color:transparent]`,
 | 
			
		||||
  tw`part-[form-control-help-text]:mx-1 part-[form-control-help-text]:mb-1`,
 | 
			
		||||
];
 | 
			
		||||
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user