Add basic backend validation for selectLinks (#2510)
Follow-up to #2152
Related to https://github.com/webrecorder/browsertrix/pull/2487

This PR provides very basic validation of the `config.selectLinks` argument on workflow creation and update. Namely, it checks that:
- `config.selectLinks` is not an empty array
- Each entry consists of two non-empty text sequences separated by `->`

At this point we're not validating the actual CSS selector on the backend, though we could add that down the road.

Tests have been added accordingly.

Co-authored-by: Ilya Kreymer <ikreymer@gmail.com>
This commit is contained in:
parent
23f9e08a22
commit
f84f6f55e0
@ -233,6 +233,8 @@ class CrawlConfigOps:
|
||||
exclude = [exclude]
|
||||
validate_regexes(exclude)
|
||||
|
||||
self._validate_link_selectors(config_in.config.selectLinks)
|
||||
|
||||
if config_in.config.customBehaviors:
|
||||
for url in config_in.config.customBehaviors:
|
||||
self._validate_custom_behavior_url_syntax(url)
|
||||
@ -297,6 +299,24 @@ class CrawlConfigOps:
|
||||
execMinutesQuotaReached=exec_mins_quota_reached,
|
||||
)
|
||||
|
||||
def _validate_link_selectors(self, link_selectors: List[str]):
|
||||
"""Validate link selectors
|
||||
|
||||
Ensure at least one link selector is set and that all the link slectors passed
|
||||
follow expected syntax: selector->attribute/property.
|
||||
|
||||
We don't yet check the validity of the CSS selector itself.
|
||||
"""
|
||||
if not link_selectors:
|
||||
raise HTTPException(status_code=400, detail="invalid_link_selector")
|
||||
|
||||
for link_selector in link_selectors:
|
||||
parts = link_selector.split("->")
|
||||
if not len(parts) == 2:
|
||||
raise HTTPException(status_code=400, detail="invalid_link_selector")
|
||||
if not parts[0] or not parts[1]:
|
||||
raise HTTPException(status_code=400, detail="invalid_link_selector")
|
||||
|
||||
def _validate_custom_behavior_url_syntax(self, url: str) -> Tuple[bool, List[str]]:
|
||||
"""Validate custom behaviors are valid URLs after removing custom git syntax"""
|
||||
git_prefix = "git+"
|
||||
@ -379,6 +399,9 @@ class CrawlConfigOps:
|
||||
exclude = [exclude]
|
||||
validate_regexes(exclude)
|
||||
|
||||
if update.config and update.config.selectLinks is not None:
|
||||
self._validate_link_selectors(update.config.selectLinks)
|
||||
|
||||
if update.config and update.config.customBehaviors:
|
||||
for url in update.config.customBehaviors:
|
||||
self._validate_custom_behavior_url_syntax(url)
|
||||
|
@ -172,6 +172,24 @@ def test_update_config_invalid_exclude_regex(
|
||||
assert r.status_code == 400
|
||||
assert r.json()["detail"] == "invalid_regex"
|
||||
|
||||
def test_update_config_invalid_link_selector(
    crawler_auth_headers, default_org_id, sample_crawl_data
):
    """PATCH requests with invalid ``selectLinks`` values must return a 400."""
    endpoint = f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/{cid}/"

    # First an empty list, then a list whose second entry has nothing
    # before the "->" separator.
    invalid_values = ([], ["a[href]->href", "->href"])

    for select_links in invalid_values:
        r = requests.patch(
            endpoint,
            headers=crawler_auth_headers,
            json={"config": {"selectLinks": select_links}},
        )
        assert r.status_code == 400
        assert r.json()["detail"] == "invalid_link_selector"
|
||||
|
||||
def test_verify_default_select_links(
|
||||
crawler_auth_headers, default_org_id, sample_crawl_data
|
||||
@ -545,6 +563,28 @@ def test_add_crawl_config_invalid_exclude_regex(
|
||||
assert r.json()["detail"] == "invalid_regex"
|
||||
|
||||
|
||||
def test_add_crawl_config_invalid_link_selectors(
    crawler_auth_headers, default_org_id, sample_crawl_data
):
    """POST requests with invalid ``selectLinks`` values must return a 400."""
    endpoint = f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/"

    # First an empty list, then a list whose second entry has nothing
    # before the "->" separator.
    invalid_values = ([], ["a[href]->href", "->href"])

    for select_links in invalid_values:
        # The fixture dict is updated in place before each request.
        sample_crawl_data["config"]["selectLinks"] = select_links
        r = requests.post(
            endpoint,
            headers=crawler_auth_headers,
            json=sample_crawl_data,
        )
        assert r.status_code == 400
        assert r.json()["detail"] == "invalid_link_selector"
|
||||
|
||||
|
||||
def test_add_crawl_config_custom_behaviors_invalid_url(
|
||||
crawler_auth_headers, default_org_id, sample_crawl_data
|
||||
):
|
||||
|
@ -77,7 +77,7 @@ const errorFor: Record<ValidationErrorCode, string> = {
|
||||
};
|
||||
|
||||
const inputStyle = [
|
||||
tw`[--sl-input-background-color-hover:transparent] [--sl-input-background-color:transparent] [--sl-input-border-color-hover:transparent] [--sl-input-border-radius-medium:0] [--sl-input-spacing-medium:var(--sl-spacing-small)]`,
|
||||
tw`[--sl-input-border-radius-medium:0] [--sl-input-spacing-medium:var(--sl-spacing-small)] [--sl-input-background-color-hover:transparent] [--sl-input-background-color:transparent] [--sl-input-border-color-hover:transparent]`,
|
||||
tw`data-[valid]:[--sl-input-border-color:transparent]`,
|
||||
tw`part-[form-control-help-text]:mx-1 part-[form-control-help-text]:mb-1`,
|
||||
];
|
||||
|
Loading…
Reference in New Issue
Block a user