diff --git a/backend/btrixcloud/crawlconfigs.py b/backend/btrixcloud/crawlconfigs.py index db5b1bb7..97d1f5f6 100644 --- a/backend/btrixcloud/crawlconfigs.py +++ b/backend/btrixcloud/crawlconfigs.py @@ -79,6 +79,8 @@ class RawCrawlConfig(BaseModel): blockAds: Optional[bool] = False behaviorTimeout: Optional[int] + pageLoadTimeout: Optional[int] + pageExtraDelay: Optional[int] = 0 workers: Optional[int] diff --git a/backend/btrixcloud/main.py b/backend/btrixcloud/main.py index 8799709b..a984a868 100644 --- a/backend/btrixcloud/main.py +++ b/backend/btrixcloud/main.py @@ -54,6 +54,9 @@ def main(): "defaultBehaviorTimeSeconds": int( os.environ.get("DEFAULT_BEHAVIOR_TIME_SECONDS", 300) ), + "defaultPageLoadTimeSeconds": int( + os.environ.get("DEFAULT_PAGE_LOAD_TIME_SECONDS", 120) + ), "maxPagesPerCrawl": int(os.environ.get("MAX_PAGES_PER_CRAWL", 0)), } diff --git a/backend/test/test_settings.py b/backend/test/test_settings.py index 27697c8a..f3f4da1e 100644 --- a/backend/test/test_settings.py +++ b/backend/test/test_settings.py @@ -14,4 +14,5 @@ def test_settings(): "jwtTokenLifetime": 86400, "defaultBehaviorTimeSeconds": 300, "maxPagesPerCrawl": 2, + "defaultPageLoadTimeSeconds": 120 } diff --git a/chart/templates/configmap.yaml b/chart/templates/configmap.yaml index b71aeabc..336a2c36 100644 --- a/chart/templates/configmap.yaml +++ b/chart/templates/configmap.yaml @@ -54,6 +54,8 @@ data: DEFAULT_BEHAVIOR_TIME_SECONDS: "{{ .Values.default_behavior_time_seconds }}" + DEFAULT_PAGE_LOAD_TIME_SECONDS: "{{ .Values.default_page_load_time_seconds }}" + MAX_PAGES_PER_CRAWL: "{{ .Values.max_pages_per_crawl | default 0 }}" WEB_CONCURRENCY: "{{ .Values.backend_workers | default 4 }}" diff --git a/chart/values.yaml b/chart/values.yaml index 5fc83d53..dd7431b0 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -18,6 +18,9 @@ jwt_token_lifetime_minutes: 1440 # default time to run behaviors on each page (in seconds) default_behavior_time_seconds: 300 +# default time to wait for page to fully load before running behaviors (in seconds) +default_page_load_time_seconds: 120 + # max pages per crawl # set to non-zero value to enforce global max pages per crawl limit # if set, each workflow can have a lower limit, but not higher