- use python-on-whales to drive the Docker CLI directly, creating a docker stack for each crawl or profile browser (a minimal deploy sketch follows this list)
- configure storages via a storages.yaml secret
- add crawl_job, profile_job, split into base and k8s/swarm implementations
- split manager into a base crawlmanager and k8s/swarm implementations
- swarm: load initial scale from the db to avoid modifying fixed configs; in k8s, load it from a configmap
- swarm: support scheduled jobs via the swarm-cronjob service
- remove docker dependencies (aiodocker, apscheduler, scheduling)
- swarm: when using local minio, expose it via a /data/ route in nginx via an extra include (in k8s, the include dir is empty and routing is handled via the ingress)
- k8s: clean up the minio chart: move init containers to minio.yaml
- swarm: stateful-set-style implementation to stay consistent with k8s scaling:
  - don't use service replicas
  - create a unique service with '-N' appended and allocate a unique volume for each replica
  - allows crawl containers to be restarted w/o losing data
  - add a volume-pruning background service, as volumes can only be deleted after the service shuts down fully (see the pruning sketch after the template below)
- watch: fully simplify routing, route via replica index instead of IP for both k8s and swarm
- rename network btrix-cloud-net -> btrix-net to avoid conflict with the compose network
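
For illustration, here is a minimal sketch of how a per-crawl stack could be deployed with python-on-whales by rendering the template below and passing it to `stack deploy`. The `run_crawl_stack` helper, the `templates/crawl_job.yaml` path, the stack naming, and the parameter names are assumptions for the sketch, not the actual backend API.

```python
# Sketch only: deploy one docker stack per crawl with python-on-whales.
# The helper name, template path, and parameters are illustrative assumptions.
import tempfile

from jinja2 import Environment, FileSystemLoader
from python_on_whales import DockerClient

docker = DockerClient()
templates = Environment(loader=FileSystemLoader("templates"))


def run_crawl_stack(crawl_id: str, index: int, params: dict) -> None:
    """Render the crawl job template and deploy it as a docker stack."""
    rendered = templates.get_template("crawl_job.yaml").render(
        id=crawl_id, index=index, **params
    )

    # python-on-whales reads compose/stack files from disk, so write the
    # rendered template to a temporary file first
    with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as fh:
        fh.write(rendered)

    # equivalent to: docker stack deploy -c <rendered file> crawl-<id>-<index>
    docker.stack.deploy(f"crawl-{crawl_id}-{index}", compose_files=[fh.name])
```

The stack template that such a call would render is shown below: it defines the crawler service, a shared redis started only with the first replica, the external btrix-net network, and a uniquely named volume per replica.
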
version: '3.9'

services:
  crawler:
    image: {{ crawler_image }}
    command:
      - crawl
      - --config
      - /crawlconfig.json
      - --redisStoreUrl
      - {{ redis_url }}
{%- if profile_filename %}
      - --profile
      - "@profiles/{{ profile_filename }}"
{%- endif %}

    hostname: "crawl-{{ id }}-{{ index }}"

    networks:
      - btrix

    configs:
      - crawlconfig.json

    volumes:
      - crawl-data:/crawls

    stop_grace_period: 1000s

    deploy:
      endpoint_mode: dnsrr
      replicas: 1
      labels:
        crawl: {{ id }}
        role: crawler

      resources:
        limits:
          cpus: "{{ crawler_limits_cpu }}"
          memory: "{{ crawler_limits_memory }}"
        reservations:
          cpus: "{{ crawler_requests_cpu }}"
          memory: "{{ crawler_requests_memory }}"

    environment:
      - CRAWL_ID={{ id }}

      - STORE_ENDPOINT_URL={{ endpoint_url }}
      - STORE_ACCESS_KEY={{ access_key }}
      - STORE_SECRET_KEY={{ secret_key }}

      - STORE_PATH={{ storage_path }}
      - STORE_FILENAME={{ storage_filename }}
      - STORE_USER={{ userid }}

{%- if auth_token %}
      - WACZ_SIGN_TOKEN={{ auth_token }}
      - WACZ_SIGN_URL=http://authsign:5053/sign
{%- endif %}

      - WEBHOOK_URL={{ redis_url }}/crawls-done
      - CRAWL_ARGS={{ crawler_args }}

{% if index == 0 %}
  # a single shared redis is created only with the first (index == 0) crawler stack
  redis:
    image: {{ redis_image }}
    command: ["redis-server", "--appendonly", "yes"]

    deploy:
      endpoint_mode: dnsrr
      replicas: 1
      labels:
        crawl: {{ id }}
        role: redis

    networks:
      - btrix

{% endif %}

networks:
  btrix:
    external:
      name: btrix-net

configs:
  crawlconfig.json:
    external: true
    name: crawl-config-{{ cid }}

volumes:
  # one uniquely named volume per crawl replica, so crawl containers can be
  # restarted without losing data
  crawl-data:
    name: "crawl-{{ id }}-{{ index }}"
    labels:
      btrix.crawl: {{ id }}
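
Since a replica's volume can only be removed once its service has fully shut down, pruning has to be retried in the background. Below is a rough sketch of such a loop using python-on-whales; the `btrix.crawl` label matches the template above, but the interval, the exception handling, and the decision of which crawls are actually finished (omitted here) are assumptions about the real service.

```python
# Rough sketch of the volume-pruning background service, using python-on-whales.
# Assumes crawl volumes carry the btrix.crawl label; the check for whether a
# crawl has actually finished is omitted.
import asyncio

from python_on_whales import DockerClient
from python_on_whales.exceptions import DockerException

docker = DockerClient()


async def prune_crawl_volumes(interval: int = 60) -> None:
    """Periodically try to remove crawl volumes; a volume still attached to a
    running crawler service can't be removed yet, so it is retried next round."""
    while True:
        for volume in docker.volume.list(filters={"label": "btrix.crawl"}):
            try:
                docker.volume.remove(volume)
            except DockerException:
                # service not fully shut down yet -- try again on the next pass
                pass
        await asyncio.sleep(interval)
```
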