# This is the Ray on Golem example yaml:
# - the example config for a testnet cluster,
# - ready for an easy switch to mainnet,
# - with properties allowing fine-tuning

# Ray on Golem cluster name
cluster_name: golem-cluster

# The maximum number of worker nodes the cluster will have at any given time
max_workers: 10

# The number of minutes a worker node has to stay idle before the autoscaler removes it
idle_timeout_minutes: 5

# The cloud provider-specific configuration properties.
provider:
  type: "external"
  use_internal_ips: true
  module: "ray_on_golem.provider.node_provider.GolemNodeProvider"
  parameters:

    # Blockchain used for payments.
    # `holesky` means running free nodes on testnet,
    # `polygon` is for mainnet operations.
    # To switch networks, comment out one line below and uncomment the other.
    payment_network: "holesky"
    #payment_network: "polygon"

    # Maximum amount of GLMs that may be spent on the whole cluster
    total_budget: 1

    # Common parameters for all node types. Can be overridden per node type in available_node_types
    node_config:
      # Parameters for Golem demands (same for head and workers)
      demand:
        # Check available versions at https://registry.golem.network/explore/golem/ray-on-golem
        image_tag: "golem/ray-on-golem:0.10.0-py3.10.13-ray2.9.3"

        # List of URLs which will be added to the Computation Manifest.
        # Every URL must include its protocol (e.g. `https://`).
        # If not provided, the demand will not use a Computation Manifest.
        outbound_urls: ["https://pypi.dev.golem.network"]

        # Minimum values of parameters offered by a Golem provider
        #min_mem_gib: 1
        #min_cpu_threads: 1
        #min_storage_gib: 1

        # Maximum values of parameters offered by a Golem provider
        #max_cpu_threads: null

      budget_control:
        per_cpu_expected_usage:
          # Per-CPU expected cost is calculated as the sum of:
          # - start_price / cpu_count
          # - env_per_hour_price * duration_hours / cpu_count
          # - cpu_per_hour_price * duration_hours * cpu_load

          # Expected CPU load and run duration of a worker; budget control uses these estimates
          # to pick the least expensive Golem provider offers first.
          # If not provided, offers will be picked at random.
          cpu_load: 0.8
          duration_hours: 0.5 # 30 minutes

          # Golem provider offers whose expected usage cost exceeds this amount of GLMs are rejected.
          max_cost: 1.5

        # Golem provider offers whose worker start price exceeds this amount of GLMs are rejected.
        max_start_price: 0.5

        # Golem provider offers whose per-hour CPU utilisation price exceeds this amount of GLMs are rejected.
        max_cpu_per_hour_price: 0.5

        # Golem provider offers whose price for each hour the worker runs exceeds this amount of GLMs are rejected.
        max_env_per_hour_price: 0.5


# Tells the autoscaler the allowed node types and the resources they provide
available_node_types:
  # Node type used for the head node
  ray.head.default:
    # The minimum number of worker nodes of this type to launch
    #min_workers: 0

    # The maximum number of worker nodes of this type to launch
    #max_workers: 0

    # The node type's CPU and GPU resources - leave empty for autodetection
    resources: {}

    # Additional parameters specific to this node type, applied on top of provider.parameters.node_config
    node_config: {}
    # Example override:
    #node_config:
    #  demand:
    #    min_mem_gib: 10

  # Node type used for worker nodes; the keys have the same meaning as in ray.head.default above
  ray.worker.default:
    # Launch at least 1 and at most 10 worker nodes of this type
    min_workers: 1
    max_workers: 10
    # Empty, as for the head node type above
    resources: {}
    node_config: {}

# Files or directories to copy to the head and worker nodes.
# The remote working directory is /root/
file_mounts: {}
# Example (replace the empty mapping above):
#file_mounts:
#  # <remote_path>: <local_path>
#  "/absolute/path/dir/": "."
#  "./relative/path/dir/": "."
#  "./relative/path/file.txt": "./file.txt"

# A list of paths to the files or directories to copy from the head node to the worker nodes
cluster_synced_files: []

# List of commands that will be run to initialize the nodes (before `setup_commands`)
initialization_commands: []
# Example (replace the empty list above):
#initialization_commands: [
#  "pip install endplay",
#]

# List of shell commands to run to set up the nodes
setup_commands: []

# Custom commands that will be run on the head node after the common setup.
head_setup_commands: []

# Custom commands that will be run on the worker nodes after the common setup.
worker_setup_commands: []

# Command that starts Ray on the head node. There is no need to change it.
head_start_ray_commands:
  - "ray start --head --node-ip-address $NODE_IP --include-dashboard=True --dashboard-host 0.0.0.0 --disable-usage-stats --autoscaling-config=~/ray_bootstrap_config.yaml"

# Command that starts Ray on the worker nodes. There is no need to change it.
worker_start_ray_commands:
  - "ray start --address $RAY_HEAD_IP:6379"

# Satisfy checks to disable the warning about legacy fields at `ray up`.
# These keys will be removed by ray-on-golem on the fly.
# Booleans are written in canonical lowercase form (`true`), which loads to the same value as `True`.
head_node: true
worker_nodes: true