-
Notifications
You must be signed in to change notification settings - Fork 0
148 lines (144 loc) · 5.72 KB
/
runner.yml
File metadata and controls
148 lines (144 loc) · 5.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
name: Lambda Runner
#
# Reusable workflow (triggered via `workflow_call`) that checks out
# Open-Athena/lambda-gha and runs its action to create a Lambda runner.
#
# Configuration variables read through the `vars` context
# (can be set at org/repo level):
# LAMBDA_SSH_KEY_NAMES - Comma-separated list of SSH key names registered in Lambda Labs
# LAMBDA_INSTANCE_TYPE - Default instance type (e.g., gpu_1x_a10)
# LAMBDA_REGION - Default region (e.g., us-south-1)
# MAX_INSTANCE_LIFETIME - Fallback for the `max_instance_lifetime` input
# RUNNER_GRACE_PERIOD - Fallback for the `runner_grace_period` input
# RUNNER_INITIAL_GRACE_PERIOD - Fallback for the `runner_initial_grace_period` input
# RUNNER_POLL_INTERVAL - Fallback for the `runner_poll_interval` input
# RUNNER_REGISTRATION_TIMEOUT - Fallback for the `runner_registration_timeout` input
#
# Priority: inputs > vars > defaults
on:
  workflow_call:
    # Secrets the caller must pass; all three are required.
    secrets:
      GH_SA_TOKEN:
        description: "GitHub token with permissions to manage self-hosted runners"
        required: true
      LAMBDA_API_KEY:
        description: "Lambda Labs API key"
        required: true
      LAMBDA_SSH_PRIVATE_KEY:
        description: "SSH private key for connecting to Lambda instances"
        required: true
    # Every input is optional and typed as a string (including the numeric
    # ones). Inputs declared without a `default:` are empty when the caller
    # omits them; the launch job then falls back to a `vars.*` value for
    # several of them.
    inputs:
      action_ref:
        description: "lambda-gha Git ref (branch/tag/SHA) to checkout"
        required: false
        type: string
        default: "main"
      check_availability:
        description: "Pre-check capacity before attempting launches (default: auto, enabled when multiple types/regions)"
        required: false
        type: string
      debug:
        description: "Debug mode: false=off, true/trace=set -x only, number=set -x + sleep N minutes before shutdown"
        required: false
        type: string
        default: "false"
      extra_gh_labels:
        description: "Extra GitHub labels for the runner (comma-separated)"
        required: false
        type: string
      instance_count:
        description: "Number of instances to create (for parallel jobs)"
        required: false
        type: string
        default: "1"
      instance_type:
        description: "Lambda instance type(s), comma-separated for fallback (e.g., gpu_1x_a10,gpu_1x_a100)"
        required: false
        type: string
      max_instance_lifetime:
        description: "Maximum instance lifetime in minutes before shutdown (default: 360)"
        required: false
        type: string
      name:
        description: "Name for the launch job"
        required: false
        type: string
      region:
        description: "Lambda region(s), comma-separated for fallback (e.g., us-east-1,us-west-1)"
        required: false
        type: string
      retry_count:
        description: "Number of retries per instance type/region combination (default: 1)"
        required: false
        type: string
        default: "1"
      retry_delay:
        description: "Initial delay between retries in seconds (default: 5)"
        required: false
        type: string
        default: "5"
      runner_grace_period:
        description: "Seconds before terminating after last job completes (default: 60)"
        required: false
        type: string
      runner_initial_grace_period:
        description: "Seconds before terminating if no jobs start (default: 180)"
        required: false
        type: string
      runner_poll_interval:
        description: "Seconds between termination condition checks (default: 10)"
        required: false
        type: string
      runner_registration_timeout:
        description: "Max seconds to wait for runner registration (default: 300)"
        required: false
        type: string
      ssh_key_names:
        description: "SSH key names registered in Lambda Labs (comma-separated)"
        required: false
        type: string
      userdata:
        description: "Additional script to run before runner setup"
        required: false
        type: string
    # Values re-exported from the `launch` job's outputs.
    outputs:
      id:
        description: "Runner label for runs-on (single instance)"
        value: ${{ jobs.launch.outputs.id }}
      mtx:
        description: "JSON array of objects for matrix strategies"
        value: ${{ jobs.launch.outputs.mtx }}
jobs:
  # Single job: validates configuration, checks out lambda-gha, and runs the
  # action at the repository root to create the runner instance(s).
  launch:
    # Caller-supplied name wins; otherwise "Launch <instance type>", where the
    # type shown is the input, then vars.LAMBDA_INSTANCE_TYPE, then 'gpu_1x_a10'.
    name: ${{ inputs.name || format('Launch {0}', inputs.instance_type || vars.LAMBDA_INSTANCE_TYPE || 'gpu_1x_a10') }}
    runs-on: ubuntu-latest
    outputs:
      id: ${{ steps.lambda-start.outputs.label }}
      mtx: ${{ steps.lambda-start.outputs.mtx }}
    steps:
      # Fail fast when neither the input nor the configuration variable
      # provides SSH key names.
      - name: Check SSH key configuration
        env:
          # Passed through the environment instead of being interpolated into
          # the script text, so the value is never parsed as shell code.
          SSH_KEY_NAMES: ${{ inputs.ssh_key_names || vars.LAMBDA_SSH_KEY_NAMES }}
        run: |
          if [ -z "$SSH_KEY_NAMES" ]; then
            echo "ERROR: SSH key names must be provided either as input or as LAMBDA_SSH_KEY_NAMES variable"
            exit 1
          fi
      - name: Checkout lambda-gha repository
        uses: actions/checkout@v4
        with:
          repository: Open-Athena/lambda-gha
          ref: ${{ inputs.action_ref }}
      # Runs the action from the checkout above (`uses: ./`).
      - name: Create Lambda runner
        id: lambda-start
        uses: ./
        with:
          action_ref: ${{ inputs.action_ref }}
          check_availability: ${{ inputs.check_availability }}
          debug: ${{ inputs.debug }}
          extra_gh_labels: ${{ inputs.extra_gh_labels }}
          instance_count: ${{ inputs.instance_count }}
          # `input || vars.X`: an empty input falls back to the configuration
          # variable; if both are empty the action receives an empty value.
          instance_type: ${{ inputs.instance_type || vars.LAMBDA_INSTANCE_TYPE }}
          max_instance_lifetime: ${{ inputs.max_instance_lifetime || vars.MAX_INSTANCE_LIFETIME }}
          region: ${{ inputs.region || vars.LAMBDA_REGION }}
          retry_count: ${{ inputs.retry_count }}
          retry_delay: ${{ inputs.retry_delay }}
          runner_grace_period: ${{ inputs.runner_grace_period || vars.RUNNER_GRACE_PERIOD }}
          runner_initial_grace_period: ${{ inputs.runner_initial_grace_period || vars.RUNNER_INITIAL_GRACE_PERIOD }}
          runner_poll_interval: ${{ inputs.runner_poll_interval || vars.RUNNER_POLL_INTERVAL }}
          runner_registration_timeout: ${{ inputs.runner_registration_timeout || vars.RUNNER_REGISTRATION_TIMEOUT }}
          ssh_key_names: ${{ inputs.ssh_key_names || vars.LAMBDA_SSH_KEY_NAMES }}
          ssh_private_key: ${{ secrets.LAMBDA_SSH_PRIVATE_KEY }}
          userdata: ${{ inputs.userdata }}
        env:
          # The GH_SA_TOKEN secret is exposed to the action under the name GH_PAT.
          GH_PAT: ${{ secrets.GH_SA_TOKEN }}
          LAMBDA_API_KEY: ${{ secrets.LAMBDA_API_KEY }}