#!/usr/bin/env bash
#
# tb-recover: long-running monitor for Thunderbolt network interfaces.
#
# Each pass (re)enslaves every thunderbolt* interface to a bridge, escalates
# through rescans and a bolt.service restart when no such interface exists,
# and bounces an interface whose peer repeatedly fails a ping probe.
#
# Optional environment overrides:
#   TB_PEER_FAIL_THRESHOLD        consecutive failed probes before a link cycle
#   TB_IFACE_CYCLE_COOLDOWN_SEC   minimum seconds between cycles of one interface
#   TB_IFACE_CYCLE_SETTLE_SEC     pause after a link cycle
#   TB_PING_TIMEOUT_SEC           value passed to ping -W for each probe
#   TB_MONITOR_INTERVAL_SEC       pause between monitoring passes
set -euo pipefail

# Bridge the thunderbolt* interfaces are enslaved to, and the MTU applied to
# the bridge and to every member interface.
BRIDGE="thunderbridge"
MTU="65520"

# Directory holding cooldown timestamps and per-interface failure counters.
STATE_DIR="/run/tb-recover"
LAST_BOLT_RESTART_FILE="${STATE_DIR}/last_bolt_restart_epoch"
BOLT_RESTART_COOLDOWN_SEC=600
LAST_NHI_RESCAN_FILE="${STATE_DIR}/last_nhi_rescan_epoch"
NHI_RESCAN_COOLDOWN_SEC=600
NHI_SETTLE_SEC=8

PEER_FAIL_THRESHOLD="${TB_PEER_FAIL_THRESHOLD:-2}"
IFACE_CYCLE_COOLDOWN_SEC="${TB_IFACE_CYCLE_COOLDOWN_SEC:-300}"
IFACE_CYCLE_SETTLE_SEC="${TB_IFACE_CYCLE_SETTLE_SEC:-5}"
PING_TIMEOUT_SEC="${TB_PING_TIMEOUT_SEC:-1}"
MONITOR_INTERVAL_SEC="${TB_MONITOR_INTERVAL_SEC:-60}"

# Short host name; used as the key of the static peer table.
LOCAL_HOST="$(hostname -s 2>/dev/null || hostname)"

mkdir -p "$STATE_DIR"

# The handler text is evaluated only when a signal arrives, so it can call
# log() even though that function is defined further down.
trap 'log "Shutting down tb-recover"; exit 0' SIGTERM SIGINT

# Write one log line to stdout: an ISO-8601 timestamp (date -Is), a space,
# then all arguments joined by spaces.
log() {
  printf '%s %s\n' "$(date -Is)" "$*"
}

# Succeed when $1 resolves to something the shell can run (command -v),
# printing nothing either way.
command_exists() {
  command -v "$1" >/dev/null 2>&1
}

# Print the path of the failed-probe counter file for interface $1.
# Reads the global STATE_DIR.
counter_file_for_iface() {
  printf '%s/peer-fail-%s.count\n' "$STATE_DIR" "$1"
}

# Print the path of the file that stores the epoch of the last link cycle
# for interface $1. Reads the global STATE_DIR.
cooldown_file_for_iface() {
  printf '%s/last-iface-cycle-%s.epoch\n' "$STATE_DIR" "$1"
}

# Print the non-negative integer stored in file $1.
#
# Prints 0 when the file is missing, unreadable, empty, or holds anything
# other than decimal digits. Always succeeds, so callers can feed the result
# straight into arithmetic.
read_epoch_file() {
  local file="$1"
  local value="0"

  if [ -f "$file" ]; then
    value="$(cat "$file" 2>/dev/null || echo 0)"
  fi

  case "$value" in
    '' | *[!0-9]*)
      value=0
      ;;
    *)
      # Force base 10: a leading zero would otherwise make $(( )) in the
      # callers parse the value as octal, and "08"/"09" abort the expansion.
      value=$((10#$value))
      ;;
  esac

  printf '%s\n' "$value"
}

# Print the counter stored in file $1. Counters use the same on-disk format
# as epoch files, so this simply delegates to read_epoch_file.
read_counter_file() {
  read_epoch_file "$1"
}

# Print the IP address of the peer expected behind interface $1.
#
# Returns 0 with the address on stdout, or 1 (printing nothing) when neither
# the sysfs lookup nor the static table knows the interface.
# Reads the global LOCAL_HOST for the static fallback.
peer_ip_for_iface() {
  local iface="$1"

  # Dynamically resolve peer by looking up the XDomain device name bound to this
  # interface. The kernel exposes a symlink at /sys/class/net/<iface>/device
  # pointing to the XDomain service path (e.g. .../1-1.0). Its parent directory
  # is the XDomain device whose device_name attribute holds the peer hostname.
  local dev_path xdomain_dev peer_name
  dev_path="$(readlink -f "/sys/class/net/${iface}/device" 2>/dev/null || true)"
  if [ -n "$dev_path" ] && [ -d "$dev_path" ]; then
    xdomain_dev="$(dirname "$dev_path")"
    peer_name="$(cat "${xdomain_dev}/device_name" 2>/dev/null || true)"
    # An unrecognised or empty peer name falls through to the static table.
    case "$peer_name" in
      baobab)
        printf '%s\n' "192.168.10.91"
        return 0
        ;;
      ebony)
        printf '%s\n' "192.168.10.92"
        return 0
        ;;
      tapia)
        printf '%s\n' "192.168.10.93"
        return 0
        ;;
    esac
  fi

  # Static fallback (used when sysfs path is not available).
  case "${LOCAL_HOST}:${iface}" in
    baobab:thunderbolt0)
      printf '%s\n' "192.168.10.92"
      ;;
    baobab:thunderbolt1)
      printf '%s\n' "192.168.10.93"
      ;;
    ebony:thunderbolt0)
      printf '%s\n' "192.168.10.91"
      ;;
    tapia:thunderbolt0)
      printf '%s\n' "192.168.10.91"
      ;;
    *)
      return 1
      ;;
  esac
}

# Succeed when /sys/class/net/$1/brport/state is readable and contains 3,
# the value this script treats as "bridge port is forwarding".
# Fails when the file is missing (e.g. the interface is not a bridge port).
iface_is_forwarding() {
  local iface="$1"
  local state_file="/sys/class/net/${iface}/brport/state"

  [ -r "$state_file" ] || return 1
  [ "$(cat "$state_file" 2>/dev/null || echo 0)" = "3" ]
}

# Succeed when /sys/class/net/$1/operstate is readable and reads exactly "up".
iface_is_oper_up() {
  local iface="$1"
  local operstate_file="/sys/class/net/${iface}/operstate"

  [ -r "$operstate_file" ] || return 1
  [ "$(cat "$operstate_file" 2>/dev/null || true)" = "up" ]
}

# Send a single ping to $1 through the bridge and return ping's exit status.
# Reads the globals BRIDGE and PING_TIMEOUT_SEC.
probe_peer_ip() {
  local peer_ip="$1"

  # Drop any existing neighbour entry first so the probe cannot be answered
  # from a cached entry; a missing entry is not an error.
  ip neigh del "$peer_ip" dev "$BRIDGE" 2>/dev/null || true
  ping -I "$BRIDGE" -n -c 1 -W "$PING_TIMEOUT_SEC" "$peer_ip" >/dev/null 2>&1
}

# Bounce interface $1 because its peer $2 stopped answering probes.
#
# Arguments: $1 - interface name, $2 - peer IP (used in log messages only)
# Returns:   0 when the cycle ran or was skipped because of the cooldown,
#            1 when ifup failed (no cooldown is recorded in that case).
# Reads the globals IFACE_CYCLE_COOLDOWN_SEC, IFACE_CYCLE_SETTLE_SEC, MTU
# and BRIDGE.
recover_iface_cycle() {
  local iface="$1"
  local peer_ip="$2"
  local now last_cycle cooldown_file

  now="$(date +%s)"
  cooldown_file="$(cooldown_file_for_iface "$iface")"
  last_cycle="$(read_epoch_file "$cooldown_file")"
  if [ $((now - last_cycle)) -lt "$IFACE_CYCLE_COOLDOWN_SEC" ]; then
    log "peer ${peer_ip} still unhealthy on ${iface}, but iface cycle is cooling down"
    return 0
  fi

  log "peer ${peer_ip} unhealthy on ${iface}; cycling link with ifdown/ifup"
  if command_exists ifdown && command_exists ifup; then
    # A failing ifdown is only logged; ifup is still attempted.
    ifdown --force "$iface" || log "ifdown reported a non-zero exit code for ${iface}"
    sleep 2
    if ! ifup "$iface"; then
      log "ifup failed for ${iface}"
      return 1
    fi
  else
    log "ifdown/ifup unavailable; falling back to ip link bounce for ${iface}"
    ip link set "$iface" down || true
    sleep 2
    ip link set "$iface" up || true
  fi

  # Re-apply MTU and bridge membership and start the enlist unit again
  # (best effort: individual failures are ignored).
  ip link set "$iface" mtu "$MTU" || true
  ip link set "$iface" master "$BRIDGE" || true
  systemctl start "tb-enlist@${iface}.service" || true

  # Record the cycle (timestamp taken before the bounce) and start counting
  # failures afresh, then give the link time to settle.
  printf '%s\n' "$now" > "$cooldown_file"
  rm -f "$(counter_file_for_iface "$iface")"
  sleep "$IFACE_CYCLE_SETTLE_SEC"
}

# Probe the peer behind interface $1 and trigger a link cycle after
# PEER_FAIL_THRESHOLD consecutive failed probes.
#
# Returns 0 in every case except when recover_iface_cycle itself fails,
# whose status is passed through.
assess_peer_health() {
  local iface="$1"
  local peer_ip="" counter_file="" fail_count=0

  # Interfaces without a known peer are not monitored.
  if ! peer_ip="$(peer_ip_for_iface "$iface")"; then
    return 0
  fi

  counter_file="$(counter_file_for_iface "$iface")"

  # A link that is down or not forwarding is not probed; its failure count
  # is discarded so counting restarts once the link is usable again.
  if ! iface_is_oper_up "$iface" || ! iface_is_forwarding "$iface"; then
    rm -f "$counter_file"
    return 0
  fi

  # A successful probe clears the failure streak.
  if probe_peer_ip "$peer_ip"; then
    rm -f "$counter_file"
    return 0
  fi

  fail_count="$(read_counter_file "$counter_file")"
  fail_count=$((fail_count + 1))
  printf '%s\n' "$fail_count" > "$counter_file"
  log "peer probe failed on ${iface} towards ${peer_ip} (${fail_count}/${PEER_FAIL_THRESHOLD})"

  if [ "$fail_count" -lt "$PEER_FAIL_THRESHOLD" ]; then
    return 0
  fi

  recover_iface_cycle "$iface" "$peer_ip"
}

# Succeed when at least one /sys/class/net/thunderbolt* entry exists.
has_tb_netdev() {
  local entry

  for entry in /sys/class/net/thunderbolt*; do
    # An unmatched glob is left as the literal pattern, which fails both
    # tests; -L additionally accepts a dangling symlink.
    if [ -e "$entry" ] || [ -L "$entry" ]; then
      return 0
    fi
  done

  return 1
}

# Succeed when some device under /sys/bus/thunderbolt/devices named
# <digit>-<nonzero digit>... (with no '.' or ':' in its name) has no sibling
# entry named <device>.<something>. Returns 1 when every such device has at
# least one, or when there are no such devices.
has_stale_tb_xdomain() {
  local dev="" child="" has_child=0

  for dev in /sys/bus/thunderbolt/devices/[0-9]-[1-9]*; do
    [ -e "$dev" ] || continue

    # Names containing '.' or ':' are not top-level devices; skip them.
    case "${dev##*/}" in
      *.* | *:*)
        continue
        ;;
    esac

    has_child=0
    for child in "${dev}".*; do
      if [ -e "$child" ] || [ -L "$child" ]; then
        has_child=1
        break
      fi
    done

    if [ "$has_child" -eq 0 ]; then
      return 0
    fi
  done

  return 1
}

# Ask the kernel and udev to re-enumerate Thunderbolt devices.
#
# Writes 1 to the rescan attribute of every thunderbolt domain that has one,
# then triggers udev "change" events for the thunderbolt subsystem and "add"
# events for the net subsystem. Every step is best effort; always returns 0.
trigger_tb_rescan() {
  local domain=""

  for domain in /sys/bus/thunderbolt/devices/domain*; do
    if [ -e "${domain}/rescan" ]; then
      echo 1 > "${domain}/rescan" || true
    fi
  done

  udevadm trigger --subsystem-match=thunderbolt --action=change || true
  udevadm trigger --subsystem-match=net --action=add || true
}

# Remove the Thunderbolt host controller from the PCI bus and rescan the bus.
#
# Arguments: $1 - epoch to record in LAST_NHI_RESCAN_FILE
# Returns:   0 after a removal/rescan was attempted, 1 when no matching
#            PCI device was found.
run_nhi_rescan() {
  local epoch="$1"
  local dev="" cls="" drv="" nhi_pci=""

  # Pick the first removable PCI device with class 0x088000 that is bound to
  # the driver named "thunderbolt".
  for dev in /sys/bus/pci/devices/*; do
    [ -e "$dev/class" ] || continue
    [ -e "$dev/driver" ] || continue
    [ -w "$dev/remove" ] || continue
    cls="$(cat "$dev/class" 2>/dev/null || true)"
    drv="$(basename "$(readlink -f "$dev/driver" 2>/dev/null || true)")"
    if [ "$cls" = "0x088000" ] && [ "$drv" = "thunderbolt" ]; then
      nhi_pci="$dev"
      break
    fi
  done

  if [ -z "$nhi_pci" ]; then
    return 1
  fi

  # Both writes are best effort; the timestamp is recorded regardless so the
  # caller's cooldown applies to attempts, not only to successes.
  echo 1 > "$nhi_pci/remove" || true
  sleep 1
  echo 1 > /sys/bus/pci/rescan || true
  printf '%s\n' "$epoch" > "$LAST_NHI_RESCAN_FILE"
  return 0
}

# Make sure the bridge exists, carries the configured MTU and is up.
# Every step is best effort; always returns 0.
# Reads the globals BRIDGE and MTU.
init_bridge() {
  # Create the bridge only when it is not already present.
  ip link show "$BRIDGE" >/dev/null 2>&1 || ip link add name "$BRIDGE" type bridge || true
  ip link set "$BRIDGE" mtu "$MTU" || true
  ip link set "$BRIDGE" up || true
}

# Enlist every thunderbolt* interface into the bridge; when none exists,
# escalate through increasingly disruptive recovery steps.
#
# Escalation order (each step only runs while no thunderbolt* netdev exists):
#   1. trigger_tb_rescan
#   2. run_nhi_rescan, at most once per NHI_RESCAN_COOLDOWN_SEC, with one
#      immediate retry when has_stale_tb_xdomain reports a stale device
#   3. restart bolt.service, at most once per BOLT_RESTART_COOLDOWN_SEC
#   4. a final trigger_tb_rescan
handle_missing_interfaces() {
  local path iface
  local now last nhi_last retry_now
  local found_tb_iface=0

  for path in /sys/class/net/thunderbolt*; do
    [ -e "$path" ] || continue
    found_tb_iface=1
    iface="${path##*/}"
    ip link set "$iface" up || true
    ip link set "$iface" mtu "$MTU" || true
    ip link set "$iface" master "$BRIDGE" || true
    systemctl start "tb-enlist@${iface}.service" || true
  done

  # Nothing to recover when an interface exists or the host has no
  # thunderbolt bus at all.
  if [ "$found_tb_iface" -ne 0 ] || [ ! -d /sys/bus/thunderbolt/devices ]; then
    return 0
  fi

  trigger_tb_rescan
  sleep 2
  if has_tb_netdev; then
    return 0
  fi

  # Both cooldown checks below use this single timestamp, taken before any
  # of the settle delays.
  now="$(date +%s)"
  last="$(read_epoch_file "$LAST_BOLT_RESTART_FILE")"
  nhi_last="$(read_epoch_file "$LAST_NHI_RESCAN_FILE")"

  if [ $((now - nhi_last)) -ge "$NHI_RESCAN_COOLDOWN_SEC" ]; then
    if run_nhi_rescan "$now"; then
      sleep "$NHI_SETTLE_SEC"
      trigger_tb_rescan

      if ! has_tb_netdev && has_stale_tb_xdomain; then
        retry_now="$(date +%s)"
        if run_nhi_rescan "$retry_now"; then
          sleep "$NHI_SETTLE_SEC"
          trigger_tb_rescan
        fi
      fi
    fi
  fi

  if ! has_tb_netdev && [ $((now - last)) -ge "$BOLT_RESTART_COOLDOWN_SEC" ]; then
    if systemctl list-unit-files bolt.service >/dev/null 2>&1; then
      systemctl restart bolt.service || true
      printf '%s\n' "$now" > "$LAST_BOLT_RESTART_FILE"
    fi
  fi

  trigger_tb_rescan
}

# Run the peer health check on every existing thunderbolt* interface.
#
# A failed check (for example when the link cycle could not bring the
# interface back up) is logged and the remaining interfaces are still
# checked. Without the guard the non-zero status would trip `set -e` and
# terminate the whole monitor. Always returns 0.
monitor_interfaces() {
  local path iface

  for path in /sys/class/net/thunderbolt*; do
    [ -e "$path" ] || continue
    iface="${path##*/}"
    if ! assess_peer_health "$iface"; then
      log "health check failed for ${iface}; continuing"
    fi
  done
}

# Entry point: create the bridge once, then keep enlisting interfaces and
# checking peers every MONITOR_INTERVAL_SEC seconds until a signal arrives.
init_bridge

log "tb-recover monitor started (interval: ${MONITOR_INTERVAL_SEC}s)"

while true; do
  handle_missing_interfaces
  monitor_interfaces
  sleep "$MONITOR_INTERVAL_SEC"
done