1 contributor
#!/usr/bin/env bash
# Strict mode: stop on unhandled command failures, unset variables, and
# failures in any stage of a pipeline.
set -euo pipefail
# Name of the bridge that thunderbolt network interfaces are attached to.
BRIDGE="thunderbridge"
# MTU applied to the bridge and to every thunderbolt interface.
MTU="65520"
# Set to 1 once a /sys/class/net/thunderbolt* interface has been seen.
FOUND_TB_IFACE=0
# Directory for cooldown timestamps and per-interface failure counters.
STATE_DIR="/run/tb-recover"
# Epoch of the last bolt.service restart and the minimum gap between restarts.
LAST_BOLT_RESTART_FILE="${STATE_DIR}/last_bolt_restart_epoch"
BOLT_RESTART_COOLDOWN_SEC=600
# Epoch of the last PCI NHI remove+rescan and the minimum gap between rescans.
LAST_NHI_RESCAN_FILE="${STATE_DIR}/last_nhi_rescan_epoch"
NHI_RESCAN_COOLDOWN_SEC=600
# Seconds to sleep after an NHI rescan before re-triggering udev.
NHI_SETTLE_SEC=8
# Tunables below can be overridden through TB_* environment variables.
# Failed peer probes recorded for an interface before its link is cycled.
PEER_FAIL_THRESHOLD="${TB_PEER_FAIL_THRESHOLD:-2}"
# Minimum seconds between two link cycles of the same interface.
IFACE_CYCLE_COOLDOWN_SEC="${TB_IFACE_CYCLE_COOLDOWN_SEC:-300}"
# Seconds to sleep after a link cycle has completed.
IFACE_CYCLE_SETTLE_SEC="${TB_IFACE_CYCLE_SETTLE_SEC:-5}"
# Value handed to ping's -W option when probing a peer.
PING_TIMEOUT_SEC="${TB_PING_TIMEOUT_SEC:-1}"
# Short hostname (falls back to plain `hostname`); used as the key into the
# host:interface -> peer IP table.
LOCAL_HOST="$(hostname -s 2>/dev/null || hostname)"
# Make sure the state directory exists before anything reads or writes it.
mkdir -p "$STATE_DIR"
# Print all arguments as one line on stdout, prefixed with the output of
# `date -Is`.
log() {
  local stamp
  # A failing date must not abort the script; log whatever it produced.
  stamp="$(date -Is)" || true
  printf '%s %s\n' "$stamp" "$*"
}
# Succeed when the name in $1 resolves to something runnable
# (as reported by `command -v`); prints nothing.
command_exists() {
  local tool="$1"
  command -v "$tool" &>/dev/null
}
# Print the path of the peer-failure counter file for interface $1,
# located under STATE_DIR.
counter_file_for_iface() {
  local iface="$1"
  printf '%s\n' "${STATE_DIR}/peer-fail-${iface}.count"
}
# Print the path of the file that stores the epoch of the last link cycle
# for interface $1, located under STATE_DIR.
cooldown_file_for_iface() {
  local iface="$1"
  printf '%s\n' "${STATE_DIR}/last-iface-cycle-${iface}.epoch"
}
# Print the non-negative integer stored in file $1.
# Prints 0 when the file is missing, unreadable, empty, or holds anything
# other than decimal digits. Leading zeros are stripped so that callers can
# use the result directly in $(( )) without it being parsed as octal.
read_epoch_file() {
  local file="$1"
  local value="0"
  if [ -f "$file" ]; then
    value="$(cat "$file" 2>/dev/null || echo 0)"
  fi
  case "$value" in
    '' | *[!0-9]*)
      value=0
      ;;
    *)
      # Digits only at this point; force base 10 ("08" would otherwise be an
      # invalid octal constant in later arithmetic).
      value=$((10#$value))
      ;;
  esac
  printf '%s\n' "$value"
}
# Print the counter stored in file $1; counters share the on-disk format
# handled by read_epoch_file, so this simply delegates to it.
read_counter_file() {
  local counter_path="$1"
  read_epoch_file "$counter_path"
}
# Print the peer IP expected behind interface $1 on this host.
# The lookup key is "<LOCAL_HOST>:<iface>"; returns 1 and prints nothing
# when the pair is not in the table.
peer_ip_for_iface() {
  local lookup_key="${LOCAL_HOST}:${1}"
  local peer=""
  case "$lookup_key" in
    baobab:thunderbolt0) peer="192.168.10.92" ;;
    baobab:thunderbolt1) peer="192.168.10.93" ;;
    ebony:thunderbolt0 | tapia:thunderbolt0) peer="192.168.10.91" ;;
    *) return 1 ;;
  esac
  printf '%s\n' "$peer"
}
# Succeed when /sys/class/net/<iface>/brport/state is readable and
# contains "3"; fail otherwise (including when the file is absent).
iface_is_forwarding() {
  local iface="$1"
  local port_state_path="/sys/class/net/${iface}/brport/state"
  local port_state=""
  if [[ ! -r "$port_state_path" ]]; then
    return 1
  fi
  # A read error is treated as state "0", which never matches.
  port_state="$(cat "$port_state_path" 2>/dev/null || echo 0)"
  [[ "$port_state" == "3" ]]
}
# Succeed when /sys/class/net/<iface>/operstate is readable and reads "up";
# fail otherwise (including when the file is absent).
iface_is_oper_up() {
  local iface="$1"
  local oper_path="/sys/class/net/${iface}/operstate"
  local oper_state=""
  if [[ ! -r "$oper_path" ]]; then
    return 1
  fi
  # A read error leaves the state empty, which never matches.
  oper_state="$(cat "$oper_path" 2>/dev/null || true)"
  [[ "$oper_state" == "up" ]]
}
# Send a single ping to peer $1 through the bridge and return ping's status.
# The peer's neighbour entry on the bridge is deleted first (failure ignored),
# presumably so that a stale cache entry cannot mask a dead link.
probe_peer_ip() {
  local target="$1"
  local -a ping_args=(-I "$BRIDGE" -n -c 1 -W "$PING_TIMEOUT_SEC" "$target")
  if ! ip neigh del "$target" dev "$BRIDGE" 2>/dev/null; then
    : # nothing cached for this peer; not an error
  fi
  ping "${ping_args[@]}" &>/dev/null
}
# Bounce interface $1 because peer $2 stopped answering.
# Arguments: $1 - interface name, $2 - peer IP (used in log messages only)
# Returns:   0 when the cycle ran or was skipped due to cooldown,
#            1 when ifup failed (no cooldown stamp is written in that case).
# Reads IFACE_CYCLE_COOLDOWN_SEC, IFACE_CYCLE_SETTLE_SEC, MTU and BRIDGE.
recover_iface_cycle() {
  local iface="$1"
  local peer_ip="$2"
  local started_at
  local stamp_file
  local previous_cycle
  started_at="$(date +%s)"
  stamp_file="$(cooldown_file_for_iface "$iface")"
  previous_cycle="$(read_epoch_file "$stamp_file")"

  # Rate limit: never cycle the same interface twice within the cooldown.
  if [ $((started_at - previous_cycle)) -lt "$IFACE_CYCLE_COOLDOWN_SEC" ]; then
    log "peer ${peer_ip} still unhealthy on ${iface}, but iface cycle is cooling down"
    return 0
  fi

  log "peer ${peer_ip} unhealthy on ${iface}; cycling link with ifdown/ifup"
  if ! command_exists ifdown || ! command_exists ifup; then
    # No ifupdown tooling: plain link down/up, both best-effort.
    log "ifdown/ifup unavailable; falling back to ip link bounce for ${iface}"
    ip link set "$iface" down || true
    sleep 2
    ip link set "$iface" up || true
  else
    # A failing ifdown is only logged; a failing ifup aborts the recovery.
    ifdown --force "$iface" || log "ifdown reported a non-zero exit code for ${iface}"
    sleep 2
    if ! ifup "$iface"; then
      log "ifup failed for ${iface}"
      return 1
    fi
  fi

  # Re-apply MTU and bridge membership, then start the enlist unit again.
  ip link set "$iface" mtu "$MTU" || true
  ip link set "$iface" master "$BRIDGE" || true
  systemctl start "tb-enlist@${iface}.service" || true

  # Record the cycle time, reset the failure counter, and let the link settle.
  printf '%s\n' "$started_at" > "$stamp_file"
  rm -f "$(counter_file_for_iface "$iface")"
  sleep "$IFACE_CYCLE_SETTLE_SEC"
}
# Probe the peer behind interface $1 and cycle the link after repeated failures.
# Returns 0 unless recover_iface_cycle is invoked and fails.
#  - no peer configured for this host/interface: nothing to do
#  - link not operationally up, not forwarding, or peer answers: the failure
#    counter is cleared
#  - otherwise the counter is incremented, and once it reaches
#    PEER_FAIL_THRESHOLD the interface is handed to recover_iface_cycle
assess_peer_health() {
  local iface="$1"
  local target=""
  local tally_file=""
  local misses=0

  target="$(peer_ip_for_iface "$iface")" || return 0
  tally_file="$(counter_file_for_iface "$iface")"

  # Evaluated left to right: the peer is only probed on a link that is up
  # and forwarding.
  if ! iface_is_oper_up "$iface" || ! iface_is_forwarding "$iface" || probe_peer_ip "$target"; then
    rm -f "$tally_file"
    return 0
  fi

  misses="$(read_counter_file "$tally_file")"
  misses=$((misses + 1))
  printf '%s\n' "$misses" > "$tally_file"
  log "peer probe failed on ${iface} towards ${target} (${misses}/${PEER_FAIL_THRESHOLD})"

  [ "$misses" -lt "$PEER_FAIL_THRESHOLD" ] && return 0
  recover_iface_cycle "$iface" "$target"
}
# Succeed when at least one thunderbolt* network interface exists.
# Arguments: $1 - directory to search (optional, default /sys/class/net)
# Returns:   0 if a matching entry exists, 1 otherwise
# Uses a shell glob instead of forking `ls` just to test for existence.
has_tb_netdev() {
  local net_dir="${1:-/sys/class/net}"
  local entry=""
  for entry in "$net_dir"/thunderbolt*; do
    # An unmatched glob stays literal, so confirm the entry really exists.
    if [ -e "$entry" ] || [ -L "$entry" ]; then
      return 0
    fi
  done
  return 1
}
# Succeed when some thunderbolt device node has no child entry.
# A node is an entry matching [0-9]-[1-9]* whose name contains neither '.'
# nor ':'; it counts as stale when no sibling "<node>.*" entry exists.
# Arguments: $1 - devices directory (optional, default
#                 /sys/bus/thunderbolt/devices)
# Returns:   0 if a stale node was found, 1 otherwise
# Uses shell globs instead of forking `ls` just to test for existence.
has_stale_tb_xdomain() {
  local devices_dir="${1:-/sys/bus/thunderbolt/devices}"
  local dev=""
  local child=""
  local has_child=0
  for dev in "$devices_dir"/[0-9]-[1-9]*; do
    [ -e "$dev" ] || continue
    case "${dev##*/}" in
      *.* | *:*)
        # Already a child/service entry, not a node of its own.
        continue
        ;;
    esac
    has_child=0
    for child in "$dev".*; do
      # An unmatched glob stays literal, so confirm the entry really exists.
      if [ -e "$child" ] || [ -L "$child" ]; then
        has_child=1
        break
      fi
    done
    if [ "$has_child" -eq 0 ]; then
      return 0
    fi
  done
  return 1
}
# Ask every thunderbolt domain that exposes a rescan attribute to rescan,
# then replay udev events: "change" for the thunderbolt subsystem and "add"
# for the net subsystem. Every step is best-effort; always returns 0.
trigger_tb_rescan() {
  local tb_domain=""
  for tb_domain in /sys/bus/thunderbolt/devices/domain*; do
    if [ -e "$tb_domain/rescan" ]; then
      echo 1 > "$tb_domain/rescan" || true
    fi
  done
  udevadm trigger --subsystem-match=thunderbolt --action=change || true
  udevadm trigger --subsystem-match=net --action=add || true
}
# Remove the first PCI device that has class 0x088000 and is bound to the
# "thunderbolt" driver, then rescan the PCI bus.
# Arguments: $1 - epoch written to LAST_NHI_RESCAN_FILE after the attempt
# Returns:   0 when such a device was found (the remove/rescan writes
#            themselves are best-effort), 1 when none was found.
run_nhi_rescan() {
  local stamp="$1"
  local pci_dev=""
  local pci_class=""
  local bound_driver=""
  local nhi_dev=""

  for pci_dev in /sys/bus/pci/devices/*; do
    # Only devices with a class file, a bound driver, and a writable
    # remove attribute are candidates.
    if [ ! -e "$pci_dev/class" ] || [ ! -e "$pci_dev/driver" ] || [ ! -w "$pci_dev/remove" ]; then
      continue
    fi
    pci_class="$(cat "$pci_dev/class" 2>/dev/null || true)"
    bound_driver="$(basename "$(readlink -f "$pci_dev/driver" 2>/dev/null || true)")"
    if [ "$pci_class" = "0x088000" ] && [ "$bound_driver" = "thunderbolt" ]; then
      nhi_dev="$pci_dev"
      break
    fi
  done

  if [ -z "$nhi_dev" ]; then
    return 1
  fi

  echo 1 > "$nhi_dev/remove" || true
  sleep 1
  echo 1 > /sys/bus/pci/rescan || true
  printf '%s\n' "$stamp" > "$LAST_NHI_RESCAN_FILE"
  return 0
}
# Make sure the bridge exists, carries the configured MTU, and is up before
# any port is attached to it. Every step is best-effort.
if ! ip link show "$BRIDGE" >/dev/null 2>&1; then
  ip link add name "$BRIDGE" type bridge || true
fi
ip link set "$BRIDGE" mtu "$MTU" || true
ip link set "$BRIDGE" up || true
# Bring up every thunderbolt netdev, apply the MTU, attach it to the bridge
# and start its tb-enlist@ unit. FOUND_TB_IFACE records that at least one
# such interface exists.
for path in /sys/class/net/thunderbolt*; do
  if [ ! -e "$path" ]; then
    # The glob matched nothing and stayed literal.
    continue
  fi
  FOUND_TB_IFACE=1
  IFACE="${path##*/}"
  ip link set "$IFACE" up || true
  ip link set "$IFACE" mtu "$MTU" || true
  ip link set "$IFACE" master "$BRIDGE" || true
  systemctl start "tb-enlist@${IFACE}.service" || true
done
# If no thunderbolt netdev exists but a TB domain exists, force a rescan + udev retrigger.
if [ "$FOUND_TB_IFACE" -eq 0 ] && [ -d /sys/bus/thunderbolt/devices ]; then
  trigger_tb_rescan
  # Escalate with cooldown: try PCI NHI remove+rescan to emulate a soft replug.
  sleep 2
  if ! has_tb_netdev; then
    now="$(date +%s)"
    # Both cooldown stamps are read through read_epoch_file, which yields 0
    # for a missing, empty, or non-numeric state file.
    last="$(read_epoch_file "$LAST_BOLT_RESTART_FILE")"
    nhi_last="$(read_epoch_file "$LAST_NHI_RESCAN_FILE")"
    if [ $((now - nhi_last)) -ge "$NHI_RESCAN_COOLDOWN_SEC" ]; then
      if run_nhi_rescan "$now"; then
        sleep "$NHI_SETTLE_SEC"
        trigger_tb_rescan
        # On newer kernels the first NHI reset can stop at the peer xdomain host
        # node without recreating the matching *.0 network service.
        if ! has_tb_netdev && has_stale_tb_xdomain; then
          retry_now="$(date +%s)"
          if run_nhi_rescan "$retry_now"; then
            sleep "$NHI_SETTLE_SEC"
            trigger_tb_rescan
          fi
        fi
      fi
    fi
    # Secondary fallback with cooldown: restart boltd if interface is still missing
    # and the host actually uses that service.
    if ! has_tb_netdev; then
      if [ $((now - last)) -ge "$BOLT_RESTART_COOLDOWN_SEC" ]; then
        if systemctl list-unit-files bolt.service >/dev/null 2>&1; then
          systemctl restart bolt.service || true
          printf '%s\n' "$now" > "$LAST_BOLT_RESTART_FILE"
        fi
      fi
    fi
    # Final retrigger after whichever escalation steps ran.
    trigger_tb_rescan
  fi
fi
# Probe the peer behind every thunderbolt interface. A failed recovery on one
# interface is recorded instead of aborting the script on the spot, so the
# remaining interfaces are still checked; the script then exits non-zero.
# Note: bash does not apply errexit inside a function called from a
# condition, so only assess_peer_health's return status decides the outcome.
peer_health_rc=0
for path in /sys/class/net/thunderbolt*; do
  [ -e "$path" ] || continue
  if ! assess_peer_health "${path##*/}"; then
    log "peer health recovery failed on ${path##*/}"
    peer_health_rc=1
  fi
done
if [ "$peer_health_rc" -ne 0 ]; then
  exit "$peer_health_rc"
fi