#!/usr/bin/env bash
#
# Thunderbolt bridge recovery helper.
#
# Each run:
#   1. makes sure the bridge exists, is up and has the expected MTU;
#   2. enslaves every thunderbolt* netdev to the bridge and starts its
#      tb-enlist@<iface>.service unit;
#   3. if no thunderbolt netdev exists, escalates through a thunderbolt
#      rescan, a PCI NHI remove+rescan and finally a bolt.service restart
#      (the last two are rate-limited by cooldown files in STATE_DIR);
#   4. pings the configured peer of every thunderbolt netdev and cycles the
#      link after PEER_FAIL_THRESHOLD consecutive failures.
#
# Optional environment overrides:
#   TB_PEER_FAIL_THRESHOLD       consecutive probe failures before cycling
#   TB_IFACE_CYCLE_COOLDOWN_SEC  minimum seconds between cycles per iface
#   TB_IFACE_CYCLE_SETTLE_SEC    seconds to sleep after a cycle
#   TB_PING_TIMEOUT_SEC          per-probe ping timeout (ping -W)
set -euo pipefail

BRIDGE="thunderbridge"
MTU="65520"
FOUND_TB_IFACE=0
STATE_DIR="/run/tb-recover"
LAST_BOLT_RESTART_FILE="${STATE_DIR}/last_bolt_restart_epoch"
BOLT_RESTART_COOLDOWN_SEC=600
LAST_NHI_RESCAN_FILE="${STATE_DIR}/last_nhi_rescan_epoch"
NHI_RESCAN_COOLDOWN_SEC=600
NHI_SETTLE_SEC=8
PEER_FAIL_THRESHOLD="${TB_PEER_FAIL_THRESHOLD:-2}"
IFACE_CYCLE_COOLDOWN_SEC="${TB_IFACE_CYCLE_COOLDOWN_SEC:-300}"
IFACE_CYCLE_SETTLE_SEC="${TB_IFACE_CYCLE_SETTLE_SEC:-5}"
PING_TIMEOUT_SEC="${TB_PING_TIMEOUT_SEC:-1}"
LOCAL_HOST="$(hostname -s 2>/dev/null || hostname)"

mkdir -p "$STATE_DIR"

# Print a timestamped (ISO 8601, seconds precision) message on stdout.
log() {
  printf '%s %s\n' "$(date -Is)" "$*"
}

# Succeed when command $1 can be resolved by the shell.
command_exists() {
  command -v "$1" >/dev/null 2>&1
}

# Print $2 when it consists only of digits; otherwise warn on stderr and
# print the fallback $3. $1 is the setting name used in the warning.
uint_or_default() {
  local name="$1"
  local value="$2"
  local fallback="$3"

  case "$value" in
    ''|*[!0-9]*)
      log "ignoring non-numeric ${name}='${value}'; using ${fallback}" >&2
      printf '%s\n' "$fallback"
      ;;
    *)
      printf '%s\n' "$value"
      ;;
  esac
}

# Print the path of the consecutive-failure counter file for iface $1.
counter_file_for_iface() {
  printf '%s/peer-fail-%s.count\n' "$STATE_DIR" "$1"
}

# Print the path of the last-cycle timestamp file for iface $1.
cooldown_file_for_iface() {
  printf '%s/last-iface-cycle-%s.epoch\n' "$STATE_DIR" "$1"
}

# Print the non-negative integer stored in file $1, or 0 when the file is
# missing, unreadable, empty or contains anything other than digits.
read_epoch_file() {
  local file="$1"
  local value="0"

  if [ -f "$file" ]; then
    value="$(cat "$file" 2>/dev/null || echo 0)"
  fi
  case "$value" in
    ''|*[!0-9]*) value=0 ;;
  esac
  printf '%s\n' "$value"
}

# Counter files share the epoch-file format (a single unsigned integer).
read_counter_file() {
  read_epoch_file "$1"
}

# Print the peer IP to probe for iface $1 on this host (keyed by
# "<short hostname>:<iface>"). Returns 1 when no peer is configured.
peer_ip_for_iface() {
  local iface="$1"

  case "${LOCAL_HOST}:${iface}" in
    baobab:thunderbolt0) printf '%s\n' "192.168.10.92" ;;
    baobab:thunderbolt1) printf '%s\n' "192.168.10.93" ;;
    ebony:thunderbolt0) printf '%s\n' "192.168.10.91" ;;
    tapia:thunderbolt0) printf '%s\n' "192.168.10.91" ;;
    *) return 1 ;;
  esac
}

# Succeed when the bridge-port state file of iface $1 is readable and
# contains "3".
iface_is_forwarding() {
  local iface="$1"
  local state_file="/sys/class/net/${iface}/brport/state"

  [ -r "$state_file" ] || return 1
  [ "$(cat "$state_file" 2>/dev/null || echo 0)" = "3" ]
}

# Succeed when the operstate file of iface $1 is readable and reads "up".
iface_is_oper_up() {
  local iface="$1"
  local operstate_file="/sys/class/net/${iface}/operstate"

  [ -r "$operstate_file" ] || return 1
  [ "$(cat "$operstate_file" 2>/dev/null || true)" = "up" ]
}

# Send a single ping to peer $1 via the bridge. The neighbour entry is
# deleted first (best effort) so a cached entry is not reused by the probe.
# Returns ping's exit status.
probe_peer_ip() {
  local peer_ip="$1"

  ip neigh del "$peer_ip" dev "$BRIDGE" 2>/dev/null || true
  ping -I "$BRIDGE" -n -c 1 -W "$PING_TIMEOUT_SEC" "$peer_ip" >/dev/null 2>&1
}

# Bounce iface $1 (peer $2 is used for log messages only), re-apply MTU and
# bridge membership, restart its tb-enlist unit, then record the cycle time
# and reset the failure counter.
# Returns 0 when the cycle ran or was skipped because of the cooldown,
# 1 when ifup failed or the cooldown timestamp could not be written.
recover_iface_cycle() {
  local iface="$1"
  local peer_ip="$2"
  local now
  local last_cycle
  local cooldown_file

  now="$(date +%s)"
  cooldown_file="$(cooldown_file_for_iface "$iface")"
  last_cycle="$(read_epoch_file "$cooldown_file")"

  if [ $((now - last_cycle)) -lt "$IFACE_CYCLE_COOLDOWN_SEC" ]; then
    log "peer ${peer_ip} still unhealthy on ${iface}, but iface cycle is cooling down"
    return 0
  fi

  log "peer ${peer_ip} unhealthy on ${iface}; cycling link with ifdown/ifup"
  if command_exists ifdown && command_exists ifup; then
    ifdown --force "$iface" || log "ifdown reported a non-zero exit code for ${iface}"
    sleep 2
    if ! ifup "$iface"; then
      # No cooldown timestamp is written here, so the next run retries.
      log "ifup failed for ${iface}"
      return 1
    fi
  else
    log "ifdown/ifup unavailable; falling back to ip link bounce for ${iface}"
    ip link set "$iface" down || true
    sleep 2
    ip link set "$iface" up || true
  fi

  # Best effort: re-apply the settings the main flow applies to every port.
  ip link set "$iface" mtu "$MTU" || true
  ip link set "$iface" master "$BRIDGE" || true
  systemctl start "tb-enlist@${iface}.service" || true

  # Callers invoke this function in a conditional context where errexit is
  # suspended, so a failed state write is reported explicitly.
  printf '%s\n' "$now" > "$cooldown_file" || return 1
  rm -f "$(counter_file_for_iface "$iface")"
  sleep "$IFACE_CYCLE_SETTLE_SEC"
}

# Probe the peer configured for iface $1 and maintain its failure counter.
# The counter is cleared when the iface is not oper-up/forwarding (nothing
# meaningful to probe) or when the probe succeeds. After PEER_FAIL_THRESHOLD
# consecutive failures the link is cycled via recover_iface_cycle.
# Returns 0 unless a state write or the link cycle fails.
assess_peer_health() {
  local iface="$1"
  local peer_ip=""
  local counter_file=""
  local fail_count=0

  # Interfaces without a configured peer are not monitored.
  if ! peer_ip="$(peer_ip_for_iface "$iface")"; then
    return 0
  fi

  counter_file="$(counter_file_for_iface "$iface")"

  if ! iface_is_oper_up "$iface" || ! iface_is_forwarding "$iface"; then
    rm -f "$counter_file"
    return 0
  fi

  if probe_peer_ip "$peer_ip"; then
    rm -f "$counter_file"
    return 0
  fi

  fail_count="$(read_counter_file "$counter_file")"
  fail_count=$((fail_count + 1))
  printf '%s\n' "$fail_count" > "$counter_file" || return 1
  log "peer probe failed on ${iface} towards ${peer_ip} (${fail_count}/${PEER_FAIL_THRESHOLD})"

  if [ "$fail_count" -lt "$PEER_FAIL_THRESHOLD" ]; then
    return 0
  fi

  recover_iface_cycle "$iface" "$peer_ip"
}

# Succeed when at least one /sys/class/net/thunderbolt* entry exists.
has_tb_netdev() {
  ls /sys/class/net/thunderbolt* >/dev/null 2>&1
}

# Succeed when some thunderbolt device entry matching [0-9]-[1-9]* (names
# containing '.' or ':' are skipped) has no sibling "<name>.*" entry.
has_stale_tb_xdomain() {
  local dev=""

  for dev in /sys/bus/thunderbolt/devices/[0-9]-[1-9]*; do
    [ -e "$dev" ] || continue
    case "${dev##*/}" in
      *.*|*:*) continue ;;
    esac
    if ! ls "${dev}".* >/dev/null 2>&1; then
      return 0
    fi
  done

  return 1
}

# Write 1 to every thunderbolt domain's rescan attribute (when present) and
# re-trigger udev for the thunderbolt and net subsystems. All best effort.
trigger_tb_rescan() {
  local domain=""

  for domain in /sys/bus/thunderbolt/devices/domain*; do
    if [ -e "$domain/rescan" ]; then
      echo 1 > "$domain/rescan" || true
    fi
  done

  udevadm trigger --subsystem-match=thunderbolt --action=change || true
  udevadm trigger --subsystem-match=net --action=add || true
}

# Remove the first PCI device with class 0x088000 bound to the thunderbolt
# driver, rescan the PCI bus, and store epoch $1 in LAST_NHI_RESCAN_FILE.
# Returns 1 when no such device (with a writable remove attribute) exists.
run_nhi_rescan() {
  local epoch="$1"
  local dev=""
  local cls=""
  local drv=""
  local nhi_pci=""

  for dev in /sys/bus/pci/devices/*; do
    [ -e "$dev/class" ] || continue
    [ -e "$dev/driver" ] || continue
    [ -w "$dev/remove" ] || continue
    cls="$(cat "$dev/class" 2>/dev/null || true)"
    drv="$(basename "$(readlink -f "$dev/driver" 2>/dev/null || true)")"
    if [ "$cls" = "0x088000" ] && [ "$drv" = "thunderbolt" ]; then
      nhi_pci="$dev"
      break
    fi
  done

  if [ -n "$nhi_pci" ]; then
    echo 1 > "$nhi_pci/remove" || true
    sleep 1
    echo 1 > /sys/bus/pci/rescan || true
    printf '%s\n' "$epoch" > "$LAST_NHI_RESCAN_FILE"
    return 0
  fi

  return 1
}

# A non-numeric override would make the `[ ... -lt ... ]` guards below error
# out and evaluate false, silently disabling the threshold and the cooldown.
PEER_FAIL_THRESHOLD="$(uint_or_default TB_PEER_FAIL_THRESHOLD "$PEER_FAIL_THRESHOLD" 2)"
IFACE_CYCLE_COOLDOWN_SEC="$(uint_or_default TB_IFACE_CYCLE_COOLDOWN_SEC "$IFACE_CYCLE_COOLDOWN_SEC" 300)"

# Keep the bridge present and up before trying to enslave ports.
ip link show "$BRIDGE" >/dev/null 2>&1 || ip link add name "$BRIDGE" type bridge || true
ip link set "$BRIDGE" mtu "$MTU" || true
ip link set "$BRIDGE" up || true

for path in /sys/class/net/thunderbolt*; do
  [ -e "$path" ] || continue
  IFACE="${path##*/}"
  FOUND_TB_IFACE=1
  ip link set "$IFACE" up || true
  ip link set "$IFACE" mtu "$MTU" || true
  ip link set "$IFACE" master "$BRIDGE" || true
  systemctl start "tb-enlist@${IFACE}.service" || true
done

# If no thunderbolt netdev exists but a TB domain exists, force a rescan + udev retrigger.
if [ "$FOUND_TB_IFACE" -eq 0 ] && [ -d /sys/bus/thunderbolt/devices ]; then
  trigger_tb_rescan

  # Escalate with cooldown: try PCI NHI remove+rescan to emulate a soft replug.
  sleep 2
  if ! has_tb_netdev; then
    now="$(date +%s)"
    last="$(read_epoch_file "$LAST_BOLT_RESTART_FILE")"
    nhi_last="$(read_epoch_file "$LAST_NHI_RESCAN_FILE")"

    if [ $((now - nhi_last)) -ge "$NHI_RESCAN_COOLDOWN_SEC" ]; then
      if run_nhi_rescan "$now"; then
        sleep "$NHI_SETTLE_SEC"
        trigger_tb_rescan

        # On newer kernels the first NHI reset can stop at the peer xdomain host
        # node without recreating the matching *.0 network service.
        if ! has_tb_netdev && has_stale_tb_xdomain; then
          retry_now="$(date +%s)"
          if run_nhi_rescan "$retry_now"; then
            sleep "$NHI_SETTLE_SEC"
            trigger_tb_rescan
          fi
        fi
      fi
    fi

    # Secondary fallback with cooldown: restart boltd if interface is still missing
    # and the host actually uses that service.
    if ! has_tb_netdev; then
      if [ $((now - last)) -ge "$BOLT_RESTART_COOLDOWN_SEC" ]; then
        if systemctl list-unit-files bolt.service >/dev/null 2>&1; then
          systemctl restart bolt.service || true
          printf '%s\n' "$now" > "$LAST_BOLT_RESTART_FILE"
        fi
      fi
    fi

    trigger_tb_rescan
  fi
fi

# Assess every interface even when recovery of an earlier one fails; the
# failure is remembered and reported through the exit status afterwards.
HEALTH_RC=0
for path in /sys/class/net/thunderbolt*; do
  [ -e "$path" ] || continue
  if ! assess_peer_health "${path##*/}"; then
    log "peer health recovery failed for ${path##*/}"
    HEALTH_RC=1
  fi
done

[ "$HEALTH_RC" -eq 0 ] || exit "$HEALTH_RC"