f16725e 3 months ago History
1 contributor
489 lines | 15.604kb
#!/bin/bash
#
# autoSMART Cluster Deployment Script
# Version: 1.0
# Description: Complete cluster deployment and node installation for autoSMART
#
# Environment overrides honoured below: DB_HOST, DB_USER, DB_PASS, DB_NAME,
# NODE_ID, SCAN_INTERVAL. Each keeps its exported value when set and non-empty
# and otherwise falls back to the default assigned here.

# Abort on the first unhandled command failure.
set -e

# --- Locations ---------------------------------------------------------------
# Absolute directory of this script, and the directory that contains it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
INSTALL_DIR="/opt/autoSMART"
CONFIG_DIR="/etc/autosmart"
SERVICE_NAME="autosmart"

# --- Database defaults -------------------------------------------------------
# NOTE(review): the fallback password is hard-coded in the script; prefer
# supplying DB_PASS through the environment.
: "${DB_HOST:=192.168.2.102}"
: "${DB_USER:=autosmart}"
: "${DB_PASS:=autoSMART2025!}"
: "${DB_NAME:=autosmart}"

# --- Node defaults -----------------------------------------------------------
# Kept as a plain assignment so a failing `hostname -s` still aborts via set -e.
NODE_ID="${NODE_ID:-$(hostname -s)}"
: "${SCAN_INTERVAL:=300}"

# --- Operation mode flags ----------------------------------------------------
FORCE_REINSTALL=false
CONFIG_ONLY=false
DATABASE_MODE=false

# --- ANSI colour sequences (stored unexpanded; the log helpers interpret them)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # reset / no colour

# Print an informational message to stdout behind a blue "[INFO]" tag.
# Arguments: $1 - message text (backslash escapes in it are interpreted)
log_info() {
    printf '%b\n' "${BLUE}[INFO]${NC} $1"
}

# Print a success message to stdout behind a green "[SUCCESS]" tag.
# Arguments: $1 - message text (backslash escapes in it are interpreted)
log_success() {
    printf '%b\n' "${GREEN}[SUCCESS]${NC} $1"
}

# Print a warning message to stdout behind a yellow "[WARNING]" tag.
# Arguments: $1 - message text (backslash escapes in it are interpreted)
log_warning() {
    printf '%b\n' "${YELLOW}[WARNING]${NC} $1"
}

# Print an error message behind a red "[ERROR]" tag.
# Arguments: $1 - message text (backslash escapes in it are interpreted)
# Outputs:   writes to stderr, so errors stay visible when stdout is
#            redirected or captured.
log_error() {
    echo -e "${RED}[ERROR]${NC} $1" >&2
}

# Print the command-line help text to stdout.
# The unquoted heredoc delimiter lets $0 expand to the invoked script name.
show_usage() {
    cat <<EOF
autoSMART Cluster Deployment Script v1.0
=========================================

Usage: $0 [COMMAND] [IP_ADDRESS] [OPTIONS]

Commands:
  install [IP]          Install autoSMART (local or remote node)
  install database      Install database schema remotely using psql
  uninstall [IP]        Remove autoSMART (local or remote node)
  status [IP]           Show autoSMART status (local or remote node)

Cluster Options:
  --cluster             Execute command on entire cluster

Database Options (for 'install database'):
  --db-host HOST        Database host (default: 192.168.2.102)
  --db-user USER        Database user (default: autosmart)
  --db-pass PASS        Database password (default: autoSMART2025!)
  --db-name NAME        Database name (default: autosmart)

Examples:
  $0 install <node>                    # Install on a node (name or IP from cluster.json)
  $0 install database                  # Install database schema
  $0 status <node>                     # Check status on a node (name or IP from cluster.json)
  $0 install --cluster                 # Install on entire cluster
  $0 status --cluster                  # Check status on all nodes
EOF
}

# Abort with an error unless the option at "$1" is followed by a value.
# Arguments: $1 - option name as typed by the user
#            $2 - number of arguments still unparsed, including the option
_require_option_value() {
    if (( $2 < 2 )); then
        log_error "Option $1 requires a value"
        exit 1
    fi
}

# Parse the command line into the script's global settings.
# Globals written: COMMAND, TARGET_IP, CLUSTER_MODE, DATABASE_MODE,
#                  DB_HOST, DB_USER, DB_PASS, DB_NAME
# Globals read:    SCRIPT_DIR (to locate cluster.json for node-name lookup)
# Arguments:       the script's positional parameters
# Exits:           0 after printing help (no arguments or --help);
#                  1 on an unknown option/argument, a missing option value,
#                  or when no command was given.
parse_arguments() {
    COMMAND=""
    TARGET_IP=""
    CLUSTER_MODE=false
    DATABASE_MODE=false

    # If no arguments provided, show help
    if [[ $# -eq 0 ]]; then
        show_usage
        exit 0
    fi

    local cluster_config resolved_ip

    while [[ $# -gt 0 ]]; do
        case "$1" in
            install|uninstall|status)
                COMMAND="$1"
                shift
                ;;
            database)
                # "database" is only meaningful as the target of "install".
                if [[ "$COMMAND" != "install" ]]; then
                    log_error "database can only be used with install command"
                    exit 1
                fi
                DATABASE_MODE=true
                shift
                ;;
            --help)
                show_usage
                exit 0
                ;;
            --cluster)
                CLUSTER_MODE=true
                shift
                ;;
            --db-host)
                # Without this check "shift 2" fails when the value is missing.
                _require_option_value "$1" "$#"
                DB_HOST="$2"
                shift 2
                ;;
            --db-user)
                _require_option_value "$1" "$#"
                DB_USER="$2"
                shift 2
                ;;
            --db-pass)
                _require_option_value "$1" "$#"
                DB_PASS="$2"
                shift 2
                ;;
            --db-name)
                _require_option_value "$1" "$#"
                DB_NAME="$2"
                shift 2
                ;;
            --*)
                log_error "Unknown option: $1"
                exit 1
                ;;
            *)
                if [[ "$1" =~ ^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then
                    # Looks like a dotted-quad address: use it directly.
                    TARGET_IP="$1"
                    shift
                else
                    # Try to resolve node name from cluster.json
                    cluster_config="$SCRIPT_DIR/cluster.json"
                    if [[ ! -f "$cluster_config" ]] || ! command -v jq &> /dev/null; then
                        log_error "Unknown argument: $1"
                        exit 1
                    fi

                    # Declared separately from the assignment so a jq failure is
                    # not masked by `local`; a failed lookup is treated as "no match".
                    resolved_ip=$(jq -r --arg name "$1" \
                        '.cluster.nodes[] | select(.hostname==$name) | .ip' \
                        "$cluster_config") || resolved_ip=""
                    # If several nodes share the hostname, use the first match.
                    resolved_ip="${resolved_ip%%$'\n'*}"

                    if [[ -z "$resolved_ip" || "$resolved_ip" == "null" ]]; then
                        log_error "Unknown argument: $1 (not an IP or known node name)"
                        exit 1
                    fi
                    TARGET_IP="$resolved_ip"
                    shift
                fi
                ;;
        esac
    done

    # Validate that a command was provided
    if [[ -z "$COMMAND" ]]; then
        log_error "No command specified"
        show_usage
        exit 1
    fi

    # --cluster targets every node, so any single-node target is discarded.
    if [[ "$CLUSTER_MODE" == true ]]; then
        TARGET_IP=""
    fi
}

# Print the deployment banner: title, requested operation, target, database.
# Globals read: COMMAND, CLUSTER_MODE, TARGET_IP, DB_HOST, DB_NAME
show_header() {
    local banner_line

    # Fixed banner lines followed by the requested operation.
    for banner_line in \
        "๏ฟฝ๏ฟฝ autoSMART Cluster Deployment v1.0" \
        "=====================================" \
        "Hardware-based HDD tracking with differential storage" \
        "" \
        "Operation: $COMMAND"; do
        log_info "$banner_line"
    done

    # Exactly one target line: local node, whole cluster, or one remote node.
    if [[ "$CLUSTER_MODE" != true && -z "$TARGET_IP" ]]; then
        log_info "Target: Current node ($(hostname -s))"
    elif [[ "$CLUSTER_MODE" == true ]]; then
        log_info "Target: Entire cluster (nodes from cluster.json)"
    else
        log_info "Target: Remote node ($TARGET_IP)"
    fi

    log_info "Database: $DB_HOST:5432/$DB_NAME"
    log_info ""
}

# Install the autoSMART schema into an existing PostgreSQL database via psql.
# Steps: check for the psql client, test the connection, load sql/schema.sql,
# verify that the expected tables exist, then print a summary of objects.
# Globals read: DB_HOST, DB_USER, DB_NAME, DB_PASS, SCRIPT_DIR
# Side effects: exports PGPASSWORD when DB_PASS is non-empty.
# Returns:      0 on success; 1 when psql is missing, the connection fails,
#               the schema file is missing, the schema load fails, or
#               verification finds fewer than 3 matching tables.
handle_database_deployment() {
    log_info "๐Ÿ’พ Installing autoSMART Database Schema"
    log_info "======================================="
    log_info "Target Database: $DB_HOST:5432/$DB_NAME"
    log_info "Database User: $DB_USER"
    log_info ""

    # Check if psql is available
    if ! command -v psql &> /dev/null; then
        log_error "psql client not found. Please install PostgreSQL client:"
        log_error "  macOS: brew install postgresql"
        log_error "  Ubuntu: sudo apt install postgresql-client"
        log_error "  CentOS: sudo dnf install postgresql"
        return 1
    fi

    # Test database connection
    log_info "๐Ÿ”— Testing database connection..."
    # An array keeps each connection value a single argument even when it
    # contains spaces or glob characters.
    local -a psql_cmd=(psql -h "$DB_HOST" -U "$DB_USER" -d "$DB_NAME")
    if [[ -n "$DB_PASS" ]]; then
        export PGPASSWORD="$DB_PASS"
    fi

    if ! "${psql_cmd[@]}" -c "SELECT version();" >/dev/null 2>&1; then
        log_error "Cannot connect to database $DB_HOST:5432/$DB_NAME"
        log_error "Please check:"
        log_error "  โ€ข Database server is running"
        log_error "  โ€ข Database '$DB_NAME' exists"
        log_error "  โ€ข User '$DB_USER' has proper permissions"
        log_error "  โ€ข Network connectivity to $DB_HOST"
        return 1
    fi

    log_success "โœ… Database connection successful"

    # Check schema files
    local schema_file="$SCRIPT_DIR/sql/schema.sql"
    if [[ ! -f "$schema_file" ]]; then
        log_error "Schema file not found: $schema_file"
        return 1
    fi

    # Install schema
    log_info "๐Ÿ“Š Installing database schema..."
    # ON_ERROR_STOP makes psql stop and exit non-zero on the first failing
    # statement; without it a broken schema load still reports success.
    if ! "${psql_cmd[@]}" -v ON_ERROR_STOP=1 -f "$schema_file" >/dev/null 2>&1; then
        log_error "Failed to install database schema"
        log_error "Check for conflicts or permission issues"
        return 1
    fi

    log_success "โœ… Database schema installed"

    # Verify installation
    log_info "๐Ÿ” Verifying schema installation..."
    # The parentheses matter: AND binds tighter than OR, so without them the
    # '%hdd%' branch would count tables from every schema.
    local table_count
    table_count=$("${psql_cmd[@]}" -t -c "
        SELECT COUNT(*) FROM information_schema.tables 
        WHERE table_schema = 'public' AND (table_name LIKE '%smart%' OR table_name LIKE '%hdd%');
    " 2>/dev/null | tr -d '[:space:]') || table_count=""

    # Treat an empty or non-numeric answer as a failed verification.
    if [[ ! "$table_count" =~ ^[0-9]+$ ]] || (( table_count < 3 )); then
        log_error "Schema verification failed. Expected tables not found."
        return 1
    fi

    log_success "โœ… Schema verification passed ($table_count tables found)"

    # Show installed components (informational only; failures are ignored)
    log_info "๐Ÿ“‹ Database Installation Summary:"
    "${psql_cmd[@]}" -c "
        SELECT 
            'Table' as type,
            table_name as name,
            pg_size_pretty(pg_total_relation_size('public.'||table_name)) as size
        FROM information_schema.tables 
        WHERE table_schema = 'public'
        UNION ALL
        SELECT 
            'View' as type,
            viewname as name,
            'N/A' as size
        FROM pg_views 
        WHERE schemaname = 'public'
        ORDER BY type, name;
    " 2>/dev/null || true

    log_success "โœ… autoSMART database deployment completed successfully!"
    log_info ""
    log_info "๐Ÿš€ Next Steps:"
    log_info "  1. Deploy nodes: ./deploy.sh install <node>"
    log_info "  2. Configure clusters in config files"
    log_info "  3. Start collecting SMART data"
    log_info ""

    return 0
}

# Best-effort removal of the remote staging directory.
# Arguments: $1 - ssh destination (user@host), $2 - remote directory path
_remote_cleanup() {
    local quoted_dir
    printf -v quoted_dir '%q' "$2"
    ssh "$1" "rm -rf -- $quoted_dir" \
        || log_warning "Could not remove $2 on $1"
}

# Copy the project to a remote node and run scripts/install.sh there.
# Globals read: SCRIPT_DIR, DB_HOST
# Arguments:    $1 - IP address of the target node (SSH as root)
#               $2 - command forwarded to install.sh (e.g. install, uninstall)
# Returns:      0 when the remote command succeeds; 1 when the node is
#               unreachable, SSH fails, the staging directory cannot be
#               created, the file sync fails, or the remote command fails.
# The remote staging directory is removed on success and on failure.
handle_remote_deployment() {
    local target_ip="$1"
    local command="$2"
    local remote="root@$target_ip"

    # Determine the correct node name from cluster.json
    local node_name=""
    local cluster_config="$SCRIPT_DIR/cluster.json"
    if [[ -f "$cluster_config" ]] && command -v jq &> /dev/null; then
        node_name=$(jq -r --arg ip "$target_ip" \
            '.cluster.nodes[] | select(.ip==$ip) | .hostname' \
            "$cluster_config") || node_name=""
        # If several entries share the IP, use the first hostname.
        node_name="${node_name%%$'\n'*}"
    fi
    if [[ -z "$node_name" || "$node_name" == "null" ]]; then
        # Fallback: try to get hostname from target machine
        node_name=$(ssh -o ConnectTimeout=5 "$remote" "hostname -s" 2>/dev/null) \
            || node_name=""
        if [[ -z "$node_name" ]]; then
            node_name="unknown-node"
        fi
    fi

    log_info "๐ŸŒ Remote deployment to $target_ip (node: $node_name)"

    # Test connectivity
    log_info "๐Ÿ” Testing connectivity to $target_ip..."
    if ! ping -c 1 -W 5 "$target_ip" >/dev/null 2>&1; then
        log_error "Cannot reach $target_ip (ping failed)"
        return 1
    fi

    # Test SSH
    log_info "๐Ÿ” Testing SSH access to $target_ip..."
    if ! ssh -o ConnectTimeout=10 -o BatchMode=yes -o StrictHostKeyChecking=no "$remote" true 2>/dev/null; then
        log_error "Cannot connect to $target_ip via SSH"
        log_info "Setup SSH keys: ssh-copy-id root@$target_ip"
        return 1
    fi

    log_success "โœ… SSH connection to $target_ip successful"

    # Create temp directory. mktemp on the remote side gives an unpredictable,
    # freshly created directory instead of a timestamp-based name in /tmp.
    local remote_temp
    if ! remote_temp=$(ssh "$remote" "mktemp -d /tmp/autosmart-deploy-XXXXXX") \
        || [[ -z "$remote_temp" ]]; then
        log_error "Failed to create remote directory on $target_ip"
        return 1
    fi
    log_info "๐Ÿ“ Creating remote directory: $remote_temp"

    # Copy files. Arguments are collected in an array so paths with spaces
    # survive, and the ignore file is only passed when it actually exists.
    log_info "๐Ÿ“ฆ Syncing project files to $target_ip..."
    local -a rsync_args=(-avz --progress)
    if [[ -f "$SCRIPT_DIR/.deployignore" ]]; then
        rsync_args+=(--exclude-from="$SCRIPT_DIR/.deployignore")
    fi
    rsync_args+=(
        --include='docs/'
        --include='docs/*.md'
        --exclude='.git*'
        --exclude='*.md'
        --exclude='deploy.sh'
    )
    if ! rsync "${rsync_args[@]}" "$SCRIPT_DIR/" "$remote:$remote_temp/"; then
        log_error "Failed to sync files to $target_ip"
        _remote_cleanup "$remote" "$remote_temp"
        return 1
    fi

    # Execute install.sh
    log_info "๐Ÿš€ Executing $command on $target_ip..."

    # %q shell-quotes every value so the remote shell receives each one as a
    # single word regardless of its content.
    local remote_cmd
    printf -v remote_cmd 'cd %q && bash install.sh %q --node-id %q --db-host %q' \
        "$remote_temp/scripts" "$command" "$node_name" "$DB_HOST"

    local rc=0
    ssh "$remote" "$remote_cmd" || rc=$?

    # Clean up the staging directory whatever the outcome.
    _remote_cleanup "$remote" "$remote_temp"

    if (( rc == 0 )); then
        log_success "โœ… $command completed successfully on $target_ip"
        return 0
    fi

    log_error "โŒ $command failed on $target_ip"
    return 1
}

# Show the status of the autosmart systemd unit.
# Arguments: $1 - optional IP of a remote node; when empty the local node
#                 is queried instead.
# Returns:   the exit status of the ssh/systemctl call, or 1 when systemctl
#            is not available locally.
handle_status() {
    local node_addr="$1"

    # Remote node: run systemctl over SSH and hand back ssh's exit status.
    if [[ -n "$node_addr" ]]; then
        log_info "๐Ÿ“Š Checking autoSMART status on $node_addr"
        ssh "root@$node_addr" "systemctl status autosmart --no-pager"
        return
    fi

    # Local node: systemctl must be present.
    log_info "๐Ÿ“Š Checking autoSMART status on current node"
    if ! command -v systemctl >/dev/null 2>&1; then
        log_error "systemctl not available"
        return 1
    fi
    systemctl status autosmart --no-pager
}

# Run a deployment command on every node listed in cluster.json.
# Globals read: SCRIPT_DIR
# Arguments:    $1 - command forwarded to handle_remote_deployment
# Returns:      0 when every node succeeds; 1 when cluster.json or jq is
#               missing, the configuration cannot be parsed, or at least
#               one node fails.
handle_cluster_operation() {
    local command="$1"

    log_info "๐Ÿš€ Executing $command on cluster..."

    # Check if cluster.json exists
    local cluster_config="$SCRIPT_DIR/cluster.json"
    if [[ ! -f "$cluster_config" ]]; then
        log_error "Cluster configuration not found: $cluster_config"
        return 1
    fi

    # Check if jq is available for JSON parsing
    if ! command -v jq &> /dev/null; then
        log_error "jq is required for cluster operations"
        return 1
    fi

    # Parse cluster configuration. Declaration and assignment are separate so
    # a jq failure is detected instead of being masked by `local`.
    local cluster_name total_nodes
    if ! cluster_name=$(jq -r '.cluster.name' "$cluster_config") \
        || ! total_nodes=$(jq -r '.cluster.nodes | length' "$cluster_config"); then
        log_error "Failed to parse cluster configuration: $cluster_config"
        return 1
    fi

    log_info "Cluster: $cluster_name ($total_nodes nodes)"
    log_info ""

    local success_count=0
    local -a failed_nodes=()
    local node_data node_hostname node_ip

    # Process nodes. The list is read on fd 3, not stdin: ssh inside
    # handle_remote_deployment reads stdin and would otherwise swallow the
    # remaining nodes, ending the loop after the first one.
    while IFS= read -r -u 3 node_data; do
        node_hostname=$(jq -r '.hostname' <<<"$node_data") || node_hostname=""
        node_ip=$(jq -r '.ip' <<<"$node_data") || node_ip=""

        log_info "๐Ÿ”ง Processing node: $node_hostname ($node_ip)"

        if handle_remote_deployment "$node_ip" "$command"; then
            # Not ((success_count++)): that returns status 1 when the old
            # value is 0 and would abort the script under `set -e`.
            success_count=$((success_count + 1))
            log_success "โœ… $node_hostname completed successfully"
        else
            log_error "โŒ $node_hostname failed"
            failed_nodes+=("$node_hostname")
        fi

        sleep 2
        log_info ""
    done 3< <(jq -c '.cluster.nodes[]' "$cluster_config")

    # Summary
    log_info "๐Ÿ“Š Cluster Summary:"
    log_info "  โ€ข Successful: $success_count/$total_nodes"

    if [[ ${#failed_nodes[@]} -gt 0 ]]; then
        log_error "  โ€ข Failed nodes: ${failed_nodes[*]}"
    fi

    if [[ $success_count -eq $total_nodes ]]; then
        log_success "๐ŸŽ‰ All nodes processed successfully!"
        return 0
    else
        log_error "โŒ Some nodes failed"
        return 1
    fi
}

# Main execution
# Parse the command line, print the banner, then dispatch to exactly one of:
# database schema install, cluster-wide operation, single remote node, or the
# local node. The database, cluster and remote paths terminate the script with
# the handler's exit status.
main() {
    parse_arguments "$@"
    show_header

    # Resolve the execution mode once; precedence is database > cluster >
    # remote node > local node.
    local mode="local"
    if [[ "$DATABASE_MODE" == true ]]; then
        mode="database"
    elif [[ "$CLUSTER_MODE" == true ]]; then
        mode="cluster"
    elif [[ -n "$TARGET_IP" ]]; then
        mode="remote"
    fi

    case "$mode" in
        database)
            handle_database_deployment
            exit $?
            ;;
        cluster)
            handle_cluster_operation "$COMMAND"
            exit $?
            ;;
        remote)
            # "status" only queries the unit; everything else deploys.
            if [[ "$COMMAND" == "status" ]]; then
                handle_status "$TARGET_IP"
            else
                handle_remote_deployment "$TARGET_IP" "$COMMAND"
            fi
            exit $?
            ;;
    esac

    # Local execution
    case "$COMMAND" in
        status)
            handle_status
            ;;
        install|uninstall)
            # The installer targets the nodes, not a macOS workstation.
            if [[ "$(uname)" == "Darwin" ]]; then
                log_error "Cannot install autoSMART on macOS development machine"
                log_info "Deploy to target nodes instead:"
                log_info "  ./deploy.sh install <node>    # Deploy to node from cluster.json"
                log_info "  ./deploy.sh install --cluster       # Deploy to all nodes"
                exit 1
            fi

            log_info "๐Ÿš€ Local deployment mode"
            sudo bash "$SCRIPT_DIR/scripts/install.sh" "$COMMAND" --node-id "$NODE_ID"
            ;;
        *)
            log_error "Unknown command: $COMMAND"
            show_usage
            exit 1
            ;;
    esac
}

# Script entry point: hand every command-line argument to main unchanged.
main "$@"