#!/bin/bash
#
# autoSMART Cluster Deployment Script
# Version: 1.0
# Description: Complete cluster deployment and node installation for autoSMART

# Abort on the first unhandled command failure.
set -e

# Directory holding this script; cluster.json, sql/ and scripts/ are looked up
# relative to it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
INSTALL_DIR="/opt/autoSMART"
CONFIG_DIR="/etc/autosmart"
SERVICE_NAME="autosmart"

# Default configuration (each value may be overridden from the environment)
DB_HOST="${DB_HOST:-192.168.2.102}"
DB_USER="${DB_USER:-autosmart}"
# NOTE(review): a password is hard-coded as the fallback value; prefer
# supplying DB_PASS through the environment.
DB_PASS="${DB_PASS:-autoSMART2025!}"
DB_NAME="${DB_NAME:-autosmart}"

# Node configuration
if [[ -z "${NODE_ID:-}" ]]; then
    # Fall back to uname when `hostname -s` is unavailable, then strip any
    # domain part so the result is still a short host name.
    NODE_ID="$(hostname -s 2>/dev/null || uname -n)"
    NODE_ID="${NODE_ID%%.*}"
fi
SCAN_INTERVAL="${SCAN_INTERVAL:-300}"

# Operation modes
FORCE_REINSTALL=false
CONFIG_ONLY=false
DATABASE_MODE=false

# Colors for output (escape sequences are interpreted by `echo -e`)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Print an informational message to stdout, prefixed with a blue [INFO] tag.
# Globals:   BLUE, NC (read)
# Arguments: $1 - message text (backslash escapes are expanded by `echo -e`)
log_info() {
    echo -e "${BLUE}[INFO]${NC} $1"
}

# Print a success message to stdout, prefixed with a green [SUCCESS] tag.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text (backslash escapes are expanded by `echo -e`)
log_success() {
    echo -e "${GREEN}[SUCCESS]${NC} $1"
}

# Print a warning message to stdout, prefixed with a yellow [WARNING] tag.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text (backslash escapes are expanded by `echo -e`)
log_warning() {
    echo -e "${YELLOW}[WARNING]${NC} $1"
}

# Print an error message to stdout, prefixed with a red [ERROR] tag.
# Globals:   RED, NC (read)
# Arguments: $1 - message text (backslash escapes are expanded by `echo -e`)
log_error() {
    echo -e "${RED}[ERROR]${NC} $1"
}

# Print the command-line help text to stdout.
# The script path ($0) is substituted into the usage and example lines.
show_usage() {
    cat <<EOF
autoSMART Cluster Deployment Script v1.0
=========================================

Usage: $0 [COMMAND] [IP_ADDRESS] [OPTIONS]

Commands:
  install [IP]          Install autoSMART (local or remote node)
  install database      Install database schema remotely using psql
  uninstall [IP]        Remove autoSMART (local or remote node)
  status [IP]           Show autoSMART status (local or remote node)

Cluster Options:
  --cluster             Execute command on entire cluster

Database Options (for 'install database'):
  --db-host HOST        Database host (default: 192.168.2.102)
  --db-user USER        Database user (default: autosmart)
  --db-pass PASS        Database password (default: autoSMART2025!)
  --db-name NAME        Database name (default: autosmart)

Examples:
  $0 install <node>                    # Install on a node (name or IP from cluster.json)
  $0 install database                  # Install database schema
  $0 status <node>                     # Check status on a node (name or IP from cluster.json)
  $0 install --cluster                 # Install on entire cluster
  $0 status --cluster                  # Check status on all nodes
EOF
}

# Parse the command line into the globals consumed by the rest of the script.
# Globals written: COMMAND, TARGET_IP, CLUSTER_MODE, DATABASE_MODE, and
#                  DB_HOST / DB_USER / DB_PASS / DB_NAME when the matching
#                  --db-* option is supplied.
# Globals read:    SCRIPT_DIR (location of cluster.json)
# Arguments:       the script's positional parameters
# Exits:           0 after printing usage (no arguments, --help or -h);
#                  1 on an unknown argument, a --db-* option without a value,
#                  "database" without a preceding "install", or no command.
parse_arguments() {
    COMMAND=""
    TARGET_IP=""
    CLUSTER_MODE=false
    DATABASE_MODE=false

    # If no arguments provided, show help
    if [[ $# -eq 0 ]]; then
        show_usage
        exit 0
    fi

    local ipv4_re='^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$'
    local cluster_config="$SCRIPT_DIR/cluster.json"
    local resolved_ip=""

    while [[ $# -gt 0 ]]; do
        case "$1" in
            install|uninstall|status)
                COMMAND="$1"
                shift
                ;;
            database)
                if [[ "$COMMAND" != "install" ]]; then
                    log_error "database can only be used with install command"
                    exit 1
                fi
                DATABASE_MODE=true
                shift
                ;;
            --help|-h)
                show_usage
                exit 0
                ;;
            --cluster)
                CLUSTER_MODE=true
                shift
                ;;
            --db-host|--db-user|--db-pass|--db-name)
                # Without this check `shift 2` fails when the value is
                # missing: the script aborts silently under `set -e`, or the
                # loop never terminates without it.
                if [[ $# -lt 2 ]]; then
                    log_error "Option $1 requires a value"
                    exit 1
                fi
                case "$1" in
                    --db-host) DB_HOST="$2" ;;
                    --db-user) DB_USER="$2" ;;
                    --db-pass) DB_PASS="$2" ;;
                    --db-name) DB_NAME="$2" ;;
                esac
                shift 2
                ;;
            --*)
                log_error "Unknown option: $1"
                exit 1
                ;;
            *)
                if [[ "$1" =~ $ipv4_re ]]; then
                    TARGET_IP="$1"
                elif [[ -f "$cluster_config" ]] && command -v jq &> /dev/null; then
                    # Try to resolve node name from cluster.json; a jq failure
                    # is treated the same as "no such node".
                    resolved_ip=$(jq -r --arg name "$1" '.cluster.nodes[] | select(.hostname==$name) | .ip' "$cluster_config") || resolved_ip=""
                    if [[ -z "$resolved_ip" || "$resolved_ip" == "null" ]]; then
                        log_error "Unknown argument: $1 (not an IP or known node name)"
                        exit 1
                    fi
                    TARGET_IP="$resolved_ip"
                else
                    log_error "Unknown argument: $1"
                    exit 1
                fi
                shift
                ;;
        esac
    done

    # Validate that a command was provided
    if [[ -z "$COMMAND" ]]; then
        log_error "No command specified"
        show_usage
        exit 1
    fi

    # --cluster takes precedence over any single target given on the line.
    if [[ "$CLUSTER_MODE" == true ]]; then
        TARGET_IP=""
    fi
}

# Print the run banner: selected operation, target and database endpoint.
# Globals read: COMMAND, CLUSTER_MODE, TARGET_IP, DB_HOST, DB_NAME
show_header() {
    # The title carries no emoji prefix: the bytes that preceded it were not
    # valid text and the intended symbol could not be recovered.
    log_info "autoSMART Cluster Deployment v1.0"
    log_info "====================================="
    log_info "Hardware-based HDD tracking with differential storage"
    log_info ""
    log_info "Operation: $COMMAND"

    local target_line
    if [[ "$CLUSTER_MODE" == true ]]; then
        target_line="Target: Entire cluster (nodes from cluster.json)"
    elif [[ -n "$TARGET_IP" ]]; then
        target_line="Target: Remote node ($TARGET_IP)"
    else
        target_line="Target: Current node ($(hostname -s))"
    fi
    log_info "$target_line"

    log_info "Database: $DB_HOST:5432/$DB_NAME"
    log_info ""
}

# Install the autoSMART schema into an existing PostgreSQL database via psql.
# Steps: check that psql exists, test the connection, load sql/schema.sql,
# verify that the expected tables are present, then print a summary.
# Globals read:    DB_HOST, DB_USER, DB_PASS, DB_NAME, SCRIPT_DIR
# Globals written: PGPASSWORD (exported when DB_PASS is non-empty)
# Returns:         0 on success, 1 on any failure
handle_database_deployment() {
    log_info "💾 Installing autoSMART Database Schema"
    log_info "======================================="
    log_info "Target Database: $DB_HOST:5432/$DB_NAME"
    log_info "Database User: $DB_USER"
    log_info ""

    # Check if psql is available
    if ! command -v psql &> /dev/null; then
        log_error "psql client not found. Please install PostgreSQL client:"
        log_error "  macOS: brew install postgresql"
        log_error "  Ubuntu: sudo apt install postgresql-client"
        log_error "  CentOS: sudo dnf install postgresql"
        return 1
    fi

    # Test database connection
    log_info "🔗 Testing database connection..."
    # Kept as an array so host, user and database names reach psql as single
    # arguments even if they contain whitespace or glob characters.
    local -a psql_cmd=(psql -h "$DB_HOST" -U "$DB_USER" -d "$DB_NAME")
    if [[ -n "$DB_PASS" ]]; then
        # Passed through the environment so the password is not on psql's
        # command line.
        export PGPASSWORD="$DB_PASS"
    fi

    if ! "${psql_cmd[@]}" -c "SELECT version();" >/dev/null 2>&1; then
        log_error "Cannot connect to database $DB_HOST:5432/$DB_NAME"
        log_error "Please check:"
        log_error "  • Database server is running"
        log_error "  • Database '$DB_NAME' exists"
        log_error "  • User '$DB_USER' has proper permissions"
        log_error "  • Network connectivity to $DB_HOST"
        return 1
    fi

    log_success "✅ Database connection successful"

    # Check schema files
    local schema_file="$SCRIPT_DIR/sql/schema.sql"
    if [[ ! -f "$schema_file" ]]; then
        log_error "Schema file not found: $schema_file"
        return 1
    fi

    # Install schema
    log_info "📊 Installing database schema..."
    # ON_ERROR_STOP makes psql exit non-zero when a statement in the file
    # fails; without it SQL errors do not affect the exit status and the check
    # below could never report them.
    if ! "${psql_cmd[@]}" -v ON_ERROR_STOP=1 -f "$schema_file" >/dev/null 2>&1; then
        log_error "Failed to install database schema"
        log_error "Check for conflicts or permission issues"
        return 1
    fi

    log_success "✅ Database schema installed"

    # Verify installation
    log_info "🔍 Verifying schema installation..."
    local table_count=""
    # The OR is parenthesised so that both name patterns are restricted to the
    # public schema (AND binds tighter than OR in SQL).
    table_count=$("${psql_cmd[@]}" -t -c "
        SELECT COUNT(*) FROM information_schema.tables
        WHERE table_schema = 'public'
          AND (table_name LIKE '%smart%' OR table_name LIKE '%hdd%');
    " 2>/dev/null | tr -d '[:space:]') || table_count=""

    # An empty or non-numeric answer (e.g. the query failed) also counts as a
    # failed verification.
    if [[ ! "$table_count" =~ ^[0-9]+$ ]] || (( table_count < 3 )); then
        log_error "Schema verification failed. Expected tables not found."
        return 1
    fi

    log_success "✅ Schema verification passed ($table_count tables found)"

    # Show installed components (informational only, failures are ignored)
    log_info "📋 Database Installation Summary:"
    "${psql_cmd[@]}" -c "
        SELECT
            'Table' as type,
            table_name as name,
            pg_size_pretty(pg_total_relation_size('public.'||table_name)) as size
        FROM information_schema.tables
        WHERE table_schema = 'public'
        UNION ALL
        SELECT
            'View' as type,
            viewname as name,
            'N/A' as size
        FROM pg_views
        WHERE schemaname = 'public'
        ORDER BY type, name;
    " 2>/dev/null || true

    log_success "✅ autoSMART database deployment completed successfully!"
    log_info ""
    log_info "🚀 Next Steps:"
    log_info "  1. Deploy nodes: ./deploy.sh install <node>"
    log_info "  2. Configure clusters in config files"
    log_info "  3. Start collecting SMART data"
    log_info ""

    return 0
}

# Copy the project to a remote node and run scripts/install.sh there as root.
# Globals read: SCRIPT_DIR, DB_HOST
# Arguments:    $1 - IP address of the target node
#               $2 - command forwarded to install.sh
# Returns:      0 when the remote command succeeded, 1 otherwise
handle_remote_deployment() {
    local target_ip="$1"
    local command="$2"
    local remote="root@$target_ip"

    # Determine the correct node name from cluster.json
    local node_name=""
    local cluster_config="$SCRIPT_DIR/cluster.json"
    if [[ -f "$cluster_config" ]] && command -v jq &> /dev/null; then
        node_name=$(jq -r --arg ip "$target_ip" '.cluster.nodes[] | select(.ip==$ip) | .hostname' "$cluster_config") || node_name=""
    fi
    if [[ -z "$node_name" || "$node_name" == "null" ]]; then
        # Fallback: try to get hostname from target machine. -n keeps ssh from
        # reading this script's stdin.
        node_name=$(ssh -n -o ConnectTimeout=5 "$remote" "hostname -s" 2>/dev/null || echo "unknown-node")
    fi

    log_info "🌐 Remote deployment to $target_ip (node: $node_name)"

    # Test connectivity
    log_info "🔍 Testing connectivity to $target_ip..."
    if ! ping -c 1 -W 5 "$target_ip" >/dev/null 2>&1; then
        log_error "Cannot reach $target_ip (ping failed)"
        return 1
    fi

    # Test SSH
    # NOTE(review): StrictHostKeyChecking=no accepts unknown host keys without
    # verification - confirm this is acceptable for the target network.
    log_info "🔐 Testing SSH access to $target_ip..."
    if ! ssh -n -o ConnectTimeout=10 -o BatchMode=yes -o StrictHostKeyChecking=no "$remote" true 2>/dev/null; then
        log_error "Cannot connect to $target_ip via SSH"
        log_info "Setup SSH keys: ssh-copy-id root@$target_ip"
        return 1
    fi

    log_success "✅ SSH connection to $target_ip successful"

    # Create temp directory. mktemp on the remote side yields an unpredictable
    # name, unlike a timestamp-based path in the shared /tmp.
    local remote_temp=""
    if ! remote_temp=$(ssh -n "$remote" "mktemp -d /tmp/autosmart-deploy-XXXXXX") || [[ -z "$remote_temp" ]]; then
        log_error "Failed to create temporary directory on $target_ip"
        return 1
    fi
    log_info "📁 Creating remote directory: $remote_temp"

    # Quoted form of the directory for use inside remote shell command lines.
    local remote_temp_q=""
    printf -v remote_temp_q '%q' "$remote_temp"

    # Copy files
    log_info "📦 Syncing project files to $target_ip..."
    local -a rsync_args=(-avz --progress)
    # rsync aborts when --exclude-from names a missing file, so the option is
    # only passed when .deployignore exists.
    if [[ -f "$SCRIPT_DIR/.deployignore" ]]; then
        rsync_args+=(--exclude-from="$SCRIPT_DIR/.deployignore")
    fi
    rsync_args+=(
        --include='docs/'
        --include='docs/*.md'
        --exclude='.git*'
        --exclude='*.md'
        --exclude='deploy.sh'
    )
    if ! rsync "${rsync_args[@]}" "$SCRIPT_DIR/" "$remote:$remote_temp/"; then
        log_error "Failed to sync files to $target_ip"
        # Best-effort removal of the partially filled directory.
        ssh -n "$remote" "rm -rf -- $remote_temp_q" 2>/dev/null || true
        return 1
    fi

    # Execute install.sh
    log_info "🚀 Executing $command on $target_ip..."

    # %q quotes every value for the remote shell so unusual characters in the
    # node name or database host cannot split or alter the command line.
    local remote_cmd=""
    printf -v remote_cmd 'cd %q/scripts && bash install.sh %q --node-id %q --db-host %q' \
        "$remote_temp" "$command" "$node_name" "$DB_HOST"

    # No -n here: stdin stays attached to the remote install.sh.
    if ssh "$remote" "$remote_cmd"; then
        log_success "✅ $command completed successfully on $target_ip"
        # Cleanup is best effort; a failure here must not turn a successful
        # run into an error.
        ssh -n "$remote" "rm -rf -- $remote_temp_q" \
            || log_warning "Could not remove $remote_temp on $target_ip"
        return 0
    else
        # The remote directory is left in place after a failed run.
        log_error "❌ $command failed on $target_ip"
        return 1
    fi
}

# Show the systemd status of the autoSMART service.
# Globals read: SERVICE_NAME (falls back to "autosmart" when unset or empty)
# Arguments:    $1 - optional IP address; when given, the status is queried
#                    over SSH as root, otherwise on the local machine
# Returns:      the exit status of systemctl/ssh, or 1 when systemctl is not
#               available locally
handle_status() {
    local target_ip="${1:-}"
    local service="${SERVICE_NAME:-autosmart}"

    if [[ -n "$target_ip" ]]; then
        log_info "📊 Checking autoSMART status on $target_ip"
        ssh "root@$target_ip" "systemctl status $service --no-pager"
        return
    fi

    log_info "📊 Checking autoSMART status on current node"
    if ! command -v systemctl >/dev/null 2>&1; then
        log_error "systemctl not available"
        return 1
    fi
    systemctl status "$service" --no-pager
}

# Run a command on every node listed in cluster.json, one node after another.
# Globals read: SCRIPT_DIR
# Arguments:    $1 - command forwarded to handle_remote_deployment
# Returns:      0 when every node succeeded, 1 otherwise (also when
#               cluster.json or jq is missing)
# NOTE(review): "status" is also routed through handle_remote_deployment here,
# whereas a single-node status uses handle_status - confirm this is intended.
handle_cluster_operation() {
    local command="$1"

    log_info "🚀 Executing $command on cluster..."

    # Check if cluster.json exists
    local cluster_config="$SCRIPT_DIR/cluster.json"
    if [[ ! -f "$cluster_config" ]]; then
        log_error "Cluster configuration not found: $cluster_config"
        return 1
    fi

    # Check if jq is available for JSON parsing
    if ! command -v jq &> /dev/null; then
        log_error "jq is required for cluster operations"
        return 1
    fi

    # Parse cluster configuration
    local cluster_name="" total_nodes=""
    cluster_name=$(jq -r '.cluster.name' "$cluster_config") || cluster_name=""
    total_nodes=$(jq -r '.cluster.nodes | length' "$cluster_config") || total_nodes=""

    log_info "Cluster: $cluster_name ($total_nodes nodes)"
    log_info ""

    local success_count=0
    local failed_nodes=()
    local node_hostname="" node_ip=""

    # Process nodes. The list is read on file descriptor 3 rather than stdin:
    # ssh inside the per-node handler reads stdin and would otherwise swallow
    # the remaining entries after the first node.
    while IFS=$'\t' read -r -u 3 node_hostname node_ip; do
        log_info "🔧 Processing node: $node_hostname ($node_ip)"

        if handle_remote_deployment "$node_ip" "$command"; then
            # A plain assignment always returns 0; `((success_count++))`
            # returns 1 while the counter is 0, which ends the script under
            # `set -e`.
            success_count=$((success_count + 1))
            log_success "✅ $node_hostname completed successfully"
        else
            log_error "❌ $node_hostname failed"
            failed_nodes+=("$node_hostname")
        fi

        sleep 2
        log_info ""
    done 3< <(jq -r '.cluster.nodes[] | [(.hostname | tostring), (.ip | tostring)] | @tsv' "$cluster_config")

    # Summary
    log_info "📊 Cluster Summary:"
    log_info "  • Successful: $success_count/$total_nodes"

    if [[ ${#failed_nodes[@]} -gt 0 ]]; then
        log_error "  • Failed nodes: ${failed_nodes[*]}"
    fi

    if [[ "$success_count" == "$total_nodes" ]]; then
        log_success "🎉 All nodes processed successfully!"
        return 0
    else
        log_error "❌ Some nodes failed"
        return 1
    fi
}

# Main execution
# Parses the command line, prints the banner and dispatches to the handler
# for database, cluster, remote-node or local operation. Every path that
# delegates to a handler exits with that handler's status.
# Globals read: COMMAND, TARGET_IP, CLUSTER_MODE, DATABASE_MODE (set by
#               parse_arguments), SCRIPT_DIR, NODE_ID, DB_HOST
# Arguments:    the script's positional parameters
main() {
    parse_arguments "$@"
    show_header

    # Handle database deployment mode
    if [[ "$DATABASE_MODE" == true ]]; then
        handle_database_deployment
        exit $?
    fi

    if [[ "$CLUSTER_MODE" == true ]]; then
        handle_cluster_operation "$COMMAND"
        exit $?
    elif [[ -n "$TARGET_IP" ]]; then
        if [[ "$COMMAND" == "status" ]]; then
            handle_status "$TARGET_IP"
        else
            handle_remote_deployment "$TARGET_IP" "$COMMAND"
        fi
        exit $?
    fi

    # Local execution
    case "$COMMAND" in
        status)
            handle_status
            ;;
        install|uninstall)
            if [[ "$(uname)" == "Darwin" ]]; then
                log_error "Cannot install autoSMART on macOS development machine"
                log_info "Deploy to target nodes instead:"
                log_info "  ./deploy.sh install <node>    # Deploy to node from cluster.json"
                log_info "  ./deploy.sh install --cluster       # Deploy to all nodes"
                exit 1
            fi

            log_info "🚀 Local deployment mode"
            # --db-host is forwarded exactly as the remote path does, so a
            # --db-host/DB_HOST override is not silently dropped for local
            # installs.
            sudo bash "$SCRIPT_DIR/scripts/install.sh" "$COMMAND" \
                --node-id "$NODE_ID" --db-host "$DB_HOST"
            ;;
        *)
            log_error "Unknown command: $COMMAND"
            show_usage
            exit 1
            ;;
    esac
}

# Run main, forwarding the script's arguments unchanged.
main "$@"