VariaReEncoder / cleanup_garmin_varia_media_folder.sh
1 contributor
356 lines | 8.436kb
#!/usr/bin/env bash
# Strict mode: abort on a failing command, on expansion of an unset variable,
# and when any stage of a pipeline fails.
set -euo pipefail

# Script name; not referenced anywhere else in the visible part of this file.
TOOL_NAME="cleanup_garmin_varia_media_folder.sh"
# Directory that is scanned when no MEDIA_ROOT argument is given.
DEFAULT_MEDIA_ROOT="."
# Largest size (bytes) at which a ._* file is still deleted as an AppleDouble
# sidecar; anything bigger is skipped by remove_apple_artifacts.
MAX_APPLEDOUBLE_BYTES=4096

# Run-time settings; parse_args overrides them from the command line.
DRY_RUN=false
VERBOSE=false
MEDIA_ROOT="$DEFAULT_MEDIA_ROOT"

# Paths of the scratch files. They stay empty until init_tmp_files fills them
# in with mktemp; cleanup_tmp_files removes them again.
TMP_MP4_LIST=""
TMP_ACTIONS=""
TMP_NONSTANDARD=""
TMP_ZERO=""
TMP_BLOCKED=""
TMP_APPLE=""

# Counters printed by print_summary. BLOCKED_GROUPS additionally decides the
# exit status in main (non-zero count => exit 1).
APPLE_ARTIFACTS_REMOVED=0
ZERO_SIZE_REMOVED=0
NONSTANDARD_REMOVED=0
RENAMES_DONE=0
BLOCKED_GROUPS=0
BLOCKED_FILES=0

# Print the command-line help to stdout.
# The here-document delimiter is quoted, so the text is emitted literally
# (no variable or command expansion).
# The help lists every destructive rule the script applies, including the
# removal of non-standard "_<n>.mp4" files, and the exit codes it can return.
usage() {
  cat <<'EOF'
Usage:
  cleanup_garmin_varia_media_folder.sh [options] [MEDIA_ROOT]

Purpose:
  Clean common Apple artifacts and zero-size MP4 files, then normalize
  duplicate MP4 names produced during copy/import retries.

Rules:
  - Delete AppleDouble sidecars matching ._* only when size <= 4096 bytes
  - Delete zero-size .mp4 files
  - Delete non-standard duplicate-like files: .mp4 files whose name ends in
    _<n>.mp4 but does not follow the canonical timestamp pattern below
  - For canonical timestamp names:
      YYYY-MM-DD_HH-MM-SS.mp4
      YYYY-MM-DD_HH-MM-SS_<n>.mp4
    if exactly one suffixed duplicate exists and the base file is missing,
    rename duplicate to base name
  - If base exists, or multiple suffixed duplicates exist, keep files unchanged
    and report them as blocked

Options:
  --dry-run         Print actions without changing files
  --verbose         Print per-file operations
  -h, --help        Show this help

Exit status:
  0   Cleanup completed and nothing was blocked (also after --help)
  1   Cleanup completed, but at least one duplicate group was blocked
  2   Unknown option, or MEDIA_ROOT is not a directory

Examples:
  ./cleanup_garmin_varia_media_folder.sh --dry-run ~/Autofs/xdev/autonas/ext01/@Camera/import
  ./cleanup_garmin_varia_media_folder.sh ~/Autofs/xdev/is-baobab/nvme0n1/@backup/Garmin
EOF
}

# Write one timestamped log line to stdout.
# Arguments: $1   - severity label, printed inside the second bracket pair
#            $2.. - message words, joined by the first IFS character
# Outputs:   [YYYY-MM-DD HH:MM:SS] [LEVEL] message
log_msg() {
  local level="$1"
  printf '[%s] [%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$level" "${*:2}"
}

# Log an INFO line, but only when VERBOSE is "true".
# Arguments: $@ - message words, forwarded to log_msg as one string
# Returns:   0 when verbose output is off; otherwise the status of log_msg
vlog_msg() {
  # Explicit "return 0" keeps the silent path from tripping "set -e".
  [[ "$VERBOSE" == true ]] || return 0
  log_msg "INFO" "$*"
}

# Report a fatal error and terminate the script.
# Arguments: $@ - message words
# Outputs:   one ERROR log line on stderr, so that the diagnostic does not
#            mix with the regular report the script prints on stdout
# Exits:     always, with status 2
die() {
  log_msg "ERROR" "$*" >&2
  exit 2
}

# Remove the scratch files named by the TMP_* globals, plus the
# "$TMP_ACTIONS.tmp" staging file written by build_rename_plan.
# Globals:  TMP_MP4_LIST, TMP_ACTIONS, TMP_NONSTANDARD, TMP_ZERO, TMP_BLOCKED,
#           TMP_APPLE (read)
# Returns:  always 0; removal is best-effort because this runs as the EXIT trap
cleanup_tmp_files() {
  local path
  local -a doomed=()

  # Skip variables that were never filled in. An empty TMP_ACTIONS would
  # otherwise turn "$TMP_ACTIONS.tmp" into the relative path ".tmp" and delete
  # an unrelated file in the current directory.
  for path in "$TMP_MP4_LIST" "$TMP_ACTIONS" "$TMP_NONSTANDARD" \
    "$TMP_ZERO" "$TMP_BLOCKED" "$TMP_APPLE"; do
    if [[ -n "$path" ]]; then
      doomed+=("$path")
    fi
  done
  if [[ -n "$TMP_ACTIONS" ]]; then
    doomed+=("$TMP_ACTIONS.tmp")
  fi

  if [[ "${#doomed[@]}" -gt 0 ]]; then
    # Errors are deliberately ignored: nothing useful can be done about a
    # failed cleanup while the shell is already exiting.
    rm -f -- "${doomed[@]}" 2>/dev/null || true
  fi
}

# Interpret the command line.
# Arguments: $@ - the script arguments
# Globals:   DRY_RUN, VERBOSE, MEDIA_ROOT (written)
# Notes:     -h/--help prints the usage text and exits 0 immediately.
#            Any other word starting with "-" is fatal (via die).
#            Every remaining word is taken as MEDIA_ROOT; when several are
#            given, the last one wins.
parse_args() {
  local arg

  # No option takes a value, so each word can be classified on its own.
  for arg in "$@"; do
    case "$arg" in
      -h | --help)
        usage
        exit 0
        ;;
      --dry-run)
        DRY_RUN=true
        ;;
      --verbose)
        VERBOSE=true
        ;;
      -*)
        die "Unknown option: $arg"
        ;;
      *)
        MEDIA_ROOT="$arg"
        ;;
    esac
  done
}

# Create one scratch file per TMP_* global and arrange for their removal.
# Globals:  TMP_MP4_LIST, TMP_ACTIONS, TMP_NONSTANDARD, TMP_ZERO, TMP_BLOCKED,
#           TMP_APPLE (written, in that order)
# Notes:    The EXIT trap is installed only after all files were created.
init_tmp_files() {
  local slot scratch

  for slot in TMP_MP4_LIST TMP_ACTIONS TMP_NONSTANDARD TMP_ZERO TMP_BLOCKED TMP_APPLE; do
    # Plain assignment first, so a failing mktemp is still seen by "set -e".
    scratch="$(mktemp)"
    printf -v "$slot" '%s' "$scratch"
  done

  trap cleanup_tmp_files EXIT
}

# Abort (via die) unless MEDIA_ROOT names an existing directory.
# Globals:  MEDIA_ROOT (read)
validate_media_root() {
  if [[ ! -d "$MEDIA_ROOT" ]]; then
    die "Media root not found: $MEDIA_ROOT"
  fi
}

# Rebuild the list of MP4 files below MEDIA_ROOT.
# Globals:  MEDIA_ROOT (read), TMP_MP4_LIST (file is overwritten)
# Outputs:  one path per line in TMP_MP4_LIST, sorted byte-wise
rescan_mp4_files() {
  # "._*" names are AppleDouble sidecars, not videos. They are handled by
  # remove_apple_artifacts under its own size rule; keeping them out of this
  # list prevents an oversized sidecar such as "._x_1.mp4", which that rule
  # skips, from being deleted later as a duplicate-like MP4.
  # LC_ALL=C makes the order independent of the caller's locale.
  find "$MEDIA_ROOT" -type f -name '*.mp4' ! -name '._*' \
    | LC_ALL=C sort > "$TMP_MP4_LIST"
}

# Delete a file unless the script runs in dry-run mode.
# Arguments: $1 - path to delete
# Globals:   DRY_RUN (read)
# Returns:   0 in dry-run mode; otherwise the status of rm -f
safe_remove_file() {
  if [[ "$DRY_RUN" != true ]]; then
    rm -f -- "$1"
  fi
}

# Delete small "._*" files (AppleDouble sidecars) below MEDIA_ROOT.
# Globals:  MEDIA_ROOT, MAX_APPLEDOUBLE_BYTES, DRY_RUN (read)
#           APPLE_ARTIFACTS_REMOVED (incremented per matching file)
#           TMP_APPLE (matching paths are appended)
# Notes:    Files above the size limit are only mentioned in verbose mode.
#           When at least one file matched, a summary line is logged and the
#           MP4 list is rebuilt.
remove_apple_artifacts() {
  local sidecar bytes
  local digits_re='^[0-9]+$'

  while IFS= read -r sidecar; do
    [[ -n "$sidecar" ]] || continue

    # Some wc implementations pad the count with blanks; strip them.
    bytes="$(wc -c < "$sidecar")"
    bytes="${bytes// /}"

    if ! [[ "$bytes" =~ $digits_re && "$bytes" -le "$MAX_APPLEDOUBLE_BYTES" ]]; then
      vlog_msg "Skipped ._* larger than threshold: $sidecar (${bytes} bytes)"
      continue
    fi

    APPLE_ARTIFACTS_REMOVED=$((APPLE_ARTIFACTS_REMOVED + 1))
    printf '%s\n' "$sidecar" >> "$TMP_APPLE"
    vlog_msg "Apple artifact: $sidecar (${bytes} bytes)"
    safe_remove_file "$sidecar"
  done < <(find "$MEDIA_ROOT" -type f -name '._*' | sort)

  # Nothing matched: no summary line and no rescan.
  [[ "$APPLE_ARTIFACTS_REMOVED" -gt 0 ]] || return 0

  if [[ "$DRY_RUN" == true ]]; then
    log_msg "INFO" "Would remove $APPLE_ARTIFACTS_REMOVED Apple artifact file(s)"
  else
    log_msg "INFO" "Removed $APPLE_ARTIFACTS_REMOVED Apple artifact file(s)"
  fi
  rescan_mp4_files
}

# Delete every listed MP4 that is empty (or no longer exists).
# Globals:  TMP_MP4_LIST (read, then refreshed), DRY_RUN (read)
#           ZERO_SIZE_REMOVED (incremented per matching file)
#           TMP_ZERO (matching paths are appended)
# Notes:    After at least one match a summary line is logged and the MP4 list
#           is refreshed so that later stages no longer see the removed files.
remove_zero_size_mp4() {
  local file
  local -a kept=()

  while IFS= read -r file; do
    [[ -n "$file" ]] || continue
    if [[ -s "$file" ]]; then
      kept+=("$file")
      continue
    fi
    ZERO_SIZE_REMOVED=$((ZERO_SIZE_REMOVED + 1))
    printf '%s\n' "$file" >> "$TMP_ZERO"
    vlog_msg "Zero-size MP4: $file"
    safe_remove_file "$file"
  done < "$TMP_MP4_LIST"

  if [[ "$ZERO_SIZE_REMOVED" -gt 0 ]]; then
    if [[ "$DRY_RUN" == true ]]; then
      log_msg "INFO" "Would remove $ZERO_SIZE_REMOVED zero-size MP4 file(s)"
      # Nothing was deleted, so a rescan would list the empty files again and
      # the duplicate planning would differ from a real run (for example an
      # empty base file would block a rename). Keep only the survivors.
      if [[ "${#kept[@]}" -gt 0 ]]; then
        printf '%s\n' "${kept[@]}" > "$TMP_MP4_LIST"
      else
        : > "$TMP_MP4_LIST"
      fi
    else
      log_msg "INFO" "Removed $ZERO_SIZE_REMOVED zero-size MP4 file(s)"
      rescan_mp4_files
    fi
  fi
}

# Classify every listed MP4 by its file name.
# Globals:  TMP_MP4_LIST (read), TMP_ACTIONS and TMP_NONSTANDARD (truncated,
#           then appended to), NONSTANDARD_REMOVED (incremented)
# Output:   Canonical names (YYYY-MM-DD_HH-MM-SS with an optional _<n>) add a
#           tab-separated record "dir, timestamp, n, path" to TMP_ACTIONS;
#           n is 0 when the name carries no suffix.
#           Any other name ending in _<digits>.mp4 is recorded in
#           TMP_NONSTANDARD and handed to safe_remove_file.
#           All remaining names are ignored.
collect_duplicate_actions() {
  local path name folder stamp seq
  local canonical_re='^([0-9]{4}-[0-9]{2}-[0-9]{2}_[0-9]{2}-[0-9]{2}-[0-9]{2})(_[0-9]+)?\.mp4$'
  local suffixed_re='_[0-9]+\.mp4$'

  : > "$TMP_ACTIONS"
  : > "$TMP_NONSTANDARD"

  while IFS= read -r path; do
    [[ -n "$path" ]] || continue
    name="$(basename "$path")"

    if [[ "$name" =~ $canonical_re ]]; then
      stamp="${BASH_REMATCH[1]}"
      # Second capture is "_<n>" or empty; drop the underscore.
      seq="${BASH_REMATCH[2]:-}"
      seq="${seq#_}"
      folder="$(dirname "$path")"
      printf '%s\t%s\t%s\t%s\n' "$folder" "$stamp" "${seq:-0}" "$path" >> "$TMP_ACTIONS"
      continue
    fi

    if [[ "$name" =~ $suffixed_re ]]; then
      printf '%s\n' "$path" >> "$TMP_NONSTANDARD"
      NONSTANDARD_REMOVED=$((NONSTANDARD_REMOVED + 1))
      vlog_msg "Non-standard duplicate-like file: $path"
      safe_remove_file "$path"
    fi
  done < "$TMP_MP4_LIST"
}

# Turn the per-file records in TMP_ACTIONS into a rename plan (in place).
# Input:   tab-separated records "dir, timestamp, n, path"
# Output:  one group per (dir, timestamp):
#            RENAME <path> <dir> <timestamp>   exactly one suffixed file and
#                                              no base file in the group
#            GROUP <dir> <timestamp> <count>   otherwise, when suffixed files
#            BLOCK <path>                      exist; one BLOCK per such file
#          Groups that consist of a base file only produce no output.
# Globals: TMP_ACTIONS (file is replaced via "$TMP_ACTIONS.tmp")
build_rename_plan() {
  # The awk program only merges ADJACENT records, so the records are sorted
  # by (dir, timestamp) first. Path order alone is not enough: a directory
  # named like a timestamp sorts between "TS.mp4" and "TS_1.mp4", which would
  # split the group and plan a rename onto an existing base file.
  LC_ALL=C sort -t $'\t' -k1,1 -k2,2 -k3,3n -- "$TMP_ACTIONS" \
    | awk -F'\t' '
function flush(    i) {
  if (dup_count == 1 && orig_seen == 0) {
    print "RENAME\t" dup_files[1] "\t" dir "\t" timestamp
  } else if (dup_count > 0) {
    print "GROUP\t" dir "\t" timestamp "\t" dup_count
    for (i = 1; i <= dup_count; i++) {
      print "BLOCK\t" dup_files[i]
    }
  }
}

BEGIN {
  dir = ""
  timestamp = ""
  orig_seen = 0
  dup_count = 0
}

{
  if ($1 != dir || $2 != timestamp) {
    if (NR > 1) {
      flush()
    }
    dir = $1
    timestamp = $2
    orig_seen = 0
    dup_count = 0
    delete dup_files
  }

  # A record is a suffixed duplicate when its path really ends in _<n>.mp4.
  # Testing the path instead of "n == 0" keeps a file named TS_0.mp4 from
  # being mistaken for the base file, whose record also carries n = 0.
  tail = "_" $3 ".mp4"
  if (length($4) >= length(tail) && substr($4, length($4) - length(tail) + 1) == tail) {
    dup_count++
    dup_files[dup_count] = $4
  } else {
    orig_seen = 1
  }
}

END {
  if (NR > 0) {
    flush()
  }
}
' > "$TMP_ACTIONS.tmp"

  mv -- "$TMP_ACTIONS.tmp" "$TMP_ACTIONS"
}

# Execute the plan stored in TMP_ACTIONS.
# Globals:  TMP_ACTIONS (read), DRY_RUN (read), TMP_BLOCKED (appended to)
#           RENAMES_DONE, BLOCKED_GROUPS, BLOCKED_FILES (incremented)
# Plan lines (tab-separated):
#   RENAME <src> <dir> <timestamp>  move <src> to <dir>/<timestamp>.mp4
#   GROUP  ...                      counts one blocked group
#   BLOCK  <src>                    counts and records one blocked file
# Notes:    In dry-run mode renames are only logged and counted.
apply_rename_plan() {
  # For GROUP and BLOCK lines only "action" and "src" are used; the other
  # variables merely absorb the remaining fields.
  local action src dir timestamp dup_count dst

  while IFS=$'\t' read -r action src dir timestamp dup_count; do
    [[ -n "$action" ]] || continue
    case "$action" in
      RENAME)
        dst="$dir/$timestamp.mp4"
        if [[ "$DRY_RUN" == true ]]; then
          # Nothing is moved here, so nothing can be overwritten. An existing
          # target may also be a file that a real run would have deleted first.
          RENAMES_DONE=$((RENAMES_DONE + 1))
          log_msg "INFO" "DRY-RUN rename: $src -> $dst"
        elif [[ -e "$dst" || -L "$dst" ]]; then
          # The plan only knows regular files from the scan. A symlink or a
          # directory at the target name, or a file that appeared since, would
          # be replaced (or entered) by mv. Keep the source and report it.
          log_msg "WARN" "Rename target already exists, keeping file: $src -> $dst"
          BLOCKED_GROUPS=$((BLOCKED_GROUPS + 1))
          BLOCKED_FILES=$((BLOCKED_FILES + 1))
          printf '%s\n' "$src" >> "$TMP_BLOCKED"
        else
          mv -- "$src" "$dst"
          RENAMES_DONE=$((RENAMES_DONE + 1))
          vlog_msg "Renamed: $src -> $dst"
        fi
        ;;
      GROUP)
        BLOCKED_GROUPS=$((BLOCKED_GROUPS + 1))
        ;;
      BLOCK)
        BLOCKED_FILES=$((BLOCKED_FILES + 1))
        printf '%s\n' "$src" >> "$TMP_BLOCKED"
        ;;
    esac
  done < "$TMP_ACTIONS"
}

# Print the per-category file lists collected during the run.
# Globals:  TMP_APPLE, TMP_ZERO, TMP_NONSTANDARD, TMP_BLOCKED, DRY_RUN (read)
# Outputs:  for every non-empty list, a heading via log_msg followed by the
#           list itself on stdout; the heading wording depends on DRY_RUN
print_summary_lists() {
  local heading

  if [[ -s "$TMP_APPLE" ]]; then
    heading="Apple artifacts removed:"
    [[ "$DRY_RUN" != true ]] || heading="Apple artifacts that would be removed:"
    log_msg "INFO" "$heading"
    cat "$TMP_APPLE"
  fi

  if [[ -s "$TMP_ZERO" ]]; then
    heading="Zero-size MP4 files removed:"
    [[ "$DRY_RUN" != true ]] || heading="Zero-size MP4 files that would be removed:"
    log_msg "INFO" "$heading"
    cat "$TMP_ZERO"
  fi

  if [[ -s "$TMP_NONSTANDARD" ]]; then
    heading="Non-standard duplicate-like MP4 files removed:"
    [[ "$DRY_RUN" != true ]] || heading="Non-standard duplicate-like MP4 files that would be removed:"
    log_msg "INFO" "$heading"
    cat "$TMP_NONSTANDARD"
  fi

  # Blocked files are reported the same way in both modes.
  if [[ -s "$TMP_BLOCKED" ]]; then
    log_msg "WARN" "Blocked duplicate files (manual review needed):"
    cat "$TMP_BLOCKED"
  fi
}

# Log the one-line totals for the run.
# Globals:  DRY_RUN and all six counters (read)
# Outputs:  a single INFO line; the mode is shown as "apply" or "dry-run"
print_summary() {
  local mode_text="apply"
  [[ "$DRY_RUN" != true ]] || mode_text="dry-run"

  log_msg "INFO" "Summary ($mode_text): apple_removed=$APPLE_ARTIFACTS_REMOVED zero_removed=$ZERO_SIZE_REMOVED nonstandard_removed=$NONSTANDARD_REMOVED renamed=$RENAMES_DONE blocked_groups=$BLOCKED_GROUPS blocked_files=$BLOCKED_FILES"
}

# Program entry point: parse arguments, run every cleanup stage in order and
# report the outcome.
# Arguments: $@ - the script arguments
# Exits:     1 when at least one duplicate group was blocked; otherwise the
#            function returns normally after logging success
main() {
  local step

  parse_args "$@"
  init_tmp_files
  validate_media_root

  log_msg "INFO" "Starting cleanup for media root: $MEDIA_ROOT"
  [[ "$DRY_RUN" != true ]] || log_msg "INFO" "Dry-run enabled; no files will be changed"

  # The stages depend on each other's output, so the order is significant.
  for step in \
    rescan_mp4_files \
    remove_apple_artifacts \
    remove_zero_size_mp4 \
    collect_duplicate_actions \
    build_rename_plan \
    apply_rename_plan \
    print_summary_lists \
    print_summary; do
    "$step"
  done

  if [[ "$BLOCKED_GROUPS" -le 0 ]]; then
    log_msg "INFO" "Cleanup completed successfully"
  else
    log_msg "WARN" "Cleanup completed with blocked duplicate groups"
    exit 1
  fi
}

# Run the script: hand all command-line arguments to main unchanged.
main "$@"