diff --git a/CLAUDE.md b/CLAUDE.md index 8eb3a85..f1b8eb8 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -2,4 +2,4 @@ - This is my nix configuration system. Whenever you need to introduce update to my config, remember to check the current config. - After you introduce updates, remember to reflect those updates in the readme, should they bring any changes. -- Do not add the shebang when writing shell scripts \ No newline at end of file +- Never write shebang in any context unless specifically requested diff --git a/hosts/nixos/hs/disk-health.nix b/hosts/nixos/hs/disk-health.nix index 5c56215..54d02d6 100644 --- a/hosts/nixos/hs/disk-health.nix +++ b/hosts/nixos/hs/disk-health.nix @@ -18,32 +18,64 @@ # Global smartd options extraOptions = [ "-A /var/log/smartd/" "-i 600" ]; + # Disable default notifications + notifications = { + mail.enable = false; + x11.enable = false; + test = false; + }; + # Device-specific monitoring configurations devices = [ # ZFS Mirror drives (NVMe SSDs) - more frequent monitoring { device = "/dev/disk/by-id/ata-ZHITAI_SC001_XT_1000GB_ZTB401TAB244431J4R"; - options = "-d auto -a -o on -S on -s (S/../.././02|L/../../6/03) -m exec=/home/yanlin/.config/nix/scripts/disk-health-smartd-alert.sh"; + options = "-d auto -a -o on -S on -s (S/../.././02|L/../../6/03) -M exec ${pkgs.writeShellScript "smartd-notify-hs-zfs1" '' + export SMARTD_DEVICE="$SMARTD_DEVICE" + export SMARTD_FAILTYPE="$SMARTD_FAILTYPE" + export SMARTD_MESSAGE="$SMARTD_MESSAGE" + /home/yanlin/.config/nix/scripts/disk-health-smartd-alert.sh Ac9qKFH5cA.7Yly ZFS_Mirror_1 + ''}"; } { device = "/dev/disk/by-id/ata-ZHITAI_SC001_XT_1000GB_ZTB401TAB244431KEG"; - options = "-d auto -a -o on -S on -s (S/../.././02|L/../../6/03) -m exec=/home/yanlin/.config/nix/scripts/disk-health-smartd-alert.sh"; + options = "-d auto -a -o on -S on -s (S/../.././02|L/../../6/03) -M exec ${pkgs.writeShellScript "smartd-notify-hs-zfs2" '' + export SMARTD_DEVICE="$SMARTD_DEVICE" + export 
SMARTD_FAILTYPE="$SMARTD_FAILTYPE" + export SMARTD_MESSAGE="$SMARTD_MESSAGE" + /home/yanlin/.config/nix/scripts/disk-health-smartd-alert.sh Ac9qKFH5cA.7Yly ZFS_Mirror_2 + ''}"; } # Data drives (12TB HDDs) - standard monitoring { device = "/dev/disk/by-id/ata-HGST_HUH721212ALE604_5PK2N4GB"; - options = "-d auto -a -o on -S on -s (S/../.././02|L/../../7/03) -W 4,45,55 -m exec=/home/yanlin/.config/nix/scripts/disk-health-smartd-alert.sh"; + options = "-d auto -a -o on -S on -s (S/../.././02|L/../../7/03) -W 4,45,55 -M exec ${pkgs.writeShellScript "smartd-notify-hs-data1" '' + export SMARTD_DEVICE="$SMARTD_DEVICE" + export SMARTD_FAILTYPE="$SMARTD_FAILTYPE" + export SMARTD_MESSAGE="$SMARTD_MESSAGE" + /home/yanlin/.config/nix/scripts/disk-health-smartd-alert.sh Ac9qKFH5cA.7Yly Data_Drive_1_12TB + ''}"; } { device = "/dev/disk/by-id/ata-HGST_HUH721212ALE604_5PJ7Z3LE"; - options = "-d auto -a -o on -S on -s (S/../.././02|L/../../7/03) -W 4,45,55 -m exec=/home/yanlin/.config/nix/scripts/disk-health-smartd-alert.sh"; + options = "-d auto -a -o on -S on -s (S/../.././02|L/../../7/03) -W 4,45,55 -M exec ${pkgs.writeShellScript "smartd-notify-hs-data2" '' + export SMARTD_DEVICE="$SMARTD_DEVICE" + export SMARTD_FAILTYPE="$SMARTD_FAILTYPE" + export SMARTD_MESSAGE="$SMARTD_MESSAGE" + /home/yanlin/.config/nix/scripts/disk-health-smartd-alert.sh Ac9qKFH5cA.7Yly Data_Drive_2_12TB + ''}"; } # Parity drive (16TB HDD) - enhanced monitoring due to criticality { device = "/dev/disk/by-id/ata-ST16000NM000J-2TW103_WRS0F8BE"; - options = "-d auto -a -o on -S on -s (S/../.././02|L/../../1/03) -W 2,45,55 -m exec=/home/yanlin/.config/nix/scripts/disk-health-smartd-alert.sh"; + options = "-d auto -a -o on -S on -s (S/../.././02|L/../../1/03) -W 2,45,55 -M exec ${pkgs.writeShellScript "smartd-notify-hs-parity" '' + export SMARTD_DEVICE="$SMARTD_DEVICE" + export SMARTD_FAILTYPE="$SMARTD_FAILTYPE" + export SMARTD_MESSAGE="$SMARTD_MESSAGE" + 
/home/yanlin/.config/nix/scripts/disk-health-smartd-alert.sh Ac9qKFH5cA.7Yly Parity_Drive_16TB + ''}"; } ]; }; @@ -56,14 +88,17 @@ after = [ "multi-user.target" ]; serviceConfig = { Type = "oneshot"; - ExecStart = "${pkgs.bash}/bin/bash /home/yanlin/.config/nix/scripts/daily-smart-report.sh"; + ExecStart = "${pkgs.bash}/bin/bash /home/yanlin/.config/nix/scripts/daily-smart-report.sh Ac9qKFH5cA.7Yly"; User = "root"; StandardOutput = "journal"; StandardError = "journal"; # Add timeout to prevent hanging TimeoutStartSec = "300"; # 5 minutes max - # Set PATH to include system binaries for smartctl and curl - Environment = "PATH=/run/current-system/sw/bin"; + # Set PATH and SMART_DRIVES (quoted: systemd splits unquoted Environment= values on whitespace) + Environment = [ + "PATH=/run/current-system/sw/bin" + "\"SMART_DRIVES=/dev/disk/by-id/ata-ZHITAI_SC001_XT_1000GB_ZTB401TAB244431J4R:ZFS Mirror 1;/dev/disk/by-id/ata-ZHITAI_SC001_XT_1000GB_ZTB401TAB244431KEG:ZFS Mirror 2;/dev/disk/by-id/ata-HGST_HUH721212ALE604_5PK2N4GB:Data Drive 1 (12TB);/dev/disk/by-id/ata-HGST_HUH721212ALE604_5PJ7Z3LE:Data Drive 2 (12TB);/dev/disk/by-id/ata-ST16000NM000J-2TW103_WRS0F8BE:Parity Drive (16TB)\"" + ]; # Allow access to block devices for SMART commands DeviceAllow = [ "/dev/disk/by-id/* rw" "/dev/sd* rw" "/dev/nvme* rw" "char-* rw" "block-* rw" ]; DevicePolicy = "closed"; diff --git a/hosts/nixos/hs/home.nix b/hosts/nixos/hs/home.nix index 0f6ec26..f223d29 100644 --- a/hosts/nixos/hs/home.nix +++ b/hosts/nixos/hs/home.nix @@ -9,7 +9,7 @@ # hs-specific home configuration programs.zsh.shellAliases = { # Disk health monitoring - smart-report = "sudo /home/yanlin/.config/nix/scripts/daily-smart-report.sh"; + smart-report = "sudo SMART_DRIVES='/dev/disk/by-id/ata-ZHITAI_SC001_XT_1000GB_ZTB401TAB244431J4R:ZFS Mirror 1;/dev/disk/by-id/ata-ZHITAI_SC001_XT_1000GB_ZTB401TAB244431KEG:ZFS Mirror 2;/dev/disk/by-id/ata-HGST_HUH721212ALE604_5PK2N4GB:Data Drive 1 (12TB);/dev/disk/by-id/ata-HGST_HUH721212ALE604_5PJ7Z3LE:Data Drive 2
(12TB);/dev/disk/by-id/ata-ST16000NM000J-2TW103_WRS0F8BE:Parity Drive (16TB)' /home/yanlin/.config/nix/scripts/daily-smart-report.sh Ac9qKFH5cA.7Yly"; move-inbox = "cp -rl /mnt/storage/Media/downloads/.inbox/* /mnt/storage/Media/downloads/inbox && chown -R yanlin:users /mnt/storage/Media/downloads/inbox"; }; diff --git a/hosts/nixos/thinkpad/disk-health.nix b/hosts/nixos/thinkpad/disk-health.nix new file mode 100644 index 0000000..da4229c --- /dev/null +++ b/hosts/nixos/thinkpad/disk-health.nix @@ -0,0 +1,123 @@ +{ config, lib, pkgs, ... }: + +{ + # Simplified disk health monitoring for ThinkPad laptop + # Single NVMe SSD monitoring with laptop-friendly settings + + # Package requirements + environment.systemPackages = with pkgs; [ + smartmontools + curl # For Gotify notifications + ]; + + # Smartd configuration for laptop NVMe SSD + services.smartd = { + enable = true; + autodetect = false; # Explicit configuration + + # Global smartd options + extraOptions = [ "-A /var/log/smartd/" "-i 900" ]; # Check every 15 minutes + + # Disable default notifications + notifications = { + mail.enable = false; + x11.enable = false; + test = false; + }; + + # Single NVMe drive monitoring with all options inline + devices = [ + { + device = "/dev/nvme0n1"; + options = "-d nvme -a -o on -S on -s (S/../.././03|L/../../7/04) -W 4,60,70 -M exec ${pkgs.writeShellScript "smartd-notify-thinkpad" '' + export SMARTD_DEVICE="$SMARTD_DEVICE" + export SMARTD_FAILTYPE="$SMARTD_FAILTYPE" + export SMARTD_MESSAGE="$SMARTD_MESSAGE" + /home/yanlin/.config/nix/scripts/disk-health-smartd-alert.sh AieM4SJHFcyl7TC System_SSD_ThinkPad + ''}"; + } + ]; + }; + + # Daily SMART report service + systemd.services = { + daily-smart-report = { + description = "Daily SMART Health Report for ThinkPad"; + after = [ "multi-user.target" ]; + serviceConfig = { + Type = "oneshot"; + ExecStart = "${pkgs.bash}/bin/bash /home/yanlin/.config/nix/scripts/daily-smart-report.sh AieM4SJHFcyl7TC"; + User = "root"; + 
StandardOutput = "journal"; + StandardError = "journal"; + TimeoutStartSec = "300"; # 5 minutes max + # Environment with single NVMe drive (quoted: systemd splits unquoted Environment= values on whitespace) + Environment = [ + "PATH=/run/current-system/sw/bin" + "\"SMART_DRIVES=/dev/nvme0n1:System SSD (ThinkPad)\"" + ]; + # Allow access to NVMe devices + DeviceAllow = [ "/dev/nvme* rw" "char-* rw" "block-* rw" ]; + DevicePolicy = "closed"; + }; + }; + }; + + # Daily SMART report timer - runs at 09:00 (later than server) + systemd.timers = { + daily-smart-report = { + description = "Daily SMART Report Timer for ThinkPad"; + wantedBy = [ "timers.target" ]; + timerConfig = { + OnCalendar = "09:00:00"; # Later time for laptop + Persistent = true; + RandomizedDelaySec = "10m"; # Longer randomization for laptop + }; + }; + }; + + # Ensure log directories exist + systemd.tmpfiles.rules = [ + "d /var/log 0755 root root -" + "f /var/log/daily-smart-report.log 0644 root root -" + "f /var/log/smartd-alerts.log 0644 root root -" + "d /var/log/smartd 0755 root root -" + ]; + + # Enable the timer + systemd.targets.timers.wants = [ + "daily-smart-report.timer" + ]; + + # Logrotate configuration + services.logrotate = { + enable = true; + settings = { + "/var/log/daily-smart-report.log" = { + frequency = "weekly"; + rotate = 4; + compress = true; + delaycompress = true; + missingok = true; + notifempty = true; + create = "644 root root"; + }; + "/var/log/smartd-alerts.log" = { + frequency = "weekly"; + rotate = 4; + compress = true; + delaycompress = true; + missingok = true; + notifempty = true; + create = "644 root root"; + }; + }; + }; + + # Ensure scripts are executable + system.activationScripts.disk-health-scripts = '' + chmod +x /home/yanlin/.config/nix/scripts/gotify-notify.sh + chmod +x /home/yanlin/.config/nix/scripts/disk-health-smartd-alert.sh + chmod +x /home/yanlin/.config/nix/scripts/daily-smart-report.sh + ''; +} \ No newline at end of file diff --git a/hosts/nixos/thinkpad/home.nix b/hosts/nixos/thinkpad/home.nix index
ba51812..33b1d0e 100644 --- a/hosts/nixos/thinkpad/home.nix +++ b/hosts/nixos/thinkpad/home.nix @@ -32,6 +32,8 @@ # For example, laptop-specific aliases or scripts programs.zsh.shellAliases = { + # Disk health monitoring + smart-report = "sudo SMART_DRIVES='/dev/nvme0n1:System SSD (ThinkPad)' /home/yanlin/.config/nix/scripts/daily-smart-report.sh AieM4SJHFcyl7TC"; }; home.packages = with pkgs; [ diff --git a/hosts/nixos/thinkpad/system.nix b/hosts/nixos/thinkpad/system.nix index 49a94d8..d05315b 100644 --- a/hosts/nixos/thinkpad/system.nix +++ b/hosts/nixos/thinkpad/system.nix @@ -1,6 +1,7 @@ { config, pkgs, lib, ... }: { imports = [ ./hardware-configuration.nix + ./disk-health.nix ../../../modules/wireguard.nix ../../../modules/borg-server.nix ]; diff --git a/scripts/daily-smart-report.sh b/scripts/daily-smart-report.sh index 15094f7..3e43565 100755 --- a/scripts/daily-smart-report.sh +++ b/scripts/daily-smart-report.sh @@ -1,5 +1,7 @@ # Simple daily SMART report script - plain text version # Only checks SMART attributes and sends report via Gotify +# Usage: daily-smart-report.sh <gotify_token> +# Drive list should be passed via SMART_DRIVES environment variable as ';'-separated "device:name" pairs set -euo pipefail @@ -7,18 +9,37 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" GOTIFY_SCRIPT="${SCRIPT_DIR}/gotify-notify.sh" LOG_FILE="/var/log/daily-smart-report.log" -# Host-specific Gotify configuration -GOTIFY_URL="https://notify.yanlincs.com" -GOTIFY_TOKEN="Ac9qKFH5cA.7Yly" +# Get parameters +GOTIFY_TOKEN="${1:-}" -# Drive configurations -declare -A DRIVES=( - ["/dev/disk/by-id/ata-ZHITAI_SC001_XT_1000GB_ZTB401TAB244431J4R"]="ZFS Mirror 1" - ["/dev/disk/by-id/ata-ZHITAI_SC001_XT_1000GB_ZTB401TAB244431KEG"]="ZFS Mirror 2" - ["/dev/disk/by-id/ata-HGST_HUH721212ALE604_5PK2N4GB"]="Data Drive 1 (12TB)" - ["/dev/disk/by-id/ata-HGST_HUH721212ALE604_5PJ7Z3LE"]="Data Drive 2 (12TB)" - ["/dev/disk/by-id/ata-ST16000NM000J-2TW103_WRS0F8BE"]="Parity Drive (16TB)" -) +# Validate parameters
+if [[ -z "$GOTIFY_TOKEN" ]]; then + echo "Error: Gotify token not provided" >&2 + echo "Usage: $0 <gotify_token>" >&2 + echo "Drives should be in SMART_DRIVES environment variable" >&2 + exit 1 +fi + +# Gotify configuration +GOTIFY_URL="https://notify.yanlincs.com" + +# Parse drive configurations from environment variable +# SMART_DRIVES format: "device1:name1;device2:name2;..." +declare -A DRIVES=() + +if [[ -n "${SMART_DRIVES:-}" ]]; then + IFS=';' read -ra DRIVE_PAIRS <<< "$SMART_DRIVES" + for pair in "${DRIVE_PAIRS[@]}"; do + IFS=':' read -r device name <<< "$pair" + if [[ -n "$device" && -n "$name" ]]; then + DRIVES["$device"]="$name" + fi + done +else + echo "Warning: No drives specified in SMART_DRIVES environment variable" >&2 + echo "Format: SMART_DRIVES='device1:name1;device2:name2'" >&2 + exit 1 +fi main() { local report="" diff --git a/scripts/disk-health-smartd-alert.sh b/scripts/disk-health-smartd-alert.sh index 27fe8cd..af96c0d 100755 --- a/scripts/disk-health-smartd-alert.sh +++ b/scripts/disk-health-smartd-alert.sh @@ -1,6 +1,7 @@ # SMART daemon alert script for Gotify notifications # Called by smartd when SMART issues are detected -# No arguments needed - uses SMARTD_DEVICE environment variable +# Usage: disk-health-smartd-alert.sh <gotify_token> [drive_name] +# Uses SMARTD_DEVICE environment variable for device info set -euo pipefail @@ -8,34 +9,24 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" GOTIFY_SCRIPT="${SCRIPT_DIR}/gotify-notify.sh" LOG_FILE="/var/log/smartd-alerts.log" -# Host-specific Gotify configuration -GOTIFY_URL="https://notify.yanlincs.com" -GOTIFY_TOKEN="Ac9qKFH5cA.7Yly" +# Get parameters +GOTIFY_TOKEN="${1:-}" +DRIVE_NAME="${2:-}" -# Drive name mapping based on device path -get_drive_name() { - local device="$1" - case "$device" in - *"ata-ZHITAI_SC001_XT_1000GB_ZTB401TAB244431J4R"*) - echo "ZFS Mirror 1 (System)" - ;; - *"ata-ZHITAI_SC001_XT_1000GB_ZTB401TAB244431KEG"*) - echo "ZFS Mirror 2 (System)" - ;; - *"ata-HGST_HUH721212ALE604_5PK2N4GB"*) - echo "Data Drive 1
(12TB)" - ;; - *"ata-HGST_HUH721212ALE604_5PJ7Z3LE"*) - echo "Data Drive 2 (12TB)" - ;; - *"ata-ST16000NM000J-2TW103_WRS0F8BE"*) - echo "Parity Drive (16TB)" - ;; - *) - echo "Unknown Drive ($device)" - ;; - esac -} +# Validate parameters +if [[ -z "$GOTIFY_TOKEN" ]]; then + echo "Error: Gotify token not provided" >&2 + echo "Usage: $0 <gotify_token> [drive_name]" >&2 + exit 1 +fi + +# If drive name not provided, use device path +if [[ -z "$DRIVE_NAME" ]]; then + DRIVE_NAME="${SMARTD_DEVICE:-Unknown Drive}" +fi + +# Gotify configuration +GOTIFY_URL="https://notify.yanlincs.com" log_message() { local message="$1" @@ -48,10 +39,7 @@ send_smartd_alert() { local failtype="${SMARTD_FAILTYPE:-unknown}" local message="${SMARTD_MESSAGE:-No details provided}" - local drive_name - drive_name=$(get_drive_name "$device") - - log_message "SMART alert for $drive_name ($device): $failtype - $message" + log_message "SMART alert for $DRIVE_NAME ($device): $failtype - $message" # Determine priority based on failure type local priority="high" @@ -68,7 +56,7 @@ send_smartd_alert() { esac # Create notification message - local notification_title="SMART Alert: $drive_name" + local notification_title="SMART Alert: $DRIVE_NAME" local notification_message="Device: $device Failure Type: $failtype Details: $message