Implement drive health check for thinkpad
This commit is contained in:
parent
a8dd25576e
commit
fd8a537a18
8 changed files with 224 additions and 54 deletions
|
|
@ -2,4 +2,4 @@
|
|||
|
||||
- This is my nix configuration system. Whenever you need to introduce an update to my config, remember to check the current config first.
|
||||
- After you introduce updates, remember to reflect those updates in the readme, should they bring any changes.
|
||||
- Do not add the shebang when writing shell scripts
|
||||
- Never write shebang in any context unless specifically requested
|
||||
|
|
|
|||
|
|
@ -18,32 +18,64 @@
|
|||
# Global smartd options
|
||||
extraOptions = [ "-A /var/log/smartd/" "-i 600" ];
|
||||
|
||||
# Disable default notifications
|
||||
notifications = {
|
||||
mail.enable = false;
|
||||
x11.enable = false;
|
||||
test = false;
|
||||
};
|
||||
|
||||
# Device-specific monitoring configurations
|
||||
devices = [
|
||||
# ZFS Mirror drives (SATA SSDs, per their ata-* device IDs) - more frequent monitoring
|
||||
{
|
||||
device = "/dev/disk/by-id/ata-ZHITAI_SC001_XT_1000GB_ZTB401TAB244431J4R";
|
||||
options = "-d auto -a -o on -S on -s (S/../.././02|L/../../6/03) -m exec=/home/yanlin/.config/nix/scripts/disk-health-smartd-alert.sh";
|
||||
options = "-d auto -a -o on -S on -s (S/../.././02|L/../../6/03) -M exec ${pkgs.writeShellScript "smartd-notify-hs-zfs1" ''
|
||||
export SMARTD_DEVICE="$SMARTD_DEVICE"
|
||||
export SMARTD_FAILTYPE="$SMARTD_FAILTYPE"
|
||||
export SMARTD_MESSAGE="$SMARTD_MESSAGE"
|
||||
/home/yanlin/.config/nix/scripts/disk-health-smartd-alert.sh Ac9qKFH5cA.7Yly ZFS_Mirror_1
|
||||
''}";
|
||||
}
|
||||
{
|
||||
device = "/dev/disk/by-id/ata-ZHITAI_SC001_XT_1000GB_ZTB401TAB244431KEG";
|
||||
options = "-d auto -a -o on -S on -s (S/../.././02|L/../../6/03) -m exec=/home/yanlin/.config/nix/scripts/disk-health-smartd-alert.sh";
|
||||
options = "-d auto -a -o on -S on -s (S/../.././02|L/../../6/03) -M exec ${pkgs.writeShellScript "smartd-notify-hs-zfs2" ''
|
||||
export SMARTD_DEVICE="$SMARTD_DEVICE"
|
||||
export SMARTD_FAILTYPE="$SMARTD_FAILTYPE"
|
||||
export SMARTD_MESSAGE="$SMARTD_MESSAGE"
|
||||
/home/yanlin/.config/nix/scripts/disk-health-smartd-alert.sh Ac9qKFH5cA.7Yly ZFS_Mirror_2
|
||||
''}";
|
||||
}
|
||||
|
||||
# Data drives (12TB HDDs) - standard monitoring
|
||||
{
|
||||
device = "/dev/disk/by-id/ata-HGST_HUH721212ALE604_5PK2N4GB";
|
||||
options = "-d auto -a -o on -S on -s (S/../.././02|L/../../7/03) -W 4,45,55 -m exec=/home/yanlin/.config/nix/scripts/disk-health-smartd-alert.sh";
|
||||
options = "-d auto -a -o on -S on -s (S/../.././02|L/../../7/03) -W 4,45,55 -M exec ${pkgs.writeShellScript "smartd-notify-hs-data1" ''
|
||||
export SMARTD_DEVICE="$SMARTD_DEVICE"
|
||||
export SMARTD_FAILTYPE="$SMARTD_FAILTYPE"
|
||||
export SMARTD_MESSAGE="$SMARTD_MESSAGE"
|
||||
/home/yanlin/.config/nix/scripts/disk-health-smartd-alert.sh Ac9qKFH5cA.7Yly Data_Drive_1_12TB
|
||||
''}";
|
||||
}
|
||||
{
|
||||
device = "/dev/disk/by-id/ata-HGST_HUH721212ALE604_5PJ7Z3LE";
|
||||
options = "-d auto -a -o on -S on -s (S/../.././02|L/../../7/03) -W 4,45,55 -m exec=/home/yanlin/.config/nix/scripts/disk-health-smartd-alert.sh";
|
||||
options = "-d auto -a -o on -S on -s (S/../.././02|L/../../7/03) -W 4,45,55 -M exec ${pkgs.writeShellScript "smartd-notify-hs-data2" ''
|
||||
export SMARTD_DEVICE="$SMARTD_DEVICE"
|
||||
export SMARTD_FAILTYPE="$SMARTD_FAILTYPE"
|
||||
export SMARTD_MESSAGE="$SMARTD_MESSAGE"
|
||||
/home/yanlin/.config/nix/scripts/disk-health-smartd-alert.sh Ac9qKFH5cA.7Yly Data_Drive_2_12TB
|
||||
''}";
|
||||
}
|
||||
|
||||
# Parity drive (16TB HDD) - enhanced monitoring due to criticality
|
||||
{
|
||||
device = "/dev/disk/by-id/ata-ST16000NM000J-2TW103_WRS0F8BE";
|
||||
options = "-d auto -a -o on -S on -s (S/../.././02|L/../../1/03) -W 2,45,55 -m exec=/home/yanlin/.config/nix/scripts/disk-health-smartd-alert.sh";
|
||||
options = "-d auto -a -o on -S on -s (S/../.././02|L/../../1/03) -W 2,45,55 -M exec ${pkgs.writeShellScript "smartd-notify-hs-parity" ''
|
||||
export SMARTD_DEVICE="$SMARTD_DEVICE"
|
||||
export SMARTD_FAILTYPE="$SMARTD_FAILTYPE"
|
||||
export SMARTD_MESSAGE="$SMARTD_MESSAGE"
|
||||
/home/yanlin/.config/nix/scripts/disk-health-smartd-alert.sh Ac9qKFH5cA.7Yly Parity_Drive_16TB
|
||||
''}";
|
||||
}
|
||||
];
|
||||
};
|
||||
|
|
@ -56,14 +88,17 @@
|
|||
after = [ "multi-user.target" ];
|
||||
serviceConfig = {
|
||||
Type = "oneshot";
|
||||
ExecStart = "${pkgs.bash}/bin/bash /home/yanlin/.config/nix/scripts/daily-smart-report.sh";
|
||||
ExecStart = "${pkgs.bash}/bin/bash /home/yanlin/.config/nix/scripts/daily-smart-report.sh Ac9qKFH5cA.7Yly";
|
||||
User = "root";
|
||||
StandardOutput = "journal";
|
||||
StandardError = "journal";
|
||||
# Add timeout to prevent hanging
|
||||
TimeoutStartSec = "300"; # 5 minutes max
|
||||
# Set PATH to include system binaries for smartctl and curl
|
||||
Environment = "PATH=/run/current-system/sw/bin";
|
||||
# Set PATH and SMART_DRIVES environment variables
|
||||
Environment = [
|
||||
"PATH=/run/current-system/sw/bin"
|
||||
"SMART_DRIVES=/dev/disk/by-id/ata-ZHITAI_SC001_XT_1000GB_ZTB401TAB244431J4R:ZFS Mirror 1;/dev/disk/by-id/ata-ZHITAI_SC001_XT_1000GB_ZTB401TAB244431KEG:ZFS Mirror 2;/dev/disk/by-id/ata-HGST_HUH721212ALE604_5PK2N4GB:Data Drive 1 (12TB);/dev/disk/by-id/ata-HGST_HUH721212ALE604_5PJ7Z3LE:Data Drive 2 (12TB);/dev/disk/by-id/ata-ST16000NM000J-2TW103_WRS0F8BE:Parity Drive (16TB)"
|
||||
];
|
||||
# Allow access to block devices for SMART commands
|
||||
DeviceAllow = [ "/dev/disk/by-id/* rw" "/dev/sd* rw" "/dev/nvme* rw" "char-* rw" "block-* rw" ];
|
||||
DevicePolicy = "closed";
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@
|
|||
# hs-specific home configuration
|
||||
programs.zsh.shellAliases = {
|
||||
# Disk health monitoring
|
||||
smart-report = "sudo /home/yanlin/.config/nix/scripts/daily-smart-report.sh";
|
||||
smart-report = "sudo SMART_DRIVES='/dev/disk/by-id/ata-ZHITAI_SC001_XT_1000GB_ZTB401TAB244431J4R:ZFS Mirror 1;/dev/disk/by-id/ata-ZHITAI_SC001_XT_1000GB_ZTB401TAB244431KEG:ZFS Mirror 2;/dev/disk/by-id/ata-HGST_HUH721212ALE604_5PK2N4GB:Data Drive 1 (12TB);/dev/disk/by-id/ata-HGST_HUH721212ALE604_5PJ7Z3LE:Data Drive 2 (12TB);/dev/disk/by-id/ata-ST16000NM000J-2TW103_WRS0F8BE:Parity Drive (16TB)' /home/yanlin/.config/nix/scripts/daily-smart-report.sh Ac9qKFH5cA.7Yly";
|
||||
move-inbox = "cp -rl /mnt/storage/Media/downloads/.inbox/* /mnt/storage/Media/downloads/inbox && chown -R yanlin:users /mnt/storage/Media/downloads/inbox";
|
||||
};
|
||||
|
||||
|
|
|
|||
123
hosts/nixos/thinkpad/disk-health.nix
Normal file
123
hosts/nixos/thinkpad/disk-health.nix
Normal file
|
|
@ -0,0 +1,123 @@
|
|||
{ config, lib, pkgs, ... }:
|
||||
|
||||
{
|
||||
# Simplified disk health monitoring for ThinkPad laptop
|
||||
# Single NVMe SSD monitoring with laptop-friendly settings
|
||||
|
||||
# Package requirements
|
||||
environment.systemPackages = with pkgs; [
|
||||
smartmontools
|
||||
curl # For Gotify notifications
|
||||
];
|
||||
|
||||
# Smartd configuration for laptop NVMe SSD
|
||||
services.smartd = {
|
||||
enable = true;
|
||||
autodetect = false; # Explicit configuration
|
||||
|
||||
# Global smartd options
|
||||
extraOptions = [ "-A /var/log/smartd/" "-i 900" ]; # Check every 15 minutes
|
||||
|
||||
# Disable default notifications
|
||||
notifications = {
|
||||
mail.enable = false;
|
||||
x11.enable = false;
|
||||
test = false;
|
||||
};
|
||||
|
||||
# Single NVMe drive monitoring with all options inline
|
||||
devices = [
|
||||
{
|
||||
device = "/dev/nvme0n1";
|
||||
options = "-d nvme -a -o on -S on -s (S/../.././03|L/../../7/04) -W 4,60,70 -M exec ${pkgs.writeShellScript "smartd-notify-thinkpad" ''
|
||||
export SMARTD_DEVICE="$SMARTD_DEVICE"
|
||||
export SMARTD_FAILTYPE="$SMARTD_FAILTYPE"
|
||||
export SMARTD_MESSAGE="$SMARTD_MESSAGE"
|
||||
/home/yanlin/.config/nix/scripts/disk-health-smartd-alert.sh AieM4SJHFcyl7TC System_SSD_ThinkPad
|
||||
''}";
|
||||
}
|
||||
];
|
||||
};
|
||||
|
||||
# Daily SMART report service
|
||||
systemd.services = {
|
||||
daily-smart-report = {
|
||||
description = "Daily SMART Health Report for ThinkPad";
|
||||
after = [ "multi-user.target" ];
|
||||
serviceConfig = {
|
||||
Type = "oneshot";
|
||||
ExecStart = "${pkgs.bash}/bin/bash /home/yanlin/.config/nix/scripts/daily-smart-report.sh AieM4SJHFcyl7TC";
|
||||
User = "root";
|
||||
StandardOutput = "journal";
|
||||
StandardError = "journal";
|
||||
TimeoutStartSec = "300"; # 5 minutes max
|
||||
# Environment with single NVMe drive
|
||||
Environment = [
|
||||
"PATH=/run/current-system/sw/bin"
|
||||
"SMART_DRIVES=/dev/nvme0n1:System SSD (ThinkPad)"
|
||||
];
|
||||
# Allow access to NVMe devices
|
||||
DeviceAllow = [ "/dev/nvme* rw" "char-* rw" "block-* rw" ];
|
||||
DevicePolicy = "closed";
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
# Daily SMART report timer - runs at 09:00 (later than server)
|
||||
systemd.timers = {
|
||||
daily-smart-report = {
|
||||
description = "Daily SMART Report Timer for ThinkPad";
|
||||
wantedBy = [ "timers.target" ];
|
||||
timerConfig = {
|
||||
OnCalendar = "09:00:00"; # Later time for laptop
|
||||
Persistent = true;
|
||||
RandomizedDelaySec = "10m"; # Longer randomization for laptop
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
# Ensure log directories exist
|
||||
systemd.tmpfiles.rules = [
|
||||
"d /var/log 0755 root root -"
|
||||
"f /var/log/daily-smart-report.log 0644 root root -"
|
||||
"f /var/log/smartd-alerts.log 0644 root root -"
|
||||
"d /var/log/smartd 0755 root root -"
|
||||
];
|
||||
|
||||
# Enable the timer
|
||||
systemd.targets.timers.wants = [
|
||||
"daily-smart-report.timer"
|
||||
];
|
||||
|
||||
# Logrotate configuration
|
||||
services.logrotate = {
|
||||
enable = true;
|
||||
settings = {
|
||||
"/var/log/daily-smart-report.log" = {
|
||||
frequency = "weekly";
|
||||
rotate = 4;
|
||||
compress = true;
|
||||
delaycompress = true;
|
||||
missingok = true;
|
||||
notifempty = true;
|
||||
create = "644 root root";
|
||||
};
|
||||
"/var/log/smartd-alerts.log" = {
|
||||
frequency = "weekly";
|
||||
rotate = 4;
|
||||
compress = true;
|
||||
delaycompress = true;
|
||||
missingok = true;
|
||||
notifempty = true;
|
||||
create = "644 root root";
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
# Ensure scripts are executable
|
||||
system.activationScripts.disk-health-scripts = ''
|
||||
chmod +x /home/yanlin/.config/nix/scripts/gotify-notify.sh
|
||||
chmod +x /home/yanlin/.config/nix/scripts/disk-health-smartd-alert.sh
|
||||
chmod +x /home/yanlin/.config/nix/scripts/daily-smart-report.sh
|
||||
'';
|
||||
}
|
||||
|
|
@ -32,6 +32,8 @@
|
|||
# For example, laptop-specific aliases or scripts
|
||||
|
||||
programs.zsh.shellAliases = {
|
||||
# Disk health monitoring
|
||||
smart-report = "sudo SMART_DRIVES='/dev/nvme0n1:System SSD (ThinkPad)' /home/yanlin/.config/nix/scripts/daily-smart-report.sh AieM4SJHFcyl7TC";
|
||||
};
|
||||
|
||||
home.packages = with pkgs; [
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
{ config, pkgs, lib, ... }: {
|
||||
imports = [
|
||||
./hardware-configuration.nix
|
||||
./disk-health.nix
|
||||
../../../modules/wireguard.nix
|
||||
../../../modules/borg-server.nix
|
||||
];
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
# Simple daily SMART report script - plain text version
|
||||
# Only checks SMART attributes and sends report via Gotify
|
||||
# Usage: daily-smart-report.sh <gotify_token>
|
||||
# Drive list should be passed via the SMART_DRIVES environment variable as semicolon-separated "device:name" pairs
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
|
|
@ -7,18 +9,37 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|||
GOTIFY_SCRIPT="${SCRIPT_DIR}/gotify-notify.sh"
|
||||
LOG_FILE="/var/log/daily-smart-report.log"
|
||||
|
||||
# Host-specific Gotify configuration
|
||||
GOTIFY_URL="https://notify.yanlincs.com"
|
||||
GOTIFY_TOKEN="Ac9qKFH5cA.7Yly"
|
||||
# Get parameters
|
||||
GOTIFY_TOKEN="${1:-}"
|
||||
|
||||
# Drive configurations
|
||||
declare -A DRIVES=(
|
||||
["/dev/disk/by-id/ata-ZHITAI_SC001_XT_1000GB_ZTB401TAB244431J4R"]="ZFS Mirror 1"
|
||||
["/dev/disk/by-id/ata-ZHITAI_SC001_XT_1000GB_ZTB401TAB244431KEG"]="ZFS Mirror 2"
|
||||
["/dev/disk/by-id/ata-HGST_HUH721212ALE604_5PK2N4GB"]="Data Drive 1 (12TB)"
|
||||
["/dev/disk/by-id/ata-HGST_HUH721212ALE604_5PJ7Z3LE"]="Data Drive 2 (12TB)"
|
||||
["/dev/disk/by-id/ata-ST16000NM000J-2TW103_WRS0F8BE"]="Parity Drive (16TB)"
|
||||
)
|
||||
# Validate parameters
|
||||
if [[ -z "$GOTIFY_TOKEN" ]]; then
|
||||
echo "Error: Gotify token not provided"
|
||||
echo "Usage: $0 <gotify_token>"
|
||||
echo "Drives should be in SMART_DRIVES environment variable"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Gotify configuration
|
||||
GOTIFY_URL="https://notify.yanlincs.com"
|
||||
|
||||
# Parse drive configurations from environment variable
|
||||
# SMART_DRIVES format: "device1:name1;device2:name2;..."
|
||||
declare -A DRIVES=()
|
||||
|
||||
if [[ -n "${SMART_DRIVES:-}" ]]; then
|
||||
IFS=';' read -ra DRIVE_PAIRS <<< "$SMART_DRIVES"
|
||||
for pair in "${DRIVE_PAIRS[@]}"; do
|
||||
IFS=':' read -r device name <<< "$pair"
|
||||
if [[ -n "$device" && -n "$name" ]]; then
|
||||
DRIVES["$device"]="$name"
|
||||
fi
|
||||
done
|
||||
else
|
||||
echo "Warning: No drives specified in SMART_DRIVES environment variable"
|
||||
echo "Format: SMART_DRIVES='device1:name1;device2:name2'"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
main() {
|
||||
local report=""
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
# SMART daemon alert script for Gotify notifications
|
||||
# Called by smartd when SMART issues are detected
|
||||
# No arguments needed - uses SMARTD_DEVICE environment variable
|
||||
# Usage: disk-health-smartd-alert.sh <gotify_token> <drive_name>
|
||||
# Uses SMARTD_DEVICE environment variable for device info
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
|
|
@ -8,34 +9,24 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|||
GOTIFY_SCRIPT="${SCRIPT_DIR}/gotify-notify.sh"
|
||||
LOG_FILE="/var/log/smartd-alerts.log"
|
||||
|
||||
# Host-specific Gotify configuration
|
||||
GOTIFY_URL="https://notify.yanlincs.com"
|
||||
GOTIFY_TOKEN="Ac9qKFH5cA.7Yly"
|
||||
# Get parameters
|
||||
GOTIFY_TOKEN="${1:-}"
|
||||
DRIVE_NAME="${2:-}"
|
||||
|
||||
# Drive name mapping based on device path
|
||||
get_drive_name() {
|
||||
local device="$1"
|
||||
case "$device" in
|
||||
*"ata-ZHITAI_SC001_XT_1000GB_ZTB401TAB244431J4R"*)
|
||||
echo "ZFS Mirror 1 (System)"
|
||||
;;
|
||||
*"ata-ZHITAI_SC001_XT_1000GB_ZTB401TAB244431KEG"*)
|
||||
echo "ZFS Mirror 2 (System)"
|
||||
;;
|
||||
*"ata-HGST_HUH721212ALE604_5PK2N4GB"*)
|
||||
echo "Data Drive 1 (12TB)"
|
||||
;;
|
||||
*"ata-HGST_HUH721212ALE604_5PJ7Z3LE"*)
|
||||
echo "Data Drive 2 (12TB)"
|
||||
;;
|
||||
*"ata-ST16000NM000J-2TW103_WRS0F8BE"*)
|
||||
echo "Parity Drive (16TB)"
|
||||
;;
|
||||
*)
|
||||
echo "Unknown Drive ($device)"
|
||||
;;
|
||||
esac
|
||||
}
|
||||
# Validate parameters
|
||||
if [[ -z "$GOTIFY_TOKEN" ]]; then
|
||||
echo "Error: Gotify token not provided"
|
||||
echo "Usage: $0 <gotify_token> <drive_name>"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# If drive name not provided, use device path
|
||||
if [[ -z "$DRIVE_NAME" ]]; then
|
||||
DRIVE_NAME="${SMARTD_DEVICE:-Unknown Drive}"
|
||||
fi
|
||||
|
||||
# Gotify configuration
|
||||
GOTIFY_URL="https://notify.yanlincs.com"
|
||||
|
||||
log_message() {
|
||||
local message="$1"
|
||||
|
|
@ -48,10 +39,7 @@ send_smartd_alert() {
|
|||
local failtype="${SMARTD_FAILTYPE:-unknown}"
|
||||
local message="${SMARTD_MESSAGE:-No details provided}"
|
||||
|
||||
local drive_name
|
||||
drive_name=$(get_drive_name "$device")
|
||||
|
||||
log_message "SMART alert for $drive_name ($device): $failtype - $message"
|
||||
log_message "SMART alert for $DRIVE_NAME ($device): $failtype - $message"
|
||||
|
||||
# Determine priority based on failure type
|
||||
local priority="high"
|
||||
|
|
@ -68,7 +56,7 @@ send_smartd_alert() {
|
|||
esac
|
||||
|
||||
# Create notification message
|
||||
local notification_title="SMART Alert: $drive_name"
|
||||
local notification_title="SMART Alert: $DRIVE_NAME"
|
||||
local notification_message="Device: $device
|
||||
Failure Type: $failtype
|
||||
Details: $message
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue