Update and refine,

2026-03-16 00:36:04 +00:00 · 2026-03-16 00:36:04 +00:00 · f0a82d4b8e
parent 89efbba240
commit f0a82d4b8e
1 changed files with 231 additions and 271 deletions
--- a/020_sanoid.sh
+++ b/020_sanoid.sh
@ -13,22 +13,26 @@ echo "override them in /etc/sanoid/sanoid.conf"
 echo "Installed files:"
 cat <<-EOF
-	syncoid                     /usr/sbin/
+syncoid                     /usr/sbin/
-	sanoid                      /usr/sbin/
+sanoid                      /usr/sbin/
-	findoid                     /usr/sbin/
+findoid                     /usr/sbin/
-	sanoid.defaults.conf        /usr/share/sanoid/
+sanoid.defaults.conf        /usr/share/sanoid/
-	debian/sanoid-prune.service /lib/systemd/system
+debian/sanoid-prune.service /lib/systemd/system
-	CHANGELIST /usr/share/doc/sanoid/changelog
+CHANGELIST /usr/share/doc/sanoid/changelog
 EOF
 # Both sanoid and syncoid are oneshot processes so it makes little sense to
 # provide an init file, cron is just fine. In this case the systemd file is there
 # because systemd decided to manage cron tasks.
 #
 # Cronjob for non-systemd systems: every 15 minutes.
 # If you require a different interval, you will need to disable the
 # timer or the cron job according to your system configuration.
 conf_print_sanoid_cron() {
  cat <<-EOF
-		PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
+PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
-		*/15 * * * * root [ -f /etc/sanoid/sanoid.conf ] && if [ ! -d /run/systemd/system ]; then TZ=UTC /usr/sbin/sanoid --cron --quiet; fi
+*/15 * * * * root [ -f /etc/sanoid/sanoid.conf ] && if [ ! -d /run/systemd/system ]; then TZ=UTC /usr/sbin/sanoid --cron --quiet; fi
 EOF
 }
 conf_print_sanoid_cron | sudo tee /etc/cron.d/sanoid
@ -36,9 +40,9 @@ conf_print_sanoid_cron | sudo tee /etc/cron.d/sanoid
 # Do the same for syncoid for backups.
 conf_print_syncoid_cron() {
  cat <<-EOF
-		PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
+PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
-		*/15 * * * * root [ -f /etc/sanoid/syncoid.conf ] && if [ ! -d /run/systemd/system ]; then TZ=UTC /usr/sbin/syncoid --cron --quiet; fi
+*/15 * * * * root [ -f /etc/sanoid/syncoid.conf ] && if [ ! -d /run/systemd/system ]; then TZ=UTC /usr/sbin/syncoid --cron --quiet; fi
 EOF
 }
 # conf_print_syncoid_cron | sudo tee /etc/cron.d/syncoid
@ -48,49 +52,75 @@ sudo mkdir -p /etc/sanoid/
 conf_print_sanoid() {
  cat <<-EOF
-    [rpool/docker]
+#-- Organisational datasets
 # DATA
 [rpool/DATA]
  use_template = ignore
  recursive = yes
  process_children_only = yes
-    [rpool/archive]
+# ROOT
 [rpool/ROOT]
  use_template = ignore
  recursive = yes
  process_children_only = yes
-    [rpool/swap]
+#-- END
 # These datasets contain the Docker ZFS backing store "graph" (layers).
 # Just restore broken containers with docker-compose down && docker-compose up
 [rpool/docker]
  use_template = ignore
  recursive = yes
-    [rpool/tmp]
+# Docker persistent data
 [rpool/DATA/docker-volumes]
  use_template = docker-persistent
  recursive = yes
 [rpool/archive]
  use_template = ignore
  recursive = yes
-    [rpool/ROOT/devuan-1]
+[rpool/swap]
  use_template = ignore
  recursive = yes
 [rpool/tmp]
  use_template = ignore
  recursive = yes
 [rpool/ROOT/devuan-1]
  use_template = root
  recursive = yes
-    # rpool/ROOT/devuan-1/opt
+# rpool/ROOT/devuan-1/opt
-    # rpool/ROOT/devuan-1/usr
+# rpool/ROOT/devuan-1/usr
-    # rpool/ROOT/devuan-1/usr_local
+# rpool/ROOT/devuan-1/usr_local
-    # rpool/ROOT/devuan-1/usr_share
+# rpool/ROOT/devuan-1/usr_share
-    # rpool/ROOT/devuan-1/var
+# rpool/ROOT/devuan-1/var
-    # rpool/ROOT/devuan-1/var_lib
+# rpool/ROOT/devuan-1/var_lib
-  	# Specific override for Virtual Machines to use scripts
+# Specific override for Virtual Machines to use scripts
-    [rpool/ROOT/devuan-1/var_lib_virt]
+[rpool/ROOT/devuan-1/var_lib_virt]
  use_template = root
-  	recursive = no
+  recursive = yes
-    pre_snapshot_script = /usr/local/bin/virt-freeze-all.sh
+  pre_snapshot_script = /usr/local/bin/sanoid_virt-freeze-all.sh
-    post_snapshot_script = /usr/local/bin/virt-thaw-all.sh
+  post_snapshot_script = /usr/local/bin/sanoid_virt-thaw-all.sh
-  	# -- User Data --
+# -- User Data --
-    [rpool/home]
+[rpool/home]
  use_template = production
  recursive = yes
-    [rpool/space]
+[rpool/space]
  use_template = production
-    #############################
+#############################
-    # templates below this line #
+# templates below this line #
-    #############################
+#############################
-    [template_production]
+[template_production]
  frequently = 0
  hourly = 36
  daily = 30
@ -99,7 +129,7 @@ conf_print_sanoid() {
  autosnap = yes
  autoprune = yes
-    [template_root]
+[template_root]
  # Root changes fast; shorter history often suffices
  hourly = 24
  daily = 7
@ -108,12 +138,34 @@ conf_print_sanoid() {
  autosnap = yes
  autoprune = yes
-    [template_ignore]
+[template_ignore]
  autoprune = no
  autosnap = no
  monitor = no
-		[template_backup]
+##############################
 # Docker Persistent Template #
 ##############################
 [template_docker-persistent]
  # Frequent snapshots for active databases/configs
  frequently = 0
  hourly = 24
  daily = 7
  monthly = 1
  yearly = 0
  # Safety checks
  autosnap = yes
  autoprune = yes
  # Don't take a snapshot if the dataset hasn't changed
  # (Saves metadata overhead)
  # skip_hourless = yes
  pre_snapshot_script = /usr/local/bin/sanoid_zfs-skip-empty.sh
 #-- END
 [template_backup]
  autoprune = yes
  frequently = 0
  hourly = 30
@ -134,11 +186,14 @@ conf_print_sanoid() {
  daily_warn = 48
  daily_crit = 60
-    [template_hotspare]
+#-- END
 [template_hotspare]
 	autoprune = yes
 	frequently = 0
 	hourly = 30
 	daily = 90
 	weekly = 4
 	monthly = 3
 	yearly = 0
@ -154,177 +209,82 @@ conf_print_sanoid() {
 	hourly_crit = 6h
 	daily_warn = 2d
 	daily_crit = 4d
    [template_scripts]
    ### information about the snapshot will be supplied as environment variables,
    ### see the README.md file for details about what is passed when.
    ### run script before snapshot
    pre_snapshot_script = /path/to/script.sh
    ### run script after snapshot
    post_snapshot_script = /path/to/script.sh
    ### run script before pruning snapshot
    pre_pruning_script = /path/to/script.sh
    ### run script after pruning snapshot
    pruning_script = /path/to/script.sh
    ### don't take an inconsistent snapshot (skip if pre script fails)
    #no_inconsistent_snapshot = yes
    ### run post_snapshot_script when pre_snapshot_script is failing
    #force_post_snapshot_script = yes
    ### limit allowed execution time of scripts before continuing (<= 0: infinite)
    script_timeout = 5
 EOF
 }
 conf_print_sanoid | sudo tee /etc/sanoid/sanoid.conf
-# Both sanoid and synmcoid are oneshot processes so it makes little sense to
+########################
-# provide an init file, cron is just fine. In this case the systemd file is there
+# Pre-snapshot scripts #
-# because systemd decided to manage cron tasks.
+########################
 # Generated using:
 # https://raw.githubusercontent.com/akhilvij/systemd-to-sysvinit-converter/master/converter.py
 # python2 converter /usr/src/sanoid-2.2.0/sanoid.service > sanoid
 conf_print_sanoid_init() {
  cat <<-'EOF'
    #!/bin/sh
    ### BEGIN INIT INFO
    # Provides: sanoid
    # Required-Start: $syslog $local_fs $remote_fs 
    # Required-Stop:  $syslog $local_fs $remote_fs 
    # Default-Start: 2 3 4 5
    # Default-Stop: 0 1 6
    # Short-Description: Snapshot ZFS filesystems
    ### END INIT INFO
-    . /lib/lsb/init-functions
+# In ZFS, even if no data has changed, creating a snapshot still consumes a
-    prog=sanoid
+# small amount of space for metadata and adds an entry to the ZFS history.
-    PIDFILE=/var/run/$prog.pid
+# If you have hundreds of datasets being snapshotted every 15 minutes, this
-    DESC="Snapshot ZFS filesystems"
+# "metadata bloat" can make commands like zfs list -t snapshot feel sluggish
-    start() {
+# over time. If this is an issue for you, use sanoid_zfs-skip-empty.sh as
-      log_daemon_msg "Starting $DESC" "$prog"
+# a pre_snapshot_script
-      start_daemon -p $PIDFILE /usr/sbin/sanoid --take-snapshots --verbose
+conf_print_skip_empty() {
-      if [ $? -ne 0 ]; then
+  cat <<'EOF'
-        log_end_msg 1
+#!/bin/bash
        exit 1
      fi
      if [ $? -eq 0 ]; then
        log_end_msg 0
      fi
      exit 0
    }
-    stop() {
+# Usage: ./sanoid-threshold.sh <dataset> <threshold_size>
-      log_daemon_msg "Stopping $DESC" "$prog"
+# or
-      killproc -p $PIDFILE /usr/sbin/sanoid
+# Add this to your /etc/sanoid/sanoid.conf to fire this script.
      if [ $? -ne 0 ]; then
        log_end_msg 1
        exit 1
      fi
      if [ $? -eq 0 ]; then
        log_end_msg 0
      fi
    }
-    force_reload() {
+# [tank/important_data]
-      stop
+#     use_template = production
-      start
+#     # Only snapshot if more than 5MB changed
-    }
+#     pre_snapshot_script = /usr/local/bin/sanoid-threshold.sh 5M
-    case "$1" in
+DATASET=$1
-      start)
+RAW_THRESHOLD=$2
        start
  ;;
      stop)
        stop
        ;;
      force-reload)
        force_reload
        ;;
      restart)
        stop
        start
        ;;
-      *)
+convert_to_bytes() {
-        echo "$Usage: $prog {start|stop|force-reload|restart}"
+    local number=$(echo "$1" | grep -oE '^[0-9.]+')
-        exit 2
+    local unit=$(echo "$1" | grep -oI '[KMGPT]' | tr '[:lower:]' '[:upper:]')
    case "$unit" in
        K) awk "BEGIN { printf \"%.0f\", $number * 1024 }" ;;
        M) awk "BEGIN { printf \"%.0f\", $number * 1024^2 }" ;;
        G) awk "BEGIN { printf \"%.0f\", $number * 1024^3 }" ;;
        T) awk "BEGIN { printf \"%.0f\", $number * 1024^4 }" ;;
        *) printf "%.0f" "$number" ;;
    esac
 }
 if [[ -z "$DATASET" || -z "$RAW_THRESHOLD" ]]; then
    logger -t sanoid "Threshold Error: Usage: $0 <dataset> <threshold>"
    exit 1
 fi
 if ! zfs list -H "$DATASET" >/dev/null 2>&1; then
    logger -t sanoid "Threshold Error: Dataset $DATASET not found."
    exit 1
 fi
 THRESHOLD=$(convert_to_bytes "$RAW_THRESHOLD")
 WRITTEN_BYTES=$(zfs get -Hp -o value written "$DATASET")
 if [[ "$WRITTEN_BYTES" -lt "$THRESHOLD" ]]; then
    WRITTEN_HUMAN=$(zfs get -H -o value written "$DATASET")
    # Optional: Comment out the logger below if your logs get too noisy
    logger -t sanoid "Skipping $DATASET: Written $WRITTEN_HUMAN < Threshold $RAW_THRESHOLD."
    exit 1 
 fi
 exit 0
 EOF
 }
-# Sandoid doesn't ran as a daemon it runs vi cron
+# Install the pre-snapshot helper under the path referenced by the docker-persistent template.
+conf_print_skip_empty | sudo tee /usr/local/bin/sanoid_zfs-skip-empty.sh
 # conf_print_sanoid_init | sudo tee /etc/init.d/sanoid
 # sudo chmod +x /etc/init.d/sanoid
-# Generated using:
+sudo chmod +x /usr/local/bin/sanoid_zfs-skip-empty.sh
-# https://raw.githubusercontent.com/akhilvij/systemd-to-sysvinit-converter/master/converter.py
+# VM Consistency (The "Freeze/Thaw" Logic)
 # python2 converter /usr/src/sanoid-2.2.0/sanoid-prune.service > sanoid-prune
 conf_print_sanoid-prune_init() {
  cat <<-'EOF'
    #!/bin/sh
    ### BEGIN INIT INFO
    # Provides: sanoid-prune
    # Required-Start: $syslog $local_fs $remote_fs 
    # Required-Stop:  $syslog $local_fs $remote_fs 
    # Short-Description: Prune ZFS snapshots
    ### END INIT INFO
-    . /lib/lsb/init-functions
+# sanoid_virt-freeze-all.sh and sanoid_virt-thaw-all.sh are included to ensure data integrity:
-    prog=sanoid-prune
+#   *     Pre-snapshot: virsh domfsfreeze tells the Guest OS (via qemu-guest-agent) to flush its write buffers and temporarily pause I/O.
-    PIDFILE=/var/run/$prog.pid
+#   *     Snapshot: Sanoid takes an atomic ZFS snapshot.
-    DESC="Prune ZFS snapshots"
+#   *     Post-snapshot: virsh domfsthaw resumes I/O.
-    start() {
+# Result: You get an "application-consistent" backup rather than a "crash-consistent" one.
      log_daemon_msg "Starting $DESC" "$prog"
      start_daemon -p $PIDFILE /usr/sbin/sanoid --prune-snapshots --verbose
      if [ $? -ne 0 ]; then
        log_end_msg 1
        exit 1
      fi
      if [ $? -eq 0 ]; then
        log_end_msg 0
      fi
      exit 0
    }
    stop() {
      log_daemon_msg "Stopping $DESC" "$prog"
      killproc -p $PIDFILE /usr/sbin/sanoid
      if [ $? -ne 0 ]; then
        log_end_msg 1
        exit 1
      fi
      if [ $? -eq 0 ]; then
        log_end_msg 0
      fi
    }
    force_reload() {
      stop
      start
    }
    case "$1" in
      start)
        start
        ;;
      stop)
        stop
        ;;
      force-reload)
        force_reload
        ;;
      restart)
        stop
        start
        ;;
      *)
        echo "$Usage: $prog {start|stop|force-reload|restart}"
        exit 2
    esac
 EOF
 }
 # Sanoid doesn't run as a daemon; it runs via cron.
 # conf_print_sanoid-prune_init | sudo tee /etc/init.d/sanoid-prune
 # sudo chmod +x /etc/init.d/sanoid-prune
 # The user running these scripts needs sudo access to virsh or must be in the libvirt group.
 # qemu-guest-agent must be running in the VM.
@ -341,7 +301,7 @@ if [ -z "$VM_NAME" ]; then
 fi
 # Check if the VM is running
-STATE=$(virsh domstate "$VM_NAME" 2>/dev/null)
+STATE=$(/usr/bin/virsh domstate "$VM_NAME" 2>/dev/null)
 if [ "$STATE" != "running" ]; then
    echo "VM $VM_NAME is not running or does not exist. Skipping freeze."
    exit 0
@ -349,7 +309,7 @@ fi
 echo "Freezing filesystems for $VM_NAME..."
 # domfsfreeze returns the number of frozen filesystems on success
-if virsh domfsfreeze "$VM_NAME" > /dev/null; then
+if /usr/bin/virsh domfsfreeze "$VM_NAME" > /dev/null; then
    echo "Successfully frozen $VM_NAME."
 else
    echo "Error: Failed to freeze $VM_NAME. Ensure qemu-guest-agent is active."
@ -357,8 +317,8 @@ else
 fi
 EOF
 }
-conf_print_virt_freeze | sudo tee /usr/local/bin/virt-freeze.sh
+conf_print_virt_freeze | sudo tee /usr/local/bin/sanoid_virt-freeze.sh
-sudo chmod +x /usr/local/bin/virt-freeze.sh
+sudo chmod +x /usr/local/bin/sanoid_virt-freeze.sh
 #---
@ -375,7 +335,7 @@ if [ -z "$VM_NAME" ]; then
 fi
 # Check if the VM is running
-STATE=$(virsh domstate "$VM_NAME" 2>/dev/null)
+STATE=$(/usr/bin/virsh domstate "$VM_NAME" 2>/dev/null)
 if [ "$STATE" != "running" ]; then
    echo "VM $VM_NAME is not running. Skipping unfreeze."
    exit 0
@ -383,7 +343,7 @@ fi
 echo "Thawing filesystems for $VM_NAME..."
 # domfsthaw returns the number of thawed filesystems on success
-if virsh domfsthaw "$VM_NAME" > /dev/null; then
+if /usr/bin/virsh domfsthaw "$VM_NAME" > /dev/null; then
    echo "Successfully thawed $VM_NAME."
 else
    echo "Error: Failed to thaw $VM_NAME."
@ -391,8 +351,8 @@ else
 fi
 EOF
 }
-conf_print_virt_unfreeze | sudo tee /usr/local/bin/virt-unfreeze.sh
+conf_print_virt_unfreeze | sudo tee /usr/local/bin/sanoid_virt-unfreeze.sh
-sudo chmod +x /usr/local/bin/virt-unfreeze.sh
+sudo chmod +x /usr/local/bin/sanoid_virt-unfreeze.sh
 #---
@ -402,7 +362,7 @@ conf_print_virt_thaw_all() {
 # /usr/local/bin/sanoid_virt-thaw-all.sh
 # 1. Get running VM names, filtering out empty lines with awk
-RUNNING_VMS=$(virsh list --state-running --name | awk 'NF')
+RUNNING_VMS=$(/usr/bin/virsh list --state-running --name | awk 'NF')
 if [ -z "$RUNNING_VMS" ]; then
    echo "No running VMs found."
@ -415,11 +375,11 @@ for VM_NAME in $RUNNING_VMS; do
    # Use the native thaw command. 
    # It handles the guest agent communication for you.
-    if virsh domfsthaw "$VM_NAME" > /dev/null 2>&1; then
+    if /usr/bin/virsh domfsthaw "$VM_NAME" > /dev/null 2>&1; then
        echo "Successfully thawed $VM_NAME."
    else
        # If native fails, we capture the error for the user
-        ERROR=$(virsh domfsthaw "$VM_NAME" 2>&1)
+        ERROR=$(/usr/bin/virsh domfsthaw "$VM_NAME" 2>&1)
        echo "Error thawing $VM_NAME: $ERROR"
    fi
 done
@ -427,8 +387,8 @@ done
 echo "Finished processing all VMs."
 EOF
 }
-conf_print_virt_thaw_all | sudo tee /usr/local/bin/virt-thaw-all.sh
+conf_print_virt_thaw_all | sudo tee /usr/local/bin/sanoid_virt-thaw-all.sh
-sudo chmod +x /usr/local/bin/virt-thaw-all.sh
+sudo chmod +x /usr/local/bin/sanoid_virt-thaw-all.sh
 #---
@ -437,7 +397,7 @@ conf_print_virt_freeze-all() {
 #!/bin/bash
 # 1. Get running VM names, filtering out empty lines
-RUNNING_VMS=$(virsh list --state-running --name | awk 'NF')
+RUNNING_VMS=$(/usr/bin/virsh list --state-running --name | awk 'NF')
 if [ -z "$RUNNING_VMS" ]; then
    echo "No running VMs found."
@ -450,7 +410,7 @@ for VM_NAME in $RUNNING_VMS; do
    # Using the native virsh command is generally cleaner
    # It returns the number of frozen filesystems on success
-    if virsh domfsfreeze "$VM_NAME" > /dev/null 2>&1; then
+    if /usr/bin/virsh domfsfreeze "$VM_NAME" > /dev/null 2>&1; then
        echo "Successfully frozen $VM_NAME."
    else
        echo "Error: Could not freeze $VM_NAME. Check if QEMU Guest Agent is running."
@ -460,8 +420,8 @@ done
 echo "Finished processing all VMs."
 EOF
 }
-conf_print_virt_freeze-all | sudo tee /usr/local/bin/virt-freeze-all.sh
+conf_print_virt_freeze-all | sudo tee /usr/local/bin/sanoid_virt-freeze-all.sh
-sudo chmod +x /usr/local/bin/virt-freeze-all.sh
+sudo chmod +x /usr/local/bin/sanoid_virt-freeze-all.sh
 #---