Update and refine,

2026-03-16 00:36:04 +00:00 · 2026-03-16 00:36:04 +00:00 · f0a82d4b8e
parent 89efbba240
commit f0a82d4b8e
1 changed files with 231 additions and 271 deletions
--- a/020_sanoid.sh
+++ b/020_sanoid.sh
@ -13,22 +13,26 @@ echo "override them in /etc/sanoid/sanoid.conf"

 echo "Installed files:"
 cat <<-EOF
-	syncoid                     /usr/sbin/
-	sanoid                      /usr/sbin/
-	findoid                     /usr/sbin/
-	sanoid.defaults.conf        /usr/share/sanoid/
-	debian/sanoid-prune.service /lib/systemd/system
-	CHANGELIST /usr/share/doc/sanoid/changelog
+syncoid                     /usr/sbin/
+sanoid                      /usr/sbin/
+findoid                     /usr/sbin/
+sanoid.defaults.conf        /usr/share/sanoid/
+debian/sanoid-prune.service /lib/systemd/system
+CHANGELIST /usr/share/doc/sanoid/changelog
 EOF

+# Both sanoid and syncoid are oneshot processes so it makes little sense to
+# provide an init file, cron is just fine. In this case the systemd file is there
+# because systemd decided to manage cron tasks.
+#
 # Cronjob for non-systemd systems: every 15 minutes.
 # If you require a different interval, you will need to disable the
 # timer or the cron job according to your system configuration.
 conf_print_sanoid_cron() {
  cat <<-EOF
-		PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
+PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin

-		*/15 * * * * root [ -f /etc/sanoid/sanoid.conf ] && if [ ! -d /run/systemd/system ]; then TZ=UTC /usr/sbin/sanoid --cron --quiet; fi
+*/15 * * * * root [ -f /etc/sanoid/sanoid.conf ] && if [ ! -d /run/systemd/system ]; then TZ=UTC /usr/sbin/sanoid --cron --quiet; fi
 EOF
 }
 conf_print_sanoid_cron | sudo tee /etc/cron.d/sanoid
@ -36,9 +40,9 @@ conf_print_sanoid_cron | sudo tee /etc/cron.d/sanoid
 # Do the same for syncoid for backups.
 conf_print_syncoid_cron() {
  cat <<-EOF
-		PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
+PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin

-		*/15 * * * * root [ -f /etc/sanoid/syncoid.conf ] && if [ ! -d /run/systemd/system ]; then TZ=UTC /usr/sbin/syncoid --cron --quiet; fi
+*/15 * * * * root [ -f /etc/sanoid/syncoid.conf ] && if [ ! -d /run/systemd/system ]; then TZ=UTC /usr/sbin/syncoid --cron --quiet; fi
 EOF
 }
 # conf_print_syncoid_cron | sudo tee /etc/cron.d/syncoid
@ -48,283 +52,239 @@ sudo mkdir -p /etc/sanoid/

 conf_print_sanoid() {
  cat <<-EOF
-    [rpool/docker]
-    use_template = ignore
+#-- Organisational datasets
+# DATA
+[rpool/DATA]
+  use_template = ignore
+  recursive = yes
+  process_children_only = yes

-    [rpool/archive]
-    use_template = ignore
+# ROOT
+[rpool/ROOT]
+  use_template = ignore
+  recursive = yes
+  process_children_only = yes

-    [rpool/swap]
-    use_template = ignore
+#-- END

-    [rpool/tmp]
-    use_template = ignore
+# These datasets contain the docker zfs backing store "graph" (layers).
+# Just restore a broken container with docker-compose down && docker-compose up
+[rpool/docker]
+  use_template = ignore
+  recursive = yes

-    [rpool/ROOT/devuan-1]
-    use_template = root
-    recursive = yes
+# Docker persistent data
+[rpool/DATA/docker-volumes]
+  use_template = docker-persistent
+  recursive = yes

-    # rpool/ROOT/devuan-1/opt
-    # rpool/ROOT/devuan-1/usr
-    # rpool/ROOT/devuan-1/usr_local
-    # rpool/ROOT/devuan-1/usr_share
-    # rpool/ROOT/devuan-1/var
-    # rpool/ROOT/devuan-1/var_lib
+[rpool/archive]
+  use_template = ignore
+  recursive = yes

-  	# Specific override for Virtual Machines to use scripts
-    [rpool/ROOT/devuan-1/var_lib_virt]
-    use_template = root
-  	recursive = no
-    pre_snapshot_script = /usr/local/bin/virt-freeze-all.sh
-    post_snapshot_script = /usr/local/bin/virt-thaw-all.sh
+[rpool/swap]
+  use_template = ignore
+  recursive = yes

-  	# -- User Data --
-    [rpool/home]
-    use_template = production
-    recursive = yes
+[rpool/tmp]
+  use_template = ignore
+  recursive = yes

-    [rpool/space]
-    use_template = production
+[rpool/ROOT/devuan-1]
+  use_template = root
+  recursive = yes

-    #############################
-    # templates below this line #
-    #############################
+# rpool/ROOT/devuan-1/opt
+# rpool/ROOT/devuan-1/usr
+# rpool/ROOT/devuan-1/usr_local
+# rpool/ROOT/devuan-1/usr_share
+# rpool/ROOT/devuan-1/var
+# rpool/ROOT/devuan-1/var_lib

-    [template_production]
-    frequently = 0
-    hourly = 36
-    daily = 30
-    monthly = 3
-    yearly = 0
-    autosnap = yes
-    autoprune = yes
+# Specific override for Virtual Machines to use scripts
+[rpool/ROOT/devuan-1/var_lib_virt]
+  use_template = root
+  recursive = yes
+  pre_snapshot_script = /usr/local/bin/sanoid_virt-freeze-all.sh
+  post_snapshot_script = /usr/local/bin/sanoid_virt-thaw-all.sh

-    [template_root]
-    # Root changes fast; shorter history often suffices
-    hourly = 24
-    daily = 7
-    monthly = 1
-    yearly = 0
-    autosnap = yes
-    autoprune = yes
+# -- User Data --
+[rpool/home]
+  use_template = production
+  recursive = yes

-    [template_ignore]
-    autoprune = no
-    autosnap = no
-    monitor = no
+[rpool/space]
+  use_template = production

-		[template_backup]
-    autoprune = yes
-    frequently = 0
-    hourly = 30
-    daily = 90
-    monthly = 12
-    yearly = 0
+#############################
+# templates below this line #
+#############################

-    ### don't take new snapshots - snapshots on backup
-    ### datasets are replicated in from source, not
-    ### generated locally
-    autosnap = no
+[template_production]
+  frequently = 0
+  hourly = 36
+  daily = 30
+  monthly = 3
+  yearly = 0
+  autosnap = yes
+  autoprune = yes

-    ### monitor hourlies and dailies, but don't warn or
-    ### crit until they're over 48h old, since replication
-    ### is typically daily only
-    hourly_warn = 2880
-    hourly_crit = 3600
-    daily_warn = 48
-    daily_crit = 60
+[template_root]
+  # Root changes fast; shorter history often suffices
+  hourly = 24
+  daily = 7
+  monthly = 1
+  yearly = 0
+  autosnap = yes
+  autoprune = yes

-    [template_hotspare]
-    autoprune = yes
-    frequently = 0
-    hourly = 30
-    daily = 90
-    monthly = 3
-    yearly = 0
+[template_ignore]
+  autoprune = no
+  autosnap = no
+  monitor = no

-    ### don't take new snapshots - snapshots on backup
-    ### datasets are replicated in from source, not
-    ### generated locally
-    autosnap = no
+##############################
+# Docker Persistent Template #
+##############################
+[template_docker-persistent]
+  # Frequent snapshots for active databases/configs
+  frequently = 0
+  hourly = 24
+  daily = 7
+  monthly = 1
+  yearly = 0

-    ### monitor hourlies and dailies, but don't warn or
-    ### crit until they're over 4h old, since replication
-    ### is typically hourly only
-    hourly_warn = 4h
-    hourly_crit = 6h
-    daily_warn = 2d
-    daily_crit = 4d
+  # Safety checks
+  autosnap = yes
+  autoprune = yes

-    [template_scripts]
-    ### information about the snapshot will be supplied as environment variables,
-    ### see the README.md file for details about what is passed when.
-    ### run script before snapshot
-    pre_snapshot_script = /path/to/script.sh
-    ### run script after snapshot
-    post_snapshot_script = /path/to/script.sh
-    ### run script before pruning snapshot
-    pre_pruning_script = /path/to/script.sh
-    ### run script after pruning snapshot
-    pruning_script = /path/to/script.sh
-    ### don't take an inconsistent snapshot (skip if pre script fails)
-    #no_inconsistent_snapshot = yes
-    ### run post_snapshot_script when pre_snapshot_script is failing
-    #force_post_snapshot_script = yes
-    ### limit allowed execution time of scripts before continuing (<= 0: infinite)
-    script_timeout = 5
+  # Don't take a snapshot if the dataset hasn't changed
+  # (Saves metadata overhead)
+  # skip_hourless = yes
+  pre_snapshot_script = /usr/local/bin/sanoid_zfs-skip-empty.sh
+
+#-- END
+
+[template_backup]
+  autoprune = yes
+  frequently = 0
+  hourly = 30
+  daily = 90
+  monthly = 12
+  yearly = 0
+
+  ### don't take new snapshots - snapshots on backup
+  ### datasets are replicated in from source, not
+  ### generated locally
+  autosnap = no
+
+  ### monitor hourlies and dailies, but don't warn or
+  ### crit until they're over 48h old, since replication
+  ### is typically daily only
+  hourly_warn = 2880
+  hourly_crit = 3600
+  daily_warn = 48
+  daily_crit = 60
+
+#-- END
+
+[template_hotspare]
+	autoprune = yes
+	frequently = 0
+	hourly = 30
+	daily = 90
+	weekly = 4
+	monthly = 3
+	yearly = 0
+
+	### don't take new snapshots - snapshots on backup
+	### datasets are replicated in from source, not
+	### generated locally
+	autosnap = no
+
+	### monitor hourlies and dailies, but don't warn or
+	### crit until they're over 4h old, since replication
+	### is typically hourly only
+	hourly_warn = 4h
+	hourly_crit = 6h
+	daily_warn = 2d
+	daily_crit = 4d
 EOF
 }
 conf_print_sanoid | sudo tee /etc/sanoid/sanoid.conf

-# Both sanoid and synmcoid are oneshot processes so it makes little sense to
-# provide an init file, cron is just fine. In this case the systemd file is there
-# because systemd decided to manage cron tasks.
-# Generated using:
-# https://raw.githubusercontent.com/akhilvij/systemd-to-sysvinit-converter/master/converter.py
-# python2 converter /usr/src/sanoid-2.2.0/sanoid.service > sanoid
-conf_print_sanoid_init() {
-  cat <<-'EOF'
-    #!/bin/sh
-    ### BEGIN INIT INFO
-    # Provides: sanoid
-    # Required-Start: $syslog $local_fs $remote_fs 
-    # Required-Stop:  $syslog $local_fs $remote_fs 
-    # Default-Start: 2 3 4 5
-    # Default-Stop: 0 1 6
-    # Short-Description: Snapshot ZFS filesystems
-    ### END INIT INFO
+########################
+# Pre-snapshot scripts #
+########################

-    . /lib/lsb/init-functions
-    prog=sanoid
-    PIDFILE=/var/run/$prog.pid
-    DESC="Snapshot ZFS filesystems"
-    start() {
-      log_daemon_msg "Starting $DESC" "$prog"
-      start_daemon -p $PIDFILE /usr/sbin/sanoid --take-snapshots --verbose
-      if [ $? -ne 0 ]; then
-        log_end_msg 1
-        exit 1
-      fi
-      if [ $? -eq 0 ]; then
-        log_end_msg 0
-      fi
-      exit 0
-    }
+# In ZFS, even if no data has changed, creating a snapshot still consumes a
+# small amount of space for metadata and adds an entry to the ZFS history.
+# If you have hundreds of datasets being snapshotted every 15 minutes, this
+# "metadata bloat" can make commands like zfs list -t snapshot feel sluggish
+# over time. If you think this is an issue for you, use sanoid_zfs-skip-empty.sh
+# as a pre_snapshot_script.
+conf_print_skip_empty() {
+  cat <<'EOF'
+#!/bin/bash
+ 
+# Usage: ./sanoid-threshold.sh <dataset> <threshold_size>
+# or
+# Add this to your /etc/sanoid/sanoid.conf to fire this script.

-    stop() {
-      log_daemon_msg "Stopping $DESC" "$prog"
-      killproc -p $PIDFILE /usr/sbin/sanoid
-      if [ $? -ne 0 ]; then
-        log_end_msg 1
-        exit 1
-      fi
-      if [ $? -eq 0 ]; then
-        log_end_msg 0
-      fi
-    }
+# [tank/important_data]
+#     use_template = production
+#     # Only snapshot if more than 5MB changed
+#     pre_snapshot_script = /usr/local/bin/sanoid-threshold.sh 5M

-    force_reload() {
-      stop
-      start
-    }
+DATASET=$1
+RAW_THRESHOLD=$2

-    case "$1" in
-      start)
-        start
-  ;;
-      stop)
-        stop
-        ;;
-      force-reload)
-        force_reload
-        ;;
-      restart)
-        stop
-        start
-        ;;
+convert_to_bytes() {
+    local number=$(echo "$1" | grep -oE '^[0-9.]+')
+    local unit=$(echo "$1" | grep -oi '[KMGPT]' | tr '[:lower:]' '[:upper:]') # -i: accept lowercase suffixes (5m, 2g)

-      *)
-        echo "$Usage: $prog {start|stop|force-reload|restart}"
-        exit 2
+    case "$unit" in
+        K) awk "BEGIN { printf \"%.0f\", $number * 1024 }" ;;
+        M) awk "BEGIN { printf \"%.0f\", $number * 1024^2 }" ;;
+        G) awk "BEGIN { printf \"%.0f\", $number * 1024^3 }" ;;
+        T) awk "BEGIN { printf \"%.0f\", $number * 1024^4 }" ;;
+        *) printf "%.0f" "$number" ;;
    esac
+}
+
+if [[ -z "$DATASET" || -z "$RAW_THRESHOLD" ]]; then
+    logger -t sanoid "Threshold Error: Usage: $0 <dataset> <threshold>"
+    exit 1
+fi
+
+if ! zfs list -H "$DATASET" >/dev/null 2>&1; then
+    logger -t sanoid "Threshold Error: Dataset $DATASET not found."
+    exit 1
+fi
+
+THRESHOLD=$(convert_to_bytes "$RAW_THRESHOLD")
+WRITTEN_BYTES=$(zfs get -Hp -o value written "$DATASET")
+
+if [[ "$WRITTEN_BYTES" -lt "$THRESHOLD" ]]; then
+    WRITTEN_HUMAN=$(zfs get -H -o value written "$DATASET")
+    # Optional: Comment out the logger below if your logs get too noisy
+    logger -t sanoid "Skipping $DATASET: Written $WRITTEN_HUMAN < Threshold $RAW_THRESHOLD."
+    exit 1 
+fi
+
+exit 0
 EOF
 }
-# Sandoid doesn't ran as a daemon it runs vi cron
-# conf_print_sanoid_init | sudo tee /etc/init.d/sanoid
-# sudo chmod +x /etc/init.d/sanoid
+conf_print_skip_empty | sudo tee /usr/local/bin/sanoid_zfs-skip-empty.sh

-# Generated using:
-# https://raw.githubusercontent.com/akhilvij/systemd-to-sysvinit-converter/master/converter.py
-# python2 converter /usr/src/sanoid-2.2.0/sanoid-prune.service > sanoid-prune
-conf_print_sanoid-prune_init() {
-  cat <<-'EOF'
-    #!/bin/sh
-    ### BEGIN INIT INFO
-    # Provides: sanoid-prune
-    # Required-Start: $syslog $local_fs $remote_fs 
-    # Required-Stop:  $syslog $local_fs $remote_fs 
-    # Short-Description: Prune ZFS snapshots
-    ### END INIT INFO
+sudo chmod +x /usr/local/bin/sanoid_zfs-skip-empty.sh
+# VM Consistency (The "Freeze/Thaw" Logic)

-    . /lib/lsb/init-functions
-    prog=sanoid-prune
-    PIDFILE=/var/run/$prog.pid
-    DESC="Prune ZFS snapshots"
-    start() {
-      log_daemon_msg "Starting $DESC" "$prog"
-      start_daemon -p $PIDFILE /usr/sbin/sanoid --prune-snapshots --verbose
-      if [ $? -ne 0 ]; then
-        log_end_msg 1
-        exit 1
-      fi
-      if [ $? -eq 0 ]; then
-        log_end_msg 0
-      fi
-      exit 0
-    }
-
-    stop() {
-      log_daemon_msg "Stopping $DESC" "$prog"
-      killproc -p $PIDFILE /usr/sbin/sanoid
-      if [ $? -ne 0 ]; then
-        log_end_msg 1
-        exit 1
-      fi
-      if [ $? -eq 0 ]; then
-        log_end_msg 0
-      fi
-    }
-
-    force_reload() {
-      stop
-      start
-
-    }
-
-    case "$1" in
-      start)
-        start
-        ;;
-      stop)
-        stop
-        ;;
-      force-reload)
-        force_reload
-        ;;
-      restart)
-        stop
-        start
-        ;;
-
-      *)
-        echo "$Usage: $prog {start|stop|force-reload|restart}"
-        exit 2
-    esac
-EOF
-}
-# Sandoid doesn't ran as a daemon it runs vi cron
-# conf_print_sanoid-prune_init | sudo tee /etc/init.d/sanoid-prune
-# sudo chmod +x /etc/init.d/sanoid-prune
+# sanoid_virt-freeze-all.sh and sanoid_virt-thaw-all.sh are included to ensure data integrity:
+#   *     Pre-snapshot: virsh domfsfreeze tells the Guest OS (via qemu-guest-agent) to flush its write buffers and temporarily pause I/O.
+#   *     Snapshot: Sanoid takes an atomic ZFS snapshot.
+#   *     Post-snapshot: virsh domfsthaw resumes I/O.
+# Result: You get an "application-consistent" backup rather than a "crash-consistent" one.

 # Give sudo access to virsh or is part of the libvirt group.
 # qemu-guest-agent must be running in the vm
@ -341,7 +301,7 @@ if [ -z "$VM_NAME" ]; then
 fi

 # Check if the VM is running
-STATE=$(virsh domstate "$VM_NAME" 2>/dev/null)
+STATE=$(/usr/bin/virsh domstate "$VM_NAME" 2>/dev/null)
 if [ "$STATE" != "running" ]; then
    echo "VM $VM_NAME is not running or does not exist. Skipping freeze."
    exit 0
@ -349,7 +309,7 @@ fi

 echo "Freezing filesystems for $VM_NAME..."
 # domfsfreeze returns the number of frozen filesystems on success
-if virsh domfsfreeze "$VM_NAME" > /dev/null; then
+if /usr/bin/virsh domfsfreeze "$VM_NAME" > /dev/null; then
    echo "Successfully frozen $VM_NAME."
 else
    echo "Error: Failed to freeze $VM_NAME. Ensure qemu-guest-agent is active."
@ -357,8 +317,8 @@ else
 fi
 EOF
 }
-conf_print_virt_freeze | sudo tee /usr/local/bin/virt-freeze.sh
-sudo chmod +x /usr/local/bin/virt-freeze.sh
+conf_print_virt_freeze | sudo tee /usr/local/bin/sanoid_virt-freeze.sh
+sudo chmod +x /usr/local/bin/sanoid_virt-freeze.sh

 #---

@ -375,7 +335,7 @@ if [ -z "$VM_NAME" ]; then
 fi

 # Check if the VM is running
-STATE=$(virsh domstate "$VM_NAME" 2>/dev/null)
+STATE=$(/usr/bin/virsh domstate "$VM_NAME" 2>/dev/null)
 if [ "$STATE" != "running" ]; then
    echo "VM $VM_NAME is not running. Skipping unfreeze."
    exit 0
@ -383,7 +343,7 @@ fi

 echo "Thawing filesystems for $VM_NAME..."
 # domfsthaw returns the number of thawed filesystems on success
-if virsh domfsthaw "$VM_NAME" > /dev/null; then
+if /usr/bin/virsh domfsthaw "$VM_NAME" > /dev/null; then
    echo "Successfully thawed $VM_NAME."
 else
    echo "Error: Failed to thaw $VM_NAME."
@ -391,8 +351,8 @@ else
 fi
 EOF
 }
-conf_print_virt_unfreeze | sudo tee /usr/local/bin/virt-unfreeze.sh
-sudo chmod +x /usr/local/bin/virt-unfreeze.sh
+conf_print_virt_unfreeze | sudo tee /usr/local/bin/sanoid_virt-unfreeze.sh
+sudo chmod +x /usr/local/bin/sanoid_virt-unfreeze.sh

 #---

@ -402,7 +362,7 @@ conf_print_virt_thaw_all() {
 # /usr/local/bin/virt-thaw-all.sh

 # 1. Get running VM names, filtering out empty lines with awk
-RUNNING_VMS=$(virsh list --state-running --name | awk 'NF')
+RUNNING_VMS=$(/usr/bin/virsh list --state-running --name | awk 'NF')

 if [ -z "$RUNNING_VMS" ]; then
    echo "No running VMs found."
@ -415,11 +375,11 @@ for VM_NAME in $RUNNING_VMS; do

    # Use the native thaw command. 
    # It handles the guest agent communication for you.
-    if virsh domfsthaw "$VM_NAME" > /dev/null 2>&1; then
+    if /usr/bin/virsh domfsthaw "$VM_NAME" > /dev/null 2>&1; then
        echo "Successfully thawed $VM_NAME."
    else
        # If native fails, we capture the error for the user
-        ERROR=$(virsh domfsthaw "$VM_NAME" 2>&1)
+        ERROR=$(/usr/bin/virsh domfsthaw "$VM_NAME" 2>&1)
        echo "Error thawing $VM_NAME: $ERROR"
    fi
 done
@ -427,8 +387,8 @@ done
 echo "Finished processing all VMs."
 EOF
 }
-conf_print_virt_thaw_all | sudo tee /usr/local/bin/virt-thaw-all.sh
-sudo chmod +x /usr/local/bin/virt-thaw-all.sh
+conf_print_virt_thaw_all | sudo tee /usr/local/bin/sanoid_virt-thaw-all.sh
+sudo chmod +x /usr/local/bin/sanoid_virt-thaw-all.sh

 #---

@ -437,7 +397,7 @@ conf_print_virt_freeze-all() {
 #!/bin/bash

 # 1. Get running VM names, filtering out empty lines
-RUNNING_VMS=$(virsh list --state-running --name | awk 'NF')
+RUNNING_VMS=$(/usr/bin/virsh list --state-running --name | awk 'NF')

 if [ -z "$RUNNING_VMS" ]; then
    echo "No running VMs found."
@ -450,7 +410,7 @@ for VM_NAME in $RUNNING_VMS; do

    # Using the native virsh command is generally cleaner
    # It returns the number of frozen filesystems on success
-    if virsh domfsfreeze "$VM_NAME" > /dev/null 2>&1; then
+    if /usr/bin/virsh domfsfreeze "$VM_NAME" > /dev/null 2>&1; then
        echo "Successfully frozen $VM_NAME."
    else
        echo "Error: Could not freeze $VM_NAME. Check if QEMU Guest Agent is running."
@ -460,8 +420,8 @@ done
 echo "Finished processing all VMs."
 EOF
 }
-conf_print_virt_freeze-all | sudo tee /usr/local/bin/virt-freeze-all.sh
-sudo chmod +x /usr/local/bin/virt-freeze-all.sh
+conf_print_virt_freeze-all | sudo tee /usr/local/bin/sanoid_virt-freeze-all.sh
+sudo chmod +x /usr/local/bin/sanoid_virt-freeze-all.sh

 #---