Update and refine,

2026-03-16 00:36:04 +00:00 · 2026-03-16 00:36:04 +00:00 · f0a82d4b8e
parent 89efbba240
commit f0a82d4b8e
1 changed files with 231 additions and 271 deletions
--- a/020_sanoid.sh
+++ b/020_sanoid.sh
@ -13,22 +13,26 @@ echo "override them in /etc/sanoid/sanoid.conf"
 echo "Installed files:"
 cat <<-EOF
-	syncoid                     /usr/sbin/
+syncoid                     /usr/sbin/
-	sanoid                      /usr/sbin/
+sanoid                      /usr/sbin/
-	findoid                     /usr/sbin/
+findoid                     /usr/sbin/
-	sanoid.defaults.conf        /usr/share/sanoid/
+sanoid.defaults.conf        /usr/share/sanoid/
-	debian/sanoid-prune.service /lib/systemd/system
+debian/sanoid-prune.service /lib/systemd/system
-	CHANGELIST /usr/share/doc/sanoid/changelog
+CHANGELIST /usr/share/doc/sanoid/changelog
 EOF
 # Both sanoid and syncoid are oneshot processes so it makes little sense to
 # provide an init file, cron is just fine. In this case the systemd file is there
 # because systemd decided to manage cron tasks.
 #
 # Cronjob for non-systemd systems: every 15 minutes.
 # If you require a different interval, you will need to disable the
 # timer or the cron job according to your system configuration.
 conf_print_sanoid_cron() {
  cat <<-EOF
-		PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
+PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
-		*/15 * * * * root [ -f /etc/sanoid/sanoid.conf ] && if [ ! -d /run/systemd/system ]; then TZ=UTC /usr/sbin/sanoid --cron --quiet; fi
+*/15 * * * * root [ -f /etc/sanoid/sanoid.conf ] && if [ ! -d /run/systemd/system ]; then TZ=UTC /usr/sbin/sanoid --cron --quiet; fi
 EOF
 }
 conf_print_sanoid_cron | sudo tee /etc/cron.d/sanoid
@ -36,9 +40,9 @@ conf_print_sanoid_cron | sudo tee /etc/cron.d/sanoid
 # Do the same for syncoid for backups.
 conf_print_syncoid_cron() {
  cat <<-EOF
-		PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
+PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
-		*/15 * * * * root [ -f /etc/sanoid/syncoid.conf ] && if [ ! -d /run/systemd/system ]; then TZ=UTC /usr/sbin/syncoid --cron --quiet; fi
+*/15 * * * * root [ -f /etc/sanoid/syncoid.conf ] && if [ ! -d /run/systemd/system ]; then TZ=UTC /usr/sbin/syncoid --cron --quiet; fi
 EOF
 }
 # conf_print_syncoid_cron | sudo tee /etc/cron.d/syncoid
@ -48,283 +52,239 @@ sudo mkdir -p /etc/sanoid/
 conf_print_sanoid() {
  cat <<-EOF
-    [rpool/docker]
+#-- Organisational datasets
-    use_template = ignore
+# DATA
 [rpool/DATA]
  use_template = ignore
  recursive = yes
  process_children_only = yes
-    [rpool/archive]
+# ROOT
-    use_template = ignore
+[rpool/ROOT]
  use_template = ignore
  recursive = yes
  process_children_only = yes
-    [rpool/swap]
+#-- END
    use_template = ignore
-    [rpool/tmp]
+# These datasets contain the docker zfs backing store "graph" (layers).
-    use_template = ignore
+# Just restore broken container with docker-compose down && docker-compose up 
 [rpool/docker]
  use_template = ignore
  recursive = yes
-    [rpool/ROOT/devuan-1]
+# Docker persistent data
-    use_template = root
+[rpool/DATA/docker-volumes]
-    recursive = yes
+  use_template = docker-persistent
  recursive = yes
-    # rpool/ROOT/devuan-1/opt
+[rpool/archive]
-    # rpool/ROOT/devuan-1/usr
+  use_template = ignore
-    # rpool/ROOT/devuan-1/usr_local
+  recursive = yes
    # rpool/ROOT/devuan-1/usr_share
    # rpool/ROOT/devuan-1/var
    # rpool/ROOT/devuan-1/var_lib
-  	# Specific override for Virtual Machines to use scripts
+[rpool/swap]
-    [rpool/ROOT/devuan-1/var_lib_virt]
+  use_template = ignore
-    use_template = root
+  recursive = yes
  	recursive = no
    pre_snapshot_script = /usr/local/bin/virt-freeze-all.sh
    post_snapshot_script = /usr/local/bin/virt-thaw-all.sh
-  	# -- User Data --
+[rpool/tmp]
-    [rpool/home]
+  use_template = ignore
-    use_template = production
+  recursive = yes
    recursive = yes
-    [rpool/space]
+[rpool/ROOT/devuan-1]
-    use_template = production
+  use_template = root
  recursive = yes
-    #############################
+# rpool/ROOT/devuan-1/opt
-    # templates below this line #
+# rpool/ROOT/devuan-1/usr
-    #############################
+# rpool/ROOT/devuan-1/usr_local
 # rpool/ROOT/devuan-1/usr_share
 # rpool/ROOT/devuan-1/var
 # rpool/ROOT/devuan-1/var_lib
-    [template_production]
+# Specific override for Virtual Machines to use scripts
-    frequently = 0
+[rpool/ROOT/devuan-1/var_lib_virt]
-    hourly = 36
+  use_template = root
-    daily = 30
+  recursive = yes
-    monthly = 3
+  pre_snapshot_script = /usr/local/bin/sanoid_virt-freeze-all.sh
-    yearly = 0
+  post_snapshot_script = /usr/local/bin/sanoid_virt-thaw-all.sh
    autosnap = yes
    autoprune = yes
-    [template_root]
+# -- User Data --
-    # Root changes fast; shorter history often suffices
+[rpool/home]
-    hourly = 24
+  use_template = production
-    daily = 7
+  recursive = yes
    monthly = 1
    yearly = 0
    autosnap = yes
    autoprune = yes
-    [template_ignore]
+[rpool/space]
-    autoprune = no
+  use_template = production
    autosnap = no
    monitor = no
-		[template_backup]
+#############################
-    autoprune = yes
+# templates below this line #
-    frequently = 0
+#############################
    hourly = 30
    daily = 90
    monthly = 12
    yearly = 0
-    ### don't take new snapshots - snapshots on backup
+[template_production]
-    ### datasets are replicated in from source, not
+  frequently = 0
-    ### generated locally
+  hourly = 36
-    autosnap = no
+  daily = 30
  monthly = 3
  yearly = 0
  autosnap = yes
  autoprune = yes
-    ### monitor hourlies and dailies, but don't warn or
+[template_root]
-    ### crit until they're over 48h old, since replication
+  # Root changes fast; shorter history often suffices
-    ### is typically daily only
+  hourly = 24
-    hourly_warn = 2880
+  daily = 7
-    hourly_crit = 3600
+  monthly = 1
-    daily_warn = 48
+  yearly = 0
-    daily_crit = 60
+  autosnap = yes
  autoprune = yes
-    [template_hotspare]
+[template_ignore]
-    autoprune = yes
+  autoprune = no
-    frequently = 0
+  autosnap = no
-    hourly = 30
+  monitor = no
    daily = 90
    monthly = 3
    yearly = 0
-    ### don't take new snapshots - snapshots on backup
+##############################
-    ### datasets are replicated in from source, not
+# Docker Persistent Template #
-    ### generated locally
+##############################
-    autosnap = no
+[template_docker-persistent]
  # Frequent snapshots for active databases/configs
  frequently = 0
  hourly = 24
  daily = 7
  monthly = 1
  yearly = 0
-    ### monitor hourlies and dailies, but don't warn or
+  # Safety checks
-    ### crit until they're over 4h old, since replication
+  autosnap = yes
-    ### is typically hourly only
+  autoprune = yes
    hourly_warn = 4h
    hourly_crit = 6h
    daily_warn = 2d
    daily_crit = 4d
-    [template_scripts]
+  # Don't take a snapshot if the dataset hasn't changed
-    ### information about the snapshot will be supplied as environment variables,
+  # (Saves metadata overhead)
-    ### see the README.md file for details about what is passed when.
+  # skip_hourless = yes
-    ### run script before snapshot
+  pre_snapshot_script = /usr/local/bin/sanoid_zfs-skip-empty.sh
-    pre_snapshot_script = /path/to/script.sh
+
-    ### run script after snapshot
+#-- END
-    post_snapshot_script = /path/to/script.sh
+
-    ### run script before pruning snapshot
+[template_backup]
-    pre_pruning_script = /path/to/script.sh
+  autoprune = yes
-    ### run script after pruning snapshot
+  frequently = 0
-    pruning_script = /path/to/script.sh
+  hourly = 30
-    ### don't take an inconsistent snapshot (skip if pre script fails)
+  daily = 90
-    #no_inconsistent_snapshot = yes
+  monthly = 12
-    ### run post_snapshot_script when pre_snapshot_script is failing
+  yearly = 0
-    #force_post_snapshot_script = yes
+
-    ### limit allowed execution time of scripts before continuing (<= 0: infinite)
+  ### don't take new snapshots - snapshots on backup
-    script_timeout = 5
+  ### datasets are replicated in from source, not
  ### generated locally
  autosnap = no
  ### monitor hourlies and dailies, but don't warn or
  ### crit until they're over 48h old, since replication
  ### is typically daily only
  hourly_warn = 2880
  hourly_crit = 3600
  daily_warn = 48
  daily_crit = 60
 #-- END
 [template_hotspare]
 	autoprune = yes
 	frequently = 0
 	hourly = 30
 	daily = 90
 	weekly = 4
 	monthly = 3
 	yearly = 0
 	### don't take new snapshots - snapshots on backup
 	### datasets are replicated in from source, not
 	### generated locally
 	autosnap = no
 	### monitor hourlies and dailies, but don't warn or
 	### crit until they're over 4h old, since replication
 	### is typically hourly only
 	hourly_warn = 4h
 	hourly_crit = 6h
 	daily_warn = 2d
 	daily_crit = 4d
 EOF
 }
 conf_print_sanoid | sudo tee /etc/sanoid/sanoid.conf
-# Both sanoid and synmcoid are oneshot processes so it makes little sense to
+########################
-# provide an init file, cron is just fine. In this case the systemd file is there
+# Pre-snapshot scripts #
-# because systemd decided to manage cron tasks.
+########################
 # Generated using:
 # https://raw.githubusercontent.com/akhilvij/systemd-to-sysvinit-converter/master/converter.py
 # python2 converter /usr/src/sanoid-2.2.0/sanoid.service > sanoid
 conf_print_sanoid_init() {
  cat <<-'EOF'
    #!/bin/sh
    ### BEGIN INIT INFO
    # Provides: sanoid
    # Required-Start: $syslog $local_fs $remote_fs 
    # Required-Stop:  $syslog $local_fs $remote_fs 
    # Default-Start: 2 3 4 5
    # Default-Stop: 0 1 6
    # Short-Description: Snapshot ZFS filesystems
    ### END INIT INFO
-    . /lib/lsb/init-functions
+# In ZFS, even if no data has changed, creating a snapshot still consumes a
-    prog=sanoid
+# small amount of space for metadata and adds an entry to the ZFS history.
-    PIDFILE=/var/run/$prog.pid
+# If you have hundreds of datasets being snapshotted every 15 minutes, this
-    DESC="Snapshot ZFS filesystems"
+# "metadata bloat" can make commands like zfs list -t snapshot feel sluggish
-    start() {
+# over time. If you think this is an issue for you, use sanoid_zfs-skip-empty.sh as
-      log_daemon_msg "Starting $DESC" "$prog"
+# a pre_snapshot_script
-      start_daemon -p $PIDFILE /usr/sbin/sanoid --take-snapshots --verbose
+conf_print_skip_empty() {
-      if [ $? -ne 0 ]; then
+  cat <<'EOF'
-        log_end_msg 1
+#!/bin/bash
-        exit 1
+ 
-      fi
+# Usage: ./sanoid-threshold.sh <dataset> <threshold_size>
-      if [ $? -eq 0 ]; then
+# or
-        log_end_msg 0
+# Add this to your /etc/sanoid/sanoid.conf to fire this script.
      fi
      exit 0
    }
-    stop() {
+# [tank/important_data]
-      log_daemon_msg "Stopping $DESC" "$prog"
+#     use_template = production
-      killproc -p $PIDFILE /usr/sbin/sanoid
+#     # Only snapshot if more than 5MB changed
-      if [ $? -ne 0 ]; then
+#     pre_snapshot_script = /usr/local/bin/sanoid-threshold.sh 5M
        log_end_msg 1
        exit 1
      fi
      if [ $? -eq 0 ]; then
        log_end_msg 0
      fi
    }
-    force_reload() {
+DATASET=$1
-      stop
+RAW_THRESHOLD=$2
      start
    }
-    case "$1" in
+convert_to_bytes() {
-      start)
+    local number=$(echo "$1" | grep -oE '^[0-9.]+')
-        start
+    local unit=$(echo "$1" | grep -oi '[KMGPT]' | tr '[:lower:]' '[:upper:]')
  ;;
      stop)
        stop
        ;;
      force-reload)
        force_reload
        ;;
      restart)
        stop
        start
        ;;
-      *)
+    case "$unit" in
-        echo "$Usage: $prog {start|stop|force-reload|restart}"
+        K) awk "BEGIN { printf \"%.0f\", $number * 1024 }" ;;
-        exit 2
+        M) awk "BEGIN { printf \"%.0f\", $number * 1024^2 }" ;;
        G) awk "BEGIN { printf \"%.0f\", $number * 1024^3 }" ;;
        T) awk "BEGIN { printf \"%.0f\", $number * 1024^4 }" ;;
        *) printf "%.0f" "$number" ;;
    esac
 }
 if [[ -z "$DATASET" || -z "$RAW_THRESHOLD" ]]; then
    logger -t sanoid "Threshold Error: Usage: $0 <dataset> <threshold>"
    exit 1
 fi
 if ! zfs list -H "$DATASET" >/dev/null 2>&1; then
    logger -t sanoid "Threshold Error: Dataset $DATASET not found."
    exit 1
 fi
 THRESHOLD=$(convert_to_bytes "$RAW_THRESHOLD")
 WRITTEN_BYTES=$(zfs get -Hp -o value written "$DATASET")
 if [[ "$WRITTEN_BYTES" -lt "$THRESHOLD" ]]; then
    WRITTEN_HUMAN=$(zfs get -H -o value written "$DATASET")
    # Optional: Comment out the logger below if your logs get too noisy
    logger -t sanoid "Skipping $DATASET: Written $WRITTEN_HUMAN < Threshold $RAW_THRESHOLD."
    exit 1 
 fi
 exit 0
 EOF
 }
-# Sandoid doesn't ran as a daemon it runs vi cron
+conf_print_skip_empty | sudo tee /usr/local/bin/sanoid_zfs-skip-empty.sh
 # conf_print_sanoid_init | sudo tee /etc/init.d/sanoid
 # sudo chmod +x /etc/init.d/sanoid
-# Generated using:
+sudo chmod +x /usr/local/bin/sanoid_zfs-skip-empty.sh
-# https://raw.githubusercontent.com/akhilvij/systemd-to-sysvinit-converter/master/converter.py
+# VM Consistency (The "Freeze/Thaw" Logic)
 # python2 converter /usr/src/sanoid-2.2.0/sanoid-prune.service > sanoid-prune
 conf_print_sanoid-prune_init() {
  cat <<-'EOF'
    #!/bin/sh
    ### BEGIN INIT INFO
    # Provides: sanoid-prune
    # Required-Start: $syslog $local_fs $remote_fs 
    # Required-Stop:  $syslog $local_fs $remote_fs 
    # Short-Description: Prune ZFS snapshots
    ### END INIT INFO
-    . /lib/lsb/init-functions
+# sanoid_virt-freeze-all.sh and sanoid_virt-thaw-all.sh are included to ensure data integrity:
-    prog=sanoid-prune
+#   *     Pre-snapshot: virsh domfsfreeze tells the Guest OS (via qemu-guest-agent) to flush its write buffers and temporarily pause I/O.
-    PIDFILE=/var/run/$prog.pid
+#   *     Snapshot: Sanoid takes an atomic ZFS snapshot.
-    DESC="Prune ZFS snapshots"
+#   *     Post-snapshot: virsh domfsthaw resumes I/O.
-    start() {
+# Result: You get an "application-consistent" backup rather than a "crash-consistent" one.
      log_daemon_msg "Starting $DESC" "$prog"
      start_daemon -p $PIDFILE /usr/sbin/sanoid --prune-snapshots --verbose
      if [ $? -ne 0 ]; then
        log_end_msg 1
        exit 1
      fi
      if [ $? -eq 0 ]; then
        log_end_msg 0
      fi
      exit 0
    }
    stop() {
      log_daemon_msg "Stopping $DESC" "$prog"
      killproc -p $PIDFILE /usr/sbin/sanoid
      if [ $? -ne 0 ]; then
        log_end_msg 1
        exit 1
      fi
      if [ $? -eq 0 ]; then
        log_end_msg 0
      fi
    }
    force_reload() {
      stop
      start
    }
    case "$1" in
      start)
        start
        ;;
      stop)
        stop
        ;;
      force-reload)
        force_reload
        ;;
      restart)
        stop
        start
        ;;
      *)
        echo "$Usage: $prog {start|stop|force-reload|restart}"
        exit 2
    esac
 EOF
 }
 # Sandoid doesn't ran as a daemon it runs vi cron
 # conf_print_sanoid-prune_init | sudo tee /etc/init.d/sanoid-prune
 # sudo chmod +x /etc/init.d/sanoid-prune
 # Give sudo access to virsh or is part of the libvirt group.
 # qemu-guest-agent must be running in the vm
@ -341,7 +301,7 @@ if [ -z "$VM_NAME" ]; then
 fi
 # Check if the VM is running
-STATE=$(virsh domstate "$VM_NAME" 2>/dev/null)
+STATE=$(/usr/bin/virsh domstate "$VM_NAME" 2>/dev/null)
 if [ "$STATE" != "running" ]; then
    echo "VM $VM_NAME is not running or does not exist. Skipping freeze."
    exit 0
@ -349,7 +309,7 @@ fi
 echo "Freezing filesystems for $VM_NAME..."
 # domfsfreeze returns the number of frozen filesystems on success
-if virsh domfsfreeze "$VM_NAME" > /dev/null; then
+if /usr/bin/virsh domfsfreeze "$VM_NAME" > /dev/null; then
    echo "Successfully frozen $VM_NAME."
 else
    echo "Error: Failed to freeze $VM_NAME. Ensure qemu-guest-agent is active."
@ -357,8 +317,8 @@ else
 fi
 EOF
 }
-conf_print_virt_freeze | sudo tee /usr/local/bin/virt-freeze.sh
+conf_print_virt_freeze | sudo tee /usr/local/bin/sanoid_virt-freeze.sh
-sudo chmod +x /usr/local/bin/virt-freeze.sh
+sudo chmod +x /usr/local/bin/sanoid_virt-freeze.sh
 #---
@ -375,7 +335,7 @@ if [ -z "$VM_NAME" ]; then
 fi
 # Check if the VM is running
-STATE=$(virsh domstate "$VM_NAME" 2>/dev/null)
+STATE=$(/usr/bin/virsh domstate "$VM_NAME" 2>/dev/null)
 if [ "$STATE" != "running" ]; then
    echo "VM $VM_NAME is not running. Skipping unfreeze."
    exit 0
@ -383,7 +343,7 @@ fi
 echo "Thawing filesystems for $VM_NAME..."
 # domfsthaw returns the number of thawed filesystems on success
-if virsh domfsthaw "$VM_NAME" > /dev/null; then
+if /usr/bin/virsh domfsthaw "$VM_NAME" > /dev/null; then
    echo "Successfully thawed $VM_NAME."
 else
    echo "Error: Failed to thaw $VM_NAME."
@ -391,8 +351,8 @@ else
 fi
 EOF
 }
-conf_print_virt_unfreeze | sudo tee /usr/local/bin/virt-unfreeze.sh
+conf_print_virt_unfreeze | sudo tee /usr/local/bin/sanoid_virt-unfreeze.sh
-sudo chmod +x /usr/local/bin/virt-unfreeze.sh
+sudo chmod +x /usr/local/bin/sanoid_virt-unfreeze.sh
 #---
@ -402,7 +362,7 @@ conf_print_virt_thaw_all() {
 # /usr/local/bin/virt-thaw-all.sh
 # 1. Get running VM names, filtering out empty lines with awk
-RUNNING_VMS=$(virsh list --state-running --name | awk 'NF')
+RUNNING_VMS=$(/usr/bin/virsh list --state-running --name | awk 'NF')
 if [ -z "$RUNNING_VMS" ]; then
    echo "No running VMs found."
@ -415,11 +375,11 @@ for VM_NAME in $RUNNING_VMS; do
    # Use the native thaw command. 
    # It handles the guest agent communication for you.
-    if virsh domfsthaw "$VM_NAME" > /dev/null 2>&1; then
+    if /usr/bin/virsh domfsthaw "$VM_NAME" > /dev/null 2>&1; then
        echo "Successfully thawed $VM_NAME."
    else
        # If native fails, we capture the error for the user
-        ERROR=$(virsh domfsthaw "$VM_NAME" 2>&1)
+        ERROR=$(/usr/bin/virsh domfsthaw "$VM_NAME" 2>&1)
        echo "Error thawing $VM_NAME: $ERROR"
    fi
 done
@ -427,8 +387,8 @@ done
 echo "Finished processing all VMs."
 EOF
 }
-conf_print_virt_thaw_all | sudo tee /usr/local/bin/virt-thaw-all.sh
+conf_print_virt_thaw_all | sudo tee /usr/local/bin/sanoid_virt-thaw-all.sh
-sudo chmod +x /usr/local/bin/virt-thaw-all.sh
+sudo chmod +x /usr/local/bin/sanoid_virt-thaw-all.sh
 #---
@ -437,7 +397,7 @@ conf_print_virt_freeze-all() {
 #!/bin/bash
 # 1. Get running VM names, filtering out empty lines
-RUNNING_VMS=$(virsh list --state-running --name | awk 'NF')
+RUNNING_VMS=$(/usr/bin/virsh list --state-running --name | awk 'NF')
 if [ -z "$RUNNING_VMS" ]; then
    echo "No running VMs found."
@ -450,7 +410,7 @@ for VM_NAME in $RUNNING_VMS; do
    # Using the native virsh command is generally cleaner
    # It returns the number of frozen filesystems on success
-    if virsh domfsfreeze "$VM_NAME" > /dev/null 2>&1; then
+    if /usr/bin/virsh domfsfreeze "$VM_NAME" > /dev/null 2>&1; then
        echo "Successfully frozen $VM_NAME."
    else
        echo "Error: Could not freeze $VM_NAME. Check if QEMU Guest Agent is running."
@ -460,8 +420,8 @@ done
 echo "Finished processing all VMs."
 EOF
 }
-conf_print_virt_freeze-all | sudo tee /usr/local/bin/virt-freeze-all.sh
+conf_print_virt_freeze-all | sudo tee /usr/local/bin/sanoid_virt-freeze-all.sh
-sudo chmod +x /usr/local/bin/virt-freeze-all.sh
+sudo chmod +x /usr/local/bin/sanoid_virt-freeze-all.sh
 #---