Update and refine,

This commit is contained in:
cyteen 2026-03-16 00:36:04 +00:00
parent 89efbba240
commit f0a82d4b8e
1 changed files with 231 additions and 271 deletions

View File

@ -13,22 +13,26 @@ echo "override them in /etc/sanoid/sanoid.conf"
echo "Installed files:" echo "Installed files:"
cat <<-EOF cat <<-EOF
syncoid /usr/sbin/ syncoid /usr/sbin/
sanoid /usr/sbin/ sanoid /usr/sbin/
findoid /usr/sbin/ findoid /usr/sbin/
sanoid.defaults.conf /usr/share/sanoid/ sanoid.defaults.conf /usr/share/sanoid/
debian/sanoid-prune.service /lib/systemd/system debian/sanoid-prune.service /lib/systemd/system
CHANGELIST /usr/share/doc/sanoid/changelog CHANGELIST /usr/share/doc/sanoid/changelog
EOF EOF
# Both sanoid and syncoid are oneshot processes so it makes little sense to
# provide an init file, cron is just fine. In this case the systemd file is there
# because systemd decided to manage cron tasks.
#
# Cronjob for non-systemd systems: every 15 minutes. # Cronjob for non-systemd systems: every 15 minutes.
# If you require a different interval, you will need to disable the # If you require a different interval, you will need to disable the
# timer or the cron job according to your system configuration. # timer or the cron job according to your system configuration.
conf_print_sanoid_cron() { conf_print_sanoid_cron() {
cat <<-EOF cat <<-EOF
PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
*/15 * * * * root [ -f /etc/sanoid/sanoid.conf ] && if [ ! -d /run/systemd/system ]; then TZ=UTC /usr/sbin/sanoid --cron --quiet; fi */15 * * * * root [ -f /etc/sanoid/sanoid.conf ] && if [ ! -d /run/systemd/system ]; then TZ=UTC /usr/sbin/sanoid --cron --quiet; fi
EOF EOF
} }
conf_print_sanoid_cron | sudo tee /etc/cron.d/sanoid conf_print_sanoid_cron | sudo tee /etc/cron.d/sanoid
@ -36,9 +40,9 @@ conf_print_sanoid_cron | sudo tee /etc/cron.d/sanoid
# Do the same for syncoid for backups. # Do the same for syncoid for backups.
conf_print_syncoid_cron() { conf_print_syncoid_cron() {
cat <<-EOF cat <<-EOF
PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
*/15 * * * * root [ -f /etc/sanoid/syncoid.conf ] && if [ ! -d /run/systemd/system ]; then TZ=UTC /usr/sbin/syncoid --cron --quiet; fi */15 * * * * root [ -f /etc/sanoid/syncoid.conf ] && if [ ! -d /run/systemd/system ]; then TZ=UTC /usr/sbin/syncoid --cron --quiet; fi
EOF EOF
} }
# conf_print_syncoid_cron | sudo tee /etc/cron.d/syncoid # conf_print_syncoid_cron | sudo tee /etc/cron.d/syncoid
@ -48,283 +52,239 @@ sudo mkdir -p /etc/sanoid/
conf_print_sanoid() { conf_print_sanoid() {
cat <<-EOF cat <<-EOF
[rpool/docker] #-- Organisational datasets
use_template = ignore # DATA
[rpool/DATA]
use_template = ignore
recursive = yes
process_children_only = yes
[rpool/archive] # ROOT
use_template = ignore [rpool/ROOT]
use_template = ignore
recursive = yes
process_children_only = yes
[rpool/swap] #-- END
use_template = ignore
[rpool/tmp] # These datasets contain the docker zfs backing store "graph" (layers).
use_template = ignore # Just restore a broken container with docker-compose down && docker-compose up
[rpool/docker]
use_template = ignore
recursive = yes
[rpool/ROOT/devuan-1] # Docker persistent data
use_template = root [rpool/DATA/docker-volumes]
recursive = yes use_template = docker-persistent
recursive = yes
# rpool/ROOT/devuan-1/opt [rpool/archive]
# rpool/ROOT/devuan-1/usr use_template = ignore
# rpool/ROOT/devuan-1/usr_local recursive = yes
# rpool/ROOT/devuan-1/usr_share
# rpool/ROOT/devuan-1/var
# rpool/ROOT/devuan-1/var_lib
# Specific override for Virtual Machines to use scripts [rpool/swap]
[rpool/ROOT/devuan-1/var_lib_virt] use_template = ignore
use_template = root recursive = yes
recursive = no
pre_snapshot_script = /usr/local/bin/virt-freeze-all.sh
post_snapshot_script = /usr/local/bin/virt-thaw-all.sh
# -- User Data -- [rpool/tmp]
[rpool/home] use_template = ignore
use_template = production recursive = yes
recursive = yes
[rpool/space] [rpool/ROOT/devuan-1]
use_template = production use_template = root
recursive = yes
############################# # rpool/ROOT/devuan-1/opt
# templates below this line # # rpool/ROOT/devuan-1/usr
############################# # rpool/ROOT/devuan-1/usr_local
# rpool/ROOT/devuan-1/usr_share
# rpool/ROOT/devuan-1/var
# rpool/ROOT/devuan-1/var_lib
[template_production] # Specific override for Virtual Machines to use scripts
frequently = 0 [rpool/ROOT/devuan-1/var_lib_virt]
hourly = 36 use_template = root
daily = 30 recursive = yes
monthly = 3 pre_snapshot_script = /usr/local/bin/sanoid_virt-freeze-all.sh
yearly = 0 post_snapshot_script = /usr/local/bin/sanoid_virt-thaw-all.sh
autosnap = yes
autoprune = yes
[template_root] # -- User Data --
# Root changes fast; shorter history often suffices [rpool/home]
hourly = 24 use_template = production
daily = 7 recursive = yes
monthly = 1
yearly = 0
autosnap = yes
autoprune = yes
[template_ignore] [rpool/space]
autoprune = no use_template = production
autosnap = no
monitor = no
[template_backup] #############################
autoprune = yes # templates below this line #
frequently = 0 #############################
hourly = 30
daily = 90
monthly = 12
yearly = 0
### don't take new snapshots - snapshots on backup [template_production]
### datasets are replicated in from source, not frequently = 0
### generated locally hourly = 36
autosnap = no daily = 30
monthly = 3
yearly = 0
autosnap = yes
autoprune = yes
### monitor hourlies and dailies, but don't warn or [template_root]
### crit until they're over 48h old, since replication # Root changes fast; shorter history often suffices
### is typically daily only hourly = 24
hourly_warn = 2880 daily = 7
hourly_crit = 3600 monthly = 1
daily_warn = 48 yearly = 0
daily_crit = 60 autosnap = yes
autoprune = yes
[template_hotspare] [template_ignore]
autoprune = yes autoprune = no
frequently = 0 autosnap = no
hourly = 30 monitor = no
daily = 90
monthly = 3
yearly = 0
### don't take new snapshots - snapshots on backup ##############################
### datasets are replicated in from source, not # Docker Persistent Template #
### generated locally ##############################
autosnap = no [template_docker-persistent]
# Frequent snapshots for active databases/configs
frequently = 0
hourly = 24
daily = 7
monthly = 1
yearly = 0
### monitor hourlies and dailies, but don't warn or # Safety checks
### crit until they're over 4h old, since replication autosnap = yes
### is typically hourly only autoprune = yes
hourly_warn = 4h
hourly_crit = 6h
daily_warn = 2d
daily_crit = 4d
[template_scripts] # Don't take a snapshot if the dataset hasn't changed
### information about the snapshot will be supplied as environment variables, # (Saves metadata overhead)
### see the README.md file for details about what is passed when. # skip_hourless = yes
### run script before snapshot pre_snapshot_script = /usr/local/bin/sanoid_zfs-skip-empty.sh
pre_snapshot_script = /path/to/script.sh
### run script after snapshot #-- END
post_snapshot_script = /path/to/script.sh
### run script before pruning snapshot [template_backup]
pre_pruning_script = /path/to/script.sh autoprune = yes
### run script after pruning snapshot frequently = 0
pruning_script = /path/to/script.sh hourly = 30
### don't take an inconsistent snapshot (skip if pre script fails) daily = 90
#no_inconsistent_snapshot = yes monthly = 12
### run post_snapshot_script when pre_snapshot_script is failing yearly = 0
#force_post_snapshot_script = yes
### limit allowed execution time of scripts before continuing (<= 0: infinite) ### don't take new snapshots - snapshots on backup
script_timeout = 5 ### datasets are replicated in from source, not
### generated locally
autosnap = no
### monitor hourlies and dailies, but don't warn or
### crit until they're over 48h old, since replication
### is typically daily only
hourly_warn = 2880
hourly_crit = 3600
daily_warn = 48
daily_crit = 60
#-- END
[template_hotspare]
autoprune = yes
frequently = 0
hourly = 30
daily = 90
weekly = 4
monthly = 3
yearly = 0
### don't take new snapshots - snapshots on backup
### datasets are replicated in from source, not
### generated locally
autosnap = no
### monitor hourlies and dailies, but don't warn or
### crit until they're over 4h old, since replication
### is typically hourly only
hourly_warn = 4h
hourly_crit = 6h
daily_warn = 2d
daily_crit = 4d
EOF EOF
} }
conf_print_sanoid | sudo tee /etc/sanoid/sanoid.conf conf_print_sanoid | sudo tee /etc/sanoid/sanoid.conf
# Both sanoid and syncoid are oneshot processes so it makes little sense to ########################
# provide an init file, cron is just fine. In this case the systemd file is there # Pre-snapshot scripts #
# because systemd decided to manage cron tasks. ########################
# Generated using:
# https://raw.githubusercontent.com/akhilvij/systemd-to-sysvinit-converter/master/converter.py
# python2 converter /usr/src/sanoid-2.2.0/sanoid.service > sanoid
conf_print_sanoid_init() {
cat <<-'EOF'
#!/bin/sh
### BEGIN INIT INFO
# Provides: sanoid
# Required-Start: $syslog $local_fs $remote_fs
# Required-Stop: $syslog $local_fs $remote_fs
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
# Short-Description: Snapshot ZFS filesystems
### END INIT INFO
. /lib/lsb/init-functions # In ZFS, even if no data has changed, creating a snapshot still consumes a
prog=sanoid # small amount of space for metadata and adds an entry to the ZFS history.
PIDFILE=/var/run/$prog.pid # If you have hundreds of datasets being snapshotted every 15 minutes, this
DESC="Snapshot ZFS filesystems" # "metadata bloat" can make commands like zfs list -t snapshot feel sluggish
start() { # over time. If you think this is an issue for you, use zfs-skip-empty.sh as
log_daemon_msg "Starting $DESC" "$prog" # a pre_snapshot_script
start_daemon -p $PIDFILE /usr/sbin/sanoid --take-snapshots --verbose conf_print_skip_empty() {
if [ $? -ne 0 ]; then cat <<'EOF'
log_end_msg 1 #!/bin/bash
exit 1
fi # Usage: ./sanoid-threshold.sh <dataset> <threshold_size>
if [ $? -eq 0 ]; then # or
log_end_msg 0 # Add this to your /etc/sanoid.conf to fire this script.
fi
exit 0
}
stop() { # [tank/important_data]
log_daemon_msg "Stopping $DESC" "$prog" # use_template = production
killproc -p $PIDFILE /usr/sbin/sanoid # # Only snapshot if more than 5MB changed
if [ $? -ne 0 ]; then # pre_snapshot_script = /usr/local/bin/sanoid-threshold.sh 5M
log_end_msg 1
exit 1
fi
if [ $? -eq 0 ]; then
log_end_msg 0
fi
}
force_reload() { DATASET=$1
stop RAW_THRESHOLD=$2
start
}
case "$1" in convert_to_bytes() {
start) local number=$(echo "$1" | grep -oE '^[0-9.]+')
start local unit=$(echo "$1" | grep -oI '[KMGPT]' | tr '[:lower:]' '[:upper:]')
;;
stop)
stop
;;
force-reload)
force_reload
;;
restart)
stop
start
;;
*) case "$unit" in
echo "$Usage: $prog {start|stop|force-reload|restart}" K) awk "BEGIN { printf \"%.0f\", $number * 1024 }" ;;
exit 2 M) awk "BEGIN { printf \"%.0f\", $number * 1024^2 }" ;;
G) awk "BEGIN { printf \"%.0f\", $number * 1024^3 }" ;;
T) awk "BEGIN { printf \"%.0f\", $number * 1024^4 }" ;;
*) printf "%.0f" "$number" ;;
esac esac
}
if [[ -z "$DATASET" || -z "$RAW_THRESHOLD" ]]; then
logger -t sanoid "Threshold Error: Usage: $0 <dataset> <threshold>"
exit 1
fi
if ! zfs list -H "$DATASET" >/dev/null 2>&1; then
logger -t sanoid "Threshold Error: Dataset $DATASET not found."
exit 1
fi
THRESHOLD=$(convert_to_bytes "$RAW_THRESHOLD")
WRITTEN_BYTES=$(zfs get -Hp -o value written "$DATASET")
if [[ "$WRITTEN_BYTES" -lt "$THRESHOLD" ]]; then
WRITTEN_HUMAN=$(zfs get -H -o value written "$DATASET")
# Optional: Comment out the logger below if your logs get too noisy
logger -t sanoid "Skipping $DATASET: Written $WRITTEN_HUMAN < Threshold $RAW_THRESHOLD."
exit 1
fi
exit 0
EOF EOF
} }
# Sanoid doesn't run as a daemon; it runs via cron conf_print_skip_hourless | sudo tee /usr/local/bin/sanoid_zfs-skip-empty.sh
# conf_print_sanoid_init | sudo tee /etc/init.d/sanoid
# sudo chmod +x /etc/init.d/sanoid
# Generated using: chmod +x /usr/local/bin/sanoid_zfs-skip-empty.sh
# https://raw.githubusercontent.com/akhilvij/systemd-to-sysvinit-converter/master/converter.py # VM Consistency (The "Freeze/Thaw" Logic)
# python2 converter /usr/src/sanoid-2.2.0/sanoid-prune.service > sanoid-prune
conf_print_sanoid-prune_init() {
cat <<-'EOF'
#!/bin/sh
### BEGIN INIT INFO
# Provides: sanoid-prune
# Required-Start: $syslog $local_fs $remote_fs
# Required-Stop: $syslog $local_fs $remote_fs
# Short-Description: Prune ZFS snapshots
### END INIT INFO
. /lib/lsb/init-functions # virt-freeze-all.sh and virt-thaw-all.sh are included to ensure data integrity:
prog=sanoid-prune # * Pre-snapshot: virsh domfsfreeze tells the Guest OS (via qemu-guest-agent) to flush its write buffers and temporarily pause I/O.
PIDFILE=/var/run/$prog.pid # * Snapshot: Sanoid takes an atomic ZFS snapshot.
DESC="Prune ZFS snapshots" # * Post-snapshot: virsh domfsthaw resumes I/O.
start() { # Result: You get an "application-consistent" backup rather than a "crash-consistent" one.
log_daemon_msg "Starting $DESC" "$prog"
start_daemon -p $PIDFILE /usr/sbin/sanoid --prune-snapshots --verbose
if [ $? -ne 0 ]; then
log_end_msg 1
exit 1
fi
if [ $? -eq 0 ]; then
log_end_msg 0
fi
exit 0
}
stop() {
log_daemon_msg "Stopping $DESC" "$prog"
killproc -p $PIDFILE /usr/sbin/sanoid
if [ $? -ne 0 ]; then
log_end_msg 1
exit 1
fi
if [ $? -eq 0 ]; then
log_end_msg 0
fi
}
force_reload() {
stop
start
}
case "$1" in
start)
start
;;
stop)
stop
;;
force-reload)
force_reload
;;
restart)
stop
start
;;
*)
echo "$Usage: $prog {start|stop|force-reload|restart}"
exit 2
esac
EOF
}
# Sanoid doesn't run as a daemon; it runs via cron
# conf_print_sanoid-prune_init | sudo tee /etc/init.d/sanoid-prune
# sudo chmod +x /etc/init.d/sanoid-prune
# Requires sudo access to virsh, or membership of the libvirt group. # Requires sudo access to virsh, or membership of the libvirt group.
# qemu-guest-agent must be running in the vm # qemu-guest-agent must be running in the vm
@ -341,7 +301,7 @@ if [ -z "$VM_NAME" ]; then
fi fi
# Check if the VM is running # Check if the VM is running
STATE=$(virsh domstate "$VM_NAME" 2>/dev/null) STATE=$(/usr/bin/virsh domstate "$VM_NAME" 2>/dev/null)
if [ "$STATE" != "running" ]; then if [ "$STATE" != "running" ]; then
echo "VM $VM_NAME is not running or does not exist. Skipping freeze." echo "VM $VM_NAME is not running or does not exist. Skipping freeze."
exit 0 exit 0
@ -349,7 +309,7 @@ fi
echo "Freezing filesystems for $VM_NAME..." echo "Freezing filesystems for $VM_NAME..."
# domfsfreeze returns the number of frozen filesystems on success # domfsfreeze returns the number of frozen filesystems on success
if virsh domfsfreeze "$VM_NAME" > /dev/null; then if /usr/bin/virsh domfsfreeze "$VM_NAME" > /dev/null; then
echo "Successfully frozen $VM_NAME." echo "Successfully frozen $VM_NAME."
else else
echo "Error: Failed to freeze $VM_NAME. Ensure qemu-guest-agent is active." echo "Error: Failed to freeze $VM_NAME. Ensure qemu-guest-agent is active."
@ -357,8 +317,8 @@ else
fi fi
EOF EOF
} }
conf_print_virt_freeze | sudo tee /usr/local/bin/virt-freeze.sh conf_print_virt_freeze | sudo tee /usr/local/bin/sanoid_virt-freeze.sh
sudo chmod +x /usr/local/bin/virt-freeze.sh sudo chmod +x /usr/local/bin/sanoid_virt-freeze.sh
#--- #---
@ -375,7 +335,7 @@ if [ -z "$VM_NAME" ]; then
fi fi
# Check if the VM is running # Check if the VM is running
STATE=$(virsh domstate "$VM_NAME" 2>/dev/null) STATE=$(/usr/bin/virsh domstate "$VM_NAME" 2>/dev/null)
if [ "$STATE" != "running" ]; then if [ "$STATE" != "running" ]; then
echo "VM $VM_NAME is not running. Skipping unfreeze." echo "VM $VM_NAME is not running. Skipping unfreeze."
exit 0 exit 0
@ -383,7 +343,7 @@ fi
echo "Thawing filesystems for $VM_NAME..." echo "Thawing filesystems for $VM_NAME..."
# domfsthaw returns the number of thawed filesystems on success # domfsthaw returns the number of thawed filesystems on success
if virsh domfsthaw "$VM_NAME" > /dev/null; then if /usr/bin/virsh domfsthaw "$VM_NAME" > /dev/null; then
echo "Successfully thawed $VM_NAME." echo "Successfully thawed $VM_NAME."
else else
echo "Error: Failed to thaw $VM_NAME." echo "Error: Failed to thaw $VM_NAME."
@ -391,8 +351,8 @@ else
fi fi
EOF EOF
} }
conf_print_virt_unfreeze | sudo tee /usr/local/bin/virt-unfreeze.sh conf_print_virt_unfreeze | sudo tee /usr/local/bin/sanoid_virt-unfreeze.sh
sudo chmod +x /usr/local/bin/virt-unfreeze.sh sudo chmod +x /usr/local/bin/sanoid_virt-unfreeze.sh
#--- #---
@ -402,7 +362,7 @@ conf_print_virt_thaw_all() {
# /usr/local/bin/virt-thaw-all.sh # /usr/local/bin/virt-thaw-all.sh
# 1. Get running VM names, filtering out empty lines with awk # 1. Get running VM names, filtering out empty lines with awk
RUNNING_VMS=$(virsh list --state-running --name | awk 'NF') RUNNING_VMS=$(/usr/bin/virsh list --state-running --name | awk 'NF')
if [ -z "$RUNNING_VMS" ]; then if [ -z "$RUNNING_VMS" ]; then
echo "No running VMs found." echo "No running VMs found."
@ -415,11 +375,11 @@ for VM_NAME in $RUNNING_VMS; do
# Use the native thaw command. # Use the native thaw command.
# It handles the guest agent communication for you. # It handles the guest agent communication for you.
if virsh domfsthaw "$VM_NAME" > /dev/null 2>&1; then if /usr/bin/virsh domfsthaw "$VM_NAME" > /dev/null 2>&1; then
echo "Successfully thawed $VM_NAME." echo "Successfully thawed $VM_NAME."
else else
# If native fails, we capture the error for the user # If native fails, we capture the error for the user
ERROR=$(virsh domfsthaw "$VM_NAME" 2>&1) ERROR=$(/usr/bin/virsh domfsthaw "$VM_NAME" 2>&1)
echo "Error thawing $VM_NAME: $ERROR" echo "Error thawing $VM_NAME: $ERROR"
fi fi
done done
@ -427,8 +387,8 @@ done
echo "Finished processing all VMs." echo "Finished processing all VMs."
EOF EOF
} }
conf_print_virt_thaw_all | sudo tee /usr/local/bin/virt-thaw-all.sh conf_print_virt_thaw_all | sudo tee /usr/local/bin/sanoid_virt-thaw-all.sh
sudo chmod +x /usr/local/bin/virt-thaw-all.sh sudo chmod +x /usr/local/bin/sanoid_virt-thaw-all.sh
#--- #---
@ -437,7 +397,7 @@ conf_print_virt_freeze-all() {
#!/bin/bash #!/bin/bash
# 1. Get running VM names, filtering out empty lines # 1. Get running VM names, filtering out empty lines
RUNNING_VMS=$(virsh list --state-running --name | awk 'NF') RUNNING_VMS=$(/usr/bin/virsh list --state-running --name | awk 'NF')
if [ -z "$RUNNING_VMS" ]; then if [ -z "$RUNNING_VMS" ]; then
echo "No running VMs found." echo "No running VMs found."
@ -450,7 +410,7 @@ for VM_NAME in $RUNNING_VMS; do
# Using the native virsh command is generally cleaner # Using the native virsh command is generally cleaner
# It returns the number of frozen filesystems on success # It returns the number of frozen filesystems on success
if virsh domfsfreeze "$VM_NAME" > /dev/null 2>&1; then if /usr/bin/virsh domfsfreeze "$VM_NAME" > /dev/null 2>&1; then
echo "Successfully frozen $VM_NAME." echo "Successfully frozen $VM_NAME."
else else
echo "Error: Could not freeze $VM_NAME. Check if QEMU Guest Agent is running." echo "Error: Could not freeze $VM_NAME. Check if QEMU Guest Agent is running."
@ -460,8 +420,8 @@ done
echo "Finished processing all VMs." echo "Finished processing all VMs."
EOF EOF
} }
conf_print_virt_freeze-all | sudo tee /usr/local/bin/virt-freeze-all.sh conf_print_virt_freeze-all | sudo tee /usr/local/bin/sanoid_virt-freeze-all.sh
sudo chmod +x /usr/local/bin/virt-freeze-all.sh sudo chmod +x /usr/local/bin/sanoid_virt-freeze-all.sh
#--- #---