Update and refine,

This commit is contained in:
cyteen 2026-03-16 00:36:04 +00:00
parent 89efbba240
commit f0a82d4b8e
1 changed files with 231 additions and 271 deletions

View File

@ -13,22 +13,26 @@ echo "override them in /etc/sanoid/sanoid.conf"
echo "Installed files:" echo "Installed files:"
cat <<-EOF cat <<-EOF
syncoid /usr/sbin/ syncoid /usr/sbin/
sanoid /usr/sbin/ sanoid /usr/sbin/
findoid /usr/sbin/ findoid /usr/sbin/
sanoid.defaults.conf /usr/share/sanoid/ sanoid.defaults.conf /usr/share/sanoid/
debian/sanoid-prune.service /lib/systemd/system debian/sanoid-prune.service /lib/systemd/system
CHANGELIST /usr/share/doc/sanoid/changelog CHANGELIST /usr/share/doc/sanoid/changelog
EOF EOF
# Both sanoid and syncoid are oneshot processes so it makes little sense to
# provide an init file, cron is just fine. In this case the systemd file is there
# because systemd decided to manage cron tasks.
#
# Cronjob for non-systemd systems: every 15 minutes. # Cronjob for non-systemd systems: every 15 minutes.
# If you require a different interval, you will need to disable the # If you require a different interval, you will need to disable the
# timer or the cron job according to your system configuration. # timer or the cron job according to your system configuration.
conf_print_sanoid_cron() { conf_print_sanoid_cron() {
cat <<-EOF cat <<-EOF
PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
*/15 * * * * root [ -f /etc/sanoid/sanoid.conf ] && if [ ! -d /run/systemd/system ]; then TZ=UTC /usr/sbin/sanoid --cron --quiet; fi */15 * * * * root [ -f /etc/sanoid/sanoid.conf ] && if [ ! -d /run/systemd/system ]; then TZ=UTC /usr/sbin/sanoid --cron --quiet; fi
EOF EOF
} }
conf_print_sanoid_cron | sudo tee /etc/cron.d/sanoid conf_print_sanoid_cron | sudo tee /etc/cron.d/sanoid
@ -36,9 +40,9 @@ conf_print_sanoid_cron | sudo tee /etc/cron.d/sanoid
# Do the same for syncoid for backups. # Do the same for syncoid for backups.
conf_print_syncoid_cron() { conf_print_syncoid_cron() {
cat <<-EOF cat <<-EOF
PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
*/15 * * * * root [ -f /etc/sanoid/syncoid.conf ] && if [ ! -d /run/systemd/system ]; then TZ=UTC /usr/sbin/syncoid --cron --quiet; fi */15 * * * * root [ -f /etc/sanoid/syncoid.conf ] && if [ ! -d /run/systemd/system ]; then TZ=UTC /usr/sbin/syncoid --cron --quiet; fi
EOF EOF
} }
# conf_print_syncoid_cron | sudo tee /etc/cron.d/syncoid # conf_print_syncoid_cron | sudo tee /etc/cron.d/syncoid
@ -48,49 +52,75 @@ sudo mkdir -p /etc/sanoid/
conf_print_sanoid() { conf_print_sanoid() {
cat <<-EOF cat <<-EOF
[rpool/docker] #-- Organisational datasets
# DATA
[rpool/DATA]
use_template = ignore use_template = ignore
recursive = yes
process_children_only = yes
[rpool/archive] # ROOT
[rpool/ROOT]
use_template = ignore use_template = ignore
recursive = yes
process_children_only = yes
[rpool/swap] #-- END
# These datasets contain the docker ZFS backing store "graph" (layers).
# Just restore a broken container with docker-compose down && docker-compose up
[rpool/docker]
use_template = ignore use_template = ignore
recursive = yes
[rpool/tmp] # Docker persistent data
[rpool/DATA/docker-volumes]
use_template = docker-persistent
recursive = yes
[rpool/archive]
use_template = ignore use_template = ignore
recursive = yes
[rpool/ROOT/devuan-1] [rpool/swap]
use_template = ignore
recursive = yes
[rpool/tmp]
use_template = ignore
recursive = yes
[rpool/ROOT/devuan-1]
use_template = root use_template = root
recursive = yes recursive = yes
# rpool/ROOT/devuan-1/opt # rpool/ROOT/devuan-1/opt
# rpool/ROOT/devuan-1/usr # rpool/ROOT/devuan-1/usr
# rpool/ROOT/devuan-1/usr_local # rpool/ROOT/devuan-1/usr_local
# rpool/ROOT/devuan-1/usr_share # rpool/ROOT/devuan-1/usr_share
# rpool/ROOT/devuan-1/var # rpool/ROOT/devuan-1/var
# rpool/ROOT/devuan-1/var_lib # rpool/ROOT/devuan-1/var_lib
# Specific override for Virtual Machines to use scripts # Specific override for Virtual Machines to use scripts
[rpool/ROOT/devuan-1/var_lib_virt] [rpool/ROOT/devuan-1/var_lib_virt]
use_template = root use_template = root
recursive = no recursive = yes
pre_snapshot_script = /usr/local/bin/virt-freeze-all.sh pre_snapshot_script = /usr/local/bin/sanoid_virt-freeze-all.sh
post_snapshot_script = /usr/local/bin/virt-thaw-all.sh post_snapshot_script = /usr/local/bin/sanoid_virt-thaw-all.sh
# -- User Data -- # -- User Data --
[rpool/home] [rpool/home]
use_template = production use_template = production
recursive = yes recursive = yes
[rpool/space] [rpool/space]
use_template = production use_template = production
############################# #############################
# templates below this line # # templates below this line #
############################# #############################
[template_production] [template_production]
frequently = 0 frequently = 0
hourly = 36 hourly = 36
daily = 30 daily = 30
@ -99,7 +129,7 @@ conf_print_sanoid() {
autosnap = yes autosnap = yes
autoprune = yes autoprune = yes
[template_root] [template_root]
# Root changes fast; shorter history often suffices # Root changes fast; shorter history often suffices
hourly = 24 hourly = 24
daily = 7 daily = 7
@ -108,12 +138,34 @@ conf_print_sanoid() {
autosnap = yes autosnap = yes
autoprune = yes autoprune = yes
[template_ignore] [template_ignore]
autoprune = no autoprune = no
autosnap = no autosnap = no
monitor = no monitor = no
[template_backup] ##############################
# Docker Persistent Template #
##############################
[template_docker-persistent]
# Frequent snapshots for active databases/configs
frequently = 0
hourly = 24
daily = 7
monthly = 1
yearly = 0
# Safety checks
autosnap = yes
autoprune = yes
# Don't take a snapshot if the dataset hasn't changed
# (Saves metadata overhead)
# skip_hourless = yes
pre_snapshot_script = /usr/local/bin/sanoid_zfs-skip-empty.sh
#-- END
[template_backup]
autoprune = yes autoprune = yes
frequently = 0 frequently = 0
hourly = 30 hourly = 30
@ -134,11 +186,14 @@ conf_print_sanoid() {
daily_warn = 48 daily_warn = 48
daily_crit = 60 daily_crit = 60
[template_hotspare] #-- END
[template_hotspare]
autoprune = yes autoprune = yes
frequently = 0 frequently = 0
hourly = 30 hourly = 30
daily = 90 daily = 90
weekly = 4
monthly = 3 monthly = 3
yearly = 0 yearly = 0
@ -154,177 +209,82 @@ conf_print_sanoid() {
hourly_crit = 6h hourly_crit = 6h
daily_warn = 2d daily_warn = 2d
daily_crit = 4d daily_crit = 4d
[template_scripts]
### information about the snapshot will be supplied as environment variables,
### see the README.md file for details about what is passed when.
### run script before snapshot
pre_snapshot_script = /path/to/script.sh
### run script after snapshot
post_snapshot_script = /path/to/script.sh
### run script before pruning snapshot
pre_pruning_script = /path/to/script.sh
### run script after pruning snapshot
pruning_script = /path/to/script.sh
### don't take an inconsistent snapshot (skip if pre script fails)
#no_inconsistent_snapshot = yes
### run post_snapshot_script when pre_snapshot_script is failing
#force_post_snapshot_script = yes
### limit allowed execution time of scripts before continuing (<= 0: infinite)
script_timeout = 5
EOF EOF
} }
conf_print_sanoid | sudo tee /etc/sanoid/sanoid.conf conf_print_sanoid | sudo tee /etc/sanoid/sanoid.conf
# Both sanoid and synmcoid are oneshot processes so it makes little sense to ########################
# provide an init file, cron is just fine. In this case the systemd file is there # Pre-snapshot scripts #
# because systemd decided to manage cron tasks. ########################
# Generated using:
# https://raw.githubusercontent.com/akhilvij/systemd-to-sysvinit-converter/master/converter.py
# python2 converter /usr/src/sanoid-2.2.0/sanoid.service > sanoid
conf_print_sanoid_init() {
cat <<-'EOF'
#!/bin/sh
### BEGIN INIT INFO
# Provides: sanoid
# Required-Start: $syslog $local_fs $remote_fs
# Required-Stop: $syslog $local_fs $remote_fs
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
# Short-Description: Snapshot ZFS filesystems
### END INIT INFO
. /lib/lsb/init-functions # In ZFS, even if no data has changed, creating a snapshot still consumes a
prog=sanoid # small amount of space for metadata and adds an entry to the ZFS history.
PIDFILE=/var/run/$prog.pid # If you have hundreds of datasets being snapshotted every 15 minutes, this
DESC="Snapshot ZFS filesystems" # "metadata bloat" can make commands like zfs list -t snapshot feel sluggish
start() { # over time. If you think this is an issue for you, use sanoid_zfs-skip-empty.sh as
log_daemon_msg "Starting $DESC" "$prog" # a pre_snapshot_script
start_daemon -p $PIDFILE /usr/sbin/sanoid --take-snapshots --verbose conf_print_skip_empty() {
if [ $? -ne 0 ]; then cat <<'EOF'
log_end_msg 1 #!/bin/bash
exit 1
fi
if [ $? -eq 0 ]; then
log_end_msg 0
fi
exit 0
}
stop() { # Usage: ./sanoid-threshold.sh <dataset> <threshold_size>
log_daemon_msg "Stopping $DESC" "$prog" # or
killproc -p $PIDFILE /usr/sbin/sanoid # Add this to your /etc/sanoid/sanoid.conf to fire this script.
if [ $? -ne 0 ]; then
log_end_msg 1
exit 1
fi
if [ $? -eq 0 ]; then
log_end_msg 0
fi
}
force_reload() { # [tank/important_data]
stop # use_template = production
start # # Only snapshot if more than 5MB changed
} # pre_snapshot_script = /usr/local/bin/sanoid-threshold.sh 5M
case "$1" in DATASET=$1
start) RAW_THRESHOLD=$2
start
;;
stop)
stop
;;
force-reload)
force_reload
;;
restart)
stop
start
;;
*) convert_to_bytes() {
echo "$Usage: $prog {start|stop|force-reload|restart}" local number=$(echo "$1" | grep -oE '^[0-9.]+')
exit 2 local unit=$(echo "$1" | grep -oI '[KMGPT]' | tr '[:lower:]' '[:upper:]')
case "$unit" in
K) awk "BEGIN { printf \"%.0f\", $number * 1024 }" ;;
M) awk "BEGIN { printf \"%.0f\", $number * 1024^2 }" ;;
G) awk "BEGIN { printf \"%.0f\", $number * 1024^3 }" ;;
T) awk "BEGIN { printf \"%.0f\", $number * 1024^4 }" ;;
*) printf "%.0f" "$number" ;;
esac esac
}
if [[ -z "$DATASET" || -z "$RAW_THRESHOLD" ]]; then
logger -t sanoid "Threshold Error: Usage: $0 <dataset> <threshold>"
exit 1
fi
if ! zfs list -H "$DATASET" >/dev/null 2>&1; then
logger -t sanoid "Threshold Error: Dataset $DATASET not found."
exit 1
fi
THRESHOLD=$(convert_to_bytes "$RAW_THRESHOLD")
WRITTEN_BYTES=$(zfs get -Hp -o value written "$DATASET")
if [[ "$WRITTEN_BYTES" -lt "$THRESHOLD" ]]; then
WRITTEN_HUMAN=$(zfs get -H -o value written "$DATASET")
# Optional: Comment out the logger below if your logs get too noisy
logger -t sanoid "Skipping $DATASET: Written $WRITTEN_HUMAN < Threshold $RAW_THRESHOLD."
exit 1
fi
exit 0
EOF EOF
} }
# Sandoid doesn't ran as a daemon it runs vi cron conf_print_skip_hourless | sudo tee /usr/local/bin/sanoid_zfs-skip-empty.sh
# conf_print_sanoid_init | sudo tee /etc/init.d/sanoid
# sudo chmod +x /etc/init.d/sanoid
# Generated using: chmod +x /usr/local/bin/sanoid_zfs-skip-empty.sh
# https://raw.githubusercontent.com/akhilvij/systemd-to-sysvinit-converter/master/converter.py # VM Consistency (The "Freeze/Thaw" Logic)
# python2 converter /usr/src/sanoid-2.2.0/sanoid-prune.service > sanoid-prune
conf_print_sanoid-prune_init() {
cat <<-'EOF'
#!/bin/sh
### BEGIN INIT INFO
# Provides: sanoid-prune
# Required-Start: $syslog $local_fs $remote_fs
# Required-Stop: $syslog $local_fs $remote_fs
# Short-Description: Prune ZFS snapshots
### END INIT INFO
. /lib/lsb/init-functions # virt-freeze-all.sh and virt-thaw-all.sh are included to ensure data integrity:
prog=sanoid-prune # * Pre-snapshot: virsh domfsfreeze tells the Guest OS (via qemu-guest-agent) to flush its write buffers and temporarily pause I/O.
PIDFILE=/var/run/$prog.pid # * Snapshot: Sanoid takes an atomic ZFS snapshot.
DESC="Prune ZFS snapshots" # * Post-snapshot: virsh domfsthaw resumes I/O.
start() { # Result: You get an "application-consistent" backup rather than a "crash-consistent" one.
log_daemon_msg "Starting $DESC" "$prog"
start_daemon -p $PIDFILE /usr/sbin/sanoid --prune-snapshots --verbose
if [ $? -ne 0 ]; then
log_end_msg 1
exit 1
fi
if [ $? -eq 0 ]; then
log_end_msg 0
fi
exit 0
}
stop() {
log_daemon_msg "Stopping $DESC" "$prog"
killproc -p $PIDFILE /usr/sbin/sanoid
if [ $? -ne 0 ]; then
log_end_msg 1
exit 1
fi
if [ $? -eq 0 ]; then
log_end_msg 0
fi
}
force_reload() {
stop
start
}
case "$1" in
start)
start
;;
stop)
stop
;;
force-reload)
force_reload
;;
restart)
stop
start
;;
*)
echo "$Usage: $prog {start|stop|force-reload|restart}"
exit 2
esac
EOF
}
# Sandoid doesn't ran as a daemon it runs vi cron
# conf_print_sanoid-prune_init | sudo tee /etc/init.d/sanoid-prune
# sudo chmod +x /etc/init.d/sanoid-prune
# Requires sudo access to virsh, or membership of the libvirt group. # Requires sudo access to virsh, or membership of the libvirt group.
# qemu-guest-agent must be running in the vm # qemu-guest-agent must be running in the vm
@ -341,7 +301,7 @@ if [ -z "$VM_NAME" ]; then
fi fi
# Check if the VM is running # Check if the VM is running
STATE=$(virsh domstate "$VM_NAME" 2>/dev/null) STATE=$(/usr/bin/virsh domstate "$VM_NAME" 2>/dev/null)
if [ "$STATE" != "running" ]; then if [ "$STATE" != "running" ]; then
echo "VM $VM_NAME is not running or does not exist. Skipping freeze." echo "VM $VM_NAME is not running or does not exist. Skipping freeze."
exit 0 exit 0
@ -349,7 +309,7 @@ fi
echo "Freezing filesystems for $VM_NAME..." echo "Freezing filesystems for $VM_NAME..."
# domfsfreeze returns the number of frozen filesystems on success # domfsfreeze returns the number of frozen filesystems on success
if virsh domfsfreeze "$VM_NAME" > /dev/null; then if /usr/bin/virsh domfsfreeze "$VM_NAME" > /dev/null; then
echo "Successfully frozen $VM_NAME." echo "Successfully frozen $VM_NAME."
else else
echo "Error: Failed to freeze $VM_NAME. Ensure qemu-guest-agent is active." echo "Error: Failed to freeze $VM_NAME. Ensure qemu-guest-agent is active."
@ -357,8 +317,8 @@ else
fi fi
EOF EOF
} }
conf_print_virt_freeze | sudo tee /usr/local/bin/virt-freeze.sh conf_print_virt_freeze | sudo tee /usr/local/bin/sanoid_virt-freeze.sh
sudo chmod +x /usr/local/bin/virt-freeze.sh sudo chmod +x /usr/local/bin/sanoid_virt-freeze.sh
#--- #---
@ -375,7 +335,7 @@ if [ -z "$VM_NAME" ]; then
fi fi
# Check if the VM is running # Check if the VM is running
STATE=$(virsh domstate "$VM_NAME" 2>/dev/null) STATE=$(/usr/bin/virsh domstate "$VM_NAME" 2>/dev/null)
if [ "$STATE" != "running" ]; then if [ "$STATE" != "running" ]; then
echo "VM $VM_NAME is not running. Skipping unfreeze." echo "VM $VM_NAME is not running. Skipping unfreeze."
exit 0 exit 0
@ -383,7 +343,7 @@ fi
echo "Thawing filesystems for $VM_NAME..." echo "Thawing filesystems for $VM_NAME..."
# domfsthaw returns the number of thawed filesystems on success # domfsthaw returns the number of thawed filesystems on success
if virsh domfsthaw "$VM_NAME" > /dev/null; then if /usr/bin/virsh domfsthaw "$VM_NAME" > /dev/null; then
echo "Successfully thawed $VM_NAME." echo "Successfully thawed $VM_NAME."
else else
echo "Error: Failed to thaw $VM_NAME." echo "Error: Failed to thaw $VM_NAME."
@ -391,8 +351,8 @@ else
fi fi
EOF EOF
} }
conf_print_virt_unfreeze | sudo tee /usr/local/bin/virt-unfreeze.sh conf_print_virt_unfreeze | sudo tee /usr/local/bin/sanoid_virt-unfreeze.sh
sudo chmod +x /usr/local/bin/virt-unfreeze.sh sudo chmod +x /usr/local/bin/sanoid_virt-unfreeze.sh
#--- #---
@ -402,7 +362,7 @@ conf_print_virt_thaw_all() {
# /usr/local/bin/virt-thaw-all.sh # /usr/local/bin/virt-thaw-all.sh
# 1. Get running VM names, filtering out empty lines with awk # 1. Get running VM names, filtering out empty lines with awk
RUNNING_VMS=$(virsh list --state-running --name | awk 'NF') RUNNING_VMS=$(/usr/bin/virsh list --state-running --name | awk 'NF')
if [ -z "$RUNNING_VMS" ]; then if [ -z "$RUNNING_VMS" ]; then
echo "No running VMs found." echo "No running VMs found."
@ -415,11 +375,11 @@ for VM_NAME in $RUNNING_VMS; do
# Use the native thaw command. # Use the native thaw command.
# It handles the guest agent communication for you. # It handles the guest agent communication for you.
if virsh domfsthaw "$VM_NAME" > /dev/null 2>&1; then if /usr/bin/virsh domfsthaw "$VM_NAME" > /dev/null 2>&1; then
echo "Successfully thawed $VM_NAME." echo "Successfully thawed $VM_NAME."
else else
# If native fails, we capture the error for the user # If native fails, we capture the error for the user
ERROR=$(virsh domfsthaw "$VM_NAME" 2>&1) ERROR=$(/usr/bin/virsh domfsthaw "$VM_NAME" 2>&1)
echo "Error thawing $VM_NAME: $ERROR" echo "Error thawing $VM_NAME: $ERROR"
fi fi
done done
@ -427,8 +387,8 @@ done
echo "Finished processing all VMs." echo "Finished processing all VMs."
EOF EOF
} }
conf_print_virt_thaw_all | sudo tee /usr/local/bin/virt-thaw-all.sh conf_print_virt_thaw_all | sudo tee /usr/local/bin/sanoid_virt-thaw-all.sh
sudo chmod +x /usr/local/bin/virt-thaw-all.sh sudo chmod +x /usr/local/bin/sanoid_virt-thaw-all.sh
#--- #---
@ -437,7 +397,7 @@ conf_print_virt_freeze-all() {
#!/bin/bash #!/bin/bash
# 1. Get running VM names, filtering out empty lines # 1. Get running VM names, filtering out empty lines
RUNNING_VMS=$(virsh list --state-running --name | awk 'NF') RUNNING_VMS=$(/usr/bin/virsh list --state-running --name | awk 'NF')
if [ -z "$RUNNING_VMS" ]; then if [ -z "$RUNNING_VMS" ]; then
echo "No running VMs found." echo "No running VMs found."
@ -450,7 +410,7 @@ for VM_NAME in $RUNNING_VMS; do
# Using the native virsh command is generally cleaner # Using the native virsh command is generally cleaner
# It returns the number of frozen filesystems on success # It returns the number of frozen filesystems on success
if virsh domfsfreeze "$VM_NAME" > /dev/null 2>&1; then if /usr/bin/virsh domfsfreeze "$VM_NAME" > /dev/null 2>&1; then
echo "Successfully frozen $VM_NAME." echo "Successfully frozen $VM_NAME."
else else
echo "Error: Could not freeze $VM_NAME. Check if QEMU Guest Agent is running." echo "Error: Could not freeze $VM_NAME. Check if QEMU Guest Agent is running."
@ -460,8 +420,8 @@ done
echo "Finished processing all VMs." echo "Finished processing all VMs."
EOF EOF
} }
conf_print_virt_freeze-all | sudo tee /usr/local/bin/virt-freeze-all.sh conf_print_virt_freeze-all | sudo tee /usr/local/bin/sanoid_virt-freeze-all.sh
sudo chmod +x /usr/local/bin/virt-freeze-all.sh sudo chmod +x /usr/local/bin/sanoid_virt-freeze-all.sh
#--- #---