Update and refine,

This commit is contained in:
cyteen 2026-03-16 00:36:04 +00:00
parent 89efbba240
commit f0a82d4b8e
1 changed files with 231 additions and 271 deletions

View File

@ -13,22 +13,26 @@ echo "override them in /etc/sanoid/sanoid.conf"
# Summarise where each packaged file is installed. Each entry is listed
# exactly once (the list was previously repeated inside the heredoc).
echo "Installed files:"
cat <<-EOF
syncoid /usr/sbin/
sanoid /usr/sbin/
findoid /usr/sbin/
sanoid.defaults.conf /usr/share/sanoid/
debian/sanoid-prune.service /lib/systemd/system
CHANGELIST /usr/share/doc/sanoid/changelog
EOF
# Both sanoid and syncoid are oneshot processes so it makes little sense to
# provide an init file, cron is just fine. In this case the systemd file is there
# because systemd decided to manage cron tasks.
#
# Cronjob for non-systemd systems: every 15 minutes.
# If you require a different interval, you will need to disable the
# timer or the cron job according to your system configuration.
# Print the /etc/cron.d entry that runs sanoid every 15 minutes.
# The job only acts when /etc/sanoid/sanoid.conf exists and
# /run/systemd/system is absent, so it does nothing on systemd hosts.
# Outputs: the cron file contents (one PATH line, one job line) on stdout.
conf_print_sanoid_cron() {
  # Heredoc body stays at column 0: <<- strips leading tabs only, so any
  # space indentation would end up in the generated cron file.
  cat <<-EOF
PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
*/15 * * * * root [ -f /etc/sanoid/sanoid.conf ] && if [ ! -d /run/systemd/system ]; then TZ=UTC /usr/sbin/sanoid --cron --quiet; fi
EOF
}
conf_print_sanoid_cron | sudo tee /etc/cron.d/sanoid
@ -36,9 +40,9 @@ conf_print_sanoid_cron | sudo tee /etc/cron.d/sanoid
# Do the same for syncoid for backups.
# Print the /etc/cron.d entry that would run syncoid every 15 minutes.
# The job only acts when /etc/sanoid/syncoid.conf exists and
# /run/systemd/system is absent.
# Outputs: the cron file contents (one PATH line, one job line) on stdout.
# NOTE(review): confirm that the installed syncoid accepts --cron/--quiet and
# reads /etc/sanoid/syncoid.conf before enabling this cron file.
conf_print_syncoid_cron() {
  # Heredoc body stays at column 0: <<- strips leading tabs only, so any
  # space indentation would end up in the generated cron file.
  cat <<-EOF
PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
*/15 * * * * root [ -f /etc/sanoid/syncoid.conf ] && if [ ! -d /run/systemd/system ]; then TZ=UTC /usr/sbin/syncoid --cron --quiet; fi
EOF
}
# conf_print_syncoid_cron | sudo tee /etc/cron.d/syncoid
@ -48,283 +52,239 @@ sudo mkdir -p /etc/sanoid/
conf_print_sanoid() {
cat <<-EOF
[rpool/docker]
use_template = ignore
#-- Organisational datasets
# DATA
[rpool/DATA]
use_template = ignore
recursive = yes
process_children_only = yes
[rpool/archive]
use_template = ignore
# ROOT
[rpool/ROOT]
use_template = ignore
recursive = yes
process_children_only = yes
[rpool/swap]
use_template = ignore
#-- END
[rpool/tmp]
use_template = ignore
# These datasets contain the Docker ZFS backing store "graph" (layers).
# To restore a broken container, just run docker-compose down && docker-compose up
[rpool/docker]
use_template = ignore
recursive = yes
[rpool/ROOT/devuan-1]
use_template = root
recursive = yes
# Docker persistent data
[rpool/DATA/docker-volumes]
use_template = docker-persistent
recursive = yes
# rpool/ROOT/devuan-1/opt
# rpool/ROOT/devuan-1/usr
# rpool/ROOT/devuan-1/usr_local
# rpool/ROOT/devuan-1/usr_share
# rpool/ROOT/devuan-1/var
# rpool/ROOT/devuan-1/var_lib
[rpool/archive]
use_template = ignore
recursive = yes
# Specific override for Virtual Machines to use scripts
[rpool/ROOT/devuan-1/var_lib_virt]
use_template = root
recursive = no
pre_snapshot_script = /usr/local/bin/virt-freeze-all.sh
post_snapshot_script = /usr/local/bin/virt-thaw-all.sh
[rpool/swap]
use_template = ignore
recursive = yes
# -- User Data --
[rpool/home]
use_template = production
recursive = yes
[rpool/tmp]
use_template = ignore
recursive = yes
[rpool/space]
use_template = production
[rpool/ROOT/devuan-1]
use_template = root
recursive = yes
#############################
# templates below this line #
#############################
# rpool/ROOT/devuan-1/opt
# rpool/ROOT/devuan-1/usr
# rpool/ROOT/devuan-1/usr_local
# rpool/ROOT/devuan-1/usr_share
# rpool/ROOT/devuan-1/var
# rpool/ROOT/devuan-1/var_lib
[template_production]
frequently = 0
hourly = 36
daily = 30
monthly = 3
yearly = 0
autosnap = yes
autoprune = yes
# Specific override for Virtual Machines to use scripts
[rpool/ROOT/devuan-1/var_lib_virt]
use_template = root
recursive = yes
pre_snapshot_script = /usr/local/bin/sanoid_virt-freeze-all.sh
post_snapshot_script = /usr/local/bin/sanoid_virt-thaw-all.sh
[template_root]
# Root changes fast; shorter history often suffices
hourly = 24
daily = 7
monthly = 1
yearly = 0
autosnap = yes
autoprune = yes
# -- User Data --
[rpool/home]
use_template = production
recursive = yes
[template_ignore]
autoprune = no
autosnap = no
monitor = no
[rpool/space]
use_template = production
[template_backup]
autoprune = yes
frequently = 0
hourly = 30
daily = 90
monthly = 12
yearly = 0
#############################
# templates below this line #
#############################
### don't take new snapshots - snapshots on backup
### datasets are replicated in from source, not
### generated locally
autosnap = no
[template_production]
frequently = 0
hourly = 36
daily = 30
monthly = 3
yearly = 0
autosnap = yes
autoprune = yes
### monitor hourlies and dailies, but don't warn or
### crit until they're over 48h old, since replication
### is typically daily only
hourly_warn = 2880
hourly_crit = 3600
daily_warn = 48
daily_crit = 60
[template_root]
# Root changes fast; shorter history often suffices
hourly = 24
daily = 7
monthly = 1
yearly = 0
autosnap = yes
autoprune = yes
[template_hotspare]
autoprune = yes
frequently = 0
hourly = 30
daily = 90
monthly = 3
yearly = 0
[template_ignore]
autoprune = no
autosnap = no
monitor = no
### don't take new snapshots - snapshots on backup
### datasets are replicated in from source, not
### generated locally
autosnap = no
##############################
# Docker Persistent Template #
##############################
[template_docker-persistent]
# Frequent snapshots for active databases/configs
frequently = 0
hourly = 24
daily = 7
monthly = 1
yearly = 0
### monitor hourlies and dailies, but don't warn or
### crit until they're over 4h old, since replication
### is typically hourly only
hourly_warn = 4h
hourly_crit = 6h
daily_warn = 2d
daily_crit = 4d
# Safety checks
autosnap = yes
autoprune = yes
[template_scripts]
### information about the snapshot will be supplied as environment variables,
### see the README.md file for details about what is passed when.
### run script before snapshot
pre_snapshot_script = /path/to/script.sh
### run script after snapshot
post_snapshot_script = /path/to/script.sh
### run script before pruning snapshot
pre_pruning_script = /path/to/script.sh
### run script after pruning snapshot
pruning_script = /path/to/script.sh
### don't take an inconsistent snapshot (skip if pre script fails)
#no_inconsistent_snapshot = yes
### run post_snapshot_script when pre_snapshot_script is failing
#force_post_snapshot_script = yes
### limit allowed execution time of scripts before continuing (<= 0: infinite)
script_timeout = 5
# Don't take a snapshot if the dataset hasn't changed
# (Saves metadata overhead)
# skip_hourless = yes
pre_snapshot_script = /usr/local/bin/sanoid_zfs-skip-empty.sh
#-- END
[template_backup]
autoprune = yes
frequently = 0
hourly = 30
daily = 90
monthly = 12
yearly = 0
### don't take new snapshots - snapshots on backup
### datasets are replicated in from source, not
### generated locally
autosnap = no
### monitor hourlies and dailies, but don't warn or
### crit until they're over 48h old, since replication
### is typically daily only
hourly_warn = 2880
hourly_crit = 3600
daily_warn = 48
daily_crit = 60
#-- END
[template_hotspare]
autoprune = yes
frequently = 0
hourly = 30
daily = 90
weekly = 4
monthly = 3
yearly = 0
### don't take new snapshots - snapshots on backup
### datasets are replicated in from source, not
### generated locally
autosnap = no
### monitor hourlies and dailies, but don't warn or
### crit until they're over 4h old, since replication
### is typically hourly only
hourly_warn = 4h
hourly_crit = 6h
daily_warn = 2d
daily_crit = 4d
EOF
}
conf_print_sanoid | sudo tee /etc/sanoid/sanoid.conf
# Both sanoid and syncoid are oneshot processes, so it makes little sense to
# provide an init file; cron is just fine. In this case the systemd file is there
# because systemd decided to manage cron tasks.
# Generated using:
# https://raw.githubusercontent.com/akhilvij/systemd-to-sysvinit-converter/master/converter.py
# python2 converter /usr/src/sanoid-2.2.0/sanoid.service > sanoid
conf_print_sanoid_init() {
cat <<-'EOF'
#!/bin/sh
### BEGIN INIT INFO
# Provides: sanoid
# Required-Start: $syslog $local_fs $remote_fs
# Required-Stop: $syslog $local_fs $remote_fs
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
# Short-Description: Snapshot ZFS filesystems
### END INIT INFO
########################
# Pre-snapshot scripts #
########################
. /lib/lsb/init-functions
prog=sanoid
PIDFILE=/var/run/$prog.pid
DESC="Snapshot ZFS filesystems"
start() {
log_daemon_msg "Starting $DESC" "$prog"
start_daemon -p $PIDFILE /usr/sbin/sanoid --take-snapshots --verbose
if [ $? -ne 0 ]; then
log_end_msg 1
exit 1
fi
if [ $? -eq 0 ]; then
log_end_msg 0
fi
exit 0
}
# In ZFS, even if no data has changed, creating a snapshot still consumes a
# small amount of space for metadata and adds an entry to the ZFS history.
# If you have hundreds of datasets being snapshotted every 15 minutes, this
# "metadata bloat" can make commands like zfs list -t snapshot feel sluggish
# over time. If you think this is an issue for you, use zfs-skip-empty.sh as
# a pre_snapshot_script.
conf_print_skip_empty() {
cat <<'EOF'
#!/bin/bash
# Usage: ./sanoid-threshold.sh <dataset> <threshold_size>
# or
# Add this to your /etc/sanoid/sanoid.conf to fire this script.
stop() {
log_daemon_msg "Stopping $DESC" "$prog"
killproc -p $PIDFILE /usr/sbin/sanoid
if [ $? -ne 0 ]; then
log_end_msg 1
exit 1
fi
if [ $? -eq 0 ]; then
log_end_msg 0
fi
}
# [tank/important_data]
# use_template = production
# # Only snapshot if more than 5MB changed
# pre_snapshot_script = /usr/local/bin/sanoid-threshold.sh 5M
force_reload() {
stop
start
}
DATASET=$1
RAW_THRESHOLD=$2
case "$1" in
start)
start
;;
stop)
stop
;;
force-reload)
force_reload
;;
restart)
stop
start
;;
# Convert a human-readable size such as "5M", "1.5g" or "100" to bytes.
# Arguments: $1 - number with an optional K/M/G/T/P suffix (any case).
# Outputs:   the size in bytes, rounded to an integer, on stdout.
# A value without a recognised suffix is treated as a plain byte count.
convert_to_bytes() {
  # Declare first, assign second, so 'local' does not mask command status.
  local number unit
  number=$(printf '%s\n' "$1" | grep -oE '^[0-9.]+')
  # -i (case-insensitive), not -I (ignore binary files): lowercase suffixes
  # must match before tr upper-cases them. Only the first letter is used.
  unit=$(printf '%s\n' "$1" | grep -oi '[KMGPT]' | head -n 1 | tr '[:lower:]' '[:upper:]')

  # Pass the number with -v rather than splicing it into the awk program.
  case "$unit" in
    K) awk -v n="$number" 'BEGIN { printf "%.0f", n * 1024 }' ;;
    M) awk -v n="$number" 'BEGIN { printf "%.0f", n * 1024^2 }' ;;
    G) awk -v n="$number" 'BEGIN { printf "%.0f", n * 1024^3 }' ;;
    T) awk -v n="$number" 'BEGIN { printf "%.0f", n * 1024^4 }' ;;
    P) awk -v n="$number" 'BEGIN { printf "%.0f", n * 1024^5 }' ;;
    *) printf "%.0f" "$number" ;;
  esac
}
if [[ -z "$DATASET" || -z "$RAW_THRESHOLD" ]]; then
logger -t sanoid "Threshold Error: Usage: $0 <dataset> <threshold>"
exit 1
fi
if ! zfs list -H "$DATASET" >/dev/null 2>&1; then
logger -t sanoid "Threshold Error: Dataset $DATASET not found."
exit 1
fi
THRESHOLD=$(convert_to_bytes "$RAW_THRESHOLD")
WRITTEN_BYTES=$(zfs get -Hp -o value written "$DATASET")
if [[ "$WRITTEN_BYTES" -lt "$THRESHOLD" ]]; then
WRITTEN_HUMAN=$(zfs get -H -o value written "$DATASET")
# Optional: Comment out the logger below if your logs get too noisy
logger -t sanoid "Skipping $DATASET: Written $WRITTEN_HUMAN < Threshold $RAW_THRESHOLD."
exit 1
fi
exit 0
EOF
}
# Sanoid doesn't run as a daemon; it runs via cron.
# conf_print_sanoid_init | sudo tee /etc/init.d/sanoid
# sudo chmod +x /etc/init.d/sanoid
# Install the skip-empty pre-snapshot helper. The generator function is
# conf_print_skip_empty; conf_print_skip_hourless is not defined.
conf_print_skip_empty | sudo tee /usr/local/bin/sanoid_zfs-skip-empty.sh
# Generated using:
# https://raw.githubusercontent.com/akhilvij/systemd-to-sysvinit-converter/master/converter.py
# python2 converter /usr/src/sanoid-2.2.0/sanoid-prune.service > sanoid-prune
conf_print_sanoid-prune_init() {
cat <<-'EOF'
#!/bin/sh
### BEGIN INIT INFO
# Provides: sanoid-prune
# Required-Start: $syslog $local_fs $remote_fs
# Required-Stop: $syslog $local_fs $remote_fs
# Short-Description: Prune ZFS snapshots
### END INIT INFO
# The helper was written through 'sudo tee', so changing its mode needs
# sudo as well.
sudo chmod +x /usr/local/bin/sanoid_zfs-skip-empty.sh
# VM Consistency (The "Freeze/Thaw" Logic)
. /lib/lsb/init-functions
prog=sanoid-prune
PIDFILE=/var/run/$prog.pid
DESC="Prune ZFS snapshots"
start() {
log_daemon_msg "Starting $DESC" "$prog"
start_daemon -p $PIDFILE /usr/sbin/sanoid --prune-snapshots --verbose
if [ $? -ne 0 ]; then
log_end_msg 1
exit 1
fi
if [ $? -eq 0 ]; then
log_end_msg 0
fi
exit 0
}
stop() {
log_daemon_msg "Stopping $DESC" "$prog"
killproc -p $PIDFILE /usr/sbin/sanoid
if [ $? -ne 0 ]; then
log_end_msg 1
exit 1
fi
if [ $? -eq 0 ]; then
log_end_msg 0
fi
}
force_reload() {
stop
start
}
case "$1" in
start)
start
;;
stop)
stop
;;
force-reload)
force_reload
;;
restart)
stop
start
;;
*)
echo "$Usage: $prog {start|stop|force-reload|restart}"
exit 2
esac
EOF
}
# Sanoid doesn't run as a daemon; it runs via cron.
# conf_print_sanoid-prune_init | sudo tee /etc/init.d/sanoid-prune
# sudo chmod +x /etc/init.d/sanoid-prune
# The inclusion of virt-freeze-all.sh and virt-thaw-all.sh to ensure data integrity:
# * Pre-snapshot: virsh domfsfreeze tells the Guest OS (via qemu-guest-agent) to flush its write buffers and temporarily pause I/O.
# * Snapshot: Sanoid takes an atomic ZFS snapshot.
# * Post-snapshot: virsh domfsthaw resumes I/O.
# Result: You get an "application-consistent" backup rather than a "crash-consistent" one.
# The user running these scripts needs sudo access to virsh or must be a member of the libvirt group.
# qemu-guest-agent must be running in the VM.
@ -341,7 +301,7 @@ if [ -z "$VM_NAME" ]; then
fi
# Check if the VM is running
STATE=$(virsh domstate "$VM_NAME" 2>/dev/null)
STATE=$(/usr/bin/virsh domstate "$VM_NAME" 2>/dev/null)
if [ "$STATE" != "running" ]; then
echo "VM $VM_NAME is not running or does not exist. Skipping freeze."
exit 0
@ -349,7 +309,7 @@ fi
echo "Freezing filesystems for $VM_NAME..."
# domfsfreeze returns the number of frozen filesystems on success
if virsh domfsfreeze "$VM_NAME" > /dev/null; then
if /usr/bin/virsh domfsfreeze "$VM_NAME" > /dev/null; then
echo "Successfully frozen $VM_NAME."
else
echo "Error: Failed to freeze $VM_NAME. Ensure qemu-guest-agent is active."
@ -357,8 +317,8 @@ else
fi
EOF
}
conf_print_virt_freeze | sudo tee /usr/local/bin/virt-freeze.sh
sudo chmod +x /usr/local/bin/virt-freeze.sh
conf_print_virt_freeze | sudo tee /usr/local/bin/sanoid_virt-freeze.sh
sudo chmod +x /usr/local/bin/sanoid_virt-freeze.sh
#---
@ -375,7 +335,7 @@ if [ -z "$VM_NAME" ]; then
fi
# Check if the VM is running
STATE=$(virsh domstate "$VM_NAME" 2>/dev/null)
STATE=$(/usr/bin/virsh domstate "$VM_NAME" 2>/dev/null)
if [ "$STATE" != "running" ]; then
echo "VM $VM_NAME is not running. Skipping unfreeze."
exit 0
@ -383,7 +343,7 @@ fi
echo "Thawing filesystems for $VM_NAME..."
# domfsthaw returns the number of thawed filesystems on success
if virsh domfsthaw "$VM_NAME" > /dev/null; then
if /usr/bin/virsh domfsthaw "$VM_NAME" > /dev/null; then
echo "Successfully thawed $VM_NAME."
else
echo "Error: Failed to thaw $VM_NAME."
@ -391,8 +351,8 @@ else
fi
EOF
}
conf_print_virt_unfreeze | sudo tee /usr/local/bin/virt-unfreeze.sh
sudo chmod +x /usr/local/bin/virt-unfreeze.sh
conf_print_virt_unfreeze | sudo tee /usr/local/bin/sanoid_virt-unfreeze.sh
sudo chmod +x /usr/local/bin/sanoid_virt-unfreeze.sh
#---
@ -402,7 +362,7 @@ conf_print_virt_thaw_all() {
# /usr/local/bin/virt-thaw-all.sh
# 1. Get running VM names, filtering out empty lines with awk
RUNNING_VMS=$(virsh list --state-running --name | awk 'NF')
RUNNING_VMS=$(/usr/bin/virsh list --state-running --name | awk 'NF')
if [ -z "$RUNNING_VMS" ]; then
echo "No running VMs found."
@ -415,11 +375,11 @@ for VM_NAME in $RUNNING_VMS; do
# Use the native thaw command.
# It handles the guest agent communication for you.
if virsh domfsthaw "$VM_NAME" > /dev/null 2>&1; then
if /usr/bin/virsh domfsthaw "$VM_NAME" > /dev/null 2>&1; then
echo "Successfully thawed $VM_NAME."
else
# If native fails, we capture the error for the user
ERROR=$(virsh domfsthaw "$VM_NAME" 2>&1)
ERROR=$(/usr/bin/virsh domfsthaw "$VM_NAME" 2>&1)
echo "Error thawing $VM_NAME: $ERROR"
fi
done
@ -427,8 +387,8 @@ done
echo "Finished processing all VMs."
EOF
}
conf_print_virt_thaw_all | sudo tee /usr/local/bin/virt-thaw-all.sh
sudo chmod +x /usr/local/bin/virt-thaw-all.sh
conf_print_virt_thaw_all | sudo tee /usr/local/bin/sanoid_virt-thaw-all.sh
sudo chmod +x /usr/local/bin/sanoid_virt-thaw-all.sh
#---
@ -437,7 +397,7 @@ conf_print_virt_freeze-all() {
#!/bin/bash
# 1. Get running VM names, filtering out empty lines
RUNNING_VMS=$(virsh list --state-running --name | awk 'NF')
RUNNING_VMS=$(/usr/bin/virsh list --state-running --name | awk 'NF')
if [ -z "$RUNNING_VMS" ]; then
echo "No running VMs found."
@ -450,7 +410,7 @@ for VM_NAME in $RUNNING_VMS; do
# Using the native virsh command is generally cleaner
# It returns the number of frozen filesystems on success
if virsh domfsfreeze "$VM_NAME" > /dev/null 2>&1; then
if /usr/bin/virsh domfsfreeze "$VM_NAME" > /dev/null 2>&1; then
echo "Successfully frozen $VM_NAME."
else
echo "Error: Could not freeze $VM_NAME. Check if QEMU Guest Agent is running."
@ -460,8 +420,8 @@ done
echo "Finished processing all VMs."
EOF
}
conf_print_virt_freeze-all | sudo tee /usr/local/bin/virt-freeze-all.sh
sudo chmod +x /usr/local/bin/virt-freeze-all.sh
conf_print_virt_freeze-all | sudo tee /usr/local/bin/sanoid_virt-freeze-all.sh
sudo chmod +x /usr/local/bin/sanoid_virt-freeze-all.sh
#---