Files
atlas/internal/snapshot/scheduler.go
othman.suseno ed96137bad
Some checks failed
CI / test-build (push) Failing after 1m0s
adding snapshot function
2025-12-14 23:17:26 +07:00

262 lines
7.3 KiB
Go

package snapshot
import (
"fmt"
"log"
"sort"
"strings"
"time"
"gitea.avt.data-center.id/othman.suseno/atlas/internal/job"
"gitea.avt.data-center.id/othman.suseno/atlas/internal/models"
"gitea.avt.data-center.id/othman.suseno/atlas/internal/zfs"
)
// Scheduler manages automatic snapshot creation and pruning.
//
// A Scheduler is built with NewScheduler, started with Start and halted with
// Stop. While running, a background goroutine periodically walks the stored
// policies and creates or prunes snapshots for each of them.
type Scheduler struct {
	// policyStore supplies the snapshot policies evaluated on every pass.
	policyStore *PolicyStore
	// zfsService is used to list, create and destroy snapshots.
	zfsService *zfs.Service
	// jobManager records a job entry for each snapshot the scheduler creates.
	jobManager *job.Manager
	// stopChan is closed by Stop to make the run loop return.
	stopChan chan struct{}
	// ticker drives the periodic passes; it is nil until Start is called.
	ticker *time.Ticker
}
// NewScheduler returns a Scheduler wired to the given policy store, ZFS
// service and job manager. The scheduler is idle until Start is called; its
// stop channel is allocated here so Stop can be called at any time.
func NewScheduler(policyStore *PolicyStore, zfsService *zfs.Service, jobManager *job.Manager) *Scheduler {
	sched := new(Scheduler)
	sched.policyStore = policyStore
	sched.zfsService = zfsService
	sched.jobManager = jobManager
	sched.stopChan = make(chan struct{})
	return sched
}
// Start arms a ticker with the given interval and launches the scheduler loop
// in a background goroutine. The loop performs one pass immediately and then
// one pass per tick.
//
// interval must be positive: time.NewTicker panics otherwise.
func (s *Scheduler) Start(interval time.Duration) {
	tk := time.NewTicker(interval)
	s.ticker = tk
	log.Printf("[scheduler] started with interval %v", interval)
	go func() {
		s.run()
	}()
}
// Stop halts the ticker (if Start was ever called) and closes the stop channel
// so the run loop returns.
//
// Calling Stop again after it has already completed is a no-op: closing an
// already-closed channel would panic, so the channel state is checked first.
// Stop is not synchronized; callers must not invoke it from several goroutines
// at the same time.
func (s *Scheduler) Stop() {
	if s.ticker != nil {
		s.ticker.Stop()
	}

	select {
	case <-s.stopChan:
		// Already closed by an earlier Stop.
		return
	default:
	}

	close(s.stopChan)
	log.Printf("[scheduler] stopped")
}
// run is the scheduler loop. It performs one pass right away, then one pass
// for every ticker tick, and returns as soon as the stop channel is closed.
func (s *Scheduler) run() {
	// First pass happens immediately instead of waiting a full interval.
	s.execute()

	for {
		select {
		case <-s.stopChan:
			return
		case <-s.ticker.C:
			s.execute()
		}
	}
}
// execute performs a single scheduler pass: it fetches all policies from the
// policy store and, for every policy with Autosnap enabled, creates any due
// snapshots and then prunes old ones if Autoprune is also enabled.
func (s *Scheduler) execute() {
	all := s.policyStore.List()
	log.Printf("[scheduler] checking %d snapshot policies", len(all))

	for _, p := range all {
		// NOTE(review): pruning is only reached when Autosnap is set, so a
		// policy with Autoprune but without Autosnap is never pruned —
		// confirm that this coupling is intended.
		if p.Autosnap {
			s.checkAndCreateSnapshot(p)
			if p.Autoprune {
				s.pruneSnapshots(p)
			}
		}
	}
}
// checkAndCreateSnapshot creates every snapshot type that is currently due for
// the policy's dataset.
//
// A type (frequent, hourly, daily, weekly, monthly, yearly) is enabled when
// its retention count in the policy is positive. An enabled type is due when
// no snapshot carrying that type's name prefix was created within the type's
// interval. Only snapshots of the same type are considered, so a recent
// "frequent" snapshot does not suppress an "hourly" one, and at most one
// snapshot per type is created per interval regardless of how many are
// retained. Retention counts only govern pruning.
//
// Listing errors are logged and abort the check for this policy.
func (s *Scheduler) checkAndCreateSnapshot(policy models.SnapshotPolicy) {
	now := time.Now()
	snapshots, err := s.zfsService.ListSnapshots(policy.Dataset)
	if err != nil {
		log.Printf("[scheduler] error listing snapshots for %s: %v", policy.Dataset, err)
		return
	}

	schedules := []struct {
		prefix   string
		interval time.Duration
		keep     int
	}{
		{"frequent", 15 * time.Minute, policy.Frequent},
		{"hourly", time.Hour, policy.Hourly},
		{"daily", 24 * time.Hour, policy.Daily},
		{"weekly", 7 * 24 * time.Hour, policy.Weekly},
		{"monthly", 30 * 24 * time.Hour, policy.Monthly},
		{"yearly", 365 * 24 * time.Hour, policy.Yearly},
	}

	for _, sched := range schedules {
		if sched.keep <= 0 {
			continue
		}

		// Collect only the snapshots of this type. The type is encoded in the
		// snapshot part of the name ("<dataset>@<type>-<timestamp>"); a name
		// without '@' is matched as a whole.
		marker := sched.prefix + "-"
		var sameType []models.Snapshot
		for _, snap := range snapshots {
			short := snap.Name[strings.LastIndex(snap.Name, "@")+1:]
			if strings.HasPrefix(short, marker) {
				sameType = append(sameType, snap)
			}
		}

		// Passing 1 as the count asks "is there no snapshot of this type
		// inside the interval?", which is exactly the due condition.
		if s.shouldCreateSnapshot(sameType, sched.interval, 1) {
			s.createSnapshot(policy.Dataset, sched.prefix, now)
		}
	}
}
// shouldCreateSnapshot reports whether fewer than keepCount of the given
// snapshots were created within the last interval, measured from the current
// time. Every snapshot in the slice is counted; the caller decides which
// snapshots are relevant. A keepCount of zero or less always yields false.
func (s *Scheduler) shouldCreateSnapshot(snapshots []models.Snapshot, interval time.Duration, keepCount int) bool {
	threshold := time.Now().Add(-interval)

	recent := 0
	for i := range snapshots {
		if !snapshots[i].CreatedAt.After(threshold) {
			continue
		}
		recent++
	}

	return keepCount > recent
}
// createSnapshot takes a snapshot of dataset named "<prefix>-<timestamp>",
// where the timestamp is t formatted as YYYYMMDD-HHMMSS, and tracks the
// operation as a "snapshot_create" job.
//
// The job is marked running before the snapshot is taken. On failure the error
// is logged and stored on the job; on success the job is advanced to 100% and
// marked completed.
func (s *Scheduler) createSnapshot(dataset, prefix string, t time.Time) {
	name := prefix + "-" + t.Format("20060102-150405")

	params := map[string]interface{}{
		"dataset": dataset,
		"name":    name,
		"type":    prefix,
	}
	task := s.jobManager.Create("snapshot_create", params)
	s.jobManager.UpdateStatus(task.ID, models.JobStatusRunning, fmt.Sprintf("Creating snapshot %s@%s", dataset, name))

	err := s.zfsService.CreateSnapshot(dataset, name, false)
	if err != nil {
		log.Printf("[scheduler] error creating snapshot %s@%s: %v", dataset, name, err)
		s.jobManager.SetError(task.ID, err)
		return
	}

	s.jobManager.UpdateProgress(task.ID, 100, "Snapshot created successfully")
	s.jobManager.UpdateStatus(task.ID, models.JobStatusCompleted, "Snapshot created")
	log.Printf("[scheduler] created snapshot %s@%s", dataset, name)
}
// pruneSnapshots applies the policy's retention settings to the snapshots of
// its dataset.
//
// Snapshots are listed, bucketed by the type prefix found after the '@' in
// their name (frequent-, hourly-, daily-, weekly-, monthly-, yearly-), and each
// bucket is handed to pruneByType with the matching retention count and
// interval. Names that do not contain exactly one '@', or that carry none of
// the known prefixes, are left alone. The total number of destroyed snapshots
// is logged when it is non-zero. Listing errors are logged and abort pruning.
func (s *Scheduler) pruneSnapshots(policy models.SnapshotPolicy) {
	all, err := s.zfsService.ListSnapshots(policy.Dataset)
	if err != nil {
		log.Printf("[scheduler] error listing snapshots for pruning %s: %v", policy.Dataset, err)
		return
	}
	now := time.Now()

	// One entry per snapshot type, in the order they are pruned.
	classes := []struct {
		prefix   string
		keep     int
		interval time.Duration
	}{
		{"frequent-", policy.Frequent, 15 * time.Minute},
		{"hourly-", policy.Hourly, time.Hour},
		{"daily-", policy.Daily, 24 * time.Hour},
		{"weekly-", policy.Weekly, 7 * 24 * time.Hour},
		{"monthly-", policy.Monthly, 30 * 24 * time.Hour},
		{"yearly-", policy.Yearly, 365 * 24 * time.Hour},
	}
	buckets := make([][]models.Snapshot, len(classes))

	for _, snap := range all {
		// Only names of the form "<dataset>@<snapshot>" are classified.
		if strings.Count(snap.Name, "@") != 1 {
			continue
		}
		short := snap.Name[strings.Index(snap.Name, "@")+1:]
		for i := range classes {
			if strings.HasPrefix(short, classes[i].prefix) {
				buckets[i] = append(buckets[i], snap)
				break
			}
		}
	}

	total := 0
	for i, c := range classes {
		total += s.pruneByType(buckets[i], c.keep, c.interval, now, policy.Dataset)
	}
	if total > 0 {
		log.Printf("[scheduler] pruned %d snapshots for %s", total, policy.Dataset)
	}
}
// pruneByType destroys the snapshots of a single type that fall outside the
// retention settings and returns how many were destroyed.
//
// snapshots is sorted in place, newest first. The newest keepCount entries are
// always kept; each remaining entry is destroyed only if it is also older than
// interval relative to now. A keepCount of zero or less disables pruning for
// the type; the non-positive guard also keeps a negative count from being used
// as a slice index, which would panic. Failures to destroy a snapshot are
// logged and skipped, and are not included in the returned count.
//
// dataset is accepted for the caller's convenience and is not used here.
func (s *Scheduler) pruneByType(snapshots []models.Snapshot, keepCount int, interval time.Duration, now time.Time, dataset string) int {
	if keepCount <= 0 || len(snapshots) <= keepCount {
		return 0
	}

	// Newest first, so everything past index keepCount is a pruning candidate.
	sort.Slice(snapshots, func(i, j int) bool {
		return snapshots[i].CreatedAt.After(snapshots[j].CreatedAt)
	})

	pruned := 0
	for _, snap := range snapshots[keepCount:] {
		// Surplus snapshots still inside the interval are left for a later pass.
		if now.Sub(snap.CreatedAt) <= interval {
			continue
		}
		if err := s.zfsService.DestroySnapshot(snap.Name, false); err != nil {
			log.Printf("[scheduler] error pruning snapshot %s: %v", snap.Name, err)
			continue
		}
		pruned++
	}
	return pruned
}