This commit is contained in:
261
internal/snapshot/scheduler.go
Normal file
261
internal/snapshot/scheduler.go
Normal file
@@ -0,0 +1,261 @@
|
||||
package snapshot
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"gitea.avt.data-center.id/othman.suseno/atlas/internal/job"
|
||||
"gitea.avt.data-center.id/othman.suseno/atlas/internal/models"
|
||||
"gitea.avt.data-center.id/othman.suseno/atlas/internal/zfs"
|
||||
)
|
||||
|
||||
// Scheduler manages automatic snapshot creation and pruning
|
||||
type Scheduler struct {
|
||||
policyStore *PolicyStore
|
||||
zfsService *zfs.Service
|
||||
jobManager *job.Manager
|
||||
stopChan chan struct{}
|
||||
ticker *time.Ticker
|
||||
}
|
||||
|
||||
// NewScheduler creates a new snapshot scheduler
|
||||
func NewScheduler(policyStore *PolicyStore, zfsService *zfs.Service, jobManager *job.Manager) *Scheduler {
|
||||
return &Scheduler{
|
||||
policyStore: policyStore,
|
||||
zfsService: zfsService,
|
||||
jobManager: jobManager,
|
||||
stopChan: make(chan struct{}),
|
||||
}
|
||||
}
|
||||
|
||||
// Start starts the scheduler with the given interval
|
||||
func (s *Scheduler) Start(interval time.Duration) {
|
||||
s.ticker = time.NewTicker(interval)
|
||||
log.Printf("[scheduler] started with interval %v", interval)
|
||||
|
||||
go s.run()
|
||||
}
|
||||
|
||||
// Stop stops the scheduler
|
||||
func (s *Scheduler) Stop() {
|
||||
if s.ticker != nil {
|
||||
s.ticker.Stop()
|
||||
}
|
||||
close(s.stopChan)
|
||||
log.Printf("[scheduler] stopped")
|
||||
}
|
||||
|
||||
// run executes the scheduler loop
|
||||
func (s *Scheduler) run() {
|
||||
// Run immediately on start
|
||||
s.execute()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-s.ticker.C:
|
||||
s.execute()
|
||||
case <-s.stopChan:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// execute checks policies and creates/prunes snapshots
|
||||
func (s *Scheduler) execute() {
|
||||
policies := s.policyStore.List()
|
||||
log.Printf("[scheduler] checking %d snapshot policies", len(policies))
|
||||
|
||||
for _, policy := range policies {
|
||||
if !policy.Autosnap {
|
||||
continue
|
||||
}
|
||||
|
||||
// Check if we need to create a snapshot based on schedule
|
||||
s.checkAndCreateSnapshot(policy)
|
||||
|
||||
// Prune old snapshots if enabled
|
||||
if policy.Autoprune {
|
||||
s.pruneSnapshots(policy)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// checkAndCreateSnapshot checks if a snapshot should be created
|
||||
func (s *Scheduler) checkAndCreateSnapshot(policy models.SnapshotPolicy) {
|
||||
now := time.Now()
|
||||
snapshots, err := s.zfsService.ListSnapshots(policy.Dataset)
|
||||
if err != nil {
|
||||
log.Printf("[scheduler] error listing snapshots for %s: %v", policy.Dataset, err)
|
||||
return
|
||||
}
|
||||
|
||||
// Check if we need frequent snapshots (every 15 minutes)
|
||||
if policy.Frequent > 0 {
|
||||
if s.shouldCreateSnapshot(snapshots, 15*time.Minute, policy.Frequent) {
|
||||
s.createSnapshot(policy.Dataset, "frequent", now)
|
||||
}
|
||||
}
|
||||
|
||||
// Check if we need hourly snapshots
|
||||
if policy.Hourly > 0 {
|
||||
if s.shouldCreateSnapshot(snapshots, time.Hour, policy.Hourly) {
|
||||
s.createSnapshot(policy.Dataset, "hourly", now)
|
||||
}
|
||||
}
|
||||
|
||||
// Check if we need daily snapshots
|
||||
if policy.Daily > 0 {
|
||||
if s.shouldCreateSnapshot(snapshots, 24*time.Hour, policy.Daily) {
|
||||
s.createSnapshot(policy.Dataset, "daily", now)
|
||||
}
|
||||
}
|
||||
|
||||
// Check if we need weekly snapshots
|
||||
if policy.Weekly > 0 {
|
||||
if s.shouldCreateSnapshot(snapshots, 7*24*time.Hour, policy.Weekly) {
|
||||
s.createSnapshot(policy.Dataset, "weekly", now)
|
||||
}
|
||||
}
|
||||
|
||||
// Check if we need monthly snapshots
|
||||
if policy.Monthly > 0 {
|
||||
if s.shouldCreateSnapshot(snapshots, 30*24*time.Hour, policy.Monthly) {
|
||||
s.createSnapshot(policy.Dataset, "monthly", now)
|
||||
}
|
||||
}
|
||||
|
||||
// Check if we need yearly snapshots
|
||||
if policy.Yearly > 0 {
|
||||
if s.shouldCreateSnapshot(snapshots, 365*24*time.Hour, policy.Yearly) {
|
||||
s.createSnapshot(policy.Dataset, "yearly", now)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// shouldCreateSnapshot checks if a new snapshot should be created
|
||||
func (s *Scheduler) shouldCreateSnapshot(snapshots []models.Snapshot, interval time.Duration, keepCount int) bool {
|
||||
now := time.Now()
|
||||
cutoff := now.Add(-interval)
|
||||
|
||||
// Count snapshots in the interval
|
||||
count := 0
|
||||
for _, snap := range snapshots {
|
||||
if snap.CreatedAt.After(cutoff) {
|
||||
count++
|
||||
}
|
||||
}
|
||||
|
||||
// If we have fewer than keepCount, we should create one
|
||||
return count < keepCount
|
||||
}
|
||||
|
||||
// createSnapshot creates a snapshot with a timestamped name
|
||||
func (s *Scheduler) createSnapshot(dataset, prefix string, t time.Time) {
|
||||
timestamp := t.Format("20060102-150405")
|
||||
name := fmt.Sprintf("%s-%s", prefix, timestamp)
|
||||
|
||||
job := s.jobManager.Create("snapshot_create", map[string]interface{}{
|
||||
"dataset": dataset,
|
||||
"name": name,
|
||||
"type": prefix,
|
||||
})
|
||||
|
||||
s.jobManager.UpdateStatus(job.ID, models.JobStatusRunning, fmt.Sprintf("Creating snapshot %s@%s", dataset, name))
|
||||
|
||||
if err := s.zfsService.CreateSnapshot(dataset, name, false); err != nil {
|
||||
log.Printf("[scheduler] error creating snapshot %s@%s: %v", dataset, name, err)
|
||||
s.jobManager.SetError(job.ID, err)
|
||||
return
|
||||
}
|
||||
|
||||
s.jobManager.UpdateProgress(job.ID, 100, "Snapshot created successfully")
|
||||
s.jobManager.UpdateStatus(job.ID, models.JobStatusCompleted, "Snapshot created")
|
||||
log.Printf("[scheduler] created snapshot %s@%s", dataset, name)
|
||||
}
|
||||
|
||||
// pruneSnapshots removes old snapshots based on retention policy
|
||||
func (s *Scheduler) pruneSnapshots(policy models.SnapshotPolicy) {
|
||||
snapshots, err := s.zfsService.ListSnapshots(policy.Dataset)
|
||||
if err != nil {
|
||||
log.Printf("[scheduler] error listing snapshots for pruning %s: %v", policy.Dataset, err)
|
||||
return
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
pruned := 0
|
||||
|
||||
// Group snapshots by type
|
||||
frequent := []models.Snapshot{}
|
||||
hourly := []models.Snapshot{}
|
||||
daily := []models.Snapshot{}
|
||||
weekly := []models.Snapshot{}
|
||||
monthly := []models.Snapshot{}
|
||||
yearly := []models.Snapshot{}
|
||||
|
||||
for _, snap := range snapshots {
|
||||
// Parse snapshot name to determine type
|
||||
parts := strings.Split(snap.Name, "@")
|
||||
if len(parts) != 2 {
|
||||
continue
|
||||
}
|
||||
snapName := parts[1]
|
||||
|
||||
if strings.HasPrefix(snapName, "frequent-") {
|
||||
frequent = append(frequent, snap)
|
||||
} else if strings.HasPrefix(snapName, "hourly-") {
|
||||
hourly = append(hourly, snap)
|
||||
} else if strings.HasPrefix(snapName, "daily-") {
|
||||
daily = append(daily, snap)
|
||||
} else if strings.HasPrefix(snapName, "weekly-") {
|
||||
weekly = append(weekly, snap)
|
||||
} else if strings.HasPrefix(snapName, "monthly-") {
|
||||
monthly = append(monthly, snap)
|
||||
} else if strings.HasPrefix(snapName, "yearly-") {
|
||||
yearly = append(yearly, snap)
|
||||
}
|
||||
}
|
||||
|
||||
// Prune each type
|
||||
pruned += s.pruneByType(frequent, policy.Frequent, 15*time.Minute, now, policy.Dataset)
|
||||
pruned += s.pruneByType(hourly, policy.Hourly, time.Hour, now, policy.Dataset)
|
||||
pruned += s.pruneByType(daily, policy.Daily, 24*time.Hour, now, policy.Dataset)
|
||||
pruned += s.pruneByType(weekly, policy.Weekly, 7*24*time.Hour, now, policy.Dataset)
|
||||
pruned += s.pruneByType(monthly, policy.Monthly, 30*24*time.Hour, now, policy.Dataset)
|
||||
pruned += s.pruneByType(yearly, policy.Yearly, 365*24*time.Hour, now, policy.Dataset)
|
||||
|
||||
if pruned > 0 {
|
||||
log.Printf("[scheduler] pruned %d snapshots for %s", pruned, policy.Dataset)
|
||||
}
|
||||
}
|
||||
|
||||
// pruneByType prunes snapshots of a specific type
|
||||
func (s *Scheduler) pruneByType(snapshots []models.Snapshot, keepCount int, interval time.Duration, now time.Time, dataset string) int {
|
||||
if keepCount == 0 || len(snapshots) <= keepCount {
|
||||
return 0
|
||||
}
|
||||
|
||||
// Sort by creation time (newest first)
|
||||
sort.Slice(snapshots, func(i, j int) bool {
|
||||
return snapshots[i].CreatedAt.After(snapshots[j].CreatedAt)
|
||||
})
|
||||
|
||||
// Keep the newest keepCount snapshots, delete the rest
|
||||
toDelete := snapshots[keepCount:]
|
||||
|
||||
pruned := 0
|
||||
for _, snap := range toDelete {
|
||||
// Only delete if it's older than the interval
|
||||
if now.Sub(snap.CreatedAt) > interval {
|
||||
if err := s.zfsService.DestroySnapshot(snap.Name, false); err != nil {
|
||||
log.Printf("[scheduler] error pruning snapshot %s: %v", snap.Name, err)
|
||||
continue
|
||||
}
|
||||
pruned++
|
||||
}
|
||||
}
|
||||
|
||||
return pruned
|
||||
}
|
||||
Reference in New Issue
Block a user