adding snapshot function

2025-12-14 23:17:26 +07:00
parent 461edbc970
commit ed96137bad
8 changed files with 1075 additions and 20 deletions

docs/BACKGROUND_JOBS.md Normal file

@@ -0,0 +1,125 @@
# Background Job System
The atlasOS API includes a background job system that automatically executes snapshot policies and manages long-running operations.
## Architecture
### Components
1. **Job Manager** (`internal/job/manager.go`)
- Tracks job lifecycle (pending, running, completed, failed, cancelled)
- Stores job metadata and progress
- Thread-safe job operations
2. **Snapshot Scheduler** (`internal/snapshot/scheduler.go`)
- Automatically creates snapshots based on policies
- Prunes old snapshots based on retention rules
- Runs every 15 minutes by default
3. **Integration**
- Scheduler starts automatically when API server starts
- Gracefully stops on server shutdown (see the shutdown sketch below)
- Jobs are accessible via API endpoints
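The shutdown path relies on `App.StopScheduler` (added in `internal/httpapp/app.go`). A minimal sketch of the wiring, assuming a standard `signal.NotifyContext` pattern in `cmd/atlas-api`; the empty `httpapp.Config{}` is a placeholder:

```go
package main

import (
	"context"
	"log"
	"net/http"
	"os/signal"
	"syscall"
	"time"

	"gitea.avt.data-center.id/othman.suseno/atlas/internal/httpapp"
)

func main() {
	ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
	defer stop()

	app, err := httpapp.New(httpapp.Config{}) // fill in real config
	if err != nil {
		log.Fatal(err)
	}
	srv := &http.Server{Addr: ":8080", Handler: app.Router()}

	go func() {
		if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
			log.Fatal(err)
		}
	}()

	<-ctx.Done() // wait for SIGINT/SIGTERM

	// Stop the snapshot scheduler before draining HTTP connections.
	app.StopScheduler()
	shutdownCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()
	srv.Shutdown(shutdownCtx)
}
```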
## How It Works
### Snapshot Creation
Every 15 minutes, the scheduler evaluates each enabled snapshot policy and creates snapshots on the following cadences:
1. **Frequent snapshots**: Creates every 15 minutes if `frequent > 0`
2. **Hourly snapshots**: Creates every hour if `hourly > 0`
3. **Daily snapshots**: Creates daily if `daily > 0`
4. **Weekly snapshots**: Creates weekly if `weekly > 0`
5. **Monthly snapshots**: Creates monthly if `monthly > 0`
6. **Yearly snapshots**: Creates yearly if `yearly > 0`
Snapshot names follow the pattern: `{type}-{timestamp}` (e.g., `hourly-20241214-143000`)
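The timestamp uses Go's reference-time formatting; a minimal sketch of the naming scheme, mirroring the `Format` call in `internal/snapshot/scheduler.go`:

```go
package main

import (
	"fmt"
	"time"
)

// snapshotName builds "{type}-{timestamp}", e.g. "hourly-20241214-143000".
func snapshotName(prefix string, t time.Time) string {
	return fmt.Sprintf("%s-%s", prefix, t.Format("20060102-150405"))
}

func main() {
	fmt.Println(snapshotName("hourly", time.Now()))
}
```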
### Snapshot Pruning
When `autoprune` is enabled, the scheduler (see the sketch after this list):
1. Groups snapshots by type (frequent, hourly, daily, etc.)
2. Sorts by creation time (newest first)
3. Keeps only the number specified in the policy
4. Deletes older snapshots that exceed the retention count
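A condensed, runnable sketch of that retention step; the real `pruneByType` in the scheduler additionally checks each candidate's age against the interval before calling `zfs destroy`:

```go
package main

import (
	"fmt"
	"sort"
	"time"
)

type snap struct {
	Name      string
	CreatedAt time.Time
}

// pruneByType keeps the newest keepCount snapshots of one type and
// returns the ones that fall past the retention count.
func pruneByType(snaps []snap, keepCount int) []snap {
	if keepCount == 0 || len(snaps) <= keepCount {
		return nil
	}
	// Sort newest first; everything after the first keepCount is pruned.
	sort.Slice(snaps, func(i, j int) bool {
		return snaps[i].CreatedAt.After(snaps[j].CreatedAt)
	})
	return snaps[keepCount:]
}

func main() {
	now := time.Now()
	snaps := []snap{
		{"pool/ds@hourly-old", now.Add(-3 * time.Hour)},
		{"pool/ds@hourly-mid", now.Add(-2 * time.Hour)},
		{"pool/ds@hourly-new", now.Add(-1 * time.Hour)},
	}
	for _, s := range pruneByType(snaps, 2) {
		fmt.Println("would destroy:", s.Name) // the scheduler calls zfs destroy here
	}
}
```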
### Job Tracking
Every snapshot operation creates a job that tracks the following (the job's shape is sketched after this list):
- Status (pending → running → completed/failed)
- Progress (0-100%)
- Error messages (if failed)
- Timestamps (created, started, completed)
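The field set below matches what `internal/job/manager.go` reads and writes; the JSON tags are an assumption about `internal/models`:

```go
package models

import "time"

type JobStatus string

// Job is the shape the job manager tracks (JSON tags assumed).
type Job struct {
	ID          string                 `json:"id"`
	Type        string                 `json:"type"`
	Status      JobStatus              `json:"status"`
	Progress    int                    `json:"progress"` // 0-100
	Message     string                 `json:"message"`
	Error       string                 `json:"error,omitempty"`
	Metadata    map[string]interface{} `json:"metadata,omitempty"`
	CreatedAt   time.Time              `json:"created_at"`
	StartedAt   *time.Time             `json:"started_at,omitempty"`   // set on first transition to running
	CompletedAt *time.Time             `json:"completed_at,omitempty"` // set on completed/failed/cancelled
}
```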
## API Endpoints
### List Jobs
```bash
GET /api/v1/jobs
GET /api/v1/jobs?status=running
```
### Get Job
```bash
GET /api/v1/jobs/{id}
```
### Cancel Job
```bash
POST /api/v1/jobs/{id}/cancel
```
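A minimal Go client for polling the jobs endpoint; the JSON field names here are assumptions about how `models.Job` is tagged:

```go
package main

import (
	"encoding/json"
	"fmt"
	"log"
	"net/http"
)

func main() {
	resp, err := http.Get("http://localhost:8080/api/v1/jobs?status=running")
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	// Decode only the fields needed here (tag names assumed).
	var jobs []struct {
		ID       string `json:"id"`
		Status   string `json:"status"`
		Progress int    `json:"progress"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&jobs); err != nil {
		log.Fatal(err)
	}
	for _, j := range jobs {
		fmt.Printf("%s  %s  %d%%\n", j.ID, j.Status, j.Progress)
	}
}
```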
## Configuration
The scheduler interval is hardcoded to 15 minutes. To change it, modify:
```go
// In internal/httpapp/app.go
scheduler.Start(15 * time.Minute) // Change interval here
```
## Example Workflow
1. **Create a snapshot policy:**
```bash
curl -X POST http://localhost:8080/api/v1/snapshot-policies \
-H "Content-Type: application/json" \
-d '{
"dataset": "pool/dataset",
"hourly": 24,
"daily": 7,
"autosnap": true,
"autoprune": true
}'
```
2. **Scheduler automatically:**
- Creates hourly snapshots (keeps 24)
- Creates daily snapshots (keeps 7)
- Prunes old snapshots beyond retention
3. **Monitor jobs:**
```bash
curl http://localhost:8080/api/v1/jobs
```
## Job Statuses
- `pending`: Job created but not started
- `running`: Job is currently executing
- `completed`: Job finished successfully
- `failed`: Job encountered an error
- `cancelled`: Job was cancelled by user
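These correspond to the `models.JobStatus` constants the manager switches on; a sketch, assuming the string values match the lowercase names above:

```go
package models

type JobStatus string

const (
	JobStatusPending   JobStatus = "pending"
	JobStatusRunning   JobStatus = "running"
	JobStatusCompleted JobStatus = "completed"
	JobStatusFailed    JobStatus = "failed"
	JobStatusCancelled JobStatus = "cancelled"
)
```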
## Notes
- Jobs are stored in-memory (will be lost on restart)
- Scheduler runs in a background goroutine
- Snapshot operations are synchronous (blocking)
- For production, consider:
- Database persistence for jobs
- Async job execution with a worker pool (sketched below)
- Job history retention policies
- Metrics/alerting for failed jobs
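For the worker-pool suggestion, an illustrative sketch; none of these names exist in the current codebase:

```go
package main

import (
	"fmt"
	"sync"
)

// Task is a unit of background work, e.g. one snapshot creation.
type Task struct {
	JobID string
	Run   func() error
}

// runPool consumes tasks with n workers so slow ZFS calls
// would not block the scheduler tick.
func runPool(n int, tasks <-chan Task) {
	var wg sync.WaitGroup
	for i := 0; i < n; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for t := range tasks {
				if err := t.Run(); err != nil {
					fmt.Printf("job %s failed: %v\n", t.JobID, err) // would call jobManager.SetError
					continue
				}
				fmt.Printf("job %s done\n", t.JobID) // would call jobManager.UpdateStatus
			}
		}()
	}
	wg.Wait()
}

func main() {
	tasks := make(chan Task)
	go func() {
		defer close(tasks)
		tasks <- Task{JobID: "job-1", Run: func() error { return nil }}
	}()
	runPool(4, tasks)
}
```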


@@ -2,6 +2,7 @@ package httpapp
import (
"encoding/json"
"fmt"
"log"
"net/http"
@@ -287,47 +288,167 @@ func (a *App) handleDeleteZVOL(w http.ResponseWriter, r *http.Request) {
// Snapshot Handlers
func (a *App) handleListSnapshots(w http.ResponseWriter, r *http.Request) {
- snapshots := []models.Snapshot{} // Stub
dataset := r.URL.Query().Get("dataset")
snapshots, err := a.zfs.ListSnapshots(dataset)
if err != nil {
log.Printf("list snapshots error: %v", err)
writeJSON(w, http.StatusInternalServerError, map[string]string{"error": err.Error()})
return
}
writeJSON(w, http.StatusOK, snapshots)
}
func (a *App) handleCreateSnapshot(w http.ResponseWriter, r *http.Request) {
- writeJSON(w, http.StatusNotImplemented, map[string]string{"error": "not implemented"})
var req struct {
Dataset string `json:"dataset"`
Name string `json:"name"`
Recursive bool `json:"recursive,omitempty"`
}
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid request body"})
return
}
if req.Dataset == "" || req.Name == "" {
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "dataset and name are required"})
return
}
if err := a.zfs.CreateSnapshot(req.Dataset, req.Name, req.Recursive); err != nil {
log.Printf("create snapshot error: %v", err)
writeJSON(w, http.StatusInternalServerError, map[string]string{"error": err.Error()})
return
}
snapshotName := fmt.Sprintf("%s@%s", req.Dataset, req.Name)
snap, err := a.zfs.GetSnapshot(snapshotName)
if err != nil {
writeJSON(w, http.StatusCreated, map[string]string{"message": "snapshot created", "name": snapshotName})
return
}
writeJSON(w, http.StatusCreated, snap)
}
func (a *App) handleGetSnapshot(w http.ResponseWriter, r *http.Request) {
name := pathParam(r, "/api/v1/snapshots/")
- writeJSON(w, http.StatusNotImplemented, map[string]string{"error": "not implemented", "name": name})
if name == "" {
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "snapshot name required"})
return
}
snap, err := a.zfs.GetSnapshot(name)
if err != nil {
writeJSON(w, http.StatusNotFound, map[string]string{"error": err.Error()})
return
}
writeJSON(w, http.StatusOK, snap)
}
func (a *App) handleDeleteSnapshot(w http.ResponseWriter, r *http.Request) {
name := pathParam(r, "/api/v1/snapshots/")
- writeJSON(w, http.StatusNotImplemented, map[string]string{"error": "not implemented", "name": name})
if name == "" {
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "snapshot name required"})
return
}
recursive := r.URL.Query().Get("recursive") == "true"
if err := a.zfs.DestroySnapshot(name, recursive); err != nil {
log.Printf("destroy snapshot error: %v", err)
writeJSON(w, http.StatusInternalServerError, map[string]string{"error": err.Error()})
return
}
writeJSON(w, http.StatusOK, map[string]string{"message": "snapshot destroyed", "name": name})
}
// Snapshot Policy Handlers
func (a *App) handleListSnapshotPolicies(w http.ResponseWriter, r *http.Request) {
- policies := []models.SnapshotPolicy{} // Stub
dataset := r.URL.Query().Get("dataset")
var policies []models.SnapshotPolicy
if dataset != "" {
policies = a.snapshotPolicy.ListForDataset(dataset)
} else {
policies = a.snapshotPolicy.List()
}
writeJSON(w, http.StatusOK, policies)
}
func (a *App) handleCreateSnapshotPolicy(w http.ResponseWriter, r *http.Request) {
- writeJSON(w, http.StatusNotImplemented, map[string]string{"error": "not implemented"})
var policy models.SnapshotPolicy
if err := json.NewDecoder(r.Body).Decode(&policy); err != nil {
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid request body"})
return
}
if policy.Dataset == "" {
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "dataset is required"})
return
}
a.snapshotPolicy.Set(&policy)
writeJSON(w, http.StatusCreated, policy)
}
func (a *App) handleGetSnapshotPolicy(w http.ResponseWriter, r *http.Request) {
dataset := pathParam(r, "/api/v1/snapshot-policies/")
- writeJSON(w, http.StatusNotImplemented, map[string]string{"error": "not implemented", "dataset": dataset})
if dataset == "" {
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "dataset name required"})
return
}
policy, err := a.snapshotPolicy.Get(dataset)
if err != nil {
writeJSON(w, http.StatusInternalServerError, map[string]string{"error": err.Error()})
return
}
if policy == nil {
writeJSON(w, http.StatusNotFound, map[string]string{"error": "policy not found"})
return
}
writeJSON(w, http.StatusOK, policy)
}
func (a *App) handleUpdateSnapshotPolicy(w http.ResponseWriter, r *http.Request) {
dataset := pathParam(r, "/api/v1/snapshot-policies/")
- writeJSON(w, http.StatusNotImplemented, map[string]string{"error": "not implemented", "dataset": dataset})
if dataset == "" {
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "dataset name required"})
return
}
var policy models.SnapshotPolicy
if err := json.NewDecoder(r.Body).Decode(&policy); err != nil {
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid request body"})
return
}
// Ensure dataset matches URL parameter
policy.Dataset = dataset
a.snapshotPolicy.Set(&policy)
writeJSON(w, http.StatusOK, policy)
}
func (a *App) handleDeleteSnapshotPolicy(w http.ResponseWriter, r *http.Request) {
dataset := pathParam(r, "/api/v1/snapshot-policies/")
- writeJSON(w, http.StatusNotImplemented, map[string]string{"error": "not implemented", "dataset": dataset})
if dataset == "" {
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "dataset name required"})
return
}
if err := a.snapshotPolicy.Delete(dataset); err != nil {
log.Printf("delete snapshot policy error: %v", err)
writeJSON(w, http.StatusInternalServerError, map[string]string{"error": err.Error()})
return
}
writeJSON(w, http.StatusOK, map[string]string{"message": "policy deleted", "dataset": dataset})
}
// SMB Share Handlers
@@ -417,18 +538,41 @@ func (a *App) handleRemoveLUN(w http.ResponseWriter, r *http.Request) {
// Job Handlers
func (a *App) handleListJobs(w http.ResponseWriter, r *http.Request) {
- jobs := []models.Job{} // Stub
status := models.JobStatus(r.URL.Query().Get("status"))
jobs := a.jobManager.List(status)
writeJSON(w, http.StatusOK, jobs)
}
func (a *App) handleGetJob(w http.ResponseWriter, r *http.Request) {
id := pathParam(r, "/api/v1/jobs/")
- writeJSON(w, http.StatusNotImplemented, map[string]string{"error": "not implemented", "id": id})
if id == "" {
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "job id required"})
return
}
job, err := a.jobManager.Get(id)
if err != nil {
writeJSON(w, http.StatusNotFound, map[string]string{"error": err.Error()})
return
}
writeJSON(w, http.StatusOK, job)
}
func (a *App) handleCancelJob(w http.ResponseWriter, r *http.Request) {
id := pathParam(r, "/api/v1/jobs/")
- writeJSON(w, http.StatusNotImplemented, map[string]string{"error": "not implemented", "id": id})
if id == "" {
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "job id required"})
return
}
if err := a.jobManager.Cancel(id); err != nil {
log.Printf("cancel job error: %v", err)
writeJSON(w, http.StatusBadRequest, map[string]string{"error": err.Error()})
return
}
writeJSON(w, http.StatusOK, map[string]string{"message": "job cancelled", "id": id})
}
// Auth Handlers (stubs)

internal/httpapp/app.go

@@ -7,6 +7,8 @@ import (
"path/filepath"
"time"
"gitea.avt.data-center.id/othman.suseno/atlas/internal/job"
"gitea.avt.data-center.id/othman.suseno/atlas/internal/snapshot"
"gitea.avt.data-center.id/othman.suseno/atlas/internal/zfs"
)
@@ -21,6 +23,9 @@ type App struct {
tmpl *template.Template
mux *http.ServeMux
zfs *zfs.Service
snapshotPolicy *snapshot.PolicyStore
jobManager *job.Manager
scheduler *snapshot.Scheduler
}
func New(cfg Config) (*App, error) {
@@ -36,13 +41,24 @@ func New(cfg Config) (*App, error) {
return nil, err
}
zfsService := zfs.New()
policyStore := snapshot.NewPolicyStore()
jobMgr := job.NewManager()
scheduler := snapshot.NewScheduler(policyStore, zfsService, jobMgr)
a := &App{
cfg: cfg,
tmpl: tmpl,
mux: http.NewServeMux(),
- zfs: zfs.New(),
+ zfs: zfsService,
snapshotPolicy: policyStore,
jobManager: jobMgr,
scheduler: scheduler,
}
// Start snapshot scheduler (runs every 15 minutes)
scheduler.Start(15 * time.Minute)
a.routes()
return a, nil
}
@@ -52,6 +68,13 @@ func (a *App) Router() http.Handler {
return requestID(logging(a.mux))
}
// StopScheduler stops the snapshot scheduler (for graceful shutdown)
func (a *App) StopScheduler() {
if a.scheduler != nil {
a.scheduler.Stop()
}
}
// routes() is now in routes.go
func parseTemplates(dir string) (*template.Template, error) {

internal/job/manager.go Normal file

@@ -0,0 +1,163 @@
package job
import (
"fmt"
"sync"
"time"
"gitea.avt.data-center.id/othman.suseno/atlas/internal/models"
)
// Manager handles job lifecycle and tracking
type Manager struct {
mu sync.RWMutex
jobs map[string]*models.Job
nextID int64
}
// NewManager creates a new job manager
func NewManager() *Manager {
return &Manager{
jobs: make(map[string]*models.Job),
nextID: 1,
}
}
// Create creates a new job
func (m *Manager) Create(jobType string, metadata map[string]interface{}) *models.Job {
m.mu.Lock()
defer m.mu.Unlock()
id := fmt.Sprintf("job-%d", m.nextID)
m.nextID++
job := &models.Job{
ID: id,
Type: jobType,
Status: models.JobStatusPending,
Progress: 0,
Message: "Job created",
Metadata: metadata,
CreatedAt: time.Now(),
}
m.jobs[id] = job
return job
}
// Get returns a copy of the job with the given ID
func (m *Manager) Get(id string) (*models.Job, error) {
m.mu.RLock()
defer m.mu.RUnlock()
job, exists := m.jobs[id]
if !exists {
return nil, fmt.Errorf("job %s not found", id)
}
// Return a copy so callers cannot race with later status updates
j := *job
return &j, nil
}
// List returns all jobs, optionally filtered by status
func (m *Manager) List(status models.JobStatus) []models.Job {
m.mu.RLock()
defer m.mu.RUnlock()
jobs := []models.Job{} // non-nil so the API serializes to [] rather than null
for _, job := range m.jobs {
if status == "" || job.Status == status {
jobs = append(jobs, *job)
}
}
return jobs
}
// UpdateStatus updates job status
func (m *Manager) UpdateStatus(id string, status models.JobStatus, message string) error {
m.mu.Lock()
defer m.mu.Unlock()
job, exists := m.jobs[id]
if !exists {
return fmt.Errorf("job %s not found", id)
}
job.Status = status
job.Message = message
now := time.Now()
switch status {
case models.JobStatusRunning:
if job.StartedAt == nil {
job.StartedAt = &now
}
case models.JobStatusCompleted, models.JobStatusFailed, models.JobStatusCancelled:
job.CompletedAt = &now
}
return nil
}
// UpdateProgress updates job progress (0-100)
func (m *Manager) UpdateProgress(id string, progress int, message string) error {
m.mu.Lock()
defer m.mu.Unlock()
job, exists := m.jobs[id]
if !exists {
return fmt.Errorf("job %s not found", id)
}
if progress < 0 {
progress = 0
}
if progress > 100 {
progress = 100
}
job.Progress = progress
if message != "" {
job.Message = message
}
return nil
}
// SetError sets job error and marks as failed
func (m *Manager) SetError(id string, err error) error {
m.mu.Lock()
defer m.mu.Unlock()
job, exists := m.jobs[id]
if !exists {
return fmt.Errorf("job %s not found", id)
}
job.Status = models.JobStatusFailed
job.Error = err.Error()
now := time.Now()
job.CompletedAt = &now
return nil
}
// Cancel cancels a job
func (m *Manager) Cancel(id string) error {
m.mu.Lock()
defer m.mu.Unlock()
job, exists := m.jobs[id]
if !exists {
return fmt.Errorf("job %s not found", id)
}
if job.Status == models.JobStatusCompleted || job.Status == models.JobStatusFailed {
return fmt.Errorf("cannot cancel job in status %s", job.Status)
}
job.Status = models.JobStatusCancelled
job.Message = "Job cancelled by user"
now := time.Now()
job.CompletedAt = &now
return nil
}


@@ -0,0 +1,76 @@
package snapshot
import (
"strings"
"sync"
"gitea.avt.data-center.id/othman.suseno/atlas/internal/models"
)
// PolicyStore manages snapshot policies
type PolicyStore struct {
mu sync.RWMutex
policies map[string]*models.SnapshotPolicy
}
// NewPolicyStore creates a new policy store
func NewPolicyStore() *PolicyStore {
return &PolicyStore{
policies: make(map[string]*models.SnapshotPolicy),
}
}
// List returns all snapshot policies
func (s *PolicyStore) List() []models.SnapshotPolicy {
s.mu.RLock()
defer s.mu.RUnlock()
policies := make([]models.SnapshotPolicy, 0, len(s.policies))
for _, p := range s.policies {
policies = append(policies, *p)
}
return policies
}
// Get returns a policy for a dataset
func (s *PolicyStore) Get(dataset string) (*models.SnapshotPolicy, error) {
s.mu.RLock()
defer s.mu.RUnlock()
policy, exists := s.policies[dataset]
if !exists {
return nil, nil // Return nil if not found (not an error)
}
return policy, nil
}
// Set creates or updates a policy
func (s *PolicyStore) Set(policy *models.SnapshotPolicy) {
s.mu.Lock()
defer s.mu.Unlock()
s.policies[policy.Dataset] = policy
}
// Delete removes a policy
func (s *PolicyStore) Delete(dataset string) error {
s.mu.Lock()
defer s.mu.Unlock()
delete(s.policies, dataset)
return nil
}
// ListForDataset returns policies for the given dataset and its descendants
func (s *PolicyStore) ListForDataset(datasetPrefix string) []models.SnapshotPolicy {
s.mu.RLock()
defer s.mu.RUnlock()
policies := []models.SnapshotPolicy{}
for _, p := range s.policies {
// Match the dataset itself or its children; require a "/" boundary
// so "pool/data" does not match "pool/database"
if datasetPrefix == "" || p.Dataset == datasetPrefix || strings.HasPrefix(p.Dataset, datasetPrefix+"/") {
policies = append(policies, *p)
}
}
return policies
}

internal/snapshot/scheduler.go Normal file

@@ -0,0 +1,261 @@
package snapshot
import (
"fmt"
"log"
"sort"
"strings"
"time"
"gitea.avt.data-center.id/othman.suseno/atlas/internal/job"
"gitea.avt.data-center.id/othman.suseno/atlas/internal/models"
"gitea.avt.data-center.id/othman.suseno/atlas/internal/zfs"
)
// Scheduler manages automatic snapshot creation and pruning
type Scheduler struct {
policyStore *PolicyStore
zfsService *zfs.Service
jobManager *job.Manager
stopChan chan struct{}
ticker *time.Ticker
}
// NewScheduler creates a new snapshot scheduler
func NewScheduler(policyStore *PolicyStore, zfsService *zfs.Service, jobManager *job.Manager) *Scheduler {
return &Scheduler{
policyStore: policyStore,
zfsService: zfsService,
jobManager: jobManager,
stopChan: make(chan struct{}),
}
}
// Start starts the scheduler with the given interval
func (s *Scheduler) Start(interval time.Duration) {
s.ticker = time.NewTicker(interval)
log.Printf("[scheduler] started with interval %v", interval)
go s.run()
}
// Stop stops the scheduler
func (s *Scheduler) Stop() {
if s.ticker != nil {
s.ticker.Stop()
}
close(s.stopChan)
log.Printf("[scheduler] stopped")
}
// run executes the scheduler loop
func (s *Scheduler) run() {
// Run immediately on start
s.execute()
for {
select {
case <-s.ticker.C:
s.execute()
case <-s.stopChan:
return
}
}
}
// execute checks policies and creates/prunes snapshots
func (s *Scheduler) execute() {
policies := s.policyStore.List()
log.Printf("[scheduler] checking %d snapshot policies", len(policies))
for _, policy := range policies {
if !policy.Autosnap {
continue
}
// Check if we need to create a snapshot based on schedule
s.checkAndCreateSnapshot(policy)
// Prune old snapshots if enabled
if policy.Autoprune {
s.pruneSnapshots(policy)
}
}
}
// checkAndCreateSnapshot checks if a snapshot should be created
func (s *Scheduler) checkAndCreateSnapshot(policy models.SnapshotPolicy) {
now := time.Now()
snapshots, err := s.zfsService.ListSnapshots(policy.Dataset)
if err != nil {
log.Printf("[scheduler] error listing snapshots for %s: %v", policy.Dataset, err)
return
}
// Create a frequent snapshot if none of that type exists within the last 15 minutes
if policy.Frequent > 0 && s.shouldCreateSnapshot(snapshots, "frequent", 15*time.Minute) {
s.createSnapshot(policy.Dataset, "frequent", now)
}
// Hourly
if policy.Hourly > 0 && s.shouldCreateSnapshot(snapshots, "hourly", time.Hour) {
s.createSnapshot(policy.Dataset, "hourly", now)
}
// Daily
if policy.Daily > 0 && s.shouldCreateSnapshot(snapshots, "daily", 24*time.Hour) {
s.createSnapshot(policy.Dataset, "daily", now)
}
// Weekly
if policy.Weekly > 0 && s.shouldCreateSnapshot(snapshots, "weekly", 7*24*time.Hour) {
s.createSnapshot(policy.Dataset, "weekly", now)
}
// Monthly (approximated as 30 days)
if policy.Monthly > 0 && s.shouldCreateSnapshot(snapshots, "monthly", 30*24*time.Hour) {
s.createSnapshot(policy.Dataset, "monthly", now)
}
// Yearly (approximated as 365 days)
if policy.Yearly > 0 && s.shouldCreateSnapshot(snapshots, "yearly", 365*24*time.Hour) {
s.createSnapshot(policy.Dataset, "yearly", now)
}
}
// shouldCreateSnapshot reports whether a new snapshot of the given type is due.
// It is due when no existing snapshot whose short name starts with "<prefix>-"
// was created within the interval; retention counts are enforced separately by pruning.
func (s *Scheduler) shouldCreateSnapshot(snapshots []models.Snapshot, prefix string, interval time.Duration) bool {
cutoff := time.Now().Add(-interval)
for _, snap := range snapshots {
parts := strings.Split(snap.Name, "@")
if len(parts) != 2 || !strings.HasPrefix(parts[1], prefix+"-") {
continue
}
if snap.CreatedAt.After(cutoff) {
return false
}
}
return true
}
// createSnapshot creates a snapshot with a timestamped name
func (s *Scheduler) createSnapshot(dataset, prefix string, t time.Time) {
timestamp := t.Format("20060102-150405")
name := fmt.Sprintf("%s-%s", prefix, timestamp)
job := s.jobManager.Create("snapshot_create", map[string]interface{}{
"dataset": dataset,
"name": name,
"type": prefix,
})
s.jobManager.UpdateStatus(job.ID, models.JobStatusRunning, fmt.Sprintf("Creating snapshot %s@%s", dataset, name))
if err := s.zfsService.CreateSnapshot(dataset, name, false); err != nil {
log.Printf("[scheduler] error creating snapshot %s@%s: %v", dataset, name, err)
s.jobManager.SetError(job.ID, err)
return
}
s.jobManager.UpdateProgress(job.ID, 100, "Snapshot created successfully")
s.jobManager.UpdateStatus(job.ID, models.JobStatusCompleted, "Snapshot created")
log.Printf("[scheduler] created snapshot %s@%s", dataset, name)
}
// pruneSnapshots removes old snapshots based on retention policy
func (s *Scheduler) pruneSnapshots(policy models.SnapshotPolicy) {
snapshots, err := s.zfsService.ListSnapshots(policy.Dataset)
if err != nil {
log.Printf("[scheduler] error listing snapshots for pruning %s: %v", policy.Dataset, err)
return
}
now := time.Now()
pruned := 0
// Group snapshots by type
frequent := []models.Snapshot{}
hourly := []models.Snapshot{}
daily := []models.Snapshot{}
weekly := []models.Snapshot{}
monthly := []models.Snapshot{}
yearly := []models.Snapshot{}
for _, snap := range snapshots {
// Parse snapshot name to determine type
parts := strings.Split(snap.Name, "@")
if len(parts) != 2 {
continue
}
snapName := parts[1]
if strings.HasPrefix(snapName, "frequent-") {
frequent = append(frequent, snap)
} else if strings.HasPrefix(snapName, "hourly-") {
hourly = append(hourly, snap)
} else if strings.HasPrefix(snapName, "daily-") {
daily = append(daily, snap)
} else if strings.HasPrefix(snapName, "weekly-") {
weekly = append(weekly, snap)
} else if strings.HasPrefix(snapName, "monthly-") {
monthly = append(monthly, snap)
} else if strings.HasPrefix(snapName, "yearly-") {
yearly = append(yearly, snap)
}
}
// Prune each type
pruned += s.pruneByType(frequent, policy.Frequent, 15*time.Minute, now, policy.Dataset)
pruned += s.pruneByType(hourly, policy.Hourly, time.Hour, now, policy.Dataset)
pruned += s.pruneByType(daily, policy.Daily, 24*time.Hour, now, policy.Dataset)
pruned += s.pruneByType(weekly, policy.Weekly, 7*24*time.Hour, now, policy.Dataset)
pruned += s.pruneByType(monthly, policy.Monthly, 30*24*time.Hour, now, policy.Dataset)
pruned += s.pruneByType(yearly, policy.Yearly, 365*24*time.Hour, now, policy.Dataset)
if pruned > 0 {
log.Printf("[scheduler] pruned %d snapshots for %s", pruned, policy.Dataset)
}
}
// pruneByType prunes snapshots of a specific type
func (s *Scheduler) pruneByType(snapshots []models.Snapshot, keepCount int, interval time.Duration, now time.Time, dataset string) int {
if keepCount == 0 || len(snapshots) <= keepCount {
return 0
}
// Sort by creation time (newest first)
sort.Slice(snapshots, func(i, j int) bool {
return snapshots[i].CreatedAt.After(snapshots[j].CreatedAt)
})
// Keep the newest keepCount snapshots, delete the rest
toDelete := snapshots[keepCount:]
pruned := 0
for _, snap := range toDelete {
// Only delete if it's older than the interval
if now.Sub(snap.CreatedAt) > interval {
if err := s.zfsService.DestroySnapshot(snap.Name, false); err != nil {
log.Printf("[scheduler] error pruning snapshot %s: %v", snap.Name, err)
continue
}
pruned++
}
}
return pruned
}


@@ -398,3 +398,107 @@ func parseSize(s string) (uint64, error) {
return val * multiplier, nil
}
// ListSnapshots returns all snapshots for a dataset (or all if dataset is empty)
func (s *Service) ListSnapshots(dataset string) ([]models.Snapshot, error) {
args := []string{"list", "-H", "-o", "name,used,creation", "-t", "snapshot", "-s", "creation"}
if dataset != "" {
args = append(args, "-r", dataset)
} else {
args = append(args, "-r")
}
output, err := s.execCommand(s.zfsPath, args...)
if err != nil {
return nil, err
}
snapshots := []models.Snapshot{} // non-nil so the API serializes to [] rather than null
lines := strings.Split(output, "\n")
for _, line := range lines {
if line == "" {
continue
}
fields := strings.Fields(line)
if len(fields) < 3 {
continue
}
fullName := fields[0]
// Snapshot name format: dataset@snapshot
parts := strings.Split(fullName, "@")
if len(parts) != 2 {
continue
}
datasetName := parts[0]
snapshot := models.Snapshot{
Name: fullName,
Dataset: datasetName,
}
// Parse size
if used, err := parseSize(fields[1]); err == nil {
snapshot.Size = used
}
// Parse creation time.
// Default `zfs list` creation output has no seconds, e.g. "Thu Dec 14 23:17 2025".
// strings.Fields above collapses the double space ZFS prints before single-digit
// days, so the single-space layout below matches.
createdStr := strings.Join(fields[2:], " ")
if t, err := time.Parse("Mon Jan 2 15:04 2006", createdStr); err == nil {
snapshot.CreatedAt = t
} else if t, err := time.Parse(time.RFC3339, createdStr); err == nil {
// Fallback in case the platform emits RFC3339 timestamps
snapshot.CreatedAt = t
}
snapshots = append(snapshots, snapshot)
}
return snapshots, nil
}
// CreateSnapshot creates a new snapshot
func (s *Service) CreateSnapshot(dataset, name string, recursive bool) error {
args := []string{"snapshot"}
if recursive {
args = append(args, "-r")
}
snapshotName := fmt.Sprintf("%s@%s", dataset, name)
args = append(args, snapshotName)
_, err := s.execCommand(s.zfsPath, args...)
return err
}
// DestroySnapshot destroys a snapshot
func (s *Service) DestroySnapshot(name string, recursive bool) error {
args := []string{"destroy"}
if recursive {
args = append(args, "-r")
}
args = append(args, name)
_, err := s.execCommand(s.zfsPath, args...)
return err
}
// GetSnapshot returns snapshot details
func (s *Service) GetSnapshot(name string) (*models.Snapshot, error) {
snapshots, err := s.ListSnapshots("")
if err != nil {
return nil, err
}
for _, snap := range snapshots {
if snap.Name == name {
return &snap, nil
}
}
return nil, fmt.Errorf("snapshot %s not found", name)
}

test_api.sh Executable file

@@ -0,0 +1,159 @@
#!/bin/bash
# Atlas API Test Script
# This script tests the API endpoints
BASE_URL="${ATLAS_URL:-http://localhost:8080}"
API_URL="${BASE_URL}/api/v1"
echo "=========================================="
echo "Atlas API Test Suite"
echo "=========================================="
echo "Base URL: $BASE_URL"
echo "API URL: $API_URL"
echo ""
# Colors for output
GREEN='\033[0;32m'
RED='\033[0;31m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
# Test helper function
test_endpoint() {
local method=$1
local endpoint=$2
local data=$3
local description=$4
echo -e "${YELLOW}Testing: $description${NC}"
echo " $method $endpoint"
if [ -n "$data" ]; then
response=$(curl -s -w "\n%{http_code}" -X "$method" \
-H "Content-Type: application/json" \
-d "$data" \
"$endpoint")
else
response=$(curl -s -w "\n%{http_code}" -X "$method" "$endpoint")
fi
http_code=$(echo "$response" | tail -n1)
body=$(echo "$response" | sed '$d')
if [ "$http_code" -ge 200 ] && [ "$http_code" -lt 300 ]; then
echo -e " ${GREEN}✓ Success (HTTP $http_code)${NC}"
echo "$body" | jq '.' 2>/dev/null || echo "$body"
elif [ "$http_code" -ge 400 ] && [ "$http_code" -lt 500 ]; then
echo -e " ${YELLOW}⚠ Client Error (HTTP $http_code)${NC}"
echo "$body" | jq '.' 2>/dev/null || echo "$body"
else
echo -e " ${RED}✗ Error (HTTP $http_code)${NC}"
echo "$body"
fi
echo ""
}
# Check if server is running
echo "Checking if server is running..."
if ! curl -s "$BASE_URL/healthz" > /dev/null; then
echo -e "${RED}Error: Server is not running at $BASE_URL${NC}"
echo "Start the server with: go run ./cmd/atlas-api/main.go"
exit 1
fi
echo -e "${GREEN}Server is running!${NC}"
echo ""
# Health & Metrics
echo "=========================================="
echo "1. Health & Metrics"
echo "=========================================="
test_endpoint "GET" "$BASE_URL/healthz" "" "Health check"
test_endpoint "GET" "$BASE_URL/metrics" "" "Prometheus metrics"
echo ""
# Disk Discovery
echo "=========================================="
echo "2. Disk Discovery"
echo "=========================================="
test_endpoint "GET" "$API_URL/disks" "" "List available disks"
echo ""
# ZFS Pools
echo "=========================================="
echo "3. ZFS Pool Management"
echo "=========================================="
test_endpoint "GET" "$API_URL/pools" "" "List ZFS pools"
test_endpoint "GET" "$API_URL/pools/testpool" "" "Get pool details (if exists)"
echo ""
# Datasets
echo "=========================================="
echo "4. Dataset Management"
echo "=========================================="
test_endpoint "GET" "$API_URL/datasets" "" "List all datasets"
test_endpoint "GET" "$API_URL/datasets?pool=testpool" "" "List datasets in pool"
echo ""
# ZVOLs
echo "=========================================="
echo "5. ZVOL Management"
echo "=========================================="
test_endpoint "GET" "$API_URL/zvols" "" "List all ZVOLs"
test_endpoint "GET" "$API_URL/zvols?pool=testpool" "" "List ZVOLs in pool"
echo ""
# Snapshots
echo "=========================================="
echo "6. Snapshot Management"
echo "=========================================="
test_endpoint "GET" "$API_URL/snapshots" "" "List all snapshots"
test_endpoint "GET" "$API_URL/snapshots?dataset=testpool/test" "" "List snapshots for dataset"
echo ""
# Snapshot Policies
echo "=========================================="
echo "7. Snapshot Policies"
echo "=========================================="
test_endpoint "GET" "$API_URL/snapshot-policies" "" "List snapshot policies"
# Create a test policy
test_endpoint "POST" "$API_URL/snapshot-policies" \
'{"dataset":"testpool/test","frequent":4,"hourly":24,"daily":7,"weekly":4,"monthly":12,"yearly":2,"autosnap":true,"autoprune":true}' \
"Create snapshot policy"
test_endpoint "GET" "$API_URL/snapshot-policies/testpool/test" "" "Get snapshot policy"
echo ""
# Storage Services
echo "=========================================="
echo "8. Storage Services"
echo "=========================================="
test_endpoint "GET" "$API_URL/shares/smb" "" "List SMB shares"
test_endpoint "GET" "$API_URL/exports/nfs" "" "List NFS exports"
test_endpoint "GET" "$API_URL/iscsi/targets" "" "List iSCSI targets"
echo ""
# Jobs
echo "=========================================="
echo "9. Job Management"
echo "=========================================="
test_endpoint "GET" "$API_URL/jobs" "" "List jobs"
echo ""
# Users & Auth
echo "=========================================="
echo "10. Authentication & Users"
echo "=========================================="
test_endpoint "GET" "$API_URL/users" "" "List users"
echo ""
# Audit Logs
echo "=========================================="
echo "11. Audit Logs"
echo "=========================================="
test_endpoint "GET" "$API_URL/audit" "" "List audit logs"
echo ""
echo "=========================================="
echo "Test Suite Complete!"
echo "=========================================="