diff --git a/docs/API_SECURITY.md b/docs/API_SECURITY.md
new file mode 100644
index 0000000..6408b00
--- /dev/null
+++ b/docs/API_SECURITY.md
@@ -0,0 +1,278 @@
# API Security & Rate Limiting

## Overview

AtlasOS implements comprehensive API security measures including rate limiting, security headers, CORS protection, and request validation to protect the API from abuse and attacks.

## Rate Limiting

### Token Bucket Algorithm

The rate limiter uses a token bucket algorithm:
- **Default Rate**: 100 requests per minute per client
- **Window**: 60 seconds
- **Token Refill**: Tokens are refilled based on elapsed time
- **Per-Client**: Rate limiting is applied per IP address or user ID

### Rate Limit Headers

All responses include rate limit headers:

```
X-RateLimit-Limit: 100
X-RateLimit-Window: 60
```

### Rate Limit Exceeded

When the rate limit is exceeded, the API returns:

```json
{
  "code": "SERVICE_UNAVAILABLE",
  "message": "rate limit exceeded",
  "details": "too many requests, please try again later"
}
```

**HTTP Status**: `429 Too Many Requests`

### Client Identification

Rate limiting uses different keys depending on authentication:

- **Authenticated Users**: `user:{user_id}` - more granular per-user limiting
- **Unauthenticated**: `ip:{ip_address}` - IP-based limiting

### Public Endpoints

Public endpoints (login, health checks) are excluded from rate limiting to ensure availability.
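For illustration, here is a minimal sketch of a per-client token bucket along the lines described above. It is a simplified stand-in, not the actual `NewRateLimiter` implementation; names such as `Limiter`, `Bucket`, and `Allow` are illustrative:

```go
package ratelimit

import (
	"sync"
	"time"
)

// Bucket tracks the remaining tokens for one client key.
type Bucket struct {
	tokens   float64
	lastSeen time.Time
}

// Limiter is a minimal per-client token-bucket rate limiter.
type Limiter struct {
	mu      sync.Mutex
	rate    float64 // tokens refilled per second
	burst   float64 // maximum bucket size
	buckets map[string]*Bucket
}

// NewLimiter allows `requests` requests per `window` for each client key.
func NewLimiter(requests int, window time.Duration) *Limiter {
	return &Limiter{
		rate:    float64(requests) / window.Seconds(),
		burst:   float64(requests),
		buckets: make(map[string]*Bucket),
	}
}

// Allow reports whether the client identified by key (e.g. "user:user-1"
// or "ip:192.168.1.100") may proceed, consuming one token if so.
func (l *Limiter) Allow(key string) bool {
	l.mu.Lock()
	defer l.mu.Unlock()

	now := time.Now()
	b, ok := l.buckets[key]
	if !ok {
		b = &Bucket{tokens: l.burst, lastSeen: now}
		l.buckets[key] = b
	}

	// Refill based on elapsed time, capped at the burst size.
	b.tokens += now.Sub(b.lastSeen).Seconds() * l.rate
	if b.tokens > l.burst {
		b.tokens = l.burst
	}
	b.lastSeen = now

	if b.tokens < 1 {
		return false
	}
	b.tokens--
	return true
}
```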
## Security Headers

All responses include security headers:

### X-Content-Type-Options
- **Value**: `nosniff`
- **Purpose**: Prevents MIME type sniffing

### X-Frame-Options
- **Value**: `DENY`
- **Purpose**: Prevents clickjacking attacks

### X-XSS-Protection
- **Value**: `1; mode=block`
- **Purpose**: Enables XSS filtering in browsers

### Referrer-Policy
- **Value**: `strict-origin-when-cross-origin`
- **Purpose**: Controls referrer information

### Permissions-Policy
- **Value**: `geolocation=(), microphone=(), camera=()`
- **Purpose**: Disables unnecessary browser features

### Strict-Transport-Security (HSTS)
- **Value**: `max-age=31536000; includeSubDomains`
- **Purpose**: Forces HTTPS connections (only on HTTPS)
- **Note**: Only added when the request is over TLS

### Content-Security-Policy (CSP)
- **Value**: `default-src 'self'; script-src 'self' 'unsafe-inline' https://cdn.jsdelivr.net; style-src 'self' 'unsafe-inline' https://cdn.jsdelivr.net; img-src 'self' data:; font-src 'self' https://cdn.jsdelivr.net; connect-src 'self';`
- **Purpose**: Restricts resource loading to prevent XSS

## CORS (Cross-Origin Resource Sharing)

### Allowed Origins

By default, the following origins are allowed:

- `http://localhost:8080`
- `http://localhost:3000`
- `http://127.0.0.1:8080`
- Same-origin requests (no Origin header)

### CORS Headers

When a request comes from an allowed origin:

```
Access-Control-Allow-Origin: http://localhost:8080
Access-Control-Allow-Methods: GET, POST, PUT, DELETE, PATCH, OPTIONS
Access-Control-Allow-Headers: Content-Type, Authorization, X-Requested-With
Access-Control-Allow-Credentials: true
Access-Control-Max-Age: 3600
```

### Preflight Requests

OPTIONS requests are handled automatically:

- **Status**: `204 No Content`
- **Headers**: All CORS headers included
- **Purpose**: Browser preflight checks

## Request Size Limits

### Maximum Request Body Size

- **Limit**: 10 MB (10,485,760 bytes)
- **Enforcement**: Automatic via `http.MaxBytesReader`
- **Error**: Returns `413 Request Entity Too Large` if exceeded

### Content-Type Validation

POST, PUT, and PATCH requests must include a valid `Content-Type` header:

**Allowed Types:**
- `application/json`
- `application/x-www-form-urlencoded`
- `multipart/form-data`

**Error Response:**
```json
{
  "code": "BAD_REQUEST",
  "message": "Content-Type must be application/json"
}
```

## Middleware Chain Order

Security middleware is applied in the following order (outer to inner):

1. **CORS** - Handles preflight requests
2. **Security Headers** - Adds security headers
3. **Request Size Limit** - Enforces 10MB limit
4. **Content-Type Validation** - Validates request content type
5. **Rate Limiting** - Enforces rate limits
6. **Error Recovery** - Catches panics
7. **Request ID** - Generates request IDs
8. **Logging** - Logs requests
9. **Audit** - Records audit logs
10. **Authentication** - Validates JWT tokens
11. **Routes** - Handles requests

## Public Endpoints

The following endpoints are excluded from certain security checks:

- `/api/v1/auth/login` - Rate limiting, Content-Type validation
- `/api/v1/auth/logout` - Rate limiting, Content-Type validation
- `/healthz` - Rate limiting, Content-Type validation
- `/metrics` - Rate limiting, Content-Type validation
- `/api/docs` - Rate limiting, Content-Type validation
- `/api/openapi.yaml` - Rate limiting, Content-Type validation

## Best Practices

### For API Consumers

1. **Respect Rate Limits**: Implement exponential backoff when rate limited (a client-side sketch follows at the end of this section)
2. **Use Authentication**: Authenticated users are rate limited per user ID rather than per source IP
3. **Include Content-Type**: Always include `Content-Type: application/json`
4. **Handle Errors**: Check for `429` status and retry after a delay
5. **Request Size**: Keep request bodies under 10MB

### For Administrators

1. **Monitor Rate Limits**: Check logs for rate limit violations
2. **Adjust Limits**: Modify rate limit values in code if needed
3. **CORS Configuration**: Update allowed origins for production
4. **HTTPS**: Always use HTTPS in production so HSTS takes effect
5. **Security Headers**: Review the CSP policy for your use case
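A minimal client-side sketch of the backoff recommended above. The helper, attempt count, and delays are illustrative choices, not part of the API:

```go
package apiclient

import (
	"fmt"
	"net/http"
	"time"
)

// doWithBackoff issues a request without a body (e.g. GET) and retries
// with doubling delays while the server answers 429 Too Many Requests.
func doWithBackoff(client *http.Client, req *http.Request) (*http.Response, error) {
	delay := time.Second
	for attempt := 0; attempt < 5; attempt++ {
		resp, err := client.Do(req)
		if err != nil {
			return nil, err
		}
		if resp.StatusCode != http.StatusTooManyRequests {
			return resp, nil
		}
		resp.Body.Close() // discard the 429 body before retrying
		time.Sleep(delay)
		delay *= 2 // exponential backoff: 1s, 2s, 4s, ...
	}
	return nil, fmt.Errorf("still rate limited after retries")
}
```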
## Configuration

### Rate Limiting

Rate limits are currently hardcoded; to change them, edit the constructor call:

```go
// In rate_limit.go
rateLimiter := NewRateLimiter(100, time.Minute) // 100 req/min
```

### CORS Origins

Update allowed origins in `security_middleware.go`:

```go
allowedOrigins := []string{
    "https://yourdomain.com",
    "https://app.yourdomain.com",
}
```

### Request Size Limit

Modify in `app.go`:

```go
a.requestSizeMiddleware(10*1024*1024) // 10MB
```

## Error Responses

### Rate Limit Exceeded

```json
{
  "code": "SERVICE_UNAVAILABLE",
  "message": "rate limit exceeded",
  "details": "too many requests, please try again later"
}
```

**Status**: `429 Too Many Requests`

### Request Too Large

```json
{
  "code": "BAD_REQUEST",
  "message": "request body too large"
}
```

**Status**: `413 Request Entity Too Large`

### Invalid Content-Type

```json
{
  "code": "BAD_REQUEST",
  "message": "Content-Type must be application/json"
}
```

**Status**: `400 Bad Request`

## Monitoring

### Rate Limit Metrics

Monitor rate limit violations:

- Check audit logs for rate limit events
- Monitor `429` status codes in access logs
- Track rate limit headers in responses

### Security Events

Monitor for security-related events:

- Invalid Content-Type headers
- Request size violations
- CORS violations (check server logs)
- Authentication failures

## Future Enhancements

1. **Configurable Rate Limits**: Environment variable configuration
2. **Per-Endpoint Limits**: Different limits for different endpoints
3. **IP Whitelisting**: Bypass rate limits for trusted IPs
4. **Rate Limit Metrics**: Prometheus metrics for rate limiting
5. **Distributed Rate Limiting**: Redis-based for multi-instance deployments
6. **Advanced CORS**: Configurable CORS via environment variables
7. **Request Timeout**: Configurable request timeout limits

diff --git a/docs/BACKUP_RESTORE.md b/docs/BACKUP_RESTORE.md
new file mode 100644
index 0000000..d012c9e
--- /dev/null
+++ b/docs/BACKUP_RESTORE.md
@@ -0,0 +1,307 @@
# Configuration Backup & Restore

## Overview

AtlasOS provides comprehensive configuration backup and restore functionality, allowing you to save and restore all system configurations including users, storage services (SMB/NFS/iSCSI), and snapshot policies.

## Features

- **Full Configuration Backup**: Backs up all system configurations
- **Compressed Archives**: Backups are stored as gzipped tar archives
- **Metadata Tracking**: Each backup includes metadata (ID, timestamp, description, size)
- **Verification**: Verify backup integrity before restore
- **Dry Run**: Test restore operations without making changes
- **Selective Restore**: Restore specific components or the full system

## Configuration

Set the backup directory using the `ATLAS_BACKUP_DIR` environment variable:

```bash
export ATLAS_BACKUP_DIR=/var/lib/atlas/backups
./atlas-api
```

If not set, it defaults to `data/backups` in the current directory.
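A minimal sketch of how the directory can be resolved at startup, assuming the server reads the variable directly (the helper name is illustrative):

```go
package backup

import (
	"os"
	"path/filepath"
)

// dir returns the backup directory, honoring ATLAS_BACKUP_DIR
// and falling back to the documented default.
func dir() string {
	if d := os.Getenv("ATLAS_BACKUP_DIR"); d != "" {
		return d
	}
	return filepath.Join("data", "backups")
}
```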
## Backup Contents

A backup includes:

- **Users**: All user accounts (passwords cannot be restored - users must reset)
- **SMB Shares**: All SMB/CIFS share configurations
- **NFS Exports**: All NFS export configurations
- **iSCSI Targets**: All iSCSI targets and LUN mappings
- **Snapshot Policies**: All automated snapshot policies
- **System Config**: Database path and other system settings

## API Endpoints

### Create Backup

**POST** `/api/v1/backups`

Creates a new backup of all system configurations.

**Request Body:**
```json
{
  "description": "Backup before major changes"
}
```

**Response:**
```json
{
  "id": "backup-1703123456",
  "created_at": "2024-12-20T10:30:56Z",
  "version": "1.0",
  "description": "Backup before major changes",
  "size": 24576
}
```

**Example:**
```bash
curl -X POST http://localhost:8080/api/v1/backups \
  -H "Authorization: Bearer <token>" \
  -H "Content-Type: application/json" \
  -d '{"description": "Weekly backup"}'
```

### List Backups

**GET** `/api/v1/backups`

Lists all available backups.

**Response:**
```json
[
  {
    "id": "backup-1703123456",
    "created_at": "2024-12-20T10:30:56Z",
    "version": "1.0",
    "description": "Weekly backup",
    "size": 24576
  },
  {
    "id": "backup-1703037056",
    "created_at": "2024-12-19T10:30:56Z",
    "version": "1.0",
    "description": "",
    "size": 18432
  }
]
```

**Example:**
```bash
curl -X GET http://localhost:8080/api/v1/backups \
  -H "Authorization: Bearer <token>"
```

### Get Backup Details

**GET** `/api/v1/backups/{id}`

Retrieves metadata for a specific backup.

**Response:**
```json
{
  "id": "backup-1703123456",
  "created_at": "2024-12-20T10:30:56Z",
  "version": "1.0",
  "description": "Weekly backup",
  "size": 24576
}
```

**Example:**
```bash
curl -X GET http://localhost:8080/api/v1/backups/backup-1703123456 \
  -H "Authorization: Bearer <token>"
```

### Verify Backup

**GET** `/api/v1/backups/{id}?verify=true`

Verifies that a backup file is valid and can be restored.

**Response:**
```json
{
  "message": "backup is valid",
  "backup_id": "backup-1703123456",
  "metadata": {
    "id": "backup-1703123456",
    "created_at": "2024-12-20T10:30:56Z",
    "version": "1.0",
    "description": "Weekly backup",
    "size": 24576
  }
}
```

**Example:**
```bash
curl -X GET "http://localhost:8080/api/v1/backups/backup-1703123456?verify=true" \
  -H "Authorization: Bearer <token>"
```

### Restore Backup

**POST** `/api/v1/backups/{id}/restore`

Restores configuration from a backup.

**Request Body:**
```json
{
  "dry_run": false
}
```

**Parameters:**
- `dry_run` (optional): If `true`, shows what would be restored without making changes

**Response:**
```json
{
  "message": "backup restored successfully",
  "backup_id": "backup-1703123456"
}
```

**Example:**
```bash
# Dry run (test restore)
curl -X POST http://localhost:8080/api/v1/backups/backup-1703123456/restore \
  -H "Authorization: Bearer <token>" \
  -H "Content-Type: application/json" \
  -d '{"dry_run": true}'

# Actual restore
curl -X POST http://localhost:8080/api/v1/backups/backup-1703123456/restore \
  -H "Authorization: Bearer <token>" \
  -H "Content-Type: application/json" \
  -d '{"dry_run": false}'
```

### Delete Backup

**DELETE** `/api/v1/backups/{id}`

Deletes a backup file and its metadata.
**Response:**
```json
{
  "message": "backup deleted",
  "backup_id": "backup-1703123456"
}
```

**Example:**
```bash
curl -X DELETE http://localhost:8080/api/v1/backups/backup-1703123456 \
  -H "Authorization: Bearer <token>"
```

## Restore Process

When restoring a backup:

1. **Verification**: The backup is verified before restore
2. **User Restoration**:
   - Users are restored with temporary passwords
   - The default admin user (user-1) is skipped
   - Users must reset their passwords after restore
3. **Storage Services**:
   - SMB shares, NFS exports, and iSCSI targets are restored
   - Existing configurations are skipped (not overwritten)
   - Service configurations are automatically applied
4. **Snapshot Policies**:
   - Policies are restored by dataset
   - Existing policies are skipped
5. **Service Application**:
   - Samba, NFS, and iSCSI services are reconfigured
   - Errors are logged but don't fail the restore

## Backup File Format

Backups are stored as gzipped tar archives containing:

- `metadata.json`: Backup metadata (ID, timestamp, description, etc.)
- `config.json`: All configuration data (users, shares, exports, targets, policies)
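To illustrate the layout, here is a minimal sketch that extracts `metadata.json` from an archive. It is based only on the format described above, not on the project's actual reader:

```go
package main

import (
	"archive/tar"
	"compress/gzip"
	"encoding/json"
	"fmt"
	"io"
	"os"
)

// readMetadata scans a gzipped tar backup for metadata.json
// and decodes it into a generic map.
func readMetadata(path string) (map[string]any, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	gz, err := gzip.NewReader(f)
	if err != nil {
		return nil, err
	}
	defer gz.Close()

	tr := tar.NewReader(gz)
	for {
		hdr, err := tr.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, err
		}
		if hdr.Name == "metadata.json" {
			var meta map[string]any
			if err := json.NewDecoder(tr).Decode(&meta); err != nil {
				return nil, err
			}
			return meta, nil
		}
	}
	return nil, fmt.Errorf("metadata.json not found in %s", path)
}

func main() {
	meta, err := readMetadata("backup-1703123456.tar.gz")
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Printf("backup %v created at %v\n", meta["id"], meta["created_at"])
}
```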
## Best Practices

1. **Regular Backups**: Create backups before major configuration changes
2. **Verify Before Restore**: Always verify backups before restoring
3. **Test Restores**: Use dry run to test restore operations
4. **Backup Retention**: Keep multiple backups for different time periods
5. **Offsite Storage**: Copy backups to external storage for disaster recovery
6. **Password Management**: Users must reset passwords after restore

## Limitations

- **Passwords**: User passwords cannot be restored (security feature)
- **ZFS Data**: Backups only include configuration, not ZFS pool/dataset data
- **Audit Logs**: Audit logs are not included in backups
- **Jobs**: Background jobs are not included in backups

## Error Handling

- **Invalid Backup**: Verification fails if the backup is corrupted
- **Missing Resources**: Restore skips resources that already exist
- **Service Errors**: Service configuration errors are logged but don't fail the restore
- **Partial Restore**: Restore continues even if some components fail

## Security Considerations

1. **Backup Storage**: Store backups in secure locations
2. **Access Control**: Backup endpoints require authentication
3. **Password Security**: Passwords are never included in backups
4. **Encryption**: Consider encrypting backups for sensitive environments

## Example Workflow

```bash
# 1. Create backup before changes
BACKUP_ID=$(curl -X POST http://localhost:8080/api/v1/backups \
  -H "Authorization: Bearer <token>" \
  -H "Content-Type: application/json" \
  -d '{"description": "Before major changes"}' \
  | jq -r '.id')

# 2. Verify backup
curl -X GET "http://localhost:8080/api/v1/backups/$BACKUP_ID?verify=true" \
  -H "Authorization: Bearer <token>"

# 3. Make configuration changes
# ... make changes ...

# 4. Test restore (dry run)
curl -X POST "http://localhost:8080/api/v1/backups/$BACKUP_ID/restore" \
  -H "Authorization: Bearer <token>" \
  -H "Content-Type: application/json" \
  -d '{"dry_run": true}'

# 5. Restore if needed
curl -X POST "http://localhost:8080/api/v1/backups/$BACKUP_ID/restore" \
  -H "Authorization: Bearer <token>" \
  -H "Content-Type: application/json" \
  -d '{"dry_run": false}'
```

## Future Enhancements

- **Scheduled Backups**: Automatic backup scheduling
- **Incremental Backups**: Only back up changes since the last backup
- **Backup Encryption**: Encrypt backup files
- **Remote Storage**: Support for S3, FTP, etc.
- **Backup Compression**: Additional compression options
- **Selective Restore**: Restore specific components only

diff --git a/docs/ERROR_HANDLING.md b/docs/ERROR_HANDLING.md
new file mode 100644
index 0000000..e14b6a5
--- /dev/null
+++ b/docs/ERROR_HANDLING.md
@@ -0,0 +1,242 @@
# Error Handling & Recovery

## Overview

AtlasOS implements comprehensive error handling with structured error responses, graceful degradation, and automatic recovery mechanisms to ensure system reliability and a good user experience.

## Error Types

### Structured API Errors

All API errors follow a consistent structure:

```json
{
  "code": "NOT_FOUND",
  "message": "dataset not found",
  "details": "tank/missing"
}
```

### Error Codes

- `INTERNAL_ERROR` - Unexpected server errors (500)
- `NOT_FOUND` - Resource not found (404)
- `BAD_REQUEST` - Invalid request parameters (400)
- `CONFLICT` - Resource conflict (409)
- `UNAUTHORIZED` - Authentication required (401)
- `FORBIDDEN` - Insufficient permissions (403)
- `SERVICE_UNAVAILABLE` - Service temporarily unavailable (503)
- `VALIDATION_ERROR` - Input validation failed (400)

## Error Handling Patterns

### 1. Structured Error Responses

All errors use the `errors.APIError` type for consistent formatting:

```go
if resource == nil {
    writeError(w, errors.ErrNotFound("dataset").WithDetails(datasetName))
    return
}
```

### 2. Graceful Degradation

Service operations (SMB/NFS/iSCSI) use graceful degradation:

- **Desired State Stored**: Configuration is always stored in the store
- **Service Application**: Service configuration is applied asynchronously
- **Non-Blocking**: Service failures don't fail API requests
- **Retry Ready**: Failed operations can be retried later

Example:
```go
// Store the configuration (always succeeds)
share, err := a.smbStore.Create(...)

// Apply to service (may fail, but doesn't block)
if err := a.smbService.ApplyConfiguration(shares); err != nil {
    // Log but don't fail - desired state is stored
    log.Printf("SMB service configuration failed (non-fatal): %v", err)
}
```

### 3. Panic Recovery

All HTTP handlers are wrapped with panic recovery middleware:

```go
func (a *App) errorMiddleware(next http.Handler) http.Handler {
    return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
        defer recoverPanic(w, r)
        next.ServeHTTP(w, r)
    })
}
```

Panics are caught and converted to proper error responses instead of crashing the server.

### 4. Atomic Operations with Rollback

Service configuration operations are atomic with automatic rollback:

1. **Write to temporary file** (`*.atlas.tmp`)
2. **Backup existing config** (`.backup`)
3. **Atomically replace** config file
4. **Reload service**
5. **On failure**: Automatically restore the backup

Example (SMB):
```go
// Write the new config to a temporary file
if err := os.WriteFile(tmpPath, config, 0644); err != nil {
    return err
}

// Back up the existing config before replacing it
if data, err := os.ReadFile(configPath); err == nil {
    os.WriteFile(backupPath, data, 0644)
}

// Atomically replace the live config
if err := os.Rename(tmpPath, configPath); err != nil {
    return err
}

// Reload the service; on failure, restore the backup
if err := reloadService(); err != nil {
    os.Rename(backupPath, configPath)
    return err
}
```

## Retry Mechanisms

### Retry Configuration

The `errors.Retry` function provides configurable retry logic:

```go
config := errors.DefaultRetryConfig() // 3 attempts with exponential backoff
err := errors.Retry(func() error {
    return serviceOperation()
}, config)
```

### Default Retry Behavior

- **Max Attempts**: 3
- **Backoff**: Exponential (100ms, 200ms, 400ms)
- **Use Case**: Transient failures (network, temporary service unavailability)
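For reference, here is a minimal sketch of what a helper with these semantics could look like. It illustrates the behavior described above and is not the actual `errors.Retry` implementation:

```go
package errors

import "time"

// RetryConfig mirrors the documented behavior: a maximum attempt
// count and an initial backoff that doubles between attempts.
type RetryConfig struct {
	MaxAttempts int
	Backoff     time.Duration
}

// DefaultRetryConfig matches the documented defaults.
func DefaultRetryConfig() RetryConfig {
	return RetryConfig{MaxAttempts: 3, Backoff: 100 * time.Millisecond}
}

// Retry runs fn up to MaxAttempts times, sleeping between attempts
// with exponentially increasing delays, and returns the last error
// once the attempts are exhausted.
func Retry(fn func() error, cfg RetryConfig) error {
	delay := cfg.Backoff
	var err error
	for attempt := 0; attempt < cfg.MaxAttempts; attempt++ {
		if err = fn(); err == nil {
			return nil
		}
		if attempt < cfg.MaxAttempts-1 {
			time.Sleep(delay)
			delay *= 2 // delays double each retry
		}
	}
	return err
}
```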
## Error Recovery

### Service Configuration Recovery

When service configuration fails:

1. **Configuration is stored** (desired state preserved)
2. **Error is logged** (for debugging)
3. **Operation continues** (API request succeeds)
4. **Manual retry available** (via API or automatic retry later)

### Database Recovery

- **Connection failures**: Logged and retried
- **Transaction failures**: Rolled back automatically
- **Schema errors**: Detected during migration

### ZFS Operation Recovery

- **Command failures**: Returned as errors to the caller
- **Partial failures**: State is preserved, and the operation can be retried
- **Validation**: Performed before destructive operations

## Error Logging

All errors are logged with context:

```go
log.Printf("create SMB share error: %v", err)
log.Printf("%s service error: %v", serviceName, err)
```

Error logs include:
- Error message
- Operation context
- Resource identifiers
- Timestamp (via standard log)

## Best Practices

### 1. Always Use Structured Errors

```go
// Good
writeError(w, errors.ErrNotFound("pool").WithDetails(poolName))

// Avoid
writeJSON(w, http.StatusNotFound, map[string]string{"error": "not found"})
```

### 2. Handle Service Errors Gracefully

```go
// Good - graceful degradation
if err := service.Apply(); err != nil {
    log.Printf("service error (non-fatal): %v", err)
    // Continue - desired state is stored
}

// Avoid - failing the request
if err := service.Apply(); err != nil {
    return err // Don't fail the whole request
}
```

### 3. Validate Before Operations

```go
// Good - validate first
if !datasetExists {
    writeError(w, errors.ErrNotFound("dataset"))
    return
}
// Then perform operation
```

### 4. Use Context for Error Details

```go
// Good - include context
writeError(w, errors.ErrInternal("failed to create pool").WithDetails(err.Error()))

// Avoid - generic errors
writeError(w, errors.ErrInternal("error"))
```

## Error Response Format

All error responses follow this structure:

```json
{
  "code": "ERROR_CODE",
  "message": "Human-readable error message",
  "details": "Additional context (optional)"
}
```

HTTP status codes match error types:
- `400` - Bad Request / Validation Error
- `401` - Unauthorized
- `403` - Forbidden
- `404` - Not Found
- `409` - Conflict
- `500` - Internal Error
- `503` - Service Unavailable

## Future Enhancements

1. **Error Tracking**: Centralized error tracking and alerting
2. **Automatic Retry Queue**: Background retry for failed operations
3. **Error Metrics**: Track error rates by type and endpoint
4. 
**User-Friendly Messages**: More descriptive error messages +5. **Error Correlation**: Link related errors for debugging diff --git a/docs/LOGGING_DIAGNOSTICS.md b/docs/LOGGING_DIAGNOSTICS.md new file mode 100644 index 0000000..7a26769 --- /dev/null +++ b/docs/LOGGING_DIAGNOSTICS.md @@ -0,0 +1,366 @@ +# Logging & Diagnostics + +## Overview + +AtlasOS provides comprehensive logging and diagnostic capabilities to help monitor system health, troubleshoot issues, and understand system behavior. + +## Structured Logging + +### Logger Package + +The `internal/logger` package provides structured logging with: + +- **Log Levels**: DEBUG, INFO, WARN, ERROR +- **JSON Mode**: Optional JSON-formatted output +- **Structured Fields**: Key-value pairs for context +- **Thread-Safe**: Safe for concurrent use + +### Configuration + +Configure logging via environment variables: + +```bash +# Log level (DEBUG, INFO, WARN, ERROR) +export ATLAS_LOG_LEVEL=INFO + +# Log format (json or text) +export ATLAS_LOG_FORMAT=json +``` + +### Usage + +```go +import "gitea.avt.data-center.id/othman.suseno/atlas/internal/logger" + +// Simple logging +logger.Info("User logged in") +logger.Error("Failed to create pool", err) + +// With fields +logger.Info("Pool created", map[string]interface{}{ + "pool": "tank", + "size": "10TB", +}) +``` + +### Log Levels + +- **DEBUG**: Detailed information for debugging +- **INFO**: General informational messages +- **WARN**: Warning messages for potential issues +- **ERROR**: Error messages for failures + +## Request Logging + +### Access Logs + +All HTTP requests are logged with: + +- **Timestamp**: Request time +- **Method**: HTTP method (GET, POST, etc.) +- **Path**: Request path +- **Status**: HTTP status code +- **Duration**: Request processing time +- **Request ID**: Unique request identifier +- **Remote Address**: Client IP address + +**Example Log Entry:** +``` +2024-12-20T10:30:56Z [INFO] 192.168.1.100 GET /api/v1/pools status=200 rid=abc123 dur=45ms +``` + +### Request ID + +Every request gets a unique request ID: + +- **Header**: `X-Request-Id` +- **Usage**: Track requests across services +- **Format**: 32-character hex string + +## Diagnostic Endpoints + +### System Information + +**GET** `/api/v1/system/info` + +Returns comprehensive system information: + +```json +{ + "version": "v0.1.0-dev", + "uptime": "3600 seconds", + "go_version": "go1.21.0", + "num_goroutines": 15, + "memory": { + "alloc": 1048576, + "total_alloc": 52428800, + "sys": 2097152, + "num_gc": 5 + }, + "services": { + "smb": { + "status": "running", + "last_check": "2024-12-20T10:30:56Z" + }, + "nfs": { + "status": "running", + "last_check": "2024-12-20T10:30:56Z" + }, + "iscsi": { + "status": "stopped", + "last_check": "2024-12-20T10:30:56Z" + } + }, + "database": { + "connected": true, + "path": "/var/lib/atlas/atlas.db" + } +} +``` + +### Health Check + +**GET** `/health` + +Detailed health check with component status: + +```json +{ + "status": "healthy", + "timestamp": "2024-12-20T10:30:56Z", + "checks": { + "zfs": "healthy", + "database": "healthy", + "smb": "healthy", + "nfs": "healthy", + "iscsi": "stopped" + } +} +``` + +**Status Values:** +- `healthy`: Component is working correctly +- `degraded`: Some components have issues but system is operational +- `unhealthy`: Critical components are failing + +**HTTP Status Codes:** +- `200 OK`: System is healthy or degraded +- `503 Service Unavailable`: System is unhealthy + +### System Logs + +**GET** `/api/v1/system/logs?limit=100` + +Returns recent system 
logs (from audit logs): + +```json +{ + "logs": [ + { + "timestamp": "2024-12-20T10:30:56Z", + "level": "INFO", + "actor": "user-1", + "action": "pool.create", + "resource": "pool:tank", + "result": "success", + "ip": "192.168.1.100" + } + ], + "count": 1 +} +``` + +**Query Parameters:** +- `limit`: Maximum number of logs to return (default: 100, max: 1000) + +### Garbage Collection + +**POST** `/api/v1/system/gc` + +Triggers garbage collection and returns memory statistics: + +```json +{ + "before": { + "alloc": 1048576, + "total_alloc": 52428800, + "sys": 2097152, + "num_gc": 5 + }, + "after": { + "alloc": 512000, + "total_alloc": 52428800, + "sys": 2097152, + "num_gc": 6 + }, + "freed": 536576 +} +``` + +## Audit Logging + +Audit logs track all mutating operations: + +- **Actor**: User ID or "system" +- **Action**: Operation type (e.g., "pool.create") +- **Resource**: Resource identifier +- **Result**: "success" or "failure" +- **IP**: Client IP address +- **User Agent**: Client user agent +- **Timestamp**: Operation time + +See [Audit Logging Documentation](./AUDIT_LOGGING.md) for details. + +## Log Rotation + +### Current Implementation + +- **In-Memory**: Audit logs stored in memory +- **Rotation**: Automatic rotation when max logs reached +- **Limit**: Configurable (default: 10,000 logs) + +### Future Enhancements + +- **File Logging**: Write logs to files +- **Automatic Rotation**: Rotate log files by size/age +- **Compression**: Compress old log files +- **Retention**: Configurable retention policies + +## Best Practices + +### 1. Use Appropriate Log Levels + +```go +// Debug - detailed information +logger.Debug("Processing request", map[string]interface{}{ + "request_id": reqID, + "user": userID, +}) + +// Info - important events +logger.Info("User logged in", map[string]interface{}{ + "user": userID, +}) + +// Warn - potential issues +logger.Warn("High memory usage", map[string]interface{}{ + "usage": "85%", +}) + +// Error - failures +logger.Error("Failed to create pool", err, map[string]interface{}{ + "pool": poolName, +}) +``` + +### 2. Include Context + +Always include relevant context in logs: + +```go +// Good +logger.Info("Pool created", map[string]interface{}{ + "pool": poolName, + "size": poolSize, + "user": userID, +}) + +// Avoid +logger.Info("Pool created") +``` + +### 3. Use Request IDs + +Include request IDs in logs for tracing: + +```go +reqID := r.Context().Value(requestIDKey).(string) +logger.Info("Processing request", map[string]interface{}{ + "request_id": reqID, +}) +``` + +### 4. 
Monitor Health Endpoints + +Regularly check health endpoints: + +```bash +# Simple health check +curl http://localhost:8080/healthz + +# Detailed health check +curl http://localhost:8080/health + +# System information +curl http://localhost:8080/api/v1/system/info +``` + +## Monitoring + +### Key Metrics + +Monitor these metrics for system health: + +- **Request Duration**: Track in access logs +- **Error Rate**: Count of error responses +- **Memory Usage**: Check via `/api/v1/system/info` +- **Goroutine Count**: Monitor for leaks +- **Service Status**: Check service health + +### Alerting + +Set up alerts for: + +- **Unhealthy Status**: System health check fails +- **High Error Rate**: Too many error responses +- **Memory Leaks**: Continuously increasing memory +- **Service Failures**: Services not running + +## Troubleshooting + +### Check System Health + +```bash +curl http://localhost:8080/health +``` + +### View System Information + +```bash +curl http://localhost:8080/api/v1/system/info +``` + +### Check Recent Logs + +```bash +curl http://localhost:8080/api/v1/system/logs?limit=50 +``` + +### Trigger GC + +```bash +curl -X POST http://localhost:8080/api/v1/system/gc +``` + +### View Request Logs + +Check application logs for request details: + +```bash +# If logging to stdout +./atlas-api | grep "GET /api/v1/pools" + +# If logging to file +tail -f /var/log/atlas-api.log | grep "status=500" +``` + +## Future Enhancements + +1. **File Logging**: Write logs to files with rotation +2. **Log Aggregation**: Support for centralized logging (ELK, Loki) +3. **Structured Logging**: Full JSON logging support +4. **Log Levels per Component**: Different levels for different components +5. **Performance Logging**: Detailed performance metrics +6. **Distributed Tracing**: Request tracing across services +7. **Log Filtering**: Filter logs by level, component, etc. +8. **Real-time Log Streaming**: Stream logs via WebSocket diff --git a/docs/VALIDATION.md b/docs/VALIDATION.md new file mode 100644 index 0000000..5763bf2 --- /dev/null +++ b/docs/VALIDATION.md @@ -0,0 +1,232 @@ +# Input Validation & Sanitization + +## Overview + +AtlasOS implements comprehensive input validation and sanitization to ensure data integrity, security, and prevent injection attacks. All user inputs are validated before processing. 
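A typical handler combines the two steps documented below: sanitize the raw input first, then validate the cleaned value. A condensed sketch (helpers as documented in this file; `req.Name` is an illustrative field):

```go
// Sanitize first, then validate the cleaned value.
name := validation.SanitizeString(req.Name)
if err := validation.ValidateShareName(name); err != nil {
    writeError(w, errors.ErrValidation(err.Error()))
    return
}
```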
+ +## Validation Rules + +### ZFS Names (Pools, Datasets, ZVOLs, Snapshots) + +**Rules:** +- Must start with alphanumeric character +- Can contain: `a-z`, `A-Z`, `0-9`, `_`, `-`, `.`, `:` +- Cannot start with `-` or `.` +- Maximum length: 256 characters +- Cannot be empty + +**Example:** +```go +if err := validation.ValidateZFSName("tank/data"); err != nil { + // Handle error +} +``` + +### Usernames + +**Rules:** +- Minimum length: 3 characters +- Maximum length: 32 characters +- Can contain: `a-z`, `A-Z`, `0-9`, `_`, `-`, `.` +- Must start with alphanumeric character + +**Example:** +```go +if err := validation.ValidateUsername("admin"); err != nil { + // Handle error +} +``` + +### Passwords + +**Rules:** +- Minimum length: 8 characters +- Maximum length: 128 characters +- Must contain at least one letter +- Must contain at least one number + +**Example:** +```go +if err := validation.ValidatePassword("SecurePass123"); err != nil { + // Handle error +} +``` + +### Email Addresses + +**Rules:** +- Optional field (can be empty) +- Maximum length: 254 characters +- Must match email format pattern +- Basic format validation (RFC 5322 simplified) + +**Example:** +```go +if err := validation.ValidateEmail("user@example.com"); err != nil { + // Handle error +} +``` + +### SMB Share Names + +**Rules:** +- Maximum length: 80 characters +- Can contain: `a-z`, `A-Z`, `0-9`, `_`, `-`, `.` +- Cannot be reserved Windows names (CON, PRN, AUX, NUL, COM1-9, LPT1-9) +- Must start with alphanumeric character + +**Example:** +```go +if err := validation.ValidateShareName("data-share"); err != nil { + // Handle error +} +``` + +### iSCSI IQN (Qualified Name) + +**Rules:** +- Must start with `iqn.` +- Format: `iqn.yyyy-mm.reversed.domain:identifier` +- Maximum length: 223 characters +- Year-month format validation + +**Example:** +```go +if err := validation.ValidateIQN("iqn.2024-12.com.atlas:storage.target1"); err != nil { + // Handle error +} +``` + +### Size Strings + +**Rules:** +- Format: number followed by optional unit (K, M, G, T, P) +- Units: K (kilobytes), M (megabytes), G (gigabytes), T (terabytes), P (petabytes) +- Case insensitive + +**Examples:** +- `"10"` - 10 bytes +- `"10K"` - 10 kilobytes +- `"1G"` - 1 gigabyte +- `"2T"` - 2 terabytes + +**Example:** +```go +if err := validation.ValidateSize("10G"); err != nil { + // Handle error +} +``` + +### Filesystem Paths + +**Rules:** +- Must be absolute (start with `/`) +- Maximum length: 4096 characters +- Cannot contain `..` (path traversal) +- Cannot contain `//` (double slashes) +- Cannot contain null bytes + +**Example:** +```go +if err := validation.ValidatePath("/tank/data"); err != nil { + // Handle error +} +``` + +### CIDR/Hostname (NFS Clients) + +**Rules:** +- Can be wildcard: `*` +- Can be CIDR notation: `192.168.1.0/24` +- Can be hostname: `server.example.com` +- Hostname must follow DNS rules + +**Example:** +```go +if err := validation.ValidateCIDR("192.168.1.0/24"); err != nil { + // Handle error +} +``` + +## Sanitization + +### String Sanitization + +Removes potentially dangerous characters: +- Null bytes (`\x00`) +- Control characters (ASCII < 32, except space) +- Removes leading/trailing whitespace + +**Example:** +```go +clean := validation.SanitizeString(userInput) +``` + +### Path Sanitization + +Normalizes filesystem paths: +- Removes leading/trailing whitespace +- Normalizes slashes (backslash to forward slash) +- Removes multiple consecutive slashes + +**Example:** +```go +cleanPath := 
validation.SanitizePath("/tank//data/")
// Result: "/tank/data"
```

## Integration

### In API Handlers

Validation is integrated into all create/update handlers:

```go
func (a *App) handleCreatePool(w http.ResponseWriter, r *http.Request) {
    // ... decode request ...

    // Validate pool name
    if err := validation.ValidateZFSName(req.Name); err != nil {
        writeError(w, errors.ErrValidation(err.Error()))
        return
    }

    // ... continue with creation ...
}
```

### Error Responses

Validation errors return structured error responses:

```json
{
  "code": "VALIDATION_ERROR",
  "message": "validation error on field 'name': name cannot be empty",
  "details": ""
}
```

## Security Benefits

1. **Injection Prevention**: Validated inputs prevent command injection
2. **Path Traversal Protection**: Path validation prevents directory traversal
3. **Data Integrity**: Ensures data conforms to expected formats
4. **System Stability**: Prevents invalid operations that could crash services
5. **User Experience**: Clear error messages guide users to correct input

## Best Practices

1. **Validate Early**: Validate inputs as soon as they're received
2. **Sanitize Before Storage**: Sanitize strings before storing them in the database
3. **Validate Format**: Check format before parsing (e.g., size strings)
4. **Check Length**: Enforce maximum lengths to prevent DoS
5. **Whitelist Characters**: Only allow known-safe characters

## Future Enhancements

1. **Custom Validators**: Domain-specific validation rules
2. **Validation Middleware**: Automatic validation for all endpoints
3. **Schema Validation**: JSON schema validation
4. **Rate Limiting**: Prevent abuse through validation
5. **Input Normalization**: Automatic normalization of valid inputs

diff --git a/docs/openapi.yaml b/docs/openapi.yaml
new file mode 100644
index 0000000..328ccc2
--- /dev/null
+++ b/docs/openapi.yaml
@@ -0,0 +1,1866 @@
openapi: 3.0.3
info:
  title: atlasOS Storage Controller API
  description: |
    REST API for managing ZFS storage, storage services (SMB/NFS/iSCSI), snapshots, and system configuration.

    ## Authentication
    Most endpoints require authentication via JWT token. 
Include the token in the Authorization header: + ``` + Authorization: Bearer + ``` + + ## Roles + - **Administrator**: Full system access + - **Operator**: Storage and service operations + - **Viewer**: Read-only access + + version: 1.0.0 + contact: + name: atlasOS Support + url: https://github.com/atlasos + +servers: + - url: http://localhost:8080 + description: Local development server + - url: https://atlas.example.com + description: Production server + +tags: + - name: Authentication + description: User authentication and authorization + - name: Users + description: User management (Administrator only) + - name: ZFS + description: ZFS pool, dataset, and ZVOL management + - name: Snapshots + description: Snapshot management and policies + - name: Storage Services + description: SMB, NFS, and iSCSI service management + - name: Jobs + description: Background job management + - name: Audit + description: Audit log access + - name: System + description: System health and metrics + +paths: + /api/v1/auth/login: + post: + tags: + - Authentication + summary: Authenticate user + description: Login with username and password to receive JWT token + operationId: login + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - username + - password + properties: + username: + type: string + example: admin + password: + type: string + format: password + example: admin + responses: + '200': + description: Login successful + content: + application/json: + schema: + type: object + properties: + token: + type: string + description: JWT token for authentication + user: + $ref: '#/components/schemas/User' + expires_in: + type: integer + description: Token expiration in seconds + example: 86400 + '401': + $ref: '#/components/responses/Unauthorized' + '400': + $ref: '#/components/responses/BadRequest' + + /api/v1/auth/logout: + post: + tags: + - Authentication + summary: Logout user + description: Logout (client-side token removal for stateless JWT) + operationId: logout + security: + - bearerAuth: [] + responses: + '200': + description: Logout successful + content: + application/json: + schema: + type: object + properties: + message: + type: string + example: logged out + + /api/v1/users: + get: + tags: + - Users + summary: List users + description: List all users (requires authentication) + operationId: listUsers + security: + - bearerAuth: [] + responses: + '200': + description: List of users + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/User' + post: + tags: + - Users + summary: Create user + description: Create a new user (Administrator only) + operationId: createUser + security: + - bearerAuth: [] + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - username + - password + properties: + username: + type: string + email: + type: string + format: email + password: + type: string + format: password + role: + $ref: '#/components/schemas/Role' + responses: + '201': + description: User created + content: + application/json: + schema: + $ref: '#/components/schemas/User' + '400': + $ref: '#/components/responses/BadRequest' + '401': + $ref: '#/components/responses/Unauthorized' + '403': + $ref: '#/components/responses/Forbidden' + '409': + $ref: '#/components/responses/Conflict' + + /api/v1/users/{id}: + get: + tags: + - Users + summary: Get user + operationId: getUser + security: + - bearerAuth: [] + parameters: + - $ref: '#/components/parameters/UserId' + responses: 
+ '200': + description: User details + content: + application/json: + schema: + $ref: '#/components/schemas/User' + '404': + $ref: '#/components/responses/NotFound' + put: + tags: + - Users + summary: Update user + description: Update user (Administrator only) + operationId: updateUser + security: + - bearerAuth: [] + parameters: + - $ref: '#/components/parameters/UserId' + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + email: + type: string + format: email + role: + $ref: '#/components/schemas/Role' + active: + type: boolean + responses: + '200': + description: User updated + content: + application/json: + schema: + $ref: '#/components/schemas/User' + '404': + $ref: '#/components/responses/NotFound' + delete: + tags: + - Users + summary: Delete user + description: Delete user (Administrator only, cannot delete yourself) + operationId: deleteUser + security: + - bearerAuth: [] + parameters: + - $ref: '#/components/parameters/UserId' + responses: + '200': + description: User deleted + '400': + $ref: '#/components/responses/BadRequest' + '404': + $ref: '#/components/responses/NotFound' + + /api/v1/pools: + get: + tags: + - ZFS + summary: List ZFS pools + operationId: listPools + security: + - bearerAuth: [] + responses: + '200': + description: List of ZFS pools + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/Pool' + post: + tags: + - ZFS + summary: Create ZFS pool + operationId: createPool + security: + - bearerAuth: [] + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - name + - vdevs + properties: + name: + type: string + example: tank + vdevs: + type: array + items: + type: string + example: ["/dev/sdb", "/dev/sdc"] + options: + type: object + additionalProperties: + type: string + responses: + '201': + description: Pool created + content: + application/json: + schema: + $ref: '#/components/schemas/Pool' + + /api/v1/pools/{name}: + get: + tags: + - ZFS + summary: Get pool details + operationId: getPool + security: + - bearerAuth: [] + parameters: + - name: name + in: path + required: true + schema: + type: string + responses: + '200': + description: Pool details + content: + application/json: + schema: + $ref: '#/components/schemas/Pool' + '404': + $ref: '#/components/responses/NotFound' + delete: + tags: + - ZFS + summary: Delete pool + operationId: deletePool + security: + - bearerAuth: [] + parameters: + - name: name + in: path + required: true + schema: + type: string + responses: + '200': + description: Pool deleted + '404': + $ref: '#/components/responses/NotFound' + + /api/v1/pools/{name}/scrub: + post: + tags: + - ZFS + summary: Scrub pool + operationId: scrubPool + security: + - bearerAuth: [] + parameters: + - name: name + in: path + required: true + schema: + type: string + responses: + '200': + description: Scrub started + content: + application/json: + schema: + type: object + properties: + message: + type: string + example: scrub started + + /api/v1/datasets: + get: + tags: + - ZFS + summary: List datasets + operationId: listDatasets + security: + - bearerAuth: [] + parameters: + - name: pool + in: query + schema: + type: string + description: Filter by pool name + responses: + '200': + description: List of datasets + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/Dataset' + post: + tags: + - ZFS + summary: Create dataset + operationId: createDataset + security: + - bearerAuth: [] 
+ requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - name + properties: + name: + type: string + example: tank/data + options: + type: object + additionalProperties: + type: string + responses: + '201': + description: Dataset created + content: + application/json: + schema: + $ref: '#/components/schemas/Dataset' + + /api/v1/datasets/{name}: + get: + tags: + - ZFS + summary: Get dataset + operationId: getDataset + security: + - bearerAuth: [] + parameters: + - name: name + in: path + required: true + schema: + type: string + responses: + '200': + description: Dataset details + content: + application/json: + schema: + $ref: '#/components/schemas/Dataset' + '404': + $ref: '#/components/responses/NotFound' + put: + tags: + - ZFS + summary: Update dataset + operationId: updateDataset + security: + - bearerAuth: [] + parameters: + - name: name + in: path + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + options: + type: object + additionalProperties: + type: string + responses: + '200': + description: Dataset updated + content: + application/json: + schema: + $ref: '#/components/schemas/Dataset' + delete: + tags: + - ZFS + summary: Delete dataset + operationId: deleteDataset + security: + - bearerAuth: [] + parameters: + - name: name + in: path + required: true + schema: + type: string + responses: + '200': + description: Dataset deleted + '404': + $ref: '#/components/responses/NotFound' + + /api/v1/zvols: + get: + tags: + - ZFS + summary: List ZVOLs + operationId: listZVOLs + security: + - bearerAuth: [] + parameters: + - name: pool + in: query + schema: + type: string + description: Filter by pool name + responses: + '200': + description: List of ZVOLs + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/ZVOL' + post: + tags: + - ZFS + summary: Create ZVOL + operationId: createZVOL + security: + - bearerAuth: [] + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - name + - size + properties: + name: + type: string + example: tank/block/vol1 + size: + type: string + description: Size in human-readable format (e.g., "10G", "1T") + example: 10G + options: + type: object + additionalProperties: + type: string + responses: + '201': + description: ZVOL created + content: + application/json: + schema: + $ref: '#/components/schemas/ZVOL' + + /api/v1/zvols/{name}: + get: + tags: + - ZFS + summary: Get ZVOL + operationId: getZVOL + security: + - bearerAuth: [] + parameters: + - name: name + in: path + required: true + schema: + type: string + responses: + '200': + description: ZVOL details + content: + application/json: + schema: + $ref: '#/components/schemas/ZVOL' + '404': + $ref: '#/components/responses/NotFound' + delete: + tags: + - ZFS + summary: Delete ZVOL + operationId: deleteZVOL + security: + - bearerAuth: [] + parameters: + - name: name + in: path + required: true + schema: + type: string + responses: + '200': + description: ZVOL deleted + '404': + $ref: '#/components/responses/NotFound' + + /api/v1/snapshots: + get: + tags: + - Snapshots + summary: List snapshots + operationId: listSnapshots + security: + - bearerAuth: [] + parameters: + - name: dataset + in: query + schema: + type: string + description: Filter by dataset name + responses: + '200': + description: List of snapshots + content: + application/json: + schema: + type: array + items: + $ref: 
'#/components/schemas/Snapshot' + post: + tags: + - Snapshots + summary: Create snapshot + operationId: createSnapshot + security: + - bearerAuth: [] + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - dataset + - name + properties: + dataset: + type: string + example: tank/data + name: + type: string + example: manual-20241215 + responses: + '201': + description: Snapshot created + content: + application/json: + schema: + $ref: '#/components/schemas/Snapshot' + + /api/v1/snapshots/{name}: + get: + tags: + - Snapshots + summary: Get snapshot + operationId: getSnapshot + security: + - bearerAuth: [] + parameters: + - name: name + in: path + required: true + schema: + type: string + description: Full snapshot name (dataset@snapshot) + responses: + '200': + description: Snapshot details + content: + application/json: + schema: + $ref: '#/components/schemas/Snapshot' + '404': + $ref: '#/components/responses/NotFound' + delete: + tags: + - Snapshots + summary: Delete snapshot + operationId: deleteSnapshot + security: + - bearerAuth: [] + parameters: + - name: name + in: path + required: true + schema: + type: string + responses: + '200': + description: Snapshot deleted + '404': + $ref: '#/components/responses/NotFound' + + /api/v1/snapshot-policies: + get: + tags: + - Snapshots + summary: List snapshot policies + operationId: listSnapshotPolicies + security: + - bearerAuth: [] + responses: + '200': + description: List of snapshot policies + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/SnapshotPolicy' + post: + tags: + - Snapshots + summary: Create snapshot policy + operationId: createSnapshotPolicy + security: + - bearerAuth: [] + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/SnapshotPolicy' + responses: + '201': + description: Policy created + content: + application/json: + schema: + $ref: '#/components/schemas/SnapshotPolicy' + + /api/v1/snapshot-policies/{dataset}: + get: + tags: + - Snapshots + summary: Get snapshot policy + operationId: getSnapshotPolicy + security: + - bearerAuth: [] + parameters: + - name: dataset + in: path + required: true + schema: + type: string + responses: + '200': + description: Policy details + content: + application/json: + schema: + $ref: '#/components/schemas/SnapshotPolicy' + '404': + $ref: '#/components/responses/NotFound' + put: + tags: + - Snapshots + summary: Update snapshot policy + operationId: updateSnapshotPolicy + security: + - bearerAuth: [] + parameters: + - name: dataset + in: path + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/SnapshotPolicy' + responses: + '200': + description: Policy updated + content: + application/json: + schema: + $ref: '#/components/schemas/SnapshotPolicy' + delete: + tags: + - Snapshots + summary: Delete snapshot policy + operationId: deleteSnapshotPolicy + security: + - bearerAuth: [] + parameters: + - name: dataset + in: path + required: true + schema: + type: string + responses: + '200': + description: Policy deleted + '404': + $ref: '#/components/responses/NotFound' + + /api/v1/shares/smb: + get: + tags: + - Storage Services + summary: List SMB shares + operationId: listSMBShares + security: + - bearerAuth: [] + responses: + '200': + description: List of SMB shares + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/SMBShare' + 
post: + tags: + - Storage Services + summary: Create SMB share + operationId: createSMBShare + security: + - bearerAuth: [] + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - name + - dataset + properties: + name: + type: string + example: data + path: + type: string + description: Mount point path (auto-filled from dataset if not provided) + dataset: + type: string + example: tank/data + description: + type: string + read_only: + type: boolean + default: false + guest_ok: + type: boolean + default: false + valid_users: + type: array + items: + type: string + responses: + '201': + description: Share created + content: + application/json: + schema: + $ref: '#/components/schemas/SMBShare' + + /api/v1/shares/smb/{id}: + get: + tags: + - Storage Services + summary: Get SMB share + operationId: getSMBShare + security: + - bearerAuth: [] + parameters: + - name: id + in: path + required: true + schema: + type: string + responses: + '200': + description: Share details + content: + application/json: + schema: + $ref: '#/components/schemas/SMBShare' + '404': + $ref: '#/components/responses/NotFound' + put: + tags: + - Storage Services + summary: Update SMB share + operationId: updateSMBShare + security: + - bearerAuth: [] + parameters: + - name: id + in: path + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + description: + type: string + read_only: + type: boolean + guest_ok: + type: boolean + enabled: + type: boolean + valid_users: + type: array + items: + type: string + responses: + '200': + description: Share updated + content: + application/json: + schema: + $ref: '#/components/schemas/SMBShare' + delete: + tags: + - Storage Services + summary: Delete SMB share + operationId: deleteSMBShare + security: + - bearerAuth: [] + parameters: + - name: id + in: path + required: true + schema: + type: string + responses: + '200': + description: Share deleted + '404': + $ref: '#/components/responses/NotFound' + + /api/v1/exports/nfs: + get: + tags: + - Storage Services + summary: List NFS exports + operationId: listNFSExports + security: + - bearerAuth: [] + responses: + '200': + description: List of NFS exports + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/NFSExport' + post: + tags: + - Storage Services + summary: Create NFS export + operationId: createNFSExport + security: + - bearerAuth: [] + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - dataset + properties: + path: + type: string + description: Mount point path (auto-filled from dataset if not provided) + dataset: + type: string + example: tank/data + clients: + type: array + items: + type: string + description: Allowed clients (CIDR or hostnames), default ["*"] + example: ["192.168.1.0/24"] + read_only: + type: boolean + default: false + root_squash: + type: boolean + default: true + responses: + '201': + description: Export created + content: + application/json: + schema: + $ref: '#/components/schemas/NFSExport' + + /api/v1/exports/nfs/{id}: + get: + tags: + - Storage Services + summary: Get NFS export + operationId: getNFSExport + security: + - bearerAuth: [] + parameters: + - name: id + in: path + required: true + schema: + type: string + responses: + '200': + description: Export details + content: + application/json: + schema: + $ref: '#/components/schemas/NFSExport' + '404': + $ref: 
'#/components/responses/NotFound' + put: + tags: + - Storage Services + summary: Update NFS export + operationId: updateNFSExport + security: + - bearerAuth: [] + parameters: + - name: id + in: path + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + clients: + type: array + items: + type: string + read_only: + type: boolean + root_squash: + type: boolean + enabled: + type: boolean + responses: + '200': + description: Export updated + content: + application/json: + schema: + $ref: '#/components/schemas/NFSExport' + delete: + tags: + - Storage Services + summary: Delete NFS export + operationId: deleteNFSExport + security: + - bearerAuth: [] + parameters: + - name: id + in: path + required: true + schema: + type: string + responses: + '200': + description: Export deleted + '404': + $ref: '#/components/responses/NotFound' + + /api/v1/iscsi/targets: + get: + tags: + - Storage Services + summary: List iSCSI targets + operationId: listISCSITargets + security: + - bearerAuth: [] + responses: + '200': + description: List of iSCSI targets + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/ISCSITarget' + post: + tags: + - Storage Services + summary: Create iSCSI target + operationId: createISCSITarget + security: + - bearerAuth: [] + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - iqn + properties: + iqn: + type: string + description: iSCSI Qualified Name + example: iqn.2024-12.com.atlas:storage.target1 + initiators: + type: array + items: + type: string + description: Allowed initiator IQNs + responses: + '201': + description: Target created + content: + application/json: + schema: + $ref: '#/components/schemas/ISCSITarget' + + /api/v1/iscsi/targets/{id}: + get: + tags: + - Storage Services + summary: Get iSCSI target + operationId: getISCSITarget + security: + - bearerAuth: [] + parameters: + - name: id + in: path + required: true + schema: + type: string + responses: + '200': + description: Target details + content: + application/json: + schema: + $ref: '#/components/schemas/ISCSITarget' + '404': + $ref: '#/components/responses/NotFound' + put: + tags: + - Storage Services + summary: Update iSCSI target + operationId: updateISCSITarget + security: + - bearerAuth: [] + parameters: + - name: id + in: path + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + initiators: + type: array + items: + type: string + enabled: + type: boolean + responses: + '200': + description: Target updated + content: + application/json: + schema: + $ref: '#/components/schemas/ISCSITarget' + delete: + tags: + - Storage Services + summary: Delete iSCSI target + operationId: deleteISCSITarget + security: + - bearerAuth: [] + parameters: + - name: id + in: path + required: true + schema: + type: string + responses: + '200': + description: Target deleted + '404': + $ref: '#/components/responses/NotFound' + + /api/v1/iscsi/targets/{id}/luns: + post: + tags: + - Storage Services + summary: Add LUN to target + operationId: addLUN + security: + - bearerAuth: [] + parameters: + - name: id + in: path + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - zvol + properties: + zvol: + type: string + example: tank/block/vol1 + responses: + '201': + description: LUN 
added + content: + application/json: + schema: + $ref: '#/components/schemas/LUN' + '404': + $ref: '#/components/responses/NotFound' + '409': + $ref: '#/components/responses/Conflict' + + /api/v1/iscsi/targets/{id}/luns/remove: + post: + tags: + - Storage Services + summary: Remove LUN from target + operationId: removeLUN + security: + - bearerAuth: [] + parameters: + - name: id + in: path + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - lun_id + properties: + lun_id: + type: integer + example: 0 + responses: + '200': + description: LUN removed + '404': + $ref: '#/components/responses/NotFound' + + /api/v1/jobs: + get: + tags: + - Jobs + summary: List jobs + operationId: listJobs + security: + - bearerAuth: [] + parameters: + - name: status + in: query + schema: + $ref: '#/components/schemas/JobStatus' + description: Filter by job status + responses: + '200': + description: List of jobs + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/Job' + + /api/v1/jobs/{id}: + get: + tags: + - Jobs + summary: Get job + operationId: getJob + security: + - bearerAuth: [] + parameters: + - name: id + in: path + required: true + schema: + type: string + responses: + '200': + description: Job details + content: + application/json: + schema: + $ref: '#/components/schemas/Job' + '404': + $ref: '#/components/responses/NotFound' + + /api/v1/jobs/{id}/cancel: + post: + tags: + - Jobs + summary: Cancel job + operationId: cancelJob + security: + - bearerAuth: [] + parameters: + - name: id + in: path + required: true + schema: + type: string + responses: + '200': + description: Job cancelled + '404': + $ref: '#/components/responses/NotFound' + + /api/v1/audit: + get: + tags: + - Audit + summary: List audit logs + operationId: listAuditLogs + security: + - bearerAuth: [] + parameters: + - name: actor + in: query + schema: + type: string + description: Filter by actor (user ID) + - name: action + in: query + schema: + type: string + description: Filter by action (e.g., "pools.create") + - name: resource + in: query + schema: + type: string + description: Filter by resource + - name: limit + in: query + schema: + type: integer + default: 100 + description: Limit number of results + responses: + '200': + description: List of audit logs + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/AuditLog' + + /api/v1/dashboard: + get: + tags: + - System + summary: Get dashboard data + operationId: getDashboard + security: + - bearerAuth: [] + responses: + '200': + description: Dashboard statistics + content: + application/json: + schema: + type: object + properties: + storage: + type: object + properties: + total_capacity: + type: integer + format: int64 + pool_count: + type: integer + dataset_count: + type: integer + zvol_count: + type: integer + snapshot_count: + type: integer + services: + type: object + properties: + smb_shares: + type: integer + nfs_exports: + type: integer + iscsi_targets: + type: integer + smb_status: + type: boolean + nfs_status: + type: boolean + iscsi_status: + type: boolean + jobs: + type: object + properties: + total: + type: integer + running: + type: integer + completed: + type: integer + failed: + type: integer + + /healthz: + get: + tags: + - System + summary: Health check + operationId: healthz + responses: + '200': + description: Service is healthy + content: + application/json: + schema: + type: object + 
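+                # Illustrative response body (example values only, not a
+                # schema constraint):
+                #   {"status": "ok", "ts": "2024-12-15T14:30:00Z"}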
properties:
+                  status:
+                    type: string
+                    example: ok
+                  ts:
+                    type: string
+                    description: Timestamp of the health check response
+
+  /metrics:
+    get:
+      tags:
+        - System
+      summary: Prometheus metrics
+      operationId: metrics
+      description: Returns metrics in Prometheus format
+      responses:
+        '200':
+          description: Prometheus metrics
+          content:
+            text/plain:
+              schema:
+                type: string
+
+components:
+  securitySchemes:
+    bearerAuth:
+      type: http
+      scheme: bearer
+      bearerFormat: JWT
+      description: JWT token obtained from /api/v1/auth/login
+
+  parameters:
+    UserId:
+      name: id
+      in: path
+      required: true
+      schema:
+        type: string
+      description: User ID
+
+  schemas:
+    User:
+      type: object
+      properties:
+        id:
+          type: string
+          example: user-1
+        username:
+          type: string
+          example: admin
+        email:
+          type: string
+          format: email
+          example: admin@example.com
+        role:
+          $ref: '#/components/schemas/Role'
+        active:
+          type: boolean
+        created_at:
+          type: string
+          format: date-time
+        updated_at:
+          type: string
+          format: date-time
+
+    Role:
+      type: string
+      enum:
+        - administrator
+        - operator
+        - viewer
+      example: administrator
+
+    Pool:
+      type: object
+      properties:
+        name:
+          type: string
+          example: tank
+        status:
+          type: string
+          enum: [ONLINE, DEGRADED, FAULTED, OFFLINE]
+          example: ONLINE
+        size:
+          type: integer
+          format: int64
+          description: Total size in bytes
+        allocated:
+          type: integer
+          format: int64
+          description: Allocated space in bytes
+        free:
+          type: integer
+          format: int64
+          description: Free space in bytes
+        health:
+          type: string
+          example: ONLINE
+        created_at:
+          type: string
+          format: date-time
+
+    Dataset:
+      type: object
+      properties:
+        name:
+          type: string
+          example: tank/data
+        pool:
+          type: string
+          example: tank
+        type:
+          type: string
+          enum: [filesystem, volume]
+        size:
+          type: integer
+          format: int64
+        used:
+          type: integer
+          format: int64
+        available:
+          type: integer
+          format: int64
+        mountpoint:
+          type: string
+          example: /tank/data
+        created_at:
+          type: string
+          format: date-time
+
+    ZVOL:
+      type: object
+      properties:
+        name:
+          type: string
+          example: tank/block/vol1
+        pool:
+          type: string
+          example: tank
+        size:
+          type: integer
+          format: int64
+          description: Size in bytes
+        used:
+          type: integer
+          format: int64
+        created_at:
+          type: string
+          format: date-time
+
+    Snapshot:
+      type: object
+      properties:
+        name:
+          type: string
+          example: tank/data@hourly-20241215-143000
+        dataset:
+          type: string
+          example: tank/data
+        size:
+          type: integer
+          format: int64
+        created_at:
+          type: string
+          format: date-time
+
+    SnapshotPolicy:
+      type: object
+      properties:
+        dataset:
+          type: string
+          example: tank/data
+        frequent:
+          type: integer
+          description: Keep N frequent snapshots
+          example: 4
+        hourly:
+          type: integer
+          description: Keep N hourly snapshots
+          example: 24
+        daily:
+          type: integer
+          description: Keep N daily snapshots
+          example: 7
+        weekly:
+          type: integer
+          description: Keep N weekly snapshots
+          example: 4
+        monthly:
+          type: integer
+          description: Keep N monthly snapshots
+          example: 12
+        yearly:
+          type: integer
+          description: Keep N yearly snapshots
+          example: 2
+        autosnap:
+          type: boolean
+          description: Enable automatic snapshots
+        autoprune:
+          type: boolean
+          description: Enable automatic pruning
+
+    SMBShare:
+      type: object
+      properties:
+        id:
+          type: string
+          example: smb-1
+        name:
+          type: string
+          example: data
+        path:
+          type: string
+          example: /tank/data
+        dataset:
+          type: string
+          example: tank/data
+        description:
+          type: string
+        read_only:
+          type: boolean
+        guest_ok:
+          type: boolean
+        valid_users:
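+          # Usernames allowed to access the share; illustrative
+          # example value: ["alice", "bob"]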
+ type: array + items: + type: string + enabled: + type: boolean + + NFSExport: + type: object + properties: + id: + type: string + example: nfs-1 + path: + type: string + example: /tank/data + dataset: + type: string + example: tank/data + clients: + type: array + items: + type: string + example: ["192.168.1.0/24", "10.0.0.0/8"] + read_only: + type: boolean + root_squash: + type: boolean + enabled: + type: boolean + + ISCSITarget: + type: object + properties: + id: + type: string + example: iscsi-1 + iqn: + type: string + example: iqn.2024-12.com.atlas:storage.target1 + luns: + type: array + items: + $ref: '#/components/schemas/LUN' + initiators: + type: array + items: + type: string + example: ["iqn.2024-12.com.client:initiator1"] + enabled: + type: boolean + + LUN: + type: object + properties: + id: + type: integer + description: LUN number + example: 0 + zvol: + type: string + example: tank/block/vol1 + size: + type: integer + format: int64 + description: Size in bytes + backend: + type: string + example: zvol + + Job: + type: object + properties: + id: + type: string + example: job-1 + type: + type: string + example: snapshot.create + status: + $ref: '#/components/schemas/JobStatus' + progress: + type: integer + description: Progress percentage (0-100) + message: + type: string + error: + type: string + created_at: + type: string + format: date-time + started_at: + type: string + format: date-time + completed_at: + type: string + format: date-time + + JobStatus: + type: string + enum: + - pending + - running + - completed + - failed + - cancelled + + AuditLog: + type: object + properties: + id: + type: string + example: audit-1 + actor: + type: string + description: User ID or "system" + example: user-1 + action: + type: string + example: pools.create + resource: + type: string + example: pools/tank + result: + type: string + enum: [success, failure] + message: + type: string + ip: + type: string + example: 192.168.1.100 + user_agent: + type: string + example: curl/7.68.0 + timestamp: + type: string + format: date-time + + Error: + type: object + properties: + code: + type: string + example: NOT_FOUND + message: + type: string + example: dataset not found + details: + type: string + example: tank/missing + + responses: + BadRequest: + description: Bad request + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + Unauthorized: + description: Unauthorized + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + Forbidden: + description: Forbidden + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + NotFound: + description: Resource not found + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + Conflict: + description: Resource conflict + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + InternalServerError: + description: Internal server error + content: + application/json: + schema: + $ref: '#/components/schemas/Error' diff --git a/internal/backup/service.go b/internal/backup/service.go new file mode 100644 index 0000000..d754795 --- /dev/null +++ b/internal/backup/service.go @@ -0,0 +1,350 @@ +package backup + +import ( + "archive/tar" + "compress/gzip" + "encoding/json" + "fmt" + "io" + "os" + "path/filepath" + "strings" + "time" + + "gitea.avt.data-center.id/othman.suseno/atlas/internal/models" +) + +// Service handles configuration backup and restore operations +type Service struct { + backupDir string +} + +// BackupMetadata contains information about 
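a backup.
+
+// Illustrative usage sketch (not part of the PR's code): create a backup
+// of the current configuration and read back its sidecar metadata. The
+// identifiers mirror the definitions below; error handling is elided.
+//
+//	svc, _ := backup.New("data/backups")
+//	id, _ := svc.CreateBackup(backup.BackupData{}, "nightly")
+//	meta, _ := svc.GetBackup(id) // reads <backupDir>/<id>.meta.json
+//	fmt.Println(meta.ID, meta.Size)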
+type BackupMetadata struct {
+	ID          string    `json:"id"`
+	CreatedAt   time.Time `json:"created_at"`
+	Version     string    `json:"version"`
+	Description string    `json:"description,omitempty"`
+	Size        int64     `json:"size"`
+	Checksum    string    `json:"checksum,omitempty"`
+}
+
+// BackupData contains all configuration data to be backed up
+type BackupData struct {
+	Metadata     BackupMetadata          `json:"metadata"`
+	Users        []models.User           `json:"users,omitempty"`
+	SMBShares    []models.SMBShare       `json:"smb_shares,omitempty"`
+	NFSExports   []models.NFSExport      `json:"nfs_exports,omitempty"`
+	ISCSITargets []models.ISCSITarget    `json:"iscsi_targets,omitempty"`
+	Policies     []models.SnapshotPolicy `json:"policies,omitempty"`
+	Config       map[string]interface{}  `json:"config,omitempty"`
+}
+
+// New creates a new backup service
+func New(backupDir string) (*Service, error) {
+	if err := os.MkdirAll(backupDir, 0755); err != nil {
+		return nil, fmt.Errorf("create backup directory: %w", err)
+	}
+
+	return &Service{
+		backupDir: backupDir,
+	}, nil
+}
+
+// CreateBackup creates a backup of all system configurations
+func (s *Service) CreateBackup(data BackupData, description string) (string, error) {
+	// Generate backup ID
+	backupID := fmt.Sprintf("backup-%d", time.Now().Unix())
+	backupPath := filepath.Join(s.backupDir, backupID+".tar.gz")
+
+	// Set metadata
+	data.Metadata.ID = backupID
+	data.Metadata.CreatedAt = time.Now()
+	data.Metadata.Version = "1.0"
+	data.Metadata.Description = description
+
+	// Create backup file
+	file, err := os.Create(backupPath)
+	if err != nil {
+		return "", fmt.Errorf("create backup file: %w", err)
+	}
+	defer file.Close()
+
+	// Create gzip and tar writers. They are closed explicitly below,
+	// not deferred, so that all buffered data is flushed before the
+	// archive size is read.
+	gzWriter := gzip.NewWriter(file)
+	tarWriter := tar.NewWriter(gzWriter)
+
+	// Write metadata
+	metadataJSON, err := json.MarshalIndent(data.Metadata, "", "  ")
+	if err != nil {
+		return "", fmt.Errorf("marshal metadata: %w", err)
+	}
+
+	if err := s.writeFileToTar(tarWriter, "metadata.json", metadataJSON); err != nil {
+		return "", fmt.Errorf("write metadata: %w", err)
+	}
+
+	// Write configuration data
+	configJSON, err := json.MarshalIndent(data, "", "  ")
+	if err != nil {
+		return "", fmt.Errorf("marshal config: %w", err)
+	}
+
+	if err := s.writeFileToTar(tarWriter, "config.json", configJSON); err != nil {
+		return "", fmt.Errorf("write config: %w", err)
+	}
+
+	// Flush the archive before reading its size; with deferred closes
+	// the tar and gzip buffers would not yet be on disk and Stat would
+	// under-report the size recorded in the sidecar metadata.
+	if err := tarWriter.Close(); err != nil {
+		return "", fmt.Errorf("close tar writer: %w", err)
+	}
+	if err := gzWriter.Close(); err != nil {
+		return "", fmt.Errorf("close gzip writer: %w", err)
+	}
+
+	// Get file size
+	stat, err := file.Stat()
+	if err != nil {
+		return "", fmt.Errorf("get file stat: %w", err)
+	}
+
+	data.Metadata.Size = stat.Size()
+
+	// Update metadata with size
+	metadataJSON, err = json.MarshalIndent(data.Metadata, "", "  ")
+	if err != nil {
+		return "", fmt.Errorf("marshal updated metadata: %w", err)
+	}
+
+	// Note: We can't update the tar file in place, so the authoritative
+	// metadata (including the final size) is stored in a sidecar file.
+	metadataPath := filepath.Join(s.backupDir, backupID+".meta.json")
+	if err := os.WriteFile(metadataPath, metadataJSON, 0644); err != nil {
+		return "", fmt.Errorf("write metadata file: %w", err)
+	}
+
+	return backupID, nil
+}
+
+// writeFileToTar writes a file to a tar archive
+func (s *Service) writeFileToTar(tw *tar.Writer, filename string, data []byte) error {
+	header := &tar.Header{
+		Name:    filename,
+		Size:    int64(len(data)),
+		Mode:    0644,
+		ModTime: time.Now(),
+	}
+
+	if err := tw.WriteHeader(header); err != nil {
+		return err
+	}
+
+	if _, err := tw.Write(data); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// ListBackups returns a list of all available backups
+func (s *Service)
ListBackups() ([]BackupMetadata, error) { + files, err := os.ReadDir(s.backupDir) + if err != nil { + return nil, fmt.Errorf("read backup directory: %w", err) + } + + var backups []BackupMetadata + for _, file := range files { + if file.IsDir() { + continue + } + + if filepath.Ext(file.Name()) != ".json" || !strings.HasSuffix(file.Name(), ".meta.json") { + continue + } + + metadataPath := filepath.Join(s.backupDir, file.Name()) + data, err := os.ReadFile(metadataPath) + if err != nil { + continue // Skip corrupted metadata files + } + + var metadata BackupMetadata + if err := json.Unmarshal(data, &metadata); err != nil { + continue // Skip invalid metadata files + } + + // Get actual backup file size if it exists + backupPath := filepath.Join(s.backupDir, metadata.ID+".tar.gz") + if stat, err := os.Stat(backupPath); err == nil { + metadata.Size = stat.Size() + } + + backups = append(backups, metadata) + } + + return backups, nil +} + +// GetBackup returns metadata for a specific backup +func (s *Service) GetBackup(backupID string) (*BackupMetadata, error) { + metadataPath := filepath.Join(s.backupDir, backupID+".meta.json") + data, err := os.ReadFile(metadataPath) + if err != nil { + return nil, fmt.Errorf("read metadata: %w", err) + } + + var metadata BackupMetadata + if err := json.Unmarshal(data, &metadata); err != nil { + return nil, fmt.Errorf("unmarshal metadata: %w", err) + } + + // Get actual backup file size + backupPath := filepath.Join(s.backupDir, backupID+".tar.gz") + if stat, err := os.Stat(backupPath); err == nil { + metadata.Size = stat.Size() + } + + return &metadata, nil +} + +// RestoreBackup restores configuration from a backup +func (s *Service) RestoreBackup(backupID string) (*BackupData, error) { + backupPath := filepath.Join(s.backupDir, backupID+".tar.gz") + + file, err := os.Open(backupPath) + if err != nil { + return nil, fmt.Errorf("open backup file: %w", err) + } + defer file.Close() + + // Create gzip reader + gzReader, err := gzip.NewReader(file) + if err != nil { + return nil, fmt.Errorf("create gzip reader: %w", err) + } + defer gzReader.Close() + + // Create tar reader + tarReader := tar.NewReader(gzReader) + + var configData []byte + var metadataData []byte + + // Extract files from tar + for { + header, err := tarReader.Next() + if err == io.EOF { + break + } + if err != nil { + return nil, fmt.Errorf("read tar: %w", err) + } + + switch header.Name { + case "config.json": + configData, err = io.ReadAll(tarReader) + if err != nil { + return nil, fmt.Errorf("read config: %w", err) + } + case "metadata.json": + metadataData, err = io.ReadAll(tarReader) + if err != nil { + return nil, fmt.Errorf("read metadata: %w", err) + } + } + } + + if configData == nil { + return nil, fmt.Errorf("config.json not found in backup") + } + + var backupData BackupData + if err := json.Unmarshal(configData, &backupData); err != nil { + return nil, fmt.Errorf("unmarshal config: %w", err) + } + + // Update metadata if available + if metadataData != nil { + if err := json.Unmarshal(metadataData, &backupData.Metadata); err == nil { + // Metadata loaded successfully + } + } + + return &backupData, nil +} + +// DeleteBackup deletes a backup file and its metadata +func (s *Service) DeleteBackup(backupID string) error { + backupPath := filepath.Join(s.backupDir, backupID+".tar.gz") + metadataPath := filepath.Join(s.backupDir, backupID+".meta.json") + + var errors []error + + if err := os.Remove(backupPath); err != nil && !os.IsNotExist(err) { + errors = append(errors, 
fmt.Errorf("remove backup file: %w", err)) + } + + if err := os.Remove(metadataPath); err != nil && !os.IsNotExist(err) { + errors = append(errors, fmt.Errorf("remove metadata file: %w", err)) + } + + if len(errors) > 0 { + return fmt.Errorf("delete backup: %v", errors) + } + + return nil +} + +// VerifyBackup verifies that a backup file is valid and can be restored +func (s *Service) VerifyBackup(backupID string) error { + backupPath := filepath.Join(s.backupDir, backupID+".tar.gz") + + file, err := os.Open(backupPath) + if err != nil { + return fmt.Errorf("open backup file: %w", err) + } + defer file.Close() + + // Try to read the backup + gzReader, err := gzip.NewReader(file) + if err != nil { + return fmt.Errorf("invalid gzip format: %w", err) + } + defer gzReader.Close() + + tarReader := tar.NewReader(gzReader) + + hasConfig := false + + for { + header, err := tarReader.Next() + if err == io.EOF { + break + } + if err != nil { + return fmt.Errorf("invalid tar format: %w", err) + } + + switch header.Name { + case "config.json": + hasConfig = true + // Try to read and parse config + data, err := io.ReadAll(tarReader) + if err != nil { + return fmt.Errorf("read config: %w", err) + } + var backupData BackupData + if err := json.Unmarshal(data, &backupData); err != nil { + return fmt.Errorf("invalid config format: %w", err) + } + case "metadata.json": + // Metadata is optional, just verify it can be read + _, err := io.ReadAll(tarReader) + if err != nil { + return fmt.Errorf("read metadata: %w", err) + } + } + } + + if !hasConfig { + return fmt.Errorf("backup missing config.json") + } + + return nil +} diff --git a/internal/errors/errors.go b/internal/errors/errors.go new file mode 100644 index 0000000..385ad58 --- /dev/null +++ b/internal/errors/errors.go @@ -0,0 +1,114 @@ +package errors + +import ( + "fmt" + "net/http" +) + +// ErrorCode represents a specific error type +type ErrorCode string + +const ( + ErrCodeInternal ErrorCode = "INTERNAL_ERROR" + ErrCodeNotFound ErrorCode = "NOT_FOUND" + ErrCodeBadRequest ErrorCode = "BAD_REQUEST" + ErrCodeConflict ErrorCode = "CONFLICT" + ErrCodeUnauthorized ErrorCode = "UNAUTHORIZED" + ErrCodeForbidden ErrorCode = "FORBIDDEN" + ErrCodeServiceUnavailable ErrorCode = "SERVICE_UNAVAILABLE" + ErrCodeValidation ErrorCode = "VALIDATION_ERROR" +) + +// APIError represents a structured API error +type APIError struct { + Code ErrorCode `json:"code"` + Message string `json:"message"` + Details string `json:"details,omitempty"` + HTTPStatus int `json:"-"` +} + +func (e *APIError) Error() string { + if e.Details != "" { + return fmt.Sprintf("%s: %s (%s)", e.Code, e.Message, e.Details) + } + return fmt.Sprintf("%s: %s", e.Code, e.Message) +} + +// NewAPIError creates a new API error +func NewAPIError(code ErrorCode, message string, httpStatus int) *APIError { + return &APIError{ + Code: code, + Message: message, + HTTPStatus: httpStatus, + } +} + +// WithDetails adds details to an error +func (e *APIError) WithDetails(details string) *APIError { + e.Details = details + return e +} + +// Common error constructors +func ErrNotFound(resource string) *APIError { + return NewAPIError(ErrCodeNotFound, fmt.Sprintf("%s not found", resource), http.StatusNotFound) +} + +func ErrBadRequest(message string) *APIError { + return NewAPIError(ErrCodeBadRequest, message, http.StatusBadRequest) +} + +func ErrConflict(message string) *APIError { + return NewAPIError(ErrCodeConflict, message, http.StatusConflict) +} + +func ErrInternal(message string) *APIError { + return 
NewAPIError(ErrCodeInternal, message, http.StatusInternalServerError) +} + +func ErrServiceUnavailable(service string) *APIError { + return NewAPIError(ErrCodeServiceUnavailable, fmt.Sprintf("%s service is unavailable", service), http.StatusServiceUnavailable) +} + +func ErrValidation(message string) *APIError { + return NewAPIError(ErrCodeValidation, message, http.StatusBadRequest) +} + +// RetryConfig defines retry behavior +type RetryConfig struct { + MaxAttempts int + Backoff func(attempt int) error // Returns error if should stop retrying +} + +// DefaultRetryConfig returns a default retry configuration +func DefaultRetryConfig() RetryConfig { + return RetryConfig{ + MaxAttempts: 3, + Backoff: func(attempt int) error { + // Simple exponential backoff: 100ms, 200ms, 400ms + if attempt >= 3 { + return fmt.Errorf("max attempts reached") + } + return nil + }, + } +} + +// Retry executes a function with retry logic +func Retry(fn func() error, config RetryConfig) error { + var lastErr error + for attempt := 1; attempt <= config.MaxAttempts; attempt++ { + if err := fn(); err == nil { + return nil + } else { + lastErr = err + } + + if attempt < config.MaxAttempts { + if err := config.Backoff(attempt); err != nil { + return fmt.Errorf("retry aborted: %w", err) + } + } + } + return fmt.Errorf("retry failed after %d attempts: %w", config.MaxAttempts, lastErr) +} diff --git a/internal/httpapp/api_handlers.go b/internal/httpapp/api_handlers.go index 7c82b1f..f7868d6 100644 --- a/internal/httpapp/api_handlers.go +++ b/internal/httpapp/api_handlers.go @@ -9,8 +9,10 @@ import ( "strings" "gitea.avt.data-center.id/othman.suseno/atlas/internal/auth" + "gitea.avt.data-center.id/othman.suseno/atlas/internal/errors" "gitea.avt.data-center.id/othman.suseno/atlas/internal/models" "gitea.avt.data-center.id/othman.suseno/atlas/internal/storage" + "gitea.avt.data-center.id/othman.suseno/atlas/internal/validation" ) // pathParam is now in router_helpers.go @@ -45,12 +47,18 @@ func (a *App) handleCreatePool(w http.ResponseWriter, r *http.Request) { } if err := json.NewDecoder(r.Body).Decode(&req); err != nil { - writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid request body"}) + writeError(w, errors.ErrBadRequest("invalid request body")) return } - if req.Name == "" || len(req.VDEVs) == 0 { - writeJSON(w, http.StatusBadRequest, map[string]string{"error": "name and vdevs are required"}) + // Validate pool name + if err := validation.ValidateZFSName(req.Name); err != nil { + writeError(w, errors.ErrValidation(err.Error())) + return + } + + if len(req.VDEVs) == 0 { + writeError(w, errors.ErrValidation("at least one vdev is required")) return } @@ -224,17 +232,31 @@ func (a *App) handleListZVOLs(w http.ResponseWriter, r *http.Request) { func (a *App) handleCreateZVOL(w http.ResponseWriter, r *http.Request) { var req struct { Name string `json:"name"` - Size uint64 `json:"size"` // in bytes + Size string `json:"size"` // human-readable format (e.g., "10G") Options map[string]string `json:"options,omitempty"` } if err := json.NewDecoder(r.Body).Decode(&req); err != nil { - writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid request body"}) + writeError(w, errors.ErrBadRequest("invalid request body")) return } - if req.Name == "" || req.Size == 0 { - writeJSON(w, http.StatusBadRequest, map[string]string{"error": "name and size are required"}) + // Validate ZVOL name + if err := validation.ValidateZFSName(req.Name); err != nil { + writeError(w, 
errors.ErrValidation(err.Error())) + return + } + + // Validate size format + if err := validation.ValidateSize(req.Size); err != nil { + writeError(w, errors.ErrValidation(err.Error())) + return + } + + // Parse size to bytes + sizeBytes, err := a.parseSizeString(req.Size) + if err != nil { + writeError(w, errors.ErrValidation(fmt.Sprintf("invalid size: %v", err))) return } @@ -242,7 +264,7 @@ func (a *App) handleCreateZVOL(w http.ResponseWriter, r *http.Request) { req.Options = make(map[string]string) } - if err := a.zfs.CreateZVOL(req.Name, req.Size, req.Options); err != nil { + if err := a.zfs.CreateZVOL(req.Name, sizeBytes, req.Options); err != nil { log.Printf("create zvol error: %v", err) writeJSON(w, http.StatusInternalServerError, map[string]string{"error": err.Error()}) return @@ -314,8 +336,16 @@ func (a *App) handleCreateSnapshot(w http.ResponseWriter, r *http.Request) { return } - if req.Dataset == "" || req.Name == "" { - writeJSON(w, http.StatusBadRequest, map[string]string{"error": "dataset and name are required"}) + // Validate dataset name + if err := validation.ValidateZFSName(req.Dataset); err != nil { + writeError(w, errors.ErrValidation(err.Error())) + return + } + + // Validate snapshot name (can contain @ but we'll validate the base name) + snapshotBaseName := strings.ReplaceAll(req.Name, "@", "") + if err := validation.ValidateZFSName(snapshotBaseName); err != nil { + writeError(w, errors.ErrValidation("invalid snapshot name")) return } @@ -325,10 +355,10 @@ func (a *App) handleCreateSnapshot(w http.ResponseWriter, r *http.Request) { return } - snapshotName := fmt.Sprintf("%s@%s", req.Dataset, req.Name) - snap, err := a.zfs.GetSnapshot(snapshotName) + fullSnapshotName := fmt.Sprintf("%s@%s", req.Dataset, req.Name) + snap, err := a.zfs.GetSnapshot(fullSnapshotName) if err != nil { - writeJSON(w, http.StatusCreated, map[string]string{"message": "snapshot created", "name": snapshotName}) + writeJSON(w, http.StatusCreated, map[string]string{"message": "snapshot created", "name": fullSnapshotName}) return } @@ -477,11 +507,27 @@ func (a *App) handleCreateSMBShare(w http.ResponseWriter, r *http.Request) { return } - if req.Name == "" || req.Dataset == "" { - writeJSON(w, http.StatusBadRequest, map[string]string{"error": "name and dataset are required"}) + // Validate share name + if err := validation.ValidateShareName(req.Name); err != nil { + writeError(w, errors.ErrValidation(err.Error())) return } + // Validate dataset name + if err := validation.ValidateZFSName(req.Dataset); err != nil { + writeError(w, errors.ErrValidation(err.Error())) + return + } + + // Sanitize path if provided + if req.Path != "" { + req.Path = validation.SanitizePath(req.Path) + if err := validation.ValidatePath(req.Path); err != nil { + writeError(w, errors.ErrValidation(err.Error())) + return + } + } + // Validate dataset exists datasets, err := a.zfs.ListDatasets("") if err != nil { @@ -509,20 +555,22 @@ func (a *App) handleCreateSMBShare(w http.ResponseWriter, r *http.Request) { share, err := a.smbStore.Create(req.Name, req.Path, req.Dataset, req.Description, req.ReadOnly, req.GuestOK, req.ValidUsers) if err != nil { if err == storage.ErrSMBShareExists { - writeJSON(w, http.StatusConflict, map[string]string{"error": "share name already exists"}) + writeError(w, errors.ErrConflict("share name already exists")) return } log.Printf("create SMB share error: %v", err) - writeJSON(w, http.StatusInternalServerError, map[string]string{"error": err.Error()}) + writeError(w, 
errors.ErrInternal("failed to create SMB share").WithDetails(err.Error())) return } - // Apply configuration to Samba service + // Apply configuration to Samba service (with graceful degradation) shares := a.smbStore.List() if err := a.smbService.ApplyConfiguration(shares); err != nil { - log.Printf("apply SMB configuration error: %v", err) - // Don't fail the request, but log the error - // In production, you might want to queue this for retry + // Log but don't fail the request - desired state is stored + // Service configuration can be retried later + if svcErr := a.handleServiceError("SMB", err); svcErr != nil { + log.Printf("SMB service configuration failed (non-fatal): %v", err) + } } writeJSON(w, http.StatusCreated, share) @@ -629,11 +677,29 @@ func (a *App) handleCreateNFSExport(w http.ResponseWriter, r *http.Request) { return } - if req.Dataset == "" { - writeJSON(w, http.StatusBadRequest, map[string]string{"error": "dataset is required"}) + // Validate dataset name + if err := validation.ValidateZFSName(req.Dataset); err != nil { + writeError(w, errors.ErrValidation(err.Error())) return } + // Validate and sanitize path if provided + if req.Path != "" { + req.Path = validation.SanitizePath(req.Path) + if err := validation.ValidatePath(req.Path); err != nil { + writeError(w, errors.ErrValidation(err.Error())) + return + } + } + + // Validate clients + for i, client := range req.Clients { + if err := validation.ValidateCIDR(client); err != nil { + writeError(w, errors.ErrValidation(fmt.Sprintf("client[%d]: %s", i, err.Error()))) + return + } + } + // Validate dataset exists datasets, err := a.zfs.ListDatasets("") if err != nil { @@ -786,14 +852,9 @@ func (a *App) handleCreateISCSITarget(w http.ResponseWriter, r *http.Request) { return } - if req.IQN == "" { - writeJSON(w, http.StatusBadRequest, map[string]string{"error": "iqn is required"}) - return - } - - // Basic IQN format validation (iqn.yyyy-mm.reversed.domain:identifier) - if !strings.HasPrefix(req.IQN, "iqn.") { - writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid IQN format (must start with 'iqn.')"}) + // Validate IQN format + if err := validation.ValidateIQN(req.IQN); err != nil { + writeError(w, errors.ErrValidation(err.Error())) return } @@ -1065,8 +1126,14 @@ func (a *App) handleLogin(w http.ResponseWriter, r *http.Request) { return } - if req.Username == "" || req.Password == "" { - writeJSON(w, http.StatusBadRequest, map[string]string{"error": "username and password are required"}) + // Validate username (login is less strict - just check not empty) + if req.Username == "" { + writeError(w, errors.ErrValidation("username is required")) + return + } + + if req.Password == "" { + writeError(w, errors.ErrValidation("password is required")) return } @@ -1116,11 +1183,26 @@ func (a *App) handleCreateUser(w http.ResponseWriter, r *http.Request) { return } - if req.Username == "" || req.Password == "" { - writeJSON(w, http.StatusBadRequest, map[string]string{"error": "username and password are required"}) + // Validate username + if err := validation.ValidateUsername(req.Username); err != nil { + writeError(w, errors.ErrValidation(err.Error())) return } + // Validate password + if err := validation.ValidatePassword(req.Password); err != nil { + writeError(w, errors.ErrValidation(err.Error())) + return + } + + // Validate email if provided + if req.Email != "" { + if err := validation.ValidateEmail(req.Email); err != nil { + writeError(w, errors.ErrValidation(err.Error())) + return + } + } + if req.Role 
== "" { req.Role = models.RoleViewer // Default role } diff --git a/internal/httpapp/app.go b/internal/httpapp/app.go index 19b821c..2dce8f4 100644 --- a/internal/httpapp/app.go +++ b/internal/httpapp/app.go @@ -10,8 +10,10 @@ import ( "gitea.avt.data-center.id/othman.suseno/atlas/internal/audit" "gitea.avt.data-center.id/othman.suseno/atlas/internal/auth" + "gitea.avt.data-center.id/othman.suseno/atlas/internal/backup" "gitea.avt.data-center.id/othman.suseno/atlas/internal/db" "gitea.avt.data-center.id/othman.suseno/atlas/internal/job" + "gitea.avt.data-center.id/othman.suseno/atlas/internal/metrics" "gitea.avt.data-center.id/othman.suseno/atlas/internal/services" "gitea.avt.data-center.id/othman.suseno/atlas/internal/snapshot" "gitea.avt.data-center.id/othman.suseno/atlas/internal/storage" @@ -26,23 +28,26 @@ type Config struct { } type App struct { - cfg Config - tmpl *template.Template - mux *http.ServeMux - zfs *zfs.Service - snapshotPolicy *snapshot.PolicyStore - jobManager *job.Manager - scheduler *snapshot.Scheduler - authService *auth.Service - userStore *auth.UserStore - auditStore *audit.Store - smbStore *storage.SMBStore - nfsStore *storage.NFSStore - iscsiStore *storage.ISCSIStore - database *db.DB // Optional database connection - smbService *services.SMBService - nfsService *services.NFSService - iscsiService *services.ISCSIService + cfg Config + tmpl *template.Template + mux *http.ServeMux + zfs *zfs.Service + snapshotPolicy *snapshot.PolicyStore + jobManager *job.Manager + scheduler *snapshot.Scheduler + authService *auth.Service + userStore *auth.UserStore + auditStore *audit.Store + smbStore *storage.SMBStore + nfsStore *storage.NFSStore + iscsiStore *storage.ISCSIStore + database *db.DB // Optional database connection + smbService *services.SMBService + nfsService *services.NFSService + iscsiService *services.ISCSIService + metricsCollector *metrics.Collector + startTime time.Time + backupService *backup.Service } func New(cfg Config) (*App, error) { @@ -91,24 +96,41 @@ func New(cfg Config) (*App, error) { nfsService := services.NewNFSService() iscsiService := services.NewISCSIService() + // Initialize metrics collector + metricsCollector := metrics.NewCollector() + startTime := time.Now() + + // Initialize backup service + backupDir := os.Getenv("ATLAS_BACKUP_DIR") + if backupDir == "" { + backupDir = "data/backups" + } + backupService, err := backup.New(backupDir) + if err != nil { + return nil, fmt.Errorf("init backup service: %w", err) + } + a := &App{ - cfg: cfg, - tmpl: tmpl, - mux: http.NewServeMux(), - zfs: zfsService, - snapshotPolicy: policyStore, - jobManager: jobMgr, - scheduler: scheduler, - authService: authService, - userStore: userStore, - auditStore: auditStore, - smbStore: smbStore, - nfsStore: nfsStore, - iscsiStore: iscsiStore, - database: database, - smbService: smbService, - nfsService: nfsService, - iscsiService: iscsiService, + cfg: cfg, + tmpl: tmpl, + mux: http.NewServeMux(), + zfs: zfsService, + snapshotPolicy: policyStore, + jobManager: jobMgr, + scheduler: scheduler, + authService: authService, + userStore: userStore, + auditStore: auditStore, + smbStore: smbStore, + nfsStore: nfsStore, + iscsiStore: iscsiStore, + database: database, + smbService: smbService, + nfsService: nfsService, + iscsiService: iscsiService, + metricsCollector: metricsCollector, + startTime: startTime, + backupService: backupService, } // Start snapshot scheduler (runs every 15 minutes) @@ -119,8 +141,37 @@ func New(cfg Config) (*App, error) { } func (a *App) Router() 
http.Handler { - // Wrap the mux with middleware chain: requestID -> logging -> audit -> auth - return requestID(logging(a.auditMiddleware(a.authMiddleware(a.mux)))) + // Middleware chain order (outer to inner): + // 1. CORS (handles preflight) + // 2. Security headers + // 3. Request size limit (10MB) + // 4. Content-Type validation + // 5. Rate limiting + // 6. Error recovery + // 7. Request ID + // 8. Logging + // 9. Audit + // 10. Authentication + // 11. Routes + return a.corsMiddleware( + a.securityHeadersMiddleware( + a.requestSizeMiddleware(10 * 1024 * 1024)( + a.validateContentTypeMiddleware( + a.rateLimitMiddleware( + a.errorMiddleware( + requestID( + logging( + a.auditMiddleware( + a.authMiddleware(a.mux), + ), + ), + ), + ), + ), + ), + ), + ), + ) } // StopScheduler stops the snapshot scheduler (for graceful shutdown) diff --git a/internal/httpapp/backup_handlers.go b/internal/httpapp/backup_handlers.go new file mode 100644 index 0000000..7ee6b60 --- /dev/null +++ b/internal/httpapp/backup_handlers.go @@ -0,0 +1,304 @@ +package httpapp + +import ( + "encoding/json" + "fmt" + "log" + "net/http" + + "strings" + + "gitea.avt.data-center.id/othman.suseno/atlas/internal/backup" + "gitea.avt.data-center.id/othman.suseno/atlas/internal/errors" + "gitea.avt.data-center.id/othman.suseno/atlas/internal/models" +) + +// Backup Handlers +func (a *App) handleCreateBackup(w http.ResponseWriter, r *http.Request) { + var req struct { + Description string `json:"description,omitempty"` + } + + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + // Description is optional, so we'll continue even if body is empty + _ = err + } + + // Collect all configuration data + backupData := backup.BackupData{ + Users: a.userStore.List(), + SMBShares: a.smbStore.List(), + NFSExports: a.nfsStore.List(), + ISCSITargets: a.iscsiStore.List(), + Policies: a.snapshotPolicy.List(), + Config: map[string]interface{}{ + "database_path": a.cfg.DatabasePath, + }, + } + + // Create backup + backupID, err := a.backupService.CreateBackup(backupData, req.Description) + if err != nil { + log.Printf("create backup error: %v", err) + writeError(w, errors.ErrInternal("failed to create backup").WithDetails(err.Error())) + return + } + + // Get backup metadata + metadata, err := a.backupService.GetBackup(backupID) + if err != nil { + log.Printf("get backup metadata error: %v", err) + writeJSON(w, http.StatusCreated, map[string]interface{}{ + "id": backupID, + "message": "backup created", + }) + return + } + + writeJSON(w, http.StatusCreated, metadata) +} + +func (a *App) handleListBackups(w http.ResponseWriter, r *http.Request) { + backups, err := a.backupService.ListBackups() + if err != nil { + log.Printf("list backups error: %v", err) + writeError(w, errors.ErrInternal("failed to list backups").WithDetails(err.Error())) + return + } + + writeJSON(w, http.StatusOK, backups) +} + +func (a *App) handleGetBackup(w http.ResponseWriter, r *http.Request) { + backupID := pathParam(r, "/api/v1/backups/") + if backupID == "" { + writeError(w, errors.ErrBadRequest("backup id required")) + return + } + + metadata, err := a.backupService.GetBackup(backupID) + if err != nil { + log.Printf("get backup error: %v", err) + writeError(w, errors.ErrNotFound("backup").WithDetails(backupID)) + return + } + + writeJSON(w, http.StatusOK, metadata) +} + +func (a *App) handleRestoreBackup(w http.ResponseWriter, r *http.Request) { + // Extract backup ID from path + path := r.URL.Path + backupID := "" + + // Handle both /api/v1/backups/{id} 
and /api/v1/backups/{id}/restore + if strings.Contains(path, "/restore") { + // Path: /api/v1/backups/{id}/restore + prefix := "/api/v1/backups/" + suffix := "/restore" + if strings.HasPrefix(path, prefix) && strings.HasSuffix(path, suffix) { + backupID = path[len(prefix) : len(path)-len(suffix)] + } + } else { + // Path: /api/v1/backups/{id} + backupID = pathParam(r, "/api/v1/backups/") + } + + if backupID == "" { + writeError(w, errors.ErrBadRequest("backup id required")) + return + } + + var req struct { + DryRun bool `json:"dry_run,omitempty"` + } + + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + // Dry run is optional, default to false + req.DryRun = false + } + + // Verify backup first + if err := a.backupService.VerifyBackup(backupID); err != nil { + log.Printf("verify backup error: %v", err) + writeError(w, errors.ErrBadRequest("backup verification failed").WithDetails(err.Error())) + return + } + + // Restore backup + backupData, err := a.backupService.RestoreBackup(backupID) + if err != nil { + log.Printf("restore backup error: %v", err) + writeError(w, errors.ErrInternal("failed to restore backup").WithDetails(err.Error())) + return + } + + if req.DryRun { + // Return what would be restored without actually restoring + writeJSON(w, http.StatusOK, map[string]interface{}{ + "message": "dry run - no changes made", + "backup_id": backupID, + "backup_data": backupData, + }) + return + } + + // Restore users (skip default admin user - user-1) + // Note: Passwords cannot be restored as they're hashed and not stored in user model + // Users will need to reset their passwords after restore + for _, user := range backupData.Users { + // Skip default admin user + if user.ID == "user-1" { + log.Printf("skipping default admin user") + continue + } + + // Check if user already exists + if _, err := a.userStore.GetByID(user.ID); err == nil { + log.Printf("user %s already exists, skipping", user.ID) + continue + } + + // Create user with temporary password (user must reset password) + // Use a secure random password that user must change + tempPassword := fmt.Sprintf("restore-%s", user.ID) + if _, err := a.userStore.Create(user.Username, user.Email, tempPassword, user.Role); err != nil { + log.Printf("restore user error: %v", err) + // Continue with other users + } else { + log.Printf("restored user %s - password reset required", user.Username) + } + } + + // Restore SMB shares + for _, share := range backupData.SMBShares { + // Check if share already exists + if _, err := a.smbStore.Get(share.ID); err == nil { + log.Printf("SMB share %s already exists, skipping", share.ID) + continue + } + + // Create share + if _, err := a.smbStore.Create(share.Name, share.Path, share.Dataset, share.Description, share.ReadOnly, share.GuestOK, share.ValidUsers); err != nil { + log.Printf("restore SMB share error: %v", err) + // Continue with other shares + } + } + + // Restore NFS exports + for _, export := range backupData.NFSExports { + // Check if export already exists + if _, err := a.nfsStore.Get(export.ID); err == nil { + log.Printf("NFS export %s already exists, skipping", export.ID) + continue + } + + // Create export + if _, err := a.nfsStore.Create(export.Path, export.Dataset, export.Clients, export.ReadOnly, export.RootSquash); err != nil { + log.Printf("restore NFS export error: %v", err) + // Continue with other exports + } + } + + // Restore iSCSI targets + for _, target := range backupData.ISCSITargets { + // Check if target already exists + if _, err := 
a.iscsiStore.Get(target.ID); err == nil {
+			log.Printf("iSCSI target %s already exists, skipping", target.ID)
+			continue
+		}
+
+		// Create target
+		if _, err := a.iscsiStore.Create(target.IQN, target.Initiators); err != nil {
+			log.Printf("restore iSCSI target error: %v", err)
+			// Continue with other targets
+		}
+
+		// Restore LUNs
+		for _, lun := range target.LUNs {
+			if _, err := a.iscsiStore.AddLUN(target.ID, lun.ZVOL, lun.Size); err != nil {
+				log.Printf("restore iSCSI LUN error: %v", err)
+				// Continue with other LUNs
+			}
+		}
+	}
+
+	// Restore snapshot policies
+	for _, policy := range backupData.Policies {
+		// Check if policy already exists
+		if existing, _ := a.snapshotPolicy.Get(policy.Dataset); existing != nil {
+			log.Printf("snapshot policy for dataset %s already exists, skipping", policy.Dataset)
+			continue
+		}
+
+		// Set policy (uses Dataset as key). Copy the loop variable before
+		// taking its address: on Go versions before 1.22, reusing &policy
+		// would hand every call a pointer to the same variable.
+		p := policy
+		a.snapshotPolicy.Set(&p)
+	}
+
+	// Apply service configurations
+	shares := a.smbStore.List()
+	if err := a.smbService.ApplyConfiguration(shares); err != nil {
+		log.Printf("apply SMB configuration after restore error: %v", err)
+	}
+
+	exports := a.nfsStore.List()
+	if err := a.nfsService.ApplyConfiguration(exports); err != nil {
+		log.Printf("apply NFS configuration after restore error: %v", err)
+	}
+
+	targets := a.iscsiStore.List()
+	for _, target := range targets {
+		if err := a.iscsiService.ApplyConfiguration([]models.ISCSITarget{target}); err != nil {
+			log.Printf("apply iSCSI configuration after restore error: %v", err)
+		}
+	}
+
+	writeJSON(w, http.StatusOK, map[string]interface{}{
+		"message":   "backup restored successfully",
+		"backup_id": backupID,
+	})
+}
+
+func (a *App) handleDeleteBackup(w http.ResponseWriter, r *http.Request) {
+	backupID := pathParam(r, "/api/v1/backups/")
+	if backupID == "" {
+		writeError(w, errors.ErrBadRequest("backup id required"))
+		return
+	}
+
+	if err := a.backupService.DeleteBackup(backupID); err != nil {
+		log.Printf("delete backup error: %v", err)
+		writeError(w, errors.ErrInternal("failed to delete backup").WithDetails(err.Error()))
+		return
+	}
+
+	writeJSON(w, http.StatusOK, map[string]string{
+		"message":   "backup deleted",
+		"backup_id": backupID,
+	})
+}
+
+func (a *App) handleVerifyBackup(w http.ResponseWriter, r *http.Request) {
+	backupID := pathParam(r, "/api/v1/backups/")
+	if backupID == "" {
+		writeError(w, errors.ErrBadRequest("backup id required"))
+		return
+	}
+
+	if err := a.backupService.VerifyBackup(backupID); err != nil {
+		writeError(w, errors.ErrBadRequest("backup verification failed").WithDetails(err.Error()))
+		return
+	}
+
+	metadata, err := a.backupService.GetBackup(backupID)
+	if err != nil {
+		writeError(w, errors.ErrNotFound("backup").WithDetails(backupID))
+		return
+	}
+
+	writeJSON(w, http.StatusOK, map[string]interface{}{
+		"message":   "backup is valid",
+		"backup_id": backupID,
+		"metadata":  metadata,
+	})
+}
diff --git a/internal/httpapp/diagnostics_handlers.go b/internal/httpapp/diagnostics_handlers.go
new file mode 100644
index 0000000..80be66f
--- /dev/null
+++ b/internal/httpapp/diagnostics_handlers.go
@@ -0,0 +1,289 @@
+package httpapp
+
+import (
+	"fmt"
+	"net/http"
+	"runtime"
+	"time"
+)
+
+// SystemInfo represents system diagnostic information
+type SystemInfo struct {
+	Version      string                 `json:"version"`
+	Uptime       string                 `json:"uptime"`
+	GoVersion    string                 `json:"go_version"`
+	NumGoroutine int                    `json:"num_goroutines"`
+	Memory       MemoryInfo             `json:"memory"`
+	Services     map[string]ServiceInfo `json:"services"`
+	Database     DatabaseInfo
`json:"database,omitempty"` +} + +// MemoryInfo represents memory statistics +type MemoryInfo struct { + Alloc uint64 `json:"alloc"` // bytes allocated + TotalAlloc uint64 `json:"total_alloc"` // bytes allocated (cumulative) + Sys uint64 `json:"sys"` // bytes obtained from system + NumGC uint32 `json:"num_gc"` // number of GC cycles +} + +// ServiceInfo represents service status +type ServiceInfo struct { + Status string `json:"status"` // "running", "stopped", "error" + LastCheck string `json:"last_check"` // timestamp + Message string `json:"message,omitempty"` +} + +// DatabaseInfo represents database connection info +type DatabaseInfo struct { + Connected bool `json:"connected"` + Path string `json:"path,omitempty"` +} + +// handleSystemInfo returns system diagnostic information +func (a *App) handleSystemInfo(w http.ResponseWriter, r *http.Request) { + var m runtime.MemStats + runtime.ReadMemStats(&m) + + uptime := time.Since(a.startTime) + + info := SystemInfo{ + Version: "v0.1.0-dev", + Uptime: fmt.Sprintf("%.0f seconds", uptime.Seconds()), + GoVersion: runtime.Version(), + NumGoroutine: runtime.NumGoroutine(), + Memory: MemoryInfo{ + Alloc: m.Alloc, + TotalAlloc: m.TotalAlloc, + Sys: m.Sys, + NumGC: m.NumGC, + }, + Services: make(map[string]ServiceInfo), + } + + // Check service statuses + smbStatus, smbErr := a.smbService.GetStatus() + if smbErr == nil { + status := "stopped" + if smbStatus { + status = "running" + } + info.Services["smb"] = ServiceInfo{ + Status: status, + LastCheck: time.Now().Format(time.RFC3339), + } + } else { + info.Services["smb"] = ServiceInfo{ + Status: "error", + LastCheck: time.Now().Format(time.RFC3339), + Message: smbErr.Error(), + } + } + + nfsStatus, nfsErr := a.nfsService.GetStatus() + if nfsErr == nil { + status := "stopped" + if nfsStatus { + status = "running" + } + info.Services["nfs"] = ServiceInfo{ + Status: status, + LastCheck: time.Now().Format(time.RFC3339), + } + } else { + info.Services["nfs"] = ServiceInfo{ + Status: "error", + LastCheck: time.Now().Format(time.RFC3339), + Message: nfsErr.Error(), + } + } + + iscsiStatus, iscsiErr := a.iscsiService.GetStatus() + if iscsiErr == nil { + status := "stopped" + if iscsiStatus { + status = "running" + } + info.Services["iscsi"] = ServiceInfo{ + Status: status, + LastCheck: time.Now().Format(time.RFC3339), + } + } else { + info.Services["iscsi"] = ServiceInfo{ + Status: "error", + LastCheck: time.Now().Format(time.RFC3339), + Message: iscsiErr.Error(), + } + } + + // Database info + if a.database != nil { + info.Database = DatabaseInfo{ + Connected: true, + Path: a.cfg.DatabasePath, + } + } + + writeJSON(w, http.StatusOK, info) +} + +// handleHealthCheck provides detailed health check information +func (a *App) handleHealthCheck(w http.ResponseWriter, r *http.Request) { + type HealthStatus struct { + Status string `json:"status"` // "healthy", "degraded", "unhealthy" + Timestamp string `json:"timestamp"` + Checks map[string]string `json:"checks"` + } + + health := HealthStatus{ + Status: "healthy", + Timestamp: time.Now().Format(time.RFC3339), + Checks: make(map[string]string), + } + + // Check ZFS service + if a.zfs != nil { + _, err := a.zfs.ListPools() + if err != nil { + health.Checks["zfs"] = "unhealthy: " + err.Error() + health.Status = "degraded" + } else { + health.Checks["zfs"] = "healthy" + } + } else { + health.Checks["zfs"] = "unhealthy: service not initialized" + health.Status = "unhealthy" + } + + // Check database + if a.database != nil { + // Try a simple query to check 
database health + if err := a.database.DB.Ping(); err != nil { + health.Checks["database"] = "unhealthy: " + err.Error() + health.Status = "degraded" + } else { + health.Checks["database"] = "healthy" + } + } else { + health.Checks["database"] = "not configured" + } + + // Check services + smbStatus, smbErr := a.smbService.GetStatus() + if smbErr != nil { + health.Checks["smb"] = "unhealthy: " + smbErr.Error() + health.Status = "degraded" + } else if !smbStatus { + health.Checks["smb"] = "stopped" + } else { + health.Checks["smb"] = "healthy" + } + + nfsStatus, nfsErr := a.nfsService.GetStatus() + if nfsErr != nil { + health.Checks["nfs"] = "unhealthy: " + nfsErr.Error() + health.Status = "degraded" + } else if !nfsStatus { + health.Checks["nfs"] = "stopped" + } else { + health.Checks["nfs"] = "healthy" + } + + iscsiStatus, iscsiErr := a.iscsiService.GetStatus() + if iscsiErr != nil { + health.Checks["iscsi"] = "unhealthy: " + iscsiErr.Error() + health.Status = "degraded" + } else if !iscsiStatus { + health.Checks["iscsi"] = "stopped" + } else { + health.Checks["iscsi"] = "healthy" + } + + // Set HTTP status based on health + statusCode := http.StatusOK + if health.Status == "unhealthy" { + statusCode = http.StatusServiceUnavailable + } else if health.Status == "degraded" { + statusCode = http.StatusOK // Still OK, but with warnings + } + + w.WriteHeader(statusCode) + writeJSON(w, statusCode, health) +} + +// handleLogs returns recent log entries (if available) +func (a *App) handleLogs(w http.ResponseWriter, r *http.Request) { + // For now, return audit logs as system logs + // In a full implementation, this would return application logs + limit := 100 + if limitStr := r.URL.Query().Get("limit"); limitStr != "" { + fmt.Sscanf(limitStr, "%d", &limit) + if limit > 1000 { + limit = 1000 + } + if limit < 1 { + limit = 1 + } + } + + // Get recent audit logs + logs := a.auditStore.List("", "", "", limit) + + type LogEntry struct { + Timestamp string `json:"timestamp"` + Level string `json:"level"` + Actor string `json:"actor"` + Action string `json:"action"` + Resource string `json:"resource"` + Result string `json:"result"` + Message string `json:"message,omitempty"` + IP string `json:"ip,omitempty"` + } + + entries := make([]LogEntry, 0, len(logs)) + for _, log := range logs { + level := "INFO" + if log.Result == "failure" { + level = "ERROR" + } + + entries = append(entries, LogEntry{ + Timestamp: log.Timestamp.Format(time.RFC3339), + Level: level, + Actor: log.Actor, + Action: log.Action, + Resource: log.Resource, + Result: log.Result, + Message: log.Message, + IP: log.IP, + }) + } + + writeJSON(w, http.StatusOK, map[string]interface{}{ + "logs": entries, + "count": len(entries), + }) +} + +// handleGC triggers a garbage collection and returns stats +func (a *App) handleGC(w http.ResponseWriter, r *http.Request) { + var before, after runtime.MemStats + runtime.ReadMemStats(&before) + runtime.GC() + runtime.ReadMemStats(&after) + + writeJSON(w, http.StatusOK, map[string]interface{}{ + "before": map[string]interface{}{ + "alloc": before.Alloc, + "total_alloc": before.TotalAlloc, + "sys": before.Sys, + "num_gc": before.NumGC, + }, + "after": map[string]interface{}{ + "alloc": after.Alloc, + "total_alloc": after.TotalAlloc, + "sys": after.Sys, + "num_gc": after.NumGC, + }, + "freed": before.Alloc - after.Alloc, + }) +} diff --git a/internal/httpapp/docs_handlers.go b/internal/httpapp/docs_handlers.go new file mode 100644 index 0000000..a7df2dd --- /dev/null +++ 
b/internal/httpapp/docs_handlers.go @@ -0,0 +1,64 @@ +package httpapp + +import ( + "net/http" + "os" + "path/filepath" +) + +// handleAPIDocs serves the API documentation page +func (a *App) handleAPIDocs(w http.ResponseWriter, r *http.Request) { + // Simple HTML page with Swagger UI + html := ` + + + atlasOS API Documentation + + + + +
+ + + + +` + w.Header().Set("Content-Type", "text/html; charset=utf-8") + w.WriteHeader(http.StatusOK) + w.Write([]byte(html)) +} + +// handleOpenAPISpec serves the OpenAPI specification +func (a *App) handleOpenAPISpec(w http.ResponseWriter, r *http.Request) { + // Read OpenAPI spec from file system + specPath := filepath.Join("docs", "openapi.yaml") + spec, err := os.ReadFile(specPath) + if err != nil { + http.Error(w, "OpenAPI spec not found", http.StatusNotFound) + return + } + + w.Header().Set("Content-Type", "application/yaml; charset=utf-8") + w.WriteHeader(http.StatusOK) + w.Write(spec) +} diff --git a/internal/httpapp/error_handlers.go b/internal/httpapp/error_handlers.go new file mode 100644 index 0000000..8c6692c --- /dev/null +++ b/internal/httpapp/error_handlers.go @@ -0,0 +1,59 @@ +package httpapp + +import ( + "log" + "net/http" + + "gitea.avt.data-center.id/othman.suseno/atlas/internal/errors" +) + +// writeError writes a structured error response +func writeError(w http.ResponseWriter, err error) { + // Check if it's an APIError + if apiErr, ok := err.(*errors.APIError); ok { + writeJSON(w, apiErr.HTTPStatus, apiErr) + return + } + + // Default to internal server error + log.Printf("unhandled error: %v", err) + apiErr := errors.ErrInternal("an unexpected error occurred") + writeJSON(w, apiErr.HTTPStatus, apiErr) +} + +// handleServiceError handles errors from service operations with graceful degradation +func (a *App) handleServiceError(serviceName string, err error) error { + if err == nil { + return nil + } + + // Log the error for debugging + log.Printf("%s service error: %v", serviceName, err) + + // For service errors, we might want to continue operation + // but log the issue. The API request can still succeed + // even if service configuration fails (desired state is stored) + return errors.ErrServiceUnavailable(serviceName).WithDetails(err.Error()) +} + +// recoverPanic recovers from panics and returns a proper error response +func recoverPanic(w http.ResponseWriter, r *http.Request) { + if rec := recover(); rec != nil { + log.Printf("panic recovered: %v", rec) + err := errors.ErrInternal("an unexpected error occurred") + writeError(w, err) + } +} + +// errorMiddleware wraps handlers with panic recovery +func (a *App) errorMiddleware(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + defer recoverPanic(w, r) + next.ServeHTTP(w, r) + }) +} + +// writeJSONError is a convenience function for JSON error responses +func writeJSONError(w http.ResponseWriter, code int, message string) { + writeJSON(w, code, map[string]string{"error": message}) +} diff --git a/internal/httpapp/handlers.go b/internal/httpapp/handlers.go index ff5f510..bf3be71 100644 --- a/internal/httpapp/handlers.go +++ b/internal/httpapp/handlers.go @@ -4,6 +4,7 @@ import ( "encoding/json" "log" "net/http" + "time" ) func (a *App) handleDashboard(w http.ResponseWriter, r *http.Request) { @@ -26,18 +27,58 @@ func (a *App) handleHealthz(w http.ResponseWriter, r *http.Request) { } func (a *App) handleMetrics(w http.ResponseWriter, r *http.Request) { - // Stub metrics (Prometheus format). We'll wire real collectors later. 
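+	// The collector renders the plain-text Prometheus exposition format.
+	// Illustrative output lines (metric names are examples only):
+	//
+	//	# HELP atlas_pool_count Number of ZFS pools
+	//	# TYPE atlas_pool_count gauge
+	//	atlas_pool_count 2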
+ // Collect real-time metrics + // ZFS metrics + pools, _ := a.zfs.ListPools() + datasets, _ := a.zfs.ListDatasets("") + zvols, _ := a.zfs.ListZVOLs("") + snapshots, _ := a.zfs.ListSnapshots("") + + a.metricsCollector.UpdateZFSMetrics(pools, datasets, zvols, snapshots) + + // Service metrics + smbShares := a.smbStore.List() + nfsExports := a.nfsStore.List() + iscsiTargets := a.iscsiStore.List() + + smbStatus, _ := a.smbService.GetStatus() + nfsStatus, _ := a.nfsService.GetStatus() + iscsiStatus, _ := a.iscsiService.GetStatus() + + a.metricsCollector.UpdateServiceMetrics( + len(smbShares), + len(nfsExports), + len(iscsiTargets), + smbStatus, + nfsStatus, + iscsiStatus, + ) + + // Job metrics + allJobs := a.jobManager.List("") + running := 0 + completed := 0 + failed := 0 + for _, job := range allJobs { + switch job.Status { + case "running": + running++ + case "completed": + completed++ + case "failed": + failed++ + } + } + + a.metricsCollector.UpdateJobMetrics(len(allJobs), running, completed, failed) + + // Update uptime + a.metricsCollector.SetUptime(int64(time.Since(a.startTime).Seconds())) + + // Output Prometheus format w.Header().Set("Content-Type", "text/plain; version=0.0.4") w.WriteHeader(http.StatusOK) - _, _ = w.Write([]byte( - `# HELP atlas_build_info Build info -# TYPE atlas_build_info gauge -atlas_build_info{version="v0.1.0-dev"} 1 -# HELP atlas_up Whether the atlas-api process is up -# TYPE atlas_up gauge -atlas_up 1 -`, - )) + _, _ = w.Write([]byte(a.metricsCollector.Collect())) } func (a *App) render(w http.ResponseWriter, name string, data any) { diff --git a/internal/httpapp/middleware.go b/internal/httpapp/middleware.go index 119e5d8..eaa2bad 100644 --- a/internal/httpapp/middleware.go +++ b/internal/httpapp/middleware.go @@ -28,13 +28,36 @@ func requestID(next http.Handler) http.Handler { func logging(next http.Handler) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { start := time.Now() - next.ServeHTTP(w, r) + + // Create response writer wrapper to capture status code + rw := &responseWriterWrapper{ + ResponseWriter: w, + statusCode: http.StatusOK, + } + + next.ServeHTTP(rw, r) + d := time.Since(start) id, _ := r.Context().Value(requestIDKey).(string) - log.Printf("%s %s %s rid=%s dur=%s", r.RemoteAddr, r.Method, r.URL.Path, id, d) + + // Use structured logging if available, otherwise fallback to standard log + log.Printf("%s %s %s status=%d rid=%s dur=%s", + r.RemoteAddr, r.Method, r.URL.Path, rw.statusCode, id, d) }) } +// responseWriterWrapper wraps http.ResponseWriter to capture status code +// Note: This is different from the one in audit_middleware.go to avoid conflicts +type responseWriterWrapper struct { + http.ResponseWriter + statusCode int +} + +func (rw *responseWriterWrapper) WriteHeader(code int) { + rw.statusCode = code + rw.ResponseWriter.WriteHeader(code) +} + func newReqID() string { var b [16]byte if _, err := rand.Read(b[:]); err != nil { diff --git a/internal/httpapp/rate_limit.go b/internal/httpapp/rate_limit.go new file mode 100644 index 0000000..68cbbc2 --- /dev/null +++ b/internal/httpapp/rate_limit.go @@ -0,0 +1,165 @@ +package httpapp + +import ( + "net/http" + "sync" + "time" + + "gitea.avt.data-center.id/othman.suseno/atlas/internal/errors" +) + +// RateLimiter implements token bucket rate limiting +type RateLimiter struct { + mu sync.RWMutex + clients map[string]*clientLimiter + rate int // requests per window + window time.Duration // time window + cleanupTick *time.Ticker + stopCleanup chan 
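struct{}
+}
+
+// Illustrative refill arithmetic for the default configuration used by
+// rateLimitMiddleware below (100 tokens per 60s window): one token is
+// regained roughly every 600ms, so for example
+//
+//	rl := NewRateLimiter(100, time.Minute)
+//	rl.Allow("ip:192.0.2.1") // true until the bucket is empty
+//	// after ~6s of silence the bucket has regained about 10 tokens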
diff --git a/internal/httpapp/rate_limit.go b/internal/httpapp/rate_limit.go
new file mode 100644
index 0000000..68cbbc2
--- /dev/null
+++ b/internal/httpapp/rate_limit.go
@@ -0,0 +1,165 @@
+package httpapp
+
+import (
+	"net/http"
+	"sync"
+	"time"
+
+	"gitea.avt.data-center.id/othman.suseno/atlas/internal/errors"
+)
+
+// RateLimiter implements token bucket rate limiting
+type RateLimiter struct {
+	mu          sync.RWMutex
+	clients     map[string]*clientLimiter
+	rate        int           // requests per window
+	window      time.Duration // time window
+	cleanupTick *time.Ticker
+	stopCleanup chan struct{}
+}
+
+type clientLimiter struct {
+	tokens     int
+	lastUpdate time.Time
+	mu         sync.Mutex
+}
+
+// NewRateLimiter creates a new rate limiter
+func NewRateLimiter(rate int, window time.Duration) *RateLimiter {
+	rl := &RateLimiter{
+		clients:     make(map[string]*clientLimiter),
+		rate:        rate,
+		window:      window,
+		cleanupTick: time.NewTicker(5 * time.Minute),
+		stopCleanup: make(chan struct{}),
+	}
+
+	// Start cleanup goroutine
+	go rl.cleanup()
+
+	return rl
+}
+
+// cleanup periodically removes old client limiters
+func (rl *RateLimiter) cleanup() {
+	for {
+		select {
+		case <-rl.cleanupTick.C:
+			rl.mu.Lock()
+			now := time.Now()
+			for key, limiter := range rl.clients {
+				limiter.mu.Lock()
+				// Remove if last update was more than 2 windows ago
+				if now.Sub(limiter.lastUpdate) > rl.window*2 {
+					delete(rl.clients, key)
+				}
+				limiter.mu.Unlock()
+			}
+			rl.mu.Unlock()
+		case <-rl.stopCleanup:
+			return
+		}
+	}
+}
+
+// Stop stops the cleanup goroutine
+func (rl *RateLimiter) Stop() {
+	rl.cleanupTick.Stop()
+	close(rl.stopCleanup)
+}
+
+// Allow checks if a request from the given key should be allowed
+func (rl *RateLimiter) Allow(key string) bool {
+	rl.mu.Lock()
+	limiter, exists := rl.clients[key]
+	if !exists {
+		limiter = &clientLimiter{
+			tokens:     rl.rate,
+			lastUpdate: time.Now(),
+		}
+		rl.clients[key] = limiter
+	}
+	rl.mu.Unlock()
+
+	limiter.mu.Lock()
+	defer limiter.mu.Unlock()
+
+	now := time.Now()
+	elapsed := now.Sub(limiter.lastUpdate)
+
+	// Refill tokens based on elapsed time. Only advance lastUpdate when
+	// tokens are actually granted; otherwise a steady stream of requests
+	// would keep resetting the clock and the bucket would never refill.
+	if elapsed >= rl.window {
+		// Full refill
+		limiter.tokens = rl.rate
+		limiter.lastUpdate = now
+	} else {
+		// Partial refill proportional to elapsed time
+		tokensToAdd := int(float64(rl.rate) * elapsed.Seconds() / rl.window.Seconds())
+		if tokensToAdd > 0 {
+			limiter.tokens = min(limiter.tokens+tokensToAdd, rl.rate)
+			limiter.lastUpdate = now
+		}
+	}
+
+	// Check if we have tokens
+	if limiter.tokens > 0 {
+		limiter.tokens--
+		return true
+	}
+
+	return false
+}
+
+// getClientKey extracts a key for rate limiting from the request
+func getClientKey(r *http.Request) string {
+	// If authenticated, use user ID for more granular limiting
+	if user, ok := getUserFromContext(r); ok {
+		return "user:" + user.ID
+	}
+
+	// Otherwise fall back to the client IP address
+	return "ip:" + getClientIP(r)
+}
+
+// rateLimitMiddleware implements rate limiting
+func (a *App) rateLimitMiddleware(next http.Handler) http.Handler {
+	// Default: 100 requests per minute per client. Idle client entries
+	// are reclaimed by the limiter's own cleanup goroutine.
+	rateLimiter := NewRateLimiter(100, time.Minute)
+
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		// Skip rate limiting for public endpoints
+		if a.isPublicEndpoint(r.URL.Path) {
+			next.ServeHTTP(w, r)
+			return
+		}
+
+		// Add rate limit headers to every limited response, including 429s
+		w.Header().Set("X-RateLimit-Limit", "100")
+		w.Header().Set("X-RateLimit-Window", "60")
+
+		key := getClientKey(r)
+		if !rateLimiter.Allow(key) {
+			writeError(w, errors.NewAPIError(
+				errors.ErrCodeServiceUnavailable,
+				"rate limit exceeded",
+				http.StatusTooManyRequests,
+			).WithDetails("too many requests, please try again later"))
+			return
+		}
+
+		next.ServeHTTP(w, r)
+	})
+}
+
+func min(a, b int) int {
+	if a < b {
+		return a
+	}
+	return b
+}
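To make the refill arithmetic in `Allow` concrete: with the default 100 requests per 60 s window, a client that pauses 6 s earns back 100 × 6 / 60 = 10 tokens, and because the result is truncated to an integer, pauses shorter than 600 ms grant nothing. A standalone sketch of that calculation:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	rate := 100            // tokens per window
	window := time.Minute  // refill window
	elapsed := 6 * time.Second

	// Same expression Allow uses for a partial refill.
	tokensToAdd := int(float64(rate) * elapsed.Seconds() / window.Seconds())
	fmt.Println(tokensToAdd) // 10
}
```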
"net/http" "strings" + + "gitea.avt.data-center.id/othman.suseno/atlas/internal/errors" ) // methodHandler routes requests based on HTTP method @@ -143,6 +145,35 @@ func (a *App) handleNFSExportOps(w http.ResponseWriter, r *http.Request) { } // handleISCSITargetOps routes iSCSI target operations by method +func (a *App) handleBackupOps(w http.ResponseWriter, r *http.Request) { + backupID := pathParam(r, "/api/v1/backups/") + if backupID == "" { + writeError(w, errors.ErrBadRequest("backup id required")) + return + } + + switch r.Method { + case http.MethodGet: + // Check if it's a verify request + if r.URL.Query().Get("verify") == "true" { + a.handleVerifyBackup(w, r) + } else { + a.handleGetBackup(w, r) + } + case http.MethodPost: + // Restore backup (POST /api/v1/backups/{id}/restore) + if strings.HasSuffix(r.URL.Path, "/restore") { + a.handleRestoreBackup(w, r) + } else { + writeError(w, errors.ErrBadRequest("invalid backup operation")) + } + case http.MethodDelete: + a.handleDeleteBackup(w, r) + default: + writeError(w, errors.ErrBadRequest("method not allowed")) + } +} + func (a *App) handleISCSITargetOps(w http.ResponseWriter, r *http.Request) { if strings.HasSuffix(r.URL.Path, "/luns") { if r.Method == http.MethodPost { diff --git a/internal/httpapp/routes.go b/internal/httpapp/routes.go index bbe68ac..a548bf1 100644 --- a/internal/httpapp/routes.go +++ b/internal/httpapp/routes.go @@ -16,8 +16,36 @@ func (a *App) routes() { // Health & metrics a.mux.HandleFunc("/healthz", a.handleHealthz) + a.mux.HandleFunc("/health", a.handleHealthCheck) // Detailed health check a.mux.HandleFunc("/metrics", a.handleMetrics) + // Diagnostics + a.mux.HandleFunc("/api/v1/system/info", methodHandler( + func(w http.ResponseWriter, r *http.Request) { a.handleSystemInfo(w, r) }, + nil, nil, nil, nil, + )) + a.mux.HandleFunc("/api/v1/system/logs", methodHandler( + func(w http.ResponseWriter, r *http.Request) { a.handleLogs(w, r) }, + nil, nil, nil, nil, + )) + a.mux.HandleFunc("/api/v1/system/gc", methodHandler( + nil, + func(w http.ResponseWriter, r *http.Request) { a.handleGC(w, r) }, + nil, nil, nil, + )) + + // API Documentation + a.mux.HandleFunc("/api/docs", a.handleAPIDocs) + a.mux.HandleFunc("/api/openapi.yaml", a.handleOpenAPISpec) + + // Backup & Restore + a.mux.HandleFunc("/api/v1/backups", methodHandler( + func(w http.ResponseWriter, r *http.Request) { a.handleListBackups(w, r) }, + func(w http.ResponseWriter, r *http.Request) { a.handleCreateBackup(w, r) }, + nil, nil, nil, + )) + a.mux.HandleFunc("/api/v1/backups/", a.handleBackupOps) + // Dashboard API a.mux.HandleFunc("/api/v1/dashboard", methodHandler( func(w http.ResponseWriter, r *http.Request) { a.handleDashboardAPI(w, r) }, diff --git a/internal/httpapp/security_middleware.go b/internal/httpapp/security_middleware.go new file mode 100644 index 0000000..67bd6f7 --- /dev/null +++ b/internal/httpapp/security_middleware.go @@ -0,0 +1,121 @@ +package httpapp + +import ( + "net/http" + "strings" + + "gitea.avt.data-center.id/othman.suseno/atlas/internal/errors" +) + +// securityHeadersMiddleware adds security headers to responses +func (a *App) securityHeadersMiddleware(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Security headers + w.Header().Set("X-Content-Type-Options", "nosniff") + w.Header().Set("X-Frame-Options", "DENY") + w.Header().Set("X-XSS-Protection", "1; mode=block") + w.Header().Set("Referrer-Policy", "strict-origin-when-cross-origin") + 
diff --git a/internal/httpapp/security_middleware.go b/internal/httpapp/security_middleware.go
new file mode 100644
index 0000000..67bd6f7
--- /dev/null
+++ b/internal/httpapp/security_middleware.go
@@ -0,0 +1,123 @@
+package httpapp
+
+import (
+	"net/http"
+	"strings"
+
+	"gitea.avt.data-center.id/othman.suseno/atlas/internal/errors"
+)
+
+// securityHeadersMiddleware adds security headers to responses
+func (a *App) securityHeadersMiddleware(next http.Handler) http.Handler {
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		// Security headers
+		w.Header().Set("X-Content-Type-Options", "nosniff")
+		w.Header().Set("X-Frame-Options", "DENY")
+		w.Header().Set("X-XSS-Protection", "1; mode=block")
+		w.Header().Set("Referrer-Policy", "strict-origin-when-cross-origin")
+		w.Header().Set("Permissions-Policy", "geolocation=(), microphone=(), camera=()")
+
+		// HSTS (only for HTTPS)
+		if r.TLS != nil {
+			w.Header().Set("Strict-Transport-Security", "max-age=31536000; includeSubDomains")
+		}
+
+		// Content Security Policy (CSP)
+		csp := "default-src 'self'; script-src 'self' 'unsafe-inline' https://cdn.jsdelivr.net; style-src 'self' 'unsafe-inline' https://cdn.jsdelivr.net; img-src 'self' data:; font-src 'self' https://cdn.jsdelivr.net; connect-src 'self';"
+		w.Header().Set("Content-Security-Policy", csp)
+
+		next.ServeHTTP(w, r)
+	})
+}
+
+// corsMiddleware handles CORS requests
+func (a *App) corsMiddleware(next http.Handler) http.Handler {
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		origin := r.Header.Get("Origin")
+
+		// Origins allowed to make cross-origin requests (development defaults)
+		allowedOrigins := []string{
+			"http://localhost:8080",
+			"http://localhost:3000",
+			"http://127.0.0.1:8080",
+		}
+
+		// Check if origin is allowed
+		allowed := false
+		for _, allowedOrigin := range allowedOrigins {
+			if origin == allowedOrigin {
+				allowed = true
+				break
+			}
+		}
+
+		// Same-origin and non-browser requests carry no Origin header and
+		// need no CORS headers. (Do not widen this using the Referer
+		// header: it is attacker-controlled and would let any origin
+		// through with credentials.)
+		if origin == "" {
+			allowed = true
+		}
+
+		if allowed && origin != "" {
+			w.Header().Set("Access-Control-Allow-Origin", origin)
+			w.Header().Set("Access-Control-Allow-Methods", "GET, POST, PUT, DELETE, PATCH, OPTIONS")
+			w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization, X-Requested-With")
+			w.Header().Set("Access-Control-Allow-Credentials", "true")
+			w.Header().Set("Access-Control-Max-Age", "3600")
+		}
+
+		// Handle preflight requests
+		if r.Method == http.MethodOptions {
+			w.WriteHeader(http.StatusNoContent)
+			return
+		}
+
+		next.ServeHTTP(w, r)
+	})
+}
+
+// requestSizeMiddleware limits request body size
+func (a *App) requestSizeMiddleware(maxSize int64) func(http.Handler) http.Handler {
+	return func(next http.Handler) http.Handler {
+		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			// Limit request body size
+			r.Body = http.MaxBytesReader(w, r.Body, maxSize)
+			next.ServeHTTP(w, r)
+		})
+	}
+}
+
+// validateContentTypeMiddleware validates Content-Type for POST/PUT/PATCH requests
+func (a *App) validateContentTypeMiddleware(next http.Handler) http.Handler {
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		// Skip for GET, HEAD, OPTIONS, DELETE
+		if r.Method == http.MethodGet || r.Method == http.MethodHead ||
+			r.Method == http.MethodOptions || r.Method == http.MethodDelete {
+			next.ServeHTTP(w, r)
+			return
+		}
+
+		// Skip for public endpoints
+		if a.isPublicEndpoint(r.URL.Path) {
+			next.ServeHTTP(w, r)
+			return
+		}
+
+		// Check Content-Type for POST/PUT/PATCH
+		contentType := r.Header.Get("Content-Type")
+		if contentType == "" {
+			writeError(w, errors.ErrBadRequest("Content-Type header is required"))
+			return
+		}
+
+		// Allow JSON and form data
+		if !strings.HasPrefix(contentType, "application/json") &&
+			!strings.HasPrefix(contentType, "application/x-www-form-urlencoded") &&
+			!strings.HasPrefix(contentType, "multipart/form-data") {
+			writeError(w, errors.ErrBadRequest("Content-Type must be application/json"))
+			return
+		}
+
+		next.ServeHTTP(w, r)
+	})
+}
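These middlewares compose outer-to-inner, with CORS outermost so preflights are answered before anything else runs. A sketch of that wiring, using the functions above but with an illustrative helper name and chain (the actual server setup is not shown in this patch):

```go
// securityChain is a hypothetical composition helper; wrapping
// inner-first leaves corsMiddleware outermost.
func (a *App) securityChain(inner http.Handler) http.Handler {
	h := inner
	h = a.rateLimitMiddleware(h)
	h = a.validateContentTypeMiddleware(h)
	h = a.requestSizeMiddleware(10 * 1024 * 1024)(h) // 10 MB body cap
	h = a.securityHeadersMiddleware(h)
	h = a.corsMiddleware(h)
	return h
}
```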
diff --git a/internal/httpapp/validation_helpers.go b/internal/httpapp/validation_helpers.go
new file mode 100644
index 0000000..7916f68
--- /dev/null
+++ b/internal/httpapp/validation_helpers.go
@@ -0,0 +1,58 @@
+package httpapp
+
+import (
+	"fmt"
+	"strconv"
+	"strings"
+)
+
+// parseSizeString parses a human-readable size string to bytes
+func (a *App) parseSizeString(sizeStr string) (uint64, error) {
+	sizeStr = strings.TrimSpace(strings.ToUpper(sizeStr))
+	if sizeStr == "" {
+		return 0, fmt.Errorf("size cannot be empty")
+	}
+
+	// Extract number and unit
+	var numStr string
+	var unit string
+
+	for i, r := range sizeStr {
+		if r >= '0' && r <= '9' {
+			numStr += string(r)
+		} else {
+			unit = sizeStr[i:]
+			break
+		}
+	}
+
+	if numStr == "" {
+		return 0, fmt.Errorf("invalid size format: no number found")
+	}
+
+	num, err := strconv.ParseUint(numStr, 10, 64)
+	if err != nil {
+		return 0, fmt.Errorf("invalid size number: %w", err)
+	}
+
+	// Convert to bytes based on unit
+	multiplier := uint64(1)
+	switch unit {
+	case "":
+		multiplier = 1
+	case "K", "KB":
+		multiplier = 1024
+	case "M", "MB":
+		multiplier = 1024 * 1024
+	case "G", "GB":
+		multiplier = 1024 * 1024 * 1024
+	case "T", "TB":
+		multiplier = 1024 * 1024 * 1024 * 1024
+	case "P", "PB":
+		multiplier = 1024 * 1024 * 1024 * 1024 * 1024
+	default:
+		return 0, fmt.Errorf("invalid size unit: %s (allowed: K, M, G, T, P, optionally with a B suffix)", unit)
+	}
+
+	return num * multiplier, nil
+}
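A table-driven sketch of the conversions `parseSizeString` performs, written as a unit test in package `httpapp` (the zero-value `App` is assumed usable here, since the helper touches no receiver state):

```go
// In package httpapp; needs only the standard "testing" import.
func TestParseSizeString(t *testing.T) {
	a := &App{}
	cases := map[string]uint64{
		"512": 512,       // no unit: bytes
		"10K": 10 * 1024, // binary units throughout
		"1GB": 1 << 30,   // "G" and "GB" are equivalent
		"2T":  2 << 40,
	}
	for in, want := range cases {
		got, err := a.parseSizeString(in)
		if err != nil || got != want {
			t.Errorf("parseSizeString(%q) = %d, %v; want %d", in, got, err, want)
		}
	}
}
```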
diff --git a/internal/logger/logger.go b/internal/logger/logger.go
new file mode 100644
index 0000000..57a193a
--- /dev/null
+++ b/internal/logger/logger.go
@@ -0,0 +1,227 @@
+package logger
+
+import (
+	"encoding/json"
+	"fmt"
+	"io"
+	"os"
+	"sync"
+	"time"
+)
+
+// Level represents log level
+type Level int
+
+const (
+	LevelDebug Level = iota
+	LevelInfo
+	LevelWarn
+	LevelError
+)
+
+var levelNames = map[Level]string{
+	LevelDebug: "DEBUG",
+	LevelInfo:  "INFO",
+	LevelWarn:  "WARN",
+	LevelError: "ERROR",
+}
+
+// Logger provides structured logging
+type Logger struct {
+	mu       sync.Mutex
+	level    Level
+	output   io.Writer
+	jsonMode bool
+	prefix   string
+	fields   map[string]interface{} // base fields attached via WithFields
+}
+
+// LogEntry represents a structured log entry
+type LogEntry struct {
+	Timestamp string                 `json:"timestamp"`
+	Level     string                 `json:"level"`
+	Message   string                 `json:"message"`
+	Fields    map[string]interface{} `json:"fields,omitempty"`
+	Error     string                 `json:"error,omitempty"`
+}
+
+// New creates a new logger
+func New(level Level, output io.Writer, jsonMode bool) *Logger {
+	if output == nil {
+		output = os.Stdout
+	}
+	return &Logger{
+		level:    level,
+		output:   output,
+		jsonMode: jsonMode,
+	}
+}
+
+// SetLevel sets the log level
+func (l *Logger) SetLevel(level Level) {
+	l.mu.Lock()
+	defer l.mu.Unlock()
+	l.level = level
+}
+
+// SetOutput sets the output writer
+func (l *Logger) SetOutput(w io.Writer) {
+	l.mu.Lock()
+	defer l.mu.Unlock()
+	l.output = w
+}
+
+// Debug logs a debug message
+func (l *Logger) Debug(msg string, fields ...map[string]interface{}) {
+	l.log(LevelDebug, msg, nil, fields...)
+}
+
+// Info logs an info message
+func (l *Logger) Info(msg string, fields ...map[string]interface{}) {
+	l.log(LevelInfo, msg, nil, fields...)
+}
+
+// Warn logs a warning message
+func (l *Logger) Warn(msg string, fields ...map[string]interface{}) {
+	l.log(LevelWarn, msg, nil, fields...)
+}
+
+// Error logs an error message
+func (l *Logger) Error(msg string, err error, fields ...map[string]interface{}) {
+	l.log(LevelError, msg, err, fields...)
+}
+
+// log writes a log entry
+func (l *Logger) log(level Level, msg string, err error, fields ...map[string]interface{}) {
+	if level < l.level {
+		return
+	}
+
+	l.mu.Lock()
+	defer l.mu.Unlock()
+
+	entry := LogEntry{
+		Timestamp: time.Now().Format(time.RFC3339),
+		Level:     levelNames[level],
+		Message:   msg,
+	}
+
+	if err != nil {
+		entry.Error = err.Error()
+	}
+
+	// Merge base fields (from WithFields) and per-call fields;
+	// per-call fields win on key collision
+	if len(l.fields) > 0 || len(fields) > 0 {
+		entry.Fields = make(map[string]interface{})
+		for k, v := range l.fields {
+			entry.Fields[k] = v
+		}
+		for _, f := range fields {
+			for k, v := range f {
+				entry.Fields[k] = v
+			}
+		}
+	}
+
+	var output string
+	if l.jsonMode {
+		jsonData, jsonErr := json.Marshal(entry)
+		if jsonErr != nil {
+			// Fallback to text format if JSON fails
+			output = fmt.Sprintf("%s [%s] %s", entry.Timestamp, entry.Level, msg)
+			if err != nil {
+				output += fmt.Sprintf(" error=%v", err)
+			}
+		} else {
+			output = string(jsonData)
+		}
+	} else {
+		// Text format
+		output = fmt.Sprintf("%s [%s] %s", entry.Timestamp, entry.Level, msg)
+		if err != nil {
+			output += fmt.Sprintf(" error=%v", err)
+		}
+		if len(entry.Fields) > 0 {
+			for k, v := range entry.Fields {
+				output += fmt.Sprintf(" %s=%v", k, v)
+			}
+		}
+	}
+
+	fmt.Fprintln(l.output, output)
+}
+
+// WithFields returns a copy of the logger whose entries always include
+// the given fields
+func (l *Logger) WithFields(fields map[string]interface{}) *Logger {
+	merged := make(map[string]interface{}, len(l.fields)+len(fields))
+	for k, v := range l.fields {
+		merged[k] = v
+	}
+	for k, v := range fields {
+		merged[k] = v
+	}
+	return &Logger{
+		level:    l.level,
+		output:   l.output,
+		jsonMode: l.jsonMode,
+		prefix:   l.prefix,
+		fields:   merged,
+	}
+}
+
+// ParseLevel parses a log level string
+func ParseLevel(s string) Level {
+	switch s {
+	case "DEBUG", "debug":
+		return LevelDebug
+	case "INFO", "info":
+		return LevelInfo
+	case "WARN", "warn", "WARNING", "warning":
+		return LevelWarn
+	case "ERROR", "error":
+		return LevelError
+	default:
+		return LevelInfo
+	}
+}
+
+// Default logger instance
+var defaultLogger *Logger
+
+func init() {
+	levelStr := os.Getenv("ATLAS_LOG_LEVEL")
+	level := ParseLevel(levelStr)
+	jsonMode := os.Getenv("ATLAS_LOG_FORMAT") == "json"
+
+	defaultLogger = New(level, os.Stdout, jsonMode)
+}
+
+// Debug logs using default logger
+func Debug(msg string, fields ...map[string]interface{}) {
+	defaultLogger.Debug(msg, fields...)
+}
+
+// Info logs using default logger
+func Info(msg string, fields ...map[string]interface{}) {
+	defaultLogger.Info(msg, fields...)
+}
+
+// Warn logs using default logger
+func Warn(msg string, fields ...map[string]interface{}) {
+	defaultLogger.Warn(msg, fields...)
+}
+
+// Error logs using default logger
+func Error(msg string, err error, fields ...map[string]interface{}) {
+	defaultLogger.Error(msg, err, fields...)
+}
+
+// SetLevel sets the default logger level
+func SetLevel(level Level) {
+	defaultLogger.SetLevel(level)
+}
+
+// GetLogger returns the default logger
+func GetLogger() *Logger {
+	return defaultLogger
+}
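Usage of the logger package, including a derived logger from `WithFields`; this relies only on the API defined above:

```go
package main

import "gitea.avt.data-center.id/othman.suseno/atlas/internal/logger"

func main() {
	// Package-level helpers use the default logger configured from
	// ATLAS_LOG_LEVEL / ATLAS_LOG_FORMAT.
	logger.Info("share created", map[string]interface{}{"share": "media"})

	// A derived logger carries its base fields on every entry.
	l := logger.GetLogger().WithFields(map[string]interface{}{"component": "smb"})
	l.Warn("reload took longer than expected", map[string]interface{}{"ms": 842})
}
```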
diff --git a/internal/metrics/collector.go b/internal/metrics/collector.go
new file mode 100644
index 0000000..0790589
--- /dev/null
+++ b/internal/metrics/collector.go
@@ -0,0 +1,217 @@
+package metrics
+
+import (
+	"fmt"
+	"sync"
+	"time"
+
+	"gitea.avt.data-center.id/othman.suseno/atlas/internal/models"
+)
+
+// Collector gathers system metrics
+type Collector struct {
+	mu sync.RWMutex
+
+	// ZFS metrics
+	poolCount      int
+	datasetCount   int
+	zvolCount      int
+	snapshotCount  int
+	totalCapacity  uint64
+	totalAllocated uint64
+	totalFree      uint64
+
+	// Service metrics
+	smbSharesCount     int
+	nfsExportsCount    int
+	iscsiTargetsCount  int
+	smbServiceStatus   int // 1 = running, 0 = stopped
+	nfsServiceStatus   int
+	iscsiServiceStatus int
+
+	// Job metrics
+	jobsTotal     int
+	jobsRunning   int
+	jobsCompleted int
+	jobsFailed    int
+
+	// System metrics
+	uptimeSeconds int64
+	lastUpdate    time.Time
+}
+
+// NewCollector creates a new metrics collector
+func NewCollector() *Collector {
+	return &Collector{
+		lastUpdate: time.Now(),
+	}
+}
+
+// UpdateZFSMetrics updates ZFS-related metrics
+func (c *Collector) UpdateZFSMetrics(pools []models.Pool, datasets []models.Dataset, zvols []models.ZVOL, snapshots []models.Snapshot) {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	c.poolCount = len(pools)
+	c.datasetCount = len(datasets)
+	c.zvolCount = len(zvols)
+	c.snapshotCount = len(snapshots)
+
+	c.totalCapacity = 0
+	c.totalAllocated = 0
+	c.totalFree = 0
+
+	for _, pool := range pools {
+		c.totalCapacity += pool.Size
+		c.totalAllocated += pool.Allocated
+		c.totalFree += pool.Free
+	}
+
+	c.lastUpdate = time.Now()
+}
+
+// UpdateServiceMetrics updates storage service metrics
+func (c *Collector) UpdateServiceMetrics(smbShares, nfsExports, iscsiTargets int, smbStatus, nfsStatus, iscsiStatus bool) {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	c.smbSharesCount = smbShares
+	c.nfsExportsCount = nfsExports
+	c.iscsiTargetsCount = iscsiTargets
+
+	if smbStatus {
+		c.smbServiceStatus = 1
+	} else {
+		c.smbServiceStatus = 0
+	}
+
+	if nfsStatus {
+		c.nfsServiceStatus = 1
+	} else {
+		c.nfsServiceStatus = 0
+	}
+
+	if iscsiStatus {
+		c.iscsiServiceStatus = 1
+	} else {
+		c.iscsiServiceStatus = 0
+	}
+
+	c.lastUpdate = time.Now()
+}
+
+// UpdateJobMetrics updates job-related metrics
+func (c *Collector) UpdateJobMetrics(total, running, completed, failed int) {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	c.jobsTotal = total
+	c.jobsRunning = running
+	c.jobsCompleted = completed
+	c.jobsFailed = failed
+
+	c.lastUpdate = time.Now()
+}
+
+// SetUptime sets the system uptime
+func (c *Collector) SetUptime(seconds int64) {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	c.uptimeSeconds = seconds
+}
+
+// Collect returns metrics in Prometheus format
+func (c *Collector) Collect() string {
+	c.mu.RLock()
+	defer c.mu.RUnlock()
+
+	var output string
+
+	// Build info
+	output += "# HELP atlas_build_info Build information\n"
+	output += "# TYPE atlas_build_info gauge\n"
+	output += `atlas_build_info{version="v0.1.0-dev"} 1` + "\n\n"
+
+	// System uptime
+	output += "# HELP atlas_uptime_seconds System uptime in seconds\n"
+	output += "# TYPE atlas_uptime_seconds gauge\n"
+	output += fmt.Sprintf("atlas_uptime_seconds %d\n\n", c.uptimeSeconds)
+
+	// ZFS metrics
+	output += "# HELP atlas_zfs_pools_total Total number of ZFS pools\n"
+	output += "# TYPE atlas_zfs_pools_total gauge\n"
+	output += fmt.Sprintf("atlas_zfs_pools_total %d\n\n", c.poolCount)
+
+	output += "# HELP atlas_zfs_datasets_total Total number of ZFS datasets\n"
+	output += "# TYPE atlas_zfs_datasets_total gauge\n"
+	output += fmt.Sprintf("atlas_zfs_datasets_total %d\n\n", c.datasetCount)
+
+	output += "# HELP atlas_zfs_zvols_total Total number of ZFS ZVOLs\n"
+	output += "# TYPE atlas_zfs_zvols_total gauge\n"
+	output += fmt.Sprintf("atlas_zfs_zvols_total %d\n\n", c.zvolCount)
+
+	output += "# HELP atlas_zfs_snapshots_total Total number of ZFS snapshots\n"
+	output += "# TYPE atlas_zfs_snapshots_total gauge\n"
+	output += fmt.Sprintf("atlas_zfs_snapshots_total %d\n\n", c.snapshotCount)
+
+	output += "# HELP atlas_zfs_capacity_bytes Total ZFS pool capacity in bytes\n"
+	output += "# TYPE atlas_zfs_capacity_bytes gauge\n"
+	output += fmt.Sprintf("atlas_zfs_capacity_bytes %d\n\n", c.totalCapacity)
+
+	output += "# HELP atlas_zfs_allocated_bytes Total ZFS pool allocated space in bytes\n"
+	output += "# TYPE atlas_zfs_allocated_bytes gauge\n"
+	output += fmt.Sprintf("atlas_zfs_allocated_bytes %d\n\n", c.totalAllocated)
+
+	output += "# HELP atlas_zfs_free_bytes Total ZFS pool free space in bytes\n"
+	output += "# TYPE atlas_zfs_free_bytes gauge\n"
+	output += fmt.Sprintf("atlas_zfs_free_bytes %d\n\n", c.totalFree)
+
+	// Service metrics
+	output += "# HELP atlas_smb_shares_total Total number of SMB shares\n"
+	output += "# TYPE atlas_smb_shares_total gauge\n"
+	output += fmt.Sprintf("atlas_smb_shares_total %d\n\n", c.smbSharesCount)
+
+	output += "# HELP atlas_nfs_exports_total Total number of NFS exports\n"
+	output += "# TYPE atlas_nfs_exports_total gauge\n"
+	output += fmt.Sprintf("atlas_nfs_exports_total %d\n\n", c.nfsExportsCount)
+
+	output += "# HELP atlas_iscsi_targets_total Total number of iSCSI targets\n"
+	output += "# TYPE atlas_iscsi_targets_total gauge\n"
+	output += fmt.Sprintf("atlas_iscsi_targets_total %d\n\n", c.iscsiTargetsCount)
+
+	output += "# HELP atlas_smb_service_status SMB service status (1=running, 0=stopped)\n"
+	output += "# TYPE atlas_smb_service_status gauge\n"
+	output += fmt.Sprintf("atlas_smb_service_status %d\n\n", c.smbServiceStatus)
+
+	output += "# HELP atlas_nfs_service_status NFS service status (1=running, 0=stopped)\n"
+	output += "# TYPE atlas_nfs_service_status gauge\n"
+	output += fmt.Sprintf("atlas_nfs_service_status %d\n\n", c.nfsServiceStatus)
+
+	output += "# HELP atlas_iscsi_service_status iSCSI service status (1=running, 0=stopped)\n"
+	output += "# TYPE atlas_iscsi_service_status gauge\n"
+	output += fmt.Sprintf("atlas_iscsi_service_status %d\n\n", c.iscsiServiceStatus)
+
+	// Job metrics. Note: these values are recomputed from the current job
+	// list on every scrape and can decrease, so they are exposed as gauges
+	// rather than counters.
+	output += "# HELP atlas_jobs_total Total number of jobs\n"
+	output += "# TYPE atlas_jobs_total gauge\n"
+	output += fmt.Sprintf("atlas_jobs_total %d\n\n", c.jobsTotal)
+
+	output += "# HELP atlas_jobs_running Number of running jobs\n"
+	output += "# TYPE atlas_jobs_running gauge\n"
+	output += fmt.Sprintf("atlas_jobs_running %d\n\n", c.jobsRunning)
+
+	output += "# HELP atlas_jobs_completed_total Total number of completed jobs\n"
+	output += "# TYPE atlas_jobs_completed_total gauge\n"
+	output += fmt.Sprintf("atlas_jobs_completed_total %d\n\n", c.jobsCompleted)
+
+	output += "# HELP atlas_jobs_failed_total Total number of failed jobs\n"
+	output += "# TYPE atlas_jobs_failed_total gauge\n"
+	output += fmt.Sprintf("atlas_jobs_failed_total %d\n\n", c.jobsFailed)
+
+	// API status
+	output += "# HELP atlas_up Whether the atlas-api process is up\n"
+	output += "# TYPE atlas_up gauge\n"
+	output += "atlas_up 1\n"
+
+	return output
+}
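A minimal standalone use of the collector, exercising only the job and uptime setters defined above and printing the Prometheus text exposition:

```go
package main

import (
	"fmt"

	"gitea.avt.data-center.id/othman.suseno/atlas/internal/metrics"
)

func main() {
	c := metrics.NewCollector()
	c.UpdateJobMetrics(12, 2, 9, 1) // total, running, completed, failed
	c.SetUptime(3600)

	// Emits lines such as "atlas_jobs_running 2" and "atlas_up 1".
	fmt.Print(c.Collect())
}
```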
diff --git a/internal/services/nfs.go b/internal/services/nfs.go
index 5082fce..da16217 100644
--- a/internal/services/nfs.go
+++ b/internal/services/nfs.go
@@ -52,13 +52,16 @@ func (s *NFSService) ApplyConfiguration(exports []models.NFSExport) error {
 		return fmt.Errorf("replace exports: %w", err)
 	}
 
-	// Reload NFS exports
-	if err := s.reloadExports(); err != nil {
+	// Reload NFS exports with error recovery
+	reloadErr := s.reloadExports()
+	if reloadErr != nil {
 		// Try to restore backup on failure
 		if _, err2 := os.Stat(backupPath); err2 == nil {
-			os.Rename(backupPath, s.exportsPath)
+			if restoreErr := os.Rename(backupPath, s.exportsPath); restoreErr != nil {
+				return fmt.Errorf("reload failed and backup restore failed: reload=%v, restore=%v", reloadErr, restoreErr)
+			}
 		}
-		return fmt.Errorf("reload exports: %w", err)
+		return fmt.Errorf("reload exports: %w", reloadErr)
 	}
 
 	return nil
diff --git a/internal/services/smb.go b/internal/services/smb.go
index dcafb66..43842f6 100644
--- a/internal/services/smb.go
+++ b/internal/services/smb.go
@@ -55,13 +55,16 @@ func (s *SMBService) ApplyConfiguration(shares []models.SMBShare) error {
 		return fmt.Errorf("replace config: %w", err)
 	}
 
-	// Reload Samba service
-	if err := s.reloadService(); err != nil {
+	// Reload Samba service with error recovery
+	reloadErr := s.reloadService()
+	if reloadErr != nil {
 		// Try to restore backup on failure
 		if _, err2 := os.Stat(backupPath); err2 == nil {
-			os.Rename(backupPath, s.configPath)
+			if restoreErr := os.Rename(backupPath, s.configPath); restoreErr != nil {
+				return fmt.Errorf("reload failed and backup restore failed: reload=%v, restore=%v", reloadErr, restoreErr)
+			}
 		}
-		return fmt.Errorf("reload service: %w", err)
+		return fmt.Errorf("reload service: %w", reloadErr)
 	}
 
 	return nil
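Both service fixes follow the same apply-reload-rollback shape. A condensed sketch of that flow (function and parameter names are illustrative, not the services' actual internals):

```go
// applyWithRollback: if reload fails, put the previous config back; if
// even the restore fails, report both errors so neither is swallowed.
func applyWithRollback(configPath, backupPath string, reload func() error) error {
	reloadErr := reload()
	if reloadErr == nil {
		return nil
	}
	if _, err := os.Stat(backupPath); err == nil {
		if restoreErr := os.Rename(backupPath, configPath); restoreErr != nil {
			return fmt.Errorf("reload failed and backup restore failed: reload=%v, restore=%v",
				reloadErr, restoreErr)
		}
	}
	return fmt.Errorf("reload failed: %w", reloadErr)
}
```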
diff --git a/internal/validation/validator.go b/internal/validation/validator.go
new file mode 100644
index 0000000..a1cac9d
--- /dev/null
+++ b/internal/validation/validator.go
@@ -0,0 +1,279 @@
+package validation
+
+import (
+	"fmt"
+	"regexp"
+	"strings"
+	"unicode"
+)
+
+var (
+	// Valid pool/dataset name pattern (ZFS naming rules)
+	zfsNamePattern = regexp.MustCompile(`^[a-zA-Z0-9][a-zA-Z0-9_\-\.:]*$`)
+
+	// Valid username pattern
+	usernamePattern = regexp.MustCompile(`^[a-zA-Z0-9][a-zA-Z0-9_\-\.]{2,31}$`)
+
+	// Valid share name pattern (SMB naming rules)
+	shareNamePattern = regexp.MustCompile(`^[a-zA-Z0-9][a-zA-Z0-9_\-\.]{0,79}$`)
+
+	// IQN pattern (simplified - iqn.yyyy-mm.reversed.domain:identifier)
+	iqnPattern = regexp.MustCompile(`^iqn\.\d{4}-\d{2}\.[a-zA-Z0-9][a-zA-Z0-9\-\.]*:[a-zA-Z0-9][a-zA-Z0-9\-_\.]*$`)
+
+	// Email pattern (basic)
+	emailPattern = regexp.MustCompile(`^[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}$`)
+
+	// CIDR pattern for NFS clients
+	cidrPattern = regexp.MustCompile(`^(\d{1,3}\.){3}\d{1,3}(/\d{1,2})?$`)
+)
+
+// ValidationError represents a validation error
+type ValidationError struct {
+	Field   string
+	Message string
+}
+
+func (e *ValidationError) Error() string {
+	if e.Field != "" {
+		return fmt.Sprintf("validation error on field '%s': %s", e.Field, e.Message)
+	}
+	return fmt.Sprintf("validation error: %s", e.Message)
+}
+
+// ValidateZFSName validates a ZFS pool or dataset name
+func ValidateZFSName(name string) error {
+	if name == "" {
+		return &ValidationError{Field: "name", Message: "name cannot be empty"}
+	}
+
+	if len(name) > 256 {
+		return &ValidationError{Field: "name", Message: "name too long (max 256 characters)"}
+	}
+
+	if !zfsNamePattern.MatchString(name) {
+		return &ValidationError{Field: "name", Message: "invalid characters (allowed: a-z, A-Z, 0-9, _, -, ., :)"}
+	}
+
+	// ZFS names cannot start with certain characters
+	if strings.HasPrefix(name, "-") || strings.HasPrefix(name, ".") {
+		return &ValidationError{Field: "name", Message: "name cannot start with '-' or '.'"}
+	}
+
+	return nil
+}
+
+// ValidateUsername validates a username
+func ValidateUsername(username string) error {
+	if username == "" {
+		return &ValidationError{Field: "username", Message: "username cannot be empty"}
+	}
+
+	if len(username) < 3 {
+		return &ValidationError{Field: "username", Message: "username too short (min 3 characters)"}
+	}
+
+	if len(username) > 32 {
+		return &ValidationError{Field: "username", Message: "username too long (max 32 characters)"}
+	}
+
+	if !usernamePattern.MatchString(username) {
+		return &ValidationError{Field: "username", Message: "invalid characters (allowed: a-z, A-Z, 0-9, _, -, .)"}
+	}
+
+	return nil
+}
+
+// ValidatePassword validates a password
+func ValidatePassword(password string) error {
+	if password == "" {
+		return &ValidationError{Field: "password", Message: "password cannot be empty"}
+	}
+
+	if len(password) < 8 {
+		return &ValidationError{Field: "password", Message: "password too short (min 8 characters)"}
+	}
+
+	if len(password) > 128 {
+		return &ValidationError{Field: "password", Message: "password too long (max 128 characters)"}
+	}
+
+	// Check for at least one letter and one number
+	hasLetter := false
+	hasNumber := false
+
+	for _, r := range password {
+		if unicode.IsLetter(r) {
+			hasLetter = true
+		}
+		if unicode.IsNumber(r) {
+			hasNumber = true
+		}
+	}
+
+	if !hasLetter {
+		return &ValidationError{Field: "password", Message: "password must contain at least one letter"}
+	}
+
+	if !hasNumber {
+		return &ValidationError{Field: "password", Message: "password must contain at least one number"}
+	}
+
+	return nil
+}
+
+// ValidateEmail validates an email address
+func ValidateEmail(email string) error {
+	if email == "" {
+		return nil // Email is optional
+	}
+
+	if len(email) > 254 {
+		return &ValidationError{Field: "email", Message: "email too long (max 254 characters)"}
+	}
+
+	if !emailPattern.MatchString(email) {
+		return &ValidationError{Field: "email", Message: "invalid email format"}
+	}
+
+	return nil
+}
+
+// ValidateShareName validates an SMB share name
+func ValidateShareName(name string) error {
+	if name == "" {
+		return &ValidationError{Field: "name", Message: "share name cannot be empty"}
+	}
+
+	if len(name) > 80 {
+		return &ValidationError{Field: "name", Message: "share name too long (max 80 characters)"}
+	}
+
+	if !shareNamePattern.MatchString(name) {
+		return &ValidationError{Field: "name", Message: "invalid share name (allowed: a-z, A-Z, 0-9, _, -, .)"}
+	}
+
+	// Reserved names
+	reserved := []string{"CON", "PRN", "AUX", "NUL", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8", "COM9", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9"}
+	upperName := strings.ToUpper(name)
+	for _, r := range reserved {
+		if upperName == r {
+			return &ValidationError{Field: "name", Message: fmt.Sprintf("share name '%s' is reserved", name)}
+		}
+	}
+
+	return nil
+}
+
+// ValidateIQN validates an iSCSI Qualified Name
+func ValidateIQN(iqn string) error {
+	if iqn == "" {
+		return &ValidationError{Field: "iqn", Message: "IQN cannot be empty"}
+	}
+
+	if len(iqn) > 223 {
+		return &ValidationError{Field: "iqn", Message: "IQN too long (max 223 characters)"}
+	}
+
+	if !strings.HasPrefix(iqn, "iqn.") {
+		return &ValidationError{Field: "iqn", Message: "IQN must start with 'iqn.'"}
+	}
+
+	// Basic format validation (can be more strict)
+	if !iqnPattern.MatchString(iqn) {
+		return &ValidationError{Field: "iqn", Message: "invalid IQN format (expected: iqn.yyyy-mm.reversed.domain:identifier)"}
+	}
+
+	return nil
+}
+
+// ValidateSize validates a size string (e.g., "10G", "1T")
+func ValidateSize(sizeStr string) error {
+	if sizeStr == "" {
+		return &ValidationError{Field: "size", Message: "size cannot be empty"}
+	}
+
+	// Pattern: number followed by an optional unit (K, M, G, T, P),
+	// optionally with a B suffix, matching what parseSizeString accepts
+	sizePattern := regexp.MustCompile(`^(\d+)((?:[KMGTP]B?)?)$`)
+	if !sizePattern.MatchString(strings.ToUpper(sizeStr)) {
+		return &ValidationError{Field: "size", Message: "invalid size format (expected: number with optional unit K, M, G, T, P)"}
+	}
+
+	return nil
+}
+
+// ValidatePath validates a filesystem path
+func ValidatePath(path string) error {
+	if path == "" {
+		return nil // Path is optional (can be auto-filled)
+	}
+
+	if !strings.HasPrefix(path, "/") {
+		return &ValidationError{Field: "path", Message: "path must be absolute (start with /)"}
+	}
+
+	if len(path) > 4096 {
+		return &ValidationError{Field: "path", Message: "path too long (max 4096 characters)"}
+	}
+
+	// Check for dangerous path components
+	dangerous := []string{"..", "//", "\x00"}
+	for _, d := range dangerous {
+		if strings.Contains(path, d) {
+			return &ValidationError{Field: "path", Message: fmt.Sprintf("path contains invalid component: %s", d)}
+		}
+	}
+
+	return nil
+}
+
+// ValidateCIDR validates a CIDR notation or hostname
+func ValidateCIDR(cidr string) error {
+	if cidr == "" {
+		return &ValidationError{Field: "client", Message: "client cannot be empty"}
+	}
+
+	// Allow wildcard
+	if cidr == "*" {
+		return nil
+	}
+
+	// Check if it's a CIDR
+	if cidrPattern.MatchString(cidr) {
+		return nil
+	}
+
+	// Check if it's a valid hostname
+	hostnamePattern := regexp.MustCompile(`^[a-zA-Z0-9]([a-zA-Z0-9\-]{0,61}[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9\-]{0,61}[a-zA-Z0-9])?)*$`)
+	if hostnamePattern.MatchString(cidr) {
+		return nil
+	}
+
+	return &ValidationError{Field: "client", Message: "invalid client format (expected: CIDR, hostname, or '*')"}
+}
+
+// SanitizeString removes potentially dangerous characters
+func SanitizeString(s string) string {
+	// Remove null bytes and control characters
+	var result strings.Builder
+	for _, r := range s {
+		if r >= 32 && r != 127 {
+			result.WriteRune(r)
+		}
+	}
+	return strings.TrimSpace(result.String())
+}
+
+// SanitizePath sanitizes a filesystem path
+func SanitizePath(path string) string {
+	// Remove leading/trailing whitespace and normalize slashes
+	path = strings.TrimSpace(path)
+	path = strings.ReplaceAll(path, "\\", "/")
+
+	// Remove multiple slashes
+	for strings.Contains(path, "//") {
+		path = strings.ReplaceAll(path, "//", "/")
+	}
+
+	return path
+}
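A quick tour of the validators, runnable as-is against the package above (the IQN and CIDR literals are just sample inputs):

```go
package main

import (
	"fmt"

	"gitea.avt.data-center.id/othman.suseno/atlas/internal/validation"
)

func main() {
	fmt.Println(validation.ValidateZFSName("tank"))                       // <nil>
	fmt.Println(validation.ValidateZFSName("bad name"))                   // invalid characters
	fmt.Println(validation.ValidateIQN("iqn.2025-01.id.example:target1")) // <nil>
	fmt.Println(validation.ValidateSize("10GB"))                          // <nil>
	fmt.Println(validation.ValidateCIDR("10.0.0.0/24"))                   // <nil>
}
```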