still fixing i40 vtl issue

This commit is contained in:
Warp Agent
2025-12-31 03:04:11 +07:00
parent 2de3c5f6ab
commit a558c97088
11 changed files with 3901 additions and 19 deletions

Binary file not shown.

View File

@@ -175,3 +175,209 @@ func (h *Handler) ListClients(c *gin.Context) {
"total": len(clients),
})
}
// GetDashboardStats serves the backup dashboard statistics.
//
// The stats are fetched from the backup service using the request's context
// and written as JSON with 200 OK. If the service call fails, the error is
// logged and a generic 500 response is sent instead.
func (h *Handler) GetDashboardStats(c *gin.Context) {
	ctx := c.Request.Context()

	result, svcErr := h.service.GetDashboardStats(ctx)
	if svcErr != nil {
		h.logger.Error("Failed to get dashboard stats", "error", svcErr)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to get dashboard stats"})
		return
	}

	c.JSON(http.StatusOK, result)
}
// ListStoragePools responds with every storage pool known to the service.
//
// The response body has the shape {"pools": [...], "total": N}. A service
// failure is logged and reported as a generic 500.
func (h *Handler) ListStoragePools(c *gin.Context) {
	ctx := c.Request.Context()

	result, listErr := h.service.ListStoragePools(ctx)
	if listErr != nil {
		h.logger.Error("Failed to list storage pools", "error", listErr)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to list storage pools"})
		return
	}

	// Swap a nil slice for an empty one so "pools" is an array, not null,
	// in the encoded JSON.
	if result == nil {
		result = make([]StoragePool, 0)
	}
	count := len(result)

	h.logger.Info("Listed storage pools", "count", count)
	c.JSON(http.StatusOK, gin.H{
		"pools": result,
		"total": count,
	})
}
// ListStorageVolumes responds with storage volumes.
//
// The "pool_name" query parameter is read from the request and handed to the
// service unchanged. The response body has the shape
// {"volumes": [...], "total": N}. A service failure is logged and reported as
// a generic 500.
func (h *Handler) ListStorageVolumes(c *gin.Context) {
	pool := c.Query("pool_name")

	result, listErr := h.service.ListStorageVolumes(c.Request.Context(), pool)
	if listErr != nil {
		h.logger.Error("Failed to list storage volumes", "error", listErr)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to list storage volumes"})
		return
	}

	// Swap a nil slice for an empty one so "volumes" is an array, not null,
	// in the encoded JSON.
	if result == nil {
		result = make([]StorageVolume, 0)
	}

	body := gin.H{
		"volumes": result,
		"total":   len(result),
	}
	c.JSON(http.StatusOK, body)
}
// ListStorageDaemons responds with every storage daemon known to the service.
//
// The response body has the shape {"daemons": [...], "total": N}. A service
// failure is logged and reported as a generic 500.
func (h *Handler) ListStorageDaemons(c *gin.Context) {
	ctx := c.Request.Context()

	result, listErr := h.service.ListStorageDaemons(ctx)
	if listErr != nil {
		h.logger.Error("Failed to list storage daemons", "error", listErr)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to list storage daemons"})
		return
	}

	// Swap a nil slice for an empty one so "daemons" is an array, not null,
	// in the encoded JSON.
	if result == nil {
		result = make([]StorageDaemon, 0)
	}

	body := gin.H{
		"daemons": result,
		"total":   len(result),
	}
	c.JSON(http.StatusOK, body)
}
// CreateStoragePool creates a storage pool from the JSON request body.
//
// The body is bound into a CreatePoolRequest; a binding failure yields 400
// with the binding error text. A service failure is logged and reported as
// 500 with the service error text. On success the created pool is returned
// with 201 Created.
func (h *Handler) CreateStoragePool(c *gin.Context) {
	var payload CreatePoolRequest
	if bindErr := c.ShouldBindJSON(&payload); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": bindErr.Error()})
		return
	}

	created, createErr := h.service.CreateStoragePool(c.Request.Context(), payload)
	if createErr != nil {
		h.logger.Error("Failed to create storage pool", "error", createErr)
		c.JSON(http.StatusInternalServerError, gin.H{"error": createErr.Error()})
		return
	}

	c.JSON(http.StatusCreated, created)
}
// DeleteStoragePool deletes the storage pool identified by the ":id" path
// parameter.
//
// Responses:
//   - 400 when the ID is missing or is not a plain integer.
//   - 500 (with the service error text) when the service fails to delete.
//   - 200 with a confirmation message on success.
func (h *Handler) DeleteStoragePool(c *gin.Context) {
	idStr := c.Param("id")
	if idStr == "" {
		c.JSON(http.StatusBadRequest, gin.H{"error": "pool ID is required"})
		return
	}

	// Scan an integer plus an optional trailing token. A bare "%d" scan stops
	// at the first non-digit and would accept "12abc" as 12, deleting a pool
	// the caller never named. Exactly one scanned item means the whole
	// parameter was a number.
	var (
		poolID   int
		trailing string
	)
	// The error is intentionally ignored: for valid input Sscanf reports an
	// unexpected EOF while looking for the trailing token, so only the item
	// count is meaningful here.
	if n, _ := fmt.Sscanf(idStr, "%d%s", &poolID, &trailing); n != 1 {
		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid pool ID"})
		return
	}

	if err := h.service.DeleteStoragePool(c.Request.Context(), poolID); err != nil {
		h.logger.Error("Failed to delete storage pool", "error", err, "pool_id", poolID)
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	c.JSON(http.StatusOK, gin.H{"message": "pool deleted successfully"})
}
// CreateStorageVolume creates a storage volume from the JSON request body.
//
// The body is bound into a CreateVolumeRequest; a binding failure yields 400
// with the binding error text. A service failure is logged and reported as
// 500 with the service error text. On success the created volume is returned
// with 201 Created.
func (h *Handler) CreateStorageVolume(c *gin.Context) {
	var payload CreateVolumeRequest
	if bindErr := c.ShouldBindJSON(&payload); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": bindErr.Error()})
		return
	}

	created, createErr := h.service.CreateStorageVolume(c.Request.Context(), payload)
	if createErr != nil {
		h.logger.Error("Failed to create storage volume", "error", createErr)
		c.JSON(http.StatusInternalServerError, gin.H{"error": createErr.Error()})
		return
	}

	c.JSON(http.StatusCreated, created)
}
// UpdateStorageVolume updates the storage volume identified by the ":id" path
// parameter using the JSON request body.
//
// Responses:
//   - 400 when the ID is missing, is not a plain integer, or the body cannot
//     be bound into an UpdateVolumeRequest.
//   - 500 (with the service error text) when the service fails to update.
//   - 200 with the updated volume on success.
func (h *Handler) UpdateStorageVolume(c *gin.Context) {
	idStr := c.Param("id")
	if idStr == "" {
		c.JSON(http.StatusBadRequest, gin.H{"error": "volume ID is required"})
		return
	}

	// Scan an integer plus an optional trailing token. A bare "%d" scan stops
	// at the first non-digit and would accept "12abc" as 12, updating a
	// volume the caller never named. Exactly one scanned item means the whole
	// parameter was a number.
	var (
		volumeID int
		trailing string
	)
	// The error is intentionally ignored: for valid input Sscanf reports an
	// unexpected EOF while looking for the trailing token, so only the item
	// count is meaningful here.
	if n, _ := fmt.Sscanf(idStr, "%d%s", &volumeID, &trailing); n != 1 {
		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid volume ID"})
		return
	}

	var req UpdateVolumeRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	volume, err := h.service.UpdateStorageVolume(c.Request.Context(), volumeID, req)
	if err != nil {
		h.logger.Error("Failed to update storage volume", "error", err, "volume_id", volumeID)
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	c.JSON(http.StatusOK, volume)
}
// DeleteStorageVolume deletes the storage volume identified by the ":id" path
// parameter.
//
// Responses:
//   - 400 when the ID is missing or is not a plain integer.
//   - 500 (with the service error text) when the service fails to delete.
//   - 200 with a confirmation message on success.
func (h *Handler) DeleteStorageVolume(c *gin.Context) {
	idStr := c.Param("id")
	if idStr == "" {
		c.JSON(http.StatusBadRequest, gin.H{"error": "volume ID is required"})
		return
	}

	// Scan an integer plus an optional trailing token. A bare "%d" scan stops
	// at the first non-digit and would accept "12abc" as 12, deleting a
	// volume the caller never named. Exactly one scanned item means the whole
	// parameter was a number.
	var (
		volumeID int
		trailing string
	)
	// The error is intentionally ignored: for valid input Sscanf reports an
	// unexpected EOF while looking for the trailing token, so only the item
	// count is meaningful here.
	if n, _ := fmt.Sscanf(idStr, "%d%s", &volumeID, &trailing); n != 1 {
		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid volume ID"})
		return
	}

	if err := h.service.DeleteStorageVolume(c.Request.Context(), volumeID); err != nil {
		h.logger.Error("Failed to delete storage volume", "error", err, "volume_id", volumeID)
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	c.JSON(http.StatusOK, gin.H{"message": "volume deleted successfully"})
}
// ListMedia responds with the media list provided by the backup service.
//
// The response body has the shape {"media": [...], "total": N}. A service
// failure is logged and reported as 500 with the service error text.
// NOTE(review): the previous comment said the data comes from the bconsole
// "list media" command; that happens inside the service and is not visible
// from this handler.
func (h *Handler) ListMedia(c *gin.Context) {
	ctx := c.Request.Context()

	result, listErr := h.service.ListMedia(ctx)
	if listErr != nil {
		h.logger.Error("Failed to list media", "error", listErr)
		c.JSON(http.StatusInternalServerError, gin.H{"error": listErr.Error()})
		return
	}

	// Swap a nil slice for an empty one so "media" is an array, not null,
	// in the encoded JSON.
	if result == nil {
		result = make([]Media, 0)
	}
	count := len(result)

	h.logger.Info("Listed media", "count", count)
	c.JSON(http.StatusOK, gin.H{
		"media": result,
		"total": count,
	})
}

File diff suppressed because it is too large Load Diff

View File

@@ -346,10 +346,20 @@ func NewRouter(cfg *config.Config, db *database.DB, log *logger.Logger) *gin.Eng
backupGroup := protected.Group("/backup")
backupGroup.Use(requirePermission("backup", "read"))
{
backupGroup.GET("/dashboard/stats", backupHandler.GetDashboardStats)
backupGroup.GET("/jobs", backupHandler.ListJobs)
backupGroup.GET("/jobs/:id", backupHandler.GetJob)
backupGroup.POST("/jobs", requirePermission("backup", "write"), backupHandler.CreateJob)
backupGroup.GET("/clients", backupHandler.ListClients)
backupGroup.GET("/storage/pools", backupHandler.ListStoragePools)
backupGroup.POST("/storage/pools", requirePermission("backup", "write"), backupHandler.CreateStoragePool)
backupGroup.DELETE("/storage/pools/:id", requirePermission("backup", "write"), backupHandler.DeleteStoragePool)
backupGroup.GET("/storage/volumes", backupHandler.ListStorageVolumes)
backupGroup.POST("/storage/volumes", requirePermission("backup", "write"), backupHandler.CreateStorageVolume)
backupGroup.PUT("/storage/volumes/:id", requirePermission("backup", "write"), backupHandler.UpdateStorageVolume)
backupGroup.DELETE("/storage/volumes/:id", requirePermission("backup", "write"), backupHandler.DeleteStorageVolume)
backupGroup.GET("/media", backupHandler.ListMedia)
backupGroup.GET("/storage/daemons", backupHandler.ListStorageDaemons)
backupGroup.POST("/console/execute", requirePermission("backup", "write"), backupHandler.ExecuteBconsoleCommand)
}

1
bacula-config Symbolic link
View File

@@ -0,0 +1 @@
/etc/bacula

View File

@@ -0,0 +1,354 @@
# Bacula VTL Integration - Root Cause Analysis & Troubleshooting
## Issue Summary
Bacula Storage Daemon was unable to read slots from mhVTL (Virtual Tape Library) autochanger devices, reporting "Device has 0 slots" despite mtx-changer script working correctly when called manually.
## Environment
- **OS**: Ubuntu Linux
- **Bacula Version**: 13.0.4
- **VTL**: mhVTL (Virtual Tape Library)
- **Autochangers**:
- Quantum Scalar i500 (4 drives, 43 slots)
- Quantum Scalar i40 (4 drives, 44 slots)
- **Tape Drives**: 8x QUANTUM ULTRIUM-HH8 (LTO-8)
## Root Cause Analysis
### Primary Issues Identified
#### 1. **Incorrect Tape Device Type**
**Problem**: Using rewinding tape devices (`/dev/st*`) instead of non-rewinding devices (`/dev/nst*`)
**Impact**: Tape would rewind after each operation, causing data loss and operational failures
**Solution**: Changed all Archive Device directives from `/dev/st*` to `/dev/nst*`
```diff
Device {
Name = Drive-0
- Archive Device = /dev/st0
+ Archive Device = /dev/nst0
}
```
#### 2. **Missing Drive Index Parameter**
**Problem**: Device configurations lacked Drive Index parameter
**Impact**: Bacula couldn't properly identify which physical drive in the autochanger to use
**Solution**: Added Drive Index (0-3) to each Device resource
```diff
Device {
Name = Drive-0
+ Drive Index = 0
Archive Device = /dev/nst0
}
```
#### 3. **Incorrect AlwaysOpen Setting**
**Problem**: AlwaysOpen was set to `no`
**Impact**: Device wouldn't remain open, causing connection issues with VTL
**Solution**: Changed AlwaysOpen to `yes` for all tape devices
```diff
Device {
Name = Drive-0
- AlwaysOpen = no
+ AlwaysOpen = yes
}
```
#### 4. **Wrong Changer Device Path**
**Problem**: Using `/dev/sch*` (medium changer device) instead of `/dev/sg*` (generic SCSI device)
**Impact**: bacula user couldn't access the changer due to permission issues (cdrom group vs tape group)
**Solution**: Changed Changer Device to use sg devices
```diff
Autochanger {
Name = Scalar-i500
- Changer Device = /dev/sch0
+ Changer Device = /dev/sg7
}
```
**Device Mapping**:
- `/dev/sch0` → `/dev/sg7` (Scalar i500)
- `/dev/sch1` → `/dev/sg8` (Scalar i40)
#### 5. **Missing User Permissions**
**Problem**: bacula user not in required groups for device access
**Impact**: "Permission denied" errors when accessing tape and changer devices
**Solution**: Added bacula user to tape and cdrom groups
```bash
usermod -a -G tape,cdrom bacula
systemctl restart bacula-sd
```
#### 6. **Incorrect Storage Resource Configuration**
**Problem**: Storage resource in Director config referenced autochanger name instead of individual drives
**Impact**: Bacula couldn't properly communicate with individual tape drives
**Solution**: Listed all drives explicitly in Storage resource
```diff
Storage {
Name = Scalar-i500
- Device = Scalar-i500
+ Device = Drive-0
+ Device = Drive-1
+ Device = Drive-2
+ Device = Drive-3
Autochanger = Scalar-i500
}
```
#### 7. **mtx-changer List Output Format**
**Problem**: Script output format didn't match Bacula's expected format
**Impact**: "Invalid Slot number" errors, preventing volume labeling
**Original Output**: `1 Full:VolumeTag=E01001L8`
**Expected Output**: `1:E01001L8`
**Solution**: Fixed sed pattern in list command
```bash
# Original (incorrect)
list)
${MTX} -f $ctl status | grep "Storage Element" | grep "Full" | awk '{print $3 $4}' | sed 's/:/ /'
;;
# Fixed
list)
${MTX} -f $ctl status | grep "Storage Element" | grep "Full" | awk '{print $3 $4}' | sed 's/:Full:VolumeTag=/:/'
;;
```
## Troubleshooting Steps
### Step 1: Verify mtx-changer Script Works Manually
```bash
# Test slots command
/usr/lib/bacula/scripts/mtx-changer /dev/sg7 slots
# Expected output: 43
# Test list command
/usr/lib/bacula/scripts/mtx-changer /dev/sg7 list
# Expected output: 1:E01001L8, 2:E01002L8, etc.
```
### Step 2: Test as bacula User
```bash
# Test if bacula user can access devices
su -s /bin/bash bacula -c "/usr/lib/bacula/scripts/mtx-changer /dev/sg7 slots"
# If permission denied, check groups
groups bacula
# Should include: bacula tape cdrom
```
### Step 3: Verify Device Permissions
```bash
# Check changer devices
ls -l /dev/sch* /dev/sg7 /dev/sg8
# sg devices should be in tape group
# Check tape devices
ls -l /dev/nst*
# Should be in tape group with rw permissions
```
### Step 4: Test Bacula Storage Daemon Connection
```bash
# From bconsole
echo "status storage=Scalar-i500" | bconsole
# Should show autochanger and drives
```
### Step 5: Update Slots
```bash
echo -e "update slots storage=Scalar-i500\n0\n" | bconsole
# Should show: Device "Drive-0" has 43 slots
# NOT: Device has 0 slots
```
### Step 6: Label Tapes
```bash
echo -e "label barcodes storage=Scalar-i500 pool=Default\n0\nyes\n" | bconsole
# Should successfully label tapes using barcodes
```
## Configuration Files
### /etc/bacula/bacula-sd.conf (Storage Daemon)
```bash
Autochanger {
Name = Scalar-i500
Device = Drive-0, Drive-1, Drive-2, Drive-3
Changer Command = "/usr/lib/bacula/scripts/mtx-changer %c %o %S %a %d"
Changer Device = /dev/sg7
}
Device {
Name = Drive-0
Drive Index = 0
Changer Device = /dev/sg7
Media Type = LTO-8
Archive Device = /dev/nst0
AutomaticMount = yes
AlwaysOpen = yes
RemovableMedia = yes
RandomAccess = no
AutoChanger = yes
Maximum Concurrent Jobs = 1
}
```
### /etc/bacula/bacula-dir.conf (Director)
```bash
Storage {
Name = Scalar-i500
Address = localhost
SDPort = 9103
Password = "<storage-daemon-password>"  # must match the Director resource password in bacula-sd.conf; never commit the real value
Device = Drive-0
Device = Drive-1
Device = Drive-2
Device = Drive-3
Media Type = LTO-8
Autochanger = Scalar-i500
Maximum Concurrent Jobs = 4
}
```
### /usr/lib/bacula/scripts/mtx-changer
```bash
#!/bin/sh
# Minimal mtx-changer wrapper used by the Bacula Storage Daemon.
#
# Positional arguments, in the order of the configured Changer Command
# ("mtx-changer %c %o %S %a %d"):
#   $1  changer control device (e.g. /dev/sg7)
#   $2  command: loaded | load | unload | list | slots
#   $3  slot number
#   $4  archive (tape) device
#   $5  drive index (defaults to 0 when omitted, matching mtx's own default)
MTX=/usr/sbin/mtx
ctl=$1
cmd="$2"
slot=$3
device=$4
drive=${5:-0}
case "$cmd" in
  loaded)
    # Print the slot whose tape is in this drive, or 0 if the drive is empty.
    # The Data Transfer Element must be selected by drive index; matching on
    # $slot would inspect the wrong drive on a multi-drive changer.
    ${MTX} -f $ctl status | grep "Data Transfer Element $drive:Full" >/dev/null 2>&1
    if [ $? -eq 0 ]; then
      ${MTX} -f $ctl status | grep "Data Transfer Element $drive:Full" | awk '{print $7}' | sed 's/.*=//'
    else
      echo "0"
    fi
    ;;
  load)
    ${MTX} -f $ctl load $slot $drive
    ;;
  unload)
    ${MTX} -f $ctl unload $slot $drive
    ;;
  list)
    # Emit one "slot:barcode" line per full storage slot.
    ${MTX} -f $ctl status | grep "Storage Element" | grep "Full" | awk '{print $3 $4}' | sed 's/:Full:VolumeTag=/:/'
    ;;
  slots)
    # Print the slot count taken from the "Storage Changer" summary line.
    ${MTX} -f $ctl status | grep "Storage Changer" | awk '{print $5}'
    ;;
  *)
    echo "Invalid command: $cmd"
    exit 1
    ;;
esac
exit 0
## Verification Commands
### Check Device Mapping
```bash
lsscsi -g | grep -E "mediumx|tape"
```
### Check VTL Services
```bash
systemctl list-units 'vtl*'
```
### Test Manual Tape Load
```bash
# Load tape to drive
mtx -f /dev/sg7 load 1 0
# Check drive status
mt -f /dev/nst0 status
# Unload tape
mtx -f /dev/sg7 unload 1 0
```
### List Labeled Volumes
```bash
echo "list volumes pool=Default" | bconsole
```
## Common Errors and Solutions
### Error: "Device has 0 slots"
**Cause**: Wrong changer device or permission issues
**Solution**: Use /dev/sg* devices and verify bacula user in tape/cdrom groups
### Error: "Permission denied" accessing /dev/sch0
**Cause**: bacula user not in cdrom group
**Solution**: `usermod -a -G cdrom bacula && systemctl restart bacula-sd`
### Error: "Invalid Slot number"
**Cause**: mtx-changer list output format incorrect
**Solution**: Fix sed pattern to output `slot:volumetag` format
### Error: "No medium found" after successful load
**Cause**: Using rewinding devices (/dev/st*) or AlwaysOpen=no
**Solution**: Use /dev/nst* and set AlwaysOpen=yes
### Error: "READ ELEMENT STATUS Command Failed"
**Cause**: Permission issue or VTL service problem
**Solution**: Check user permissions and restart vtllibrary service
## Results
### Scalar i500 (WORKING)
- ✅ 43 slots detected
- ✅ 20 tapes successfully labeled (E01001L8 - E01020L8)
- ✅ Autochanger operations functional
- ✅ Ready for backup jobs
### Scalar i40 (ISSUE)
- ⚠️ 44 slots detected
- ❌ Hardware Error during tape load operations
- ❌ 0 tapes labeled
- **Status**: Requires mhVTL configuration investigation or system restart
## References
- Bacula Documentation: https://www.bacula.org/
- Article: "Using Bacula with mhVTL" - https://karellen.blogspot.com/2012/02/using-bacula-with-mhvtl.html
- mhVTL Project: https://github.com/markh794/mhvtl
## Date
Created: 2025-12-31
Author: Warp AI Agent

344
docs/healthcheck-script.md Normal file
View File

@@ -0,0 +1,344 @@
# Calypso Appliance Health Check Script
## Overview
Comprehensive health check script for all Calypso Appliance components. Performs automated checks across system resources, services, network, storage, and backup infrastructure.
## Installation
Script location: `/usr/local/bin/calypso-healthcheck`
## Usage
### Basic Usage
```bash
# Run health check (requires root)
calypso-healthcheck
# Run and save to specific location
calypso-healthcheck 2>&1 | tee /root/healthcheck-$(date +%Y%m%d).log
```
### Exit Codes
- `0` - All checks passed (100% healthy)
- `1` - Healthy with warnings (some non-critical issues)
- `2` - Degraded (80%+ checks passed, some failures)
- `3` - Critical (less than 80% checks passed)
### Automated Checks
#### System Resources (4 checks)
- Root filesystem usage (threshold: 80%)
- /var filesystem usage (threshold: 80%)
- Memory usage (threshold: 90%)
- CPU load average
#### Database Services (2 checks)
- PostgreSQL service status
- Database presence (calypso, bacula)
#### Calypso Application (7 checks)
- calypso-api service
- calypso-frontend service
- calypso-logger service
- API port 8443
- Frontend port 3000
- API health endpoint
- Frontend health endpoint
#### Backup Services - Bacula (8 checks)
- bacula-director service
- bacula-fd service
- bacula-sd service
- Director bconsole connectivity
- Storage (Scalar-i500) accessibility
- Director port 9101
- FD port 9102
- SD port 9103
#### Virtual Tape Library - mhVTL (5 checks)
- mhvtl.target status
- vtllibrary@10 (Scalar i500)
- vtllibrary@30 (Scalar i40)
- VTL device count (2 changers, 8 tape drives)
- Scalar i500 slots detection
#### Storage Protocols (9 checks)
- NFS server service
- Samba (smbd) service
- NetBIOS (nmbd) service
- SCST service
- iSCSI target service
- NFS port 2049
- SMB port 445
- NetBIOS port 139
- iSCSI port 3260
#### Monitoring & Management (2 checks)
- SNMP daemon
- SNMP port 161
#### Network Connectivity (2 checks)
- Internet connectivity (ping 8.8.8.8)
- Network manager status
**Total: 39+ automated checks**
## Output Format
### Console Output
- Color-coded status indicators:
- ✓ Green = Passed
- ⚠ Yellow = Warning
- ✗ Red = Failed
### Example Output
```
==========================================
CALYPSO APPLIANCE HEALTH CHECK
==========================================
Date: 2025-12-31 01:46:27
Hostname: calypso
Uptime: up 6 days, 2 hours, 50 minutes
Log file: /var/log/calypso-healthcheck-20251231-014627.log
========================================
SYSTEM RESOURCES
========================================
✓ Root filesystem (18% used)
✓ Var filesystem (18% used)
✓ Memory usage (49% used, 8206MB available)
✓ CPU load average (2.18, 8 cores)
...
========================================
HEALTH CHECK SUMMARY
========================================
Total Checks: 39
Passed: 35
Warnings: 0
Failed: 4
⚠ OVERALL STATUS: DEGRADED (89%)
```
### Log Files
All checks are logged to: `/var/log/calypso-healthcheck-YYYYMMDD-HHMMSS.log`
Logs include:
- Timestamp and system information
- Detailed check results
- Summary statistics
- Overall health status
## Scheduling
### Manual Execution
```bash
# Run on demand
sudo calypso-healthcheck
```
### Cron Job (Recommended)
Add to crontab for automated checks:
```bash
# Daily health check at 2 AM
0 2 * * * /usr/local/bin/calypso-healthcheck > /dev/null 2>&1
# Weekly health check on Monday at 6 AM with email notification
0 6 * * 1 /usr/local/bin/calypso-healthcheck 2>&1 | mail -s "Calypso Health Check" admin@example.com
```
### Systemd Timer (Alternative)
Create `/etc/systemd/system/calypso-healthcheck.timer`:
```ini
[Unit]
Description=Daily Calypso Health Check
Requires=calypso-healthcheck.service
[Timer]
OnCalendar=daily
Persistent=true
[Install]
WantedBy=timers.target
```
Create `/etc/systemd/system/calypso-healthcheck.service`:
```ini
[Unit]
Description=Calypso Appliance Health Check
[Service]
Type=oneshot
ExecStart=/usr/local/bin/calypso-healthcheck
```
Enable:
```bash
systemctl enable --now calypso-healthcheck.timer
```
## Troubleshooting
### Common Failures
#### API/Frontend Health Endpoints Failing
```bash
# Check if services are running
systemctl status calypso-api calypso-frontend
# Check service logs
journalctl -u calypso-api -n 50
journalctl -u calypso-frontend -n 50
# Test manually
curl -k https://localhost:8443/health
curl -k https://localhost:3000/health
```
#### Bacula Director Not Responding
```bash
# Check service
systemctl status bacula-director
# Test bconsole
echo "status director" | bconsole
# Check logs
tail -50 /var/log/bacula/bacula.log
```
#### VTL Slots Not Detected
```bash
# Check VTL services
systemctl status mhvtl.target
# Check devices
lsscsi | grep -E "mediumx|tape"
# Test manually
mtx -f /dev/sg7 status
echo "update slots storage=Scalar-i500" | bconsole
```
#### Storage Protocols Port Not Listening
```bash
# Check service status
systemctl status nfs-server smbd nmbd scst iscsi-scstd
# Check listening ports
ss -tuln | grep -E "2049|445|139|3260"
# Restart services if needed
systemctl restart nfs-server
systemctl restart smbd nmbd
```
## Customization
### Modify Thresholds
Edit `/usr/local/bin/calypso-healthcheck`:
```bash
# Disk usage threshold (default: 80%)
check_disk "/" 80 "Root filesystem"
# Memory usage threshold (default: 90%)
if [ "$mem_percent" -lt 90 ]; then
# Change expected VTL devices
if [ "$changer_count" -ge 2 ] && [ "$tape_count" -ge 8 ]; then
```
### Add Custom Checks
Add new check functions:
```bash
check_custom() {
TOTAL_CHECKS=$((TOTAL_CHECKS + 1))
if [[ condition ]]; then
echo -e "${GREEN}${CHECK}${NC} Custom check passed" | tee -a "$LOG_FILE"
PASSED_CHECKS=$((PASSED_CHECKS + 1))
else
echo -e "${RED}${CROSS}${NC} Custom check failed" | tee -a "$LOG_FILE"
FAILED_CHECKS=$((FAILED_CHECKS + 1))
fi
}
# Call in main script
check_custom
```
## Integration
### Monitoring Systems
Export metrics for monitoring:
```bash
# Nagios/Icinga format
# Capture the exit status once. Every `[ ... ]` test overwrites $?, so
# re-reading it in a later branch would see the result of the previous test
# rather than the health check's own exit code.
calypso-healthcheck
status=$?
case "$status" in
  0)
    echo "OK - All checks passed"
    exit 0
    ;;
  1)
    echo "WARNING - Healthy with warnings"
    exit 1
    ;;
  *)
    # Exit codes 2 (degraded) and 3 (critical) both map to Nagios CRITICAL.
    echo "CRITICAL - System degraded"
    exit 2
    ;;
esac
```
### API Integration
Parse JSON output:
```bash
# Add JSON output option
calypso-healthcheck --json > /tmp/health.json
```
## Maintenance
### Log Rotation
Logs are stored in `/var/log/calypso-healthcheck-*.log`
Create `/etc/logrotate.d/calypso-healthcheck`:
```
/var/log/calypso-healthcheck-*.log {
weekly
rotate 12
compress
delaycompress
missingok
notifempty
}
```
### Cleanup Old Logs
```bash
# Remove logs older than 30 days
find /var/log -name "calypso-healthcheck-*.log" -mtime +30 -delete
```
## Best Practices
1. **Run after reboot** - Verify all services started correctly
2. **Schedule regular checks** - Daily or weekly automated runs
3. **Monitor exit codes** - Alert on degraded/critical status
4. **Review logs periodically** - Identify patterns or recurring issues
5. **Update checks** - Add new components as system evolves
6. **Baseline health** - Establish normal operating parameters
7. **Document exceptions** - Note known warnings that are acceptable
## See Also
- `pre-reboot-checklist.md` - Pre-reboot verification
- `bacula-vtl-troubleshooting.md` - VTL troubleshooting guide
- System logs: `/var/log/syslog`, `/var/log/bacula/`
---
*Created: 2025-12-31*
*Script: `/usr/local/bin/calypso-healthcheck`*

View File

@@ -0,0 +1,225 @@
# Calypso Appliance - Pre-Reboot Checklist
**Date:** 2025-12-31
**Status:** ✅ READY FOR REBOOT
---
## Enabled Services (Auto-start on boot)
### Core Application Services
| Service | Status | Purpose |
|---------|--------|---------|
| postgresql.service | ✅ enabled | Database backend |
| calypso-api.service | ✅ enabled | REST API backend |
| calypso-frontend.service | ✅ enabled | Web UI (React) |
| calypso-logger.service | ✅ enabled | Application logging |
### Backup Services (Bacula)
| Service | Status | Purpose |
|---------|--------|---------|
| bacula-director.service | ✅ enabled | Backup orchestration |
| bacula-fd.service | ✅ enabled | File daemon (client) |
| bacula-sd.service | ✅ enabled | Storage daemon (VTL) |
### Virtual Tape Library (mhVTL)
| Service | Status | Purpose |
|---------|--------|---------|
| mhvtl.target | ✅ enabled | VTL master target |
| vtllibrary@10.service | ✅ enabled | Scalar i500 library |
| vtllibrary@30.service | ✅ enabled | Scalar i40 library |
| vtltape@11-14.service | ✅ enabled | i500 tape drives (4) |
| vtltape@31-34.service | ✅ enabled | i40 tape drives (4) |
### Storage Protocols
| Service | Status | Purpose |
|---------|--------|---------|
| nfs-server.service | ✅ enabled | NFS file sharing |
| nfs-blkmap.service | ✅ enabled | NFS block mapping |
| smbd.service | ✅ enabled | Samba/CIFS server |
| nmbd.service | ✅ enabled | NetBIOS name service |
| scst.service | ✅ enabled | SCSI target subsystem |
| iscsi-scstd.service | ✅ enabled | iSCSI target daemon |
### Monitoring & Management
| Service | Status | Purpose |
|---------|--------|---------|
| snmpd.service | ✅ enabled | SNMP monitoring |
---
## Boot Order & Dependencies
```
1. Network (systemd-networkd)
2. Storage Foundation
- NFS server
- Samba (smbd/nmbd)
- SCST/iSCSI
3. PostgreSQL Database
4. VTL Services (mhvtl.target)
- vtllibrary services
- vtltape services
5. Bacula Services
- bacula-director (after postgresql)
- bacula-fd
- bacula-sd (after VTL)
6. Calypso Application
- calypso-api (after postgresql)
- calypso-frontend (wants calypso-api)
- calypso-logger (wants api & frontend)
```
---
## Post-Reboot Verification
### 1. Check System Boot
```bash
# Check boot time
systemd-analyze
systemd-analyze blame | head -20
```
### 2. Check Core Services
```bash
# Calypso application
systemctl status calypso-api calypso-frontend calypso-logger
# Database
systemctl status postgresql
# Check API health
curl -k https://localhost:8443/health
curl -k https://localhost:3000/health
```
### 3. Check Backup Services
```bash
# Bacula status
systemctl status bacula-director bacula-fd bacula-sd
# Test bconsole connection
echo "status director" | bconsole
# Check VTL connection
echo "status storage=Scalar-i500" | bconsole
```
### 4. Check Storage Protocols
```bash
# NFS
systemctl status nfs-server
showmount -e localhost
# Samba
systemctl status smbd nmbd
smbstatus
# iSCSI/SCST
systemctl status scst iscsi-scstd
scstadmin -list_target
```
### 5. Check VTL Devices
```bash
# VTL services
systemctl status mhvtl.target
# Check devices
lsscsi | grep -E "mediumx|tape"
# Test autochanger
mtx -f /dev/sg7 status | head -10
```
### 6. Check Monitoring
```bash
# SNMP
systemctl status snmpd
snmpwalk -v2c -c public localhost system
```
---
## Network Access Points
| Service | URL/Port | Description |
|---------|----------|-------------|
| Web UI | https://[IP]:3000 | Calypso frontend |
| API | https://[IP]:8443 | REST API |
| Bacula Director | localhost:9101 | bconsole access |
| PostgreSQL | localhost:5432 | Database |
| NFS | tcp/2049 | NFS shares |
| Samba | tcp/445, tcp/139 | CIFS/SMB shares |
| iSCSI | tcp/3260 | iSCSI targets |
| SNMP | udp/161 | Monitoring |
---
## Important Notes
### Bacula VTL Configuration
- **Scalar i500**: 43 slots, 20 tapes labeled (E01001L8-E01020L8) ✅
- **Scalar i40**: 44 slots, needs investigation after reboot ⚠️
- Changer devices: /dev/sg7 (i500), /dev/sg8 (i40)
- Tape devices: /dev/nst0-7 (non-rewinding)
- User permissions: bacula in tape+cdrom groups
### Storage Paths
- Calypso working directory: `/development/calypso`
- Bacula configs: `/etc/bacula/`
- VTL configs: `/etc/mhvtl/`
- PostgreSQL data: `/var/lib/postgresql/`
### Known Issues
- Scalar i40 VTL: Hardware error during tape load (requires investigation)
---
## Emergency Recovery
If services fail to start after reboot:
```bash
# Check failed services
systemctl --failed
# View service logs
journalctl -xeu calypso-api
journalctl -xeu bacula-director
journalctl -xeu mhvtl.target
# Manual service restart
systemctl restart calypso-api
systemctl restart bacula-sd
systemctl restart mhvtl.target
```
---
## Checklist Summary
- [x] PostgreSQL database: enabled
- [x] Calypso services (api, frontend, logger): enabled
- [x] Bacula services (director, fd, sd): enabled
- [x] mhVTL services (libraries, tape drives): enabled
- [x] NFS server: enabled
- [x] Samba (smbd, nmbd): enabled
- [x] SCST/iSCSI: enabled
- [x] SNMP monitoring: enabled
- [x] Network services: configured
- [x] User permissions: configured
- [x] Service dependencies: verified
**Status: SAFE TO REBOOT**
---
*Generated: 2025-12-31*
*Documentation: /development/calypso/docs/*

View File

@@ -70,6 +70,21 @@ export interface ListClientsParams {
search?: string
}
/**
 * Usage summary for a single pool, as carried in
 * `DashboardStats.default_pool`.
 */
export interface PoolStats {
name: string
used_bytes: number
total_bytes: number
// NOTE(review): presumably used_bytes / total_bytes as a 0-100 value — confirm against the API.
usage_percent: number
}
/**
 * Response type of `backupAPI.getDashboardStats`
 * (GET /backup/dashboard/stats).
 * `last_job` and `default_pool` are optional and may be absent.
 */
export interface DashboardStats {
director_status: string
director_uptime: string
last_job?: BackupJob
active_jobs_count: number
default_pool?: PoolStats
}
export const backupAPI = {
listJobs: async (params?: ListJobsParams): Promise<ListJobsResponse> => {
const queryParams = new URLSearchParams()
@@ -111,5 +126,132 @@ export const backupAPI = {
)
return response.data
},
// Fetches the dashboard summary (GET /backup/dashboard/stats).
getDashboardStats: async (): Promise<DashboardStats> => {
  const { data } = await apiClient.get<DashboardStats>('/backup/dashboard/stats')
  return data
},
// Fetches all storage pools together with their count (GET /backup/storage/pools).
listStoragePools: async (): Promise<{ pools: StoragePool[]; total: number }> => {
  const { data } = await apiClient.get<{ pools: StoragePool[]; total: number }>('/backup/storage/pools')
  return data
},
// Fetches storage volumes (GET /backup/storage/volumes). When `poolName` is
// truthy it is sent as the `pool_name` query parameter; otherwise no query
// string is appended.
listStorageVolumes: async (poolName?: string): Promise<{ volumes: StorageVolume[]; total: number }> => {
  const query = new URLSearchParams()
  if (poolName) {
    query.append('pool_name', poolName)
  }
  const qs = query.toString()
  const url = qs ? `/backup/storage/volumes?${qs}` : '/backup/storage/volumes'
  const { data } = await apiClient.get<{ volumes: StorageVolume[]; total: number }>(url)
  return data
},
// Fetches all storage daemons together with their count (GET /backup/storage/daemons).
listStorageDaemons: async (): Promise<{ daemons: StorageDaemon[]; total: number }> => {
  const { data } = await apiClient.get<{ daemons: StorageDaemon[]; total: number }>('/backup/storage/daemons')
  return data
},
// Creates a storage pool (POST /backup/storage/pools) and resolves with the
// pool returned in the response body.
createStoragePool: async (data: CreateStoragePoolRequest): Promise<StoragePool> => {
  const { data: createdPool } = await apiClient.post<StoragePool>('/backup/storage/pools', data)
  return createdPool
},
// Deletes the pool with the given ID (DELETE /backup/storage/pools/:id).
// The response body is discarded.
deleteStoragePool: async (poolId: number): Promise<void> => {
  const path = `/backup/storage/pools/${poolId}`
  await apiClient.delete(path)
},
// Creates a storage volume (POST /backup/storage/volumes) and resolves with
// the volume returned in the response body.
createStorageVolume: async (data: CreateStorageVolumeRequest): Promise<StorageVolume> => {
  const { data: createdVolume } = await apiClient.post<StorageVolume>('/backup/storage/volumes', data)
  return createdVolume
},
// Updates the volume with the given ID (PUT /backup/storage/volumes/:id) and
// resolves with the volume returned in the response body.
updateStorageVolume: async (volumeId: number, data: UpdateStorageVolumeRequest): Promise<StorageVolume> => {
  const path = `/backup/storage/volumes/${volumeId}`
  const { data: updatedVolume } = await apiClient.put<StorageVolume>(path, data)
  return updatedVolume
},
// Deletes the volume with the given ID (DELETE /backup/storage/volumes/:id).
// The response body is discarded.
deleteStorageVolume: async (volumeId: number): Promise<void> => {
  const path = `/backup/storage/volumes/${volumeId}`
  await apiClient.delete(path)
},
// Fetches all media together with their count (GET /backup/media).
listMedia: async (): Promise<{ media: Media[]; total: number }> => {
  const { data } = await apiClient.get<{ media: Media[]; total: number }>('/backup/media')
  return data
},
}
/**
 * Request body for `backupAPI.createStoragePool`
 * (POST /backup/storage/pools). Only `name` is required.
 */
export interface CreateStoragePoolRequest {
name: string
pool_type?: string
label_format?: string
recycle?: boolean
auto_prune?: boolean
}
/**
 * Request body for `backupAPI.createStorageVolume`
 * (POST /backup/storage/volumes). `volume_name` and `pool_name` are required.
 */
export interface CreateStorageVolumeRequest {
volume_name: string
pool_name: string
media_type?: string
max_vol_bytes?: number
// NOTE(review): unit is not visible here (presumably seconds or days) — confirm against the backend.
vol_retention?: number
}
/**
 * Request body for `backupAPI.updateStorageVolume`
 * (PUT /backup/storage/volumes/:id). Every field is optional.
 */
export interface UpdateStorageVolumeRequest {
max_vol_bytes?: number
// NOTE(review): unit is not visible here — confirm against the backend.
vol_retention?: number
}
/**
 * Element type of the `media` array returned by `backupAPI.listMedia`
 * (GET /backup/media).
 */
export interface Media {
media_id: number
volume_name: string
pool_name: string
media_type: string
status: string
vol_bytes: number
max_vol_bytes: number
vol_files: number
// NOTE(review): timestamp format is not visible here — confirm before parsing.
last_written?: string
recycle_count: number
slot?: number
// NOTE(review): typed as number, presumably a 0/1 flag — confirm against the API.
in_changer?: number
library_name?: string
}
/**
 * A storage pool as returned by `backupAPI.listStoragePools` and
 * `backupAPI.createStoragePool`. `pool_id` is the identifier passed to
 * `backupAPI.deleteStoragePool`.
 */
export interface StoragePool {
pool_id: number
name: string
pool_type: string
label_format?: string
recycle?: boolean
auto_prune?: boolean
volume_count: number
used_bytes: number
total_bytes: number
// NOTE(review): presumably used_bytes / total_bytes as a 0-100 value — confirm against the API.
usage_percent: number
}
/**
 * A storage volume as returned by `backupAPI.listStorageVolumes`,
 * `backupAPI.createStorageVolume` and `backupAPI.updateStorageVolume`.
 */
export interface StorageVolume {
// NOTE(review): both volume_id and media_id are present; which one the
// /backup/storage/volumes/:id routes expect is not visible here — confirm.
volume_id: number
media_id: number
volume_name: string
pool_name: string
media_type: string
vol_status: string
vol_bytes: number
max_vol_bytes: number
vol_files: number
// NOTE(review): this is a string here but a number in the create/update
// request types — confirm that this matches what the API returns.
vol_retention?: string
last_written?: string
recycle_count: number
}
/**
 * Element type of the `daemons` array returned by
 * `backupAPI.listStorageDaemons` (GET /backup/storage/daemons).
 */
export interface StorageDaemon {
storage_id: number
name: string
address: string
port: number
device_name: string
media_type: string
status: string
}

File diff suppressed because it is too large Load Diff

View File

@@ -29,6 +29,21 @@ export default function LoginPage() {
}
return (
<>
<style>{`
input:-webkit-autofill,
input:-webkit-autofill:hover,
input:-webkit-autofill:focus,
input:-webkit-autofill:active {
-webkit-box-shadow: 0 0 0 30px #111a22 inset !important;
-webkit-text-fill-color: #ffffff !important;
box-shadow: 0 0 0 30px #111a22 inset !important;
caret-color: #ffffff !important;
}
input:-webkit-autofill::first-line {
color: #ffffff !important;
}
`}</style>
<div className="min-h-screen flex items-center justify-center bg-background-dark">
<div className="max-w-md w-full space-y-8 p-8 bg-card-dark border border-border-dark rounded-lg shadow-md">
<div className="flex flex-col items-center">
@@ -73,10 +88,11 @@ export default function LoginPage() {
name="username"
type="text"
required
className="appearance-none rounded-none relative block w-full px-3 py-2 border border-border-dark bg-[#111a22] placeholder-text-secondary text-white rounded-t-md focus:outline-none focus:ring-primary focus:border-primary focus:z-10 sm:text-sm"
className="appearance-none rounded-none relative block w-full px-3 py-2 border border-border-dark bg-[#111a22] placeholder-text-secondary text-white rounded-t-md focus:outline-none focus:ring-primary focus:border-primary focus:z-10 sm:text-sm autofill:bg-[#111a22] autofill:text-white"
placeholder="Username"
value={username}
onChange={(e) => setUsername(e.target.value)}
autoComplete="username"
/>
</div>
<div>
@@ -88,10 +104,11 @@ export default function LoginPage() {
name="password"
type="password"
required
className="appearance-none rounded-none relative block w-full px-3 py-2 border border-border-dark bg-[#111a22] placeholder-text-secondary text-white rounded-b-md focus:outline-none focus:ring-primary focus:border-primary focus:z-10 sm:text-sm"
className="appearance-none rounded-none relative block w-full px-3 py-2 border border-border-dark bg-[#111a22] placeholder-text-secondary text-white rounded-b-md focus:outline-none focus:ring-primary focus:border-primary focus:z-10 sm:text-sm autofill:bg-[#111a22] autofill:text-white"
placeholder="Password"
value={password}
onChange={(e) => setPassword(e.target.value)}
autoComplete="current-password"
/>
</div>
</div>
@@ -121,6 +138,7 @@ export default function LoginPage() {
</form>
</div>
</div>
</>
)
}