Performance Tuning

This guide covers performance optimization strategies for backupx, including memory management, parallel processing, and bottleneck identification.

Understanding Performance Characteristics

Backup Operation Costs

ts
interface PerformanceProfile {
  operation: 'read' | 'compress' | 'write' | 'network'
  cpuIntensive: boolean
  memoryIntensive: boolean
  ioIntensive: boolean
  networkBound: boolean
  scalability: 'linear' | 'logarithmic' | 'constant'
}

const operationProfiles: Record<string, PerformanceProfile> = {
  sqliteBackup: {
    operation: 'read',
    cpuIntensive: false,
    memoryIntensive: false,
    ioIntensive: true,
    networkBound: false,
    scalability: 'linear',
  },
  postgresBackup: {
    operation: 'read',
    cpuIntensive: false,
    memoryIntensive: true,
    ioIntensive: false,
    networkBound: true,
    scalability: 'linear',
  },
  fileCompression: {
    operation: 'compress',
    cpuIntensive: true,
    memoryIntensive: true,
    ioIntensive: true,
    networkBound: false,
    scalability: 'linear',
  },
  directoryScanning: {
    operation: 'read',
    cpuIntensive: false,
    memoryIntensive: false,
    ioIntensive: true,
    networkBound: false,
    scalability: 'logarithmic',
  },
}
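
These profiles can drive scheduling decisions: CPU-bound work gains little from exceeding the core count, while I/O- and network-bound work spends most of its time waiting and can overlap more aggressively. A minimal sketch (the chooseConcurrency helper is illustrative, not part of backupx):

ts
import { cpus } from 'node:os'

// Illustrative helper: derive a concurrency limit from an operation's profile
function chooseConcurrency(profile: PerformanceProfile): number {
  if (profile.cpuIntensive) {
    // Leave one core free for the event loop
    return Math.max(1, cpus().length - 1)
  }
  if (profile.networkBound || profile.ioIntensive) {
    // Waiting-dominated work tolerates higher concurrency
    return 8
  }
  return 4
}

const compressionConcurrency = chooseConcurrency(operationProfiles.fileCompression)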

Memory Optimization

Streaming vs Buffering

ts
import type { FileConfig } from 'backupx'
import { BackupManager } from 'backupx'
import { createReadStream, createWriteStream } from 'node:fs'
import { pipeline } from 'node:stream/promises'
import { createGzip } from 'node:zlib'

class MemoryOptimizedBackupManager extends BackupManager {
  private readonly CHUNK_SIZE = 64 * 1024 // 64KB chunks
  private readonly MAX_MEMORY_USAGE = 100 * 1024 * 1024 // 100MB limit

  async backupLargeFile(filePath: string, outputPath: string): Promise<void> {
    // Use streaming to avoid loading entire file into memory
    const readStream = createReadStream(filePath, {
      highWaterMark: this.CHUNK_SIZE,
    })

    const gzipStream = createGzip({
      level: 6, // Balance between compression and speed
      chunkSize: this.CHUNK_SIZE,
    })

    const writeStream = createWriteStream(outputPath, {
      highWaterMark: this.CHUNK_SIZE,
    })

    // Pipeline automatically handles backpressure
    await pipeline(readStream, gzipStream, writeStream)
  }

  async backupLargeDirectory(config: FileConfig): Promise<void> {
    const files = await this.getFilesToBackup(config)

    // Process files in batches to control memory usage
    const batchSize = this.calculateOptimalBatchSize(files)

    for (let i = 0; i < files.length; i += batchSize) {
      const batch = files.slice(i, i + batchSize)
      await this.processBatch(batch)

      // Force garbage collection between batches
      if (globalThis.gc) {
        globalThis.gc()
      }
    }
  }

  private calculateOptimalBatchSize(files: string[]): number {
    const averageFileSize = this.estimateAverageFileSize(files)
    const maxFilesInMemory = Math.floor(this.MAX_MEMORY_USAGE / averageFileSize)
    return Math.max(1, Math.min(maxFilesInMemory, 50)) // Min 1, max 50 files
  }

  private estimateAverageFileSize(files: string[]): number {
    // Sample first 10 files to estimate average size
    // Implementation would stat() first few files
    return 1024 * 1024 // 1MB default estimate
  }
}
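
For contrast, a fully buffered version of the same backup holds the entire file and its compressed copy in memory at once. The sketch below only illustrates the trade-off; prefer the streaming approach above for anything beyond a few megabytes:

ts
import { readFile, writeFile } from 'node:fs/promises'
import { gzipSync } from 'node:zlib'

// Buffered approach: peak memory is roughly file size + compressed size
async function backupFileBuffered(filePath: string, outputPath: string): Promise<void> {
  const data = await readFile(filePath) // entire file in memory
  const compressed = gzipSync(data) // second copy in memory
  await writeFile(outputPath, compressed)
}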

Memory Monitoring

ts
import { EventEmitter } from 'node:events'
import { getHeapStatistics } from 'node:v8'

class MemoryMonitor extends EventEmitter {
  private readonly WARNING_THRESHOLD = 0.8 // 80% of heap limit
  private readonly CRITICAL_THRESHOLD = 0.9 // 90% of heap limit
  private monitoringInterval?: NodeJS.Timeout

  startMonitoring(intervalMs = 5000): void {
    this.monitoringInterval = setInterval(() => {
      const usage = this.getMemoryUsage()
      this.checkThresholds(usage)
    }, intervalMs)
  }

  stopMonitoring(): void {
    if (this.monitoringInterval) {
      clearInterval(this.monitoringInterval)
    }
  }

  private getMemoryUsage() {
    const usage = process.memoryUsage()
    const heapLimit = this.getHeapLimit()

    return {
      used: usage.heapUsed,
      total: usage.heapTotal,
      limit: heapLimit,
      utilization: usage.heapUsed / heapLimit,
      external: usage.external,
      rss: usage.rss,
    }
  }

  private getHeapLimit(): number {
    // V8 heap size limit for this process
    return getHeapStatistics().heap_size_limit
  }

  private checkThresholds(usage: any): void {
    if (usage.utilization > this.CRITICAL_THRESHOLD) {
      this.emit('memory:critical', usage)
    }
    else if (usage.utilization > this.WARNING_THRESHOLD) {
      this.emit('memory:warning', usage)
    }
  }
}

// Usage with backup manager
const memoryMonitor = new MemoryMonitor()

memoryMonitor.on('memory:warning', (usage) => {
  console.warn(`⚠️ High memory usage: ${(usage.utilization * 100).toFixed(1)}%`)
  // Trigger garbage collection
  if (globalThis.gc) {
    globalThis.gc()
  }
})

memoryMonitor.on('memory:critical', (usage) => {
  console.error(`🚨 Critical memory usage: ${(usage.utilization * 100).toFixed(1)}%`)
  // Consider pausing backup operations
})

memoryMonitor.startMonitoring()

Parallel Processing

Concurrent Database Backups

ts
import type { BackupResult, BackupSummary, DatabaseConfig, FileConfig } from 'backupx'
import { BackupManager } from 'backupx'

class ParallelBackupManager extends BackupManager {
  private readonly MAX_CONCURRENT_DATABASES = 3
  private readonly MAX_CONCURRENT_FILES = 5

  async createBackup(): Promise<BackupSummary> {
    const startTime = Date.now()

    // Separate database and file operations
    const { databases, files } = this.categorizeBackups()

    // Run database and file backups in parallel
    const [databaseResults, fileResults] = await Promise.all([
      this.runDatabaseBackupsInParallel(databases),
      this.runFileBackupsInParallel(files),
    ])

    const results = [...databaseResults, ...fileResults]
    const duration = Date.now() - startTime

    return this.createSummary(results, duration)
  }

  private async runDatabaseBackupsInParallel(
    configs: DatabaseConfig[],
  ): Promise<BackupResult[]> {
    // Use semaphore to limit concurrent database connections
    const semaphore = new Semaphore(this.MAX_CONCURRENT_DATABASES)

    return Promise.all(
      configs.map(async (config) => {
        await semaphore.acquire()
        try {
          return await this.backupDatabase(config)
        }
        finally {
          semaphore.release()
        }
      }),
    )
  }

  private async runFileBackupsInParallel(
    configs: FileConfig[],
  ): Promise<BackupResult[]> {
    // Group files by size for optimal scheduling
    const groups = this.groupFilesBySize(configs)
    const semaphore = new Semaphore(this.MAX_CONCURRENT_FILES)

    const allPromises = groups.map(group =>
      group.map(async (config) => {
        await semaphore.acquire()
        try {
          return await this.backupFile(config)
        }
        finally {
          semaphore.release()
        }
      }),
    ).flat()

    return Promise.all(allPromises)
  }

  private groupFilesBySize(configs: FileConfig[]): FileConfig[][] {
    // Sort a copy by estimated size (large files first) without mutating the input
    const sorted = [...configs].sort((a, b) =>
      this.estimateFileSize(b) - this.estimateFileSize(a)
    )

    // Group to balance workload
    const groups: FileConfig[][] = [[], [], [], []]
    sorted.forEach((config, index) => {
      groups[index % groups.length].push(config)
    })

    return groups.filter(group => group.length > 0)
  }
}

class Semaphore {
  private permits: number
  private waiting: Array<() => void> = []

  constructor(permits: number) {
    this.permits = permits
  }

  async acquire(): Promise<void> {
    if (this.permits > 0) {
      this.permits--
      return
    }

    return new Promise((resolve) => {
      this.waiting.push(resolve)
    })
  }

  release(): void {
    if (this.waiting.length > 0) {
      const resolve = this.waiting.shift()!
      resolve()
    }
    else {
      this.permits++
    }
  }
}

Worker Threads for CPU-Intensive Tasks

ts
import { BackupManager } from 'backupx'
import { isMainThread, parentPort, Worker, workerData } from 'node:worker_threads'

// Worker script (compression-worker.ts)
if (!isMainThread) {
  const { filePath, outputPath, options } = workerData

  async function compressFile() {
    try {
      const fs = await import('node:fs')
      const zlib = await import('node:zlib')
      const { pipeline } = await import('node:stream/promises')

      const readStream = fs.createReadStream(filePath)
      const gzipStream = zlib.createGzip(options)
      const writeStream = fs.createWriteStream(outputPath)

      await pipeline(readStream, gzipStream, writeStream)

      const stats = await fs.promises.stat(outputPath)
      parentPort?.postMessage({ success: true, size: stats.size })
    }
    catch (error) {
      parentPort?.postMessage({
        success: false,
        error: error instanceof Error ? error.message : String(error),
      })
    }
  }

  compressFile()
}

// Main thread usage
class WorkerPoolBackupManager extends BackupManager {
  private readonly workerPool = new WorkerPool(4) // 4 worker threads

  async compressFileWithWorker(
    filePath: string,
    outputPath: string,
  ): Promise<{ success: boolean, size?: number, error?: string }> {
    return this.workerPool.execute({
      filePath,
      outputPath,
      options: { level: 6 },
    })
  }
}

class WorkerPool {
  private workers: Worker[] = []
  private queue: Array<{
    data: any
    resolve: (value: any) => void
    reject: (error: any) => void
  }> = []

  private available: Worker[] = []

  constructor(size: number) {
    for (let i = 0; i < size; i++) {
      this.createWorker()
    }
  }

  private createWorker(): void {
    // __filename assumes a CommonJS build; under ESM, derive the path from import.meta.url
    const worker = new Worker(__filename)

    // Results are handled per task in processQueue(), so no global 'message'
    // listener is attached here (this avoids releasing a worker twice per task)
    worker.on('error', (error) => {
      // Surface worker errors; a production pool would also replace the failed worker
      console.error('Worker error:', error)
    })

    this.workers.push(worker)
    this.available.push(worker)
  }

  async execute(data: any): Promise<any> {
    return new Promise((resolve, reject) => {
      this.queue.push({ data, resolve, reject })
      this.processQueue()
    })
  }

  private processQueue(): void {
    if (this.queue.length === 0 || this.available.length === 0) {
      return
    }

    const worker = this.available.pop()!
    const task = this.queue.shift()!

    // Attach the listener before posting so the reply can never be missed
    worker.once('message', (result: any) => {
      if (result.success) {
        task.resolve(result)
      }
      else {
        task.reject(new Error(result.error))
      }
      this.available.push(worker)
      this.processQueue()
    })

    worker.postMessage(task.data)
  }

  async terminate(): Promise<void> {
    await Promise.all(this.workers.map(worker => worker.terminate()))
  }
}
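
A usage sketch (the file paths are placeholders): tasks queue transparently when all workers are busy, and the pool should be terminated once the backup run completes so the process can exit cleanly.

ts
const pool = new WorkerPool(4)

const result = await pool.execute({
  filePath: './data/large-export.json',
  outputPath: './backups/large-export.json.gz',
  options: { level: 6 },
})

console.log(result.success ? `Compressed to ${result.size} bytes` : result.error)

await pool.terminate()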

I/O Optimization

Disk I/O Patterns

ts
import type { FileConfig } from 'backupx'
import { BackupManager } from 'backupx'

interface IOStrategy {
  pattern: 'sequential' | 'random' | 'mixed'
  bufferSize: number
  syncWrites: boolean
  directIO: boolean
}

class IOOptimizedBackupManager extends BackupManager {
  private getOptimalIOStrategy(operation: string): IOStrategy {
    switch (operation) {
      case 'large-file-backup':
        return {
          pattern: 'sequential',
          bufferSize: 1024 * 1024, // 1MB buffer
          syncWrites: false, // Use OS caching
          directIO: false,
        }

      case 'many-small-files':
        return {
          pattern: 'random',
          bufferSize: 64 * 1024, // 64KB buffer
          syncWrites: true, // Ensure data persistence
          directIO: false,
        }

      case 'database-dump':
        return {
          pattern: 'sequential',
          bufferSize: 256 * 1024, // 256KB buffer
          syncWrites: true, // Critical data
          directIO: false,
        }

      default:
        return {
          pattern: 'mixed',
          bufferSize: 128 * 1024,
          syncWrites: false,
          directIO: false,
        }
    }
  }

  async optimizeFileOperations(config: FileConfig): Promise<void> {
    const strategy = this.getOptimalIOStrategy('large-file-backup')

    // Configure read stream with optimal buffer size
    const readOptions = {
      highWaterMark: strategy.bufferSize,
      // Add more platform-specific optimizations
    }

    // Implement strategy...
  }
}
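
As a rough sketch of how such a strategy could be applied (the copyWithStrategy helper is hypothetical, not part of backupx): bufferSize maps onto the streams' highWaterMark, and syncWrites can be honored by flushing the destination file before reporting success.

ts
import { createReadStream, createWriteStream } from 'node:fs'
import { open } from 'node:fs/promises'
import { pipeline } from 'node:stream/promises'

// Hypothetical helper: copy a single file according to an IOStrategy
async function copyWithStrategy(src: string, dest: string, strategy: IOStrategy): Promise<void> {
  const readStream = createReadStream(src, { highWaterMark: strategy.bufferSize })
  const writeStream = createWriteStream(dest, { highWaterMark: strategy.bufferSize })

  await pipeline(readStream, writeStream)

  if (strategy.syncWrites) {
    // Flush OS write caches so the data is durable before returning
    const handle = await open(dest, 'r+')
    await handle.sync()
    await handle.close()
  }
}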

Network Optimization for Database Backups

ts
import type { PostgreSQLConfig } from 'backupx'
import { BackupManager } from 'backupx'
import { Pool } from 'pg'

class NetworkOptimizedBackupManager extends BackupManager {
  async optimizePostgreSQLConnection(config: PostgreSQLConfig): Promise<Pool> {
    // Connection pooling for multiple table backups
    const pool = new Pool({
      ...config.connection,
      max: 3, // Limit concurrent connections
      idleTimeoutMillis: 30000,
      connectionTimeoutMillis: 10000,

      // Performance optimizations
      statement_timeout: 0, // No statement timeout for dumps
      lock_timeout: 60000, // 1 minute lock timeout

      // Keep long-running dump connections alive
      keepAlive: true,
      keepAliveInitialDelayMillis: 10000,
    })

    return pool
  }

  // Use COPY for faster data transfer; the table name is supplied per table at dump time
  private buildCopyQuery(tableName: string): string {
    return `
      COPY (SELECT * FROM ${tableName})
      TO STDOUT
      WITH (FORMAT csv, HEADER true, DELIMITER ',')
    `
  }

  async batchTableBackup(tables: string[]): Promise<void> {
    // Process tables in parallel but limit concurrent connections
    const semaphore = new Semaphore(2)

    await Promise.all(
      tables.map(async (table) => {
        await semaphore.acquire()
        try {
          await this.backupTable(table)
        }
        finally {
          semaphore.release()
        }
      }),
    )
  }
}
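
The COPY itself can be streamed end to end as well. The sketch below assumes the pg and pg-copy-streams packages are available; the dump is piped straight through gzip to disk, so no table ever has to fit in memory:

ts
import { createWriteStream } from 'node:fs'
import { pipeline } from 'node:stream/promises'
import { createGzip } from 'node:zlib'
import { Pool } from 'pg'
import { to as copyTo } from 'pg-copy-streams'

// Illustrative sketch: stream one table's COPY output into a gzipped file
async function dumpTableToGzip(pool: Pool, tableName: string, outputPath: string): Promise<void> {
  const client = await pool.connect()
  try {
    const copyStream = client.query(
      copyTo(`COPY ${tableName} TO STDOUT WITH (FORMAT csv, HEADER true)`),
    )
    await pipeline(copyStream, createGzip({ level: 6 }), createWriteStream(outputPath))
  }
  finally {
    client.release()
  }
}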

Performance Monitoring

Real-time Performance Metrics

ts
import type { BackupSummary } from 'backupx'
import { BackupManager } from 'backupx'

interface PerformanceMetrics {
  timestamp: number
  operation: string
  duration: number
  bytesProcessed: number
  throughput: number // bytes per second
  memoryUsage: number
  cpuUsage: number
  ioWait: number
}

class PerformanceTracker {
  private metrics: PerformanceMetrics[] = []
  private startTimes = new Map<string, number>()
  private startCPUs = new Map<string, NodeJS.CpuUsage>()

  startOperation(operation: string): void {
    this.startTimes.set(operation, performance.now())
    this.startCPUs.set(operation, process.cpuUsage())
  }

  endOperation(operation: string, bytesProcessed: number): PerformanceMetrics {
    const endTime = performance.now()
    const endCPU = process.cpuUsage(this.startCPUs.get(operation))
    const duration = endTime - (this.startTimes.get(operation) ?? endTime)

    const metrics: PerformanceMetrics = {
      timestamp: Date.now(),
      operation,
      duration,
      bytesProcessed,
      throughput: bytesProcessed / (duration / 1000),
      memoryUsage: process.memoryUsage().heapUsed,
      cpuUsage: (endCPU.user + endCPU.system) / 1000000, // Convert to seconds
      ioWait: 0, // Would need OS-specific implementation
    }

    this.metrics.push(metrics)
    return metrics
  }

  getAverageMetrics(operation?: string): Partial<PerformanceMetrics> {
    const filtered = operation
      ? this.metrics.filter(m => m.operation === operation)
      : this.metrics

    if (filtered.length === 0)
      return {}

    return {
      duration: this.average(filtered, 'duration'),
      throughput: this.average(filtered, 'throughput'),
      memoryUsage: this.average(filtered, 'memoryUsage'),
      cpuUsage: this.average(filtered, 'cpuUsage'),
    }
  }

  private average(metrics: PerformanceMetrics[], key: keyof PerformanceMetrics): number {
    return metrics.reduce((sum, m) => sum + (m[key] as number), 0) / metrics.length
  }

  exportMetrics(): string {
    return JSON.stringify(this.metrics, null, 2)
  }
}

// Usage with backup manager
class MonitoredBackupManager extends BackupManager {
  private tracker = new PerformanceTracker()

  async createBackup(): Promise<BackupSummary> {
    this.tracker.startOperation('full-backup')

    try {
      const summary = await super.createBackup()

      const totalBytes = summary.results.reduce((sum, r) => sum + r.size, 0)
      const metrics = this.tracker.endOperation('full-backup', totalBytes)

      if (this.config.verbose) {
        console.warn(`📊 Performance metrics:`)
        console.warn(`   Duration: ${metrics.duration.toFixed(2)}ms`)
        console.warn(`   Throughput: ${this.formatThroughput(metrics.throughput)}`)
        console.warn(`   Memory: ${this.formatBytes(metrics.memoryUsage)}`)
      }

      return summary
    }
    catch (error) {
      this.tracker.endOperation('full-backup', 0)
      throw error
    }
  }

  private formatThroughput(bytesPerSecond: number): string {
    const mbps = bytesPerSecond / (1024 * 1024)
    return `${mbps.toFixed(2)} MB/s`
  }

  private formatBytes(bytes: number): string {
    const sizes = ['Bytes', 'KB', 'MB', 'GB']
    if (bytes === 0)
      return '0 Bytes'
    const i = Math.floor(Math.log(bytes) / Math.log(1024))
    return `${(bytes / 1024 ** i).toFixed(2)} ${sizes[i]}`
  }
}

Bottleneck Identification

Performance Profiling

ts
class PerformanceProfiler {
  private profiles = new Map<string, number[]>()

  async profile<T>(name: string, fn: () => Promise<T>): Promise<T> {
    const start = performance.now()

    try {
      const result = await fn()
      const duration = performance.now() - start

      if (!this.profiles.has(name)) {
        this.profiles.set(name, [])
      }
      this.profiles.get(name)!.push(duration)

      return result
    }
    catch (error) {
      // Record failed attempts too, even if this is the first run for this name
      const duration = performance.now() - start
      if (!this.profiles.has(name)) {
        this.profiles.set(name, [])
      }
      this.profiles.get(name)!.push(duration)
      throw error
    }
  }

  getReport(): Record<string, any> {
    const report: Record<string, any> = {}

    for (const [name, durations] of this.profiles) {
      const sorted = durations.sort((a, b) => a - b)
      const sum = durations.reduce((a, b) => a + b, 0)

      report[name] = {
        count: durations.length,
        total: sum,
        average: sum / durations.length,
        min: sorted[0],
        max: sorted[sorted.length - 1],
        median: sorted[Math.floor(sorted.length / 2)],
        p95: sorted[Math.floor(sorted.length * 0.95)],
        p99: sorted[Math.floor(sorted.length * 0.99)],
      }
    }

    return report
  }
}

// Usage in backup operations
const profiler = new PerformanceProfiler()

// Profile different operations
await profiler.profile('sqlite-backup', () => backupSQLite(config))
await profiler.profile('file-compression', () => compressFile(path))
await profiler.profile('directory-scan', () => scanDirectory(dir))

// Generate performance report
console.log(JSON.stringify(profiler.getReport(), null, 2))

This performance tuning guide provides comprehensive strategies for optimizing backupx operations, from memory management to parallel processing and bottleneck identification.