Skip to content

Custom Filtering

This guide covers advanced filtering strategies for backupx, including custom glob patterns, dynamic filtering, and content-based exclusions.

File Filtering Patterns

Basic Glob Patterns

ts
/** Path-based filter options consumed by `AdvancedFilter`. */
interface FilterConfig {
  /** Glob patterns; when at least one is given, a path must match one of them to be kept. */
  include?: string[]
  /** Glob patterns; a path matching any of them is dropped. */
  exclude?: string[]
  /** Forwarded to the glob compiler; `false` makes matching case-insensitive, omitted means case-sensitive. */
  caseSensitive?: boolean
  // NOTE(review): not read by any code in this guide — presumably honored by the directory walker; confirm.
  followSymlinks?: boolean
}

// Reusable glob presets, grouped by the kind of file they select
const commonPatterns = {
  // Dependency folders, VCS data, build output and scratch files
  development: ['node_modules/**', '*.log', '*.tmp', '.git/**', 'dist/**', 'build/**', 'coverage/**'],

  // Source code and project metadata, at any depth
  sourceCode: ['ts', 'js', 'json', 'md', 'yml', 'yaml'].map(ext => `**/*.${ext}`),

  // Images, video and audio, at any depth
  media: ['jpg', 'jpeg', 'png', 'gif', 'mp4', 'mp3'].map(ext => `**/*.${ext}`),

  // Office and PDF documents, at any depth
  documents: ['pdf', 'doc', 'docx', 'xls', 'xlsx', 'ppt', 'pptx'].map(ext => `**/*.${ext}`),
}

// Back up ./project using the source-code preset as the include list and the
// development preset (dependencies, build output, logs) as the exclude list.
// NOTE(review): `FileConfig` is not declared in this snippet — presumably backupx's file-backup config type; confirm the import.
const config: FileConfig = {
  name: 'source-files',
  path: './project',
  include: commonPatterns.sourceCode,
  exclude: commonPatterns.development,
}

Advanced Pattern Matching

ts
// Path filter driven by glob patterns.
//
// Include and exclude globs are compiled once, in the constructor, into
// anchored regular expressions. A path is kept when it matches at least one
// include pattern (if any were given) and matches no exclude pattern.
//
// Supported glob syntax:
//   *     any run of characters inside one path segment
//   ?     exactly one character inside a path segment
//   **    any run of characters, `/` included
//   **/   zero or more whole directories, so `src/**/*.ts` matches `src/index.ts`
// Every other character is matched literally. Patterns are anchored to the
// whole path, so `*.log` only matches log files at the top level.
class AdvancedFilter {
  // Characters that must be escaped to be matched literally in a RegExp source
  private static readonly REGEX_METACHARACTERS = '\\^$.+()|[]{}'

  private includePatterns: RegExp[] = []
  private excludePatterns: RegExp[] = []

  /**
   * @param config Glob lists and case-sensitivity setting to compile.
   */
  constructor(config: FilterConfig) {
    this.compilePatterns(config)
  }

  /** Compiles both glob lists; a missing list compiles to an empty array. */
  private compilePatterns(config: FilterConfig): void {
    const compile = (pattern: string): RegExp =>
      this.globToRegex(pattern, config.caseSensitive)

    this.includePatterns = (config.include ?? []).map(compile)
    this.excludePatterns = (config.exclude ?? []).map(compile)
  }

  /**
   * Translates one glob into an anchored regular expression.
   *
   * The glob is scanned character by character so that text emitted for one
   * token is never rewritten by a later substitution.
   *
   * @param pattern Glob to translate; backslashes are treated as path separators.
   * @param caseSensitive When false the expression gets the `i` flag.
   * @returns A regex without the `g` flag, so `.test()` keeps no state between calls.
   */
  private globToRegex(pattern: string, caseSensitive = true): RegExp {
    // Paths are normalized to `/` before matching, so do the same for patterns
    const glob = pattern.replace(/\\/g, '/')

    let source = ''
    let index = 0
    while (index < glob.length) {
      const char = glob.charAt(index)

      if (char === '*' && glob.charAt(index + 1) === '*') {
        if (glob.charAt(index + 2) === '/') {
          // `**/` may also match nothing at all
          source += '(?:.*/)?'
          index += 3
        }
        else {
          source += '.*'
          index += 2
        }
      }
      else if (char === '*') {
        source += '[^/]*'
        index += 1
      }
      else if (char === '?') {
        source += '[^/]'
        index += 1
      }
      else {
        source += AdvancedFilter.REGEX_METACHARACTERS.includes(char) ? `\\${char}` : char
        index += 1
      }
    }

    return new RegExp(`^${source}$`, caseSensitive ? '' : 'i')
  }

  /**
   * Decides whether a single path passes the filter.
   *
   * @param filePath Path to check; `\` separators are accepted.
   * @returns True when the path matches an include pattern (or none were
   *   configured) and matches no exclude pattern.
   */
  shouldInclude(filePath: string): boolean {
    const normalizedPath = filePath.replace(/\\/g, '/')
    const matchesPath = (pattern: RegExp): boolean => pattern.test(normalizedPath)

    // If include patterns exist, the path must match at least one
    if (this.includePatterns.length > 0 && !this.includePatterns.some(matchesPath))
      return false

    // The path must not match any exclude pattern
    return !this.excludePatterns.some(matchesPath)
  }

  /**
   * Filters a list of paths, preserving input order.
   *
   * @param paths Candidate paths.
   * @returns The paths for which `shouldInclude` is true.
   */
  filterPaths(paths: string[]): string[] {
    return paths.filter(path => this.shouldInclude(path))
  }
}

// Usage: keep TypeScript under src/ and Markdown under docs/, minus test files
// and anything inside a node_modules directory; matching ignores case.
const filter = new AdvancedFilter({
  include: ['src/**/*.ts', 'docs/**/*.md'],
  exclude: ['**/*.test.ts', '**/node_modules/**'],
  caseSensitive: false,
})

// filterPaths returns, in input order, the paths for which shouldInclude is true
const filteredFiles = filter.filterPaths([
  'src/index.ts',
  'src/utils.test.ts', // Excluded
  'docs/README.md',
  'node_modules/lib/index.js', // Excluded
])

Dynamic Filtering

Content-Based Filtering

ts
import { readFile, stat } from 'node:fs/promises'

/** Options for `ContentBasedFilter`: size limits, guessed MIME type and text-content patterns. */
interface ContentFilter {
  /** Upper size limit, compared against the `size` reported by `stat`; larger files are dropped. */
  maxSize?: number
  /** Lower size limit, compared against the `size` reported by `stat`; smaller files are dropped. */
  minSize?: number
  /** Regexes run against the UTF-8 content of files whose extension is on the filter's text-file list. */
  contentPatterns?: {
    /** When set, the content must match at least one of these. */
    include?: RegExp[]
    /** Content matching any of these drops the file. */
    exclude?: RegExp[]
  }
  /** MIME types as guessed from the file extension (the content is not inspected for this). */
  mimeTypes?: {
    /** When set, only files whose guessed type is listed are kept. */
    include?: string[]
    /** Files whose guessed type is listed are dropped. */
    exclude?: string[]
  }
}

/**
 * Filters files by size, by MIME type guessed from the extension, and by
 * regular expressions run against the content of text files.
 *
 * Checks run cheapest-first: `stat` for size, then the extension lookup, and
 * only then a full read of the file for content patterns.
 */
class ContentBasedFilter {
  /** Extension → MIME type. A Map, so inherited keys such as `constructor` never resolve. */
  private static readonly MIME_BY_EXTENSION = new Map<string, string>([
    ['js', 'text/javascript'],
    ['ts', 'text/typescript'],
    ['json', 'application/json'],
    ['md', 'text/markdown'],
    ['txt', 'text/plain'],
    ['pdf', 'application/pdf'],
    ['jpg', 'image/jpeg'],
    ['jpeg', 'image/jpeg'],
    ['png', 'image/png'],
    ['gif', 'image/gif'],
    ['mp4', 'video/mp4'],
    ['mp3', 'audio/mpeg'],
  ])

  /** Extensions whose files are read and matched against content patterns. */
  private static readonly TEXT_EXTENSIONS = new Set<string>([
    'txt',
    'md',
    'js',
    'ts',
    'json',
    'xml',
    'html',
    'css',
    'sql',
    'py',
    'java',
    'c',
    'cpp',
    'h',
    'yml',
    'yaml',
  ])

  /**
   * @param config Size, MIME and content rules; omitted rules are not applied.
   */
  constructor(private config: ContentFilter) {}

  /**
   * Decides whether a file passes every configured rule.
   *
   * @param filePath Path of the file to inspect.
   * @returns False when any rule rejects the file, or when the file cannot be
   *   stat-ed or read; true otherwise.
   */
  async shouldIncludeFile(filePath: string): Promise<boolean> {
    const { maxSize, minSize, mimeTypes, contentPatterns } = this.config

    try {
      const stats = await stat(filePath)

      // Size filtering; compared against undefined so that a limit of 0 is honored
      if (maxSize !== undefined && stats.size > maxSize)
        return false

      if (minSize !== undefined && stats.size < minSize)
        return false

      // MIME type filtering (extension-based guess)
      if (mimeTypes) {
        const mimeType = this.guessMimeType(filePath)

        if (mimeTypes.exclude?.includes(mimeType))
          return false

        if (mimeTypes.include && !mimeTypes.include.includes(mimeType))
          return false
      }

      // Content pattern filtering (text files only)
      if (contentPatterns && this.isTextFile(filePath)) {
        const content = await readFile(filePath, 'utf-8')
        const matchesContent = (pattern: RegExp): boolean =>
          ContentBasedFilter.matches(pattern, content)

        // Must match at least one include pattern if any are specified
        if (contentPatterns.include && !contentPatterns.include.some(matchesContent))
          return false

        // Must not match any exclude pattern
        if (contentPatterns.exclude?.some(matchesContent))
          return false
      }

      return true
    }
    catch {
      // Best effort: a file that cannot be stat-ed or read is excluded
      return false
    }
  }

  /**
   * Tests a caller-supplied regex from the start of the text.
   *
   * Regexes with the `g` or `y` flag remember `lastIndex` between `.test()`
   * calls; without the reset, a match in one file would make the search in the
   * next file start part-way through and miss matches.
   */
  private static matches(pattern: RegExp, text: string): boolean {
    pattern.lastIndex = 0
    return pattern.test(text)
  }

  /**
   * Lower-cased extension of the final path segment, without the dot.
   * Returns '' when the file name has no dot, so dots in directory names and
   * extension-less names are not mistaken for extensions.
   */
  private static extensionOf(filePath: string): string {
    const lastSeparator = Math.max(filePath.lastIndexOf('/'), filePath.lastIndexOf('\\'))
    const fileName = filePath.slice(lastSeparator + 1)
    const dot = fileName.lastIndexOf('.')
    return dot >= 0 ? fileName.slice(dot + 1).toLowerCase() : ''
  }

  /** Guesses a MIME type from the extension; unknown extensions map to `application/octet-stream`. */
  private guessMimeType(filePath: string): string {
    const extension = ContentBasedFilter.extensionOf(filePath)
    return ContentBasedFilter.MIME_BY_EXTENSION.get(extension) ?? 'application/octet-stream'
  }

  /** True when the extension is on the list of known text formats. */
  private isTextFile(filePath: string): boolean {
    return ContentBasedFilter.TEXT_EXTENSIONS.has(ContentBasedFilter.extensionOf(filePath))
  }
}

// Example: drop files over 10MB, drop JPEG/PNG images, and drop text files
// that appear to contain hard-coded credentials.
// The content patterns are only applied to files the filter treats as text
// (by extension); other files are never read.
const contentFilter = new ContentBasedFilter({
  maxSize: 10 * 1024 * 1024, // 10MB max
  mimeTypes: {
    exclude: ['image/jpeg', 'image/png'], // No images
  },
  contentPatterns: {
    exclude: [
      /password\s*=\s*["'].*["']/i,
      /api[_-]?key\s*=\s*["'].*["']/i,
      /secret\s*=\s*["'].*["']/i,
    ],
  },
})

Time-Based Filtering

ts
/**
 * Timestamp bounds for `TimeBasedFilter`. Each bound is optional and
 * inclusive: a file whose timestamp equals the bound is kept.
 */
interface TimeFilter {
  /** Drop files whose `mtime` is earlier than this. */
  modifiedAfter?: Date
  /** Drop files whose `mtime` is later than this. */
  modifiedBefore?: Date
  /** Drop files whose `birthtime` is earlier than this. */
  createdAfter?: Date
  /** Drop files whose `birthtime` is later than this. */
  createdBefore?: Date
  /** Drop files whose `atime` is earlier than this. */
  accessedAfter?: Date
  /** Drop files whose `atime` is later than this. */
  accessedBefore?: Date
}

/**
 * Keeps files whose modification, creation and access timestamps all fall
 * inside the configured bounds.
 */
class TimeBasedFilter {
  constructor(private config: TimeFilter) {}

  /**
   * Checks one file against every configured bound.
   *
   * @param filePath Path of the file to stat.
   * @returns True when no bound is violated; false when one is, or when the
   *   file cannot be stat-ed.
   */
  async shouldIncludeFile(filePath: string): Promise<boolean> {
    try {
      const { mtime, birthtime, atime } = await stat(filePath)
      const bounds = this.config

      // One row per timestamp: [actual value, lower bound, upper bound]
      const windows: Array<[Date, Date | undefined, Date | undefined]> = [
        [mtime, bounds.modifiedAfter, bounds.modifiedBefore],
        [birthtime, bounds.createdAfter, bounds.createdBefore],
        [atime, bounds.accessedAfter, bounds.accessedBefore],
      ]

      return windows.every(([actual, notBefore, notAfter]) => {
        const tooEarly = Boolean(notBefore && actual < notBefore)
        const tooLate = Boolean(notAfter && actual > notAfter)
        return !tooEarly && !tooLate
      })
    }
    catch {
      return false
    }
  }
}

// Example: only files modified in the last week.
// The cutoff is computed once, when this object is created, not on each check.
const recentFilter = new TimeBasedFilter({
  modifiedAfter: new Date(Date.now() - 7 * 24 * 60 * 60 * 1000),
})

Composite Filtering

Combining Multiple Filters

ts
/** Configuration for `CompositeFilter`; every part is optional and omitted parts are skipped. */
interface CompositeFilterConfig {
  /** Glob rules, handed to `AdvancedFilter`. */
  pathFilter?: FilterConfig
  /** Size, MIME and content rules, handed to `ContentBasedFilter`. */
  contentFilter?: ContentFilter
  /** Timestamp bounds, handed to `TimeBasedFilter`. */
  timeFilter?: TimeFilter
  /** Extra async predicates, run in order after the built-in filters; a file is kept only if all resolve to true. */
  customFilters?: Array<(filePath: string) => Promise<boolean>>
}

/**
 * Chains the path, time and content filters plus any caller-supplied
 * predicates. A file is kept only when every configured stage accepts it;
 * stages run cheapest-first and stop at the first rejection.
 */
class CompositeFilter {
  private pathFilter?: AdvancedFilter
  private contentFilter?: ContentBasedFilter
  private timeFilter?: TimeBasedFilter
  private customFilters: Array<(filePath: string) => Promise<boolean>> = []

  /**
   * @param config Which stages to enable; a stage is built only when its
   *   configuration is present.
   */
  constructor(config: CompositeFilterConfig) {
    const { pathFilter, contentFilter, timeFilter, customFilters } = config

    if (pathFilter)
      this.pathFilter = new AdvancedFilter(pathFilter)

    if (contentFilter)
      this.contentFilter = new ContentBasedFilter(contentFilter)

    if (timeFilter)
      this.timeFilter = new TimeBasedFilter(timeFilter)

    if (customFilters)
      this.customFilters = customFilters
  }

  /**
   * Runs one path through every configured stage.
   *
   * @param filePath Path to evaluate.
   * @returns False at the first stage that rejects the path, true if none do.
   *   A custom filter that rejects its promise makes this method reject too.
   */
  async shouldIncludeFile(filePath: string): Promise<boolean> {
    const { pathFilter, timeFilter, contentFilter } = this

    // Glob matching is synchronous and needs no I/O, so it goes first
    if (pathFilter && !pathFilter.shouldInclude(filePath))
      return false

    // Remaining stages in order of cost: stat-only, file read, then custom
    const asyncStages: Array<(candidate: string) => Promise<boolean>> = []
    if (timeFilter)
      asyncStages.push(candidate => timeFilter.shouldIncludeFile(candidate))
    if (contentFilter)
      asyncStages.push(candidate => contentFilter.shouldIncludeFile(candidate))
    asyncStages.push(...this.customFilters)

    for (const stage of asyncStages) {
      const accepted = await stage(filePath)
      if (!accepted)
        return false
    }

    return true
  }

  /**
   * Evaluates all paths concurrently and returns the accepted ones in input order.
   *
   * @param filePaths Candidate paths.
   * @returns Paths accepted by every stage. A path whose evaluation rejects
   *   is left out rather than failing the whole batch.
   */
  async filterFiles(filePaths: string[]): Promise<string[]> {
    const settled = await Promise.allSettled(
      filePaths.map(async (candidate) => {
        const accepted = await this.shouldIncludeFile(candidate)
        return accepted ? candidate : null
      }),
    )

    const kept: string[] = []
    for (const outcome of settled) {
      if (outcome.status === 'fulfilled' && outcome.value !== null)
        kept.push(outcome.value)
    }
    return kept
  }
}

// Example: comprehensive filtering for a source code backup.
// Combines glob rules, a size/content check, a 30-day recency window and two
// custom predicates; a file must pass all of them to be backed up.
const sourceCodeFilter = new CompositeFilter({
  pathFilter: {
    include: [
      'src/**/*.ts',
      'src/**/*.js',
      '*.md',
      'package.json',
      'tsconfig.json',
    ],
    exclude: [
      '**/*.test.ts',
      '**/*.spec.ts',
      '**/node_modules/**',
      'dist/**',
      'build/**',
    ],
  },

  contentFilter: {
    maxSize: 1024 * 1024, // 1MB max per file
    contentPatterns: {
      // No `g` flag: a global regex remembers `lastIndex` between `.test()`
      // calls, so reusing it across files would make it miss matches.
      exclude: [
        /console\.log\(/, // Exclude files with console.log
        /debugger;/, // Exclude files with debugger statements
      ],
    },
  },

  timeFilter: {
    modifiedAfter: new Date(Date.now() - 30 * 24 * 60 * 60 * 1000), // Last 30 days
  },

  customFilters: [
    // Custom filter: exclude empty files (and files that cannot be stat-ed)
    async (filePath: string) => {
      try {
        const stats = await stat(filePath)
        return stats.size > 0
      }
      catch {
        return false
      }
    },

    // Custom filter: a .ts file must contain a line starting with "export ";
    // files with any other extension pass unchanged
    async (filePath: string) => {
      if (!filePath.endsWith('.ts'))
        return true

      try {
        const content = await readFile(filePath, 'utf-8')
        return /^export /m.test(content) // Has export statements
      }
      catch {
        return false
      }
    },
  ],
})

Database Filtering

Table and Schema Filtering

ts
/** Table-selection rules for `DatabaseTableFilter`; every rule is optional. */
interface DatabaseFilter {
  /** When set, only tables whose name is listed are kept (exact match). */
  includeTables?: string[]
  /** Tables whose name is listed are dropped (exact match). */
  excludeTables?: string[]
  /** When set, tables that report a schema are kept only if that schema is listed. */
  includeSchemas?: string[]
  /** Tables that report a listed schema are dropped. */
  excludeSchemas?: string[]
  /** Regex rules applied to the table name, after the explicit lists. */
  tablePatterns?: {
    /** When set, the name must match at least one of these. */
    include?: RegExp[]
    /** A name matching any of these drops the table. */
    exclude?: RegExp[]
  }
}

/**
 * Selects database tables by schema, by explicit name lists and by regular
 * expressions on the table name. A table is kept only if no rule rejects it.
 */
class DatabaseTableFilter {
  /**
   * @param config Selection rules; omitted rules are not applied.
   */
  constructor(private config: DatabaseFilter) {}

  /**
   * Decides whether one table passes the configured rules.
   *
   * @param tableName Table name, matched exactly against the lists and by
   *   regex against the patterns.
   * @param schemaName Schema the table belongs to; when omitted (or empty),
   *   the schema rules are skipped entirely.
   * @returns True when no rule rejects the table.
   */
  shouldIncludeTable(tableName: string, schemaName?: string): boolean {
    const { includeSchemas, excludeSchemas, includeTables, excludeTables, tablePatterns } = this.config

    // Schema filtering
    if (schemaName) {
      if (includeSchemas && !includeSchemas.includes(schemaName))
        return false

      if (excludeSchemas?.includes(schemaName))
        return false
    }

    // Explicit table lists
    if (includeTables && !includeTables.includes(tableName))
      return false

    if (excludeTables?.includes(tableName))
      return false

    // Pattern matching. Regexes with the `g` or `y` flag remember `lastIndex`
    // between `.test()` calls, so it is reset to keep the result independent
    // of whichever table was tested before.
    const matchesName = (pattern: RegExp): boolean => {
      pattern.lastIndex = 0
      return pattern.test(tableName)
    }

    if (tablePatterns?.include && !tablePatterns.include.some(matchesName))
      return false

    if (tablePatterns?.exclude?.some(matchesName))
      return false

    return true
  }

  /**
   * Filters a table list, preserving input order.
   *
   * @param tables Tables to check, each with an optional schema.
   * @returns The names (without schema) of the tables that pass.
   */
  filterTables(tables: Array<{ name: string, schema?: string }>): string[] {
    return tables
      .filter(table => this.shouldIncludeTable(table.name, table.schema))
      .map(table => table.name)
  }
}

// Example: skip session/cache/log tables by exact name, and temporary,
// backup and underscore-prefixed tables by name pattern.
const dbFilter = new DatabaseTableFilter({
  excludeTables: ['sessions', 'cache', 'logs'],
  tablePatterns: {
    exclude: [
      /^temp_/i, // Tables starting with "temp_"
      /_backup$/i, // Tables ending with "_backup"
      /^_/, // Tables starting with underscore
    ],
  },
})

// Usage in database config: the table list is computed up front by dbFilter,
// so of the four candidates only `users` and `orders` end up in `tables`.
// NOTE(review): `PostgreSQLConfig` and `BackupType` are not declared in this snippet — presumably backupx exports; confirm the import.
// NOTE(review): the connection string embeds placeholder credentials; real ones should not be hard-coded.
const dbConfig: PostgreSQLConfig = {
  type: BackupType.POSTGRESQL,
  name: 'main-db',
  connection: 'postgres://user:pass@localhost/db',
  // Apply filtering
  tables: dbFilter.filterTables([
    { name: 'users' },
    { name: 'orders' },
    { name: 'temp_processing' }, // Will be excluded
    { name: 'logs' }, // Will be excluded
  ]),
}

This guide provides comprehensive filtering strategies for creating precise, efficient backup operations that only include the data you actually need.

Released under the MIT License.