skills/patricio0312rev/skills/data-integrity-auditor

data-integrity-auditor

SKILL.md

Data Integrity Auditor

Detect and fix data integrity issues automatically.

Integrity Check Types

1. Orphaned Records

-- Find orphaned orders (no matching user)
SELECT o.id, o.user_id
FROM orders o
LEFT JOIN users u ON u.id = o.user_id
WHERE u.id IS NULL;

-- Find orphaned order items (no matching order)
SELECT oi.id, oi.order_id
FROM order_items oi
LEFT JOIN orders o ON o.id = oi.order_id
WHERE o.id IS NULL;

2. Broken Foreign Keys

// scripts/check-foreign-keys.ts
async function checkForeignKeys() {
  const issues: string[] = [];

  // Orders → Users
  const orphanedOrders = await prisma.$queryRaw<any[]>`
    SELECT o.id, o.user_id
    FROM orders o
    LEFT JOIN users u ON u.id = o.user_id
    WHERE u.id IS NULL
    LIMIT 100
  `;

  if (orphanedOrders.length > 0) {
    issues.push(
      `❌ Found ${orphanedOrders.length} orders with invalid user_id`
    );
    console.log(
      "  Sample IDs:",
      orphanedOrders.slice(0, 5).map((o) => o.id)
    );
  }

  // Order Items → Orders
  const orphanedItems = await prisma.$queryRaw<any[]>`
    SELECT oi.id, oi.order_id
    FROM order_items oi
    LEFT JOIN orders o ON o.id = oi.order_id
    WHERE o.id IS NULL
    LIMIT 100
  `;

  if (orphanedItems.length > 0) {
    issues.push(
      `❌ Found ${orphanedItems.length} order items with invalid order_id`
    );
  }

  // Products → Categories
  const orphanedProducts = await prisma.$queryRaw<any[]>`
    SELECT p.id, p.category_id
    FROM products p
    LEFT JOIN categories c ON c.id = p.category_id
    WHERE p.category_id IS NOT NULL
    AND c.id IS NULL
    LIMIT 100
  `;

  if (orphanedProducts.length > 0) {
    issues.push(
      `❌ Found ${orphanedProducts.length} products with invalid category_id`
    );
  }

  return issues;
}

3. Constraint Violations

async function checkConstraints() {
  const issues: string[] = [];

  // Check email uniqueness (should be caught by DB, but verify)
  const duplicateEmails = await prisma.$queryRaw<any[]>`
    SELECT email, COUNT(*) as count
    FROM users
    GROUP BY email
    HAVING COUNT(*) > 1
  `;

  if (duplicateEmails.length > 0) {
    issues.push(`❌ Found ${duplicateEmails.length} duplicate emails`);
  }

  // Check negative quantities
  const negativeStock = await prisma.$queryRaw<any[]>`
    SELECT id, name, stock
    FROM products
    WHERE stock < 0
  `;

  if (negativeStock.length > 0) {
    issues.push(
      `❌ Found ${negativeStock.length} products with negative stock`
    );
  }

  // Check negative prices
  const negativePrices = await prisma.$queryRaw<any[]>`
    SELECT id, name, price
    FROM products
    WHERE price < 0
  `;

  if (negativePrices.length > 0) {
    issues.push(
      `❌ Found ${negativePrices.length} products with negative prices`
    );
  }

  // Check invalid order status
  const invalidStatus = await prisma.$queryRaw<any[]>`
    SELECT id, status
    FROM orders
    WHERE status NOT IN ('pending', 'paid', 'shipped', 'delivered', 'cancelled')
  `;

  if (invalidStatus.length > 0) {
    issues.push(`❌ Found ${invalidStatus.length} orders with invalid status`);
  }

  return issues;
}

4. Missing Required Fields

async function checkMissingFields() {
  const issues: string[] = [];

  // Users missing required fields
  const usersNoEmail = await prisma.user.count({
    where: { email: null },
  });

  if (usersNoEmail > 0) {
    issues.push(`❌ Found ${usersNoEmail} users without email`);
  }

  // Orders with NULL totals
  const ordersNoTotal = await prisma.order.count({
    where: { total: null },
  });

  if (ordersNoTotal > 0) {
    issues.push(`❌ Found ${ordersNoTotal} orders without total`);
  }

  return issues;
}

Comprehensive Audit Script

// scripts/audit-data-integrity.ts
interface IntegrityIssue {
  severity: "critical" | "warning" | "info";
  category: string;
  message: string;
  count: number;
  query?: string;
  fix?: string;
}

async function auditDataIntegrity(): Promise<IntegrityIssue[]> {
  const issues: IntegrityIssue[] = [];

  console.log("🔍 Auditing data integrity...\n");

  // 1. Check orphaned records
  const orphanedOrders = await prisma.$queryRaw<any[]>`
    SELECT COUNT(*) as count FROM orders o
    LEFT JOIN users u ON u.id = o.user_id
    WHERE u.id IS NULL
  `;

  if (orphanedOrders[0].count > 0) {
    issues.push({
      severity: "critical",
      category: "orphaned-records",
      message: "Orders with invalid user references",
      count: orphanedOrders[0].count,
      query:
        "SELECT id, user_id FROM orders o LEFT JOIN users u ON u.id = o.user_id WHERE u.id IS NULL",
      fix: "DELETE FROM orders WHERE id IN (...)",
    });
  }

  // 2. Check duplicate unique constraints
  const duplicateEmails = await prisma.$queryRaw<any[]>`
    SELECT email, COUNT(*) as count
    FROM users
    GROUP BY email
    HAVING COUNT(*) > 1
  `;

  if (duplicateEmails.length > 0) {
    issues.push({
      severity: "critical",
      category: "constraint-violation",
      message: "Duplicate email addresses",
      count: duplicateEmails.length,
      fix: "Keep newest record, delete duplicates",
    });
  }

  // 3. Check data inconsistencies
  const invalidPrices = await prisma.$queryRaw<any[]>`
    SELECT COUNT(*) as count FROM products WHERE price < 0
  `;

  if (invalidPrices[0].count > 0) {
    issues.push({
      severity: "warning",
      category: "data-quality",
      message: "Products with negative prices",
      count: invalidPrices[0].count,
      fix: "UPDATE products SET price = ABS(price) WHERE price < 0",
    });
  }

  // 4. Check referential integrity
  const brokenOrderItems = await prisma.$queryRaw<any[]>`
    SELECT COUNT(*) as count FROM order_items oi
    LEFT JOIN orders o ON o.id = oi.order_id
    WHERE o.id IS NULL
  `;

  if (brokenOrderItems[0].count > 0) {
    issues.push({
      severity: "critical",
      category: "referential-integrity",
      message: "Order items referencing non-existent orders",
      count: brokenOrderItems[0].count,
      fix: "DELETE FROM order_items WHERE order_id NOT IN (SELECT id FROM orders)",
    });
  }

  return issues;
}

async function generateReport() {
  const issues = await auditDataIntegrity();

  console.log("\n📊 Data Integrity Report\n");
  console.log(`Total issues: ${issues.length}\n`);

  const grouped = issues.reduce((acc, issue) => {
    if (!acc[issue.severity]) acc[issue.severity] = [];
    acc[issue.severity].push(issue);
    return acc;
  }, {} as Record<string, IntegrityIssue[]>);

  (["critical", "warning", "info"] as const).forEach((severity) => {
    const items = grouped[severity] || [];
    if (items.length === 0) return;

    console.log(`\n${severity.toUpperCase()} (${items.length})\n`);

    items.forEach((issue, i) => {
      console.log(`${i + 1}. [${issue.category}] ${issue.message}`);
      console.log(`   Count: ${issue.count}`);
      if (issue.query) {
        console.log(`   Query: ${issue.query.substring(0, 80)}...`);
      }
      if (issue.fix) {
        console.log(`   Fix: ${issue.fix}`);
      }
      console.log();
    });
  });

  // Exit with error if critical issues
  process.exit(grouped.critical?.length > 0 ? 1 : 0);
}

generateReport();

Automated Fixes

// scripts/fix-integrity-issues.ts
async function fixOrphanedRecords() {
  console.log("🔧 Fixing orphaned records...\n");

  // Delete orphaned orders
  const deletedOrders = await prisma.$executeRaw`
    DELETE FROM orders
    WHERE id IN (
      SELECT o.id FROM orders o
      LEFT JOIN users u ON u.id = o.user_id
      WHERE u.id IS NULL
    )
  `;
  console.log(`✅ Deleted ${deletedOrders} orphaned orders`);

  // Delete orphaned order items
  const deletedItems = await prisma.$executeRaw`
    DELETE FROM order_items
    WHERE id IN (
      SELECT oi.id FROM order_items oi
      LEFT JOIN orders o ON o.id = oi.order_id
      WHERE o.id IS NULL
    )
  `;
  console.log(`✅ Deleted ${deletedItems} orphaned order items`);
}

async function fixDuplicates() {
  console.log("🔧 Fixing duplicate records...\n");

  // Keep newest user, delete old duplicates
  await prisma.$executeRaw`
    DELETE FROM users
    WHERE id IN (
      SELECT id FROM (
        SELECT id,
               ROW_NUMBER() OVER (PARTITION BY email ORDER BY created_at DESC) as rn
        FROM users
      ) t
      WHERE rn > 1
    )
  `;
  console.log(`✅ Fixed duplicate emails`);
}

async function fixConstraintViolations() {
  console.log("🔧 Fixing constraint violations...\n");

  // Fix negative prices
  const fixedPrices = await prisma.$executeRaw`
    UPDATE products
    SET price = ABS(price)
    WHERE price < 0
  `;
  console.log(`✅ Fixed ${fixedPrices} negative prices`);

  // Fix negative stock
  const fixedStock = await prisma.$executeRaw`
    UPDATE products
    SET stock = 0
    WHERE stock < 0
  `;
  console.log(`✅ Fixed ${fixedStock} negative stock values`);
}

Prevention: Add Missing Constraints

-- Migration to add missing constraints

-- 1. Add foreign key constraints
ALTER TABLE orders
  ADD CONSTRAINT fk_orders_user_id
  FOREIGN KEY (user_id) REFERENCES users(id)
  ON DELETE CASCADE;

ALTER TABLE order_items
  ADD CONSTRAINT fk_order_items_order_id
  FOREIGN KEY (order_id) REFERENCES orders(id)
  ON DELETE CASCADE;

-- 2. Add check constraints
ALTER TABLE products
  ADD CONSTRAINT chk_products_price_positive
  CHECK (price >= 0);

ALTER TABLE products
  ADD CONSTRAINT chk_products_stock_non_negative
  CHECK (stock >= 0);

-- 3. Add unique constraints
CREATE UNIQUE INDEX idx_users_email_unique
  ON users(LOWER(email));

-- 4. Add NOT NULL constraints
ALTER TABLE users
  ALTER COLUMN email SET NOT NULL;

ALTER TABLE orders
  ALTER COLUMN total SET NOT NULL;

Automated Testing

// tests/data-integrity.test.ts
describe("Data Integrity", () => {
  it("should not allow orphaned orders", async () => {
    // Try to create order with non-existent user
    await expect(
      prisma.order.create({
        data: {
          userId: 99999, // Non-existent
          total: 100,
          status: "pending",
        },
      })
    ).rejects.toThrow("Foreign key constraint");
  });

  it("should not allow negative prices", async () => {
    await expect(
      prisma.product.create({
        data: {
          name: "Test",
          price: -10, // Invalid
          stock: 100,
        },
      })
    ).rejects.toThrow("Check constraint");
  });

  it("should not allow duplicate emails", async () => {
    await prisma.user.create({
      data: { email: "test@example.com", name: "Test" },
    });

    await expect(
      prisma.user.create({
        data: { email: "test@example.com", name: "Test 2" },
      })
    ).rejects.toThrow("Unique constraint");
  });
});

Monitoring Dashboard

// Monitor data quality metrics
async function getDataQualityMetrics() {
  return {
    orphanedOrders: await prisma.$queryRaw`
      SELECT COUNT(*) FROM orders o
      LEFT JOIN users u ON u.id = o.user_id
      WHERE u.id IS NULL
    `,
    duplicateEmails: await prisma.$queryRaw`
      SELECT COUNT(*) FROM (
        SELECT email FROM users GROUP BY email HAVING COUNT(*) > 1
      ) t
    `,
    invalidPrices: await prisma.$queryRaw`
      SELECT COUNT(*) FROM products WHERE price < 0
    `,
    missingData: await prisma.$queryRaw`
      SELECT
        SUM(CASE WHEN email IS NULL THEN 1 ELSE 0 END) as users_no_email,
        SUM(CASE WHEN total IS NULL THEN 1 ELSE 0 END) as orders_no_total
      FROM users
      CROSS JOIN orders
    `,
  };
}

Best Practices

  1. Add constraints: Prevent issues at database level
  2. Regular audits: Weekly integrity checks
  3. Automated fixes: Safe, reversible repairs
  4. Monitor metrics: Track data quality over time
  5. Test constraints: Ensure they work
  6. Soft deletes: Easier recovery
  7. Backup before fixes: Always

Output Checklist

  • Orphaned record detection
  • Foreign key integrity checks
  • Constraint violation detection
  • Missing field checks
  • Automated audit script
  • Fix scripts (with dry-run)
  • Prevention migrations (add constraints)
  • Automated tests
  • Monitoring dashboard
  • Regular audit schedule
Weekly Installs
11
First Seen
10 days ago
Installed on
claude-code9
windsurf8
opencode8
gemini-cli7
antigravity7
github-copilot7