flaky-test-detective

Installation

SKILL.md

Flaky Test Detective

Diagnose and eliminate flaky tests systematically.

Common Flaky Test Patterns

1. Timing Issues

// ❌ Flaky: Race condition
// The assertion runs right after render() with nothing awaited in between,
// so the result depends on whether "John Doe" is already on screen.
test("should load user data", async () => {
  render(<UserProfile userId="123" />);

  // Race condition - might pass or fail
  expect(screen.getByText("John Doe")).toBeInTheDocument();
});

// ✅ Fixed: Wait for element
// Same test, but the assertion is wrapped in an awaited waitFor() so the test
// waits for the text instead of checking once.
test("should load user data", async () => {
  render(<UserProfile userId="123" />);

  await waitFor(() => {
    expect(screen.getByText("John Doe")).toBeInTheDocument();
  });
});

// ❌ Flaky: Fixed timeout
// Sleeps a hard-coded 500 ms and then asserts once; there is no link between
// the sleep duration and the moment the class is actually applied.
// NOTE(review): `element` is not defined in this snippet - presumably it comes
// from a query on the rendered component; confirm before copying.
test("should complete animation", async () => {
  render(<AnimatedComponent />);
  await new Promise((resolve) => setTimeout(resolve, 500)); // Brittle!
  expect(element).toHaveClass("animated");
});

// ✅ Fixed: Wait for condition
// Waits on the condition itself; the 2000 ms value is passed as waitFor's
// `timeout` option rather than being slept unconditionally.
// NOTE(review): `element` is not defined in this snippet either - confirm.
test("should complete animation", async () => {
  render(<AnimatedComponent />);
  await waitFor(
    () => {
      expect(element).toHaveClass("animated");
    },
    { timeout: 2000 }
  );
});

2. Shared State

// ❌ Flaky: Global state pollution
// `userId` lives at module scope, so an assignment in one test is visible to
// every test that runs afterwards.
let userId = "123";

test("test A", () => {
  userId = "456"; // Modifies global
  // ...
});

test("test B", () => {
  expect(userId).toBe("123"); // Fails if test A runs first!
});

// ✅ Fixed: Isolated state
// Each test declares its own `userId`, so neither can observe the other's value.
test("test A", () => {
  const userId = "456"; // Local variable
  // ...
});

test("test B", () => {
  // Illustrative only: this compares a local constant with its own literal.
  const userId = "123";
  expect(userId).toBe("123");
});

// ❌ Flaky: Database not cleaned
// Both tests create a user with the same email and neither removes it.
test("should create user", async () => {
  await db.user.create({ email: "test@example.com" });
  // No cleanup!
});

test("should create another user", async () => {
  await db.user.create({ email: "test@example.com" }); // Fails! Duplicate
});

// ✅ Fixed: Proper cleanup
// Runs after every test. NOTE(review): presumably deleteMany() with no filter
// removes all user rows (Prisma-style client) - confirm against the `db` in use.
afterEach(async () => {
  await db.user.deleteMany();
});

3. Randomness

// ❌ Flaky: Random data
// The input is generated per run, yet the assertion expects one specific name
// in first position.
test("should sort users", () => {
  const users = generateRandomUsers(10); // Different each time!
  const sorted = sortUsers(users);
  expect(sorted[0].name).toBe("Alice"); // Might not be Alice
});

// ✅ Fixed: Deterministic data
// A hand-written fixture, deliberately out of order, so the expected first
// element is known in advance.
// NOTE(review): the assertion only holds if sortUsers orders by name or by
// ascending age (Alice is first under both) - confirm which is intended.
test("should sort users", () => {
  const users = [
    { name: "Charlie", age: 30 },
    { name: "Alice", age: 25 },
    { name: "Bob", age: 35 },
  ];
  const sorted = sortUsers(users);
  expect(sorted[0].name).toBe("Alice");
});

// ✅ Fixed: Seeded randomness
// Alternative when generated data is still wanted: re-seed the generator
// before every test so each test starts from the same seed.
import { faker } from "@faker-js/faker";

beforeEach(() => {
  faker.seed(12345); // Same data every time
});

4. Network Dependencies

// ❌ Flaky: Real API call
// The expected length (10) depends on whatever the external service returns
// at the time of the run.
test("should fetch users", async () => {
  const users = await fetchUsers(); // External API!
  expect(users).toHaveLength(10); // Might fail if API down
});

// ✅ Fixed: Mocked API
// Registers a handler for GET /api/users that returns a fixed two-user payload,
// then asserts against that payload's length.
// NOTE(review): `server`, `http` and `HttpResponse` are not imported in this
// snippet - presumably an MSW server set up elsewhere; confirm.
test("should fetch users", async () => {
  server.use(
    http.get("/api/users", () => {
      return HttpResponse.json([
        { id: "1", name: "User 1" },
        { id: "2", name: "User 2" },
      ]);
    })
  );

  const users = await fetchUsers();
  expect(users).toHaveLength(2);
});

Flaky Test Detection Script

// scripts/detect-flaky-tests.ts
import { execSync } from "child_process";

/**
 * Runs the test command once and returns what it wrote to stdout.
 *
 * `execSync` throws whenever the command exits non-zero, which is exactly what
 * a test runner does when any test fails. The JSON report is still attached to
 * the thrown error as `stdout`, so it is recovered here; otherwise every run
 * containing a failure would be dropped and flakiness could never be observed.
 */
function runTestSuiteOnce(command: string): string {
  try {
    return execSync(command, {
      encoding: "utf-8",
      // JSON reports of large suites can exceed Node's default output limit.
      maxBuffer: 256 * 1024 * 1024,
    });
  } catch (error: unknown) {
    const stdout =
      typeof error === "object" && error !== null && "stdout" in error
        ? (error as { stdout: unknown }).stdout
        : undefined;
    if (typeof stdout === "string" && stdout.trim() !== "") {
      return stdout;
    }
    // No output at all: the command itself could not run. Let the caller log it.
    throw error;
  }
}

/**
 * Extracts the JSON report object from raw runner output.
 *
 * npm prints a "> package@version script" banner on stdout before the script's
 * own output, so everything outside the outermost braces is discarded.
 */
function parseJsonReport(output: string): unknown {
  const start = output.indexOf("{");
  const end = output.lastIndexOf("}");
  if (start === -1 || end <= start) {
    throw new Error("Test output did not contain a JSON report");
  }
  return JSON.parse(output.slice(start, end + 1));
}

/**
 * Runs the test suite repeatedly and reports tests that both passed and failed.
 *
 * Prints a report to the console and calls `process.exit(1)` when at least one
 * flaky test is found, or when no run produced a usable report.
 *
 * @param iterations Number of times to run the suite.
 * @param runTests Produces the raw JSON-reporter output of one run. Defaults to
 *   running `npm test -- --reporter=json`; injectable so the aggregation can be
 *   exercised without spawning a process.
 */
async function detectFlakyTests(
  iterations: number = 10,
  runTests: () => string = () => runTestSuiteOnce("npm test -- --reporter=json")
) {
  type JsonReport = {
    testResults?: Array<{
      name: string;
      assertionResults?: Array<{ fullName: string; status: string }>;
    }>;
  };

  const results = new Map<string, { passed: number; failed: number }>();
  let unusableRuns = 0;

  for (let i = 0; i < iterations; i++) {
    console.log(`\nRun ${i + 1}/${iterations}`);

    try {
      const report = parseJsonReport(runTests()) as JsonReport;

      for (const file of report.testResults ?? []) {
        for (const test of file.assertionResults ?? []) {
          // Skipped/pending/todo tests were not executed, so they are neither
          // a pass nor a failure.
          if (test.status !== "passed" && test.status !== "failed") {
            continue;
          }

          const key = `${file.name}::${test.fullName}`;
          const stats = results.get(key) ?? { passed: 0, failed: 0 };
          if (test.status === "passed") {
            stats.passed++;
          } else {
            stats.failed++;
          }
          results.set(key, stats);
        }
      }
    } catch (error) {
      unusableRuns++;
      console.error("Test run failed:", error);
    }
  }

  // Analyze results
  console.log("\n🔍 Flaky Test Report\n");

  const flakyTests: string[] = [];

  results.forEach((stats, testName) => {
    if (stats.failed > 0 && stats.passed > 0) {
      // Rate is relative to the runs in which the test actually executed, not
      // to `iterations`, so unusable runs do not skew it.
      const executions = stats.passed + stats.failed;
      const failureRate = (stats.failed / executions) * 100;
      console.log(`❌ FLAKY: ${testName}`);
      console.log(`   Passed: ${stats.passed}/${executions}`);
      console.log(`   Failed: ${stats.failed}/${executions}`);
      console.log(`   Failure rate: ${failureRate.toFixed(1)}%\n`);
      flakyTests.push(testName);
    }
  });

  if (unusableRuns > 0) {
    console.warn(
      `⚠️ ${unusableRuns}/${iterations} runs produced no usable report`
    );
  }

  if (flakyTests.length > 0) {
    console.log(`\n🚨 Found ${flakyTests.length} flaky tests`);
    process.exit(1);
  } else if (iterations > 0 && unusableRuns === iterations) {
    // Nothing was measured, so claiming "no flaky tests" would be misleading.
    console.error("No run produced a usable report; cannot assess flakiness");
    process.exit(1);
  } else {
    console.log("✅ No flaky tests detected!");
  }
}

// Script entry point: run the suite 20 times. The returned promise is handled
// explicitly so an unexpected rejection is logged with context and fails the
// process, rather than being left as a floating promise.
detectFlakyTests(20).catch((error: unknown) => {
  console.error("Flaky test detection crashed:", error);
  process.exit(1);
});

Root Cause Analysis

// Framework for analyzing flaky tests
/** Result of classifying one flaky test from its error messages. */
interface FlakyTestAnalysis {
  /** Name of the analyzed test, as passed in by the caller. */
  testName: string;
  /** Failure rate supplied by the caller; 0 when none was given. */
  failureRate: number;
  /** One entry per heuristic that matched, in timing/state/randomness/network order. */
  symptoms: string[];
  /** Most specific matching category, or "unknown" when nothing matched. */
  rootCause: "timing" | "state" | "randomness" | "network" | "unknown";
  /** Suggested fix for `rootCause`; empty when nothing matched. */
  recommendation: string;
}

/**
 * Classifies a flaky test by keyword heuristics over its error messages.
 *
 * Matching is case-insensitive. Every matching heuristic adds a symptom, but
 * the root cause is taken from the most specific match: network, then shared
 * state, then timing, and only then the generic "expected ... received"
 * pattern. That last pattern appears in most assertion failures, so it must
 * not override a more specific signal.
 *
 * @param testName Name of the test being analyzed.
 * @param errorMessages Error messages collected from the test's failures.
 * @param failureRate Optional observed failure rate to carry into the result.
 * @returns The analysis; `rootCause` is "unknown" when no heuristic matches.
 */
function analyzeTest(
  testName: string,
  errorMessages: string[],
  failureRate: number = 0
): FlakyTestAnalysis {
  const analysis: FlakyTestAnalysis = {
    testName,
    failureRate,
    symptoms: [],
    rootCause: "unknown",
    recommendation: "",
  };

  // Lower-case once so the keyword checks below are case-insensitive.
  const messages = errorMessages.map((msg) => msg.toLowerCase());
  const anyMessage = (predicate: (msg: string) => boolean): boolean =>
    messages.some(predicate);

  // Listed in symptom-reporting order; `specificity` decides the root cause.
  const heuristics: Array<{
    rootCause: FlakyTestAnalysis["rootCause"];
    specificity: number;
    symptom: string;
    recommendation: string;
    matched: boolean;
  }> = [
    {
      rootCause: "timing",
      specificity: 2,
      symptom: "Timeout or element not found",
      recommendation: "Add explicit waits using waitFor() or findBy* queries",
      matched: anyMessage(
        (msg) => msg.includes("timeout") || msg.includes("not found")
      ),
    },
    {
      rootCause: "state",
      specificity: 3,
      symptom: "Duplicate or existing data",
      recommendation: "Add beforeEach/afterEach cleanup or use unique test data",
      matched: anyMessage(
        (msg) =>
          msg.includes("already exists") || msg.includes("unique constraint")
      ),
    },
    {
      rootCause: "randomness",
      specificity: 1,
      symptom: "Inconsistent values",
      recommendation: "Use deterministic test data or seed random generators",
      matched: anyMessage(
        (msg) => msg.includes("expected") && msg.includes("received")
      ),
    },
    {
      rootCause: "network",
      specificity: 4,
      symptom: "Network or connection errors",
      recommendation: "Mock all network requests using MSW or similar",
      matched: anyMessage(
        (msg) => msg.includes("network") || msg.includes("econnrefused")
      ),
    },
  ];

  let bestSpecificity = 0;
  for (const heuristic of heuristics) {
    if (!heuristic.matched) {
      continue;
    }
    analysis.symptoms.push(heuristic.symptom);
    if (heuristic.specificity > bestSpecificity) {
      bestSpecificity = heuristic.specificity;
      analysis.rootCause = heuristic.rootCause;
      analysis.recommendation = heuristic.recommendation;
    }
  }

  return analysis;
}

Stabilization Guidelines

// Test stability checklist
/**
 * Test stability checklist, grouped by the category of flakiness each item
 * guards against. Each category maps to a list of short guidance strings.
 */
const stabilityChecklist = {
  // Waiting on conditions rather than on the clock.
  timing: [
    "Use waitFor() instead of fixed timeouts",
    "Use findBy* queries (built-in waiting)",
    "Set appropriate timeout values",
    "Wait for loading states to disappear",
  ],
  // Keeping tests from leaking data into each other.
  state: [
    "Clear database before each test",
    "Reset mocks after each test",
    "Use test-specific data (unique IDs)",
    "Avoid global variables",
  ],
  // Making generated values reproducible.
  randomness: [
    "Use fixed seed for random generators",
    "Use deterministic test data",
    "Avoid Date.now() - mock time instead",
    "Generate IDs deterministically",
  ],
  // Removing dependence on real services.
  network: [
    "Mock all API calls",
    "Use MSW for HTTP mocking",
    "Avoid real external services",
    "Test network errors explicitly",
  ],
  // Keeping concurrently running workers from colliding.
  parallelism: [
    "Use isolated databases per test worker",
    "Avoid port conflicts (random ports)",
    "Don't share file system state",
    "Use test.concurrent cautiously",
  ],
};

Auto-Fix Patterns

// Automated fixes for common issues

// Fix 1: Add waitFor to assertions
/**
 * Wraps `expect(screen.getBy...(...)).toBeInTheDocument()` assertions in an
 * awaited `waitFor`.
 *
 * Replace: expect(screen.getByText('...')).toBeInTheDocument()
 * With:    await waitFor(() => expect(screen.getByText('...')).toBeInTheDocument())
 *
 * The whole assertion is matched as one unit, so the output always has
 * balanced parentheses. Assertions that do not fit the pattern (a different
 * subject such as `expect(element)`, or a different matcher) are left as is.
 * Assertions already wrapped in the inline `await waitFor(() => ...)` form are
 * not wrapped again.
 *
 * @param code Test source text to transform.
 * @returns The transformed source text.
 */
function addWaitFor(code: string): string {
  // Query arguments may contain one level of nested parentheses.
  const assertion =
    /(?<!waitFor\(\(\) => )expect\(screen\.getBy\w+\((?:[^()]|\([^()]*\))*\)\)\.toBeInTheDocument\(\)/g;

  // A replacer function avoids "$" in the matched text being read as a
  // replacement pattern.
  return code.replace(assertion, (match) => `await waitFor(() => ${match})`);
}

// Fix 2: Replace getBy with findBy
/**
 * Rewrites synchronous `screen.getBy*` / `screen.getAllBy*` queries to their
 * awaited `findBy*` / `findAllBy*` counterparts.
 *
 * An `await` already in front of the query is absorbed, so the result never
 * contains `await await`. Other query families (e.g. `queryBy*`) are untouched.
 *
 * @param code Test source text to transform.
 * @returns The transformed source text.
 */
function replaceGetByWithFindBy(code: string): string {
  return code.replace(
    /(?:\bawait\s+)?\bscreen\.get(All)?By/g,
    (_match: string, all: string | undefined) =>
      `await screen.find${all ?? ""}By`
  );
}

// Fix 3: Add cleanup
/**
 * Inserts an `afterEach` cleanup hook before the first `test(` call, unless
 * the code already mentions `afterEach`.
 *
 * When the code contains no `test(` call it is returned unchanged. Only a
 * standalone `test(` counts: member calls such as `regex.test(` and longer
 * identifiers such as `latest(` are ignored. If the `test(` call is the first
 * thing on its line, the hook is inserted on its own lines with the same
 * indentation.
 *
 * @param code Test source text to transform.
 * @returns The source text with the hook inserted, or the input unchanged.
 */
function addCleanup(code: string): string {
  if (code.includes("afterEach")) {
    return code;
  }

  const insertPoint = code.search(/(?<![\w.$])test\(/);
  if (insertPoint === -1) {
    // Nothing to anchor on; slicing with -1 would split off the last character.
    return code;
  }

  const lineStart = code.lastIndexOf("\n", insertPoint - 1) + 1;
  const leading = code.slice(lineStart, insertPoint);
  const startsItsLine = /^[ \t]*$/.test(leading);

  // Mid-line matches keep the plain "insert right before test(" behavior.
  const insertAt = startsItsLine ? lineStart : insertPoint;
  const indent = startsItsLine ? leading : "";

  const hook =
    ["afterEach(async () => {", "  await cleanup();", "});"]
      .map((line) => indent + line)
      .join("\n") + "\n\n";

  return code.slice(0, insertAt) + hook + code.slice(insertAt);
}

Monitoring Flaky Tests in CI

# .github/workflows/test-stability.yml
# Nightly job that runs the suite repeatedly to surface intermittent failures.
name: Test Stability

on:
  schedule:
    - cron: "0 2 * * *" # Run nightly
  workflow_dispatch: # Allow manual runs while investigating a flaky test

jobs:
  stability-check:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-node@v4
        with:
          node-version: "20"
          cache: npm

      - run: npm ci

      # The detection script itself runs the suite 20 times and aggregates the
      # per-test results. A separate shell loop around `npm test` would double
      # the runtime while its results are never read by anything.
      - name: Detect flaky tests (20 runs)
        run: npm run detect-flaky-tests

Best Practices

Explicit waits: Never use fixed sleeps; wait for a condition (with a bounded timeout) instead
Clean state: Reset between tests
Deterministic data: No unseeded randomness; use fixed fixtures or seeded generators
Mock external deps: APIs, time, randomness
Run tests multiple times: Catch intermittent failures
Isolate tests: No shared state
Monitor CI: Track flaky test trends

Output Checklist

Common patterns identified
Root cause analysis performed
Timing issues fixed (waitFor)
Shared state eliminated (cleanup)
Randomness removed (fixed seeds)
Network mocked (MSW)
Detection script implemented
Stabilization guidelines documented
CI monitoring configured

Related skills

More from monkey1sai/openai-cli

Installs

Repository

monkey1sai/openai-cli

GitHub Stars

First Seen

Feb 16, 2026

Security Audits

Gen Agent Trust Hub: Pass

Socket: Pass

Snyk: Pass

flaky-test-detective

Flaky Test Detective

Common Flaky Test Patterns

1. Timing Issues

2. Shared State

3. Randomness

4. Network Dependencies

Flaky Test Detection Script

Root Cause Analysis

Stabilization Guidelines

Auto-Fix Patterns

Monitoring Flaky Tests in CI

Best Practices

Output Checklist

More from monkey1sai/openai-cli

multi-tenant-safety-checker

modal-drawer-system

eslint-prettier-config

api-security-hardener

secure-headers-csp-builder

security-incident-playbook-generator