flaky-test-detective
SKILL.md
Flaky Test Detective
Diagnose and eliminate flaky tests systematically.
Common Flaky Test Patterns
1. Timing Issues
// ❌ Flaky: Race condition
test("should load user data", async () => {
  render(<UserProfile userId="123" />);
  // getByText queries the DOM synchronously, so this only passes when the
  // user data happens to be rendered already - might pass or fail.
  expect(screen.getByText("John Doe")).toBeInTheDocument();
});

// ✅ Fixed: Wait for element
test("should load user data", async () => {
  render(<UserProfile userId="123" />);
  // waitFor re-runs the callback until the assertion stops throwing
  // (or its timeout elapses), so the test no longer races the data load.
  await waitFor(() => {
    expect(screen.getByText("John Doe")).toBeInTheDocument();
  });
});

// ❌ Flaky: Fixed timeout
test("should complete animation", async () => {
  // Keep a handle on the rendered output so the assertion has a real target.
  // NOTE(review): assumes AnimatedComponent puts the class on its root element.
  const { container } = render(<AnimatedComponent />);
  await new Promise((resolve) => setTimeout(resolve, 500)); // Brittle!
  expect(container.firstChild).toHaveClass("animated");
});

// ✅ Fixed: Wait for condition
test("should complete animation", async () => {
  // NOTE(review): assumes AnimatedComponent puts the class on its root element.
  const { container } = render(<AnimatedComponent />);
  // Poll for the class instead of guessing how long the animation takes;
  // the timeout is only an upper bound, not a fixed delay.
  await waitFor(
    () => {
      expect(container.firstChild).toHaveClass("animated");
    },
    { timeout: 2000 }
  );
});
2. Shared State
// ❌ Flaky: Global state pollution
// `userId` is declared at module scope, so every test in the file shares it.
let userId = "123";
test("test A", () => {
userId = "456"; // Modifies global
// ...
});
// Test B asserts the initial value, so its result depends on whether test A
// has already reassigned the shared variable.
test("test B", () => {
expect(userId).toBe("123"); // Fails if test A runs first!
});
// ✅ Fixed: Isolated state
// Each test declares its own `userId`, so neither can observe the other's value.
test("test A", () => {
const userId = "456"; // Local variable
// ...
});
test("test B", () => {
const userId = "123";
expect(userId).toBe("123");
});
// ❌ Flaky: Database not cleaned
// Both tests create a user with the same email and nothing removes the first
// record in between.
test("should create user", async () => {
await db.user.create({ email: "test@example.com" });
// No cleanup!
});
test("should create another user", async () => {
await db.user.create({ email: "test@example.com" }); // Fails! Duplicate
});
// ✅ Fixed: Proper cleanup
// Registers a hook that awaits db.user.deleteMany() after each test.
// NOTE(review): presumably this removes every user row (Prisma-style API) — confirm.
afterEach(async () => {
await db.user.deleteMany();
});
3. Randomness
// ❌ Flaky: Random data
test("should sort users", () => {
  // The input is regenerated on every run, so which user ends up first
  // after sorting is unpredictable - it might not be Alice.
  const sorted = sortUsers(generateRandomUsers(10));
  expect(sorted[0].name).toBe("Alice");
});

// ✅ Fixed: Deterministic data
test("should sort users", () => {
  // A hand-written fixture: the same three users on every run.
  const sorted = sortUsers([
    { name: "Charlie", age: 30 },
    { name: "Alice", age: 25 },
    { name: "Bob", age: 35 },
  ]);
  expect(sorted[0].name).toBe("Alice");
});

// ✅ Fixed: Seeded randomness
import { faker } from "@faker-js/faker";

// Re-seed before each test so generated data is the same every time.
beforeEach(() => {
  faker.seed(12345);
});
4. Network Dependencies
// ❌ Flaky: Real API call
test("should fetch users", async () => {
  // fetchUsers() talks to the external API, so the assertion might fail
  // whenever that API is down.
  expect(await fetchUsers()).toHaveLength(10);
});

// ✅ Fixed: Mocked API
test("should fetch users", async () => {
  // The payload the mocked endpoint answers with.
  const mockedUsers = [
    { id: "1", name: "User 1" },
    { id: "2", name: "User 2" },
  ];
  // Register a handler for GET /api/users on the mock server.
  server.use(http.get("/api/users", () => HttpResponse.json(mockedUsers)));

  const users = await fetchUsers();
  expect(users).toHaveLength(2);
});
Flaky Test Detection Script
// scripts/detect-flaky-tests.ts
import { execSync } from "child_process";
/** Pass/fail tally for a single test across all recorded runs. */
interface TestRunStats {
  passed: number;
  failed: number;
}

/** The parts of a Jest-style JSON report that the detector reads. */
interface ReportedTestFile {
  name: string;
  assertionResults?: Array<{ fullName: string; status: string }>;
}

/**
 * Runs the test suite once and returns whatever the reporter printed.
 *
 * `execSync` throws when the command exits non-zero, which is what happens
 * whenever a test fails. Those are the runs a flaky-test detector needs, so
 * the report is recovered from the thrown error's `stdout`.
 *
 * @throws the original error when the failed command produced no report.
 */
function runTestSuiteOnce(): string {
  try {
    return execSync("npm test -- --reporter=json", {
      encoding: "utf-8",
      // JSON reports of large suites can exceed the default output buffer.
      maxBuffer: 64 * 1024 * 1024,
    });
  } catch (error: unknown) {
    if (typeof error === "object" && error !== null && "stdout" in error) {
      const { stdout } = error as { stdout?: unknown };
      if (typeof stdout === "string" && stdout.includes("{")) {
        return stdout;
      }
    }
    throw error;
  }
}

/**
 * Parses one run's output and adds its per-test outcomes to `results`.
 *
 * Only "passed" and "failed" are tallied; skipped/pending/todo tests are
 * ignored so they cannot be mistaken for failures.
 *
 * @throws when the output holds no parseable report. Nothing is recorded then.
 */
function recordRun(results: Map<string, TestRunStats>, output: string): void {
  // npm may print a banner before the report; keep only the JSON object.
  const start = output.indexOf("{");
  const end = output.lastIndexOf("}");
  if (start === -1 || end < start) {
    throw new Error("Test output did not contain a JSON report");
  }

  const report: unknown = JSON.parse(output.slice(start, end + 1));
  const files =
    typeof report === "object" && report !== null
      ? (report as { testResults?: unknown }).testResults
      : undefined;
  if (!Array.isArray(files)) {
    throw new Error("JSON report has no testResults array");
  }

  for (const file of files as ReportedTestFile[]) {
    for (const test of file.assertionResults ?? []) {
      if (test.status !== "passed" && test.status !== "failed") {
        continue;
      }
      const key = `${file.name}::${test.fullName}`;
      const stats = results.get(key) ?? { passed: 0, failed: 0 };
      if (test.status === "passed") {
        stats.passed++;
      } else {
        stats.failed++;
      }
      results.set(key, stats);
    }
  }
}

/**
 * Runs the test suite repeatedly and reports tests that both passed and
 * failed across the runs.
 *
 * @param iterations How many times to run the suite.
 * @param runTests Produces the JSON reporter output of one run. Defaults to
 *   invoking `npm test -- --reporter=json`; injectable for other runners.
 * @returns The `file::test name` keys of every flaky test found.
 *
 * Sets `process.exitCode` to 1 when flaky tests are found or when no run
 * produced a readable report, so the process fails once it finishes.
 */
async function detectFlakyTests(
  iterations: number = 10,
  runTests: () => string = runTestSuiteOnce
): Promise<string[]> {
  const results = new Map<string, TestRunStats>();
  let completedRuns = 0;

  for (let i = 0; i < iterations; i++) {
    console.log(`\nRun ${i + 1}/${iterations}`);
    try {
      recordRun(results, runTests());
      completedRuns++;
    } catch (error: unknown) {
      console.error("Test run failed:", error);
    }
  }

  // Analyze results
  console.log("\n🔍 Flaky Test Report\n");

  if (iterations > 0 && completedRuns === 0) {
    // Without a single readable report, "no flaky tests" would be misleading.
    console.error("❌ No test run produced a readable report");
    process.exitCode = 1;
    return [];
  }

  const flakyTests: string[] = [];
  for (const [testName, stats] of results) {
    if (stats.failed > 0 && stats.passed > 0) {
      // Rate over the runs actually recorded for this test, not the number
      // requested: crashed runs and skipped tests contribute no data.
      const recorded = stats.passed + stats.failed;
      const failureRate = (stats.failed / recorded) * 100;
      console.log(`❌ FLAKY: ${testName}`);
      console.log(` Passed: ${stats.passed}/${recorded}`);
      console.log(` Failed: ${stats.failed}/${recorded}`);
      console.log(` Failure rate: ${failureRate.toFixed(1)}%\n`);
      flakyTests.push(testName);
    }
  }

  if (flakyTests.length === 0) {
    console.log("✅ No flaky tests detected!");
  } else {
    console.log(`\n🚨 Found ${flakyTests.length} flaky tests`);
    process.exitCode = 1;
  }
  return flakyTests;
}
// Run tests 20 times. Handle rejection explicitly so an unexpected error is
// reported and fails the process instead of leaving a floating promise.
detectFlakyTests(20).catch((error: unknown) => {
  console.error("Flaky test detection crashed:", error);
  process.exit(1);
});
Root Cause Analysis
// Framework for analyzing flaky tests
/** Diagnosis produced for one flaky test. */
interface FlakyTestAnalysis {
  /** Name of the analyzed test, as passed to `analyzeTest`. */
  testName: string;
  /** Failure rate supplied by the caller (0 when not supplied). */
  failureRate: number;
  /** One entry per heuristic that matched the error messages. */
  symptoms: string[];
  /** The most specific matching category, or "unknown" when none matched. */
  rootCause: "timing" | "state" | "randomness" | "network" | "unknown";
  /** Suggested fix for `rootCause`; empty when the cause is unknown. */
  recommendation: string;
}

/**
 * Classifies a flaky test from the error messages of its failed runs.
 *
 * Matching is case-insensitive, so messages such as "Expected: … Received: …"
 * or "Timeout" are recognized. Every matching heuristic contributes a symptom.
 * When several match, the root cause is the most specific one:
 * network > state > timing > randomness. The expected/received heuristic
 * ranks last because almost any assertion failure contains those words.
 *
 * @param testName Name of the test being analyzed.
 * @param errorMessages Error messages collected from the failing runs.
 * @param failureRate Observed failure rate to carry into the result.
 * @returns The analysis; `rootCause` is "unknown" when nothing matched.
 */
function analyzeTest(
  testName: string,
  errorMessages: string[],
  failureRate: number = 0
): FlakyTestAnalysis {
  const messages = errorMessages.map((msg) => msg.toLowerCase());
  const anyMentions = (...needles: string[]): boolean =>
    messages.some((msg) => needles.some((needle) => msg.includes(needle)));

  // Listed in reporting order; `rank` decides which match names the cause.
  const rules: Array<{
    cause: FlakyTestAnalysis["rootCause"];
    rank: number;
    matched: boolean;
    symptom: string;
    recommendation: string;
  }> = [
    {
      cause: "timing",
      rank: 2,
      matched: anyMentions("timeout", "not found"),
      symptom: "Timeout or element not found",
      recommendation: "Add explicit waits using waitFor() or findBy* queries",
    },
    {
      cause: "state",
      rank: 3,
      matched: anyMentions("already exists", "unique constraint"),
      symptom: "Duplicate or existing data",
      recommendation:
        "Add beforeEach/afterEach cleanup or use unique test data",
    },
    {
      cause: "randomness",
      rank: 1,
      // Both words must appear in the same message.
      matched: messages.some(
        (msg) => msg.includes("expected") && msg.includes("received")
      ),
      symptom: "Inconsistent values",
      recommendation: "Use deterministic test data or seed random generators",
    },
    {
      cause: "network",
      rank: 4,
      // Needle is lowercase because the messages were lowercased above.
      matched: anyMentions("network", "econnrefused"),
      symptom: "Network or connection errors",
      recommendation: "Mock all network requests using MSW or similar",
    },
  ];

  const analysis: FlakyTestAnalysis = {
    testName,
    failureRate,
    symptoms: [],
    rootCause: "unknown",
    recommendation: "",
  };

  let bestRank = 0;
  for (const rule of rules) {
    if (!rule.matched) {
      continue;
    }
    analysis.symptoms.push(rule.symptom);
    if (rule.rank > bestRank) {
      bestRank = rule.rank;
      analysis.rootCause = rule.cause;
      analysis.recommendation = rule.recommendation;
    }
  }
  return analysis;
}
Stabilization Guidelines
// Test stability checklist
/**
 * Stabilization guidelines grouped by the category of flakiness they address.
 * Each key maps to a list of short, human-readable checklist items.
 */
const stabilityChecklist = {
  // Tests that race asynchronous work.
  timing: [
    "Use waitFor() instead of fixed timeouts",
    "Use findBy* queries (built-in waiting)",
    "Set appropriate timeout values",
    "Wait for loading states to disappear",
  ],
  // Tests that leak data or mocks into each other.
  state: [
    "Clear database before each test",
    "Reset mocks after each test",
    "Use test-specific data (unique IDs)",
    "Avoid global variables",
  ],
  // Tests whose inputs differ from run to run.
  randomness: [
    "Use fixed seed for random generators",
    "Use deterministic test data",
    "Avoid Date.now() - mock time instead",
    "Generate IDs deterministically",
  ],
  // Tests that depend on real remote services.
  network: [
    "Mock all API calls",
    "Use MSW for HTTP mocking",
    "Avoid real external services",
    "Test network errors explicitly",
  ],
  // Tests that interfere when run concurrently.
  parallelism: [
    "Use isolated databases per test worker",
    "Avoid port conflicts (random ports)",
    "Don't share file system state",
    "Use test.concurrent cautiously",
  ],
};
Auto-Fix Patterns
// Automated fixes for common issues
// Fix 1: Add waitFor to assertions
/**
 * Wraps `expect(screen.getBy*(...)).toBeInTheDocument()` assertions in
 * `await waitFor(() => ...)`.
 *
 * The whole assertion is matched as one unit, so:
 * - unrelated `.toBeInTheDocument()` calls are left untouched,
 * - `getBy*` assertions using other matchers are not half-wrapped,
 * - assertions already inside `waitFor(() => ...)` are not wrapped again.
 *
 * Query arguments may contain one level of nested parentheses.
 *
 * @param code Test source code to transform.
 * @returns The transformed source. The enclosing test must be `async` and
 *   have `waitFor` in scope for the result to run.
 */
function addWaitFor(code: string): string {
  const assertion =
    /(?<!waitFor\(\(\) => )expect\(screen\.getBy\w*\((?:[^()]|\([^()]*\))*\)\)\.toBeInTheDocument\(\)/g;
  // A replacer function keeps `$` sequences in the matched code literal.
  return code.replace(assertion, (match) => `await waitFor(() => ${match})`);
}
// Fix 2: Replace getBy with findBy
/**
 * Rewrites synchronous `screen.getBy*` / `screen.getAllBy*` queries to their
 * awaited `findBy*` / `findAllBy*` counterparts.
 *
 * A query that is already preceded by `await` keeps a single `await`.
 *
 * @param code Test source code to transform.
 * @returns The transformed source. The enclosing function must be `async`
 *   for the result to run.
 */
function replaceGetByWithFindBy(code: string): string {
  return code.replace(
    /(?:await\s+)?screen\.get(All)?By/g,
    (_match: string, all: string | undefined) =>
      `await screen.find${all ?? ""}By`
  );
}
// Fix 3: Add cleanup
/**
 * Inserts an `afterEach` cleanup hook immediately before the first test
 * declaration, unless the code already mentions `afterEach`.
 *
 * A test declaration is a call to `test` or `it`, optionally with modifiers
 * such as `test.each` or `it.only`. Method calls like `regex.test(...)` and
 * identifiers that merely end in "test" are not treated as tests.
 *
 * @param code Test source code to transform.
 * @returns The code with the hook inserted, or the code unchanged when it
 *   already has an `afterEach` or contains no test declaration.
 */
function addCleanup(code: string): string {
  if (code.includes("afterEach")) {
    return code;
  }

  const firstTest = /(?<![\w$.])(?:test|it)(?:\.\w+)*\s*\(/.exec(code);
  if (firstTest === null) {
    // No anchor to insert before; slicing at -1 would corrupt the source.
    return code;
  }

  const insertPoint = firstTest.index;
  return (
    code.slice(0, insertPoint) +
    "afterEach(async () => {\n await cleanup();\n});\n\n" +
    code.slice(insertPoint)
  );
}
Monitoring Flaky Tests in CI
# .github/workflows/test-stability.yml
# Nightly job that re-runs the test suite repeatedly to surface flaky tests.
name: Test Stability
on:
  schedule:
    - cron: "0 2 * * *" # Run nightly
jobs:
  stability-check:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-node@v4
        with:
          node-version: "20"
      - run: npm ci
      - name: Run tests 20 times
        # `|| echo` keeps the loop (and the step) going after a failing run,
        # so all 20 runs execute and failures are only logged.
        run: |
          for i in {1..20}; do
            echo "Run $i/20"
            npm test || echo "FAILED: Run $i"
          done
      - name: Analyze results
        # NOTE(review): presumably an npm script wired to
        # scripts/detect-flaky-tests.ts - confirm it exists in package.json.
        run: npm run detect-flaky-tests
Best Practices
- Explicit waits: Wait on conditions (waitFor, findBy*) instead of fixed sleeps or setTimeout delays
- Clean state: Reset between tests
- Deterministic data: Use fixed fixtures or seeded generators; no unseeded randomness
- Mock external deps: APIs, time, randomness
- Run tests multiple times: Catch intermittent failures
- Isolate tests: No shared state
- Monitor CI: Track flaky test trends
Output Checklist
- Common patterns identified
- Root cause analysis performed
- Timing issues fixed (waitFor)
- Shared state eliminated (cleanup)
- Randomness removed (fixed seeds)
- Network mocked (MSW)
- Detection script implemented
- Stabilization guidelines documented
- CI monitoring configured
Weekly Installs
10
Repository
patricio0312rev/skills
First Seen
10 days ago
Installed on
- claude-code: 8
- gemini-cli: 7
- antigravity: 7
- windsurf: 7
- github-copilot: 7
- codex: 7