db-performance-watchlist
SKILL.md
DB Performance Watchlist
Monitor database performance and prevent regressions.
Key Performance Metrics
// performance-metrics.ts

/**
 * Aggregate snapshot of database health, gathered periodically and compared
 * against ALERT_THRESHOLDS. All percentages are 0-100, durations in ms.
 */
export interface DBMetrics {
  // Query Performance
  slowQueries: {
    threshold: number; // ms — queries slower than this are counted below
    count: number; // number of distinct slow queries detected
    queries: SlowQuery[]; // details of the offending statements
  };
  // Connection Pool
  connections: {
    active: number; // connections currently executing a statement
    idle: number; // open but idle connections
    total: number; // active + idle (all open backends)
    maxConnections: number; // server-side max_connections setting
    utilizationPercent: number; // total / maxConnections * 100
  };
  // Resource Usage
  resources: {
    cpuPercent: number;
    memoryPercent: number;
    diskUsagePercent: number;
    iops: number; // I/O operations per second
  };
  // Query Statistics — sustained rates by statement class
  queryStats: {
    selectsPerSecond: number;
    insertsPerSecond: number;
    updatesPerSecond: number;
    deletesPerSecond: number;
  };
  // Cache Performance (buffer/page cache)
  cache: {
    hitRate: number; // %
    size: number; // MB
    evictions: number;
  };
  // Index Usage — names flagged by analyzeIndexUsage()
  indexes: {
    unusedIndexes: string[]; // candidates for removal (never scanned)
    missingIndexes: string[]; // tables with heavy sequential scans
  };
}
/**
 * One slow statement as surfaced in DBMetrics.slowQueries.queries.
 *
 * NOTE(review): detectSlowQueries() below selects columns named
 * total_time_ms / avg_time_ms / max_time_ms / percent_of_total, which do not
 * match these fields — a mapping step (or aligning the names) is needed
 * before rows can actually satisfy this interface. Confirm intended shape.
 */
interface SlowQuery {
  query: string; // normalized SQL text
  duration: number; // total time spent in this statement, ms
  calls: number; // number of executions
  avgDuration: number; // mean time per execution, ms
  table: string; // primary table the statement touches
}
Slow Query Detection
// scripts/detect-slow-queries.ts

/** Shape of one row returned by the pg_stat_statements query below. */
interface SlowQueryRow {
  query: string;
  calls: number;
  total_time_ms: number;
  avg_time_ms: number;
  max_time_ms: number;
  percent_of_total: number;
}

/**
 * Lowers the server's slow-query log threshold and reports the 20 slowest
 * statements from pg_stat_statements.
 *
 * @param thresholdMs - Mean-execution-time cutoff (ms); also applied to
 *                      log_min_duration_statement.
 * @returns The slow query rows, slowest first.
 */
async function detectSlowQueries(thresholdMs: number = 100) {
  // ALTER DATABASE ... SET is DDL and cannot take bind parameters, so the
  // original $executeRaw`... = ${thresholdMs}` would fail at runtime.
  // Validate the value ourselves, then inline it via $executeRawUnsafe.
  if (!Number.isFinite(thresholdMs) || thresholdMs < 0) {
    throw new Error(`invalid slow-query threshold: ${thresholdMs}`);
  }
  await prisma.$executeRawUnsafe(
    `ALTER DATABASE mydb SET log_min_duration_statement = ${Math.floor(thresholdMs)}`
  );

  // pg_stat_statements reports total_exec_time / mean_exec_time / max_exec_time
  // in milliseconds already; the original divided by 1000 and mislabeled
  // seconds as ms.
  const slowQueries = await prisma.$queryRaw<SlowQueryRow[]>`
    SELECT
      query,
      calls,
      total_exec_time as total_time_ms,
      mean_exec_time as avg_time_ms,
      max_exec_time as max_time_ms,
      (total_exec_time / sum(total_exec_time) OVER()) * 100 as percent_of_total
    FROM pg_stat_statements
    WHERE mean_exec_time > ${thresholdMs}
    ORDER BY mean_exec_time DESC
    LIMIT 20
  `;

  console.log("🐌 Slow Queries Detected:\n");
  slowQueries.forEach((q, i) => {
    console.log(`${i + 1}. ${q.query.substring(0, 80)}...`);
    console.log(`   Calls: ${q.calls}`);
    // Raw numeric columns may arrive as strings/Decimal depending on the
    // driver, so coerce before formatting.
    console.log(`   Avg: ${Number(q.avg_time_ms).toFixed(2)}ms`);
    console.log(`   Max: ${Number(q.max_time_ms).toFixed(2)}ms`);
    console.log(`   % of total time: ${Number(q.percent_of_total).toFixed(1)}%\n`);
  });
  return slowQueries;
}
Connection Pool Monitoring
/**
 * Logs connection-pool usage for the current database and alerts when
 * utilization exceeds 80%.
 */
async function monitorConnectionPool() {
  // Per-connection "state" lives in pg_stat_activity — pg_stat_database has
  // no such column, so the original query would error. The ::int casts also
  // prevent the driver returning BigInt for count(*), which would break the
  // percentage arithmetic below.
  const stats = await prisma.$queryRaw<any[]>`
    SELECT
      count(*)::int as total_connections,
      count(*) FILTER (WHERE state = 'active')::int as active,
      count(*) FILTER (WHERE state = 'idle')::int as idle,
      (SELECT setting::int FROM pg_settings WHERE name = 'max_connections') as max_connections
    FROM pg_stat_activity
    WHERE datname = current_database()
  `;
  const { total_connections, active, idle, max_connections } = stats[0];
  const utilization = (total_connections / max_connections) * 100;

  console.log("📊 Connection Pool Status:");
  console.log(
    `  Total: ${total_connections}/${max_connections} (${utilization.toFixed(1)}%)`
  );
  console.log(`  Active: ${active}`);
  console.log(`  Idle: ${idle}`);

  // Alert if > 80% utilization
  if (utilization > 80) {
    console.warn("⚠️ Connection pool >80% utilized!");
    await sendAlert({
      title: "High connection pool usage",
      message: `${utilization.toFixed(1)}% of connections in use`,
    });
  }
}
Resource Monitoring
/**
 * Logs approximate CPU utilization, database size, and buffer-cache hit
 * rate; alerts when the hit rate drops below 90%.
 */
async function monitorResources() {
  // CPU utilization ≈ total statement execution time / wall-clock time since
  // stats were reset, as a percent. The original cross-joined
  // pg_stat_statements with pg_stat_database (multiplying the sum by the row
  // count) and divided by an extra factor of 100; both are fixed here.
  const cpuStats = await prisma.$queryRaw<any[]>`
    SELECT
      (sum(total_exec_time) /
        (extract(epoch from (now() - (SELECT stats_reset
                                      FROM pg_stat_database
                                      WHERE datname = current_database()))) * 1000)
      ) * 100 as cpu_percent
    FROM pg_stat_statements
  `;

  // Memory / on-disk footprint
  const memStats = await prisma.$queryRaw<any[]>`
    SELECT
      pg_size_pretty(pg_database_size(current_database())) as db_size,
      pg_size_pretty(sum(pg_relation_size(schemaname||'.'||tablename))) as tables_size
    FROM pg_tables
    WHERE schemaname = 'public'
  `;

  // Cache hit rate — NULLIF guards division by zero on freshly-reset stats.
  const cacheStats = await prisma.$queryRaw<any[]>`
    SELECT
      sum(heap_blks_hit) * 100.0 /
        NULLIF(sum(heap_blks_hit) + sum(heap_blks_read), 0) as cache_hit_rate
    FROM pg_statio_user_tables
  `;

  // Coerce before formatting: raw numerics may come back as strings/Decimal,
  // and cache_hit_rate is NULL when there has been no block I/O yet.
  const cpuPercent = Number(cpuStats[0].cpu_percent ?? 0);
  const cacheHitRate = Number(cacheStats[0].cache_hit_rate ?? 0);

  console.log("📊 Resource Usage:");
  console.log(`  CPU: ${cpuPercent.toFixed(1)}%`);
  console.log(`  Database Size: ${memStats[0].db_size}`);
  console.log(`  Cache Hit Rate: ${cacheHitRate.toFixed(1)}%`);

  // Alert if cache hit rate < 90%
  if (cacheHitRate < 90) {
    console.warn("⚠️ Cache hit rate below 90%!");
    await sendAlert({
      title: "Low cache hit rate",
      message: `Cache hit rate: ${cacheHitRate.toFixed(1)}%`,
    });
  }
}
Index Usage Analysis
/**
 * Reports indexes that are never scanned (drop candidates) and tables with
 * heavy sequential scans (likely missing indexes).
 */
async function analyzeIndexUsage() {
  // Find unused indexes. Unique indexes are excluded even at 0 scans — they
  // enforce constraints and must not be flagged for removal.
  const unusedIndexes = await prisma.$queryRaw<any[]>`
    SELECT
      s.schemaname,
      s.tablename,
      s.indexname,
      s.idx_scan
    FROM pg_stat_user_indexes s
    JOIN pg_index i ON i.indexrelid = s.indexrelid
    WHERE s.idx_scan = 0
      AND NOT i.indisunique
      AND s.indexname NOT LIKE '%_pkey'
    ORDER BY pg_relation_size(s.indexrelid) DESC
  `;

  console.log("🗂️ Unused Indexes:\n");
  unusedIndexes.forEach((idx) => {
    console.log(`  ${idx.tablename}.${idx.indexname} (0 scans)`);
  });

  // Find missing indexes (sequential scans on large tables)
  const missingIndexes = await prisma.$queryRaw<any[]>`
    SELECT
      schemaname,
      tablename,
      seq_scan,
      seq_tup_read,
      idx_scan,
      n_live_tup
    FROM pg_stat_user_tables
    WHERE seq_scan > 1000
      AND n_live_tup > 10000
    ORDER BY seq_scan * n_live_tup DESC
    LIMIT 10
  `;

  console.log("\n🔍 Tables with High Sequential Scans:\n");
  missingIndexes.forEach((table) => {
    console.log(`  ${table.tablename}:`);
    console.log(`    Sequential scans: ${table.seq_scan}`);
    console.log(`    Rows: ${table.n_live_tup}`);
    console.log(`    Index scans: ${table.idx_scan}`);
  });
}
Alert Thresholds
/**
 * Central alerting limits for database monitoring, consumed by
 * checkThresholds(). Durations are milliseconds; everything suffixed with
 * Warning/Critical or commented "%" is a percentage (0-100).
 */
const ALERT_THRESHOLDS = {
  // Per-statement latency and call-volume limits.
  slowQuery: {
    avgDuration: 500, // ms — mean execution time before a query counts as slow
    maxDuration: 2000, // ms — worst single execution tolerated
    callsPerMinute: 100, // how often a slow query may fire before alerting
  },
  // Connection-pool saturation (percent of max_connections in use).
  connections: {
    utilizationWarning: 70, // %
    utilizationCritical: 85, // %
  },
  // Host resource ceilings.
  resources: {
    cpuWarning: 70, // %
    cpuCritical: 85, // %
    memoryWarning: 80, // %
    memoryCritical: 90, // %
    diskWarning: 75, // %
    diskCritical: 85, // %
  },
  // Buffer-cache effectiveness — lower hit rate is worse, so the
  // critical bound sits BELOW the warning bound.
  cache: {
    hitRateWarning: 90, // %
    hitRateCritical: 80, // %
  },
  // Sustained query-rate ceilings.
  queryRate: {
    maxSelectsPerSecond: 10000,
    maxWritesPerSecond: 1000,
  },
} as const;
/**
 * Gathers current metrics and fires alerts based on ALERT_THRESHOLDS.
 *
 * The original only used the critical levels even though warning levels are
 * defined; this version escalates (critical wins) and warns earlier so
 * operators get notice before hard limits are hit.
 */
async function checkThresholds() {
  const metrics = await gatherMetrics();

  // Check slow queries — the count trigger (>10) is kept from the original;
  // the message reports the configured latency threshold.
  if (metrics.slowQueries.count > 10) {
    await sendAlert({
      level: "warning",
      title: "Slow queries detected",
      message: `${metrics.slowQueries.count} queries exceeding ${ALERT_THRESHOLDS.slowQuery.avgDuration}ms`,
    });
  }

  // Check connection pool: critical first, then warning.
  const poolPct = metrics.connections.utilizationPercent;
  if (poolPct > ALERT_THRESHOLDS.connections.utilizationCritical) {
    await sendAlert({
      level: "critical",
      title: "Connection pool critical",
      message: `${poolPct.toFixed(1)}% utilization`,
    });
  } else if (poolPct > ALERT_THRESHOLDS.connections.utilizationWarning) {
    await sendAlert({
      level: "warning",
      title: "Connection pool usage high",
      message: `${poolPct.toFixed(1)}% utilization`,
    });
  }

  // Check cache hit rate: lower is worse, so critical sits below warning.
  if (metrics.cache.hitRate < ALERT_THRESHOLDS.cache.hitRateCritical) {
    await sendAlert({
      level: "critical",
      title: "Cache hit rate critical",
      message: `${metrics.cache.hitRate.toFixed(1)}% hit rate`,
    });
  } else if (metrics.cache.hitRate < ALERT_THRESHOLDS.cache.hitRateWarning) {
    await sendAlert({
      level: "warning",
      title: "Cache hit rate low",
      message: `${metrics.cache.hitRate.toFixed(1)}% hit rate`,
    });
  }
}
Monitoring Dashboard
// Generate monitoring report

/**
 * Prints a full performance report: slow queries, connection pool, resource
 * usage, index usage, and cumulative query statistics. Intended for the
 * hourly cron job in scripts/monitor-db.ts.
 */
async function generatePerformanceReport() {
  console.log("📊 Database Performance Report\n");
  console.log("=".repeat(50) + "\n");

  // Slow queries
  const slowQueries = await detectSlowQueries(100);
  console.log(`Slow Queries (>100ms): ${slowQueries.length}\n`);

  // Connection pool
  await monitorConnectionPool();
  console.log();

  // Resources
  await monitorResources();
  console.log();

  // Index usage
  await analyzeIndexUsage();
  console.log();

  // Query rates — cumulative counters since the last stats reset, not
  // per-second rates; compute deltas between runs to get rates.
  const queryStats = await prisma.$queryRaw<any[]>`
    SELECT
      sum(xact_commit + xact_rollback) as transactions,
      sum(tup_returned) as rows_read,
      sum(tup_inserted) as rows_inserted,
      sum(tup_updated) as rows_updated,
      sum(tup_deleted) as rows_deleted
    FROM pg_stat_database
    WHERE datname = current_database()
  `;

  console.log("📈 Query Statistics:");
  console.log(`  Transactions: ${queryStats[0].transactions}`);
  console.log(`  Rows read: ${queryStats[0].rows_read}`);
  console.log(`  Rows inserted: ${queryStats[0].rows_inserted}`);
  console.log(`  Rows updated: ${queryStats[0].rows_updated}`);
  console.log(`  Rows deleted: ${queryStats[0].rows_deleted}`);
}
Automated Monitoring Script
// scripts/monitor-db.ts
// Cron entry point that wires the monitoring routines to schedules.
// NOTE(review): checkThresholds, generatePerformanceReport, and
// analyzeIndexUsage are defined in other modules of this skill — confirm
// the real file imports them, or this script fails at runtime.
import cron from "node-cron";
// Run every 5 minutes
cron.schedule("*/5 * * * *", async () => {
  await checkThresholds();
});
// Generate report every hour
cron.schedule("0 * * * *", async () => {
  await generatePerformanceReport();
});
// Analyze indexes weekly (midnight every Sunday)
cron.schedule("0 0 * * 0", async () => {
  await analyzeIndexUsage();
});
Grafana Dashboard Queries
-- Query latency over time
-- NOTE(review): pg_stat_statements is a cumulative view with NO timestamp
-- column, so it cannot be bucketed by time directly — the original query
-- grouped by time_bucket('5 minutes', queryid), which is invalid (queryid is
-- not a timestamp) and referenced an undefined "bucket" column. Latency over
-- time requires periodically snapshotting pg_stat_statements into a table,
-- e.g. pg_stat_statements_snapshots(captured_at, query, calls,
-- mean_exec_time, max_exec_time), ideally a TimescaleDB hypertable
-- (time_bucket is a TimescaleDB function).
SELECT
  time_bucket('5 minutes', captured_at) AS bucket,
  AVG(mean_exec_time) AS avg_latency,
  MAX(max_exec_time) AS max_latency,
  SUM(calls) AS total_calls
FROM pg_stat_statements_snapshots
WHERE query NOT LIKE '%pg_stat_statements%'
GROUP BY bucket
ORDER BY bucket;

-- Connection count over time (Grafana polls this; "time" is the poll moment)
SELECT
  now() AS time,
  count(*) AS total,
  count(*) FILTER (WHERE state = 'active') AS active,
  count(*) FILTER (WHERE state = 'idle') AS idle
FROM pg_stat_activity;

-- Cache hit rate (NULLIF avoids division by zero on freshly-reset stats)
SELECT
  sum(heap_blks_hit) * 100.0 /
    NULLIF(sum(heap_blks_hit) + sum(heap_blks_read), 0) AS cache_hit_rate
FROM pg_statio_user_tables;
Best Practices
- Monitor continuously: Don't wait for problems
- Set appropriate thresholds: Based on your SLAs
- Alert on trends: Not just absolute values
- Review regularly: Weekly performance reviews
- Automate everything: No manual checks
- Document baselines: Know what's normal
- Test alerts: Ensure they work
Output Checklist
- Slow query detection configured
- Connection pool monitoring
- Resource usage tracking
- Cache hit rate monitoring
- Index usage analysis
- Alert thresholds defined
- Monitoring dashboard setup
- Automated checks scheduled
- Grafana/alerting integration
- Performance baseline documented
Weekly Installs
10
Repository
patricio0312rev/skills
First Seen
10 days ago
Installed on
claude-code: 8
gemini-cli: 7
antigravity: 7
windsurf: 7
github-copilot: 7
codex: 7