puppeteer-automation
SKILL.md
Puppeteer Browser Automation
You are an expert in Puppeteer, Node.js browser automation, web scraping, and building reliable automation scripts for Chrome and Chromium browsers.
Core Expertise
- Puppeteer API and browser automation patterns
- Page navigation and interaction
- Element selection and manipulation
- Screenshot and PDF generation
- Network request interception
- Headless and headful browser modes
- Performance optimization and memory management
- Integration with testing frameworks (Jest, Mocha)
Key Principles
- Write clean, async/await based code for readability
- Use proper error handling with try/catch blocks
- Implement robust waiting strategies for dynamic content
- Close browser instances properly to prevent memory leaks
- Follow modular design patterns for reusable automation code
- Handle browser context and page lifecycle appropriately
Project Setup
npm init -y
npm install puppeteer
Basic Structure
const puppeteer = require('puppeteer');
/**
 * Minimal automation skeleton: launch a browser, open one page, visit a URL,
 * and always shut the browser down afterwards.
 *
 * @returns {Promise<void>} Resolves once the browser has been closed; rejects
 *   with whatever error page creation or navigation raised.
 */
async function main() {
  const launchOptions = {
    headless: 'new',
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  };
  const browser = await puppeteer.launch(launchOptions);

  try {
    const tab = await browser.newPage();
    await tab.goto('https://example.com');
    // Your automation code here
  } finally {
    // Runs on success and on failure, so the browser process is never left behind.
    await browser.close();
  }
}
main().catch(console.error);
Browser Launch Options
// Command-line switches handed to the Chromium process.
const chromiumArgs = [
  '--no-sandbox',
  '--disable-setuid-sandbox',
  '--disable-dev-shm-usage',
  '--disable-accelerated-2d-canvas',
  '--disable-gpu',
  '--window-size=1920,1080',
];
// Viewport applied to every new page; matches the window size requested above.
const viewport = { width: 1920, height: 1080 };

const browser = await puppeteer.launch({
  headless: 'new', // 'new' for new headless mode, false for visible browser
  slowMo: 50, // Slow down operations for debugging
  devtools: true, // Open DevTools automatically
  args: chromiumArgs,
  defaultViewport: viewport,
});
Page Navigation
// Navigate to URL, giving up after 30000 ms
const navigationOptions = {
  waitUntil: 'networkidle2', // Wait until network is idle
  timeout: 30000,
};
await page.goto('https://example.com', navigationOptions);
// Accepted waitUntil values:
// - 'load': Wait for load event
// - 'domcontentloaded': Wait for DOMContentLoaded event
// - 'networkidle0': No network connections for 500ms
// - 'networkidle2': No more than 2 network connections for 500ms
// Move through the session history
await page.goBack();
await page.goForward();
// Reload page with the same idle condition as the initial navigation
const reloadOptions = { waitUntil: 'networkidle2' };
await page.reload(reloadOptions);
Element Selection
Query Selectors
// Each result gets its own name: declaring `element` / `elements` twice with
// `const` in the same scope is a SyntaxError when the snippet is run as one script.

// Single element
const element = await page.$('selector');
// Multiple elements
const elements = await page.$$('selector');
// Wait for element to be present and visible (rejects after 5000 ms)
const visibleElement = await page.waitForSelector('selector', {
  visible: true,
  timeout: 5000
});
// XPath selection
// NOTE: page.$x() was removed in Puppeteer v22; on newer versions use
// page.$$('::-p-xpath(//xpath/expression)') instead.
const xpathElements = await page.$x('//xpath/expression');
Evaluation in Page Context
// Text content of the first element matching the selector
const text = await page.$eval('selector', (node) => node.textContent);
// Value of an attribute on the first matching anchor
const href = await page.$eval('a', (anchor) => anchor.getAttribute('href'));
// Text content of every matching element
const texts = await page.$$eval('.items', (nodes) => nodes.map((node) => node.textContent));
// Run arbitrary JavaScript inside the page and get its return value back
const result = await page.evaluate(() => document.title);
Page Interactions
Clicking
await page.click('button#submit');
// Click with options
const clickOptions = {
  button: 'left', // 'left', 'right', 'middle'
  clickCount: 1,
  delay: 100, // Time between mousedown and mouseup
};
await page.click('button', clickOptions);
// Click and wait for navigation.
// The navigation wait is created first so it is already listening when the click fires.
const navigationAndClick = [
  page.waitForNavigation(),
  page.click('a.nav-link'),
];
await Promise.all(navigationAndClick);
Typing
const usernameField = 'input#username';
// Type text, pausing 50 ms between key presses
await page.type(usernameField, 'myuser', { delay: 50 });
// Clear and type: a triple click selects the existing value so typing replaces it
await page.click(usernameField, { clickCount: 3 });
await page.type(usernameField, 'newvalue');
// Press keys (Shift is held down while Tab is pressed, i.e. Shift+Tab)
const { keyboard } = page;
await keyboard.press('Enter');
await keyboard.down('Shift');
await keyboard.press('Tab');
await keyboard.up('Shift');
Form Handling
const countrySelect = 'select#country';
const checkbox = 'input[type="checkbox"]';
const fileInput = 'input[type="file"]';

// Select dropdown option by its value
await page.select(countrySelect, 'us');
// Check checkbox (a click toggles the current state)
await page.click(checkbox);
// File upload through the element handle of the file input
const inputFile = await page.$(fileInput);
await inputFile.uploadFile('/path/to/file.pdf');
Waiting Strategies
// Wait for selector
await page.waitForSelector('.loaded');
// Wait for selector to disappear
await page.waitForSelector('.loading', { hidden: true });
// Wait for function.
// The predicate runs inside the page; optional chaining keeps it from throwing
// a TypeError while `.count` has not been added to the DOM yet.
await page.waitForFunction(
  () => document.querySelector('.count')?.textContent === '10'
);
// Wait for navigation.
// Start this wait before (or together with) the action that triggers the
// navigation, e.g. via Promise.all, otherwise the event can be missed.
await page.waitForNavigation({ waitUntil: 'networkidle2' });
// Wait for network request
await page.waitForRequest(request =>
  request.url().includes('/api/data')
);
// Wait for network response
await page.waitForResponse(response =>
  response.url().includes('/api/data') && response.status() === 200
);
// Fixed timeout (use sparingly).
// page.waitForTimeout() is deprecated and was removed in Puppeteer v22;
// a plain timer promise works on every version.
await new Promise(resolve => setTimeout(resolve, 1000));
Screenshots and PDFs
Screenshots
// Full page screenshot
await page.screenshot({
  path: 'screenshot.png',
  fullPage: true
});
// Element screenshot (page.$ resolves to null when nothing matches `.chart`)
const element = await page.$('.chart');
await element.screenshot({ path: 'chart.png' });
// Screenshot options.
// `quality` is only accepted for lossy formats; Puppeteer throws when it is
// combined with type 'png', so this example uses jpeg.
await page.screenshot({
  path: 'screenshot.jpg',
  type: 'jpeg', // 'png' (default), 'jpeg' or 'webp'
  quality: 80, // jpeg/webp only, 0-100
  clip: {
    x: 0,
    y: 0,
    width: 800,
    height: 600
  }
});
PDF Generation
// Render the current page to an A4 PDF with a uniform 20px margin.
const pdfMargin = {
  top: '20px',
  right: '20px',
  bottom: '20px',
  left: '20px',
};
const pdfOptions = {
  path: 'document.pdf',
  format: 'A4',
  printBackground: true, // include CSS backgrounds in the output
  margin: pdfMargin,
};
await page.pdf(pdfOptions);
Network Interception
// Enable request interception
await page.setRequestInterception(true);
// An intercepted request must be resolved exactly once (abort, continue or
// respond). Blocking and header rewriting therefore live in ONE listener:
// two listeners that each call continue()/abort() fail with
// "Request is already handled!".
page.on('request', request => {
  // Block images and stylesheets
  if (['image', 'stylesheet'].includes(request.resourceType())) {
    request.abort();
    return;
  }
  // Modify every other request: keep its headers and add a custom one
  request.continue({
    headers: {
      ...request.headers(),
      'X-Custom-Header': 'value'
    }
  });
});
// Monitor responses
page.on('response', async response => {
  if (!response.url().includes('/api/')) {
    return;
  }
  try {
    const data = await response.json();
    console.log('API Response:', data);
  } catch (error) {
    // Non-JSON or empty bodies (e.g. redirects) make response.json() reject;
    // report it here instead of leaving an unhandled rejection in the listener.
    console.error('Could not read API response as JSON:', error.message);
  }
});
Authentication and Cookies
// Basic HTTP authentication
const credentials = { username: 'user', password: 'pass' };
await page.authenticate(credentials);
// Set cookies
const sessionCookie = {
  name: 'session',
  value: 'abc123',
  domain: 'example.com',
};
await page.setCookie(sessionCookie);
// Get cookies
const cookies = await page.cookies();
// Clear cookies (removes the cookie named 'session')
await page.deleteCookie({ name: 'session' });
Browser Context and Multiple Pages
// Create incognito context
// NOTE: Puppeteer v22 renamed this method to browser.createBrowserContext().
const context = await browser.createIncognitoBrowserContext();
const page = await context.newPage();
// Multiple pages
const page1 = await browser.newPage();
const page2 = await browser.newPage();
// Get all pages
const pages = await browser.pages();
// Handle popups
page.on('popup', async popup => {
  // Defensive: nothing to inspect if no Page object was delivered.
  if (!popup) {
    return;
  }
  // Puppeteer pages have no waitForLoadState() (that is a Playwright method);
  // poll the document's readiness inside the popup instead.
  await popup.waitForFunction(() => document.readyState === 'complete');
  console.log('Popup URL:', popup.url());
});
Error Handling
/**
 * Load `url` in a fresh browser and return the text of its `.content` element,
 * retrying with a linearly growing delay when an attempt fails.
 *
 * @param {string} url - Page to scrape.
 * @param {number} [maxRetries=3] - Maximum number of attempts.
 * @returns {Promise<string|undefined>} Text content of `.content`
 *   (undefined only when `maxRetries` is less than 1, so no attempt is made).
 * @throws Rethrows the error of the last attempt once all attempts failed.
 */
async function scrapeWithRetry(url, maxRetries = 3) {
  for (let i = 0; i < maxRetries; i++) {
    try {
      const browser = await puppeteer.launch();
      try {
        const page = await browser.newPage();
        // Set timeout
        page.setDefaultTimeout(30000);
        await page.goto(url, { waitUntil: 'networkidle2' });
        return await page.$eval('.content', el => el.textContent);
      } finally {
        // Close on success AND on failure; otherwise every failed attempt
        // leaves a Chromium process running.
        await browser.close();
      }
    } catch (error) {
      console.error(`Attempt ${i + 1} failed:`, error.message);
      if (i === maxRetries - 1) throw error;
      // Back off before the next attempt: 2s, 4s, 6s, ...
      await new Promise(r => setTimeout(r, 2000 * (i + 1)));
    }
  }
}
Performance Optimization
// Disable unnecessary features: skip downloads that scraping does not need
await page.setRequestInterception(true);
page.on('request', (request) => {
  const blockedTypes = ['image', 'stylesheet', 'font'];
  const isBlocked = blockedTypes.includes(request.resourceType());
  if (!isBlocked) {
    request.continue();
    return;
  }
  request.abort();
});
// Reuse browser instance
const browser = await puppeteer.launch();
/**
 * Visit `url` in a new tab of the shared `browser` and release the tab afterwards.
 *
 * @param {string} url - Address to open.
 * @returns {Promise<void>} Rejects with the navigation error, if any.
 */
async function scrape(url) {
  const tab = await browser.newPage();
  try {
    await tab.goto(url);
    // ... scraping logic
  } finally {
    // Only the tab is closed; the browser stays alive for the next call.
    await tab.close();
  }
}
// Use connection pool for parallel scraping.
// puppeteer-cluster exposes its API as the named export `Cluster`
// (started with `await Cluster.launch({ ... })`), so destructure it from the module.
const { Cluster } = require('puppeteer-cluster');
Key Dependencies
- puppeteer
- puppeteer-core (for custom Chrome installations)
- puppeteer-cluster (for parallel scraping)
- puppeteer-extra (for plugins)
- puppeteer-extra-plugin-stealth (anti-detection)
Best Practices
- Always close browser instances in finally blocks
- Use `waitForSelector` before interacting with elements
- Prefer `networkidle2` over `networkidle0` for faster loads
- Use stealth plugin for anti-bot bypass
- Implement proper error handling and retries
- Monitor memory usage in long-running scripts
- Use browser context for isolated sessions
- Set reasonable timeouts for all operations
Weekly Installs
3
Repository
mindrally/skills
Installed on
- codex: 2
- claude-code: 2
- antigravity: 2
- gemini-cli: 2
- windsurf: 1
- trae: 1