puppeteer
Puppeteer Skill
Complete guide for Puppeteer - headless Chrome automation.
Quick Reference
| Command | Purpose |
|---|---|
| `npm install puppeteer` | Install with bundled Chrome |
| `npm install puppeteer-core` | Install without Chrome (BYOB - Bring Your Own Browser) |
| `browser.newPage()` | Create new page |
| `page.goto(url)` | Navigate to URL |
| `page.screenshot()` | Take screenshot |
| `page.pdf()` | Generate PDF |
1. Installation
# npm (includes Chromium)
npm install puppeteer
# Puppeteer core (BYOB - Bring Your Own Browser)
npm install puppeteer-core
2. Basic Usage
Launch Browser
const puppeteer = require("puppeteer");
(async () => {
  // Launch browser
  const browser = await puppeteer.launch({
    // `true` runs without a visible window. The 'new' string value was only
    // an opt-in in older Puppeteer releases; check the docs for your version.
    headless: true,
    // These flags disable Chrome's sandbox. They are commonly needed inside
    // containers/CI, but should only be used with content you trust.
    args: ["--no-sandbox", "--disable-setuid-sandbox"],
  });
  try {
    // Create page
    const page = await browser.newPage();
    // Navigate
    await page.goto("https://example.com");
  } finally {
    // Close even when navigation fails, so no Chrome process is left behind
    await browser.close();
  }
})().catch((error) => {
  // Surface failures instead of leaving an unhandled promise rejection
  console.error(error);
  process.exitCode = 1;
});
Launch Options
const browser = await puppeteer.launch({
headless: false, // Show browser
slowMo: 50, // Slow down operations
devtools: true, // Open DevTools
defaultViewport: {
width: 1920,
height: 1080,
},
args: ["--start-maximized", "--disable-notifications", "--disable-gpu"],
executablePath: "/path/to/chrome", // Custom Chrome
userDataDir: "./user-data", // Persist session
});
3. Navigation
Page Navigation
// Go to URL
await page.goto("https://example.com", {
waitUntil: "networkidle2", // Wait for network idle
timeout: 30000,
});
// Wait options
// 'load' - window load event
// 'domcontentloaded' - DOMContentLoaded event
// 'networkidle0' - no network connections for 500ms
// 'networkidle2' - max 2 network connections for 500ms
// Navigate
await page.goBack();
await page.goForward();
await page.reload();
// Get URL
const url = page.url();
Wait for Elements
// Wait for selector
await page.waitForSelector(".my-class");
await page.waitForSelector("#my-id", { visible: true });
// Wait for function
await page.waitForFunction('document.querySelector(".loaded")');
// Wait for navigation (start waiting before the click so the event is not missed)
await Promise.all([page.waitForNavigation(), page.click("a.link")]);
// Fixed delay (last resort - prefer the condition-based waits above).
// page.waitForTimeout() is deprecated and no longer available in current
// Puppeteer releases, so use a plain timer promise instead.
await new Promise((resolve) => setTimeout(resolve, 1000)); // 1 second
4. Interactions
Click and Type
// Click element
await page.click("#submit-button");
await page.click("button.primary");
// Type text
await page.type("#username", "myuser");
await page.type("#password", "mypass", { delay: 100 }); // Human-like
// Clear and type
await page.click("#input", { clickCount: 3 });
await page.type("#input", "new value");
// Press keys
await page.keyboard.press("Enter");
await page.keyboard.press("Tab");
await page.keyboard.down("Shift");
await page.keyboard.press("Tab");
await page.keyboard.up("Shift");
Forms
// Select dropdown
await page.select("#dropdown", "option-value");
await page.select("#multi-select", "value1", "value2");
// Checkbox
await page.click("#checkbox");
// File upload
const input = await page.$("input[type=file]");
await input.uploadFile("./file.pdf");
// Submit form
await page.click("button[type=submit]");
// or
await page.$eval("form", (form) => form.submit());
Mouse Actions
// Move mouse
await page.mouse.move(100, 200);
// Click at position
await page.mouse.click(100, 200);
// Drag and drop
await page.mouse.move(100, 100);
await page.mouse.down();
await page.mouse.move(200, 200);
await page.mouse.up();
// Scroll
await page.mouse.wheel({ deltaY: 500 });
5. Data Extraction
Get Content
// Get text content
const text = await page.$eval(".title", (el) => el.textContent);
// Get attribute
const href = await page.$eval("a", (el) => el.getAttribute("href"));
// Get multiple elements
const items = await page.$$eval(".item", (elements) =>
elements.map((el) => ({
title: el.querySelector(".title").textContent,
price: el.querySelector(".price").textContent,
}))
);
// Get inner HTML
const html = await page.$eval(".content", (el) => el.innerHTML);
// Get all text
const pageText = await page.evaluate(() => document.body.innerText);
Element Handles
// Get single element
const element = await page.$(".my-class");
if (element) {
const text = await element.evaluate((el) => el.textContent);
await element.click();
}
// Get multiple elements
const elements = await page.$$(".items");
for (const el of elements) {
console.log(await el.evaluate((node) => node.textContent));
}
Evaluate JavaScript
// Run in page context
const result = await page.evaluate(() => {
return {
title: document.title,
url: window.location.href,
data: window.myData,
};
});
// Pass arguments
const text = await page.evaluate((selector) => {
return document.querySelector(selector).textContent;
}, ".my-selector");
6. Screenshots & PDF
Screenshots
// Full page
await page.screenshot({ path: "fullpage.png", fullPage: true });
// Viewport only
await page.screenshot({ path: "viewport.png" });
// Element screenshot
const element = await page.$(".card");
await element.screenshot({ path: "element.png" });
// With options (lossy formats): `quality` only applies to jpeg/webp, and
// `clip` cannot be combined with `fullPage`
await page.screenshot({
  path: "screenshot.jpg",
  type: "jpeg", // or 'webp'; 'png' is the default
  quality: 80, // 0-100, jpeg/webp only
  clip: { x: 0, y: 0, width: 800, height: 600 },
});
// Full-page PNG with a transparent background
await page.screenshot({
  path: "screenshot.png",
  type: "png",
  fullPage: true,
  omitBackground: true, // Transparent
});
// As buffer
const buffer = await page.screenshot({ encoding: "binary" });
PDF Generation
await page.pdf({
path: "page.pdf",
format: "A4",
printBackground: true,
margin: {
top: "20mm",
right: "20mm",
bottom: "20mm",
left: "20mm",
},
});
// Custom size
await page.pdf({
path: "custom.pdf",
width: "8.5in",
height: "11in",
landscape: true,
});
7. Network Interception
Request Interception
await page.setRequestInterception(true);
page.on("request", (request) => {
// Block images
if (request.resourceType() === "image") {
request.abort();
}
// Modify headers
else if (request.url().includes("api")) {
request.continue({
headers: {
...request.headers(),
Authorization: "Bearer token",
},
});
} else {
request.continue();
}
});
Response Handling
page.on("response", async (response) => {
if (response.url().includes("/api/data")) {
const data = await response.json();
console.log("API Response:", data);
}
});
// Wait for specific response
const response = await page.waitForResponse((response) =>
response.url().includes("/api/data")
);
const data = await response.json();
Block Resources
await page.setRequestInterception(true);
const blockedTypes = ["image", "stylesheet", "font", "media"];
page.on("request", (request) => {
if (blockedTypes.includes(request.resourceType())) {
request.abort();
} else {
request.continue();
}
});
8. Authentication
Basic Auth
await page.authenticate({
username: "user",
password: "pass",
});
await page.goto("https://httpbin.org/basic-auth/user/pass");
Cookies
// Set cookies
await page.setCookie({
name: "session",
value: "abc123",
domain: "example.com",
});
// Get cookies
const cookies = await page.cookies();
console.log(cookies);
// Clear cookies
await page.deleteCookie({ name: "session" });
Local Storage
// Set local storage
await page.evaluate(() => {
localStorage.setItem("token", "mytoken");
});
// Get local storage
const token = await page.evaluate(() => {
return localStorage.getItem("token");
});
9. Multiple Pages/Tabs
Handle Popups
// Listen for new page
const [popup] = await Promise.all([
new Promise((resolve) =>
browser.once("targetcreated", async (target) => {
resolve(await target.page());
})
),
page.click('a[target="_blank"]'),
]);
await popup.waitForSelector(".content");
const text = await popup.$eval(".content", (el) => el.textContent);
await popup.close();
Multiple Tabs
const page1 = await browser.newPage();
const page2 = await browser.newPage();
await page1.goto("https://example.com");
await page2.goto("https://example.org");
// Get all pages
const pages = await browser.pages();
10. Advanced Patterns
Scraping with Pagination
/**
 * Scrape the text of every `.item` element across a paginated listing.
 *
 * Starts at `url`, collects the items on the current page, then follows the
 * `.next-page:not(.disabled)` control until no such control is found.
 *
 * @param {string} url - Address of the first page of the listing.
 * @returns {Promise<string[]>} Text content of every item, in page order.
 */
async function scrapeAllPages(url) {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    const allData = [];
    await page.goto(url);
    while (true) {
      // Scrape current page
      const items = await page.$$eval(".item", (elements) =>
        elements.map((el) => el.textContent)
      );
      allData.push(...items);
      // Check for next page; a missing or disabled control ends the crawl
      const nextButton = await page.$(".next-page:not(.disabled)");
      if (!nextButton) break;
      // Start waiting before clicking so the navigation event is not missed
      await Promise.all([page.waitForNavigation(), nextButton.click()]);
    }
    return allData;
  } finally {
    // Always release the browser, even when navigation or scraping throws
    await browser.close();
  }
}
Parallel Scraping
/**
 * Fetch the page title for each URL using a bounded number of tabs.
 *
 * A fixed set of workers pulls URLs from a shared cursor, so at most
 * `concurrency` pages are open at any time. Results keep the order of `urls`.
 *
 * @param {string[]} urls - Addresses to visit.
 * @param {{ concurrency?: number }} [options] - `concurrency` is the maximum
 *   number of pages open simultaneously (default 5; `Infinity` removes the cap).
 * @returns {Promise<Array<{ url: string, title: string }>>} One entry per URL.
 * @throws {RangeError} If `concurrency` is not a number >= 1.
 */
async function scrapeUrls(urls, { concurrency = 5 } = {}) {
  if (!(concurrency >= 1)) {
    throw new RangeError(`concurrency must be >= 1, got ${concurrency}`);
  }
  const browser = await puppeteer.launch();
  try {
    const results = new Array(urls.length);
    let nextIndex = 0;
    let aborted = false;
    const worker = async () => {
      // Claiming an index is synchronous, so no two workers take the same URL
      while (!aborted && nextIndex < urls.length) {
        const index = nextIndex++;
        const url = urls[index];
        const page = await browser.newPage();
        try {
          await page.goto(url);
          const title = await page.title();
          results[index] = { url, title };
        } catch (error) {
          // Stop the other workers from picking up more URLs after a failure
          aborted = true;
          throw error;
        } finally {
          // Close the tab on success and on failure alike
          await page.close();
        }
      }
    };
    const workerCount = Math.min(Math.floor(concurrency), urls.length);
    await Promise.all(Array.from({ length: workerCount }, () => worker()));
    return results;
  } finally {
    // Always release the browser, even when one of the URLs fails
    await browser.close();
  }
}
Retry Logic
/**
 * Run an async operation, retrying with a linearly growing delay on failure.
 *
 * `retries` is the total number of attempts (not the number of extra tries).
 * After failed attempt k the function waits `baseDelayMs * k` before retrying.
 *
 * @param {() => Promise<*>} fn - Operation to attempt.
 * @param {number} [retries=3] - Maximum number of attempts; positive integer.
 * @param {number} [baseDelayMs=1000] - Delay unit between attempts, in ms.
 * @returns {Promise<*>} Whatever `fn` resolves to on its first success.
 * @throws {RangeError} If `retries` is not a positive integer.
 * @throws The error from the final attempt when every attempt fails.
 */
async function withRetry(fn, retries = 3, baseDelayMs = 1000) {
  // Without this guard a non-positive count would skip the loop and resolve
  // to undefined without ever calling fn
  if (!Number.isInteger(retries) || retries < 1) {
    throw new RangeError(`retries must be a positive integer, got ${retries}`);
  }
  for (let attempt = 1; attempt <= retries; attempt++) {
    try {
      return await fn();
    } catch (error) {
      // Out of attempts: surface the last failure unchanged
      if (attempt === retries) throw error;
      await new Promise((resolve) => setTimeout(resolve, baseDelayMs * attempt));
    }
  }
}
await withRetry(async () => {
await page.goto("https://example.com");
await page.waitForSelector(".content");
});
11. Troubleshooting
Common Issues
Timeout errors:
// Increase timeout
await page.goto(url, { timeout: 60000 });
await page.setDefaultNavigationTimeout(60000);
await page.setDefaultTimeout(30000);
Element not found:
// Wait before interacting
await page.waitForSelector("#element", { visible: true });
await page.click("#element");
// Check if exists
const element = await page.$("#element");
if (element) {
await element.click();
}
Memory issues:
// Close pages when done
await page.close();
// Limit concurrent pages. JavaScript has no built-in Semaphore class, so
// either use a small concurrency library or process the URLs in batches:
const MAX_CONCURRENT_PAGES = 5;
for (let i = 0; i < urls.length; i += MAX_CONCURRENT_PAGES) {
  const batch = urls.slice(i, i + MAX_CONCURRENT_PAGES);
  await Promise.all(
    batch.map(async (url) => {
      const batchPage = await browser.newPage();
      try {
        await batchPage.goto(url);
        // ...work with batchPage...
      } finally {
        // Release the tab even when the work above throws
        await batchPage.close();
      }
    })
  );
}
Debugging:
const browser = await puppeteer.launch({
headless: false,
slowMo: 100,
devtools: true,
});
// Console logs
page.on("console", (msg) => console.log("PAGE LOG:", msg.text()));
// Page errors
page.on("pageerror", (err) => console.log("PAGE ERROR:", err));
Best Practices
- Use explicit waits (waitForSelector, waitForFunction) - Don't rely on fixed delays
- Handle errors - Try/catch and retry
- Close resources - Prevent memory leaks
- Block unnecessary resources - Images, fonts for speed
- Use headless - Faster in production
- Set viewport - Consistent rendering
- User agent - Avoid bot detection
- Rate limiting - Be respectful
- Parallel carefully - Limit concurrency
- Log operations - Debug easier
More from housegarofalo/claude-code-base
mqtt-iot
Configure MQTT brokers (Mosquitto, EMQX) for IoT messaging, device communication, and smart home integration. Manage topics, QoS levels, authentication, and bridging. Use when setting up IoT messaging, smart home communication, or device-to-cloud connectivity. (project)
22devops-engineer-agent
Infrastructure and DevOps specialist. Manages Docker, Kubernetes, CI/CD pipelines, and cloud deployments. Expert in GitHub Actions, Azure DevOps, Terraform, and container orchestration. Use for deployment automation, infrastructure setup, or CI/CD optimization.
6postgresql
Design, optimize, and manage PostgreSQL databases. Covers indexing, pgvector for AI embeddings, JSON operations, full-text search, and query optimization. Use when working with PostgreSQL, database design, or building data-intensive applications.
6home-assistant
Ultimate Home Assistant skill - complete administration, wireless protocols (Zigbee/ZHA/Z2M, Z-Wave JS, Thread, Matter), ESPHome device building, advanced troubleshooting, performance optimization, security hardening, custom integration development, and professional dashboard design. Covers configuration, REST API, automation debugging, database optimization, SSL/TLS, Jinja2 templating, and HACS custom cards. Use for any HA task.
6testing
Comprehensive testing skill covering unit, integration, and E2E testing with pytest, Jest, Cypress, and Playwright. Use for writing tests, improving coverage, debugging test failures, and setting up testing infrastructure.
5react-typescript
Build modern React applications with TypeScript. Covers React 18+ patterns, hooks, component architecture, state management (Zustand, Redux Toolkit), server components, and best practices. Use for React development, TypeScript integration, component design, and frontend architecture.
5