adobe-core-workflow-b
Installation
SKILL.md
Adobe Core Workflow B — PDF Services
Overview
Document automation using Adobe PDF Services API: create PDFs from HTML/DOCX, extract structured text and tables with Sensei AI, generate documents from Word templates with JSON data, and convert PDFs to LLM-friendly Markdown.
Prerequisites
- Completed
adobe-install-authwith PDF Services credentials npm install @adobe/pdfservices-node-sdk(v4.x+)- 500 free document transactions/month on the free tier
Instructions
Step 1: Create PDF from HTML
// src/workflows/pdf-create.ts
import {
ServicePrincipalCredentials,
PDFServices,
MimeType,
CreatePDFJob,
CreatePDFResult,
} from '@adobe/pdfservices-node-sdk';
import * as fs from 'fs';
const credentials = new ServicePrincipalCredentials({
clientId: process.env.ADOBE_CLIENT_ID!,
clientSecret: process.env.ADOBE_CLIENT_SECRET!,
});
const pdfServices = new PDFServices({ credentials });
export async function htmlToPdf(htmlPath: string, outputPath: string): Promise<void> {
const inputStream = fs.createReadStream(htmlPath);
const inputAsset = await pdfServices.upload({
readStream: inputStream,
mimeType: MimeType.HTML,
});
const job = new CreatePDFJob({ inputAsset });
const pollingURL = await pdfServices.submit({ job });
const result = await pdfServices.getJobResult({
pollingURL,
resultType: CreatePDFResult,
});
const resultAsset = result.result!.asset;
const streamAsset = await pdfServices.getContent({ asset: resultAsset });
const output = fs.createWriteStream(outputPath);
streamAsset.readStream.pipe(output);
await new Promise((resolve, reject) => {
output.on('finish', resolve);
output.on('error', reject);
});
console.log(`PDF created: ${outputPath}`);
}
Step 2: Extract Text and Tables from PDF (Sensei AI)
// src/workflows/pdf-extract.ts
import {
PDFServices,
MimeType,
ExtractPDFParams,
ExtractElementType,
ExtractPDFJob,
ExtractPDFResult,
ExtractRenditionsElementType,
} from '@adobe/pdfservices-node-sdk';
import * as fs from 'fs';
import AdmZip from 'adm-zip';
export async function extractPdfContent(
pdfPath: string,
options?: { tables?: boolean; figures?: boolean }
): Promise<{ text: string; tables: any[]; }> {
const inputStream = fs.createReadStream(pdfPath);
const inputAsset = await pdfServices.upload({
readStream: inputStream,
mimeType: MimeType.PDF,
});
const elements = [ExtractElementType.TEXT];
if (options?.tables !== false) elements.push(ExtractElementType.TABLES);
const params = new ExtractPDFParams({
elementsToExtract: elements,
...(options?.figures && {
elementsToExtractRenditions: [ExtractRenditionsElementType.FIGURES],
}),
});
const job = new ExtractPDFJob({ inputAsset, params });
const pollingURL = await pdfServices.submit({ job });
const result = await pdfServices.getJobResult({
pollingURL,
resultType: ExtractPDFResult,
});
// Download and parse the result ZIP
const resultAsset = result.result!.resource;
const streamAsset = await pdfServices.getContent({ asset: resultAsset });
const chunks: Buffer[] = [];
for await (const chunk of streamAsset.readStream) {
chunks.push(Buffer.from(chunk));
}
const zip = new AdmZip(Buffer.concat(chunks));
const structuredData = JSON.parse(
zip.readAsText('structuredData.json')
);
// Parse text elements
const textElements = structuredData.elements
.filter((el: any) => el.Text)
.map((el: any) => el.Text);
// Parse table elements
const tableElements = structuredData.elements
.filter((el: any) => el.Path?.includes('/Table'));
return { text: textElements.join('\n'), tables: tableElements };
}
Step 3: Document Generation from Word Template
// src/workflows/pdf-docgen.ts
import {
PDFServices,
MimeType,
DocumentMergeJob,
DocumentMergeParams,
DocumentMergeResult,
OutputFormat,
} from '@adobe/pdfservices-node-sdk';
import * as fs from 'fs';
export async function generateDocument(
templatePath: string, // .docx Word template with {{tags}}
data: Record<string, any>,
outputPath: string,
format: 'pdf' | 'docx' = 'pdf'
): Promise<void> {
const inputStream = fs.createReadStream(templatePath);
const inputAsset = await pdfServices.upload({
readStream: inputStream,
mimeType: MimeType.DOCX,
});
const params = new DocumentMergeParams({
jsonDataForMerge: data,
outputFormat: format === 'pdf' ? OutputFormat.PDF : OutputFormat.DOCX,
});
const job = new DocumentMergeJob({ inputAsset, params });
const pollingURL = await pdfServices.submit({ job });
const result = await pdfServices.getJobResult({
pollingURL,
resultType: DocumentMergeResult,
});
const resultAsset = result.result!.asset;
const streamAsset = await pdfServices.getContent({ asset: resultAsset });
const output = fs.createWriteStream(outputPath);
streamAsset.readStream.pipe(output);
console.log(`Document generated: ${outputPath}`);
}
// Usage: Invoice generation
// await generateDocument('./templates/invoice.docx', {
// company: 'Acme Corp',
// invoiceNumber: 'INV-2026-001',
// items: [
// { description: 'API Integration', quantity: 1, price: 5000 },
// { description: 'Support Plan', quantity: 12, price: 200 },
// ],
// total: '$7,400.00',
// }, './output/invoice.pdf');
Step 4: PDF to Markdown (LLM-Friendly)
// PDF Extract API supports structured output for LLM ingestion
export async function pdfToMarkdown(pdfPath: string): Promise<string> {
const { text } = await extractPdfContent(pdfPath, { tables: false });
// The structuredData.json includes element paths indicating heading levels
// For full Markdown fidelity, parse element Paths:
// /H1 -> # heading, /H2 -> ## heading, /L/LI -> bullet
return text;
}
Output
- PDF files created from HTML, DOCX, or other formats
- Structured JSON with text, tables, and figures extracted from PDFs
- Dynamic documents generated from Word templates with JSON data
- Markdown text extracted from PDFs for LLM consumption
Error Handling
| Error | Cause | Solution |
|---|---|---|
DISQUALIFIED |
Encrypted or DRM-protected PDF | Remove encryption before processing |
BAD_PDF |
Corrupted PDF file | Validate PDF with pdfinfo before upload |
TIMEOUT |
Large PDF (100+ pages) | Split into smaller PDFs first |
QUOTA_EXCEEDED |
Free tier limit (500 tx/month) | Upgrade plan or wait for monthly reset |
UNSUPPORTED_MEDIA_TYPE |
Wrong MimeType for input | Match MimeType to actual file format |
Resources
Next Steps
For common errors, see adobe-common-errors.
Weekly Installs
2
Repository
jeremylongshore…s-skillsGitHub Stars
2.1K
First Seen
Apr 8, 2026
Security Audits