json-and-csv-data-transformation
SKILL.md
JSON and CSV Data Transformation
Transform data between JSON, CSV, and other formats. Filter, map, flatten nested objects, and reshape data for analysis, reporting, and API integration.
When to use
- Use case 1: When the user asks to convert data between JSON and CSV formats
- Use case 2: When you need to filter, extract, or transform specific fields from data
- Use case 3: For flattening nested JSON structures into tabular format
- Use case 4: When processing API responses for analysis or reporting
Required tools / APIs
- jq — Command-line JSON processor (essential for JSON manipulation)
- csvkit — Suite of CSV tools (csvjson, csvcut, csvgrep, etc.)
- No external API required
Install options:
# Ubuntu/Debian
sudo apt-get install -y jq csvkit
# macOS
brew install jq csvkit
# Node.js (native support, no packages needed for basic operations)
# For advanced CSV parsing: npm install csv-parse csv-stringify
Skills
json_to_csv
Convert JSON array to CSV format.
# Simple JSON array to CSV
echo '[{"name":"Alice","age":30},{"name":"Bob","age":25}]' | jq -r '(.[0] | keys_unsorted) as $keys | $keys, (map([.[ $keys[] ]]) | .[] | @csv)'
# JSON file to CSV file
jq -r '(.[0] | keys_unsorted) as $keys | $keys, (map([.[ $keys[] ]]) | .[] | @csv)' data.json > output.csv
# JSON to CSV with specific fields
jq -r '.[] | [.id, .name, .email] | @csv' users.json
# Using csvkit (simpler syntax)
cat data.json | in2csv -f json > output.csv
Node.js:
/**
 * Convert an array of plain objects into a CSV string (RFC 4180 quoting).
 *
 * @param {Object[]} jsonArray - Array of flat objects; values may be any
 *   primitive (null/undefined become empty cells).
 * @returns {string} CSV text with a header row, or '' for empty/non-array input.
 */
function jsonToCSV(jsonArray) {
  if (!Array.isArray(jsonArray) || jsonArray.length === 0) {
    return '';
  }
  // Union of keys across ALL rows, in first-seen order, so objects with
  // differing shapes don't silently lose fields that the first row lacks.
  const headers = [...new Set(jsonArray.flatMap((obj) => Object.keys(obj)))];
  // Escape CSV values per RFC 4180: quote when the value contains a
  // delimiter, a quote, or a line break, and double embedded quotes.
  const escape = (val) => {
    if (val === null || val === undefined) return '';
    const str = String(val);
    if (str.includes(',') || str.includes('"') || str.includes('\n') || str.includes('\r')) {
      return `"${str.replace(/"/g, '""')}"`;
    }
    return str;
  };
  // Build CSV: header row followed by one row per object.
  const headerRow = headers.join(',');
  const dataRows = jsonArray.map((obj) =>
    headers.map((header) => escape(obj[header])).join(',')
  );
  return [headerRow, ...dataRows].join('\n');
}
// Usage
// const data = [
// { name: 'Alice', age: 30, city: 'New York' },
// { name: 'Bob', age: 25, city: 'San Francisco' }
// ];
// console.log(jsonToCSV(data));
csv_to_json
Convert CSV to JSON array.
# CSV to JSON
csvjson data.csv
# CSV to JSON with pretty printing
csvjson data.csv | jq '.'
# CSV to JSON array of objects
csvjson --stream data.csv
# CSV file to JSON file
csvjson input.csv > output.json
# Using pure jq (if headers are in first row)
jq -Rsn '[inputs | split(",") | {name: .[0], age: .[1], city: .[2]}]' < data.csv
Node.js:
/**
 * Convert a CSV string (header row + data rows) into an array of objects.
 * Handles quoted fields containing commas and doubled quotes; does NOT
 * handle line breaks inside quoted fields (use a streaming parser for that).
 *
 * @param {string} csvString - CSV text whose first line holds the headers.
 * @returns {Object[]} One object per data row, all values as strings.
 */
function csvToJSON(csvString) {
  // Split on LF or CRLF so Windows line endings don't leave a stray '\r'
  // attached to the last value of every row.
  const lines = csvString.trim().split(/\r?\n/);
  if (lines.length < 2) return [];
  // Unwrap a quoted value and collapse doubled quotes ("" -> ").
  const parseCSVValue = (val) => {
    val = val.trim();
    if (val.startsWith('"') && val.endsWith('"')) {
      return val.slice(1, -1).replace(/""/g, '"');
    }
    return val;
  };
  // Split one CSV line on commas that are NOT inside a quoted field.
  const splitCSVLine = (line) => {
    const result = [];
    let current = '';
    let inQuotes = false;
    for (let i = 0; i < line.length; i++) {
      const char = line[i];
      if (char === '"') {
        inQuotes = !inQuotes;
        current += char;
      } else if (char === ',' && !inQuotes) {
        result.push(parseCSVValue(current));
        current = '';
      } else {
        current += char;
      }
    }
    result.push(parseCSVValue(current));
    return result;
  };
  const headers = splitCSVLine(lines[0]);
  const data = lines.slice(1).map((line) => {
    const values = splitCSVLine(line);
    const obj = {};
    headers.forEach((header, i) => {
      // ?? so a short row yields '' for missing trailing columns.
      obj[header] = values[i] ?? '';
    });
    return obj;
  });
  return data;
}
// Usage
// const csv = `name,age,city
// Alice,30,New York
// Bob,25,"San Francisco"`;
// console.log(JSON.stringify(csvToJSON(csv), null, 2));
filter_and_extract_json
Filter and extract specific fields from JSON.
# Extract specific fields
jq '.[] | {name: .name, email: .email}' users.json
# Filter by condition
jq '.[] | select(.age > 25)' users.json
# Filter and extract
jq '[.[] | select(.active == true) | {id: .id, name: .name}]' data.json
# Extract nested fields
jq '.[] | {name: .name, street: .address.street, city: .address.city}' data.json
# Get array of single field
jq '.[].name' users.json
# Filter with multiple conditions
jq '.[] | select(.age > 20 and .country == "USA")' users.json
# Map and transform values
jq '.[] | .price = (.price * 1.1)' products.json
Node.js:
/**
 * Filter an array of objects and/or project each element down to a chosen
 * set of fields (dot notation reaches into nested objects).
 *
 * @param {Object|Object[]} data - Input records; a single object is wrapped.
 * @param {{filter?: Function, extract?: string[]}} options
 *   filter  - predicate applied to each record.
 *   extract - field paths; each becomes a key (dot path kept as-is) in the output.
 * @returns {Object[]} Filtered and/or projected records.
 */
function filterAndExtractJSON(data, options) {
  const { filter, extract } = options;
  const rows = Array.isArray(data) ? data : [data];
  // Keep only records passing the predicate, when one is given.
  const kept = filter ? rows.filter(filter) : rows;
  if (!extract) return kept;
  // Walk a dot-separated path, tolerating missing intermediate objects.
  const pluck = (item, path) =>
    path.split('.').reduce((acc, key) => acc?.[key], item);
  return kept.map((item) =>
    Object.fromEntries(extract.map((field) => [field, pluck(item, field)]))
  );
}
// Usage
// const users = [
// { id: 1, name: 'Alice', age: 30, address: { city: 'NYC' } },
// { id: 2, name: 'Bob', age: 25, address: { city: 'SF' } },
// { id: 3, name: 'Charlie', age: 35, address: { city: 'LA' } }
// ];
//
// const result = filterAndExtractJSON(users, {
// filter: user => user.age > 25,
// extract: ['name', 'age', 'address.city']
// });
// console.log(result);
flatten_nested_json
Flatten nested JSON objects into flat structure.
# Flatten nested JSON with jq
jq '[.[] | {id: .id, name: .name, street: .address.street, city: .address.city, zip: .address.zip}]' users.json
# Flatten all nested fields with custom separator
jq '[.[] | to_entries | map({key: .key, value: .value}) | from_entries]' data.json
# Flatten deeply nested structure
jq 'recurse | select(type != "object" and type != "array")' complex.json
Node.js:
/**
 * Flatten a nested object into a single-level object whose keys are the
 * joined paths (e.g. 'address.city'). Arrays are JSON-stringified rather
 * than expanded into rows.
 *
 * @param {Object} obj - Object to flatten.
 * @param {string} [prefix=''] - Key prefix used during recursion.
 * @param {string} [separator='.'] - Path separator between key segments.
 * @returns {Object} Flat object of path -> leaf value.
 */
function flattenJSON(obj, prefix = '', separator = '.') {
  const flattened = {};
  // Object.entries visits own enumerable keys only; the original for...in
  // would also have walked inherited enumerable properties.
  for (const [key, value] of Object.entries(obj)) {
    const newKey = prefix ? `${prefix}${separator}${key}` : key;
    if (value !== null && typeof value === 'object' && !Array.isArray(value)) {
      // Recursively flatten nested objects into dotted keys.
      Object.assign(flattened, flattenJSON(value, newKey, separator));
    } else if (Array.isArray(value)) {
      // Arrays become a JSON string so the result stays tabular.
      flattened[newKey] = JSON.stringify(value);
    } else {
      flattened[newKey] = value;
    }
  }
  return flattened;
}
// Usage
// const nested = {
// id: 1,
// name: 'Alice',
// address: {
// street: '123 Main St',
// city: 'NYC',
// coordinates: { lat: 40.7, lon: -74.0 }
// },
// tags: ['user', 'active']
// };
// console.log(flattenJSON(nested));
// Output: {
// id: 1,
// name: 'Alice',
// 'address.street': '123 Main St',
// 'address.city': 'NYC',
// 'address.coordinates.lat': 40.7,
// 'address.coordinates.lon': -74.0,
// tags: '["user","active"]'
// }
transform_csv_data
Transform and manipulate CSV data.
# Select specific columns
csvcut -c name,email,age users.csv
# Filter rows by value
csvgrep -c age -r "^[3-9][0-9]$" users.csv # matches two-digit ages 30–99 (three-digit ages need a different pattern)
# Sort CSV
csvsort -c age -r users.csv # reverse sort by age
# Remove duplicate rows
csvcut -c name,email users.csv | uniq
# Combine: filter, select columns, sort
csvgrep -c country -m "USA" users.csv | csvcut -c name,age | csvsort -c age
# Add calculated column (requires csvpy or awk)
awk -F',' 'BEGIN{OFS=","} NR==1{print $0,"total"} NR>1{print $0,$2*$3}' data.csv
# Merge two CSV files by column
csvjoin -c id users.csv orders.csv
Node.js:
/**
 * Apply filter / column-selection / sort transformations to CSV text.
 * Relies on the sibling helpers csvToJSON and jsonToCSV defined in this file.
 *
 * @param {string} csvData - CSV text with a header row.
 * @param {{filterRows?: Function, selectColumns?: string[],
 *          sortBy?: {column: string, reverse?: boolean}}} transformations
 * @returns {string} Transformed CSV text.
 */
function transformCSV(csvData, transformations) {
  const { selectColumns, filterRows, sortBy } = transformations;
  // Parse CSV to an array of row objects (all values are strings).
  let result = csvToJSON(csvData);
  // Filter rows
  if (filterRows) {
    result = result.filter(filterRows);
  }
  // Select columns
  if (selectColumns) {
    result = result.map((row) => {
      const selected = {};
      selectColumns.forEach((col) => {
        selected[col] = row[col];
      });
      return selected;
    });
  }
  // Sort
  if (sortBy) {
    const { column, reverse } = sortBy;
    result.sort((a, b) => {
      const aVal = a[column];
      const bVal = b[column];
      // CSV values are strings; compare numerically when both parse as
      // finite numbers so "100" sorts after "25" instead of before it.
      const aNum = Number(aVal);
      const bNum = Number(bVal);
      const bothNumeric =
        aVal !== '' && bVal !== '' &&
        Number.isFinite(aNum) && Number.isFinite(bNum);
      const comparison = bothNumeric
        ? aNum - bNum
        : aVal > bVal ? 1 : aVal < bVal ? -1 : 0;
      return reverse ? -comparison : comparison;
    });
  }
  // Convert back to CSV
  return jsonToCSV(result);
}
// Usage
// const csv = `name,age,country
// Alice,30,USA
// Bob,25,Canada
// Charlie,35,USA`;
//
// const transformed = transformCSV(csv, {
// filterRows: row => row.country === 'USA',
// selectColumns: ['name', 'age'],
// sortBy: { column: 'age', reverse: true }
// });
// console.log(transformed);
aggregate_and_group_json
Aggregate and group JSON data (similar to SQL GROUP BY).
# Group by field and count
jq 'group_by(.country) | map({country: .[0].country, count: length})' users.json
# Sum values by group
jq 'group_by(.category) | map({category: .[0].category, total: map(.price) | add})' products.json
# Average by group
jq 'group_by(.department) | map({department: .[0].department, avg_salary: (map(.salary) | add / length)})' employees.json
# Multiple aggregations
jq 'group_by(.region) | map({
region: .[0].region,
count: length,
total_sales: map(.sales) | add,
avg_sales: (map(.sales) | add / length)
})' sales.json
Node.js:
/**
 * Group an array of objects by one field and compute aggregates per group
 * (SQL-style GROUP BY).
 *
 * @param {Object[]} data - Input records.
 * @param {string} groupBy - Field name to group on.
 * @param {{name: string, type: 'count'|'sum'|'avg'|'min'|'max', field?: string}[]}
 *   aggregations - Output column name, aggregate type, and (except for
 *   'count') the numeric field to aggregate.
 * @returns {Object[]} One object per group: the group key plus each aggregate.
 */
function groupAndAggregate(data, groupBy, aggregations) {
  // Bucket records by their group-key value.
  const grouped = {};
  data.forEach((item) => {
    const key = item[groupBy];
    if (!grouped[key]) grouped[key] = [];
    grouped[key].push(item);
  });
  // Compute each requested aggregate for every bucket.
  return Object.entries(grouped).map(([key, items]) => {
    const result = { [groupBy]: key };
    aggregations.forEach((agg) => {
      if (agg.type === 'count') {
        result[agg.name] = items.length;
      } else if (agg.type === 'sum') {
        // ?? (not ||) so a legitimate value of 0 is kept; only
        // null/undefined fields fall back to the neutral element.
        result[agg.name] = items.reduce((sum, item) => sum + (item[agg.field] ?? 0), 0);
      } else if (agg.type === 'avg') {
        const sum = items.reduce((s, item) => s + (item[agg.field] ?? 0), 0);
        result[agg.name] = items.length > 0 ? sum / items.length : 0;
      } else if (agg.type === 'min') {
        result[agg.name] = Math.min(...items.map((item) => item[agg.field] ?? Infinity));
      } else if (agg.type === 'max') {
        result[agg.name] = Math.max(...items.map((item) => item[agg.field] ?? -Infinity));
      }
    });
    return result;
  });
}
// Usage
// const sales = [
// { region: 'East', product: 'A', amount: 100 },
// { region: 'East', product: 'B', amount: 200 },
// { region: 'West', product: 'A', amount: 150 },
// { region: 'West', product: 'B', amount: 250 }
// ];
//
// const result = groupAndAggregate(sales, 'region', [
// { name: 'count', type: 'count' },
// { name: 'total_amount', type: 'sum', field: 'amount' },
// { name: 'avg_amount', type: 'avg', field: 'amount' }
// ]);
// console.log(result);
Rate limits / Best practices
- ✅ Stream large files — Use jq with the `-c` flag and process line by line for large datasets
- ✅ Validate data — Check JSON/CSV format before transformation
- ✅ Handle missing fields — Use default values for null/undefined fields
- ✅ Memory management — For files >100MB, use streaming parsers
- ✅ Type conversion — Be aware of number/string conversions in CSV
- ✅ Preserve data types — JSON maintains types, CSV converts everything to strings
- ⚠️ Character encoding — Ensure UTF-8 encoding for international characters
- ⚠️ Quote escaping — Properly escape quotes in CSV values
Agent prompt
You have JSON and CSV data transformation capability. When a user asks to transform data:
1. Identify the input format:
- JSON: Look for {...} or [...]
- CSV: Look for comma-separated values with headers
2. For JSON to CSV:
- Use jq with @csv filter: `jq -r '... | @csv'`
- Or csvkit: `in2csv -f json`
- Node.js: Convert array of objects to CSV string
3. For CSV to JSON:
- Use csvjson from csvkit: `csvjson file.csv`
- Node.js: Parse CSV headers and data rows into objects
4. For filtering/extracting:
- Use jq select(): `jq '.[] | select(.age > 25)'`
- Use csvkit csvgrep: `csvgrep -c column -m value`
- Node.js: Use Array.filter() and map()
5. For flattening:
- Flatten nested JSON objects into dot notation
- Convert nested structures to tabular format
- Handle arrays by stringifying or creating separate rows
6. For aggregation:
- Use jq group_by(): `jq 'group_by(.field) | map({...})'`
- CSV: Convert to JSON, aggregate, convert back
- Node.js: Implement grouping and aggregation functions
Always:
- Preserve data integrity (no data loss)
- Handle edge cases (empty values, special characters)
- Validate output format matches expected structure
- For large files (>100MB), recommend streaming approaches
Troubleshooting
Error: "parse error: Invalid numeric literal"
- Symptom: jq fails to parse JSON
- Solution: Validate JSON format with `jq empty file.json`, then fix the reported syntax errors
CSV columns not aligned:
- Symptom: Data appears in wrong columns after transformation
- Solution: Check for unescaped commas in data, ensure quotes are properly escaped
Empty output from jq:
- Symptom: jq returns no results
- Solution: Check filter expression syntax, verify data structure matches filter
Special characters broken in CSV:
- Symptom: Non-ASCII characters appear garbled
- Solution: Ensure UTF-8 encoding:
iconv -f UTF-8 -t UTF-8 file.csv
Memory error with large files:
- Symptom: Process runs out of memory
- Solution: Use streaming mode: `jq -c`, or Node.js streams, for line-by-line processing
JSON doesn't convert to flat CSV:
- Symptom: Nested objects create complex CSV structure
- Solution: Flatten JSON first before converting to CSV
See also
- ../database-query-and-export/SKILL.md — Export database results as JSON/CSV
- ../web-search-api/SKILL.md — Transform API responses to desired format
- ../using-web-scraping/SKILL.md — Process scraped data into structured formats
Weekly Installs
8
Repository
besoeasy/open-skills
GitHub Stars
89
First Seen
Mar 1, 2026
Security Audits
Installed on
kimi-cli: 8
opencode: 7
gemini-cli: 7
github-copilot: 7
codex: 7
amp: 7