bio-methylation-methylkit
SKILL.md
methylKit Analysis
Read Bismark Coverage Files
library(methylKit)
file_list <- list('sample1.bismark.cov.gz', 'sample2.bismark.cov.gz',
'sample3.bismark.cov.gz', 'sample4.bismark.cov.gz')
sample_ids <- c('ctrl_1', 'ctrl_2', 'treat_1', 'treat_2')
treatment <- c(0, 0, 1, 1) # 0 = control, 1 = treatment
meth_obj <- methRead(
location = as.list(file_list),
sample.id = as.list(sample_ids),
treatment = treatment,
assembly = 'hg38',
context = 'CpG',
pipeline = 'bismarkCoverage'
)
Read Bismark cytosine Report
meth_obj <- methRead(
location = as.list(file_list),
sample.id = as.list(sample_ids),
treatment = treatment,
assembly = 'hg38',
context = 'CpG',
pipeline = 'bismarkCytosineReport'
)
Basic Statistics
# Coverage statistics
getMethylationStats(meth_obj[[1]], plot = TRUE, both.strands = FALSE)
# Coverage per sample
getCoverageStats(meth_obj[[1]], plot = TRUE, both.strands = FALSE)
Filter by Coverage
# Remove CpGs with very low or very high coverage
meth_filtered <- filterByCoverage(
meth_obj,
lo.count = 10, # Minimum 10 reads
lo.perc = NULL,
hi.count = NULL,
hi.perc = 99.9 # Remove top 0.1% (likely PCR artifacts)
)
Normalize Coverage
# Normalize coverage between samples (recommended)
meth_norm <- normalizeCoverage(meth_filtered, method = 'median')
Merge Samples (Unite)
# Find common CpGs across all samples
meth_united <- unite(meth_norm, destrand = TRUE) # Combine strands
# Allow some missing data
meth_united <- unite(meth_norm, destrand = TRUE, min.per.group = 2L)
Visualize Samples
# Correlation between samples
getCorrelation(meth_united, plot = TRUE)
# PCA of samples
PCASamples(meth_united, screeplot = TRUE)
PCASamples(meth_united)
# Clustering
clusterSamples(meth_united, dist = 'correlation', method = 'ward.D', plot = TRUE)
Differential Methylation (Single CpGs)
# Calculate differential methylation
diff_meth <- calculateDiffMeth(
meth_united,
overdispersion = 'MN', # Use shrinkage
test = 'Chisq',
mc.cores = 4
)
# Get significant differentially methylated CpGs
dmcs <- getMethylDiff(diff_meth, difference = 25, qvalue = 0.01)
# Hyper vs hypomethylated
dmcs_hyper <- getMethylDiff(diff_meth, difference = 25, qvalue = 0.01, type = 'hyper')
dmcs_hypo <- getMethylDiff(diff_meth, difference = 25, qvalue = 0.01, type = 'hypo')
Tile-Based Analysis (Regions)
# Aggregate CpGs into tiles/windows
tiles <- tileMethylCounts(meth_obj, win.size = 1000, step.size = 1000)
tiles_united <- unite(tiles, destrand = TRUE)
# Differential methylation on tiles
diff_tiles <- calculateDiffMeth(tiles_united, overdispersion = 'MN', mc.cores = 4)
dmrs <- getMethylDiff(diff_tiles, difference = 25, qvalue = 0.01)
Export Results
# To data frame
diff_df <- getData(dmcs)
write.csv(diff_df, 'dmcs_results.csv', row.names = FALSE)
# To BED file
library(genomation)
dmcs_gr <- as(dmcs, 'GRanges')
export(dmcs_gr, 'dmcs.bed', format = 'BED')
Annotate with Genomic Features
library(genomation)
gene_obj <- readTranscriptFeatures('genes.bed')
annotated <- annotateWithGeneParts(as(dmcs, 'GRanges'), gene_obj)
# Or with annotatr
library(annotatr)
annotations <- build_annotations(genome = 'hg38', annotations = 'hg38_basicgenes')
dmcs_annotated <- annotate_regions(regions = as(dmcs, 'GRanges'), annotations = annotations)
Reorganize for Multi-Group Comparison
# For more than 2 groups
meth_obj <- reorganize(
meth_united,
sample.ids = c('A1', 'A2', 'B1', 'B2', 'C1', 'C2'),
treatment = c(0, 0, 1, 1, 2, 2)
)
Pool Replicates
# Combine biological replicates
meth_pooled <- pool(meth_united, sample.ids = c('control', 'treatment'))
Key Functions
| Function | Purpose |
|---|---|
| methRead | Read methylation files |
| filterByCoverage | Remove low/high coverage |
| normalizeCoverage | Normalize between samples |
| unite | Find common CpGs |
| calculateDiffMeth | Statistical test |
| getMethylDiff | Filter significant results |
| tileMethylCounts | Region-level analysis |
| PCASamples | PCA visualization |
| getCorrelation | Sample correlation |
Key Parameters for calculateDiffMeth
| Parameter | Default | Description |
|---|---|---|
| overdispersion | none | MN (shrinkage) or shrinkMN |
| test | Chisq | Chisq, F, fast.fisher |
| mc.cores | 1 | Parallel cores |
| slim | TRUE | Remove unused columns |
Related Skills
- bismark-alignment - Generate input BAM files
- methylation-calling - Extract coverage files
- dmr-detection - Advanced DMR methods
- pathway-analysis/go-enrichment - Functional annotation
Weekly Installs
3
Repository
gptomics/bioskillsGitHub Stars
339
First Seen
Jan 24, 2026
Security Audits
Installed on
trae2
windsurf1
opencode1
codex1
claude-code1
antigravity1