skills/gptomics/bioskills/bio-data-visualization-specialized-omics-plots

bio-data-visualization-specialized-omics-plots

SKILL.md

Specialized Omics Plots

Scope

This skill provides reusable plotting functions for common omics visualizations that can be applied across different analysis types:

  • Volcano plots (any DE result)
  • MA plots (any log-fold-change data)
  • PCA plots (any high-dimensional data)
  • Enrichment dotplots (manual, not enrichplot)
  • Expression boxplots with statistics
  • Survival curves
  • UMAP embeddings (scanpy)
  • Correlation heatmaps
  • Split violin plots

For DESeq2/edgeR built-in functions (plotMA, plotPCA, plotDispEsts), see differential-expression/de-visualization. For enrichplot-specific functions (dotplot, cnetplot, emapplot, gseaplot2), see pathway-analysis/enrichment-visualization.

Volcano Plot (R)

library(ggplot2)
library(ggrepel)

# Volcano plot for any differential-expression table.
#
# Args:
#   res:   Table convertible with as.data.frame() that has the columns
#          `gene`, `log2FoldChange`, `pvalue` and `padj`.
#   fdr:   Adjusted p-value cutoff used to call a gene significant.
#   lfc:   Absolute log2 fold-change cutoff.
#   top_n: Number of significant genes (smallest `padj` first) to label.
#
# Returns:
#   A ggplot object.
volcano_plot <- function(res, fdr = 0.05, lfc = 1, top_n = 10) {
    # Base R only: the snippet loads ggplot2 and ggrepel, not dplyr/magrittr.
    res <- as.data.frame(res)
    needed <- c('gene', 'log2FoldChange', 'pvalue', 'padj')
    absent <- setdiff(needed, names(res))
    if (length(absent) > 0) {
        stop('`res` is missing required column(s): ',
             paste(absent, collapse = ', '), call. = FALSE)
    }

    # Rows with NA in padj or log2FoldChange are never called significant.
    passes_fdr <- !is.na(res$padj) & res$padj < fdr
    has_lfc <- !is.na(res$log2FoldChange)
    is_up <- passes_fdr & has_lfc & res$log2FoldChange > lfc
    is_down <- passes_fdr & has_lfc & res$log2FoldChange < -lfc
    res$significance <- ifelse(is_up, 'Up', ifelse(is_down, 'Down', 'NS'))

    # Label the top_n significant genes. Ordering only the significant rows
    # avoids losing labels to tied ranks or to small-padj genes that fail the
    # fold-change cutoff; as.character() keeps factor gene names readable.
    res$label <- rep('', nrow(res))
    hits <- which(res$significance != 'NS')
    hits <- head(hits[order(res$padj[hits])], top_n)
    res$label[hits] <- as.character(res$gene[hits])

    # The y axis shows raw p-values, so draw the cutoff at the largest raw
    # p-value that still passes the FDR threshold. Fall back to `fdr` itself
    # when nothing passes.
    sig_p <- res$pvalue[passes_fdr & !is.na(res$pvalue)]
    p_cut <- if (length(sig_p) > 0) max(sig_p) else fdr

    ggplot(res, aes(log2FoldChange, -log10(pvalue), color = significance)) +
        geom_point(alpha = 0.6, size = 1.5) +
        geom_text_repel(aes(label = label), color = 'black', size = 3, max.overlaps = 20) +
        scale_color_manual(values = c('Up' = '#E64B35', 'Down' = '#4DBBD5', 'NS' = 'grey60')) +
        geom_vline(xintercept = c(-lfc, lfc), linetype = 'dashed', color = 'grey40') +
        geom_hline(yintercept = -log10(p_cut), linetype = 'dashed', color = 'grey40') +
        # 'P-value' is quoted so plotmath does not parse it as `P - value`.
        labs(x = expression(Log[2]~Fold~Change), y = expression(-Log[10]~'P-value')) +
        theme_bw() + theme(panel.grid = element_blank())
}

Volcano Plot (Python)

import matplotlib.pyplot as plt
import numpy as np

def volcano_plot(df, fdr=0.05, lfc=1, ax=None):
    """Draw a volcano plot of a differential-expression table.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``log2FoldChange``, ``pvalue`` and ``padj``.
    fdr : float
        Adjusted p-value cutoff used to call a gene significant.
    lfc : float
        Absolute log2 fold-change cutoff.
    ax : matplotlib Axes, optional
        Axes to draw on; a new 8x6 figure is created when omitted.

    Returns
    -------
    The Axes that was drawn on.
    """
    if ax is None:
        _, ax = plt.subplots(figsize=(8, 6))

    # A p-value of exactly 0 would become +inf on the log scale and matplotlib
    # silently drops non-finite points, so clamp to the smallest positive float.
    tiny = np.finfo(float).tiny
    log_fc = df['log2FoldChange']
    neg_log_p = -np.log10(df['pvalue'].clip(lower=tiny))

    # Comparisons against NaN are False, so rows with missing values end up NS.
    passes_fdr = df['padj'] < fdr
    sig_up = passes_fdr & (log_fc > lfc)
    sig_down = passes_fdr & (log_fc < -lfc)
    ns = ~(sig_up | sig_down)

    layers = (
        (ns, 'grey', 0.5, 10, 'NS'),
        (sig_up, '#E64B35', 0.7, 15, 'Up'),
        (sig_down, '#4DBBD5', 0.7, 15, 'Down'),
    )
    for mask, colour, alpha, size, label in layers:
        ax.scatter(log_fc[mask], neg_log_p[mask],
                   c=colour, alpha=alpha, s=size, label=label)

    # The y axis shows raw p-values, so place the cutoff at the largest raw
    # p-value that still passes the FDR threshold; fall back to `fdr` itself
    # when nothing passes.
    sig_p = df.loc[passes_fdr, 'pvalue'].dropna()
    p_cut = max(sig_p.max(), tiny) if len(sig_p) else fdr

    ax.axhline(-np.log10(p_cut), ls='--', c='grey', lw=0.8)
    ax.axvline(-lfc, ls='--', c='grey', lw=0.8)
    ax.axvline(lfc, ls='--', c='grey', lw=0.8)

    ax.set_xlabel('Log2 Fold Change')
    ax.set_ylabel('-Log10 P-value')
    ax.legend()
    return ax

MA Plot (R)

# MA plot: log2 fold change against log10 mean expression.
#
# Args:
#   res: Table convertible with as.data.frame() that has the columns
#        `baseMean`, `log2FoldChange` and `padj`.
#   fdr: Adjusted p-value cutoff; rows with `padj < fdr` are highlighted.
#
# Returns:
#   A ggplot object (legend hidden).
ma_plot <- function(res, fdr = 0.05) {
    # Base R only: dplyr/magrittr are not attached by this snippet.
    res <- as.data.frame(res)

    # NA padj counts as not significant.
    res$significant <- !is.na(res$padj) & res$padj < fdr

    # Put significant rows last so they are drawn on top of the grey points.
    res <- res[order(res$significant), , drop = FALSE]

    ggplot(res, aes(log10(baseMean), log2FoldChange, color = significant)) +
        geom_point(alpha = 0.5, size = 1) +
        scale_color_manual(values = c('FALSE' = 'grey60', 'TRUE' = '#E64B35')) +
        geom_hline(yintercept = 0, color = 'black', linewidth = 0.5) +
        labs(x = expression(Log[10]~Mean~Expression), y = expression(Log[2]~Fold~Change)) +
        theme_bw() + theme(panel.grid = element_blank(), legend.position = 'none')
}

PCA Plot (R)

# PCA scatter plot (PC1 vs PC2) of the most variable rows of an assay.
#
# Args:
#   vsd:      Object for which assay() and colData() are defined
#             (presumably a transformed DESeq2 object - confirm against caller).
#   intgroup: Name of a single colData column used to colour the samples.
#   ntop:     Number of highest-variance rows fed into the PCA.
#
# Returns:
#   A ggplot object with 95% ellipses per group and the percentage of variance
#   explained in the axis titles.
pca_plot <- function(vsd, intgroup = 'condition', ntop = 500) {
    # Fail early with a readable message instead of a tidy-eval error at
    # draw time.
    if (length(intgroup) != 1 || !intgroup %in% colnames(colData(vsd))) {
        stop('`intgroup` must name exactly one column of colData(vsd)', call. = FALSE)
    }

    mat <- assay(vsd)
    rv <- rowVars(mat)
    select <- order(rv, decreasing = TRUE)[seq_len(min(ntop, length(rv)))]

    # drop = FALSE keeps a matrix even when only one row is selected.
    pca <- prcomp(t(mat[select, , drop = FALSE]))
    if (ncol(pca$x) < 2) {
        stop('PCA produced fewer than two components; need at least two rows and two samples',
             call. = FALSE)
    }
    percentVar <- round(100 * pca$sdev^2 / sum(pca$sdev^2), 1)

    pca_df <- data.frame(PC1 = pca$x[, 1], PC2 = pca$x[, 2], colData(vsd))

    ggplot(pca_df, aes(PC1, PC2, color = .data[[intgroup]])) +
        geom_point(size = 3) +
        stat_ellipse(level = 0.95, linetype = 'dashed') +
        labs(x = paste0('PC1 (', percentVar[1], '%)'),
             y = paste0('PC2 (', percentVar[2], '%)')) +
        theme_bw() + theme(panel.grid = element_blank())
}

PCA Plot (Python)

from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

def pca_plot(df, metadata, color_by, ax=None):
    """Scatter the first two principal components of ``df``, coloured by group.

    Parameters
    ----------
    df : pandas.DataFrame
        Features in rows, samples in columns (the frame is transposed before
        fitting).
    metadata : pandas.DataFrame
        One row per sample. When its index is unique and contains every column
        label of ``df``, rows are aligned to ``df.columns`` by label; otherwise
        rows are matched to samples by position.
    color_by : str
        Column of ``metadata`` that defines the groups.
    ax : matplotlib Axes, optional
        Axes to draw on; a new 8x6 figure is created when omitted.

    Returns
    -------
    The Axes that was drawn on.
    """
    if ax is None:
        _, ax = plt.subplots(figsize=(8, 6))

    groups = metadata[color_by]
    # Align by sample label when possible so that metadata listed in a
    # different order than the expression columns cannot mislabel points.
    if groups.index.is_unique and df.columns.isin(groups.index).all():
        groups = groups.reindex(df.columns)
    # Plain array: the rows of `pcs` carry no index, so masks must be positional.
    labels = groups.to_numpy()

    pca = PCA(n_components=2)
    pcs = pca.fit_transform(df.T)

    for group in groups.unique():
        mask = labels == group
        ax.scatter(pcs[mask, 0], pcs[mask, 1], label=group, alpha=0.8, s=50)

    ax.set_xlabel(f'PC1 ({pca.explained_variance_ratio_[0]*100:.1f}%)')
    ax.set_ylabel(f'PC2 ({pca.explained_variance_ratio_[1]*100:.1f}%)')
    ax.legend()
    return ax

Dotplot for Enrichment (R)

library(ggplot2)

# Dot plot of the most significant enrichment terms.
#
# Args:
#   enrich_result: Table convertible with as.data.frame() that has the columns
#                  `Description`, `GeneRatio` (text of the form "k/n"),
#                  `Count` and `p.adjust`.
#   top_n:         Number of terms (smallest `p.adjust` first) to show.
#
# Returns:
#   A ggplot object; dot size encodes `Count`, colour encodes `p.adjust` on a
#   log10 scale.
enrichment_dotplot <- function(enrich_result, top_n = 20) {
    # Base R only: dplyr/magrittr are not attached by this snippet.
    df <- as.data.frame(enrich_result)
    df <- df[order(df$p.adjust), , drop = FALSE]
    df <- head(df, top_n)

    # unique() guards against duplicated term names, which factor() rejects
    # as duplicated levels. Reversed so the best term ends up at the top.
    term_names <- as.character(df$Description)
    df$Description <- factor(term_names, levels = rev(unique(term_names)))

    # "k/n" -> k / n. vapply keeps the result numeric even for zero rows.
    ratio_parts <- strsplit(as.character(df$GeneRatio), '/', fixed = TRUE)
    df$GeneRatio_numeric <- vapply(
        ratio_parts,
        function(x) as.numeric(x[1]) / as.numeric(x[2]),
        numeric(1)
    )

    ggplot(df, aes(GeneRatio_numeric, Description, size = Count, color = p.adjust)) +
        geom_point() +
        scale_color_gradient(low = '#E64B35', high = '#4DBBD5', trans = 'log10') +
        scale_size_continuous(range = c(3, 10)) +
        labs(x = 'Gene Ratio', y = NULL, color = 'Adj. P-value', size = 'Count') +
        theme_bw() + theme(panel.grid.major.y = element_blank())
}

Boxplot with Statistics (R)

library(ggpubr)

# Box plot of one expression column split by a grouping column, with jittered
# points and a t-test significance label.
#
# Args:
#   df:        Data frame holding both columns.
#   gene:      Name (string) of the column plotted on the y axis.
#   group_var: Name (string) of the column used for the x axis and colours.
#
# Returns:
#   A ggplot object with the legend hidden.
expression_boxplot <- function(df, gene, group_var) {
    y_title <- paste0(gene, ' Expression')

    boxes <- ggboxplot(
        df,
        x = group_var,
        y = gene,
        color = group_var,
        add = 'jitter',
        palette = 'npg'
    )
    significance <- stat_compare_means(method = 't.test', label = 'p.signif')

    boxes +
        significance +
        labs(y = y_title) +
        theme(legend.position = 'none')
}

UMAP/tSNE Plot (Python)

The wrapper below draws a UMAP embedding; for t-SNE, call sc.pl.tsne with the same arguments.

import scanpy as sc
import matplotlib.pyplot as plt

def umap_plot(adata, color, ax=None, **kwargs):
    """Draw a UMAP embedding with scanpy on a given or freshly created Axes.

    Parameters
    ----------
    adata : object passed straight to ``sc.pl.umap``.
    color : value forwarded as ``color=`` to ``sc.pl.umap``.
    ax : matplotlib Axes, optional
        Axes to draw on; a new 8x6 figure is created when omitted.
    **kwargs
        Forwarded unchanged to ``sc.pl.umap``.

    Returns
    -------
    The Axes that was passed in or created.
    """
    target_ax = ax
    if target_ax is None:
        target_ax = plt.subplots(figsize=(8, 6))[1]

    # show=False keeps scanpy from displaying the figure itself.
    sc.pl.umap(adata, color=color, ax=target_ax, show=False, **kwargs)
    return target_ax

# Styled example: colour by the 'leiden' column with the tab20 palette, no
# frame, empty title, and small legend labels placed on the data.
sc.pl.umap(
    adata,
    color='leiden',
    palette='tab20',
    frameon=False,
    title='',
    legend_loc='on data',
    legend_fontsize=8,
)

Correlation Plot (R)

library(corrplot)

# Pearson correlation between the rows of `top_genes_mat`; cor() works on
# columns, hence the transpose.
cor_mat <- cor(t(top_genes_mat), method = 'pearson')

# Lower-triangle colour map, rows/columns reordered by hierarchical clustering,
# with a 100-step blue-white-red palette.
corrplot(
    cor_mat,
    method = 'color',
    type = 'lower',
    order = 'hclust',
    tl.col = 'black',
    tl.cex = 0.7,
    col = colorRampPalette(c('#4DBBD5', 'white', '#E64B35'))(100)
)

Violin Plot with Split (R)

# Split violin of `expression` per `cluster`, filled by `condition`, with
# narrow box plots overlaid (outliers hidden).
# NOTE(review): geom_split_violin() is not defined or loaded anywhere in this
# document and does not appear among the libraries shown here - confirm which
# package or helper provides it before running this snippet.
# NOTE(review): only two fill colours are supplied, so `condition` presumably
# has exactly two levels - confirm against the data.
ggplot(df, aes(cluster, expression, fill = condition)) +
    geom_split_violin(alpha = 0.7) +
    geom_boxplot(width = 0.2, position = position_dodge(0.5), outlier.shape = NA) +
    scale_fill_manual(values = c('#4DBBD5', '#E64B35')) +
    theme_bw()

Survival Curves (R)

library(survival)
library(survminer)

# Survival curves of `time`/`status` stratified by `group`.
fit <- survfit(Surv(time, status) ~ group, data = df)

# Curves with a risk table and a p-value annotation.
# NOTE(review): the two legend labels and colours are matched to the strata by
# position - confirm that the first level of `group` is the 'Low' group.
ggsurvplot(
    fit,
    data = df,
    risk.table = TRUE,
    pval = TRUE,
    palette = c('#4DBBD5', '#E64B35'),
    legend.labs = c('Low', 'High')
)

Related Skills

  • data-visualization/ggplot2-fundamentals - Base plotting
  • data-visualization/color-palettes - Color selection
  • differential-expression/de-visualization - DE-specific plots
  • pathway-analysis/enrichment-visualization - Enrichment plots
Weekly Installs
3
Installed on
windsurf2
trae2
opencode2
codex2
claude-code2
antigravity2