skills/gptomics/bioskills/bio-imaging-mass-cytometry-phenotyping

bio-imaging-mass-cytometry-phenotyping

SKILL.md

Cell Phenotyping for IMC

Load Single-Cell Data

import anndata as ad
import scanpy as sc
import pandas as pd
import numpy as np

# Option 1: load an already segmented dataset from an h5ad file.
adata = ad.read_h5ad('imc_segmented.h5ad')

# Option 2: build the AnnData object from per-cell CSV exports instead.
# NOTE: running this after option 1 replaces the object loaded above.
# Assumes both CSVs list the same cells in the same row order -- TODO confirm
# against the segmentation export.
intensities = pd.read_csv('cell_intensities.csv')
cell_info = pd.read_csv('cell_info.csv')

adata = ad.AnnData(X=intensities.values)
adata.var_names = intensities.columns
adata.obs = cell_info

# The spatial plot later in this workflow reads adata.obsm['spatial'], which
# the CSV route would otherwise leave unset. Fill it from the centroid columns
# (the same ones exported at the end of the workflow) when they are present.
if {'centroid_x', 'centroid_y'}.issubset(cell_info.columns):
    adata.obsm['spatial'] = cell_info[['centroid_x', 'centroid_y']].to_numpy()

Data Transformation

# Arcsinh transformation (standard for cytometry)
def arcsinh_transform(adata, cofactor=5):
    '''Apply an arcsinh transform to ``adata.X`` in place.

    Every value is divided by ``cofactor`` and passed through ``np.arcsinh``.

    Parameters
    ----------
    adata
        Object whose ``X`` attribute holds the intensity matrix; ``X`` is
        replaced by the transformed matrix.
    cofactor
        Positive scaling factor. May be a scalar or an array that broadcasts
        against ``adata.X`` (for example one value per marker).

    Returns
    -------
    The same ``adata`` object, for call chaining.

    Raises
    ------
    ValueError
        If any cofactor is zero, negative or NaN. Zero would produce inf/NaN
        and a negative value would silently flip the sign of every intensity.
    '''
    # np.all keeps array cofactors working; `> 0` is False for NaN as well.
    if not np.all(np.asarray(cofactor) > 0):
        raise ValueError(f'cofactor must be positive, got {cofactor!r}')
    adata.X = np.arcsinh(adata.X / cofactor)
    return adata

# Arcsinh-transform the intensities with the default cofactor.
adata = arcsinh_transform(adata)

# sc.pp.scale below overwrites adata.X in place, so keep a copy of the
# arcsinh-transformed (un-scaled) intensities in a layer for later use.
adata.layers['arcsinh'] = adata.X.copy()

# Z-score normalization (values clipped at 10)
sc.pp.scale(adata, max_value=10)

Clustering-Based Phenotyping

# PCA and neighbors
# IMC panels are small, and the number of components must stay below both the
# number of cells and the number of markers, so cap the requested 15 instead
# of failing on a panel with 15 or fewer markers.
n_pcs = min(15, adata.n_obs - 1, adata.n_vars - 1)
sc.pp.pca(adata, n_comps=n_pcs)
sc.pp.neighbors(adata, n_neighbors=15, n_pcs=n_pcs)

# Clustering (labels are written to adata.obs['leiden'])
sc.tl.leiden(adata, resolution=0.5)

# UMAP for visualization
sc.tl.umap(adata)

# Plot the embedding colored by cluster; `save` is the output filename suffix.
sc.pl.umap(adata, color='leiden', save='_clusters.png')

Manual Gating

def gate_cells(adata, marker, threshold, above=True):
    '''Gate cells on the expression of a single marker.

    Parameters
    ----------
    adata
        Object indexable as ``adata[:, marker]`` whose ``X`` holds one column
        of values for that marker (dense array or scipy sparse matrix).
    marker
        Name of the marker to gate on.
    threshold
        Cut-off, in the same units as the values currently stored in ``X``.
    above
        If True, select cells strictly above the threshold; otherwise select
        cells strictly below it. Cells exactly at the threshold are selected
        by neither gate.

    Returns
    -------
    1-D boolean array with one entry per cell.
    '''
    values = adata[:, marker].X
    # Sparse matrices have no .flatten(); densify the single column first.
    if hasattr(values, 'toarray'):
        values = values.toarray()
    values = np.asarray(values).ravel()
    if above:
        return values > threshold
    return values < threshold

# Example gating strategy for T cells: one boolean obs column per marker,
# each produced by gate_cells with that marker's own cut-off.
for gate_column, gate_marker, gate_cutoff in (
    ('CD45_pos', 'CD45', 1.5),
    ('CD3_pos', 'CD3', 1.0),
    ('CD8_pos', 'CD8', 0.8),
    ('CD4_pos', 'CD4', 0.8),
):
    adata.obs[gate_column] = gate_cells(adata, gate_marker, gate_cutoff)

# Assign cell types
def assign_cell_type(row):
    '''Turn one row of gate flags into a cell-type label.

    ``row`` is indexed by the keys 'CD45_pos', 'CD3_pos', 'CD8_pos' and
    'CD4_pos'. Flags are checked in that order and later flags are only read
    when the earlier ones did not already decide the label. A cell positive
    for both CD8 and CD4 is labelled 'CD8 T cell'.
    '''
    # Lineage gates: failing one of these ends the decision immediately.
    for flag, label_if_negative in (
        ('CD45_pos', 'Other'),
        ('CD3_pos', 'Non-T immune'),
    ):
        if not row[flag]:
            return label_if_negative

    # T-cell subsets: the first positive flag wins.
    for flag, label_if_positive in (
        ('CD8_pos', 'CD8 T cell'),
        ('CD4_pos', 'CD4 T cell'),
    ):
        if row[flag]:
            return label_if_positive

    return 'T cell (other)'

# Label every cell: assign_cell_type is called once per row of adata.obs
# (axis=1) and the returned labels are stored in the 'cell_type' column.
adata.obs['cell_type'] = adata.obs.apply(assign_cell_type, axis=1)

Cluster Annotation

# Rank the markers that characterize each Leiden cluster (Wilcoxon test).
# scanpy's function names say "genes", but the ranked variables are whatever
# adata.var holds.
sc.tl.rank_genes_groups(adata, 'leiden', method='wilcoxon')
# Heatmap of the top 5 ranked markers per cluster; `save` is the filename suffix.
sc.pl.rank_genes_groups_heatmap(adata, n_genes=5, save='_markers.png')

# Manual annotation based on markers.
# Keys are Leiden cluster labels (strings); the values here are examples and
# must be adapted to the markers found for your own clusters.
cluster_annotation = {
    '0': 'Epithelial',
    '1': 'CD8 T cell',
    '2': 'CD4 T cell',
    '3': 'Macrophage',
    '4': 'Stromal',
    '5': 'B cell'
}

# Leiden can produce clusters that are not listed above. A plain .map() would
# label those cells NaN, and they would then drop out of groupby-based
# summaries and plots. Give them an explicit 'Unknown' label instead.
adata.obs['cell_type'] = (
    adata.obs['leiden']
    .astype(str)
    .map(cluster_annotation)
    .fillna('Unknown')
    .astype('category')
)

SOM-Based Clustering (FlowSOM-Style)

# FlowSOM-style clustering using minisom
# Note: For authentic FlowSOM, use the R CATALYST package which wraps FlowSOM
# This Python approach approximates the SOM + meta-clustering concept
from minisom import MiniSom
from sklearn.cluster import AgglomerativeClustering

# Markers for clustering
phenotype_markers = ['CD45', 'CD3', 'CD8', 'CD4', 'CD20', 'CD68', 'E-cadherin']
X = adata[:, phenotype_markers].X
# The SOM is trained and queried row by row, so work on a dense ndarray.
if hasattr(X, 'toarray'):
    X = X.toarray()
X = np.asarray(X)

# Self-Organizing Map
# Grid dimensions are named once so that the node indexing below cannot drift
# out of sync with the map size.
som_rows, som_cols = 10, 10
n_metaclusters = 10
# A fixed seed makes weight initialisation and sample order reproducible.
som = MiniSom(som_rows, som_cols, X.shape[1], sigma=1.5, learning_rate=0.5,
              random_seed=0)
som.random_weights_init(X)
som.train_random(X, 1000)

# Get cluster assignments: winning node (row, col) per cell, flattened to the
# row-major node index used by the reshaped weight matrix below.
winner_coordinates = np.array([som.winner(x) for x in X])
som_clusters = winner_coordinates[:, 0] * som_cols + winner_coordinates[:, 1]

# Meta-clustering of the SOM node weight vectors
meta_clustering = AgglomerativeClustering(n_clusters=n_metaclusters)
meta_labels = meta_clustering.fit_predict(som.get_weights().reshape(-1, X.shape[1]))

# Assign to cells: look up each cell's node in the node -> meta-cluster table
adata.obs['som_cluster'] = meta_labels[som_clusters]

Automated Annotation

# Use reference-based annotation (similar to CellTypist)
from sklearn.neighbors import KNeighborsClassifier

# If you have a reference dataset with known labels
ref_data = ad.read_h5ad('reference_imc.h5ad')

# The classifier compares marker columns position by position, so restrict
# both datasets to the markers they share, in the same order. Without this a
# different panel or column order gives an error or silently wrong labels.
# NOTE(review): the reference should also be transformed/scaled the same way
# as adata -- confirm before trusting the predictions.
query_markers = set(adata.var_names)
shared_markers = [m for m in ref_data.var_names if m in query_markers]
if not shared_markers:
    raise ValueError('reference and query datasets share no markers')
ref_X = ref_data[:, shared_markers].X
query_X = adata[:, shared_markers].X

# Train classifier
knn = KNeighborsClassifier(n_neighbors=15)
knn.fit(ref_X, ref_data.obs['cell_type'])

# Predict a label per cell and keep the highest class probability with it
adata.obs['predicted_type'] = knn.predict(query_X)
adata.obs['prediction_prob'] = knn.predict_proba(query_X).max(axis=1)

Visualize Phenotypes

import matplotlib.pyplot as plt

# UMAP embedding, one color per annotated cell type
sc.pl.umap(adata, color='cell_type', save='_celltypes.png')

# Marker-by-cell-type matrix plot with a dendrogram over the groups
sc.pl.matrixplot(
    adata,
    phenotype_markers,
    groupby='cell_type',
    dendrogram=True,
    cmap='RdBu_r',
    save='_heatmap.png',
)

# Spatial scatter of cell positions, one series per cell type
fig, ax = plt.subplots(figsize=(10, 10))
spatial = adata.obsm['spatial']
cell_type_labels = adata.obs['cell_type']
for ct in cell_type_labels.unique():
    mask = cell_type_labels == ct
    xs = spatial[mask, 0]
    ys = spatial[mask, 1]
    ax.scatter(xs, ys, s=1, label=ct, alpha=0.7)
ax.legend(markerscale=5)  # enlarge legend markers; the points are 1 px
ax.set_aspect('equal')
fig.savefig('spatial_celltypes.png', dpi=150)

Cell Type Frequencies

# Cell counts per image/ROI (rows) and cell type (columns)
freq = (
    adata.obs
    .groupby(['image_id', 'cell_type'])
    .size()
    .unstack(fill_value=0)
)
# Row-wise percentages: each image's counts divided by its total cell count
cells_per_image = freq.sum(axis=1)
freq_pct = freq.div(cells_per_image, axis=0) * 100

# Stacked bar chart of the composition of every image
composition_ax = freq_pct.plot(kind='bar', stacked=True, figsize=(12, 6))
composition_ax.set_ylabel('Percentage')
composition_ax.set_title('Cell Type Composition')
composition_ax.figure.tight_layout()
composition_ax.figure.savefig('celltype_frequencies.png')

Save Results

# Write the annotated AnnData object to disk
adata.write('imc_phenotyped.h5ad')

# Export a per-cell table with identifier, phenotype and centroid position
export_columns = ['cell_id', 'cell_type', 'centroid_x', 'centroid_y']
phenotype_table = adata.obs[export_columns]
phenotype_table.to_csv('cell_phenotypes.csv', index=False)

Related Skills

  • cell-segmentation - Generate single-cell data
  • spatial-analysis - Analyze spatial patterns of cell types
  • single-cell/cell-annotation - Similar annotation concepts
Weekly Installs: 3

Installed on:
  • windsurf: 2
  • trae: 2
  • opencode: 2
  • codex: 2
  • claude-code: 2
  • antigravity: 2